From b9024582b0957620e8225c9e722457841edfbe7d Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Fri, 28 Apr 2023 09:35:56 +1000 Subject: [PATCH 001/119] fix: Ensure InfluxQL internal errors have a distinct message Closes #7606 --- iox_query_influxql/src/plan/error.rs | 19 ++++++++++++++++++- iox_query_influxql/src/plan/rewriter.rs | 10 +++++----- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/iox_query_influxql/src/plan/error.rs b/iox_query_influxql/src/plan/error.rs index c6797ce5c9..6b12535ec9 100644 --- a/iox_query_influxql/src/plan/error.rs +++ b/iox_query_influxql/src/plan/error.rs @@ -24,7 +24,11 @@ pub(crate) mod map { #[derive(Debug, Error)] enum PlannerError { /// An unexpected error that represents a bug in IOx. - #[error("internal: {0}")] + /// + /// The message is prefixed with `InfluxQL internal error: `, + /// which may be used by clients to identify internal InfluxQL + /// errors. + #[error("InfluxQL internal error: {0}")] Internal(String), } @@ -42,4 +46,17 @@ pub(crate) mod map { pub(crate) fn not_implemented(feature: impl Into) -> DataFusionError { DataFusionError::NotImplemented(feature.into()) } + + #[cfg(test)] + mod test { + use crate::plan::error::map::PlannerError; + + #[test] + fn test_planner_error_display() { + // The InfluxQL internal error: + assert!(PlannerError::Internal("****".to_owned()) + .to_string() + .starts_with("InfluxQL internal error: ")) + } + } } diff --git a/iox_query_influxql/src/plan/rewriter.rs b/iox_query_influxql/src/plan/rewriter.rs index 08a05f9bc0..da8f0f7f7c 100644 --- a/iox_query_influxql/src/plan/rewriter.rs +++ b/iox_query_influxql/src/plan/rewriter.rs @@ -1530,15 +1530,15 @@ mod test { let sel = parse_select("SELECT count(distinct('foo')) FROM cpu"); assert_error!(select_statement_info(&sel), DataFusionError::Plan(ref s) if s == "expected field argument in distinct()"); let sel = parse_select("SELECT count(distinct foo) FROM cpu"); - assert_error!(select_statement_info(&sel), DataFusionError::External(ref s) if s.to_string() == "internal: unexpected distinct clause in count"); + assert_error!(select_statement_info(&sel), DataFusionError::External(ref s) if s.to_string() == "InfluxQL internal error: unexpected distinct clause in count"); // Test rules for math functions let sel = parse_select("SELECT abs(usage_idle) FROM cpu"); select_statement_info(&sel).unwrap(); let sel = parse_select("SELECT abs(*) + ceil(foo) FROM cpu"); - assert_error!(select_statement_info(&sel), DataFusionError::External(ref s) if s.to_string() == "internal: unexpected wildcard"); + assert_error!(select_statement_info(&sel), DataFusionError::External(ref s) if s.to_string() == "InfluxQL internal error: unexpected wildcard"); let sel = parse_select("SELECT abs(/f/) + ceil(foo) FROM cpu"); - assert_error!(select_statement_info(&sel), DataFusionError::External(ref s) if s.to_string() == "internal: unexpected regex"); + assert_error!(select_statement_info(&sel), DataFusionError::External(ref s) if s.to_string() == "InfluxQL internal error: unexpected regex"); // Fallible @@ -1560,11 +1560,11 @@ mod test { // wildcard expansion is not supported in binary expressions for aggregates let sel = parse_select("SELECT count(*) + count(foo) FROM cpu"); - assert_error!(select_statement_info(&sel), DataFusionError::External(ref s) if s.to_string() == "internal: unexpected wildcard or regex"); + assert_error!(select_statement_info(&sel), DataFusionError::External(ref s) if s.to_string() == "InfluxQL internal error: unexpected wildcard or regex"); // regex 
expansion is not supported in binary expressions let sel = parse_select("SELECT sum(/foo/) + count(foo) FROM cpu"); - assert_error!(select_statement_info(&sel), DataFusionError::External(ref s) if s.to_string() == "internal: unexpected wildcard or regex"); + assert_error!(select_statement_info(&sel), DataFusionError::External(ref s) if s.to_string() == "InfluxQL internal error: unexpected wildcard or regex"); // aggregate functions require a field reference let sel = parse_select("SELECT sum(1) FROM cpu"); From 482221a7d4d29b7dd90acfb1487059a11ab9ec41 Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Fri, 28 Apr 2023 09:56:07 +1000 Subject: [PATCH 002/119] chore: Redundant attribute --- iox_query_influxql/src/plan/rewriter.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/iox_query_influxql/src/plan/rewriter.rs b/iox_query_influxql/src/plan/rewriter.rs index da8f0f7f7c..220426f79f 100644 --- a/iox_query_influxql/src/plan/rewriter.rs +++ b/iox_query_influxql/src/plan/rewriter.rs @@ -1,5 +1,3 @@ -#![allow(dead_code)] - use crate::plan::expr_type_evaluator::evaluate_type; use crate::plan::field::{field_by_name, field_name}; use crate::plan::field_mapper::{field_and_dimensions, FieldTypeMap, TagSet}; From 89143a72fa427f21a771735df9e8d72d6a1fa749 Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Fri, 28 Apr 2023 10:08:30 +1000 Subject: [PATCH 003/119] chore: Remove schema::Schema dependency Subquery support cannot rely on IOx schema, as this metadata does not propagate through DataFusion `LogicalPlan` nodes. --- iox_query_influxql/src/plan/planner.rs | 20 ++++++++-------- .../src/plan/planner_rewrite_expression.rs | 24 +++++++------------ iox_query_influxql/src/plan/util.rs | 23 +++++++----------- iox_query_influxql/src/plan/var_ref.rs | 21 ++++++++++------ 4 files changed, 42 insertions(+), 46 deletions(-) diff --git a/iox_query_influxql/src/plan/planner.rs b/iox_query_influxql/src/plan/planner.rs index 6e3a16ab58..1cc424ffd9 100644 --- a/iox_query_influxql/src/plan/planner.rs +++ b/iox_query_influxql/src/plan/planner.rs @@ -11,7 +11,7 @@ use crate::plan::rewriter::{ rewrite_statement, select_statement_info, ProjectionType, SelectStatementInfo, }; use crate::plan::util::{binary_operator_to_df_operator, rebase_expr, Schemas}; -use crate::plan::var_ref::{column_type_to_var_ref_data_type, var_ref_data_type_to_data_type}; +use crate::plan::var_ref::{data_type_to_var_ref_data_type, var_ref_data_type_to_data_type}; use crate::plan::{error, planner_rewrite_expression}; use arrow::array::{StringBuilder, StringDictionaryBuilder}; use arrow::datatypes::{DataType, Field as ArrowField, Int32Type, Schema as ArrowSchema}; @@ -599,11 +599,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> { // Exclude tags that do not exist in the current table schema. group_by_exprs.extend(group_by_tag_set.iter().filter_map(|name| { - if schemas - .iox_schema - .field_by_name(name) - .map_or(false, |(dt, _)| dt == InfluxColumnType::Tag) - { + if schemas.is_tag_field(name) { Some(name.as_expr()) } else { None @@ -991,7 +987,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> { /// Map an InfluxQL [`IQLExpr`] to a DataFusion [`Expr`]. 
fn expr_to_df_expr(&self, ctx: &Context<'_>, iql: &IQLExpr, schemas: &Schemas) -> Result { - let iox_schema = &schemas.iox_schema; + let schema = &schemas.df_schema; match iql { // rewriter is expected to expand wildcard expressions IQLExpr::Wildcard(_) => error::internal("unexpected wildcard in projection"), @@ -1009,12 +1005,16 @@ impl<'a> InfluxQLToLogicalPlan<'a> { "time".as_expr() } (ExprScope::Projection, "time") => "time".as_expr(), - (_, name) => match iox_schema.field_by_name(name) { - Some((col_type, _)) => { + (_, name) => match schema + .fields_with_unqualified_name(name) + .first() + .map(|f| f.data_type().clone()) + { + Some(col_type) => { let column = name.as_expr(); + let src_type = data_type_to_var_ref_data_type(col_type)?; match opt_dst_type { Some(dst_type) => { - let src_type = column_type_to_var_ref_data_type(col_type); if src_type == *dst_type { column } else if src_type.is_numeric_type() diff --git a/iox_query_influxql/src/plan/planner_rewrite_expression.rs b/iox_query_influxql/src/plan/planner_rewrite_expression.rs index b0b4dc12fb..08a0189a7b 100644 --- a/iox_query_influxql/src/plan/planner_rewrite_expression.rs +++ b/iox_query_influxql/src/plan/planner_rewrite_expression.rs @@ -132,7 +132,6 @@ use datafusion::logical_expr::{ binary_expr, cast, coalesce, lit, BinaryExpr, Expr, ExprSchemable, Operator, }; use datafusion::optimizer::utils::{conjunction, disjunction}; -use schema::{InfluxColumnType, InfluxFieldType}; /// Perform a series of passes to rewrite `expr` in compliance with InfluxQL behavior /// in an effort to ensure the query executes without error. @@ -770,19 +769,17 @@ impl<'a> TreeNodeRewriter for FixRegularExpressions<'a> { op: op @ (Operator::RegexMatch | Operator::RegexNotMatch), right, }) => { - if let Expr::Column(ref col) = *left { - match self.schemas.iox_schema.field_by_name(&col.name) { - Some((InfluxColumnType::Tag, _)) => { + Ok(if let Expr::Column(ref col) = *left { + match self.schemas.df_schema.field_from_column(col)?.data_type() { + DataType::Dictionary(..) 
=> { // Regular expressions expect to be compared with a Utf8 let left = Box::new(left.cast_to(&DataType::Utf8, &self.schemas.df_schema)?); - Ok(Expr::BinaryExpr(BinaryExpr { left, op, right })) - } - Some((InfluxColumnType::Field(InfluxFieldType::String), _)) => { - Ok(Expr::BinaryExpr(BinaryExpr { left, op, right })) + Expr::BinaryExpr(BinaryExpr { left, op, right }) } + DataType::Utf8 => Expr::BinaryExpr(BinaryExpr { left, op, right }), // Any other column type should evaluate to false - _ => Ok(lit(false)), + _ => lit(false), } } else { // If this is not a simple column expression, evaluate to false, @@ -798,8 +795,8 @@ impl<'a> TreeNodeRewriter for FixRegularExpressions<'a> { // Reference example: // // * `SELECT f64 FROM m0 WHERE tag0 = '' + tag0` - Ok(lit(false)) - } + lit(false) + }) } _ => Ok(expr), } @@ -829,10 +826,7 @@ mod test { .build() .expect("schema failed"); let df_schema: DFSchemaRef = Arc::clone(iox_schema.inner()).to_dfschema_ref().unwrap(); - Schemas { - df_schema, - iox_schema, - } + Schemas { df_schema } } #[test] diff --git a/iox_query_influxql/src/plan/util.rs b/iox_query_influxql/src/plan/util.rs index 5ddd9d5894..afd0d507d1 100644 --- a/iox_query_influxql/src/plan/util.rs +++ b/iox_query_influxql/src/plan/util.rs @@ -1,6 +1,6 @@ use crate::plan::{error, util_copy}; use arrow::datatypes::{DataType, TimeUnit}; -use datafusion::common::{DFSchema, DFSchemaRef, Result}; +use datafusion::common::{DFSchemaRef, Result}; use datafusion::logical_expr::utils::expr_as_column_expr; use datafusion::logical_expr::{lit, Expr, ExprSchemable, LogicalPlan, Operator}; use datafusion::scalar::ScalarValue; @@ -9,7 +9,6 @@ use influxdb_influxql_parser::literal::Number; use influxdb_influxql_parser::string::Regex; use query_functions::clean_non_meta_escapes; use query_functions::coalesce_struct::coalesce_struct; -use schema::Schema; use std::sync::Arc; pub(in crate::plan) fn binary_operator_to_df_operator(op: BinaryOperator) -> Operator { @@ -25,29 +24,25 @@ pub(in crate::plan) fn binary_operator_to_df_operator(op: BinaryOperator) -> Ope } } -/// Return the IOx schema for the specified DataFusion schema. -pub(in crate::plan) fn schema_from_df(schema: &DFSchema) -> Result { - let s: Arc = Arc::new(schema.into()); - s.try_into().map_err(|err| { - error::map::internal(format!( - "unable to convert DataFusion schema to IOx schema: {err}" - )) - }) -} - /// Container for both the DataFusion and equivalent IOx schema. pub(in crate::plan) struct Schemas { pub(in crate::plan) df_schema: DFSchemaRef, - pub(in crate::plan) iox_schema: Schema, } impl Schemas { pub(in crate::plan) fn new(df_schema: &DFSchemaRef) -> Result { Ok(Self { df_schema: Arc::clone(df_schema), - iox_schema: schema_from_df(df_schema)?, }) } + + /// Returns `true` if the field `name` is a tag type. + pub(super) fn is_tag_field(&self, name: &str) -> bool { + self.df_schema + .fields() + .iter() + .any(|f| f.name() == name && matches!(f.data_type(), DataType::Dictionary(..))) + } } /// Sanitize an InfluxQL regular expression and create a compiled [`regex::Regex`]. 
diff --git a/iox_query_influxql/src/plan/var_ref.rs b/iox_query_influxql/src/plan/var_ref.rs index cb5f41db16..0e658072d3 100644 --- a/iox_query_influxql/src/plan/var_ref.rs +++ b/iox_query_influxql/src/plan/var_ref.rs @@ -1,6 +1,8 @@ +use crate::plan::error; use arrow::datatypes::DataType; +use datafusion::common::Result; use influxdb_influxql_parser::expression::VarRefDataType; -use schema::{InfluxColumnType, InfluxFieldType}; +use schema::InfluxFieldType; pub(crate) fn var_ref_data_type_to_data_type(v: VarRefDataType) -> Option { match v { @@ -25,12 +27,17 @@ pub(crate) fn field_type_to_var_ref_data_type(v: InfluxFieldType) -> VarRefDataT } } -/// Maps an [`InfluxColumnType`] to a [`VarRefDataType`]. -pub(crate) fn column_type_to_var_ref_data_type(v: InfluxColumnType) -> VarRefDataType { - match v { - InfluxColumnType::Tag => VarRefDataType::Tag, - InfluxColumnType::Field(ft) => field_type_to_var_ref_data_type(ft), - InfluxColumnType::Timestamp => VarRefDataType::Timestamp, +/// Maps an Arrow [`DataType`] to a [`VarRefDataType`]. +pub(crate) fn data_type_to_var_ref_data_type(dt: DataType) -> Result { + match dt { + DataType::Dictionary(..) => Ok(VarRefDataType::Tag), + DataType::Timestamp(..) => Ok(VarRefDataType::Timestamp), + DataType::Utf8 => Ok(VarRefDataType::String), + DataType::Int64 => Ok(VarRefDataType::Integer), + DataType::UInt64 => Ok(VarRefDataType::Unsigned), + DataType::Float64 => Ok(VarRefDataType::Float), + DataType::Boolean => Ok(VarRefDataType::Boolean), + _ => error::internal(format!("unable to map Arrow type {dt} to VarRefDataType")), } } From 24378bd460ccd167f4717cf198c57be4ccdfe3f1 Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Wed, 3 May 2023 07:47:14 +1000 Subject: [PATCH 004/119] chore: `time` is always present and at first position of projection --- .../src/expression/arithmetic.rs | 16 +++ iox_query_influxql/src/plan/rewriter.rs | 127 +++++++++++++++--- 2 files changed, 126 insertions(+), 17 deletions(-) diff --git a/influxdb_influxql_parser/src/expression/arithmetic.rs b/influxdb_influxql_parser/src/expression/arithmetic.rs index 3c3e784c4c..f36037f006 100644 --- a/influxdb_influxql_parser/src/expression/arithmetic.rs +++ b/influxdb_influxql_parser/src/expression/arithmetic.rs @@ -194,6 +194,22 @@ impl Display for Expr { } } +/// Traits to help creating InfluxQL [`Expr`]s containing +/// a [`VarRef`]. +pub trait AsVarRefExpr { + /// Creates an InfluxQL [`VarRef`] expression. + fn to_var_ref_expr(&self) -> Expr; +} + +impl AsVarRefExpr for str { + fn to_var_ref_expr(&self) -> Expr { + Expr::VarRef(VarRef { + name: self.into(), + data_type: None, + }) + } +} + /// Specifies the data type of a wildcard (`*`) when using the `::` operator. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum WildcardType { diff --git a/iox_query_influxql/src/plan/rewriter.rs b/iox_query_influxql/src/plan/rewriter.rs index 220426f79f..227e3ead89 100644 --- a/iox_query_influxql/src/plan/rewriter.rs +++ b/iox_query_influxql/src/plan/rewriter.rs @@ -5,7 +5,9 @@ use crate::plan::{error, util, SchemaProvider}; use datafusion::common::{DataFusionError, Result}; use influxdb_influxql_parser::common::{MeasurementName, QualifiedMeasurementName}; use influxdb_influxql_parser::expression::walk::{walk_expr, walk_expr_mut}; -use influxdb_influxql_parser::expression::{Call, Expr, VarRef, VarRefDataType, WildcardType}; +use influxdb_influxql_parser::expression::{ + AsVarRefExpr, Call, Expr, VarRef, VarRefDataType, WildcardType, +}; use influxdb_influxql_parser::functions::is_scalar_math_function; use influxdb_influxql_parser::identifier::Identifier; use influxdb_influxql_parser::literal::Literal; @@ -18,6 +20,73 @@ use std::borrow::Borrow; use std::collections::{HashMap, HashSet}; use std::ops::{ControlFlow, Deref}; +/// Recursively rewrite the specified [`SelectStatement`] by performing a series of passes +/// to validate and normalize the statement. +pub(crate) fn rewrite_statement( + s: &dyn SchemaProvider, + q: &SelectStatement, +) -> Result { + let mut stmt = q.clone(); + from_expand_wildcards(s, &mut stmt)?; + field_list_expand_wildcards(s, &mut stmt)?; + from_drop_empty(s, &mut stmt); + field_list_normalize_time(&mut stmt); + field_list_rewrite_aliases(&mut stmt.fields)?; + + Ok(stmt) +} + +/// Ensure the time field is added to all projections, +/// and is moved to the first position, which is a requirement +/// for InfluxQL compatibility. +fn field_list_normalize_time(stmt: &mut SelectStatement) { + fn normalize_time(stmt: &mut SelectStatement, is_subquery: bool) { + let mut fields = stmt.fields.take(); + + if let Some(f) = match fields + .iter() + .find_position( + |f| matches!(&f.expr, Expr::VarRef(VarRef { name, .. }) if name.deref() == "time"), + ) + .map(|(i, _)| i) + { + Some(0) => None, + Some(idx) => Some(fields.remove(idx)), + None => Some(Field { + expr: "time".to_var_ref_expr(), + alias: None, + }), + } { + fields.insert(0, f) + } + + let f = &mut fields[0]; + + // time aliases in subqueries is ignored + if f.alias.is_none() || is_subquery { + f.alias = Some("time".into()) + } + + if let Expr::VarRef(VarRef { + ref mut data_type, .. + }) = f.expr + { + *data_type = Some(VarRefDataType::Timestamp); + } + + stmt.fields.replace(fields); + } + + normalize_time(stmt, false); + + for stmt in stmt.from.iter_mut().filter_map(|ms| match ms { + MeasurementSelection::Subquery(stmt) => Some(stmt), + _ => None, + }) { + normalize_time(stmt, true) + } +} + /// Recursively expand the `from` clause of `stmt` and any subqueries. fn from_expand_wildcards(s: &dyn SchemaProvider, stmt: &mut SelectStatement) -> Result<()> { let mut new_from = Vec::new(); @@ -535,21 +604,6 @@ fn field_list_rewrite_aliases(field_list: &mut FieldList) -> Result<()> { Ok(()) } -/// Recursively rewrite the specified [`SelectStatement`], expanding any wildcards or regular expressions -/// found in the projection list, `FROM` clause or `GROUP BY` clause. 
-pub(crate) fn rewrite_statement( - s: &dyn SchemaProvider, - q: &SelectStatement, -) -> Result { - let mut stmt = q.clone(); - from_expand_wildcards(s, &mut stmt)?; - field_list_expand_wildcards(s, &mut stmt)?; - from_drop_empty(s, &mut stmt); - field_list_rewrite_aliases(&mut stmt.fields)?; - - Ok(stmt) -} - /// Check the length of the arguments slice is within /// the expected bounds. macro_rules! check_exp_args { @@ -1265,13 +1319,52 @@ pub(crate) fn select_statement_info(q: &SelectStatement) -> Result Date: Wed, 3 May 2023 09:23:15 +1000 Subject: [PATCH 005/119] chore: add additional tests --- iox_query_influxql/src/plan/rewriter.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/iox_query_influxql/src/plan/rewriter.rs b/iox_query_influxql/src/plan/rewriter.rs index 227e3ead89..07721395bc 100644 --- a/iox_query_influxql/src/plan/rewriter.rs +++ b/iox_query_influxql/src/plan/rewriter.rs @@ -1345,6 +1345,7 @@ mod test { "SELECT time::timestamp AS time, foo, bar FROM cpu" ); + // Maintains alias for time column let mut sel = parse_select("SELECT time as ts, foo, bar FROM cpu"); field_list_normalize_time(&mut sel); assert_eq!( @@ -1354,7 +1355,7 @@ mod test { // subqueries - // adds time to to first position + // adds time to to first position of root and subquery let mut sel = parse_select("SELECT foo FROM (SELECT foo, bar FROM cpu)"); field_list_normalize_time(&mut sel); assert_eq!( @@ -1362,7 +1363,15 @@ mod test { "SELECT time::timestamp AS time, foo FROM (SELECT time::timestamp AS time, foo, bar FROM cpu)" ); - // TODO(sgc): add remaining subquery tests + // Removes and ignores alias of time column within subquery, ignores alias in root and adds time column + // + // Whilst confusing, this matching InfluxQL behaviour + let mut sel = parse_select("SELECT ts, foo FROM (SELECT time as ts, foo, bar FROM cpu)"); + field_list_normalize_time(&mut sel); + assert_eq!( + sel.to_string(), + "SELECT time::timestamp AS time, ts, foo FROM (SELECT time::timestamp AS time, foo, bar FROM cpu)" + ); } #[test] From 19ea80390a1aa873d4b65eb49701724562c18912 Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Wed, 3 May 2023 11:04:54 +1000 Subject: [PATCH 006/119] chore: ignore time column when processing SELECT --- .../cases/in/issue_6112.influxql.expected | 22 +++--- iox_query_influxql/src/plan/planner.rs | 26 +++---- iox_query_influxql/src/plan/rewriter.rs | 70 ++++++++++--------- 3 files changed, 56 insertions(+), 62 deletions(-) diff --git a/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql.expected b/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql.expected index 9581b93e23..c69c7137eb 100644 --- a/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql.expected +++ b/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql.expected @@ -70,17 +70,17 @@ name: m0 +---------------------+------+-------+ -- InfluxQL: SELECT f64, tag0, time FROM m0; name: m0 -+------+-------+---------------------+ -| f64 | tag0 | time | -+------+-------+---------------------+ -| 10.1 | val00 | 2022-10-31T02:00:00 | -| 11.3 | val01 | 2022-10-31T02:00:00 | -| 10.4 | val02 | 2022-10-31T02:00:00 | -| 21.2 | val00 | 2022-10-31T02:00:10 | -| 18.9 | val00 | 2022-10-31T02:00:10 | -| 11.2 | val00 | 2022-10-31T02:00:20 | -| 19.2 | val00 | 2022-10-31T02:00:30 | -+------+-------+---------------------+ ++---------------------+------+-------+ +| time | f64 | tag0 | ++---------------------+------+-------+ +| 2022-10-31T02:00:00 | 10.1 | val00 | +| 2022-10-31T02:00:00 | 11.3 
| val01 | +| 2022-10-31T02:00:00 | 10.4 | val02 | +| 2022-10-31T02:00:10 | 21.2 | val00 | +| 2022-10-31T02:00:10 | 18.9 | val00 | +| 2022-10-31T02:00:20 | 11.2 | val00 | +| 2022-10-31T02:00:30 | 19.2 | val00 | ++---------------------+------+-------+ -- InfluxQL: SELECT f64, f64 * 2, i64, i64 + i64 FROM m0; name: m0 +---------------------+------+-------+-----+---------+ diff --git a/iox_query_influxql/src/plan/planner.rs b/iox_query_influxql/src/plan/planner.rs index 1cc424ffd9..e5ed74fe67 100644 --- a/iox_query_influxql/src/plan/planner.rs +++ b/iox_query_influxql/src/plan/planner.rs @@ -59,7 +59,7 @@ use influxdb_influxql_parser::{ expression::Expr as IQLExpr, identifier::Identifier, literal::Literal, - select::{Field, FieldList, FromMeasurementClause, MeasurementSelection, SelectStatement}, + select::{Field, FromMeasurementClause, MeasurementSelection, SelectStatement}, statement::Statement, }; use iox_query::config::{IoxConfigExt, MetadataCutoff}; @@ -284,18 +284,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> { .with_timezone(select.timezone) .with_group_by_fill(select); - // The `time` column is always present in the result set - let mut fields = if find_time_column_index(&select.fields).is_none() { - vec![Field { - expr: IQLExpr::VarRef(VarRef { - name: "time".into(), - data_type: Some(VarRefDataType::Timestamp), - }), - alias: Some("time".into()), - }] - } else { - vec![] - }; + // Skip the `time` column + let fields_no_time = &select.fields[1..]; + // always start with the time column + let mut fields = vec![select.fields.first().cloned().unwrap()]; // group_by_tag_set : a list of tag columns specified in the GROUP BY clause // projection_tag_set : a list of tag columns specified exclusively in the SELECT projection @@ -304,7 +296,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> { let (group_by_tag_set, projection_tag_set, is_projected) = if let Some(group_by) = &select.group_by { let mut tag_columns = - find_tag_and_unknown_columns(&select.fields).collect::>(); + find_tag_and_unknown_columns(fields_no_time).collect::>(); // Find the list of tag keys specified in the `GROUP BY` clause, and // whether any of the tag keys are also projected in the SELECT list. @@ -344,13 +336,13 @@ impl<'a> InfluxQLToLogicalPlan<'a> { is_projected, ) } else { - let tag_columns = find_tag_and_unknown_columns(&select.fields) + let tag_columns = find_tag_and_unknown_columns(fields_no_time) .sorted() .collect::>(); (vec![], tag_columns, vec![]) }; - fields.extend(select.fields.iter().cloned()); + fields.extend(fields_no_time.iter().cloned()); // Build the first non-empty plan let plan = { @@ -2095,7 +2087,7 @@ fn is_aggregate_field(f: &Field) -> bool { /// Find all the columns where the resolved data type /// is a tag or is [`None`], which is unknown. -fn find_tag_and_unknown_columns(fields: &FieldList) -> impl Iterator { +fn find_tag_and_unknown_columns(fields: &[Field]) -> impl Iterator { fields.iter().filter_map(|f| match &f.expr { IQLExpr::VarRef(VarRef { name, diff --git a/iox_query_influxql/src/plan/rewriter.rs b/iox_query_influxql/src/plan/rewriter.rs index 07721395bc..9042cc04e9 100644 --- a/iox_query_influxql/src/plan/rewriter.rs +++ b/iox_query_influxql/src/plan/rewriter.rs @@ -779,6 +779,8 @@ impl FieldChecker { impl FieldChecker { fn check_expr(&mut self, e: &Expr) -> Result<()> { match e { + // The `time` column is ignored + Expr::VarRef(VarRef { name, .. 
}) if name.deref() == "time" => Ok(()), Expr::VarRef(_) => { self.has_non_aggregate_fields = true; Ok(()) @@ -1679,7 +1681,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT usage_user::float AS usage_user FROM cpu" + "SELECT time::timestamp AS time, usage_user::float AS usage_user FROM cpu" ); // Duplicate columns do not have conflicting aliases @@ -1687,7 +1689,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT usage_user::float AS usage_user, usage_user::float AS usage_user_1 FROM cpu" + "SELECT time::timestamp AS time, usage_user::float AS usage_user, usage_user::float AS usage_user_1 FROM cpu" ); // Multiple aliases with no conflicts @@ -1695,21 +1697,21 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT usage_user::float AS usage_user_1, usage_user::float AS usage_user FROM cpu" + "SELECT time::timestamp AS time, usage_user::float AS usage_user_1, usage_user::float AS usage_user FROM cpu" ); // Multiple aliases with conflicts let stmt = parse_select("SELECT usage_user as usage_user_1, usage_user, usage_user, usage_user as usage_user_2, usage_user, usage_user_2 FROM cpu"); let stmt = rewrite_statement(&namespace, &stmt).unwrap(); - assert_eq!(stmt.to_string(), "SELECT usage_user::float AS usage_user_1, usage_user::float AS usage_user, usage_user::float AS usage_user_3, usage_user::float AS usage_user_2, usage_user::float AS usage_user_4, usage_user_2 AS usage_user_2_1 FROM cpu"); + assert_eq!(stmt.to_string(), "SELECT time::timestamp AS time, usage_user::float AS usage_user_1, usage_user::float AS usage_user, usage_user::float AS usage_user_3, usage_user::float AS usage_user_2, usage_user::float AS usage_user_4, usage_user_2 AS usage_user_2_1 FROM cpu"); // Only include measurements with at least one field projection let stmt = parse_select("SELECT usage_idle FROM cpu, disk"); let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT usage_idle::float AS usage_idle FROM cpu" + "SELECT time::timestamp AS time, usage_idle::float AS usage_idle FROM cpu" ); // Rewriting FROM clause @@ -1719,7 +1721,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT bytes_free::integer AS bytes_free, bytes_read::integer AS bytes_read FROM disk, diskio" + "SELECT time::timestamp AS time, bytes_free::integer AS bytes_free, bytes_read::integer AS bytes_read FROM disk, diskio" ); // Regex matches multiple measurement, but only one has a matching field @@ -1727,7 +1729,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT bytes_free::integer AS bytes_free FROM disk" + "SELECT time::timestamp AS time, bytes_free::integer AS bytes_free FROM disk" ); // Exact, no match @@ -1747,14 +1749,14 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT cpu::tag AS cpu, host::tag AS host, region::tag AS region, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu" + "SELECT time::timestamp AS time, cpu::tag AS cpu, host::tag AS host, region::tag AS region, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu" ); let stmt = parse_select("SELECT * FROM cpu, disk"); let stmt = rewrite_statement(&namespace, 
&stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT bytes_free::integer AS bytes_free, bytes_used::integer AS bytes_used, cpu::tag AS cpu, device::tag AS device, host::tag AS host, region::tag AS region, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu, disk" + "SELECT time::timestamp AS time, bytes_free::integer AS bytes_free, bytes_used::integer AS bytes_used, cpu::tag AS cpu, device::tag AS device, host::tag AS host, region::tag AS region, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu, disk" ); // Regular expression selects fields from multiple measurements @@ -1762,7 +1764,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT bytes_free::integer AS bytes_free, bytes_used::integer AS bytes_used, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu, disk" + "SELECT time::timestamp AS time, bytes_free::integer AS bytes_free, bytes_used::integer AS bytes_used, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu, disk" ); // Selective wildcard for tags @@ -1770,7 +1772,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT cpu::tag AS cpu, host::tag AS host, region::tag AS region, usage_idle::float AS usage_idle FROM cpu" + "SELECT time::timestamp AS time, cpu::tag AS cpu, host::tag AS host, region::tag AS region, usage_idle::float AS usage_idle FROM cpu" ); // Selective wildcard for tags only should not select any measurements @@ -1783,7 +1785,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu" + "SELECT time::timestamp AS time, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu" ); // Mixed fields and wildcards @@ -1791,7 +1793,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT usage_idle::float AS usage_idle, cpu::tag AS cpu, host::tag AS host, region::tag AS region FROM cpu" + "SELECT time::timestamp AS time, usage_idle::float AS usage_idle, cpu::tag AS cpu, host::tag AS host, region::tag AS region FROM cpu" ); // GROUP BY expansion @@ -1800,14 +1802,14 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT usage_idle::float AS usage_idle FROM cpu GROUP BY host" + "SELECT time::timestamp AS time, usage_idle::float AS usage_idle FROM cpu GROUP BY host" ); let stmt = parse_select("SELECT usage_idle FROM cpu GROUP BY *"); let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT usage_idle::float AS usage_idle FROM cpu GROUP BY cpu, host, region" + "SELECT time::timestamp AS time, usage_idle::float AS usage_idle FROM cpu GROUP BY cpu, host, region" ); // Does not include tags in projection when expanded in GROUP BY @@ -1815,7 +1817,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu GROUP BY cpu, host, region" + "SELECT time::timestamp AS time, usage_idle::float AS 
usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu GROUP BY cpu, host, region" ); // Does include explicitly listed tags in projection @@ -1823,7 +1825,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT host::tag AS host, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu GROUP BY cpu, host, region" + "SELECT time::timestamp AS time, host::tag AS host, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu GROUP BY cpu, host, region" ); // Fallible @@ -1840,7 +1842,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT usage_idle::float AS usage_idle FROM (SELECT usage_idle::float FROM cpu)" + "SELECT time::timestamp AS time, usage_idle::float AS usage_idle FROM (SELECT time::timestamp AS time, usage_idle::float FROM cpu)" ); // Subquery, regex, match @@ -1848,7 +1850,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT bytes_free::integer AS bytes_free FROM (SELECT bytes_free::integer, bytes_read::integer FROM disk, diskio)" + "SELECT time::timestamp AS time, bytes_free::integer AS bytes_free FROM (SELECT time::timestamp AS time, bytes_free::integer, bytes_read::integer FROM disk, diskio)" ); // Subquery, exact, no match @@ -1866,7 +1868,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT usage_system_usage_idle::float AS usage_system_usage_idle FROM (SELECT usage_system::float + usage_idle::float FROM cpu)" + "SELECT time::timestamp AS time, usage_system_usage_idle::float AS usage_system_usage_idle FROM (SELECT time::timestamp AS time, usage_system::float + usage_idle::float FROM cpu)" ); // Subquery, no fields projected should be dropped @@ -1874,7 +1876,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT usage_idle::float AS usage_idle FROM cpu" + "SELECT time::timestamp AS time, usage_idle::float AS usage_idle FROM cpu" ); // Outer query are permitted to project tags only, as long as there are other fields @@ -1883,7 +1885,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT cpu::tag AS cpu FROM (SELECT cpu::tag, usage_system::float FROM cpu)" + "SELECT time::timestamp AS time, cpu::tag AS cpu FROM (SELECT time::timestamp AS time, cpu::tag, usage_system::float FROM cpu)" ); // Outer FROM should be empty, as the subquery does not project any fields @@ -1896,7 +1898,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT bytes_free::integer + bytes_used::integer AS bytes_free_bytes_used FROM disk" + "SELECT time::timestamp AS time, bytes_free::integer + bytes_used::integer AS bytes_free_bytes_used FROM disk" ); // Unary expressions @@ -1904,7 +1906,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT -1 * bytes_free::integer AS bytes_free FROM disk" + "SELECT time::timestamp AS time, -1 * bytes_free::integer AS bytes_free FROM disk" ); // DISTINCT clause @@ -1914,14 +1916,14 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT count(distinct(bytes_free::integer)) AS count FROM disk" + "SELECT 
time::timestamp AS time, count(distinct(bytes_free::integer)) AS count FROM disk" ); let stmt = parse_select("SELECT DISTINCT bytes_free FROM disk"); let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT distinct(bytes_free::integer) AS \"distinct\" FROM disk" + "SELECT time::timestamp AS time, distinct(bytes_free::integer) AS \"distinct\" FROM disk" ); // Call expressions @@ -1930,7 +1932,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT count(field_i64::integer) AS count FROM temp_01" + "SELECT time::timestamp AS time, count(field_i64::integer) AS count FROM temp_01" ); // Duplicate aggregate columns @@ -1938,14 +1940,14 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT count(field_i64::integer) AS count, count(field_i64::integer) AS count_1 FROM temp_01" + "SELECT time::timestamp AS time, count(field_i64::integer) AS count, count(field_i64::integer) AS count_1 FROM temp_01" ); let stmt = parse_select("SELECT COUNT(field_f64) FROM temp_01"); let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT count(field_f64::float) AS count FROM temp_01" + "SELECT time::timestamp AS time, count(field_f64::float) AS count FROM temp_01" ); // Expands all fields @@ -1953,7 +1955,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT count(field_f64::float) AS count_field_f64, count(field_i64::integer) AS count_field_i64, count(field_str::string) AS count_field_str, count(field_u64::unsigned) AS count_field_u64, count(shared_field0::float) AS count_shared_field0 FROM temp_01" + "SELECT time::timestamp AS time, count(field_f64::float) AS count_field_f64, count(field_i64::integer) AS count_field_i64, count(field_str::string) AS count_field_str, count(field_u64::unsigned) AS count_field_u64, count(shared_field0::float) AS count_shared_field0 FROM temp_01" ); // Expands matching fields @@ -1961,7 +1963,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT count(field_f64::float) AS count_field_f64, count(field_i64::integer) AS count_field_i64, count(field_u64::unsigned) AS count_field_u64 FROM temp_01" + "SELECT time::timestamp AS time, count(field_f64::float) AS count_field_f64, count(field_i64::integer) AS count_field_i64, count(field_u64::unsigned) AS count_field_u64 FROM temp_01" ); // Expands only numeric fields @@ -1969,14 +1971,14 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT sum(field_f64::float) AS sum_field_f64, sum(field_i64::integer) AS sum_field_i64, sum(field_u64::unsigned) AS sum_field_u64, sum(shared_field0::float) AS sum_shared_field0 FROM temp_01" + "SELECT time::timestamp AS time, sum(field_f64::float) AS sum_field_f64, sum(field_i64::integer) AS sum_field_i64, sum(field_u64::unsigned) AS sum_field_u64, sum(shared_field0::float) AS sum_shared_field0 FROM temp_01" ); let stmt = parse_select("SELECT * FROM merge_00, merge_01"); let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT col0::float AS col0, col0::tag AS col0_1, col1::float AS col1, col1::tag AS col1_1, col2::string AS col2, col3::string AS col3 FROM merge_00, merge_01" + "SELECT time::timestamp AS time, col0::float AS col0, col0::tag AS col0_1, col1::float AS col1, col1::tag AS col1_1, 
col2::string AS col2, col3::string AS col3 FROM merge_00, merge_01" ); // This should only select merge_01, as col0 is a tag in merge_00 @@ -1984,7 +1986,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT col0::float AS col0, col0::tag AS col0_1 FROM merge_01" + "SELECT time::timestamp AS time, col0::float AS col0, col0::tag AS col0_1 FROM merge_01" ); // Fallible cases From 43baecbb1a142b5ccd6e1d4efd53f77648a39dc4 Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Wed, 3 May 2023 12:04:24 +1000 Subject: [PATCH 007/119] chore: handle aliased time column in sort expression --- .../query_tests2/cases/in/issue_6112.influxql | 8 ++- .../cases/in/issue_6112.influxql.expected | 26 ++++++++++ iox_query_influxql/src/plan/planner.rs | 50 +++++++++++++++---- iox_query_influxql/src/plan/planner/select.rs | 20 -------- 4 files changed, 74 insertions(+), 30 deletions(-) diff --git a/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql b/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql index 2af3ffb2ff..b9d94dfeb4 100644 --- a/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql +++ b/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql @@ -24,9 +24,15 @@ SELECT /64|tag0/ FROM m0; -- Projection specific tags and fields SELECT f64, tag0 FROM m0; --- Explicitly select time column +-- Explicitly select time column, should appear in first column SELECT f64, tag0, time FROM m0; +-- Alias time column +SELECT f64, tag0, time as timestamp FROM m0; + +-- Alias field and tag columns +SELECT f64 as f, tag0 as t FROM m0; + -- arithmetic operators SELECT f64, f64 * 2, i64, i64 + i64 FROM m0; diff --git a/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql.expected b/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql.expected index c69c7137eb..f813926124 100644 --- a/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql.expected +++ b/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql.expected @@ -81,6 +81,32 @@ name: m0 | 2022-10-31T02:00:20 | 11.2 | val00 | | 2022-10-31T02:00:30 | 19.2 | val00 | +---------------------+------+-------+ +-- InfluxQL: SELECT f64, tag0, time as timestamp FROM m0; +name: m0 ++---------------------+------+-------+ +| timestamp | f64 | tag0 | ++---------------------+------+-------+ +| 2022-10-31T02:00:00 | 10.1 | val00 | +| 2022-10-31T02:00:00 | 11.3 | val01 | +| 2022-10-31T02:00:00 | 10.4 | val02 | +| 2022-10-31T02:00:10 | 21.2 | val00 | +| 2022-10-31T02:00:10 | 18.9 | val00 | +| 2022-10-31T02:00:20 | 11.2 | val00 | +| 2022-10-31T02:00:30 | 19.2 | val00 | ++---------------------+------+-------+ +-- InfluxQL: SELECT f64 as f, tag0 as t FROM m0; +name: m0 ++---------------------+------+-------+ +| time | f | t | ++---------------------+------+-------+ +| 2022-10-31T02:00:00 | 10.1 | val00 | +| 2022-10-31T02:00:00 | 11.3 | val01 | +| 2022-10-31T02:00:00 | 10.4 | val02 | +| 2022-10-31T02:00:10 | 21.2 | val00 | +| 2022-10-31T02:00:10 | 18.9 | val00 | +| 2022-10-31T02:00:20 | 11.2 | val00 | +| 2022-10-31T02:00:30 | 19.2 | val00 | ++---------------------+------+-------+ -- InfluxQL: SELECT f64, f64 * 2, i64, i64 + i64 FROM m0; name: m0 +---------------------+------+-------+-----+---------+ diff --git a/iox_query_influxql/src/plan/planner.rs b/iox_query_influxql/src/plan/planner.rs index e5ed74fe67..6ee3d756af 100644 --- a/iox_query_influxql/src/plan/planner.rs +++ b/iox_query_influxql/src/plan/planner.rs @@ -1,8 +1,7 @@ mod select; use 
crate::plan::planner::select::{ - check_exprs_satisfy_columns, fields_to_exprs_no_nulls, make_tag_key_column_meta, - plan_with_sort, ToSortExpr, + check_exprs_satisfy_columns, fields_to_exprs_no_nulls, make_tag_key_column_meta, plan_with_sort, }; use crate::plan::planner_time_range_expression::{ duration_expr_to_nanoseconds, expr_to_df_interval_dt, time_range_to_df_expr, @@ -34,7 +33,7 @@ use datafusion::logical_expr::{ use datafusion::prelude::{cast, sum, when, Column}; use datafusion_util::{lit_dict, AsExpr}; use generated_types::influxdata::iox::querier::v1::InfluxQlMetadata; -use influxdb_influxql_parser::common::{LimitClause, OffsetClause}; +use influxdb_influxql_parser::common::{LimitClause, OffsetClause, OrderByClause}; use influxdb_influxql_parser::explain::{ExplainOption, ExplainStatement}; use influxdb_influxql_parser::expression::walk::walk_expr; use influxdb_influxql_parser::expression::{ @@ -419,9 +418,26 @@ impl<'a> InfluxQLToLogicalPlan<'a> { // The UNION operator indicates the result set produces multiple tables or measurements. let is_multiple_measurements = matches!(plan, LogicalPlan::Union(_)); + // the sort planner node must refer to the time column using + // the alias that was specified + let time_alias = fields[0] + .alias + .as_ref() + .map(|id| id.deref().as_str()) + .unwrap_or("time"); + + let time_sort_expr = time_alias.as_expr().sort( + match select.order_by { + // Default behaviour is to sort by time in ascending order if there is no ORDER BY + None | Some(OrderByClause::Ascending) => true, + Some(OrderByClause::Descending) => false, + }, + false, + ); + let plan = plan_with_sort( plan, - vec![select.order_by.to_sort_expr()], + vec![time_sort_expr.clone()], is_multiple_measurements, &group_by_tag_set, &projection_tag_set, @@ -431,7 +447,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> { plan, select.offset, select.limit, - vec![select.order_by.to_sort_expr()], + vec![time_sort_expr], is_multiple_measurements, &group_by_tag_set, &projection_tag_set, @@ -2649,6 +2665,23 @@ mod test { mod select { use super::*; + #[test] + fn test_time_column() { + // validate time column is explicitly projected + assert_snapshot!(plan("SELECT usage_idle, time FROM cpu"), @r###" + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), usage_idle:Float64;N] + Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, cpu.usage_idle AS usage_idle [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), usage_idle:Float64;N] + TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] + "###); + + // validate time column may be aliased + assert_snapshot!(plan("SELECT usage_idle, time AS timestamp FROM cpu"), @r###" + Sort: timestamp ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), timestamp:Timestamp(Nanosecond, None), usage_idle:Float64;N] + Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS timestamp, cpu.usage_idle AS usage_idle [iox::measurement:Dictionary(Int32, Utf8), timestamp:Timestamp(Nanosecond, None), usage_idle:Float64;N] + TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] + "###); + } + /// Tests for the `DISTINCT` clause and `DISTINCT` 
function #[test] fn test_distinct() { @@ -3477,10 +3510,9 @@ mod test { TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); assert_snapshot!(plan("SELECT time as timestamp, f64_field FROM data"), @r###" - Projection: iox::measurement, timestamp, f64_field [iox::measurement:Dictionary(Int32, Utf8), timestamp:Timestamp(Nanosecond, None), f64_field:Float64;N] - Sort: data.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), timestamp:Timestamp(Nanosecond, None), f64_field:Float64;N, time:Timestamp(Nanosecond, None)] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS timestamp, data.f64_field AS f64_field, data.time [iox::measurement:Dictionary(Int32, Utf8), timestamp:Timestamp(Nanosecond, None), f64_field:Float64;N, time:Timestamp(Nanosecond, None)] - TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] + Sort: timestamp ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), timestamp:Timestamp(Nanosecond, None), f64_field:Float64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS timestamp, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), timestamp:Timestamp(Nanosecond, None), f64_field:Float64;N] + TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); assert_snapshot!(plan("SELECT foo, f64_field FROM data"), @r###" Sort: time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] diff --git a/iox_query_influxql/src/plan/planner/select.rs b/iox_query_influxql/src/plan/planner/select.rs index 0f03ca39c1..dd568c8983 100644 --- a/iox_query_influxql/src/plan/planner/select.rs +++ b/iox_query_influxql/src/plan/planner/select.rs @@ -5,7 +5,6 @@ use datafusion::logical_expr::utils::find_column_exprs; use datafusion::logical_expr::{Expr, LogicalPlan, LogicalPlanBuilder}; use datafusion_util::AsExpr; use generated_types::influxdata::iox::querier::v1::influx_ql_metadata::TagKeyColumn; -use influxdb_influxql_parser::common::OrderByClause; use influxdb_influxql_parser::expression::{Expr as IQLExpr, VarRef, VarRefDataType}; use influxdb_influxql_parser::select::Field; use schema::INFLUXQL_MEASUREMENT_COLUMN_NAME; @@ -121,25 +120,6 @@ pub(super) fn plan_with_sort( LogicalPlanBuilder::from(plan).sort(series_sort)?.build() } -/// Trait to convert the receiver to a [`Expr::Sort`] expression. -pub(super) trait ToSortExpr { - /// Create a sort expression. 
- fn to_sort_expr(&self) -> Expr; -} - -impl ToSortExpr for Option { - fn to_sort_expr(&self) -> Expr { - "time".as_expr().sort( - match self { - // Default behaviour is to sort by time in ascending order if there is no ORDER BY - None | Some(OrderByClause::Ascending) => true, - Some(OrderByClause::Descending) => false, - }, - false, - ) - } -} - /// Map the fields to DataFusion [`Expr::Column`] expressions, excluding those columns that /// are [`DataType::Null`]'s. pub(super) fn fields_to_exprs_no_nulls<'a>( From 231e0f48ab82e0705cc97db3a2c4e445643f8c55 Mon Sep 17 00:00:00 2001 From: Marco Neumann Date: Wed, 3 May 2023 14:46:51 +0200 Subject: [PATCH 008/119] test: add test for InfluxQL md queries w/ `FROM ""` (#7728) See https://github.com/influxdata/idpe/issues/17559 . Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com> --- .../cases/in/influxql_metadata.influxql | 3 +++ .../cases/in/influxql_metadata.influxql.expected | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/influxdb_iox/tests/query_tests2/cases/in/influxql_metadata.influxql b/influxdb_iox/tests/query_tests2/cases/in/influxql_metadata.influxql index c8a44f8033..0cdf0f0048 100644 --- a/influxdb_iox/tests/query_tests2/cases/in/influxql_metadata.influxql +++ b/influxdb_iox/tests/query_tests2/cases/in/influxql_metadata.influxql @@ -36,6 +36,7 @@ SHOW FIELD KEYS FROM cpu,disk,cpu; SHOW FIELD KEYS FROM /m.*/; SHOW FIELD KEYS FROM /d\isk/; SHOW FIELD KEYS FROM does_not_exist; +SHOW FIELD KEYS FROM ""; -- unimplemented features in `SHOW FIELD KEYS` SHOW FIELD KEYS ON my_db; @@ -59,6 +60,7 @@ SHOW TAG VALUES FROM m1,m0,m1 WITH KEY = "tag0"; SHOW TAG VALUES FROM /m.*/ WITH KEY = "tag0"; SHOW TAG VALUES FROM /d\isk/ WITH KEY = "device"; SHOW TAG VALUES FROM does_not_exist WITH KEY = "tag0"; +SHOW TAG VALUES FROM "" WITH KEY = "tag0"; SHOW TAG VALUES WITH KEY = "tt_tag"; SHOW TAG VALUES WITH KEY = "tt_tag" WHERE time >= '1990-01-01T00:00:00Z'; SHOW TAG VALUES WITH KEY = "tt_tag" WHERE time >= '2022-10-31T02:00:00Z'; @@ -83,6 +85,7 @@ SHOW TAG KEYS FROM cpu,disk,cpu; SHOW TAG KEYS FROM /m.*/; SHOW TAG KEYS FROM /d\isk/; SHOW TAG KEYS FROM does_not_exist; +SHOW TAG KEYS FROM ""; SHOW TAG KEYS FROM time_test WHERE time >= '1990-01-01T00:00:00Z'; SHOW TAG KEYS FROM time_test WHERE time >= '2022-10-31T02:00:00Z'; SHOW TAG KEYS FROM time_test WHERE time >= '1970-01-01T01:00:00Z'; diff --git a/influxdb_iox/tests/query_tests2/cases/in/influxql_metadata.influxql.expected b/influxdb_iox/tests/query_tests2/cases/in/influxql_metadata.influxql.expected index 1351c6e3ec..015ee6c829 100644 --- a/influxdb_iox/tests/query_tests2/cases/in/influxql_metadata.influxql.expected +++ b/influxdb_iox/tests/query_tests2/cases/in/influxql_metadata.influxql.expected @@ -408,6 +408,11 @@ name: disk | fieldKey | fieldType | +----------+-----------+ +----------+-----------+ +-- InfluxQL: SHOW FIELD KEYS FROM ""; ++----------+-----------+ +| fieldKey | fieldType | ++----------+-----------+ ++----------+-----------+ -- InfluxQL: SHOW FIELD KEYS ON my_db; Error while planning query: This feature is not implemented: SHOW FIELD KEYS ON -- InfluxQL: SHOW FIELD KEYS FROM x.my_db; @@ -923,6 +928,11 @@ name: disk | key | value | +-----+-------+ +-----+-------+ +-- InfluxQL: SHOW TAG VALUES FROM "" WITH KEY = "tag0"; ++-----+-------+ +| key | value | ++-----+-------+ ++-----+-------+ -- InfluxQL: SHOW TAG VALUES WITH KEY = "tt_tag"; name: time_test +--------+-------------------+ @@ -1284,6 +1294,11 @@ name: disk | tagKey | +--------+ 
+--------+ +-- InfluxQL: SHOW TAG KEYS FROM ""; ++--------+ +| tagKey | ++--------+ ++--------+ -- InfluxQL: SHOW TAG KEYS FROM time_test WHERE time >= '1990-01-01T00:00:00Z'; name: time_test +--------------------------+ From dfa184e296ec92091e721dc579f81abfb44c6001 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Mon, 27 Feb 2023 14:12:21 -0500 Subject: [PATCH 009/119] fix: Make ingester UUID an expected, required field of IngesterPartition --- querier/src/ingester/mod.rs | 44 ++++++++++++++-------------------- querier/src/table/mod.rs | 26 +++++++------------- querier/src/table/test_util.rs | 2 +- 3 files changed, 28 insertions(+), 44 deletions(-) diff --git a/querier/src/ingester/mod.rs b/querier/src/ingester/mod.rs index 4344d8d84c..5339f2fbb0 100644 --- a/querier/src/ingester/mod.rs +++ b/querier/src/ingester/mod.rs @@ -567,16 +567,10 @@ impl IngesterStreamDecoder { // columns that the sort key must cover. let partition_sort_key = None; - let ingester_uuid = if md.ingester_uuid.is_empty() { - // Using the write buffer path, no UUID specified - None - } else { - Some( - Uuid::parse_str(&md.ingester_uuid).context(IngesterUuidSnafu { - ingester_uuid: md.ingester_uuid, - })?, - ) - }; + let ingester_uuid = + Uuid::parse_str(&md.ingester_uuid).context(IngesterUuidSnafu { + ingester_uuid: md.ingester_uuid, + })?; let partition = IngesterPartition::new( ingester_uuid, @@ -767,17 +761,15 @@ impl IngesterConnection for IngesterConnectionImpl { /// more than one IngesterPartition for each table the ingester knows about. #[derive(Debug, Clone)] pub struct IngesterPartition { - /// If using ingester2/rpc write path, the ingester UUID will be present and will identify - /// whether this ingester has restarted since the last time it was queried or not. - /// - /// When we fully switch over to always using the RPC write path, the `Option` in this type can - /// be removed. - ingester_uuid: Option, + /// The ingester UUID that identifies whether this ingester has restarted since the last time + /// it was queried or not, which affects whether we can compare the + /// `completed_persistence_count` with a previous count for this ingester to know if we need + /// to refresh the catalog cache or not. + ingester_uuid: Uuid, partition_id: PartitionId, - /// If using ingester2/rpc write path, this will be the number of Parquet files this ingester - /// UUID has persisted for this partition. + /// The number of Parquet files this ingester UUID has persisted for this partition. 
completed_persistence_count: u64, /// Maximum sequence number of parquet files the ingester has @@ -795,7 +787,7 @@ impl IngesterPartition { /// `RecordBatches` into the correct types #[allow(clippy::too_many_arguments)] pub fn new( - ingester_uuid: Option, + ingester_uuid: Uuid, partition_id: PartitionId, completed_persistence_count: u64, parquet_max_sequence_number: Option, @@ -860,7 +852,7 @@ impl IngesterPartition { Ok(self) } - pub(crate) fn ingester_uuid(&self) -> Option { + pub(crate) fn ingester_uuid(&self) -> Uuid { self.ingester_uuid } @@ -1200,7 +1192,7 @@ mod tests { assert_eq!(p.partition_id.get(), 1); assert_eq!(p.parquet_max_sequence_number, None); assert_eq!(p.chunks.len(), 0); - assert_eq!(p.ingester_uuid.unwrap(), ingester_uuid); + assert_eq!(p.ingester_uuid, ingester_uuid); assert_eq!(p.completed_persistence_count, 5); } @@ -1540,7 +1532,7 @@ mod tests { assert_eq!(partitions.len(), 3); let p1 = &partitions[0]; - assert_eq!(p1.ingester_uuid.unwrap(), ingester_uuid1); + assert_eq!(p1.ingester_uuid, ingester_uuid1); assert_eq!(p1.completed_persistence_count, 0); assert_eq!(p1.partition_id.get(), 1); assert_eq!( @@ -1549,7 +1541,7 @@ mod tests { ); let p2 = &partitions[1]; - assert_eq!(p2.ingester_uuid.unwrap(), ingester_uuid1); + assert_eq!(p2.ingester_uuid, ingester_uuid1); assert_eq!(p2.completed_persistence_count, 42); assert_eq!(p2.partition_id.get(), 2); assert_eq!( @@ -1558,7 +1550,7 @@ mod tests { ); let p3 = &partitions[2]; - assert_eq!(p3.ingester_uuid.unwrap(), ingester_uuid2); + assert_eq!(p3.ingester_uuid, ingester_uuid2); assert_eq!(p3.completed_persistence_count, 9000); assert_eq!(p3.partition_id.get(), 3); assert_eq!( @@ -1824,7 +1816,7 @@ mod tests { let parquet_max_sequence_number = None; // Construct a partition and ensure it doesn't error let ingester_partition = IngesterPartition::new( - Some(ingester_uuid), + ingester_uuid, PartitionId::new(1), 0, parquet_max_sequence_number, @@ -1853,7 +1845,7 @@ mod tests { let parquet_max_sequence_number = None; let err = IngesterPartition::new( - Some(ingester_uuid), + ingester_uuid, PartitionId::new(1), 0, parquet_max_sequence_number, diff --git a/querier/src/table/mod.rs b/querier/src/table/mod.rs index ddc8981693..90b489e76a 100644 --- a/querier/src/table/mod.rs +++ b/querier/src/table/mod.rs @@ -441,15 +441,13 @@ impl QuerierTable { // therefore needs to refresh its view of the catalog. 
fn collect_persisted_file_counts( capacity: usize, - partitions: impl Iterator, u64)>, + partitions: impl Iterator, ) -> HashMap { partitions.fold( HashMap::with_capacity(capacity), |mut map, (uuid, count)| { - if let Some(uuid) = uuid { - let sum = map.entry(uuid).or_default(); - *sum += count; - } + let sum = map.entry(uuid).or_default(); + *sum += count; map }, ) @@ -481,19 +479,13 @@ mod tests { "Expected output to be empty, instead was: {output:?}" ); - // If there's no UUIDs, don't count anything - let input = [(None, 10)]; + let uuid1 = Uuid::new_v4(); + let uuid2 = Uuid::new_v4(); + let input = [(uuid1, 20), (uuid1, 22), (uuid2, 30)]; let output = collect_persisted_file_counts(input.len(), input.into_iter()); - assert!( - output.is_empty(), - "Expected output to be empty, instead was: {output:?}" - ); - - let uuid = Uuid::new_v4(); - let input = [(Some(uuid), 20), (Some(uuid), 22), (None, 10)]; - let output = collect_persisted_file_counts(input.len(), input.into_iter()); - assert_eq!(output.len(), 1); - assert_eq!(*output.get(&uuid).unwrap(), 42); + assert_eq!(output.len(), 2); + assert_eq!(*output.get(&uuid1).unwrap(), 42); + assert_eq!(*output.get(&uuid2).unwrap(), 30); } #[tokio::test] diff --git a/querier/src/table/test_util.rs b/querier/src/table/test_util.rs index ecf7f40873..f1b61b2554 100644 --- a/querier/src/table/test_util.rs +++ b/querier/src/table/test_util.rs @@ -107,7 +107,7 @@ impl IngesterPartitionBuilder { let data = self.lp.iter().map(|lp| lp_to_record_batch(lp)).collect(); IngesterPartition::new( - Some(Uuid::new_v4()), + Uuid::new_v4(), self.partition.partition.id, 0, parquet_max_sequence_number, From 621caab2e92efb71dc50cd78d202511d2f45c9d3 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Mon, 27 Feb 2023 15:03:31 -0500 Subject: [PATCH 010/119] fix: Remove unused parquet_max_sequence_number metadata --- .../src/components/parquet_file_sink/mock.rs | 5 +- .../parquet_file_sink/object_store.rs | 6 +- compactor2_test_utils/src/simulator.rs | 5 +- data_types/src/lib.rs | 23 -- docs/ingester_querier_protocol.md | 70 +++---- garbage_collector/src/objectstore/checker.rs | 3 +- .../iox/catalog/v1/parquet_file.proto | 4 +- .../iox/ingester/v1/parquet_metadata.proto | 6 +- .../influxdata/iox/ingester/v1/query.proto | 19 +- .../aggregate_tsm_schema/update_catalog.rs | 4 - ingester2/src/persist/mod.rs | 7 +- ingester2/src/persist/worker.rs | 3 +- ingester2/src/server/grpc/query.rs | 16 +- ingester2/src/test_util.rs | 1 - iox_catalog/src/interface.rs | 16 +- iox_catalog/src/kafkaless_transition.rs | 16 +- iox_catalog/src/mem.rs | 4 +- iox_catalog/src/postgres.rs | 45 ++-- iox_catalog/src/sqlite.rs | 51 ++--- iox_tests/src/builders.rs | 4 +- iox_tests/src/catalog.rs | 17 +- parquet_file/src/metadata.rs | 12 +- parquet_file/src/serialize.rs | 3 +- parquet_file/src/storage.rs | 3 +- parquet_file/tests/metadata.rs | 9 +- querier/src/cache/parquet_file.rs | 4 +- querier/src/ingester/mod.rs | 198 ++---------------- querier/src/namespace/query_access.rs | 9 - querier/src/parquet/creation.rs | 1 - querier/src/parquet/mod.rs | 11 +- querier/src/table/mod.rs | 34 +-- .../src/table/state_reconciler/interface.rs | 20 +- querier/src/table/test_util.rs | 16 +- service_grpc_catalog/src/lib.rs | 7 +- service_grpc_object_store/src/lib.rs | 5 +- 35 files changed, 138 insertions(+), 519 deletions(-) diff --git a/compactor2/src/components/parquet_file_sink/mock.rs b/compactor2/src/components/parquet_file_sink/mock.rs index f920270573..3166910e48 100644 --- 
a/compactor2/src/components/parquet_file_sink/mock.rs +++ b/compactor2/src/components/parquet_file_sink/mock.rs @@ -4,7 +4,7 @@ use std::{ }; use async_trait::async_trait; -use data_types::{ColumnSet, CompactionLevel, ParquetFileParams, SequenceNumber, Timestamp}; +use data_types::{ColumnSet, CompactionLevel, ParquetFileParams, Timestamp}; use datafusion::{ arrow::{datatypes::SchemaRef, record_batch::RecordBatch}, error::DataFusionError, @@ -72,7 +72,6 @@ impl ParquetFileSink for MockParquetFileSink { table_id: partition.table.id, partition_id: partition.partition_id, object_store_id: Uuid::from_u128(guard.len() as u128), - max_sequence_number: SequenceNumber::new(0), min_time: Timestamp::new(0), max_time: Timestamp::new(0), file_size_bytes: 1, @@ -168,7 +167,6 @@ mod tests { table_id: TableId::new(3), partition_id: PartitionId::new(1), object_store_id: Uuid::from_u128(2), - max_sequence_number: SequenceNumber::new(0), min_time: Timestamp::new(0), max_time: Timestamp::new(0), file_size_bytes: 1, @@ -231,7 +229,6 @@ mod tests { table_id: TableId::new(3), partition_id: PartitionId::new(1), object_store_id: Uuid::from_u128(0), - max_sequence_number: SequenceNumber::new(0), min_time: Timestamp::new(0), max_time: Timestamp::new(0), file_size_bytes: 1, diff --git a/compactor2/src/components/parquet_file_sink/object_store.rs b/compactor2/src/components/parquet_file_sink/object_store.rs index c6102f25e0..16af1d3b96 100644 --- a/compactor2/src/components/parquet_file_sink/object_store.rs +++ b/compactor2/src/components/parquet_file_sink/object_store.rs @@ -1,7 +1,7 @@ use std::{fmt::Display, sync::Arc}; use async_trait::async_trait; -use data_types::{CompactionLevel, ParquetFileParams, SequenceNumber}; +use data_types::{CompactionLevel, ParquetFileParams}; use datafusion::{error::DataFusionError, physical_plan::SendableRecordBatchStream}; use iox_time::{Time, TimeProvider}; use parquet_file::{ @@ -15,9 +15,6 @@ use crate::partition_info::PartitionInfo; use super::ParquetFileSink; -// fields no longer used but still exists in the catalog -const MAX_SEQUENCE_NUMBER: i64 = 0; - #[derive(Debug)] pub struct ObjectStoreParquetFileSink { store: ParquetStorage, @@ -57,7 +54,6 @@ impl ParquetFileSink for ObjectStoreParquetFileSink { table_name: partition.table.name.clone().into(), partition_id: partition.partition_id, partition_key: partition.partition_key.clone(), - max_sequence_number: SequenceNumber::new(MAX_SEQUENCE_NUMBER), compaction_level: level, sort_key: partition.sort_key.clone(), max_l0_created_at, diff --git a/compactor2_test_utils/src/simulator.rs b/compactor2_test_utils/src/simulator.rs index 35bfbdd6f5..ce1620bf5f 100644 --- a/compactor2_test_utils/src/simulator.rs +++ b/compactor2_test_utils/src/simulator.rs @@ -7,9 +7,7 @@ use std::{ }; use async_trait::async_trait; -use data_types::{ - ColumnSet, CompactionLevel, ParquetFile, ParquetFileParams, SequenceNumber, Timestamp, -}; +use data_types::{ColumnSet, CompactionLevel, ParquetFile, ParquetFileParams, Timestamp}; use datafusion::physical_plan::SendableRecordBatchStream; use iox_time::Time; use observability_deps::tracing::info; @@ -206,7 +204,6 @@ impl SimulatedFile { table_id: partition_info.table.id, partition_id: partition_info.partition_id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(0), min_time, max_time, file_size_bytes, diff --git a/data_types/src/lib.rs b/data_types/src/lib.rs index 30cf7775b8..4bdd34d3dc 100644 --- a/data_types/src/lib.rs +++ b/data_types/src/lib.rs @@ -830,23 +830,6 @@ pub struct 
Partition { /// relative order of B and C have been reversed. pub sort_key: Vec, - /// The inclusive maximum [`SequenceNumber`] of the most recently persisted - /// data for this partition. - /// - /// All writes with a [`SequenceNumber`] less than and equal to this - /// [`SequenceNumber`] have been persisted to the object store. The inverse - /// is not guaranteed to be true due to update ordering; some files for this - /// partition may exist in the `parquet_files` table that have a greater - /// [`SequenceNumber`] than is specified here - the system will converge so - /// long as the ingester progresses. - /// - /// It is a system invariant that this value monotonically increases over - /// time - wrote another way, it is an invariant that partitions are - /// persisted (or at least made visible) in sequence order. - /// - /// If [`None`] no data has been persisted for this partition. - pub persisted_sequence_number: Option, - /// The time at which the newest file of the partition is created pub new_file_at: Option, } @@ -945,8 +928,6 @@ pub struct ParquetFile { pub partition_id: PartitionId, /// the uuid used in the object store path for this file pub object_store_id: Uuid, - /// the maximum sequence number from a record in this file - pub max_sequence_number: SequenceNumber, /// the min timestamp of data in this file pub min_time: Timestamp, /// the max timestamp of data in this file @@ -1003,7 +984,6 @@ impl ParquetFile { table_id: params.table_id, partition_id: params.partition_id, object_store_id: params.object_store_id, - max_sequence_number: params.max_sequence_number, min_time: params.min_time, max_time: params.max_time, to_delete: None, @@ -1044,8 +1024,6 @@ pub struct ParquetFileParams { pub partition_id: PartitionId, /// the uuid used in the object store path for this file pub object_store_id: Uuid, - /// the maximum sequence number from a record in this file - pub max_sequence_number: SequenceNumber, /// the min timestamp of data in this file pub min_time: Timestamp, /// the max timestamp of data in this file @@ -1071,7 +1049,6 @@ impl From for ParquetFileParams { table_id: value.table_id, partition_id: value.partition_id, object_store_id: value.object_store_id, - max_sequence_number: value.max_sequence_number, min_time: value.min_time, max_time: value.max_time, file_size_bytes: value.file_size_bytes, diff --git a/docs/ingester_querier_protocol.md b/docs/ingester_querier_protocol.md index d562abd1d1..427ded15ec 100644 --- a/docs/ingester_querier_protocol.md +++ b/docs/ingester_querier_protocol.md @@ -3,8 +3,8 @@ This document describes the query protocol that the querier uses to request data The protocol is based on [Apache Flight]. We however only support a single request type: `DoGet`. - ## Request (Querier ⇒ Ingester) + The `DoGet` ticket contains a [Protocol Buffer] message `influxdata.iox.ingester.v1.IngesterQueryRequest` (see our `generated_types` crate). This message contains: @@ -16,20 +16,20 @@ contains: the request and the ingester data). - **predicate:** Predicate for row-filtering on the ingester side. -The request does NOT contain a selection of partitions or shards. The ingester must respond with -all partitions and shards it knows for that specified namespace-table combination. +The request does NOT contain a selection of partitions. The ingester must respond with all +partitions it knows for that specified namespace-table combination. 
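To make the ticket construction concrete, a minimal sketch of how a caller might build it is
shown below. It is illustrative only: the exact field names (`namespace_id`, `table_id`,
`columns`, `predicate`) and the generated module path are assumptions based on the description
above, not a copy of the querier code; the authoritative definition lives in
`influxdata/iox/ingester/v1/query.proto`.

```rust
// Sketch only. Assumes prost-generated types for
// influxdata.iox.ingester.v1.IngesterQueryRequest from the `generated_types` crate;
// the field names are assumptions based on the description above.
use arrow_flight::Ticket;
use generated_types::influxdata::iox::ingester::v1 as proto;
use prost::Message;

fn make_ticket(namespace_id: i64, table_id: i64, columns: Vec<String>) -> Ticket {
    let request = proto::IngesterQueryRequest {
        namespace_id,
        table_id,
        columns,
        // No row filtering in this sketch; a serialized predicate could be attached here.
        predicate: None,
    };

    // The DoGet ticket payload is simply the protobuf-encoded request.
    let mut buf = Vec::new();
    request
        .encode(&mut buf)
        .expect("encoding into a Vec never fails");
    Ticket { ticket: buf.into() }
}
```
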
## Response (Ingester ⇒ Querier) + The goal of the response is to stream the following ingester data hierarchy: -- For each shard: - - For each partition **(A)**: - - Persistence Information: - - Sequence number of max. persisted parquet file - - For each snapshot (contains _persisting_ data) **(B)**: - - Record batches with following operations applied **(C)**: - - selection (i.e. row filter via predicates) - - projection (i.e. column filter) +- For each partition **(A)**: +- Persistence Information: + - Sequence number of max. persisted parquet file +- For each snapshot (contains _persisting_ data) **(B)**: + - Record batches with following operations applied **(C)**: + - selection (i.e. row filter via predicates) + - projection (i.e. column filter) This is mapped to the following stream of Flight messages: @@ -37,35 +37,36 @@ This is mapped to the following stream of Flight messages: `influxdata.iox.ingester.v1.IngesterQueryResponseMetadata`. This message contains: - partition id - Sequence number of max. persisted parquet file -- **B:** `Schema` message that announces the snapshot schema. No app metadata is attached. The snapshot belongs to the - partition that was just announced. Transmitting a schema resets the dictionary information. -- **Between B and C:** `DictionaryBatch` messages that set the dictionary information for the next record batch. -- **C:** `RecordBatch` message that uses the last schema and the current dictionary state. The batch belongs to the - snapshot that was just announced. +- **B:** `Schema` message that announces the snapshot schema. No app metadata is attached. The + snapshot belongs to the partition that was just announced. Transmitting a schema resets the + dictionary information. +- **Between B and C:** `DictionaryBatch` messages that set the dictionary information for the next + record batch. +- **C:** `RecordBatch` message that uses the last schema and the current dictionary state. The + batch belongs to the snapshot that was just announced. -The protocol is stateful and therefore the order of the messages is important. A specific partition and snapshot may only -be announced once. - -All other messages types (at the time of writing these are `Tensor` and `SparseTensor`) are unsupported. +The protocol is stateful and therefore the order of the messages is important. A specific partition +and snapshot may only be announced once. +All other messages types (at the time of writing these are `Tensor` and `SparseTensor`) are +unsupported. ## Example + Imagine the following ingester state: -- shard S1: - - partition P1: - - max. persisted parquet file at `sequence_number=10` - - snapshots C1 and C2 - - partition P2: - - max. persisted parquet file at `sequence_number=1` - - snapshot C3 -- shard S2: - - partition P3: - - no persisted parquet file - - no snapshots (all deleted) - - partition P4: - - no persisted parquet file - - snapshot C4 +- partition P1: + - max. persisted parquet file at `sequence_number=10` + - snapshots C1 and C2 +- partition P2: + - max. persisted parquet file at `sequence_number=1` + - snapshot C3 +- partition P3: + - no persisted parquet file + - no snapshots (all deleted) +- partition P4: + - no persisted parquet file + - snapshot C4 This results in the following response stream: @@ -89,6 +90,5 @@ This results in the following response stream: Note that P3 was skipped because there was no unpersisted data. 
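The ordering guarantees above are what make decoding on the querier side straightforward. The
sketch below is illustrative only (the real decoder in `querier/src/ingester/mod.rs` consumes
`DecodedPayload` values from the Flight client); it shows how the flat message stream regroups
into the partition/snapshot hierarchy, using simplified stand-in types rather than the actual
client API.

```rust
// Illustrative regrouping of the flat Flight stream into the hierarchy above.
// `Payload` is a simplified stand-in for the three message kinds, not the real client API.
use std::sync::Arc;

use arrow::{datatypes::Schema, record_batch::RecordBatch};

enum Payload {
    /// (A) app-metadata-only message announcing a partition.
    Partition { partition_id: i64 },
    /// (B) schema announcing the next snapshot; resets dictionary state.
    Snapshot(Arc<Schema>),
    /// (C) record batch belonging to the most recently announced snapshot.
    Batch(RecordBatch),
}

struct Snapshot {
    schema: Arc<Schema>,
    batches: Vec<RecordBatch>,
}

struct Partition {
    partition_id: i64,
    snapshots: Vec<Snapshot>,
}

/// Groups messages into partitions, relying on the guarantee that each
/// partition and snapshot is announced exactly once and in order.
fn group(messages: impl IntoIterator<Item = Payload>) -> Vec<Partition> {
    let mut partitions: Vec<Partition> = vec![];
    for msg in messages {
        match msg {
            Payload::Partition { partition_id } => partitions.push(Partition {
                partition_id,
                snapshots: vec![],
            }),
            Payload::Snapshot(schema) => partitions
                .last_mut()
                .expect("schema sent before any partition was announced")
                .snapshots
                .push(Snapshot {
                    schema,
                    batches: vec![],
                }),
            Payload::Batch(batch) => partitions
                .last_mut()
                .and_then(|p| p.snapshots.last_mut())
                .expect("batch sent before any snapshot was announced")
                .batches
                .push(batch),
        }
    }
    partitions
}
```

A partition that arrives with no following schema (like P3 above) simply ends up with an empty
`snapshots` list.
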
- [Apache Flight]: https://arrow.apache.org/docs/Format/Flight.html [Protocol Buffer]: https://developers.google.com/protocol-buffers diff --git a/garbage_collector/src/objectstore/checker.rs b/garbage_collector/src/objectstore/checker.rs index af58c73b55..d9b75d1188 100644 --- a/garbage_collector/src/objectstore/checker.rs +++ b/garbage_collector/src/objectstore/checker.rs @@ -138,7 +138,7 @@ mod tests { use chrono::TimeZone; use data_types::{ ColumnId, ColumnSet, CompactionLevel, NamespaceId, ParquetFile, ParquetFileParams, - PartitionId, SequenceNumber, TableId, Timestamp, + PartitionId, TableId, Timestamp, }; use iox_catalog::{interface::Catalog, mem::MemCatalog}; use object_store::path::Path; @@ -178,7 +178,6 @@ mod tests { table_id: partition.table_id, partition_id: partition.id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(140), min_time: Timestamp::new(1), max_time: Timestamp::new(10), file_size_bytes: 1337, diff --git a/generated_types/protos/influxdata/iox/catalog/v1/parquet_file.proto b/generated_types/protos/influxdata/iox/catalog/v1/parquet_file.proto index 6320e9d4bd..a7be9b84e8 100644 --- a/generated_types/protos/influxdata/iox/catalog/v1/parquet_file.proto +++ b/generated_types/protos/influxdata/iox/catalog/v1/parquet_file.proto @@ -9,6 +9,8 @@ message ParquetFile { reserved "sequencer_id"; reserved 17; reserved "shard_id"; + reserved 8; + reserved "max_sequence_number"; // the id of the file in the catalog int64 id = 1; @@ -20,8 +22,6 @@ message ParquetFile { int64 partition_id = 5; // the object store uuid string object_store_id = 6; - // the maximum sequence number from a record in this file - int64 max_sequence_number = 8; // the min timestamp of data in this file int64 min_time = 9; // the max timestamp of data in this file diff --git a/generated_types/protos/influxdata/iox/ingester/v1/parquet_metadata.proto b/generated_types/protos/influxdata/iox/ingester/v1/parquet_metadata.proto index 36f9c45543..12de13cc22 100644 --- a/generated_types/protos/influxdata/iox/ingester/v1/parquet_metadata.proto +++ b/generated_types/protos/influxdata/iox/ingester/v1/parquet_metadata.proto @@ -19,6 +19,9 @@ message IoxMetadata { // shard_id was removed reserved 17; reserved "shard_id"; + // max_sequence_number was removed + reserved 13; + reserved "max_sequence_number"; // Object store ID. Used in the parquet filename. 16 bytes in big-endian order. bytes object_store_id = 1; @@ -44,9 +47,6 @@ message IoxMetadata { // Partition key of the partition that holds this parquet file. string partition_key = 9; - // The maximum sequence number from a shard in this parquet file. - int64 max_sequence_number = 13; - // The sort key of this chunk SortKey sort_key = 15; diff --git a/generated_types/protos/influxdata/iox/ingester/v1/query.proto b/generated_types/protos/influxdata/iox/ingester/v1/query.proto index e99effdf01..c4597b2a94 100644 --- a/generated_types/protos/influxdata/iox/ingester/v1/query.proto +++ b/generated_types/protos/influxdata/iox/ingester/v1/query.proto @@ -73,10 +73,9 @@ message IngesterQueryResponseMetadata { // Partition id for this batch. int64 partition_id = 7; - // Optional partition status. - // - // If this is given, then no schema and no batch will be part of this FlightData object. - PartitionStatus status = 8; + // Was partition status. + reserved "status"; + reserved 8; // UUID of this ingester instance. 
string ingester_uuid = 9; @@ -85,18 +84,6 @@ message IngesterQueryResponseMetadata { uint64 completed_persistence_count = 10; } -// Status of a partition that has unpersisted data. -// -// Note that this structure is specific to a partition (which itself is bound to a table and shard)! -message PartitionStatus { - // Max sequence number persisted - optional int64 parquet_max_sequence_number = 1; - - // Deprecated tombstone support in ingester (#5825). - reserved "tombstone_max_sequence_number"; - reserved 2; -} - // Serialization of `predicate::predicate::Predicate` that contains DataFusion `Expr`s message Predicate { // Optional field restriction. If any are present, restricts the results to only tables which diff --git a/import/src/aggregate_tsm_schema/update_catalog.rs b/import/src/aggregate_tsm_schema/update_catalog.rs index 78caa606fe..024bb03b77 100644 --- a/import/src/aggregate_tsm_schema/update_catalog.rs +++ b/import/src/aggregate_tsm_schema/update_catalog.rs @@ -1023,7 +1023,6 @@ mod tests { let partition = Partition { id: PartitionId::new(1), table_id: TableId::new(1), - persisted_sequence_number: None, partition_key: PartitionKey::from("2022-06-21"), sort_key: Vec::new(), new_file_at: None, @@ -1070,7 +1069,6 @@ mod tests { let partition = Partition { id: PartitionId::new(1), table_id: TableId::new(1), - persisted_sequence_number: None, partition_key: PartitionKey::from("2022-06-21"), // N.B. sort key is already what it will computed to; here we're testing the `adjust_sort_key_columns` code path sort_key: vec!["host".to_string(), "arch".to_string(), "time".to_string()], @@ -1117,7 +1115,6 @@ mod tests { let partition = Partition { id: PartitionId::new(1), table_id: TableId::new(1), - persisted_sequence_number: None, partition_key: PartitionKey::from("2022-06-21"), // N.B. is missing host so will need updating sort_key: vec!["arch".to_string(), "time".to_string()], @@ -1166,7 +1163,6 @@ mod tests { let partition = Partition { id: PartitionId::new(1), table_id: TableId::new(1), - persisted_sequence_number: None, partition_key: PartitionKey::from("2022-06-21"), // N.B. 
is missing arch so will need updating sort_key: vec!["host".to_string(), "time".to_string()], diff --git a/ingester2/src/persist/mod.rs b/ingester2/src/persist/mod.rs index e9fa51cc3a..d841201197 100644 --- a/ingester2/src/persist/mod.rs +++ b/ingester2/src/persist/mod.rs @@ -15,7 +15,7 @@ mod tests { use std::{sync::Arc, time::Duration}; use assert_matches::assert_matches; - use data_types::{CompactionLevel, ParquetFile, SequenceNumber}; + use data_types::{CompactionLevel, ParquetFile}; use dml::DmlOperation; use futures::TryStreamExt; use iox_catalog::{ @@ -252,7 +252,6 @@ mod tests { table_id: got_table_id, partition_id: got_partition_id, object_store_id, - max_sequence_number, row_count, compaction_level, file_size_bytes, @@ -266,7 +265,6 @@ mod tests { assert_eq!(got_namespace_id, namespace_id); assert_eq!(got_table_id, table_id); assert_eq!(got_partition_id, partition_id); - assert_eq!(max_sequence_number, SequenceNumber::new(0)); assert_eq!(row_count, 1); assert_eq!(compaction_level, CompactionLevel::Initial); @@ -402,7 +400,6 @@ mod tests { table_id: got_table_id, partition_id: got_partition_id, object_store_id, - max_sequence_number, row_count, compaction_level, file_size_bytes, @@ -420,8 +417,6 @@ mod tests { assert_eq!(row_count, 1); assert_eq!(compaction_level, CompactionLevel::Initial); - assert_eq!(max_sequence_number.get(), 0); // Unused, dummy value - (object_store_id, file_size_bytes) } ); diff --git a/ingester2/src/persist/worker.rs b/ingester2/src/persist/worker.rs index 17877d2b80..ee69bcce1b 100644 --- a/ingester2/src/persist/worker.rs +++ b/ingester2/src/persist/worker.rs @@ -2,7 +2,7 @@ use std::{ops::ControlFlow, sync::Arc}; use async_channel::RecvError; use backoff::Backoff; -use data_types::{CompactionLevel, ParquetFileParams, SequenceNumber}; +use data_types::{CompactionLevel, ParquetFileParams}; use iox_catalog::interface::{get_table_schema_by_id, CasFailure, Catalog}; use iox_query::exec::Executor; use iox_time::{SystemProvider, TimeProvider}; @@ -263,7 +263,6 @@ where table_name: Arc::clone(&*ctx.table_name().get().await), partition_id: ctx.partition_id(), partition_key: ctx.partition_key().clone(), - max_sequence_number: SequenceNumber::new(0), // TODO: not ordered! compaction_level: CompactionLevel::Initial, sort_key: Some(data_sort_key), max_l0_created_at: time_now, diff --git a/ingester2/src/server/grpc/query.rs b/ingester2/src/server/grpc/query.rs index 4ea7f2551f..de013c7406 100644 --- a/ingester2/src/server/grpc/query.rs +++ b/ingester2/src/server/grpc/query.rs @@ -9,7 +9,7 @@ use arrow_flight::{ use data_types::{NamespaceId, PartitionId, TableId}; use flatbuffers::FlatBufferBuilder; use futures::{Stream, StreamExt, TryStreamExt}; -use generated_types::influxdata::iox::ingester::v1::{self as proto, PartitionStatus}; +use generated_types::influxdata::iox::ingester::v1 as proto; use metric::U64Counter; use observability_deps::tracing::*; use prost::Message; @@ -259,8 +259,6 @@ where fn encode_partition( // Partition ID. partition_id: PartitionId, - // Partition persistence status. - status: PartitionStatus, // Count of persisted Parquet files for the [`PartitionData`] instance this // [`PartitionResponse`] was generated from. 
// @@ -272,9 +270,6 @@ fn encode_partition( let mut bytes = bytes::BytesMut::new(); let app_metadata = proto::IngesterQueryResponseMetadata { partition_id: partition_id.get(), - status: Some(proto::PartitionStatus { - parquet_max_sequence_number: status.parquet_max_sequence_number, - }), ingester_uuid: ingester_id.to_string(), completed_persistence_count, }; @@ -312,14 +307,7 @@ fn encode_response( let partition_id = partition.id(); let completed_persistence_count = partition.completed_persistence_count(); let head = futures::stream::once(async move { - encode_partition( - partition_id, - PartitionStatus { - parquet_max_sequence_number: None, - }, - completed_persistence_count, - ingester_id, - ) + encode_partition(partition_id, completed_persistence_count, ingester_id) }); match partition.into_record_batch_stream() { diff --git a/ingester2/src/test_util.rs b/ingester2/src/test_util.rs index aa5a925ecc..140acefcb0 100644 --- a/ingester2/src/test_util.rs +++ b/ingester2/src/test_util.rs @@ -126,7 +126,6 @@ pub(crate) fn arbitrary_partition() -> Partition { table_id: ARBITRARY_TABLE_ID, partition_key: ARBITRARY_PARTITION_KEY.clone(), sort_key: Default::default(), - persisted_sequence_number: Default::default(), new_file_at: Default::default(), } } diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index 7a23d4a866..1b8742a6cd 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -792,7 +792,7 @@ pub(crate) mod test_helpers { use super::*; use ::test_helpers::{assert_contains, tracing::TracingCapture}; use assert_matches::assert_matches; - use data_types::{ColumnId, ColumnSet, CompactionLevel, SequenceNumber}; + use data_types::{ColumnId, ColumnSet, CompactionLevel}; use futures::Future; use metric::{Attributes, DurationHistogram, Metric}; use std::{collections::BTreeSet, ops::DerefMut, sync::Arc, time::Duration}; @@ -1803,7 +1803,6 @@ pub(crate) mod test_helpers { table_id: partition.table_id, partition_id: partition.id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(140), min_time: Timestamp::new(1), max_time: Timestamp::new(10), file_size_bytes: 1337, @@ -1839,7 +1838,6 @@ pub(crate) mod test_helpers { table_id: other_partition.table_id, partition_id: other_partition.id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(200), min_time: Timestamp::new(50), max_time: Timestamp::new(60), ..parquet_file_params.clone() @@ -1987,7 +1985,6 @@ pub(crate) mod test_helpers { object_store_id: Uuid::new_v4(), min_time: Timestamp::new(1), max_time: Timestamp::new(10), - max_sequence_number: SequenceNumber::new(10), ..parquet_file_params }; let f1 = repos @@ -2000,7 +1997,6 @@ pub(crate) mod test_helpers { object_store_id: Uuid::new_v4(), min_time: Timestamp::new(50), max_time: Timestamp::new(60), - max_sequence_number: SequenceNumber::new(11), ..f1_params.clone() }; let f2 = repos @@ -2019,7 +2015,6 @@ pub(crate) mod test_helpers { object_store_id: Uuid::new_v4(), min_time: Timestamp::new(50), max_time: Timestamp::new(60), - max_sequence_number: SequenceNumber::new(12), ..f2_params }; let f3 = repos @@ -2223,7 +2218,6 @@ pub(crate) mod test_helpers { table_id: table_1.id, partition_id: partition_1.id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(140), min_time: Timestamp::new(1), max_time: Timestamp::new(10), file_size_bytes: 1337, @@ -2238,7 +2232,6 @@ pub(crate) mod test_helpers { table_id: table_2.id, partition_id: partition_2.id, object_store_id: Uuid::new_v4(), - 
max_sequence_number: SequenceNumber::new(140), min_time: Timestamp::new(1), max_time: Timestamp::new(10), file_size_bytes: 1337, @@ -2327,7 +2320,6 @@ pub(crate) mod test_helpers { table_id: partition1.table_id, partition_id: partition1.id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(140), min_time: Timestamp::new(1), max_time: Timestamp::new(10), file_size_bytes: 1337, @@ -2683,7 +2675,6 @@ pub(crate) mod test_helpers { table_id: partition.table_id, partition_id: partition.id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(140), min_time, max_time, file_size_bytes: 1337, @@ -2794,7 +2785,6 @@ pub(crate) mod test_helpers { table_id: partition.table_id, partition_id: partition.id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(140), min_time: query_min_time + 1, max_time: query_max_time - 1, file_size_bytes: 1337, @@ -2884,7 +2874,6 @@ pub(crate) mod test_helpers { table_id: partition_1.table_id, partition_id: partition_1.id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(1), min_time: Timestamp::new(100), max_time: Timestamp::new(250), file_size_bytes: 1337, @@ -2901,7 +2890,6 @@ pub(crate) mod test_helpers { .unwrap(); let parquet_file_params_2 = ParquetFileParams { object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(3), min_time: Timestamp::new(200), max_time: Timestamp::new(300), ..parquet_file_params @@ -2941,7 +2929,6 @@ pub(crate) mod test_helpers { table_id: partition_2.table_id, partition_id: partition_2.id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(1), min_time: Timestamp::new(100), max_time: Timestamp::new(250), file_size_bytes: 1337, @@ -2958,7 +2945,6 @@ pub(crate) mod test_helpers { .unwrap(); let parquet_file_params_2 = ParquetFileParams { object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(3), min_time: Timestamp::new(200), max_time: Timestamp::new(300), ..parquet_file_params diff --git a/iox_catalog/src/kafkaless_transition.rs b/iox_catalog/src/kafkaless_transition.rs index c290507322..408848b794 100644 --- a/iox_catalog/src/kafkaless_transition.rs +++ b/iox_catalog/src/kafkaless_transition.rs @@ -1,4 +1,4 @@ -use data_types::{SequenceNumber, TopicId}; +use data_types::TopicId; /// Magic number to be used shard indices and shard ids in "kafkaless". pub(crate) const TRANSITION_SHARD_NUMBER: i32 = 1234; @@ -66,18 +66,4 @@ pub(crate) struct Shard { /// the shard index of the shard the sequence numbers are coming from, sharded by the router /// and write buffer pub(crate) shard_index: ShardIndex, - /// The minimum unpersisted sequence number. Because different tables - /// can be persisted at different times, it is possible some data has been persisted - /// with a higher sequence number than this. However, all data with a sequence number - /// lower than this must have been persisted to Parquet. 
- pub(crate) min_unpersisted_sequence_number: SequenceNumber, -} - -/// Shard index plus offset -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub(crate) struct Sequence { - /// The shard index - pub(crate) shard_index: ShardIndex, - /// The sequence number - pub(crate) sequence_number: SequenceNumber, } diff --git a/iox_catalog/src/mem.rs b/iox_catalog/src/mem.rs index e0e40a3eef..e9de4af7be 100644 --- a/iox_catalog/src/mem.rs +++ b/iox_catalog/src/mem.rs @@ -16,7 +16,7 @@ use async_trait::async_trait; use data_types::{ Column, ColumnId, ColumnType, CompactionLevel, Namespace, NamespaceId, ParquetFile, ParquetFileId, ParquetFileParams, Partition, PartitionId, PartitionKey, QueryPool, QueryPoolId, - SequenceNumber, SkippedCompaction, Table, TableId, Timestamp, TopicId, TopicMetadata, + SkippedCompaction, Table, TableId, Timestamp, TopicId, TopicMetadata, }; use iox_time::{SystemProvider, TimeProvider}; use observability_deps::tracing::warn; @@ -149,7 +149,6 @@ impl Catalog for MemCatalog { id: TRANSITION_SHARD_ID, topic_id: topic.id, shard_index: TRANSITION_SHARD_INDEX, - min_unpersisted_sequence_number: SequenceNumber::new(0), }; stage.shards.push(shard); transaction.commit_inplace().await?; @@ -701,7 +700,6 @@ impl PartitionRepo for MemTxn { table_id, partition_key: key, sort_key: vec![], - persisted_sequence_number: None, new_file_at: None, }; stage.partitions.push(p); diff --git a/iox_catalog/src/postgres.rs b/iox_catalog/src/postgres.rs index 9550965ce0..5951d75958 100644 --- a/iox_catalog/src/postgres.rs +++ b/iox_catalog/src/postgres.rs @@ -1097,7 +1097,7 @@ VALUES ( $1, $2, $3, '{}') ON CONFLICT ON CONSTRAINT partition_key_unique DO UPDATE SET partition_key = partition.partition_key -RETURNING id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at; +RETURNING id, table_id, partition_key, sort_key, new_file_at; "#, ) .bind(key) // $1 @@ -1119,7 +1119,7 @@ RETURNING id, table_id, partition_key, sort_key, persisted_sequence_number, new_ async fn get_by_id(&mut self, partition_id: PartitionId) -> Result> { let rec = sqlx::query_as::<_, Partition>( r#" -SELECT id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at +SELECT id, table_id, partition_key, sort_key, new_file_at FROM partition WHERE id = $1; "#, @@ -1140,7 +1140,7 @@ WHERE id = $1; async fn list_by_table_id(&mut self, table_id: TableId) -> Result> { sqlx::query_as::<_, Partition>( r#" -SELECT id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at +SELECT id, table_id, partition_key, sort_key, new_file_at FROM partition WHERE table_id = $1; "#, @@ -1181,7 +1181,7 @@ WHERE table_id = $1; UPDATE partition SET sort_key = $1 WHERE id = $2 AND sort_key = $3 -RETURNING id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at; +RETURNING id, table_id, partition_key, sort_key, new_file_at; "#, ) .bind(new_sort_key) // $1 @@ -1358,7 +1358,6 @@ impl ParquetFileRepo for PostgresTxn { table_id, partition_id, object_store_id, - max_sequence_number, min_time, max_time, file_size_bytes, @@ -1373,12 +1372,12 @@ impl ParquetFileRepo for PostgresTxn { r#" INSERT INTO parquet_file ( shard_id, table_id, partition_id, object_store_id, - max_sequence_number, min_time, max_time, file_size_bytes, + min_time, max_time, file_size_bytes, row_count, compaction_level, created_at, namespace_id, column_set, max_l0_created_at ) -VALUES ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14 ) +VALUES ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13 ) 
RETURNING id, table_id, partition_id, object_store_id, - max_sequence_number, min_time, max_time, to_delete, file_size_bytes, + min_time, max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, namespace_id, column_set, max_l0_created_at; "#, ) @@ -1386,16 +1385,15 @@ RETURNING .bind(table_id) // $2 .bind(partition_id) // $3 .bind(object_store_id) // $4 - .bind(max_sequence_number) // $5 - .bind(min_time) // $6 - .bind(max_time) // $7 - .bind(file_size_bytes) // $8 - .bind(row_count) // $9 - .bind(compaction_level) // $10 - .bind(created_at) // $11 - .bind(namespace_id) // $12 - .bind(column_set) // $13 - .bind(max_l0_created_at) // $14 + .bind(min_time) // $5 + .bind(max_time) // $6 + .bind(file_size_bytes) // $7 + .bind(row_count) // $8 + .bind(compaction_level) // $9 + .bind(created_at) // $10 + .bind(namespace_id) // $11 + .bind(column_set) // $12 + .bind(max_l0_created_at) // $13 .fetch_one(&mut self.inner) .await .map_err(|e| { @@ -1462,7 +1460,7 @@ RETURNING id; r#" SELECT parquet_file.id, parquet_file.namespace_id, parquet_file.table_id, parquet_file.partition_id, parquet_file.object_store_id, - parquet_file.max_sequence_number, parquet_file.min_time, + parquet_file.min_time, parquet_file.max_time, parquet_file.to_delete, parquet_file.file_size_bytes, parquet_file.row_count, parquet_file.compaction_level, parquet_file.created_at, parquet_file.column_set, parquet_file.max_l0_created_at @@ -1482,7 +1480,7 @@ WHERE table_name.namespace_id = $1 sqlx::query_as::<_, ParquetFile>( r#" SELECT id, namespace_id, table_id, partition_id, object_store_id, - max_sequence_number, min_time, max_time, to_delete, file_size_bytes, + min_time, max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set, max_l0_created_at FROM parquet_file WHERE table_id = $1 AND to_delete IS NULL; @@ -1544,7 +1542,7 @@ RETURNING id; sqlx::query_as::<_, ParquetFile>( r#" SELECT id, namespace_id, table_id, partition_id, object_store_id, - max_sequence_number, min_time, max_time, to_delete, file_size_bytes, + min_time, max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set, max_l0_created_at FROM parquet_file WHERE parquet_file.partition_id = $1 @@ -1612,7 +1610,7 @@ RETURNING id; let rec = sqlx::query_as::<_, ParquetFile>( r#" SELECT id, namespace_id, table_id, partition_id, object_store_id, - max_sequence_number, min_time, max_time, to_delete, file_size_bytes, + min_time, max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set, max_l0_created_at FROM parquet_file WHERE object_store_id = $1; @@ -1670,7 +1668,7 @@ mod tests { use super::*; use crate::create_or_get_default_records; use assert_matches::assert_matches; - use data_types::{ColumnId, ColumnSet, SequenceNumber}; + use data_types::{ColumnId, ColumnSet}; use metric::{Attributes, DurationHistogram, Metric}; use rand::Rng; use sqlx::migrate::MigrateDatabase; @@ -2209,7 +2207,6 @@ mod tests { table_id, partition_id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(100), min_time: Timestamp::new(1), max_time: Timestamp::new(5), file_size_bytes: 1337, diff --git a/iox_catalog/src/sqlite.rs b/iox_catalog/src/sqlite.rs index ab8a9e0075..d9ffa2fceb 100644 --- a/iox_catalog/src/sqlite.rs +++ b/iox_catalog/src/sqlite.rs @@ -15,7 +15,7 @@ use async_trait::async_trait; use data_types::{ Column, ColumnId, ColumnSet, ColumnType, CompactionLevel, Namespace, NamespaceId, ParquetFile, ParquetFileId, ParquetFileParams, Partition, 
PartitionId, PartitionKey, QueryPool, QueryPoolId, - SequenceNumber, SkippedCompaction, Table, TableId, Timestamp, TopicId, TopicMetadata, + SkippedCompaction, Table, TableId, Timestamp, TopicId, TopicMetadata, }; use serde::{Deserialize, Serialize}; use std::ops::Deref; @@ -871,7 +871,6 @@ struct PartitionPod { table_id: TableId, partition_key: PartitionKey, sort_key: Json>, - persisted_sequence_number: Option, new_file_at: Option, } @@ -882,7 +881,6 @@ impl From for Partition { table_id: value.table_id, partition_key: value.partition_key, sort_key: value.sort_key.0, - persisted_sequence_number: value.persisted_sequence_number, new_file_at: value.new_file_at, } } @@ -903,7 +901,7 @@ VALUES ( $1, $2, $3, '[]') ON CONFLICT (table_id, partition_key) DO UPDATE SET partition_key = partition.partition_key -RETURNING id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at; +RETURNING id, table_id, partition_key, sort_key, new_file_at; "#, ) .bind(key) // $1 @@ -925,7 +923,7 @@ RETURNING id, table_id, partition_key, sort_key, persisted_sequence_number, new_ async fn get_by_id(&mut self, partition_id: PartitionId) -> Result> { let rec = sqlx::query_as::<_, PartitionPod>( r#" -SELECT id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at +SELECT id, table_id, partition_key, sort_key, new_file_at FROM partition WHERE id = $1; "#, @@ -946,7 +944,7 @@ WHERE id = $1; async fn list_by_table_id(&mut self, table_id: TableId) -> Result> { Ok(sqlx::query_as::<_, PartitionPod>( r#" -SELECT id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at +SELECT id, table_id, partition_key, sort_key, new_file_at FROM partition WHERE table_id = $1; "#, @@ -990,7 +988,7 @@ WHERE table_id = $1; UPDATE partition SET sort_key = $1 WHERE id = $2 AND sort_key = $3 -RETURNING id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at; +RETURNING id, table_id, partition_key, sort_key, new_file_at; "#, ) .bind(Json(new_sort_key)) // $1 @@ -1129,7 +1127,7 @@ RETURNING * async fn most_recent_n(&mut self, n: usize) -> Result> { Ok(sqlx::query_as::<_, PartitionPod>( r#" -SELECT id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at +SELECT id, table_id, partition_key, sort_key, new_file_at FROM partition ORDER BY id DESC LIMIT $1; @@ -1185,7 +1183,6 @@ struct ParquetFilePod { table_id: TableId, partition_id: PartitionId, object_store_id: Uuid, - max_sequence_number: SequenceNumber, min_time: Timestamp, max_time: Timestamp, to_delete: Option, @@ -1205,7 +1202,6 @@ impl From for ParquetFile { table_id: value.table_id, partition_id: value.partition_id, object_store_id: value.object_store_id, - max_sequence_number: value.max_sequence_number, min_time: value.min_time, max_time: value.max_time, to_delete: value.to_delete, @@ -1227,7 +1223,6 @@ impl ParquetFileRepo for SqliteTxn { table_id, partition_id, object_store_id, - max_sequence_number, min_time, max_time, file_size_bytes, @@ -1242,12 +1237,12 @@ impl ParquetFileRepo for SqliteTxn { r#" INSERT INTO parquet_file ( shard_id, table_id, partition_id, object_store_id, - max_sequence_number, min_time, max_time, file_size_bytes, + min_time, max_time, file_size_bytes, row_count, compaction_level, created_at, namespace_id, column_set, max_l0_created_at ) -VALUES ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14 ) +VALUES ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13 ) RETURNING id, table_id, partition_id, object_store_id, - max_sequence_number, min_time, 
max_time, to_delete, file_size_bytes, + min_time, max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, namespace_id, column_set, max_l0_created_at; "#, ) @@ -1255,16 +1250,15 @@ RETURNING .bind(table_id) // $2 .bind(partition_id) // $3 .bind(object_store_id) // $4 - .bind(max_sequence_number) // $5 - .bind(min_time) // $6 - .bind(max_time) // $7 - .bind(file_size_bytes) // $8 - .bind(row_count) // $9 - .bind(compaction_level) // $10 - .bind(created_at) // $11 - .bind(namespace_id) // $12 - .bind(from_column_set(&column_set)) // $13 - .bind(max_l0_created_at) // $14 + .bind(min_time) // $5 + .bind(max_time) // $6 + .bind(file_size_bytes) // $7 + .bind(row_count) // $8 + .bind(compaction_level) // $9 + .bind(created_at) // $10 + .bind(namespace_id) // $11 + .bind(from_column_set(&column_set)) // $12 + .bind(max_l0_created_at) // $13 .fetch_one(self.inner.get_mut()) .await .map_err(|e| { @@ -1332,7 +1326,7 @@ RETURNING id; Ok(sqlx::query_as::<_, ParquetFilePod>( r#" SELECT parquet_file.id, parquet_file.namespace_id, parquet_file.table_id, - parquet_file.partition_id, parquet_file.object_store_id, parquet_file.max_sequence_number, + parquet_file.partition_id, parquet_file.object_store_id, parquet_file.min_time, parquet_file.max_time, parquet_file.to_delete, parquet_file.file_size_bytes, parquet_file.row_count, parquet_file.compaction_level, parquet_file.created_at, parquet_file.column_set, parquet_file.max_l0_created_at @@ -1355,7 +1349,7 @@ WHERE table_name.namespace_id = $1 Ok(sqlx::query_as::<_, ParquetFilePod>( r#" SELECT id, namespace_id, table_id, partition_id, object_store_id, - max_sequence_number, min_time, max_time, to_delete, file_size_bytes, + min_time, max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set, max_l0_created_at FROM parquet_file WHERE table_id = $1 AND to_delete IS NULL; @@ -1423,7 +1417,7 @@ RETURNING id; Ok(sqlx::query_as::<_, ParquetFilePod>( r#" SELECT id, namespace_id, table_id, partition_id, object_store_id, - max_sequence_number, min_time, max_time, to_delete, file_size_bytes, + min_time, max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set, max_l0_created_at FROM parquet_file WHERE parquet_file.partition_id = $1 @@ -1494,7 +1488,7 @@ RETURNING id; let rec = sqlx::query_as::<_, ParquetFilePod>( r#" SELECT id, namespace_id, table_id, partition_id, object_store_id, - max_sequence_number, min_time, max_time, to_delete, file_size_bytes, + min_time, max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set, max_l0_created_at FROM parquet_file WHERE object_store_id = $1; @@ -1863,7 +1857,6 @@ mod tests { table_id, partition_id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(100), min_time: Timestamp::new(1), max_time: Timestamp::new(5), file_size_bytes: 1337, diff --git a/iox_tests/src/builders.rs b/iox_tests/src/builders.rs index 48ecc201f1..7e9a20be1c 100644 --- a/iox_tests/src/builders.rs +++ b/iox_tests/src/builders.rs @@ -1,6 +1,6 @@ use data_types::{ ColumnSet, CompactionLevel, NamespaceId, ParquetFile, ParquetFileId, Partition, PartitionId, - PartitionKey, SequenceNumber, SkippedCompaction, Table, TableId, Timestamp, + PartitionKey, SkippedCompaction, Table, TableId, Timestamp, }; use uuid::Uuid; @@ -21,7 +21,6 @@ impl ParquetFileBuilder { table_id: TableId::new(0), partition_id: PartitionId::new(0), object_store_id: Uuid::from_u128(id.try_into().expect("invalid id")), - max_sequence_number: 
SequenceNumber::new(0), min_time: Timestamp::new(0), max_time: Timestamp::new(0), to_delete: None, @@ -157,7 +156,6 @@ impl PartitionBuilder { table_id: TableId::new(0), partition_key: PartitionKey::from("key"), sort_key: vec![], - persisted_sequence_number: None, new_file_at: None, }, } diff --git a/iox_tests/src/catalog.rs b/iox_tests/src/catalog.rs index c4a3f5ad4a..0d3cd9507e 100644 --- a/iox_tests/src/catalog.rs +++ b/iox_tests/src/catalog.rs @@ -6,8 +6,8 @@ use arrow::{ }; use data_types::{ Column, ColumnSet, ColumnType, CompactionLevel, Namespace, NamespaceSchema, ParquetFile, - ParquetFileParams, Partition, PartitionId, QueryPool, SequenceNumber, Table, TableId, - TableSchema, Timestamp, TopicMetadata, + ParquetFileParams, Partition, PartitionId, QueryPool, Table, TableId, TableSchema, Timestamp, + TopicMetadata, }; use datafusion::physical_plan::metrics::Count; use datafusion_util::MemoryStream; @@ -456,7 +456,6 @@ impl TestPartition { record_batch, table, schema, - max_sequence_number, min_time, max_time, file_size_bytes, @@ -497,7 +496,6 @@ impl TestPartition { table_name: self.table.table.name.clone().into(), partition_id: self.partition.id, partition_key: self.partition.partition_key.clone(), - max_sequence_number, compaction_level: CompactionLevel::Initial, sort_key: Some(sort_key.clone()), max_l0_created_at: Time::from_timestamp_nanos(max_l0_created_at), @@ -516,7 +514,6 @@ impl TestPartition { record_batch: Some(record_batch), table: Some(table), schema: Some(schema), - max_sequence_number, min_time, max_time, file_size_bytes: Some(file_size_bytes.unwrap_or(real_file_size_bytes as u64)), @@ -543,7 +540,6 @@ impl TestPartition { ) -> TestParquetFile { let TestParquetFileBuilder { record_batch, - max_sequence_number, min_time, max_time, file_size_bytes, @@ -585,7 +581,6 @@ impl TestPartition { table_id: self.table.table.id, partition_id: self.partition.id, object_store_id: object_store_id.unwrap_or_else(Uuid::new_v4), - max_sequence_number, min_time: Timestamp::new(min_time), max_time: Timestamp::new(max_time), file_size_bytes: file_size_bytes.unwrap_or(0) as i64, @@ -628,7 +623,6 @@ pub struct TestParquetFileBuilder { record_batch: Option, table: Option, schema: Option, - max_sequence_number: SequenceNumber, min_time: i64, max_time: i64, file_size_bytes: Option, @@ -647,7 +641,6 @@ impl Default for TestParquetFileBuilder { record_batch: None, table: None, schema: None, - max_sequence_number: SequenceNumber::new(100), min_time: now().timestamp_nanos(), max_time: now().timestamp_nanos(), file_size_bytes: None, @@ -690,12 +683,6 @@ impl TestParquetFileBuilder { self } - /// Specify the maximum sequence number for the parquet file metadata. - pub fn with_max_seq(mut self, max_seq: i64) -> Self { - self.max_sequence_number = SequenceNumber::new(max_seq); - self - } - /// Specify the minimum time for the parquet file metadata. 
pub fn with_min_time(mut self, min_time: i64) -> Self { self.min_time = min_time; diff --git a/parquet_file/src/metadata.rs b/parquet_file/src/metadata.rs index 5bf26cec17..ea18d486a6 100644 --- a/parquet_file/src/metadata.rs +++ b/parquet_file/src/metadata.rs @@ -90,8 +90,7 @@ use base64::{prelude::BASE64_STANDARD, Engine}; use bytes::Bytes; use data_types::{ ColumnId, ColumnSet, ColumnSummary, CompactionLevel, InfluxDbType, NamespaceId, - ParquetFileParams, PartitionId, PartitionKey, SequenceNumber, StatValues, Statistics, TableId, - Timestamp, + ParquetFileParams, PartitionId, PartitionKey, StatValues, Statistics, TableId, Timestamp, }; use generated_types::influxdata::iox::ingester::v1 as proto; use iox_time::Time; @@ -274,9 +273,6 @@ pub struct IoxMetadata { /// partition key of the data pub partition_key: PartitionKey, - /// sequence number of the last write - pub max_sequence_number: SequenceNumber, - /// The compaction level of the file. /// /// * 0 (`CompactionLevel::Initial`): represents a level-0 file that is persisted by an @@ -340,7 +336,6 @@ impl IoxMetadata { table_name: self.table_name.to_string(), partition_id: self.partition_id.get(), partition_key: self.partition_key.to_string(), - max_sequence_number: self.max_sequence_number.get(), sort_key, compaction_level: self.compaction_level as i32, max_l0_created_at: Some(self.max_l0_created_at.date_time().into()), @@ -392,7 +387,6 @@ impl IoxMetadata { table_name, partition_id: PartitionId::new(proto_msg.partition_id), partition_key, - max_sequence_number: SequenceNumber::new(proto_msg.max_sequence_number), sort_key, compaction_level: proto_msg.compaction_level.try_into().context( InvalidCompactionLevelSnafu { @@ -417,7 +411,6 @@ impl IoxMetadata { table_name: table_name.into(), partition_id: PartitionId::new(1), partition_key: "unknown".into(), - max_sequence_number: SequenceNumber::new(1), compaction_level: CompactionLevel::Initial, sort_key: None, max_l0_created_at: Time::from_timestamp_nanos(creation_timestamp_ns), @@ -499,7 +492,6 @@ impl IoxMetadata { table_id: self.table_id, partition_id: self.partition_id, object_store_id: self.object_store_id, - max_sequence_number: self.max_sequence_number, min_time, max_time, file_size_bytes: file_size_bytes as i64, @@ -1017,7 +1009,6 @@ mod tests { table_name: Arc::from("weather"), partition_id: PartitionId::new(4), partition_key: PartitionKey::from("part"), - max_sequence_number: SequenceNumber::new(6), compaction_level: CompactionLevel::Initial, sort_key: Some(sort_key), max_l0_created_at: create_time, @@ -1041,7 +1032,6 @@ mod tests { table_name: "platanos".into(), partition_id: PartitionId::new(4), partition_key: "potato".into(), - max_sequence_number: SequenceNumber::new(11), compaction_level: CompactionLevel::FileNonOverlapped, sort_key: None, max_l0_created_at: Time::from_timestamp_nanos(42), diff --git a/parquet_file/src/serialize.rs b/parquet_file/src/serialize.rs index dfb8b094a2..0a631bfe69 100644 --- a/parquet_file/src/serialize.rs +++ b/parquet_file/src/serialize.rs @@ -197,7 +197,7 @@ mod tests { record_batch::RecordBatch, }; use bytes::Bytes; - use data_types::{CompactionLevel, NamespaceId, PartitionId, SequenceNumber, TableId}; + use data_types::{CompactionLevel, NamespaceId, PartitionId, TableId}; use datafusion::parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; use datafusion_util::MemoryStream; use iox_time::Time; @@ -214,7 +214,6 @@ mod tests { table_name: "platanos".into(), partition_id: PartitionId::new(4), partition_key: "potato".into(), - 
max_sequence_number: SequenceNumber::new(11), compaction_level: CompactionLevel::FileNonOverlapped, sort_key: None, max_l0_created_at: Time::from_timestamp_nanos(42), diff --git a/parquet_file/src/storage.rs b/parquet_file/src/storage.rs index 52adf34302..7cb04283ad 100644 --- a/parquet_file/src/storage.rs +++ b/parquet_file/src/storage.rs @@ -323,7 +323,7 @@ mod tests { array::{ArrayRef, Int64Array, StringArray}, record_batch::RecordBatch, }; - use data_types::{CompactionLevel, NamespaceId, PartitionId, SequenceNumber, TableId}; + use data_types::{CompactionLevel, NamespaceId, PartitionId, TableId}; use datafusion::common::DataFusionError; use datafusion_util::MemoryStream; use iox_time::Time; @@ -579,7 +579,6 @@ mod tests { table_name: "platanos".into(), partition_id: PartitionId::new(4), partition_key: "potato".into(), - max_sequence_number: SequenceNumber::new(11), compaction_level: CompactionLevel::FileNonOverlapped, sort_key: None, max_l0_created_at: Time::from_timestamp_nanos(42), diff --git a/parquet_file/tests/metadata.rs b/parquet_file/tests/metadata.rs index f8a8d0df9d..ec673bea8f 100644 --- a/parquet_file/tests/metadata.rs +++ b/parquet_file/tests/metadata.rs @@ -4,9 +4,7 @@ use arrow::{ array::{ArrayRef, StringArray, TimestampNanosecondArray}, record_batch::RecordBatch, }; -use data_types::{ - ColumnId, CompactionLevel, NamespaceId, PartitionId, SequenceNumber, TableId, Timestamp, -}; +use data_types::{ColumnId, CompactionLevel, NamespaceId, PartitionId, TableId, Timestamp}; use datafusion_util::MemoryStream; use iox_time::Time; use object_store::DynObjectStore; @@ -57,7 +55,6 @@ async fn test_decoded_iox_metadata() { table_name: "platanos".into(), partition_id: PartitionId::new(4), partition_key: "potato".into(), - max_sequence_number: SequenceNumber::new(11), compaction_level: CompactionLevel::FileNonOverlapped, sort_key: None, max_l0_created_at: Time::from_timestamp_nanos(42), @@ -198,7 +195,6 @@ async fn test_empty_parquet_file_panic() { table_name: "platanos".into(), partition_id: PartitionId::new(4), partition_key: "potato".into(), - max_sequence_number: SequenceNumber::new(11), compaction_level: CompactionLevel::FileNonOverlapped, sort_key: None, max_l0_created_at: Time::from_timestamp_nanos(42), @@ -292,7 +288,6 @@ async fn test_decoded_many_columns_with_null_cols_iox_metadata() { table_name: "platanos".into(), partition_id: PartitionId::new(4), partition_key: "potato".into(), - max_sequence_number: SequenceNumber::new(11), compaction_level: CompactionLevel::FileNonOverlapped, sort_key: Some(sort_key), max_l0_created_at: Time::from_timestamp_nanos(42), @@ -380,7 +375,6 @@ async fn test_derive_parquet_file_params() { table_name: "platanos".into(), partition_id, partition_key: "potato".into(), - max_sequence_number: SequenceNumber::new(11), compaction_level: CompactionLevel::FileNonOverlapped, sort_key: None, max_l0_created_at: Time::from_timestamp_nanos(1234), @@ -424,7 +418,6 @@ async fn test_derive_parquet_file_params() { assert_eq!(catalog_data.table_id, meta.table_id); assert_eq!(catalog_data.partition_id, meta.partition_id); assert_eq!(catalog_data.object_store_id, meta.object_store_id); - assert_eq!(catalog_data.max_sequence_number, meta.max_sequence_number); assert_eq!(catalog_data.file_size_bytes, file_size as i64); assert_eq!(catalog_data.compaction_level, meta.compaction_level); assert_eq!(catalog_data.created_at, Timestamp::new(1234)); diff --git a/querier/src/cache/parquet_file.rs b/querier/src/cache/parquet_file.rs index f231093ad0..e7d3eb909e 100644 --- 
a/querier/src/cache/parquet_file.rs +++ b/querier/src/cache/parquet_file.rs @@ -347,8 +347,8 @@ mod tests { partition.create_parquet_file(builder).await; let table_id = table.table.id; - let single_file_size = 224; - let two_file_size = 408; + let single_file_size = 216; + let two_file_size = 392; assert!(single_file_size < two_file_size); let cache = make_cache(&catalog); diff --git a/querier/src/ingester/mod.rs b/querier/src/ingester/mod.rs index 5339f2fbb0..d55435580e 100644 --- a/querier/src/ingester/mod.rs +++ b/querier/src/ingester/mod.rs @@ -13,8 +13,7 @@ use async_trait::async_trait; use backoff::{Backoff, BackoffConfig, BackoffError}; use client_util::connection; use data_types::{ - ChunkId, ChunkOrder, DeletePredicate, NamespaceId, PartitionId, SequenceNumber, TableSummary, - TimestampMinMax, + ChunkId, ChunkOrder, DeletePredicate, NamespaceId, PartitionId, TableSummary, TimestampMinMax, }; use datafusion::error::DataFusionError; use futures::{stream::FuturesUnordered, TryStreamExt}; @@ -105,14 +104,6 @@ pub enum Error { source: Box, }, - #[snafu(display( - "Partition status missing for partition {partition_id}, ingestger: {ingester_address}" - ))] - PartitionStatusMissing { - partition_id: PartitionId, - ingester_address: String, - }, - #[snafu(display("Got batch without chunk information from ingester: {ingester_address}"))] BatchWithoutChunk { ingester_address: String }, @@ -550,10 +541,6 @@ impl IngesterStreamDecoder { self.flush_partition()?; let partition_id = PartitionId::new(md.partition_id); - let status = md.status.context(PartitionStatusMissingSnafu { - partition_id, - ingester_address: self.ingester_address.as_ref(), - })?; ensure!( !self.finished_partitions.contains_key(&partition_id), DuplicatePartitionInfoSnafu { @@ -576,7 +563,6 @@ impl IngesterStreamDecoder { ingester_uuid, partition_id, md.completed_persistence_count, - status.parquet_max_sequence_number.map(SequenceNumber::new), partition_sort_key, ); self.current_partition = Some(partition); @@ -772,10 +758,6 @@ pub struct IngesterPartition { /// The number of Parquet files this ingester UUID has persisted for this partition. completed_persistence_count: u64, - /// Maximum sequence number of parquet files the ingester has - /// persisted for this partition - parquet_max_sequence_number: Option, - /// Partition-wide sort key. 
partition_sort_key: Option>, @@ -790,14 +772,12 @@ impl IngesterPartition { ingester_uuid: Uuid, partition_id: PartitionId, completed_persistence_count: u64, - parquet_max_sequence_number: Option, partition_sort_key: Option>, ) -> Self { Self { ingester_uuid, partition_id, completed_persistence_count, - parquet_max_sequence_number, partition_sort_key, chunks: vec![], } @@ -864,10 +844,6 @@ impl IngesterPartition { self.completed_persistence_count } - pub(crate) fn parquet_max_sequence_number(&self) -> Option { - self.parquet_max_sequence_number - } - pub(crate) fn chunks(&self) -> &[IngesterChunk] { &self.chunks } @@ -1077,7 +1053,6 @@ mod tests { }; use assert_matches::assert_matches; use data_types::TableId; - use generated_types::influxdata::iox::ingester::v1::PartitionStatus; use influxdb_iox_client::flight::generated_types::IngesterQueryResponseMetadata; use iox_tests::TestCatalog; use metric::Attributes; @@ -1171,14 +1146,7 @@ mod tests { MockFlightClient::new([( "addr1", Ok(MockQueryData { - results: vec![metadata( - 1, - Some(PartitionStatus { - parquet_max_sequence_number: None, - }), - ingester_uuid.to_string(), - 5, - )], + results: vec![metadata(1, ingester_uuid.to_string(), 5)], }), )]) .await, @@ -1190,30 +1158,11 @@ mod tests { let p = &partitions[0]; assert_eq!(p.partition_id.get(), 1); - assert_eq!(p.parquet_max_sequence_number, None); assert_eq!(p.chunks.len(), 0); assert_eq!(p.ingester_uuid, ingester_uuid); assert_eq!(p.completed_persistence_count, 5); } - #[tokio::test] - async fn test_flight_err_partition_status_missing() { - let ingester_uuid = Uuid::new_v4(); - - let mock_flight_client = Arc::new( - MockFlightClient::new([( - "addr1", - Ok(MockQueryData { - results: vec![metadata(1, None, ingester_uuid.to_string(), 5)], - }), - )]) - .await, - ); - let ingester_conn = mock_flight_client.ingester_conn().await; - let err = get_partitions(&ingester_conn).await.unwrap_err(); - assert_matches!(err, Error::PartitionStatusMissing { .. 
}); - } - #[tokio::test] async fn test_flight_err_duplicate_partition_info() { let ingester_uuid = Uuid::new_v4(); @@ -1223,30 +1172,9 @@ mod tests { "addr1", Ok(MockQueryData { results: vec![ - metadata( - 1, - Some(PartitionStatus { - parquet_max_sequence_number: None, - }), - ingester_uuid.to_string(), - 3, - ), - metadata( - 2, - Some(PartitionStatus { - parquet_max_sequence_number: None, - }), - ingester_uuid.to_string(), - 4, - ), - metadata( - 1, - Some(PartitionStatus { - parquet_max_sequence_number: None, - }), - ingester_uuid.to_string(), - 5, - ), + metadata(1, ingester_uuid.to_string(), 3), + metadata(2, ingester_uuid.to_string(), 4), + metadata(1, ingester_uuid.to_string(), 5), ], }), )]) @@ -1320,14 +1248,7 @@ mod tests { "addr1", Ok(MockQueryData { results: vec![ - metadata( - 1, - Some(PartitionStatus { - parquet_max_sequence_number: Some(11), - }), - ingester_uuid1.to_string(), - 3, - ), + metadata(1, ingester_uuid1.to_string(), 3), Ok(( DecodedPayload::Schema(Arc::clone(&schema_1_1)), IngesterQueryResponseMetadata::default(), @@ -1348,14 +1269,7 @@ mod tests { DecodedPayload::RecordBatch(record_batch_1_2), IngesterQueryResponseMetadata::default(), )), - metadata( - 2, - Some(PartitionStatus { - parquet_max_sequence_number: Some(21), - }), - ingester_uuid1.to_string(), - 4, - ), + metadata(2, ingester_uuid1.to_string(), 4), Ok(( DecodedPayload::Schema(Arc::clone(&schema_2_1)), IngesterQueryResponseMetadata::default(), @@ -1371,14 +1285,7 @@ mod tests { "addr2", Ok(MockQueryData { results: vec![ - metadata( - 3, - Some(PartitionStatus { - parquet_max_sequence_number: Some(31), - }), - ingester_uuid2.to_string(), - 5, - ), + metadata(3, ingester_uuid2.to_string(), 5), Ok(( DecodedPayload::Schema(Arc::clone(&schema_3_1)), IngesterQueryResponseMetadata::default(), @@ -1400,10 +1307,6 @@ mod tests { let p1 = &partitions[0]; assert_eq!(p1.partition_id.get(), 1); - assert_eq!( - p1.parquet_max_sequence_number, - Some(SequenceNumber::new(11)) - ); assert_eq!(p1.chunks.len(), 2); assert_eq!(p1.chunks[0].schema().as_arrow(), schema_1_1); assert_eq!(p1.chunks[0].batches.len(), 2); @@ -1415,10 +1318,6 @@ mod tests { let p2 = &partitions[1]; assert_eq!(p2.partition_id.get(), 2); - assert_eq!( - p2.parquet_max_sequence_number, - Some(SequenceNumber::new(21)) - ); assert_eq!(p2.chunks.len(), 1); assert_eq!(p2.chunks[0].schema().as_arrow(), schema_2_1); assert_eq!(p2.chunks[0].batches.len(), 1); @@ -1426,10 +1325,6 @@ mod tests { let p3 = &partitions[2]; assert_eq!(p3.partition_id.get(), 3); - assert_eq!( - p3.parquet_max_sequence_number, - Some(SequenceNumber::new(31)) - ); assert_eq!(p3.chunks.len(), 1); assert_eq!(p3.chunks[0].schema().as_arrow(), schema_3_1); assert_eq!(p3.chunks[0].batches.len(), 1); @@ -1442,14 +1337,7 @@ mod tests { MockFlightClient::new([( "addr1", Ok(MockQueryData { - results: vec![metadata( - 1, - Some(PartitionStatus { - parquet_max_sequence_number: Some(11), - }), - "not-a-valid-uuid", - 42, - )], + results: vec![metadata(1, "not-a-valid-uuid", 42)], }), )]) .await, @@ -1481,36 +1369,15 @@ mod tests { "addr1", Ok(MockQueryData { results: vec![ - metadata( - 1, - Some(PartitionStatus { - parquet_max_sequence_number: Some(11), - }), - ingester_uuid1.to_string(), - 0, - ), - metadata( - 2, - Some(PartitionStatus { - parquet_max_sequence_number: Some(21), - }), - ingester_uuid1.to_string(), - 42, - ), + metadata(1, ingester_uuid1.to_string(), 0), + metadata(2, ingester_uuid1.to_string(), 42), ], }), ), ( "addr2", Ok(MockQueryData { - results: vec![metadata( - 3, - 
Some(PartitionStatus { - parquet_max_sequence_number: Some(31), - }), - ingester_uuid2.to_string(), - 9000, - )], + results: vec![metadata(3, ingester_uuid2.to_string(), 9000)], }), ), ]) @@ -1535,28 +1402,16 @@ mod tests { assert_eq!(p1.ingester_uuid, ingester_uuid1); assert_eq!(p1.completed_persistence_count, 0); assert_eq!(p1.partition_id.get(), 1); - assert_eq!( - p1.parquet_max_sequence_number, - Some(SequenceNumber::new(11)) - ); let p2 = &partitions[1]; assert_eq!(p2.ingester_uuid, ingester_uuid1); assert_eq!(p2.completed_persistence_count, 42); assert_eq!(p2.partition_id.get(), 2); - assert_eq!( - p2.parquet_max_sequence_number, - Some(SequenceNumber::new(21)) - ); let p3 = &partitions[2]; assert_eq!(p3.ingester_uuid, ingester_uuid2); assert_eq!(p3.completed_persistence_count, 9000); assert_eq!(p3.partition_id.get(), 3); - assert_eq!( - p3.parquet_max_sequence_number, - Some(SequenceNumber::new(31)) - ); } #[tokio::test] @@ -1694,7 +1549,6 @@ mod tests { fn metadata( partition_id: i64, - status: Option, ingester_uuid: impl Into, completed_persistence_count: u64, ) -> MockFlightResult { @@ -1702,7 +1556,6 @@ mod tests { DecodedPayload::None, IngesterQueryResponseMetadata { partition_id, - status, ingester_uuid: ingester_uuid.into(), completed_persistence_count, }, @@ -1813,17 +1666,11 @@ mod tests { ]; for case in cases { - let parquet_max_sequence_number = None; // Construct a partition and ensure it doesn't error - let ingester_partition = IngesterPartition::new( - ingester_uuid, - PartitionId::new(1), - 0, - parquet_max_sequence_number, - None, - ) - .try_add_chunk(ChunkId::new(), expected_schema.clone(), vec![case]) - .unwrap(); + let ingester_partition = + IngesterPartition::new(ingester_uuid, PartitionId::new(1), 0, None) + .try_add_chunk(ChunkId::new(), expected_schema.clone(), vec![case]) + .unwrap(); for batch in &ingester_partition.chunks[0].batches { assert_eq!(batch.schema(), expected_schema.as_arrow()); @@ -1843,16 +1690,9 @@ mod tests { let batch = RecordBatch::try_from_iter(vec![("b", int64_array()), ("time", ts_array())]).unwrap(); - let parquet_max_sequence_number = None; - let err = IngesterPartition::new( - ingester_uuid, - PartitionId::new(1), - 0, - parquet_max_sequence_number, - None, - ) - .try_add_chunk(ChunkId::new(), expected_schema, vec![batch]) - .unwrap_err(); + let err = IngesterPartition::new(ingester_uuid, PartitionId::new(1), 0, None) + .try_add_chunk(ChunkId::new(), expected_schema, vec![batch]) + .unwrap_err(); assert_matches!(err, Error::RecordBatchType { .. 
}); } diff --git a/querier/src/namespace/query_access.rs b/querier/src/namespace/query_access.rs index 4ce9353935..c6b1fd4d85 100644 --- a/querier/src/namespace/query_access.rs +++ b/querier/src/namespace/query_access.rs @@ -244,7 +244,6 @@ mod tests { let builder = TestParquetFileBuilder::default() .with_max_l0_created_at(Time::from_timestamp_nanos(1)) .with_line_protocol("cpu,host=a load=1 11") - .with_max_seq(1) .with_min_time(11) .with_max_time(11); partition_cpu_a_1.create_parquet_file(builder).await; @@ -252,7 +251,6 @@ mod tests { let builder = TestParquetFileBuilder::default() .with_max_l0_created_at(Time::from_timestamp_nanos(2)) .with_line_protocol("cpu,host=a load=2 22") - .with_max_seq(2) .with_min_time(22) .with_max_time(22); partition_cpu_a_1 @@ -264,7 +262,6 @@ mod tests { let builder = TestParquetFileBuilder::default() .with_max_l0_created_at(Time::from_timestamp_nanos(3)) .with_line_protocol("cpu,host=z load=0 0") - .with_max_seq(2) .with_min_time(22) .with_max_time(22); partition_cpu_a_1.create_parquet_file(builder).await; @@ -272,7 +269,6 @@ mod tests { let builder = TestParquetFileBuilder::default() .with_max_l0_created_at(Time::from_timestamp_nanos(4)) .with_line_protocol("cpu,host=a load=3 33") - .with_max_seq(3) .with_min_time(33) .with_max_time(33); partition_cpu_a_1.create_parquet_file(builder).await; @@ -280,7 +276,6 @@ mod tests { let builder = TestParquetFileBuilder::default() .with_max_l0_created_at(Time::from_timestamp_nanos(5)) .with_line_protocol("cpu,host=a load=4 10001") - .with_max_seq(4) .with_min_time(10_001) .with_max_time(10_001); partition_cpu_a_2.create_parquet_file(builder).await; @@ -288,7 +283,6 @@ mod tests { let builder = TestParquetFileBuilder::default() .with_creation_time(Time::from_timestamp_nanos(6)) .with_line_protocol("cpu,host=b load=5 11") - .with_max_seq(5) .with_min_time(11) .with_max_time(11); partition_cpu_b_1.create_parquet_file(builder).await; @@ -302,7 +296,6 @@ mod tests { let builder = TestParquetFileBuilder::default() .with_max_l0_created_at(Time::from_timestamp_nanos(7)) .with_line_protocol(&lp) - .with_max_seq(6) .with_min_time(11) .with_max_time(14); partition_mem_c_1.create_parquet_file(builder).await; @@ -310,7 +303,6 @@ mod tests { let builder = TestParquetFileBuilder::default() .with_max_l0_created_at(Time::from_timestamp_nanos(8)) .with_line_protocol("mem,host=c perc=50 1001") - .with_max_seq(7) .with_min_time(1001) .with_max_time(1001); partition_mem_c_2 @@ -523,7 +515,6 @@ mod tests { .with_max_l0_created_at(Time::from_timestamp_nanos(10)) // duplicate row with different field value (load=14) .with_line_protocol("cpu,host=a load=14 10001") - .with_max_seq(2_000) .with_min_time(10_001) .with_max_time(10_001); partition_cpu_a_2.create_parquet_file(builder).await; diff --git a/querier/src/parquet/creation.rs b/querier/src/parquet/creation.rs index a49cb53c67..29a9f1990e 100644 --- a/querier/src/parquet/creation.rs +++ b/querier/src/parquet/creation.rs @@ -226,7 +226,6 @@ impl ChunkAdapter { order, sort_key: Some(sort_key), partition_id: parquet_file.partition_id, - max_sequence_number: parquet_file.max_sequence_number, compaction_level: parquet_file.compaction_level, }); diff --git a/querier/src/parquet/mod.rs b/querier/src/parquet/mod.rs index cf0823d669..9d76fdd47b 100644 --- a/querier/src/parquet/mod.rs +++ b/querier/src/parquet/mod.rs @@ -1,8 +1,7 @@ //! 
Querier Chunks use data_types::{ - ChunkId, ChunkOrder, CompactionLevel, DeletePredicate, PartitionId, SequenceNumber, - TableSummary, + ChunkId, ChunkOrder, CompactionLevel, DeletePredicate, PartitionId, TableSummary, }; use iox_query::util::create_basic_summary; use parquet_file::chunk::ParquetChunk; @@ -29,9 +28,6 @@ pub struct QuerierParquetChunkMeta { /// Partition ID. partition_id: PartitionId, - /// The maximum sequence number within this chunk. - max_sequence_number: SequenceNumber, - /// Compaction level of the parquet file of the chunk compaction_level: CompactionLevel, } @@ -52,11 +48,6 @@ impl QuerierParquetChunkMeta { self.partition_id } - /// The maximum sequence number within this chunk. - pub fn max_sequence_number(&self) -> SequenceNumber { - self.max_sequence_number - } - /// Compaction level of the parquet file of the chunk pub fn compaction_level(&self) -> CompactionLevel { self.compaction_level diff --git a/querier/src/table/mod.rs b/querier/src/table/mod.rs index 90b489e76a..58c89c41ba 100644 --- a/querier/src/table/mod.rs +++ b/querier/src/table/mod.rs @@ -461,7 +461,7 @@ mod tests { table::test_util::{querier_table, IngesterPartitionBuilder}, }; use arrow_util::assert_batches_eq; - use data_types::{ChunkId, ColumnType, SequenceNumber}; + use data_types::{ChunkId, ColumnType}; use iox_query::exec::IOxSessionContext; use iox_tests::{TestCatalog, TestParquetFileBuilder, TestTable}; use iox_time::TimeProvider; @@ -522,7 +522,6 @@ mod tests { ); let builder = TestParquetFileBuilder::default() .with_line_protocol(&lp) - .with_max_seq(1) .with_min_time(outside_retention) .with_max_time(inside_retention); let file_partially_inside = partition.create_parquet_file(builder).await; @@ -536,7 +535,6 @@ mod tests { ); let builder = TestParquetFileBuilder::default() .with_line_protocol(&lp) - .with_max_seq(2) .with_min_time(inside_retention) .with_max_time(inside_retention); let file_fully_inside = partition.create_parquet_file(builder).await; @@ -550,7 +548,6 @@ mod tests { ); let builder = TestParquetFileBuilder::default() .with_line_protocol(&lp) - .with_max_seq(3) .with_min_time(outside_retention) .with_max_time(outside_retention); let _file_fully_outside = partition.create_parquet_file(builder).await; @@ -601,63 +598,54 @@ mod tests { let builder = TestParquetFileBuilder::default() .with_line_protocol("table1 foo=1 11") - .with_max_seq(2) .with_min_time(11) .with_max_time(11); let file111 = partition11.create_parquet_file(builder).await; let builder = TestParquetFileBuilder::default() .with_line_protocol("table1 foo=2 22") - .with_max_seq(4) .with_min_time(22) .with_max_time(22); let file112 = partition11.create_parquet_file(builder).await; let builder = TestParquetFileBuilder::default() .with_line_protocol("table1 foo=3 33") - .with_max_seq(6) .with_min_time(33) .with_max_time(33); let file113 = partition11.create_parquet_file(builder).await; let builder = TestParquetFileBuilder::default() .with_line_protocol("table1 foo=4 44") - .with_max_seq(8) .with_min_time(44) .with_max_time(44); let file114 = partition11.create_parquet_file(builder).await; let builder = TestParquetFileBuilder::default() .with_line_protocol("table1 foo=5 55") - .with_max_seq(10) .with_min_time(55) .with_max_time(55); let file115 = partition11.create_parquet_file(builder).await; let builder = TestParquetFileBuilder::default() .with_line_protocol("table1 foo=5 55") - .with_max_seq(2) .with_min_time(55) .with_max_time(55); let file121 = partition12.create_parquet_file(builder).await; let builder = 
TestParquetFileBuilder::default() .with_line_protocol("table1 foo=10 100") - .with_max_seq(2) .with_min_time(99) .with_max_time(99); let file122 = partition12.create_parquet_file(builder).await; let builder = TestParquetFileBuilder::default() .with_line_protocol("table1 foo=10 100") - .with_max_seq(2) .with_min_time(100) .with_max_time(100); let _file123 = partition12.create_parquet_file(builder).await; let builder = TestParquetFileBuilder::default() .with_line_protocol("table2 foo=6 66") - .with_max_seq(2) .with_min_time(66) .with_max_time(66); let _file211 = partition21.create_parquet_file(builder).await; @@ -716,8 +704,7 @@ mod tests { let builder = IngesterPartitionBuilder::new(schema, &partition) .with_lp(["table,tag1=val1,tag2=val2 foo=3,bar=4 11"]); - let ingester_partition = - builder.build_with_max_parquet_sequence_number(Some(SequenceNumber::new(1))); + let ingester_partition = builder.build(); let querier_table = TestQuerierTable::new(&catalog, &table) .await @@ -783,13 +770,10 @@ mod tests { let builder = IngesterPartitionBuilder::new(schema, &partition).with_lp(["table foo=1 1"]); // Parquet file between with max sequence number 2 - let pf_builder = TestParquetFileBuilder::default() - .with_line_protocol("table1 foo=1 11") - .with_max_seq(2); + let pf_builder = TestParquetFileBuilder::default().with_line_protocol("table1 foo=1 11"); partition.create_parquet_file(pf_builder).await; - let ingester_partition = - builder.build_with_max_parquet_sequence_number(Some(SequenceNumber::new(2))); + let ingester_partition = builder.build(); let querier_table = TestQuerierTable::new(&catalog, &table) .await @@ -800,9 +784,7 @@ mod tests { assert_eq!(chunks.len(), 2); // Now, make a second chunk with max sequence number 3 - let pf_builder = TestParquetFileBuilder::default() - .with_line_protocol("table1 foo=1 22") - .with_max_seq(3); + let pf_builder = TestParquetFileBuilder::default().with_line_protocol("table1 foo=1 22"); partition.create_parquet_file(pf_builder).await; // With the same ingester response, still expect 2 chunks: one @@ -810,10 +792,8 @@ mod tests { let chunks = querier_table.chunks().await.unwrap(); assert_eq!(chunks.len(), 2); - // update the ingester response to return a new max parquet - // sequence number that includes the new file (3) - let ingester_partition = - builder.build_with_max_parquet_sequence_number(Some(SequenceNumber::new(3))); + // update the ingester response + let ingester_partition = builder.build(); let querier_table = querier_table .clear_ingester_partitions() diff --git a/querier/src/table/state_reconciler/interface.rs b/querier/src/table/state_reconciler/interface.rs index ba7bd95afe..5c7b4ed8e4 100644 --- a/querier/src/table/state_reconciler/interface.rs +++ b/querier/src/table/state_reconciler/interface.rs @@ -1,7 +1,7 @@ //! Interface for reconciling Ingester and catalog state use crate::{ingester::IngesterPartition, parquet::QuerierParquetChunk}; -use data_types::{CompactionLevel, ParquetFile, PartitionId, SequenceNumber}; +use data_types::{CompactionLevel, ParquetFile, PartitionId}; use std::{ops::Deref, sync::Arc}; /// Information about an ingester partition. @@ -9,17 +9,12 @@ use std::{ops::Deref, sync::Arc}; /// This is mostly the same as [`IngesterPartition`] but allows easier mocking. 
pub trait IngesterPartitionInfo { fn partition_id(&self) -> PartitionId; - fn parquet_max_sequence_number(&self) -> Option; } impl IngesterPartitionInfo for IngesterPartition { fn partition_id(&self) -> PartitionId { self.deref().partition_id() } - - fn parquet_max_sequence_number(&self) -> Option { - self.deref().parquet_max_sequence_number() - } } impl IngesterPartitionInfo for Arc @@ -29,10 +24,6 @@ where fn partition_id(&self) -> PartitionId { self.deref().partition_id() } - - fn parquet_max_sequence_number(&self) -> Option { - self.deref().parquet_max_sequence_number() - } } /// Information about a parquet file. @@ -40,7 +31,6 @@ where /// This is mostly the same as [`ParquetFile`] but allows easier mocking. pub trait ParquetFileInfo { fn partition_id(&self) -> PartitionId; - fn max_sequence_number(&self) -> SequenceNumber; fn compaction_level(&self) -> CompactionLevel; } @@ -49,10 +39,6 @@ impl ParquetFileInfo for Arc { self.partition_id } - fn max_sequence_number(&self) -> SequenceNumber { - self.max_sequence_number - } - fn compaction_level(&self) -> CompactionLevel { self.compaction_level } @@ -63,10 +49,6 @@ impl ParquetFileInfo for QuerierParquetChunk { self.meta().partition_id() } - fn max_sequence_number(&self) -> SequenceNumber { - self.meta().max_sequence_number() - } - fn compaction_level(&self) -> CompactionLevel { self.meta().compaction_level() } diff --git a/querier/src/table/test_util.rs b/querier/src/table/test_util.rs index f1b61b2554..e5a96135a7 100644 --- a/querier/src/table/test_util.rs +++ b/querier/src/table/test_util.rs @@ -4,7 +4,7 @@ use crate::{ IngesterPartition, }; use arrow::record_batch::RecordBatch; -use data_types::{ChunkId, SequenceNumber}; +use data_types::ChunkId; use iox_catalog::interface::{get_schema_by_name, SoftDeletedRows}; use iox_tests::{TestCatalog, TestPartition, TestTable}; use mutable_batch_lp::test_helpers::lp_to_mutable_batch; @@ -91,26 +91,14 @@ impl IngesterPartitionBuilder { self } - /// Create a ingester partition with the specified max parquet sequence number - pub(crate) fn build_with_max_parquet_sequence_number( - &self, - parquet_max_sequence_number: Option, - ) -> IngesterPartition { - self.build(parquet_max_sequence_number) - } - /// Create an ingester partition with the specified field values - pub(crate) fn build( - &self, - parquet_max_sequence_number: Option, - ) -> IngesterPartition { + pub(crate) fn build(&self) -> IngesterPartition { let data = self.lp.iter().map(|lp| lp_to_record_batch(lp)).collect(); IngesterPartition::new( Uuid::new_v4(), self.partition.partition.id, 0, - parquet_max_sequence_number, self.partition_sort_key.clone(), ) .try_add_chunk( diff --git a/service_grpc_catalog/src/lib.rs b/service_grpc_catalog/src/lib.rs index e19cba4762..22331ee194 100644 --- a/service_grpc_catalog/src/lib.rs +++ b/service_grpc_catalog/src/lib.rs @@ -172,7 +172,6 @@ fn to_parquet_file(p: data_types::ParquetFile) -> ParquetFile { table_id: p.table_id.get(), partition_id: p.partition_id.get(), object_store_id: p.object_store_id.to_string(), - max_sequence_number: p.max_sequence_number.get(), min_time: p.min_time.get(), max_time: p.max_time.get(), to_delete: p.to_delete.map(|t| t.get()).unwrap_or(0), @@ -198,9 +197,7 @@ fn to_partition(p: data_types::Partition) -> Partition { #[cfg(test)] mod tests { use super::*; - use data_types::{ - ColumnId, ColumnSet, CompactionLevel, ParquetFileParams, SequenceNumber, Timestamp, - }; + use data_types::{ColumnId, ColumnSet, CompactionLevel, ParquetFileParams, Timestamp}; use 
generated_types::influxdata::iox::catalog::v1::catalog_service_server::CatalogService; use iox_catalog::mem::MemCatalog; use uuid::Uuid; @@ -241,7 +238,6 @@ mod tests { table_id: table.id, partition_id: partition.id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(40), min_time: Timestamp::new(1), max_time: Timestamp::new(5), file_size_bytes: 2343, @@ -253,7 +249,6 @@ mod tests { }; let p2params = ParquetFileParams { object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(70), ..p1params.clone() }; p1 = repos.parquet_files().create(p1params).await.unwrap(); diff --git a/service_grpc_object_store/src/lib.rs b/service_grpc_object_store/src/lib.rs index 1abd7f825a..26dfc38bcc 100644 --- a/service_grpc_object_store/src/lib.rs +++ b/service_grpc_object_store/src/lib.rs @@ -96,9 +96,7 @@ impl object_store_service_server::ObjectStoreService for ObjectStoreService { mod tests { use super::*; use bytes::Bytes; - use data_types::{ - ColumnId, ColumnSet, CompactionLevel, ParquetFileParams, SequenceNumber, Timestamp, - }; + use data_types::{ColumnId, ColumnSet, CompactionLevel, ParquetFileParams, Timestamp}; use generated_types::influxdata::iox::object_store::v1::object_store_service_server::ObjectStoreService; use iox_catalog::mem::MemCatalog; use object_store::{memory::InMemory, ObjectStore}; @@ -138,7 +136,6 @@ mod tests { table_id: table.id, partition_id: partition.id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(40), min_time: Timestamp::new(1), max_time: Timestamp::new(5), file_size_bytes: 2343, From 05688799c42bc386fcadb695eb630b7299bd8333 Mon Sep 17 00:00:00 2001 From: "Christopher M. Wolff" Date: Wed, 3 May 2023 08:20:14 -0700 Subject: [PATCH 011/119] fix: handle aliases in gapfill aggregate columns (#7725) Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com> --- .../tests/query_tests2/cases/in/gapfill.sql | 8 + .../cases/in/gapfill.sql.expected | 161 +++++++++------- .../src/logical_optimizer/handle_gapfill.rs | 176 ++++++++++++++---- 3 files changed, 238 insertions(+), 107 deletions(-) diff --git a/influxdb_iox/tests/query_tests2/cases/in/gapfill.sql b/influxdb_iox/tests/query_tests2/cases/in/gapfill.sql index fde419cf6c..404edc97fe 100644 --- a/influxdb_iox/tests/query_tests2/cases/in/gapfill.sql +++ b/influxdb_iox/tests/query_tests2/cases/in/gapfill.sql @@ -100,3 +100,11 @@ from cpu where time between timestamp '2000-05-05T12:19:00Z' and timestamp '2000-05-05T12:44:00Z' group by four_minute; +-- With an aliased aggregate column +SELECT + region, + date_bin_gapfill('10 minute', time) as minute, + locf(avg(cpu.user)) as locf_avg_user +from cpu +where time between timestamp '2000-05-05T12:00:00Z' and timestamp '2000-05-05T12:59:00Z' +group by region, minute; diff --git a/influxdb_iox/tests/query_tests2/cases/in/gapfill.sql.expected b/influxdb_iox/tests/query_tests2/cases/in/gapfill.sql.expected index a0b63e1b76..11c67bb74c 100644 --- a/influxdb_iox/tests/query_tests2/cases/in/gapfill.sql.expected +++ b/influxdb_iox/tests/query_tests2/cases/in/gapfill.sql.expected @@ -103,11 +103,11 @@ ---------- | plan_type | plan | ---------- -| logical_plan | Projection: cpu.region, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time) AS minute, AVG(cpu.user) | +| logical_plan | Projection: cpu.region, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time) AS minute, AVG(cpu.user) AS locf(AVG(cpu.user)) | | | GapFill: groupBy=[[cpu.region, 
date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)]], aggr=[[LOCF(AVG(cpu.user))]], time_column=date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time), stride=IntervalMonthDayNano("600000000000"), range=Included(TimestampNanosecond(957528000000000000, None))..Included(TimestampNanosecond(957531540000000000, None)) | | | Aggregate: groupBy=[[cpu.region, datebin(IntervalMonthDayNano("600000000000"), cpu.time) AS date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)]], aggr=[[AVG(cpu.user)]] | | | TableScan: cpu projection=[region, time, user], full_filters=[cpu.time >= TimestampNanosecond(957528000000000000, None), cpu.time <= TimestampNanosecond(957531540000000000, None)] | -| physical_plan | ProjectionExec: expr=[region@0 as region, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@1 as minute, AVG(cpu.user)@2 as AVG(cpu.user)] | +| physical_plan | ProjectionExec: expr=[region@0 as region, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@1 as minute, AVG(cpu.user)@2 as locf(AVG(cpu.user))] | | | GapFillExec: group_expr=[region@0, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@1], aggr_expr=[LOCF(AVG(cpu.user)@2)], stride=600000000000, time_range=Included("957528000000000000")..Included("957531540000000000") | | | SortPreservingMergeExec: [region@0 ASC,date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@1 ASC] | | | SortExec: expr=[region@0 ASC,date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@1 ASC] | @@ -122,76 +122,93 @@ | | | ---------- -- SQL: SELECT region, date_bin_gapfill(interval '5 minute', time) as minute, locf(min(cpu.user)) from cpu where time between timestamp '2000-05-05T12:15:00Z' and timestamp '2000-05-05T12:59:00Z' group by region, minute; -+--------+----------------------+---------------+ -| region | minute | MIN(cpu.user) | -+--------+----------------------+---------------+ -| a | 2000-05-05T12:15:00Z | | -| a | 2000-05-05T12:20:00Z | 23.2 | -| a | 2000-05-05T12:25:00Z | 23.2 | -| a | 2000-05-05T12:30:00Z | 23.2 | -| a | 2000-05-05T12:35:00Z | 23.2 | -| a | 2000-05-05T12:40:00Z | 21.0 | -| a | 2000-05-05T12:45:00Z | 21.0 | -| a | 2000-05-05T12:50:00Z | 21.0 | -| a | 2000-05-05T12:55:00Z | 21.0 | -| b | 2000-05-05T12:15:00Z | | -| b | 2000-05-05T12:20:00Z | | -| b | 2000-05-05T12:25:00Z | | -| b | 2000-05-05T12:30:00Z | 25.2 | -| b | 2000-05-05T12:35:00Z | 28.9 | -| b | 2000-05-05T12:40:00Z | 28.9 | -| b | 2000-05-05T12:45:00Z | 28.9 | -| b | 2000-05-05T12:50:00Z | 28.9 | -| b | 2000-05-05T12:55:00Z | 28.9 | -+--------+----------------------+---------------+ ++--------+----------------------+---------------------+ +| region | minute | locf(MIN(cpu.user)) | ++--------+----------------------+---------------------+ +| a | 2000-05-05T12:15:00Z | | +| a | 2000-05-05T12:20:00Z | 23.2 | +| a | 2000-05-05T12:25:00Z | 23.2 | +| a | 2000-05-05T12:30:00Z | 23.2 | +| a | 2000-05-05T12:35:00Z | 23.2 | +| a | 2000-05-05T12:40:00Z | 21.0 | +| a | 2000-05-05T12:45:00Z | 21.0 | +| a | 2000-05-05T12:50:00Z | 21.0 | +| a | 2000-05-05T12:55:00Z | 21.0 | +| b | 2000-05-05T12:15:00Z | | +| b | 2000-05-05T12:20:00Z | | +| b | 2000-05-05T12:25:00Z | | +| b | 2000-05-05T12:30:00Z | 25.2 | +| b | 2000-05-05T12:35:00Z | 28.9 | +| b | 2000-05-05T12:40:00Z | 28.9 | +| b | 2000-05-05T12:45:00Z | 28.9 | +| b | 2000-05-05T12:50:00Z | 28.9 | +| b | 2000-05-05T12:55:00Z | 28.9 | ++--------+----------------------+---------------------+ -- SQL: SELECT date_bin_gapfill(interval '1 minute', time) as minute, 
locf(min(cpu.idle)) from cpu where time between timestamp '2000-05-05T12:19:00Z' and timestamp '2000-05-05T12:40:00Z' group by minute; -+----------------------+---------------+ -| minute | MIN(cpu.idle) | -+----------------------+---------------+ -| 2000-05-05T12:19:00Z | | -| 2000-05-05T12:20:00Z | 70.0 | -| 2000-05-05T12:21:00Z | 70.0 | -| 2000-05-05T12:22:00Z | 70.0 | -| 2000-05-05T12:23:00Z | 70.0 | -| 2000-05-05T12:24:00Z | 70.0 | -| 2000-05-05T12:25:00Z | 70.0 | -| 2000-05-05T12:26:00Z | 70.0 | -| 2000-05-05T12:27:00Z | 70.0 | -| 2000-05-05T12:28:00Z | 70.0 | -| 2000-05-05T12:29:00Z | 70.0 | -| 2000-05-05T12:30:00Z | 70.0 | -| 2000-05-05T12:31:00Z | 70.0 | -| 2000-05-05T12:32:00Z | 70.0 | -| 2000-05-05T12:33:00Z | 70.0 | -| 2000-05-05T12:34:00Z | 70.0 | -| 2000-05-05T12:35:00Z | 70.0 | -| 2000-05-05T12:36:00Z | 70.0 | -| 2000-05-05T12:37:00Z | 70.0 | -| 2000-05-05T12:38:00Z | 70.0 | -| 2000-05-05T12:39:00Z | 60.0 | -| 2000-05-05T12:40:00Z | 60.0 | -+----------------------+---------------+ ++----------------------+---------------------+ +| minute | locf(MIN(cpu.idle)) | ++----------------------+---------------------+ +| 2000-05-05T12:19:00Z | | +| 2000-05-05T12:20:00Z | 70.0 | +| 2000-05-05T12:21:00Z | 70.0 | +| 2000-05-05T12:22:00Z | 70.0 | +| 2000-05-05T12:23:00Z | 70.0 | +| 2000-05-05T12:24:00Z | 70.0 | +| 2000-05-05T12:25:00Z | 70.0 | +| 2000-05-05T12:26:00Z | 70.0 | +| 2000-05-05T12:27:00Z | 70.0 | +| 2000-05-05T12:28:00Z | 70.0 | +| 2000-05-05T12:29:00Z | 70.0 | +| 2000-05-05T12:30:00Z | 70.0 | +| 2000-05-05T12:31:00Z | 70.0 | +| 2000-05-05T12:32:00Z | 70.0 | +| 2000-05-05T12:33:00Z | 70.0 | +| 2000-05-05T12:34:00Z | 70.0 | +| 2000-05-05T12:35:00Z | 70.0 | +| 2000-05-05T12:36:00Z | 70.0 | +| 2000-05-05T12:37:00Z | 70.0 | +| 2000-05-05T12:38:00Z | 70.0 | +| 2000-05-05T12:39:00Z | 60.0 | +| 2000-05-05T12:40:00Z | 60.0 | ++----------------------+---------------------+ -- SQL: SELECT date_bin_gapfill(interval '4 minutes', time) as four_minute, interpolate(min(cpu.idle)), interpolate(min(cpu."user")), count(*) from cpu where time between timestamp '2000-05-05T12:19:00Z' and timestamp '2000-05-05T12:40:00Z' group by four_minute; -+----------------------+---------------+---------------+-----------------+ -| four_minute | MIN(cpu.idle) | MIN(cpu.user) | COUNT(UInt8(1)) | -+----------------------+---------------+---------------+-----------------+ -| 2000-05-05T12:16:00Z | | | | -| 2000-05-05T12:20:00Z | 70.0 | 23.2 | 1 | -| 2000-05-05T12:24:00Z | 67.5 | 24.2 | | -| 2000-05-05T12:28:00Z | 65.0 | 25.2 | 1 | -| 2000-05-05T12:32:00Z | 62.5 | 27.05 | | -| 2000-05-05T12:36:00Z | 60.0 | 28.9 | 1 | -| 2000-05-05T12:40:00Z | | 21.0 | 1 | -+----------------------+---------------+---------------+-----------------+ ++----------------------+----------------------------+----------------------------+-----------------+ +| four_minute | interpolate(MIN(cpu.idle)) | interpolate(MIN(cpu.user)) | COUNT(UInt8(1)) | ++----------------------+----------------------------+----------------------------+-----------------+ +| 2000-05-05T12:16:00Z | | | | +| 2000-05-05T12:20:00Z | 70.0 | 23.2 | 1 | +| 2000-05-05T12:24:00Z | 67.5 | 24.2 | | +| 2000-05-05T12:28:00Z | 65.0 | 25.2 | 1 | +| 2000-05-05T12:32:00Z | 62.5 | 27.05 | | +| 2000-05-05T12:36:00Z | 60.0 | 28.9 | 1 | +| 2000-05-05T12:40:00Z | | 21.0 | 1 | ++----------------------+----------------------------+----------------------------+-----------------+ -- SQL: SELECT date_bin_gapfill(interval '4 minutes 1 nanosecond', time, timestamp 
'2000-05-05T12:15:59.999999999') as four_minute, interpolate(min(cpu.idle)), interpolate(min(cpu."user")), count(*) from cpu where time between timestamp '2000-05-05T12:19:00Z' and timestamp '2000-05-05T12:44:00Z' group by four_minute; -+--------------------------------+---------------+---------------+-----------------+ -| four_minute | MIN(cpu.idle) | MIN(cpu.user) | COUNT(UInt8(1)) | -+--------------------------------+---------------+---------------+-----------------+ -| 2000-05-05T12:15:59.999999999Z | | | | -| 2000-05-05T12:20:00Z | 70.0 | 23.2 | 1 | -| 2000-05-05T12:24:00.000000001Z | 67.5 | 24.2 | | -| 2000-05-05T12:28:00.000000002Z | 65.0 | 25.2 | 1 | -| 2000-05-05T12:32:00.000000003Z | 62.5 | 23.1 | | -| 2000-05-05T12:36:00.000000004Z | 60.0 | 21.0 | 2 | -| 2000-05-05T12:40:00.000000005Z | | | | -+--------------------------------+---------------+---------------+-----------------+ \ No newline at end of file ++--------------------------------+----------------------------+----------------------------+-----------------+ +| four_minute | interpolate(MIN(cpu.idle)) | interpolate(MIN(cpu.user)) | COUNT(UInt8(1)) | ++--------------------------------+----------------------------+----------------------------+-----------------+ +| 2000-05-05T12:15:59.999999999Z | | | | +| 2000-05-05T12:20:00Z | 70.0 | 23.2 | 1 | +| 2000-05-05T12:24:00.000000001Z | 67.5 | 24.2 | | +| 2000-05-05T12:28:00.000000002Z | 65.0 | 25.2 | 1 | +| 2000-05-05T12:32:00.000000003Z | 62.5 | 23.1 | | +| 2000-05-05T12:36:00.000000004Z | 60.0 | 21.0 | 2 | +| 2000-05-05T12:40:00.000000005Z | | | | ++--------------------------------+----------------------------+----------------------------+-----------------+ +-- SQL: SELECT region, date_bin_gapfill('10 minute', time) as minute, locf(avg(cpu.user)) as locf_avg_user from cpu where time between timestamp '2000-05-05T12:00:00Z' and timestamp '2000-05-05T12:59:00Z' group by region, minute; ++--------+----------------------+--------------------+ +| region | minute | locf_avg_user | ++--------+----------------------+--------------------+ +| a | 2000-05-05T12:00:00Z | | +| a | 2000-05-05T12:10:00Z | | +| a | 2000-05-05T12:20:00Z | 23.2 | +| a | 2000-05-05T12:30:00Z | 23.2 | +| a | 2000-05-05T12:40:00Z | 21.0 | +| a | 2000-05-05T12:50:00Z | 21.0 | +| b | 2000-05-05T12:00:00Z | | +| b | 2000-05-05T12:10:00Z | | +| b | 2000-05-05T12:20:00Z | | +| b | 2000-05-05T12:30:00Z | 27.049999999999997 | +| b | 2000-05-05T12:40:00Z | 27.049999999999997 | +| b | 2000-05-05T12:50:00Z | 27.049999999999997 | ++--------+----------------------+--------------------+ \ No newline at end of file diff --git a/iox_query/src/logical_optimizer/handle_gapfill.rs b/iox_query/src/logical_optimizer/handle_gapfill.rs index 23d346dcc7..6c7526e5e1 100644 --- a/iox_query/src/logical_optimizer/handle_gapfill.rs +++ b/iox_query/src/logical_optimizer/handle_gapfill.rs @@ -14,6 +14,7 @@ use datafusion::{ optimizer::{optimizer::ApplyOrder, OptimizerConfig, OptimizerRule}, prelude::{col, Expr}, }; +use hashbrown::{hash_map, HashMap}; use query_functions::gapfill::{DATE_BIN_GAPFILL_UDF_NAME, INTERPOLATE_UDF_NAME, LOCF_UDF_NAME}; use std::{ collections::HashSet, @@ -205,9 +206,6 @@ fn build_gapfill_node( .collect(); let aggr_expr = new_group_expr.split_off(aggr.group_expr.len()); - // For now, we can only fill with null values. - // In the future, this rule will allow a projection to be pushed into the - // GapFill node, e.g., if it contains an item like `LOCF()`. 
let fill_behavior = aggr_expr .iter() .cloned() @@ -365,6 +363,16 @@ fn udf_to_fill_strategy(name: &str) -> Option { } } +fn fill_strategy_to_udf(fs: &FillStrategy) -> Result<&'static str> { + match fs { + FillStrategy::PrevNullAsMissing => Ok(LOCF_UDF_NAME), + FillStrategy::LinearInterpolate => Ok(INTERPOLATE_UDF_NAME), + _ => Err(DataFusionError::Internal(format!( + "unknown UDF for fill strategy {fs:?}" + ))), + } +} + fn handle_projection(proj: &Projection) -> Result> { let Projection { input, @@ -381,49 +389,32 @@ fn handle_projection(proj: &Projection) -> Result> { return Ok(None) }; - let fill_cols: Vec<(&Expr, FillStrategy, &str)> = proj_exprs + let mut fill_fn_rewriter = FillFnRewriter { + aggr_col_fill_map: HashMap::new(), + }; + let new_proj_exprs = proj_exprs .iter() - .filter_map(|e| match e { - Expr::ScalarUDF { fun, args } => { - if let Some(strategy) = udf_to_fill_strategy(&fun.name) { - let col = &args[0]; - Some((col, strategy, fun.name.as_str())) - } else { - None - } - } - _ => None, - }) - .collect(); - if fill_cols.is_empty() { - // No special gap-filling functions, nothing to do. + .map(|e| e.clone().rewrite(&mut fill_fn_rewriter)) + .collect::>>()?; + + let FillFnRewriter { aggr_col_fill_map } = fill_fn_rewriter; + if aggr_col_fill_map.is_empty() { return Ok(None); } // Clone the existing GapFill node, then modify it in place // to reflect the new fill strategy. let mut new_gapfill = child_gapfill.clone(); - for (e, fs, fn_name) in fill_cols { - if new_gapfill.replace_fill_strategy(e, fs).is_none() { + for (e, fs) in aggr_col_fill_map { + let udf = fill_strategy_to_udf(&fs)?; + if new_gapfill.replace_fill_strategy(&e, fs).is_none() { // There was a gap filling function called on a non-aggregate column. return Err(DataFusionError::Plan(format!( - "{fn_name} must be called on an aggregate column in a gap-filling query" + "{udf} must be called on an aggregate column in a gap-filling query", ))); } } - // Remove the gap filling functions from the projection. - let new_proj_exprs: Vec = proj_exprs - .iter() - .cloned() - .map(|e| match e { - Expr::ScalarUDF { fun, mut args } if udf_to_fill_strategy(&fun.name).is_some() => { - args.remove(0) - } - _ => e, - }) - .collect(); - let new_proj = { let mut proj = proj.clone(); proj.expr = new_proj_exprs; @@ -437,6 +428,58 @@ fn handle_projection(proj: &Projection) -> Result> { Ok(Some(new_proj)) } +/// Implements `TreeNodeRewriter`: +/// - Traverses over the expressions in a projection node +/// - If it finds `locf(col)` or `interpolate(col)`, +/// it replaces them with `col AS ` +/// - Collects into [`Self::aggr_col_fill_map`] which correlates +/// aggregate columns to their [`FillStrategy`]. +struct FillFnRewriter { + aggr_col_fill_map: HashMap, +} + +impl TreeNodeRewriter for FillFnRewriter { + type N = Expr; + fn pre_visit(&mut self, expr: &Expr) -> Result { + match expr { + Expr::ScalarUDF { fun, .. } if udf_to_fill_strategy(&fun.name).is_some() => { + Ok(RewriteRecursion::Mutate) + } + _ => Ok(RewriteRecursion::Continue), + } + } + + fn mutate(&mut self, expr: Expr) -> Result { + let orig_name = expr.display_name()?; + match expr { + Expr::ScalarUDF { ref fun, .. 
} if udf_to_fill_strategy(&fun.name).is_none() => { + Ok(expr) + } + Expr::ScalarUDF { fun, mut args } => { + let fs = udf_to_fill_strategy(&fun.name).expect("must be a fill fn"); + let arg = args.remove(0); + self.add_fill_strategy(arg.clone(), fs)?; + Ok(arg.alias(orig_name)) + } + _ => Ok(expr), + } + } +} + +impl FillFnRewriter { + fn add_fill_strategy(&mut self, e: Expr, fs: FillStrategy) -> Result<()> { + match self.aggr_col_fill_map.entry(e) { + hash_map::Entry::Occupied(_) => Err(DataFusionError::NotImplemented( + "multiple fill strategies for the same column".to_string(), + )), + hash_map::Entry::Vacant(ve) => { + ve.insert(fs); + Ok(()) + } + } + } +} + fn count_udf(e: &Expr, name: &str) -> Result { let mut count = 0; e.apply(&mut |expr| { @@ -659,6 +702,35 @@ mod test { Ok(()) } + #[test] + fn different_fill_strategies_one_col() -> Result<()> { + let plan = LogicalPlanBuilder::from(table_scan()?) + .filter( + col("time") + .gt_eq(lit_timestamp_nano(1000)) + .and(col("time").lt(lit_timestamp_nano(2000))), + )? + .aggregate( + vec![ + col("loc"), + date_bin_gapfill(lit(ScalarValue::IntervalDayTime(Some(60_000))), col("time"))?, + ], + vec![avg(col("temp")), min(col("temp"))], + )? + .project(vec![ + locf(col("loc"))?, + col("date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)"), + locf(col("AVG(temps.temp)"))?, + interpolate(col("AVG(temps.temp)"))?, + ])? + .build()?; + assert_optimizer_err( + &plan, + "This feature is not implemented: multiple fill strategies for the same column", + ); + Ok(()) + } + #[test] fn nonscalar_origin() -> Result<()> { let plan = LogicalPlanBuilder::from(table_scan()?) @@ -928,7 +1000,7 @@ mod test { format_optimized_plan(&plan)?, @r###" --- - - "Projection: date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), AVG(temps.temp), MIN(temps.temp)" + - "Projection: date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), AVG(temps.temp) AS locf(AVG(temps.temp)), MIN(temps.temp) AS locf(MIN(temps.temp))" - " GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[LOCF(AVG(temps.temp)), LOCF(MIN(temps.temp))]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))" - " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), MIN(temps.temp)]]" - " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)" @@ -937,6 +1009,40 @@ mod test { Ok(()) } + #[test] + fn with_locf_aliased() -> Result<()> { + let plan = LogicalPlanBuilder::from(table_scan()?) + .filter( + col("time") + .gt_eq(lit_timestamp_nano(1000)) + .and(col("time").lt(lit_timestamp_nano(2000))), + )? + .aggregate( + vec![date_bin_gapfill( + lit(ScalarValue::IntervalDayTime(Some(60_000))), + col("time"), + )?], + vec![avg(col("temp")), min(col("temp"))], + )? + .project(vec![ + col("date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)"), + locf(col("MIN(temps.temp)"))?.alias("locf_min_temp"), + ])? 
+ .build()?; + + insta::assert_yaml_snapshot!( + format_optimized_plan(&plan)?, + @r###" + --- + - "Projection: date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), MIN(temps.temp) AS locf(MIN(temps.temp)) AS locf_min_temp" + - " GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), LOCF(MIN(temps.temp))]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))" + - " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), MIN(temps.temp)]]" + - " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)" + - " TableScan: temps" + "###); + Ok(()) + } + #[test] fn with_interpolate() -> Result<()> { let plan = LogicalPlanBuilder::from(table_scan()?) @@ -963,7 +1069,7 @@ mod test { format_optimized_plan(&plan)?, @r###" --- - - "Projection: date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), AVG(temps.temp), MIN(temps.temp)" + - "Projection: date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), AVG(temps.temp) AS interpolate(AVG(temps.temp)), MIN(temps.temp) AS interpolate(MIN(temps.temp))" - " GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[INTERPOLATE(AVG(temps.temp)), INTERPOLATE(MIN(temps.temp))]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))" - " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), MIN(temps.temp)]]" - " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)" From abe5d26d2f52d660c9a1ea17923ea5e9172fa5be Mon Sep 17 00:00:00 2001 From: Marco Neumann Date: Wed, 3 May 2023 18:40:00 +0200 Subject: [PATCH 012/119] chore: update DataFusion to `2787e7a36a6be83d91201df20827d3695f933300` (#7732) Required to get: - https://github.com/apache/arrow-datafusion/pull/6199 --- Cargo.lock | 18 +++++++++--------- Cargo.toml | 4 ++-- workspace-hack/Cargo.toml | 6 +++--- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1b2685fc3a..dd26a90177 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1432,7 +1432,7 @@ dependencies = [ [[package]] name = "datafusion" version = "23.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=beee1d91303d5eff220fadd08b2c28404c2b3e5a#beee1d91303d5eff220fadd08b2c28404c2b3e5a" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2787e7a36a6be83d91201df20827d3695f933300#2787e7a36a6be83d91201df20827d3695f933300" dependencies = [ "ahash 0.8.3", "arrow", @@ -1481,7 +1481,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "23.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=beee1d91303d5eff220fadd08b2c28404c2b3e5a#beee1d91303d5eff220fadd08b2c28404c2b3e5a" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2787e7a36a6be83d91201df20827d3695f933300#2787e7a36a6be83d91201df20827d3695f933300" dependencies = [ "arrow", "arrow-array", @@ -1495,7 +1495,7 @@ dependencies = [ [[package]] name = "datafusion-execution" version = "23.0.0" -source = 
"git+https://github.com/apache/arrow-datafusion.git?rev=beee1d91303d5eff220fadd08b2c28404c2b3e5a#beee1d91303d5eff220fadd08b2c28404c2b3e5a" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2787e7a36a6be83d91201df20827d3695f933300#2787e7a36a6be83d91201df20827d3695f933300" dependencies = [ "dashmap", "datafusion-common", @@ -1512,7 +1512,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "23.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=beee1d91303d5eff220fadd08b2c28404c2b3e5a#beee1d91303d5eff220fadd08b2c28404c2b3e5a" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2787e7a36a6be83d91201df20827d3695f933300#2787e7a36a6be83d91201df20827d3695f933300" dependencies = [ "ahash 0.8.3", "arrow", @@ -1523,7 +1523,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "23.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=beee1d91303d5eff220fadd08b2c28404c2b3e5a#beee1d91303d5eff220fadd08b2c28404c2b3e5a" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2787e7a36a6be83d91201df20827d3695f933300#2787e7a36a6be83d91201df20827d3695f933300" dependencies = [ "arrow", "async-trait", @@ -1540,7 +1540,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "23.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=beee1d91303d5eff220fadd08b2c28404c2b3e5a#beee1d91303d5eff220fadd08b2c28404c2b3e5a" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2787e7a36a6be83d91201df20827d3695f933300#2787e7a36a6be83d91201df20827d3695f933300" dependencies = [ "ahash 0.8.3", "arrow", @@ -1572,7 +1572,7 @@ dependencies = [ [[package]] name = "datafusion-proto" version = "23.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=beee1d91303d5eff220fadd08b2c28404c2b3e5a#beee1d91303d5eff220fadd08b2c28404c2b3e5a" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2787e7a36a6be83d91201df20827d3695f933300#2787e7a36a6be83d91201df20827d3695f933300" dependencies = [ "arrow", "chrono", @@ -1586,7 +1586,7 @@ dependencies = [ [[package]] name = "datafusion-row" version = "23.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=beee1d91303d5eff220fadd08b2c28404c2b3e5a#beee1d91303d5eff220fadd08b2c28404c2b3e5a" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2787e7a36a6be83d91201df20827d3695f933300#2787e7a36a6be83d91201df20827d3695f933300" dependencies = [ "arrow", "datafusion-common", @@ -1597,7 +1597,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "23.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=beee1d91303d5eff220fadd08b2c28404c2b3e5a#beee1d91303d5eff220fadd08b2c28404c2b3e5a" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2787e7a36a6be83d91201df20827d3695f933300#2787e7a36a6be83d91201df20827d3695f933300" dependencies = [ "arrow", "arrow-schema", diff --git a/Cargo.toml b/Cargo.toml index 31910e771a..ea50bde0f2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -115,8 +115,8 @@ license = "MIT OR Apache-2.0" [workspace.dependencies] arrow = { version = "38.0.0" } arrow-flight = { version = "38.0.0" } -datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="beee1d91303d5eff220fadd08b2c28404c2b3e5a", default-features = false } -datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="beee1d91303d5eff220fadd08b2c28404c2b3e5a" } +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", 
rev="2787e7a36a6be83d91201df20827d3695f933300", default-features = false } +datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="2787e7a36a6be83d91201df20827d3695f933300" } hashbrown = { version = "0.13.2" } parquet = { version = "38.0.0" } tonic = { version = "0.9.2", features = ["tls", "tls-webpki-roots"] } diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 2531f168b1..763595b17e 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -30,9 +30,9 @@ bytes = { version = "1" } chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "serde"] } crossbeam-utils = { version = "0.8" } crypto-common = { version = "0.1", default-features = false, features = ["std"] } -datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "beee1d91303d5eff220fadd08b2c28404c2b3e5a" } -datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "beee1d91303d5eff220fadd08b2c28404c2b3e5a", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } -datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "beee1d91303d5eff220fadd08b2c28404c2b3e5a", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2787e7a36a6be83d91201df20827d3695f933300" } +datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2787e7a36a6be83d91201df20827d3695f933300", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } +datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2787e7a36a6be83d91201df20827d3695f933300", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } digest = { version = "0.10", features = ["mac", "std"] } either = { version = "1" } fixedbitset = { version = "0.4" } From 6de18b6544978c287ac76156b40bc9a805df0b56 Mon Sep 17 00:00:00 2001 From: Joe-Blount <73478756+Joe-Blount@users.noreply.github.com> Date: Wed, 3 May 2023 15:09:00 -0500 Subject: [PATCH 013/119] chore: conditionally parse shard_id from HOSTNAME (#7733) * chore: conditionally parse shard_id from HOSTNAME * chore: remove HOSTNAME env from test case relying on it not being there. --- clap_blocks/src/compactor2.rs | 8 +++++++ compactor2/src/components/hardcoded.rs | 5 ++++ influxdb_iox/src/commands/run/all_in_one.rs | 1 + .../tests/end_to_end_cases/compactor.rs | 15 ++++++++++++ ioxd_compactor2/src/lib.rs | 24 +++++++++++++++++-- 5 files changed, 51 insertions(+), 2 deletions(-) diff --git a/clap_blocks/src/compactor2.rs b/clap_blocks/src/compactor2.rs index 64584a802f..6b6cf5d64f 100644 --- a/clap_blocks/src/compactor2.rs +++ b/clap_blocks/src/compactor2.rs @@ -235,6 +235,7 @@ pub struct Compactor2Config { /// Number of shards. /// /// If this is set then the shard ID MUST also be set. If both are not provided, sharding is disabled. + /// (shard ID can be provided by the host name) #[clap( long = "compaction-shard-count", env = "INFLUXDB_IOX_COMPACTION_SHARD_COUNT", @@ -254,6 +255,13 @@ pub struct Compactor2Config { )] pub shard_id: Option, + /// Host Name + /// + /// comprised of leading text (e.g. 'iox-shared-compactor-'), ending with shard_id (e.g. '0'). 
+ /// When shard_count is specified, but shard_id is not specified, the id is extracted from hostname. + #[clap(long = "hostname", env = "HOSTNAME", action)] + pub hostname: Option, + /// Minimum number of L1 files to compact to L2. /// /// If there are more than this many L1 (by definition non diff --git a/compactor2/src/components/hardcoded.rs b/compactor2/src/components/hardcoded.rs index 4a6c04f17b..491e1ad7a0 100644 --- a/compactor2/src/components/hardcoded.rs +++ b/compactor2/src/components/hardcoded.rs @@ -6,6 +6,7 @@ use std::{sync::Arc, time::Duration}; use data_types::CompactionLevel; use object_store::memory::InMemory; +use observability_deps::tracing::info; use crate::{ config::{CompactionType, Config, PartitionsSourceConfig}, @@ -156,6 +157,10 @@ fn make_partitions_source_commit_partition_sink( let mut id_only_partition_filters: Vec> = vec![]; if let Some(shard_config) = &config.shard_config { // add shard filter before performing any catalog IO + info!( + "starting compactor {} of {}", + shard_config.shard_id, shard_config.n_shards + ); id_only_partition_filters.push(Arc::new(ShardPartitionFilter::new( shard_config.n_shards, shard_config.shard_id, diff --git a/influxdb_iox/src/commands/run/all_in_one.rs b/influxdb_iox/src/commands/run/all_in_one.rs index ae136ee291..ca519aecf5 100644 --- a/influxdb_iox/src/commands/run/all_in_one.rs +++ b/influxdb_iox/src/commands/run/all_in_one.rs @@ -505,6 +505,7 @@ impl Config { ignore_partition_skip_marker: false, shard_count: None, shard_id: None, + hostname: None, min_num_l1_files_to_compact: 1, process_once: false, process_all_partitions: false, diff --git a/influxdb_iox/tests/end_to_end_cases/compactor.rs b/influxdb_iox/tests/end_to_end_cases/compactor.rs index d9490a11d4..7744c011ad 100644 --- a/influxdb_iox/tests/end_to_end_cases/compactor.rs +++ b/influxdb_iox/tests/end_to_end_cases/compactor.rs @@ -66,6 +66,7 @@ fn num_shards_without_shard_id_is_invalid() { .arg("compactor2") .env("INFLUXDB_IOX_COMPACTION_SHARD_COUNT", "1") // only provide shard count .env("INFLUXDB_IOX_CATALOG_TYPE", "memory") + .env_remove("HOSTNAME") .assert() .failure() .stderr(predicate::str::contains( @@ -73,6 +74,20 @@ fn num_shards_without_shard_id_is_invalid() { )); } +#[test] +fn num_shards_with_hostname_is_valid() { + Command::cargo_bin("influxdb_iox") + .unwrap() + .arg("run") + .arg("compactor2") + .env("INFLUXDB_IOX_COMPACTION_SHARD_COUNT", "3") // provide shard count + .env("HOSTNAME", "iox-shared-compactor-8") // provide shard id via hostname + .env("INFLUXDB_IOX_CATALOG_TYPE", "memory") + .assert() + .failure() + .stderr(predicate::str::contains("shard_id out of range")); +} + #[tokio::test] async fn sharded_compactor_0_always_compacts_partition_1() { test_helpers::maybe_start_logging(); diff --git a/ioxd_compactor2/src/lib.rs b/ioxd_compactor2/src/lib.rs index 5f4901cc1d..397e4aa24e 100644 --- a/ioxd_compactor2/src/lib.rs +++ b/ioxd_compactor2/src/lib.rs @@ -143,11 +143,31 @@ pub async fn create_compactor2_server_type( ) -> Arc { let backoff_config = BackoffConfig::default(); + // if shard_count is specified, shard_id must be provided also. + // shard_id may be specified explicitly or extracted from the host name. 
+ let mut shard_id = compactor_config.shard_id; + if shard_id.is_none() + && compactor_config.shard_count.is_some() + && compactor_config.hostname.is_some() + { + let parsed_id = compactor_config + .hostname + .unwrap() + .chars() + .skip_while(|ch| !ch.is_ascii_digit()) + .take_while(|ch| ch.is_ascii_digit()) + .fold(None, |acc, ch| { + ch.to_digit(10).map(|b| acc.unwrap_or(0) * 10 + b) + }); + if parsed_id.is_some() { + shard_id = Some(parsed_id.unwrap() as usize); + } + } assert!( - compactor_config.shard_id.is_some() == compactor_config.shard_count.is_some(), + shard_id.is_some() == compactor_config.shard_count.is_some(), "must provide or not provide shard ID and count" ); - let shard_config = compactor_config.shard_id.map(|shard_id| ShardConfig { + let shard_config = shard_id.map(|shard_id| ShardConfig { shard_id, n_shards: compactor_config.shard_count.expect("just checked"), }); From 2d601bf21159d6201c5cc3bb7d338ebeb2be4c35 Mon Sep 17 00:00:00 2001 From: Nga Tran Date: Wed, 3 May 2023 18:29:26 -0400 Subject: [PATCH 014/119] test: num files to read exceed the max_parquet_fanout and all sorted files are resorted (#7737) Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com> --- influxdb_iox/tests/query_tests2/cases.rs | 12 +++++++++ .../cases/in/duplicates_parquet_50_files.sql | 14 +++++++++++ .../duplicates_parquet_50_files.sql.expected | 25 +++++++++++++++++++ influxdb_iox/tests/query_tests2/setups.rs | 22 ++++++++++++++++ 4 files changed, 73 insertions(+) create mode 100644 influxdb_iox/tests/query_tests2/cases/in/duplicates_parquet_50_files.sql create mode 100644 influxdb_iox/tests/query_tests2/cases/in/duplicates_parquet_50_files.sql.expected diff --git a/influxdb_iox/tests/query_tests2/cases.rs b/influxdb_iox/tests/query_tests2/cases.rs index 7113ae1eca..6c8d803a6f 100644 --- a/influxdb_iox/tests/query_tests2/cases.rs +++ b/influxdb_iox/tests/query_tests2/cases.rs @@ -121,6 +121,18 @@ async fn duplicates_parquet_many() { .await; } +#[tokio::test] +async fn duplicates_parquet_50() { + test_helpers::maybe_start_logging(); + + TestCase { + input: "cases/in/duplicates_parquet_50_files.sql", + chunk_stage: ChunkStage::Parquet, + } + .run() + .await; +} + #[tokio::test] async fn gapfill() { test_helpers::maybe_start_logging(); diff --git a/influxdb_iox/tests/query_tests2/cases/in/duplicates_parquet_50_files.sql b/influxdb_iox/tests/query_tests2/cases/in/duplicates_parquet_50_files.sql new file mode 100644 index 0000000000..704756cd9d --- /dev/null +++ b/influxdb_iox/tests/query_tests2/cases/in/duplicates_parquet_50_files.sql @@ -0,0 +1,14 @@ +-- Test setup for running with 50 parquet files +-- IOX_SETUP: FiftySortedSameParquetFiles + + +-- each parquet file has either 2 rows, one with f=1 and the other with f=2 +-- and then there are 50 that have a single row with f=3 +select count(1), sum(f1) from m; + +-- All 50 files are sorted but since it is larger than max_parquet_fanout which is set 40, +-- we do not use the presort and add a SortExec +-- WHen running this test, a warning "cannot use pre-sorted parquet files, fan-out too wide" is printed +-- IOX_COMPARE: uuid +EXPLAIN select count(1), sum(f1) from m; + diff --git a/influxdb_iox/tests/query_tests2/cases/in/duplicates_parquet_50_files.sql.expected b/influxdb_iox/tests/query_tests2/cases/in/duplicates_parquet_50_files.sql.expected new file mode 100644 index 0000000000..0e397532e6 --- /dev/null +++ b/influxdb_iox/tests/query_tests2/cases/in/duplicates_parquet_50_files.sql.expected @@ -0,0 +1,25 @@ +-- Test 
Setup: FiftySortedSameParquetFiles +-- SQL: select count(1), sum(f1) from m; ++-----------------+-----------+ +| COUNT(Int64(1)) | SUM(m.f1) | ++-----------------+-----------+ +| 1 | 1.0 | ++-----------------+-----------+ +-- SQL: EXPLAIN select count(1), sum(f1) from m; +-- Results After Normalizing UUIDs +---------- +| plan_type | plan | +---------- +| logical_plan | Aggregate: groupBy=[[]], aggr=[[COUNT(Int64(1)), SUM(m.f1)]] | +| | TableScan: m projection=[f1] | +| physical_plan | AggregateExec: mode=Final, gby=[], aggr=[COUNT(Int64(1)), SUM(m.f1)] | +| | CoalescePartitionsExec | +| | AggregateExec: mode=Partial, gby=[], aggr=[COUNT(Int64(1)), SUM(m.f1)] | +| | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 | +| | ProjectionExec: expr=[f1@1 as f1] | +| | DeduplicateExec: [tag1@2 ASC,tag2@3 ASC,tag3@4 ASC,tag4@5 ASC,time@6 ASC] | +| | SortPreservingMergeExec: [tag1@2 ASC,tag2@3 ASC,tag3@4 ASC,tag4@5 ASC,time@6 ASC,__chunk_order@0 ASC] | +| | SortExec: expr=[tag1@2 ASC,tag2@3 ASC,tag3@4 ASC,tag4@5 ASC,time@6 ASC,__chunk_order@0 ASC] | +| | ParquetExec: limit=None, partitions={4 groups: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet, 1/1/1/00000000-0000-0000-0000-000000000001.parquet, 1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/00000000-0000-0000-0000-000000000003.parquet, 1/1/1/00000000-0000-0000-0000-000000000004.parquet, 1/1/1/00000000-0000-0000-0000-000000000005.parquet, 1/1/1/00000000-0000-0000-0000-000000000006.parquet, 1/1/1/00000000-0000-0000-0000-000000000007.parquet, 1/1/1/00000000-0000-0000-0000-000000000008.parquet, 1/1/1/00000000-0000-0000-0000-000000000009.parquet, 1/1/1/00000000-0000-0000-0000-00000000000a.parquet, 1/1/1/00000000-0000-0000-0000-00000000000b.parquet, 1/1/1/00000000-0000-0000-0000-00000000000c.parquet], [1/1/1/00000000-0000-0000-0000-00000000000d.parquet, 1/1/1/00000000-0000-0000-0000-00000000000e.parquet, 1/1/1/00000000-0000-0000-0000-00000000000f.parquet, 1/1/1/00000000-0000-0000-0000-000000000010.parquet, 1/1/1/00000000-0000-0000-0000-000000000011.parquet, 1/1/1/00000000-0000-0000-0000-000000000012.parquet, 1/1/1/00000000-0000-0000-0000-000000000013.parquet, 1/1/1/00000000-0000-0000-0000-000000000014.parquet, 1/1/1/00000000-0000-0000-0000-000000000015.parquet, 1/1/1/00000000-0000-0000-0000-000000000016.parquet, 1/1/1/00000000-0000-0000-0000-000000000017.parquet, 1/1/1/00000000-0000-0000-0000-000000000018.parquet, 1/1/1/00000000-0000-0000-0000-000000000019.parquet], [1/1/1/00000000-0000-0000-0000-00000000001a.parquet, 1/1/1/00000000-0000-0000-0000-00000000001b.parquet, 1/1/1/00000000-0000-0000-0000-00000000001c.parquet, 1/1/1/00000000-0000-0000-0000-00000000001d.parquet, 1/1/1/00000000-0000-0000-0000-00000000001e.parquet, 1/1/1/00000000-0000-0000-0000-00000000001f.parquet, 1/1/1/00000000-0000-0000-0000-000000000020.parquet, 1/1/1/00000000-0000-0000-0000-000000000021.parquet, 1/1/1/00000000-0000-0000-0000-000000000022.parquet, 1/1/1/00000000-0000-0000-0000-000000000023.parquet, 1/1/1/00000000-0000-0000-0000-000000000024.parquet, 1/1/1/00000000-0000-0000-0000-000000000025.parquet], [1/1/1/00000000-0000-0000-0000-000000000026.parquet, 1/1/1/00000000-0000-0000-0000-000000000027.parquet, 1/1/1/00000000-0000-0000-0000-000000000028.parquet, 1/1/1/00000000-0000-0000-0000-000000000029.parquet, 1/1/1/00000000-0000-0000-0000-00000000002a.parquet, 1/1/1/00000000-0000-0000-0000-00000000002b.parquet, 1/1/1/00000000-0000-0000-0000-00000000002c.parquet, 1/1/1/00000000-0000-0000-0000-00000000002d.parquet, 
1/1/1/00000000-0000-0000-0000-00000000002e.parquet, 1/1/1/00000000-0000-0000-0000-00000000002f.parquet, 1/1/1/00000000-0000-0000-0000-000000000030.parquet, 1/1/1/00000000-0000-0000-0000-000000000031.parquet]]}, projection=[__chunk_order, f1, tag1, tag2, tag3, tag4, time] | +| | | +---------- \ No newline at end of file diff --git a/influxdb_iox/tests/query_tests2/setups.rs b/influxdb_iox/tests/query_tests2/setups.rs index 74d5b2df59..c87a275942 100644 --- a/influxdb_iox/tests/query_tests2/setups.rs +++ b/influxdb_iox/tests/query_tests2/setups.rs @@ -269,6 +269,28 @@ pub static SETUPS: Lazy> = Lazy::new(|| { }) .collect::>(), ), + ( + "FiftySortedSameParquetFiles", + (0..50) + .flat_map(|_i| { + + let write = Step::WriteLineProtocol( + "m,tag1=A,tag2=B,tag3=C,tag4=D f1=1,f2=2 2001".into(), // duplicated across all chunks + ); + + [ + Step::RecordNumParquetFiles, + write, + Step::Persist, + Step::WaitForPersisted2 { + expected_increase: 1, + }, + ] + .into_iter() + }) + .collect::>(), + ), + ( "OneMeasurementManyFields", vec![ From ccacd7e78eb54353bf213e32e282c962b2a66294 Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Thu, 4 May 2023 18:25:19 +1000 Subject: [PATCH 015/119] chore: Fix doc --- influxdb_influxql_parser/src/select.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/influxdb_influxql_parser/src/select.rs b/influxdb_influxql_parser/src/select.rs index 0a138e1b89..0ef658ba57 100644 --- a/influxdb_influxql_parser/src/select.rs +++ b/influxdb_influxql_parser/src/select.rs @@ -44,7 +44,7 @@ pub struct SelectStatement { /// Expressions used for grouping the selection. pub group_by: Option, - /// The [fill clause] specifies the fill behaviour for the selection. If the value is [`None`], + /// The [fill] clause specifies the fill behaviour for the selection. If the value is [`None`], /// it is the same behavior as `fill(none)`. /// /// [fill]: https://docs.influxdata.com/influxdb/v1.8/query_language/explore-data/#group-by-time-intervals-and-fill From b47e0efc85888d33afced07d8eafdfe838217222 Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Thu, 4 May 2023 18:27:32 +1000 Subject: [PATCH 016/119] feat: Step 1 on N for an intermediate representation of `SELECT` The end goal is that each `Select` node will contain a schema to be referenced directly by the InfluxQL planner. Additionally, further refinement of the field data types used by the `Select` node are expected, to remove ambiguity from the planner. 
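As a reader's aid (not part of the patch): the shape this commit moves toward can be sketched as follows. The rewriter lowers the `FROM` clause into intermediate table references, and the type evaluator resolves a field's type by folding over them, keeping the higher-precedence type when the same field appears in several measurements. The names below are simplified stand-ins for the real `ir::TableReference`, `ir::Select`, and `evaluate_type` shown in the diff that follows.

// Precedence mirrors the `res < existing` comparison in the evaluator: a lower
// variant wins, so Float is the highest-precedence result type.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum VarRefDataType {
    Float,
    Integer,
    Unsigned,
    String,
    Boolean,
}

enum TableReference {
    Name(String),
    Subquery(Box<Select>),
}

struct Select {
    from: Vec<TableReference>,
    // fields, condition, group_by, ... elided for brevity
}

/// Resolve the data type of `field` across every table reference in `from`.
fn resolve_field_type(
    lookup: &impl Fn(&str, &str) -> Option<VarRefDataType>,
    from: &[TableReference],
    field: &str,
) -> Option<VarRefDataType> {
    let mut data_type: Option<VarRefDataType> = None;
    for tr in from {
        let res = match tr {
            TableReference::Name(name) => lookup(name, field),
            // for a subquery, recurse into its own FROM list
            TableReference::Subquery(select) => resolve_field_type(lookup, &select.from, field),
        };
        data_type = match (data_type, res) {
            // keep whichever type has the higher precedence
            (Some(existing), Some(new)) if new < existing => Some(new),
            (None, Some(new)) => Some(new),
            (existing, _) => existing,
        };
    }
    data_type
}

#[test]
fn same_field_resolves_to_highest_precedence() {
    // shared_field0 is a float in temp_01 and an integer in temp_02,
    // so the combined selection resolves it to float.
    let lookup = |table: &str, field: &str| match (table, field) {
        ("temp_01", "shared_field0") => Some(VarRefDataType::Float),
        ("temp_02", "shared_field0") => Some(VarRefDataType::Integer),
        _ => None,
    };
    let from = vec![
        TableReference::Name("temp_01".into()),
        TableReference::Name("temp_02".into()),
    ];
    assert_eq!(
        resolve_field_type(&lookup, &from, "shared_field0"),
        Some(VarRefDataType::Float)
    );
}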
--- .../src/plan/expr_type_evaluator.rs | 257 ++++++++++++------ iox_query_influxql/src/plan/field.rs | 27 +- iox_query_influxql/src/plan/ir.rs | 92 +++++++ iox_query_influxql/src/plan/mod.rs | 1 + iox_query_influxql/src/plan/rewriter.rs | 199 +++++--------- 5 files changed, 362 insertions(+), 214 deletions(-) create mode 100644 iox_query_influxql/src/plan/ir.rs diff --git a/iox_query_influxql/src/plan/expr_type_evaluator.rs b/iox_query_influxql/src/plan/expr_type_evaluator.rs index ae87158002..b563ef97b1 100644 --- a/iox_query_influxql/src/plan/expr_type_evaluator.rs +++ b/iox_query_influxql/src/plan/expr_type_evaluator.rs @@ -1,33 +1,33 @@ use crate::plan::field::field_by_name; use crate::plan::field_mapper::map_type; +use crate::plan::ir::TableReference; use crate::plan::{error, SchemaProvider}; use datafusion::common::Result; -use influxdb_influxql_parser::common::{MeasurementName, QualifiedMeasurementName}; use influxdb_influxql_parser::expression::{ Binary, BinaryOperator, Call, Expr, VarRef, VarRefDataType, }; use influxdb_influxql_parser::literal::Literal; -use influxdb_influxql_parser::select::{Dimension, FromMeasurementClause, MeasurementSelection}; +use influxdb_influxql_parser::select::Dimension; use itertools::Itertools; /// Evaluate the type of the specified expression. /// /// Derived from [Go implementation](https://github.com/influxdata/influxql/blob/1ba470371ec093d57a726b143fe6ccbacf1b452b/ast.go#L4796-L4797). -pub(crate) fn evaluate_type( +pub(super) fn evaluate_type( s: &dyn SchemaProvider, expr: &Expr, - from: &FromMeasurementClause, + from: &[TableReference], ) -> Result> { TypeEvaluator::new(from, s).eval_type(expr) } struct TypeEvaluator<'a> { s: &'a dyn SchemaProvider, - from: &'a FromMeasurementClause, + from: &'a [TableReference], } impl<'a> TypeEvaluator<'a> { - fn new(from: &'a FromMeasurementClause, s: &'a dyn SchemaProvider) -> Self { + fn new(from: &'a [TableReference], s: &'a dyn SchemaProvider) -> Self { Self { from, s } } @@ -98,14 +98,11 @@ impl<'a> TypeEvaluator<'a> { } _ => { let mut data_type: Option = None; - for ms in self.from.iter() { - match ms { - MeasurementSelection::Name(QualifiedMeasurementName { - name: MeasurementName::Name(ident), - .. - }) => match ( + for tr in self.from.iter() { + match tr { + TableReference::Name(name) => match ( data_type, - map_type(self.s, ident.as_str(), expr.name.as_str())?, + map_type(self.s, name.as_str(), expr.name.as_str())?, ) { (Some(existing), Some(res)) => { if res < existing { @@ -115,9 +112,9 @@ impl<'a> TypeEvaluator<'a> { (None, Some(res)) => data_type = Some(res), _ => continue, }, - MeasurementSelection::Subquery(select) => { + TableReference::Subquery(select) => { // find the field by name - if let Some(field) = field_by_name(select, expr.name.as_str()) { + if let Some(field) = field_by_name(&select.fields, expr.name.as_str()) { match (data_type, evaluate_type(self.s, &field.expr, &select.from)?) 
{ (Some(existing), Some(res)) => { @@ -140,9 +137,6 @@ impl<'a> TypeEvaluator<'a> { } } } - _ => { - return error::internal("eval_var_ref: Unexpected MeasurementSelection") - } } } @@ -252,6 +246,7 @@ fn binary_data_type( #[cfg(test)] mod test { use crate::plan::expr_type_evaluator::{binary_data_type, evaluate_type}; + use crate::plan::rewriter::map_select; use crate::plan::test_utils::{parse_select, MockSchemaProvider}; use assert_matches::assert_matches; use datafusion::common::DataFusionError; @@ -314,51 +309,72 @@ mod test { fn test_evaluate_type() { let namespace = MockSchemaProvider::default(); - let stmt = parse_select("SELECT shared_field0 FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT shared_field0 FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Float); - let stmt = parse_select("SELECT shared_tag0 FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = + map_select(&namespace, &parse_select("SELECT shared_tag0 FROM temp_01")).unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Tag); // Unknown - let stmt = parse_select("SELECT not_exists FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select(&namespace, &parse_select("SELECT not_exists FROM temp_01")).unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from).unwrap(); assert!(res.is_none()); - let stmt = parse_select("SELECT shared_field0 FROM temp_02"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT shared_field0 FROM temp_02"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Integer); - let stmt = parse_select("SELECT shared_field0 FROM temp_02"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT shared_field0 FROM temp_02"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Integer); // Same field across multiple measurements resolves to the highest precedence (float) - let stmt = parse_select("SELECT shared_field0 FROM temp_01, temp_02"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT shared_field0 FROM temp_01, temp_02"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Float); // Explicit cast of integer field to float - let stmt = parse_select("SELECT SUM(field_i64::float) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT SUM(field_i64::float) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); @@ -368,15 +384,23 @@ mod test { // Binary expressions // - let stmt = parse_select("SELECT field_f64 + field_i64 FROM all_types"); - let field = 
stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT field_f64 + field_i64 FROM all_types"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Float); - let stmt = parse_select("SELECT field_bool | field_bool FROM all_types"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT field_bool | field_bool FROM all_types"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); @@ -385,98 +409,154 @@ mod test { // Fallible // Verify incompatible operators and operator error - let stmt = parse_select("SELECT field_f64 & field_i64 FROM all_types"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT field_f64 & field_i64 FROM all_types"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from); assert_matches!(res, Err(DataFusionError::Plan(ref s)) if s == "incompatible operands for operator &: float and integer"); // data types for functions - let stmt = parse_select("SELECT SUM(field_f64) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT SUM(field_f64) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Float); - let stmt = parse_select("SELECT SUM(field_i64) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT SUM(field_i64) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Integer); - let stmt = parse_select("SELECT SUM(field_u64) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT SUM(field_u64) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Unsigned); - let stmt = parse_select("SELECT MIN(field_f64) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT MIN(field_f64) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Float); - let stmt = parse_select("SELECT MAX(field_i64) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT MAX(field_i64) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Integer); - let stmt = parse_select("SELECT FIRST(field_str) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT FIRST(field_str) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, 
&stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::String); - let stmt = parse_select("SELECT LAST(field_str) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT LAST(field_str) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::String); - let stmt = parse_select("SELECT MEAN(field_i64) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT MEAN(field_i64) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Float); - let stmt = parse_select("SELECT MEAN(field_u64) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT MEAN(field_u64) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Float); - let stmt = parse_select("SELECT COUNT(field_f64) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT COUNT(field_f64) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Integer); - let stmt = parse_select("SELECT COUNT(field_i64) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT COUNT(field_i64) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Integer); - let stmt = parse_select("SELECT COUNT(field_u64) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT COUNT(field_u64) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Integer); - let stmt = parse_select("SELECT COUNT(field_str) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT COUNT(field_str) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); @@ -501,8 +581,12 @@ mod test { "holt_winters", "holt_winters_with_fit", ] { - let stmt = parse_select(&format!("SELECT {name}(field_i64) FROM temp_01")); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select(&format!("SELECT {name}(field_i64) FROM temp_01")), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); @@ -510,16 +594,24 @@ mod test { } // Integer functions - let stmt = parse_select("SELECT elapsed(field_i64) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT elapsed(field_i64) FROM temp_01"), + ) + .unwrap(); + let field = 
stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Integer); // Invalid function - let stmt = parse_select("SELECT not_valid(field_i64) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT not_valid(field_i64) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .is_none(); @@ -527,25 +619,34 @@ mod test { // subqueries - let stmt = parse_select("SELECT inner FROM (SELECT field_f64 as inner FROM temp_01)"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT inner FROM (SELECT field_f64 as inner FROM temp_01)"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Float); - let stmt = - parse_select("SELECT inner FROM (SELECT shared_tag0, field_f64 as inner FROM temp_01)"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select( + "SELECT inner FROM (SELECT shared_tag0, field_f64 as inner FROM temp_01)", + ), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Float); - let stmt = parse_select( + let stmt = map_select(&namespace, &parse_select( "SELECT shared_tag0, inner FROM (SELECT shared_tag0, field_f64 as inner FROM temp_01)", - ); - let field = stmt.fields.head().unwrap(); + )).unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); diff --git a/iox_query_influxql/src/plan/field.rs b/iox_query_influxql/src/plan/field.rs index 238739be4c..c7cad73ccc 100644 --- a/iox_query_influxql/src/plan/field.rs +++ b/iox_query_influxql/src/plan/field.rs @@ -1,5 +1,5 @@ use influxdb_influxql_parser::expression::{Call, Expr, VarRef}; -use influxdb_influxql_parser::select::{Field, SelectStatement}; +use influxdb_influxql_parser::select::Field; use influxdb_influxql_parser::visit::{Recursion, Visitable, Visitor}; use std::ops::Deref; @@ -40,8 +40,8 @@ pub(crate) fn field_name(f: &Field) -> String { /// This implementation duplicates the behavior of the original implementation, including skipping the /// first argument. It is likely the original intended to skip the _last_ argument, which is the number /// of rows. 
-pub(crate) fn field_by_name<'a>(select: &'a SelectStatement, name: &str) -> Option<&'a Field> { - select.fields +pub(crate) fn field_by_name<'a>(fields: &'a [Field], name: &str) -> Option<&'a Field> { + fields .iter() .find(|f| { field_name(f) == name || match &f.expr { @@ -128,47 +128,50 @@ mod test { fn test_field_by_name() { let stmt = parse_select("SELECT usage, idle FROM cpu"); assert_eq!( - format!("{}", field_by_name(&stmt, "usage").unwrap()), + format!("{}", field_by_name(&stmt.fields, "usage").unwrap()), "usage" ); let stmt = parse_select("SELECT usage as foo, usage FROM cpu"); assert_eq!( - format!("{}", field_by_name(&stmt, "foo").unwrap()), + format!("{}", field_by_name(&stmt.fields, "foo").unwrap()), "usage AS foo" ); let stmt = parse_select("SELECT top(idle, usage, 5), usage FROM cpu"); assert_eq!( - format!("{}", field_by_name(&stmt, "usage").unwrap()), + format!("{}", field_by_name(&stmt.fields, "usage").unwrap()), "top(idle, usage, 5)" ); let stmt = parse_select("SELECT bottom(idle, usage, 5), usage FROM cpu"); assert_eq!( - format!("{}", field_by_name(&stmt, "usage").unwrap()), + format!("{}", field_by_name(&stmt.fields, "usage").unwrap()), "bottom(idle, usage, 5)" ); let stmt = parse_select("SELECT top(idle, usage, 5) as foo, usage FROM cpu"); assert_eq!( - format!("{}", field_by_name(&stmt, "usage").unwrap()), + format!("{}", field_by_name(&stmt.fields, "usage").unwrap()), "top(idle, usage, 5) AS foo" ); assert_eq!( - format!("{}", field_by_name(&stmt, "foo").unwrap()), + format!("{}", field_by_name(&stmt.fields, "foo").unwrap()), "top(idle, usage, 5) AS foo" ); // Not exists let stmt = parse_select("SELECT usage, idle FROM cpu"); - assert_matches!(field_by_name(&stmt, "bar"), None); + assert_matches!(field_by_name(&stmt.fields, "bar"), None); // Does not match name by first argument to top or bottom, per // bug in original implementation. let stmt = parse_select("SELECT top(foo, usage, 5), idle FROM cpu"); - assert_matches!(field_by_name(&stmt, "foo"), None); - assert_eq!(format!("{}", field_by_name(&stmt, "idle").unwrap()), "idle"); + assert_matches!(field_by_name(&stmt.fields, "foo"), None); + assert_eq!( + format!("{}", field_by_name(&stmt.fields, "idle").unwrap()), + "idle" + ); } } diff --git a/iox_query_influxql/src/plan/ir.rs b/iox_query_influxql/src/plan/ir.rs new file mode 100644 index 0000000000..11547c8342 --- /dev/null +++ b/iox_query_influxql/src/plan/ir.rs @@ -0,0 +1,92 @@ +//! Defines data structures which represent an InfluxQL +//! statement after it has been processed + +use influxdb_influxql_parser::common::{ + LimitClause, MeasurementName, OffsetClause, OrderByClause, QualifiedMeasurementName, + WhereClause, +}; +use influxdb_influxql_parser::expression::ConditionalExpression; +use influxdb_influxql_parser::select::{ + Field, FieldList, FillClause, FromMeasurementClause, GroupByClause, MeasurementSelection, + SelectStatement, TimeZoneClause, +}; + +#[derive(Debug, Default, Clone)] +pub(super) struct Select { + /// The schema of the selection. + // pub(super) schema: Todo, + + /// Projection clause of the selection. + pub(super) fields: Vec, + + /// A list of tables or subqueries used as the source data for the selection. + pub(super) from: Vec, + + /// A conditional expression to filter the selection. + pub(super) condition: Option, + + /// The GROUP BY clause of the selection. + pub(super) group_by: Option, + + /// The [fill] clause specifies the fill behaviour for the selection. 
If the value is [`None`],
+    /// it is the same behavior as `fill(none)`.
+    ///
+    /// [fill]: https://docs.influxdata.com/influxdb/v1.8/query_language/explore-data/#group-by-time-intervals-and-fill
+    pub(super) fill: Option<FillClause>,
+
+    /// Configures the ordering of the selection by time.
+    pub(super) order_by: Option<OrderByClause>,
+
+    /// A value to restrict the number of rows returned.
+    pub(super) limit: Option<LimitClause>,
+
+    /// A value to specify an offset to start retrieving rows.
+    pub(super) offset: Option<OffsetClause>,
+
+    /// The timezone for the query, specified as [`tz('