Merge pull request #7753 from influxdata/sgc/issue/7739_select_ir_02

refactor: Use intermediate representation of `SELECT` in InfluxQL planner
2023-05-05 06:06:09 +00:00 · 2023-05-05 06:06:09 +00:00 · 2ba5fb4759
parent af726e092f f026d546b0
commit 2ba5fb4759
7 changed files with 588 additions and 564 deletions
--- a/influxdb_influxql_parser/src/select.rs
+++ b/influxdb_influxql_parser/src/select.rs
@ -45,7 +45,7 @@ pub struct SelectStatement {
    pub group_by: Option<GroupByClause>,

    /// The [fill] clause specifies the fill behaviour for the selection. If the value is [`None`],
-    /// it is the same behavior as `fill(none)`.
+    /// it is the same behavior as `fill(null)`.
    ///
    /// [fill]: https://docs.influxdata.com/influxdb/v1.8/query_language/explore-data/#group-by-time-intervals-and-fill
    pub fill: Option<FillClause>,
--- a/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql.expected
+++ b/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql.expected
@ -1003,11 +1003,11 @@ tags: non_existent=
 +---------------------+-------+--------------------+--------------------+
 -- InfluxQL: SELECT COUNT(f64), COUNT(f64) + COUNT(f64), COUNT(f64) * 3 FROM m0;
 name: m0
-+---------------------+-------+---------------------+-----------+
-| time                | count | count_f64_count_f64 | count_f64 |
-+---------------------+-------+---------------------+-----------+
-| 1970-01-01T00:00:00 | 7     | 14                  | 21        |
-+---------------------+-------+---------------------+-----------+
+---------------------+-------+-------------+---------+
+| time                | count | count_count | count_1 |
+---------------------+-------+-------------+---------+
+| 1970-01-01T00:00:00 | 7     | 14          | 21      |
+---------------------+-------+-------------+---------+
 -- InfluxQL: SELECT COUNT(f64) as the_count, SUM(non_existent) as foo FROM m0;
 name: m0
 +---------------------+-----------+-----+
@ -1052,31 +1052,31 @@ name: disk
 +---------------------+------+--------+
 -- InfluxQL: SELECT MEAN(usage_idle) + MEAN(bytes_free) FROM cpu, disk;
 name: cpu
-+---------------------+---------------------------------+
-| time                | mean_usage_idle_mean_bytes_free |
-+---------------------+---------------------------------+
-| 1970-01-01T00:00:00 |                                 |
-+---------------------+---------------------------------+
+---------------------+-----------+
+| time                | mean_mean |
+---------------------+-----------+
+| 1970-01-01T00:00:00 |           |
+---------------------+-----------+
 name: disk
-+---------------------+---------------------------------+
-| time                | mean_usage_idle_mean_bytes_free |
-+---------------------+---------------------------------+
-| 1970-01-01T00:00:00 |                                 |
-+---------------------+---------------------------------+
+---------------------+-----------+
+| time                | mean_mean |
+---------------------+-----------+
+| 1970-01-01T00:00:00 |           |
+---------------------+-----------+
 -- InfluxQL: SELECT MEAN(usage_idle) + MEAN(foo) FROM cpu;
 name: cpu
-+---------------------+--------------------------+
-| time                | mean_usage_idle_mean_foo |
-+---------------------+--------------------------+
-| 1970-01-01T00:00:00 |                          |
-+---------------------+--------------------------+
+---------------------+-----------+
+| time                | mean_mean |
+---------------------+-----------+
+| 1970-01-01T00:00:00 |           |
+---------------------+-----------+
 -- InfluxQL: SELECT MEAN(usage_idle), MEAN(usage_idle) + MEAN(foo) FROM cpu;
 name: cpu
-+---------------------+--------------------+--------------------------+
-| time                | mean               | mean_usage_idle_mean_foo |
-+---------------------+--------------------+--------------------------+
-| 1970-01-01T00:00:00 | 1.9850000000000003 |                          |
-+---------------------+--------------------+--------------------------+
+---------------------+--------------------+-----------+
+| time                | mean               | mean_mean |
+---------------------+--------------------+-----------+
+| 1970-01-01T00:00:00 | 1.9850000000000003 |           |
+---------------------+--------------------+-----------+
 -- InfluxQL: SELECT MEAN(foo) FROM cpu;
 ++
 ++
@ -1084,47 +1084,47 @@ name: cpu
 -- InfluxQL: SELECT MEAN(usage_idle) + MEAN(foo) FROM cpu GROUP BY cpu;
 name: cpu
 tags: cpu=cpu-total
-+---------------------+--------------------------+
-| time                | mean_usage_idle_mean_foo |
-+---------------------+--------------------------+
-| 1970-01-01T00:00:00 |                          |
-+---------------------+--------------------------+
+---------------------+-----------+
+| time                | mean_mean |
+---------------------+-----------+
+| 1970-01-01T00:00:00 |           |
+---------------------+-----------+
 name: cpu
 tags: cpu=cpu0
-+---------------------+--------------------------+
-| time                | mean_usage_idle_mean_foo |
-+---------------------+--------------------------+
-| 1970-01-01T00:00:00 |                          |
-+---------------------+--------------------------+
+---------------------+-----------+
+| time                | mean_mean |
+---------------------+-----------+
+| 1970-01-01T00:00:00 |           |
+---------------------+-----------+
 name: cpu
 tags: cpu=cpu1
-+---------------------+--------------------------+
-| time                | mean_usage_idle_mean_foo |
-+---------------------+--------------------------+
-| 1970-01-01T00:00:00 |                          |
-+---------------------+--------------------------+
+---------------------+-----------+
+| time                | mean_mean |
+---------------------+-----------+
+| 1970-01-01T00:00:00 |           |
+---------------------+-----------+
 -- InfluxQL: SELECT MEAN(usage_idle), MEAN(usage_idle) + MEAN(foo) FROM cpu GROUP BY cpu;
 name: cpu
 tags: cpu=cpu-total
-+---------------------+--------------------+--------------------------+
-| time                | mean               | mean_usage_idle_mean_foo |
-+---------------------+--------------------+--------------------------+
-| 1970-01-01T00:00:00 | 2.9850000000000003 |                          |
-+---------------------+--------------------+--------------------------+
+---------------------+--------------------+-----------+
+| time                | mean               | mean_mean |
+---------------------+--------------------+-----------+
+| 1970-01-01T00:00:00 | 2.9850000000000003 |           |
+---------------------+--------------------+-----------+
 name: cpu
 tags: cpu=cpu0
-+---------------------+-------+--------------------------+
-| time                | mean  | mean_usage_idle_mean_foo |
-+---------------------+-------+--------------------------+
-| 1970-01-01T00:00:00 | 0.985 |                          |
-+---------------------+-------+--------------------------+
+---------------------+-------+-----------+
+| time                | mean  | mean_mean |
+---------------------+-------+-----------+
+| 1970-01-01T00:00:00 | 0.985 |           |
+---------------------+-------+-----------+
 name: cpu
 tags: cpu=cpu1
-+---------------------+--------------------+--------------------------+
-| time                | mean               | mean_usage_idle_mean_foo |
-+---------------------+--------------------+--------------------------+
-| 1970-01-01T00:00:00 | 1.9849999999999999 |                          |
-+---------------------+--------------------+--------------------------+
+---------------------+--------------------+-----------+
+| time                | mean               | mean_mean |
+---------------------+--------------------+-----------+
+| 1970-01-01T00:00:00 | 1.9849999999999999 |           |
+---------------------+--------------------+-----------+
 -- InfluxQL: SELECT MEAN(foo) FROM cpu GROUP BY cpu;
 ++
 ++
--- a/iox_query_influxql/src/plan/expr_type_evaluator.rs
+++ b/iox_query_influxql/src/plan/expr_type_evaluator.rs
@ -1,6 +1,6 @@
 use crate::plan::field::field_by_name;
 use crate::plan::field_mapper::map_type;
-use crate::plan::ir::TableReference;
+use crate::plan::ir::DataSource;
 use crate::plan::{error, SchemaProvider};
 use datafusion::common::Result;
 use influxdb_influxql_parser::expression::{
@ -16,22 +16,25 @@ use itertools::Itertools;
 pub(super) fn evaluate_type(
    s: &dyn SchemaProvider,
    expr: &Expr,
-    from: &[TableReference],
+    from: &[DataSource],
 ) -> Result<Option<VarRefDataType>> {
-    TypeEvaluator::new(from, s).eval_type(expr)
+    TypeEvaluator::new(s, from).eval_type(expr)
 }

-struct TypeEvaluator<'a> {
+/// Evaluate the type of the specified expression.
+///
+/// Derived from [Go implementation](https://github.com/influxdata/influxql/blob/1ba470371ec093d57a726b143fe6ccbacf1b452b/ast.go#L4796-L4797).
+pub(super) struct TypeEvaluator<'a> {
    s: &'a dyn SchemaProvider,
-    from: &'a [TableReference],
+    from: &'a [DataSource],
 }

 impl<'a> TypeEvaluator<'a> {
-    fn new(from: &'a [TableReference], s: &'a dyn SchemaProvider) -> Self {
+    pub(super) fn new(s: &'a dyn SchemaProvider, from: &'a [DataSource]) -> Self {
        Self { from, s }
    }

-    fn eval_type(&self, expr: &Expr) -> Result<Option<VarRefDataType>> {
+    pub(super) fn eval_type(&self, expr: &Expr) -> Result<Option<VarRefDataType>> {
        Ok(match expr {
            Expr::VarRef(v) => self.eval_var_ref(v)?,
            Expr::Call(v) => self.eval_call(v)?,
@ -80,9 +83,10 @@ impl<'a> TypeEvaluator<'a> {
        }
    }

-    /// Returns the type for the specified [`Expr`].
+    /// Returns the type for the specified [`VarRef`].
+    ///
    /// This function assumes that the expression has already been reduced.
-    fn eval_var_ref(&self, expr: &VarRef) -> Result<Option<VarRefDataType>> {
+    pub(super) fn eval_var_ref(&self, expr: &VarRef) -> Result<Option<VarRefDataType>> {
        Ok(match expr.data_type {
            Some(dt)
                if matches!(
@ -100,7 +104,7 @@ impl<'a> TypeEvaluator<'a> {
                let mut data_type: Option<VarRefDataType> = None;
                for tr in self.from.iter() {
                    match tr {
-                        TableReference::Name(name) => match (
+                        DataSource::Table(name) => match (
                            data_type,
                            map_type(self.s, name.as_str(), expr.name.as_str())?,
                        ) {
@ -112,7 +116,7 @@ impl<'a> TypeEvaluator<'a> {
                            (None, Some(res)) => data_type = Some(res),
                            _ => continue,
                        },
-                        TableReference::Subquery(select) => {
+                        DataSource::Subquery(select) => {
                            // find the field by name
                            if let Some(field) = field_by_name(&select.fields, expr.name.as_str()) {
                                match (data_type, evaluate_type(self.s, &field.expr, &select.from)?)
--- a/iox_query_influxql/src/plan/field.rs
+++ b/iox_query_influxql/src/plan/field.rs
@ -66,7 +66,7 @@ impl<'a> Visitor for BinaryExprNameVisitor<'a> {

    fn pre_visit_call(self, n: &Call) -> Result<Recursion<Self>, Self::Error> {
        self.0.push(n.name.clone());
-        Ok(Recursion::Continue(self))
+        Ok(Recursion::Stop(self))
    }
 }

@ -102,7 +102,7 @@ mod test {
        assert_eq!(field_name(&f), "count");

        let f = get_first_field("SELECT COUNT(usage) + SUM(usage_idle) FROM cpu");
-        assert_eq!(field_name(&f), "count_usage_sum_usage_idle");
+        assert_eq!(field_name(&f), "count_sum");

        let f = get_first_field("SELECT 1+2 FROM cpu");
        assert_eq!(field_name(&f), "");
--- a/iox_query_influxql/src/plan/ir.rs
+++ b/iox_query_influxql/src/plan/ir.rs
@ -5,7 +5,6 @@ use influxdb_influxql_parser::common::{
    LimitClause, MeasurementName, OffsetClause, OrderByClause, QualifiedMeasurementName,
    WhereClause,
 };
-use influxdb_influxql_parser::expression::ConditionalExpression;
 use influxdb_influxql_parser::select::{
    Field, FieldList, FillClause, FromMeasurementClause, GroupByClause, MeasurementSelection,
    SelectStatement, TimeZoneClause,
@ -19,17 +18,17 @@ pub(super) struct Select {
    /// Projection clause of the selection.
    pub(super) fields: Vec<Field>,

-    /// A list of tables or subqueries used as the source data for the selection.
-    pub(super) from: Vec<TableReference>,
+    /// A list of data sources for the selection.
+    pub(super) from: Vec<DataSource>,

    /// A conditional expression to filter the selection.
-    pub(super) condition: Option<ConditionalExpression>,
+    pub(super) condition: Option<WhereClause>,

    /// The GROUP BY clause of the selection.
    pub(super) group_by: Option<GroupByClause>,

    /// The [fill] clause specifies the fill behaviour for the selection. If the value is [`None`],
-    /// it is the same behavior as `fill(none)`.
+    /// it is the same behavior as `fill(null)`.
    ///
    /// [fill]: https://docs.influxdata.com/influxdb/v1.8/query_language/explore-data/#group-by-time-intervals-and-fill
    pub(super) fill: Option<FillClause>,
@ -38,10 +37,10 @@ pub(super) struct Select {
    pub(super) order_by: Option<OrderByClause>,

    /// A value to restrict the number of rows returned.
-    pub(super) limit: Option<u64>,
+    pub(super) limit: Option<LimitClause>,

    /// A value to specify an offset to start retrieving rows.
-    pub(super) offset: Option<u64>,
+    pub(super) offset: Option<OffsetClause>,

    /// The timezone for the query, specified as [`tz('<time zone>')`][time_zone_clause].
    ///
@ -58,25 +57,25 @@ impl From<Select> for SelectStatement {
                    .from
                    .into_iter()
                    .map(|tr| match tr {
-                        TableReference::Name(name) => {
+                        DataSource::Table(name) => {
                            MeasurementSelection::Name(QualifiedMeasurementName {
                                database: None,
                                retention_policy: None,
                                name: MeasurementName::Name(name.as_str().into()),
                            })
                        }
-                        TableReference::Subquery(q) => {
+                        DataSource::Subquery(q) => {
                            MeasurementSelection::Subquery(Box::new((*q).into()))
                        }
                    })
                    .collect(),
            ),
-            condition: value.condition.map(WhereClause::new),
+            condition: value.condition,
            group_by: value.group_by,
            fill: value.fill,
            order_by: value.order_by,
-            limit: value.limit.map(LimitClause::new),
-            offset: value.offset.map(OffsetClause::new),
+            limit: value.limit,
+            offset: value.offset,
            series_limit: None,
            series_offset: None,
            timezone: value.timezone.map(TimeZoneClause::new),
@ -84,9 +83,9 @@ impl From<Select> for SelectStatement {
    }
 }

-/// Represents a concrete reference to a table in a [`Select`] from clause.
+/// Represents a data source that is either a table or a subquery in a [`Select`] from clause.
 #[derive(Debug, Clone)]
-pub(super) enum TableReference {
-    Name(String),
+pub(super) enum DataSource {
+    Table(String),
    Subquery(Box<Select>),
 }
--- a/iox_query_influxql/src/plan/planner.rs
+++ b/iox_query_influxql/src/plan/planner.rs
@ -1,5 +1,6 @@
 mod select;

+use crate::plan::ir::{DataSource, Select};
 use crate::plan::planner::select::{
    check_exprs_satisfy_columns, fields_to_exprs_no_nulls, make_tag_key_column_meta, plan_with_sort,
 };
@ -43,9 +44,7 @@ use influxdb_influxql_parser::expression::{
 use influxdb_influxql_parser::functions::{
    is_aggregate_function, is_now_function, is_scalar_math_function,
 };
-use influxdb_influxql_parser::select::{
-    FillClause, GroupByClause, SLimitClause, SOffsetClause, TimeZoneClause,
-};
+use influxdb_influxql_parser::select::{FillClause, GroupByClause};
 use influxdb_influxql_parser::show_field_keys::ShowFieldKeysStatement;
 use influxdb_influxql_parser::show_measurements::{
    ShowMeasurementsStatement, WithMeasurementClause,
@ -56,9 +55,8 @@ use influxdb_influxql_parser::simple_from_clause::ShowFromClause;
 use influxdb_influxql_parser::{
    common::{MeasurementName, WhereClause},
    expression::Expr as IQLExpr,
-    identifier::Identifier,
    literal::Literal,
-    select::{Field, FromMeasurementClause, MeasurementSelection, SelectStatement},
+    select::{Field, SelectStatement},
    statement::Statement,
 };
 use iox_query::config::{IoxConfigExt, MetadataCutoff};
@ -75,7 +73,7 @@ use schema::{
    InfluxColumnType, InfluxFieldType, Schema, INFLUXQL_MEASUREMENT_COLUMN_NAME,
    INFLUXQL_METADATA_KEY,
 };
-use std::collections::{HashSet, VecDeque};
+use std::collections::HashSet;
 use std::fmt::Debug;
 use std::iter;
 use std::ops::{Bound, ControlFlow, Deref, Range};
@ -155,12 +153,11 @@ impl<'a> Context<'a> {
        Self { scope, ..*self }
    }

-    fn with_timezone(&self, timezone: Option<TimeZoneClause>) -> Self {
-        let tz = timezone.as_deref().cloned();
+    fn with_timezone(&self, tz: Option<Tz>) -> Self {
        Self { tz, ..*self }
    }

-    fn with_group_by_fill(&self, select: &'a SelectStatement) -> Self {
+    fn with_group_by_fill(&self, select: &'a Select) -> Self {
        Self {
            group_by: select.group_by.as_ref(),
            fill: select.fill,
@ -269,14 +266,12 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
        )
    }

-    fn rewrite_select_statement(&self, select: SelectStatement) -> Result<SelectStatement> {
+    fn rewrite_select_statement(&self, select: SelectStatement) -> Result<Select> {
        rewrite_statement(self.s, &select)
    }

    /// Create a [`LogicalPlan`] from the specified InfluxQL `SELECT` statement.
-    fn select_statement_to_plan(&self, select: &SelectStatement) -> Result<LogicalPlan> {
-        let mut plans = self.plan_from_tables(&select.from)?;
-
+    fn select_statement_to_plan(&self, select: &Select) -> Result<LogicalPlan> {
        let ctx = Context::new(select_statement_info(select)?)
            .with_timezone(select.timezone)
            .with_group_by_fill(select);
@ -341,65 +336,37 @@ impl<'a> InfluxQLToLogicalPlan<'a> {

        fields.extend(fields_no_time.iter().cloned());

-        // Build the first non-empty plan
        let plan = {
-            loop {
-                match plans.pop_front() {
-                    Some((plan, proj)) => match self.project_select(
-                        &ctx,
+            let mut iter = select.from.iter();
+            let plan = match iter.next() {
+                Some(ds) => self.project_select(&ctx, ds, select, &fields, &group_by_tag_set),
+                None => {
+                    // empty result, but let's at least have all the strictly necessary metadata
+                    let schema = Arc::new(ArrowSchema::new(vec![ArrowField::new(
+                        INFLUXQL_MEASUREMENT_COLUMN_NAME,
+                        (&InfluxColumnType::Tag).into(),
+                        false,
+                    )]));
+                    let plan = LogicalPlan::EmptyRelation(EmptyRelation {
+                        produce_one_row: false,
+                        schema: schema.to_dfschema_ref()?,
+                    });
+                    let plan = plan_with_metadata(
                        plan,
-                        proj,
-                        select,
-                        &fields,
-                        &group_by_tag_set,
-                    )? {
-                        // Exclude any plans that produce no data, which is
-                        // consistent with InfluxQL.
-                        LogicalPlan::EmptyRelation(EmptyRelation {
-                            produce_one_row: false,
-                            ..
-                        }) => continue,
-                        plan => break plan,
-                    },
-                    None => {
-                        // empty result, but let's at least have all the strictly necessary metadata
-                        let schema = Arc::new(ArrowSchema::new(vec![ArrowField::new(
-                            INFLUXQL_MEASUREMENT_COLUMN_NAME,
-                            (&InfluxColumnType::Tag).into(),
-                            false,
-                        )]));
-                        let plan = LogicalPlan::EmptyRelation(EmptyRelation {
-                            produce_one_row: false,
-                            schema: schema.to_dfschema_ref()?,
-                        });
-                        let plan = plan_with_metadata(
-                            plan,
-                            &InfluxQlMetadata {
-                                measurement_column_index: MEASUREMENT_COLUMN_INDEX,
-                                tag_key_columns: vec![],
-                            },
-                        )?;
-                        return Ok(plan);
-                    }
+                        &InfluxQlMetadata {
+                            measurement_column_index: MEASUREMENT_COLUMN_INDEX,
+                            tag_key_columns: vec![],
+                        },
+                    )?;
+                    return Ok(plan);
                }
-            }
-        };
+            }?;

-        // UNION the remaining plans
-        let plan = plans.into_iter().try_fold(plan, |prev, (next, proj)| {
-            let next = self.project_select(&ctx, next, proj, select, &fields, &group_by_tag_set)?;
-            if let LogicalPlan::EmptyRelation(EmptyRelation {
-                produce_one_row: false,
-                ..
-            }) = next
-            {
-                // Exclude any plans that produce no data, which is
-                // consistent with InfluxQL.
-                Ok(prev)
-            } else {
+            iter.try_fold(plan, |prev, ds| {
+                let next = self.project_select(&ctx, ds, select, &fields, &group_by_tag_set)?;
                LogicalPlanBuilder::from(prev).union(next)?.build()
-            }
-        })?;
+            })?
+        };

        let plan = plan_with_metadata(
            plan,
@ -451,23 +418,22 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
            &projection_tag_set,
        )?;

-        let plan = self.slimit(plan, select.series_offset, select.series_limit)?;
-
        Ok(plan)
    }

    fn project_select(
        &self,
        ctx: &Context<'_>,
-        input: LogicalPlan,
-        proj: Vec<Expr>,
-        select: &SelectStatement,
+        ds: &DataSource,
+        select: &Select,
        fields: &[Field],
        group_by_tag_set: &[&str],
    ) -> Result<LogicalPlan> {
-        let schemas = Schemas::new(input.schema())?;
+        let (plan, proj) = self.plan_from_data_source(ds)?;

-        let plan = self.plan_where_clause(ctx, &select.condition, input, &schemas)?;
+        let schemas = Schemas::new(plan.schema())?;
+
+        let plan = self.plan_where_clause(ctx, &select.condition, plan, &schemas)?;

        // Transform InfluxQL AST field expressions to a list of DataFusion expressions.
        let mut select_exprs = self.field_list_to_exprs(ctx, &plan, fields, &schemas)?;
@ -864,27 +830,6 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
        }
    }

-    /// Verifies the `SLIMIT` and `SOFFSET` clauses are `None`; otherwise, return a
-    /// `NotImplemented` error.
-    ///
-    /// ## Why?
-    /// * `SLIMIT` and `SOFFSET` don't work as expected per issue [#7571]
-    /// * This issue [is noted](https://docs.influxdata.com/influxdb/v1.8/query_language/explore-data/#the-slimit-clause) in our official documentation
-    ///
-    /// [#7571]: https://github.com/influxdata/influxdb/issues/7571
-    fn slimit(
-        &self,
-        input: LogicalPlan,
-        offset: Option<SOffsetClause>,
-        limit: Option<SLimitClause>,
-    ) -> Result<LogicalPlan> {
-        if offset.is_none() && limit.is_none() {
-            return Ok(input);
-        }
-
-        error::not_implemented("SLIMIT or SOFFSET")
-    }
-
    /// Map the InfluxQL `SELECT` projection list into a list of DataFusion expressions.
    fn field_list_to_exprs(
        &self,
@ -1001,8 +946,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
                name,
                data_type: opt_dst_type,
            }) => {
-                let name = normalize_identifier(name);
-                Ok(match (ctx.scope, name.as_str()) {
+                Ok(match (ctx.scope, name.deref().as_str()) {
                    // Per the Go implementation, the time column is case-insensitive in the
                    // `WHERE` clause and disregards any postfix type cast operator.
                    //
@ -1261,42 +1205,31 @@ impl<'a> InfluxQLToLogicalPlan<'a> {

    /// Generate a list of logical plans for each of the tables references in the `FROM`
    /// clause.
-    fn plan_from_tables(
-        &self,
-        from: &FromMeasurementClause,
-    ) -> Result<VecDeque<(LogicalPlan, Vec<Expr>)>> {
-        // A list of scans and their initial projections
-        let mut table_projs = VecDeque::new();
-        for ms in from.iter() {
-            let Some(table_proj) = match ms {
-                MeasurementSelection::Name(qn) => match qn.name {
-                    MeasurementName::Name(ref ident) => {
-                        self.create_table_ref(normalize_identifier(ident))
-                    }
-                    // rewriter is expected to expand the regular expression
-                    MeasurementName::Regex(_) => error::internal(
-                        "unexpected regular expression in FROM clause",
-                    ),
-                },
-                MeasurementSelection::Subquery(_) => error::not_implemented(
-                    "subquery in FROM clause",
-                ),
-            }? else { continue };
-            table_projs.push_back(table_proj);
+    fn plan_from_data_source(&self, ds: &DataSource) -> Result<(LogicalPlan, Vec<Expr>)> {
+        match ds {
+            DataSource::Table(table_name) => {
+                // `rewrite_statement` guarantees the table should exist
+                let source = self.s.get_table_provider(table_name)?;
+                let table_ref = TableReference::bare(table_name.to_owned());
+                Ok((
+                    LogicalPlanBuilder::scan(table_ref, source, None)?.build()?,
+                    vec![lit_dict(table_name).alias(INFLUXQL_MEASUREMENT_COLUMN_NAME)],
+                ))
+            }
+            DataSource::Subquery(_) => error::not_implemented("subquery in FROM clause"),
        }
-        Ok(table_projs)
    }

    /// Create a [LogicalPlan] that refers to the specified `table_name`.
    ///
    /// Normally, this functions will not return a `None`, as tables have been matched]
    /// by the [`rewrite_statement`] function.
-    fn create_table_ref(&self, table_name: String) -> Result<Option<(LogicalPlan, Vec<Expr>)>> {
-        Ok(if let Ok(source) = self.s.get_table_provider(&table_name) {
-            let table_ref = TableReference::bare(table_name.to_string());
+    fn create_table_ref(&self, table_name: &str) -> Result<Option<(LogicalPlan, Vec<Expr>)>> {
+        Ok(if let Ok(source) = self.s.get_table_provider(table_name) {
+            let table_ref = TableReference::bare(table_name.to_owned());
            Some((
                LogicalPlanBuilder::scan(table_ref, source, None)?.build()?,
-                vec![lit_dict(&table_name).alias(INFLUXQL_MEASUREMENT_COLUMN_NAME)],
+                vec![lit_dict(table_name).alias(INFLUXQL_MEASUREMENT_COLUMN_NAME)],
            ))
        } else {
            None
@ -1436,7 +1369,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
                let mut union_plan = None;
                for table in tables {
                    let Some(table_schema) = self.s.table_schema(&table) else {continue};
-                    let Some((plan, measurement_expr)) = self.create_table_ref(table.clone())? else {continue;};
+                    let Some((plan, measurement_expr)) = self.create_table_ref(&table)? else {continue;};

                    let schemas = Schemas::new(plan.schema())?;
                    let plan =
@ -1683,7 +1616,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
                continue;
            }

-            let Some((plan, measurement_expr)) = self.create_table_ref(table)? else {continue;};
+            let Some((plan, measurement_expr)) = self.create_table_ref(&table)? else {continue;};

            let schemas = Schemas::new(plan.schema())?;
            let plan = self.plan_where_clause(
@ -1787,7 +1720,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {

                let mut union_plan = None;
                for table in tables {
-                    let Some((plan, _measurement_expr)) = self.create_table_ref(table.clone())? else {continue;};
+                    let Some((plan, _measurement_expr)) = self.create_table_ref(&table)? else {continue;};

                    let schemas = Schemas::new(plan.schema())?;
                    let plan =
@ -2128,13 +2061,6 @@ fn conditional_op_to_operator(op: ConditionalOperator) -> Result<Operator> {
    }
 }

-// Normalize an identifier. Identifiers in InfluxQL are case sensitive,
-// and therefore not transformed to lower case.
-fn normalize_identifier(ident: &Identifier) -> String {
-    // Dereference the identifier to return the unquoted value.
-    ident.deref().clone()
-}
-
 /// Find the index of the time column in the fields list.
 ///
 /// > **Note**
@ -3264,11 +3190,11 @@ mod test {

                // The `COUNT(f64_field)` aggregate is only projected ones in the Aggregate and reused in the projection
                assert_snapshot!(plan("SELECT COUNT(f64_field), COUNT(f64_field) + COUNT(f64_field), COUNT(f64_field) * 3 FROM data"), @r###"
-            Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_f64_field_count_f64_field:Int64;N, count_f64_field:Int64;N]
-              Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, COUNT(data.f64_field) AS count, COUNT(data.f64_field) + COUNT(data.f64_field) AS count_f64_field_count_f64_field, COUNT(data.f64_field) * Int64(3) AS count_f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_f64_field_count_f64_field:Int64;N, count_f64_field:Int64;N]
-                Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N]
-                  TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
-            "###);
+                Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_count:Int64;N, count_1:Int64;N]
+                  Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, COUNT(data.f64_field) AS count, COUNT(data.f64_field) + COUNT(data.f64_field) AS count_count, COUNT(data.f64_field) * Int64(3) AS count_1 [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_count:Int64;N, count_1:Int64;N]
+                    Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N]
+                      TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
+                "###);

                // non-existent tags are excluded from the Aggregate groupBy and Sort operators
                assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY foo, non_existent"), @r###"
@ -3280,31 +3206,31 @@ mod test {

                // Aggregate expression is projected once and reused in final projection
                assert_snapshot!(plan("SELECT COUNT(f64_field),  COUNT(f64_field) * 2 FROM data"), @r###"
-                Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_f64_field:Int64;N]
-                  Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, COUNT(data.f64_field) AS count, COUNT(data.f64_field) * Int64(2) AS count_f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_f64_field:Int64;N]
+                Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_1:Int64;N]
+                  Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, COUNT(data.f64_field) AS count, COUNT(data.f64_field) * Int64(2) AS count_1 [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_1:Int64;N]
                    Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N]
                      TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
                "###);

                // Aggregate expression selecting non-existent field
                assert_snapshot!(plan("SELECT MEAN(f64_field) + MEAN(non_existent) FROM data"), @r###"
-                Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), mean_f64_field_mean_non_existent:Null;N]
-                  Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, NULL AS mean_f64_field_mean_non_existent [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), mean_f64_field_mean_non_existent:Null;N]
+                Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), mean_mean:Null;N]
+                  Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, NULL AS mean_mean [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), mean_mean:Null;N]
                    EmptyRelation []
                "###);

                // Aggregate expression with GROUP BY and non-existent field
                assert_snapshot!(plan("SELECT MEAN(f64_field) + MEAN(non_existent) FROM data GROUP BY foo"), @r###"
-                Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, mean_f64_field_mean_non_existent:Null;N]
-                  Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, NULL AS mean_f64_field_mean_non_existent [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, mean_f64_field_mean_non_existent:Null;N]
+                Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, mean_mean:Null;N]
+                  Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, NULL AS mean_mean [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, mean_mean:Null;N]
                    Aggregate: groupBy=[[data.foo]], aggr=[[]] [foo:Dictionary(Int32, Utf8);N]
                      TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
                "###);

                // Aggregate expression selecting tag, should treat as non-existent
                assert_snapshot!(plan("SELECT MEAN(f64_field), MEAN(f64_field) + MEAN(non_existent) FROM data"), @r###"
-                Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), mean:Float64;N, mean_f64_field_mean_non_existent:Null;N]
-                  Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, AVG(data.f64_field) AS mean, NULL AS mean_f64_field_mean_non_existent [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), mean:Float64;N, mean_f64_field_mean_non_existent:Null;N]
+                Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), mean:Float64;N, mean_mean:Null;N]
+                  Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, AVG(data.f64_field) AS mean, NULL AS mean_mean [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), mean:Float64;N, mean_mean:Null;N]
                    Aggregate: groupBy=[[]], aggr=[[AVG(data.f64_field)]] [AVG(data.f64_field):Float64;N]
                      TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
                "###);
@ -3421,8 +3347,8 @@ mod test {

                // Aggregates as part of a binary expression
                assert_snapshot!(plan("SELECT COUNT(f64_field) + MEAN(f64_field) FROM data GROUP BY TIME(10s) FILL(3.2)"), @r###"
-                Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count_f64_field_mean_f64_field:Float64;N]
-                  Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(3)) + coalesce_struct(AVG(data.f64_field), Float64(3.2)) AS count_f64_field_mean_f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count_f64_field_mean_f64_field:Float64;N]
+                Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count_mean:Float64;N]
+                  Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(3)) + coalesce_struct(AVG(data.f64_field), Float64(3.2)) AS count_mean [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count_mean:Float64;N]
                    GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field), AVG(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N, AVG(data.f64_field):Float64;N]
                      Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field), AVG(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N, AVG(data.f64_field):Float64;N]
                        TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
--- a/iox_query_influxql/src/plan/rewriter.rs
+++ b/iox_query_influxql/src/plan/rewriter.rs
@ -1,7 +1,7 @@
-use crate::plan::expr_type_evaluator::evaluate_type;
+use crate::plan::expr_type_evaluator::TypeEvaluator;
 use crate::plan::field::{field_by_name, field_name};
 use crate::plan::field_mapper::{field_and_dimensions, FieldTypeMap, TagSet};
-use crate::plan::ir::{Select, TableReference};
+use crate::plan::ir::{DataSource, Select};
 use crate::plan::{error, util, SchemaProvider};
 use datafusion::common::{DataFusionError, Result};
 use influxdb_influxql_parser::common::{MeasurementName, QualifiedMeasurementName};
@ -16,40 +16,39 @@ use influxdb_influxql_parser::select::{
    Dimension, Field, FromMeasurementClause, GroupByClause, MeasurementSelection, SelectStatement,
 };
 use itertools::Itertools;
-use std::borrow::Borrow;
 use std::collections::{HashMap, HashSet};
 use std::ops::{ControlFlow, Deref};

 /// Recursively rewrite the specified [`SelectStatement`] by performing a series of passes
 /// to validate and normalize the statement.
-pub(super) fn rewrite_statement(
-    s: &dyn SchemaProvider,
-    q: &SelectStatement,
-) -> Result<SelectStatement> {
+pub(super) fn rewrite_statement(s: &dyn SchemaProvider, q: &SelectStatement) -> Result<Select> {
    let mut stmt = map_select(s, q)?;
    from_drop_empty(s, &mut stmt);
    field_list_normalize_time(&mut stmt);
    field_list_rewrite_aliases(&mut stmt.fields)?;

-    Ok(stmt.into())
+    Ok(stmt)
 }

 /// Map a `SelectStatement` to a `Select`, which is an intermediate representation to be
-/// used by the InfluxQL planner.
+/// used by the InfluxQL planner. Mapping also expands any wildcards in the `FROM` and
+/// projection clauses.
 ///
 /// # NOTE
 ///
 /// The goal is that `Select` will eventually be used by the InfluxQL planner.
 pub(super) fn map_select(s: &dyn SchemaProvider, stmt: &SelectStatement) -> Result<Select> {
+    check_features(stmt)?;
+
    let mut sel = Select {
        fields: vec![],
        from: vec![],
-        condition: stmt.condition.as_ref().map(|v| (**v).clone()),
+        condition: stmt.condition.clone(),
        group_by: stmt.group_by.clone(),
        fill: stmt.fill,
        order_by: stmt.order_by,
-        limit: stmt.limit.map(|v| *v),
-        offset: stmt.offset.map(|v| *v),
+        limit: stmt.limit,
+        offset: stmt.offset,
        timezone: stmt.timezone.map(|v| *v),
    };
    from_expand_wildcards(s, stmt, &mut sel)?;
@ -58,6 +57,24 @@ pub(super) fn map_select(s: &dyn SchemaProvider, stmt: &SelectStatement) -> Resu
    Ok(sel)
 }

+/// Asserts that the `SELECT` statement does not use any unimplemented features.
+///
+/// The list of unimplemented or unsupported features are listed below.
+///
+/// # `SLIMIT` and `SOFFSET`
+///
+/// * `SLIMIT` and `SOFFSET` don't work as expected per issue [#7571]
+/// * This issue [is noted](https://docs.influxdata.com/influxdb/v1.8/query_language/explore-data/#the-slimit-clause) in our official documentation
+///
+/// [#7571]: https://github.com/influxdata/influxdb/issues/7571
+fn check_features(stmt: &SelectStatement) -> Result<()> {
+    if stmt.series_limit.is_some() || stmt.series_offset.is_some() {
+        return error::not_implemented("SLIMIT or SOFFSET");
+    }
+
+    Ok(())
+}
+
 /// Ensure the time field is added to all projections,
 /// and is moved to the first position, which is a requirement
 /// for InfluxQL compatibility.
@ -99,7 +116,7 @@ fn field_list_normalize_time(stmt: &mut Select) {
    normalize_time(stmt, false);

    for stmt in stmt.from.iter_mut().filter_map(|ms| match ms {
-        TableReference::Subquery(stmt) => Some(stmt),
+        DataSource::Subquery(stmt) => Some(stmt),
        _ => None,
    }) {
        normalize_time(stmt, true)
@ -121,7 +138,7 @@ fn from_expand_wildcards(
                    ..
                } => {
                    if s.table_exists(name) {
-                        new_from.push(TableReference::Name(name.deref().to_owned()))
+                        new_from.push(DataSource::Table(name.deref().to_owned()))
                    }
                }
                QualifiedMeasurementName {
@ -132,11 +149,11 @@ fn from_expand_wildcards(
                    s.table_names()
                        .into_iter()
                        .filter(|table| re.is_match(table))
-                        .for_each(|table| new_from.push(TableReference::Name(table.to_owned())));
+                        .for_each(|table| new_from.push(DataSource::Table(table.to_owned())));
                }
            },
            MeasurementSelection::Subquery(q) => {
-                new_from.push(TableReference::Subquery(Box::new(map_select(s, q)?)))
+                new_from.push(DataSource::Subquery(Box::new(map_select(s, q)?)))
            }
        }
    }
@ -150,7 +167,7 @@ fn from_drop_empty(s: &dyn SchemaProvider, stmt: &mut Select) {
    use schema::InfluxColumnType;
    stmt.from.retain_mut(|tr| {
        match tr {
-            TableReference::Name(name) => {
+            DataSource::Table(name) => {
                // drop any measurements that have no matching fields in the
                // projection

@ -168,7 +185,7 @@ fn from_drop_empty(s: &dyn SchemaProvider, stmt: &mut Select) {
                    false
                }
            }
-            TableReference::Subquery(q) => {
+            DataSource::Subquery(q) => {
                from_drop_empty(s, q);
                if q.from.is_empty() {
                    return false;
@ -191,14 +208,14 @@ fn from_drop_empty(s: &dyn SchemaProvider, stmt: &mut Select) {
 /// Determine the merged fields and tags of the `FROM` clause.
 fn from_field_and_dimensions(
    s: &dyn SchemaProvider,
-    from: &[TableReference],
+    from: &[DataSource],
 ) -> Result<(FieldTypeMap, TagSet)> {
    let mut fs = FieldTypeMap::new();
    let mut ts = TagSet::new();

    for tr in from {
        match tr {
-            TableReference::Name(name) => {
+            DataSource::Table(name) => {
                let (field_set, tag_set) = match field_and_dimensions(s, name.as_str())? {
                    Some(res) => res,
                    None => continue,
@ -220,11 +237,11 @@ fn from_field_and_dimensions(

                ts.extend(tag_set);
            }
-            TableReference::Subquery(select) => {
+            DataSource::Subquery(select) => {
+                let tv = TypeEvaluator::new(s, &select.from);
                for f in &select.fields {
-                    let dt = match evaluate_type(s, &f.expr, &select.from)? {
-                        Some(dt) => dt,
-                        None => continue,
+                    let Some(dt) = tv.eval_type(&f.expr)? else {
+                        continue
                    };

                    let name = field_name(f);
@ -332,16 +349,14 @@ fn field_list_expand_wildcards(
    // Attempt to rewrite all variable references in the fields with their types, if one
    // hasn't been specified.
    if let ControlFlow::Break(e) = sel.fields.iter_mut().try_for_each(|f| {
-        walk_expr_mut::<DataFusionError>(&mut f.expr, &mut |e| {
-            if matches!(e, Expr::VarRef(_)) {
-                let new_type = match evaluate_type(s, e.borrow(), &sel.from) {
-                    Err(e) => ControlFlow::Break(e)?,
-                    Ok(v) => v,
-                };
+        let tv = TypeEvaluator::new(s, &sel.from);

-                if let Expr::VarRef(v) = e {
-                    v.data_type = new_type;
-                }
+        walk_expr_mut::<DataFusionError>(&mut f.expr, &mut |e| {
+            if let Expr::VarRef(ref mut v) = e {
+                v.data_type = match tv.eval_var_ref(v) {
+                    Ok(v) => v,
+                    Err(e) => ControlFlow::Break(e)?,
+                };
            }
            ControlFlow::Continue(())
        })
@ -690,7 +705,7 @@ struct FieldChecker {
 }

 impl FieldChecker {
-    fn check_fields(&mut self, q: &SelectStatement) -> Result<ProjectionType> {
+    fn check_fields(&mut self, q: &Select) -> Result<ProjectionType> {
        q.fields.iter().try_for_each(|f| self.check_expr(&f.expr))?;

        match self.function_count() {
@ -1300,7 +1315,7 @@ pub(crate) struct SelectStatementInfo {
 ///
 /// * Are not combined with other aggregate, selector or window-like functions and may
 ///   only project additional fields
-pub(crate) fn select_statement_info(q: &SelectStatement) -> Result<SelectStatementInfo> {
+pub(super) fn select_statement_info(q: &Select) -> Result<SelectStatementInfo> {
    let has_group_by_time = q
        .group_by
        .as_ref()
@ -1319,8 +1334,9 @@ pub(crate) fn select_statement_info(q: &SelectStatement) -> Result<SelectStateme

 #[cfg(test)]
 mod test {
+    use crate::plan::ir::Select;
    use crate::plan::rewriter::{
-        has_wildcards, rewrite_statement, select_statement_info, ProjectionType,
+        has_wildcards, map_select, rewrite_statement, select_statement_info, ProjectionType,
    };
    use crate::plan::test_utils::{parse_select, MockSchemaProvider};
    use assert_matches::assert_matches;
@ -1329,6 +1345,12 @@ mod test {

    #[test]
    fn test_select_statement_info() {
+        let namespace = MockSchemaProvider::default();
+        let parse_select = |s: &str| -> Select {
+            let select = parse_select(s);
+            map_select(&namespace, &select).unwrap()
+        };
+
        let info = select_statement_info(&parse_select("SELECT foo, bar FROM cpu")).unwrap();
        assert_matches!(info.projection_type, ProjectionType::Raw);

@ -1368,6 +1390,12 @@ mod test {
    /// by `select_statement_info`.
    #[test]
    fn test_select_statement_info_functions() {
+        let namespace = MockSchemaProvider::default();
+        let parse_select = |s: &str| -> Select {
+            let select = parse_select(s);
+            map_select(&namespace, &select).unwrap()
+        };
+
        // percentile
        let sel = parse_select("SELECT percentile(foo, 2) FROM cpu");
        select_statement_info(&sel).unwrap();
@ -1582,16 +1610,10 @@ mod test {
        select_statement_info(&sel).unwrap();
        let sel = parse_select("SELECT count(distinct('foo')) FROM cpu");
        assert_error!(select_statement_info(&sel), DataFusionError::Plan(ref s) if s == "expected field argument in distinct()");
-        let sel = parse_select("SELECT count(distinct foo) FROM cpu");
-        assert_error!(select_statement_info(&sel), DataFusionError::External(ref s) if s.to_string() == "InfluxQL internal error: unexpected distinct clause in count");

        // Test rules for math functions
        let sel = parse_select("SELECT abs(usage_idle) FROM cpu");
        select_statement_info(&sel).unwrap();
-        let sel = parse_select("SELECT abs(*) + ceil(foo) FROM cpu");
-        assert_error!(select_statement_info(&sel), DataFusionError::External(ref s) if s.to_string() == "InfluxQL internal error: unexpected wildcard");
-        let sel = parse_select("SELECT abs(/f/) + ceil(foo) FROM cpu");
-        assert_error!(select_statement_info(&sel), DataFusionError::External(ref s) if s.to_string() == "InfluxQL internal error: unexpected regex");

        // Fallible

@ -1611,350 +1633,423 @@ mod test {
        let sel = parse_select("SELECT foo, 1 FROM cpu");
        assert_error!(select_statement_info(&sel), DataFusionError::Plan(ref s) if s == "field must contain at least one variable");

-        // wildcard expansion is not supported in binary expressions for aggregates
-        let sel = parse_select("SELECT count(*) + count(foo) FROM cpu");
-        assert_error!(select_statement_info(&sel), DataFusionError::External(ref s) if s.to_string() == "InfluxQL internal error: unexpected wildcard or regex");
-
-        // regex expansion is not supported in binary expressions
-        let sel = parse_select("SELECT sum(/foo/) + count(foo) FROM cpu");
-        assert_error!(select_statement_info(&sel), DataFusionError::External(ref s) if s.to_string() == "InfluxQL internal error: unexpected wildcard or regex");
-
        // aggregate functions require a field reference
        let sel = parse_select("SELECT sum(1) FROM cpu");
        assert_error!(select_statement_info(&sel), DataFusionError::Plan(ref s) if s == "expected field argument in sum(), got Literal(Integer(1))");
    }

-    #[test]
-    fn test_rewrite_statement() {
-        let namespace = MockSchemaProvider::default();
-        // Exact, match
-        let stmt = parse_select("SELECT usage_user FROM cpu");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, usage_user::float AS usage_user FROM cpu"
-        );
+    mod rewrite_statement {
+        use super::*;
+        use datafusion::common::Result;
+        use influxdb_influxql_parser::select::SelectStatement;

-        // Duplicate columns do not have conflicting aliases
-        let stmt = parse_select("SELECT usage_user, usage_user FROM cpu");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, usage_user::float AS usage_user, usage_user::float AS usage_user_1 FROM cpu"
-        );
+        /// Test implementation that converts `Select` to `SelectStatement` so that it can be
+        /// converted back to a string.
+        fn rewrite_statement(
+            s: &MockSchemaProvider,
+            q: &SelectStatement,
+        ) -> Result<SelectStatement> {
+            let stmt = super::rewrite_statement(s, q)?;
+            Ok(stmt.into())
+        }

-        // Multiple aliases with no conflicts
-        let stmt = parse_select("SELECT usage_user as usage_user_1, usage_user FROM cpu");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, usage_user::float AS usage_user_1, usage_user::float AS usage_user FROM cpu"
-        );
+        /// Validating types for simple projections
+        #[test]
+        fn projection_simple() {
+            let namespace = MockSchemaProvider::default();

-        // Multiple aliases with conflicts
-        let stmt =
-            parse_select("SELECT usage_user as usage_user_1, usage_user, usage_user, usage_user as usage_user_2, usage_user, usage_user_2 FROM cpu");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(stmt.to_string(), "SELECT time::timestamp AS time, usage_user::float AS usage_user_1, usage_user::float AS usage_user, usage_user::float AS usage_user_3, usage_user::float AS usage_user_2, usage_user::float AS usage_user_4, usage_user_2 AS usage_user_2_1 FROM cpu");
+            // Exact, match
+            let stmt = parse_select("SELECT usage_user FROM cpu");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, usage_user::float AS usage_user FROM cpu"
+            );

-        // Only include measurements with at least one field projection
-        let stmt = parse_select("SELECT usage_idle FROM cpu, disk");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, usage_idle::float AS usage_idle FROM cpu"
-        );
+            // Duplicate columns do not have conflicting aliases
+            let stmt = parse_select("SELECT usage_user, usage_user FROM cpu");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, usage_user::float AS usage_user, usage_user::float AS usage_user_1 FROM cpu"
+            );

-        // Rewriting FROM clause
+            // Multiple aliases with no conflicts
+            let stmt = parse_select("SELECT usage_user as usage_user_1, usage_user FROM cpu");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, usage_user::float AS usage_user_1, usage_user::float AS usage_user FROM cpu"
+            );

-        // Regex, match, fields from multiple measurements
-        let stmt = parse_select("SELECT bytes_free, bytes_read FROM /d/");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, bytes_free::integer AS bytes_free, bytes_read::integer AS bytes_read FROM disk, diskio"
-        );
+            // Multiple aliases with conflicts
+            let stmt =
+                parse_select("SELECT usage_user as usage_user_1, usage_user, usage_user, usage_user as usage_user_2, usage_user, usage_user_2 FROM cpu");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(stmt.to_string(), "SELECT time::timestamp AS time, usage_user::float AS usage_user_1, usage_user::float AS usage_user, usage_user::float AS usage_user_3, usage_user::float AS usage_user_2, usage_user::float AS usage_user_4, usage_user_2 AS usage_user_2_1 FROM cpu");

-        // Regex matches multiple measurement, but only one has a matching field
-        let stmt = parse_select("SELECT bytes_free FROM /d/");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, bytes_free::integer AS bytes_free FROM disk"
-        );
+            // Only include measurements with at least one field projection
+            let stmt = parse_select("SELECT usage_idle FROM cpu, disk");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, usage_idle::float AS usage_idle FROM cpu"
+            );

-        // Exact, no match
-        let stmt = parse_select("SELECT usage_idle FROM foo");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert!(stmt.from.is_empty());
+            // Field does not exist in single measurement
+            let stmt = parse_select("SELECT usage_idle, bytes_free FROM cpu");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, usage_idle::float AS usage_idle, bytes_free AS bytes_free FROM cpu"
+            );

-        // Regex, no match
-        let stmt = parse_select("SELECT bytes_free FROM /^d$/");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert!(stmt.from.is_empty());
+            // Field exists in each measurement
+            let stmt = parse_select("SELECT usage_idle, bytes_free FROM cpu, disk");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, usage_idle::float AS usage_idle, bytes_free::integer AS bytes_free FROM cpu, disk"
+            );
+        }

-        // Rewriting projection list
+        /// Validate the expansion of the `FROM` clause using regular expressions
+        #[test]
+        fn from_expand_wildcards() {
+            let namespace = MockSchemaProvider::default();

-        // Single wildcard, single measurement
-        let stmt = parse_select("SELECT * FROM cpu");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, cpu::tag AS cpu, host::tag AS host, region::tag AS region, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu"
-        );
+            // Regex, match, fields from multiple measurements
+            let stmt = parse_select("SELECT bytes_free, bytes_read FROM /d/");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, bytes_free::integer AS bytes_free, bytes_read::integer AS bytes_read FROM disk, diskio"
+            );

-        let stmt = parse_select("SELECT * FROM cpu, disk");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, bytes_free::integer AS bytes_free, bytes_used::integer AS bytes_used, cpu::tag AS cpu, device::tag AS device, host::tag AS host, region::tag AS region, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu, disk"
-        );
+            // Regex matches multiple measurement, but only one has a matching field
+            let stmt = parse_select("SELECT bytes_free FROM /d/");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, bytes_free::integer AS bytes_free FROM disk"
+            );

-        // Regular expression selects fields from multiple measurements
-        let stmt = parse_select("SELECT /usage|bytes/ FROM cpu, disk");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, bytes_free::integer AS bytes_free, bytes_used::integer AS bytes_used, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu, disk"
-        );
+            // Exact, no match
+            let stmt = parse_select("SELECT usage_idle FROM foo");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert!(stmt.from.is_empty());

-        // Selective wildcard for tags
-        let stmt = parse_select("SELECT *::tag, usage_idle FROM cpu");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, cpu::tag AS cpu, host::tag AS host, region::tag AS region, usage_idle::float AS usage_idle FROM cpu"
-        );
+            // Regex, no match
+            let stmt = parse_select("SELECT bytes_free FROM /^d$/");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert!(stmt.from.is_empty());
+        }

-        // Selective wildcard for tags only should not select any measurements
-        let stmt = parse_select("SELECT *::tag FROM cpu");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert!(stmt.from.is_empty());
+        /// Expanding the projection using wildcards
+        #[test]
+        fn projection_expand_wildcards() {
+            let namespace = MockSchemaProvider::default();

-        // Selective wildcard for fields
-        let stmt = parse_select("SELECT *::field FROM cpu");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu"
-        );
+            // Single wildcard, single measurement
+            let stmt = parse_select("SELECT * FROM cpu");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, cpu::tag AS cpu, host::tag AS host, region::tag AS region, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu"
+            );

-        // Mixed fields and wildcards
-        let stmt = parse_select("SELECT usage_idle, *::tag FROM cpu");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, usage_idle::float AS usage_idle, cpu::tag AS cpu, host::tag AS host, region::tag AS region FROM cpu"
-        );
+            let stmt = parse_select("SELECT * FROM cpu, disk");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, bytes_free::integer AS bytes_free, bytes_used::integer AS bytes_used, cpu::tag AS cpu, device::tag AS device, host::tag AS host, region::tag AS region, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu, disk"
+            );

-        // GROUP BY expansion
+            // Regular expression selects fields from multiple measurements
+            let stmt = parse_select("SELECT /usage|bytes/ FROM cpu, disk");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, bytes_free::integer AS bytes_free, bytes_used::integer AS bytes_used, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu, disk"
+            );

-        let stmt = parse_select("SELECT usage_idle FROM cpu GROUP BY host");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, usage_idle::float AS usage_idle FROM cpu GROUP BY host"
-        );
+            // Selective wildcard for tags
+            let stmt = parse_select("SELECT *::tag, usage_idle FROM cpu");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, cpu::tag AS cpu, host::tag AS host, region::tag AS region, usage_idle::float AS usage_idle FROM cpu"
+            );

-        let stmt = parse_select("SELECT usage_idle FROM cpu GROUP BY *");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, usage_idle::float AS usage_idle FROM cpu GROUP BY cpu, host, region"
-        );
+            // Selective wildcard for tags only should not select any measurements
+            let stmt = parse_select("SELECT *::tag FROM cpu");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert!(stmt.from.is_empty());

-        // Does not include tags in projection when expanded in GROUP BY
-        let stmt = parse_select("SELECT * FROM cpu GROUP BY *");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu GROUP BY cpu, host, region"
-        );
+            // Selective wildcard for fields
+            let stmt = parse_select("SELECT *::field FROM cpu");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu"
+            );

-        // Does include explicitly listed tags in projection
-        let stmt = parse_select("SELECT host, * FROM cpu GROUP BY *");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, host::tag AS host, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu GROUP BY cpu, host, region"
-        );
+            // Mixed fields and wildcards
+            let stmt = parse_select("SELECT usage_idle, *::tag FROM cpu");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, usage_idle::float AS usage_idle, cpu::tag AS cpu, host::tag AS host, region::tag AS region FROM cpu"
+            );

-        // Fallible
+            let stmt = parse_select("SELECT * FROM merge_00, merge_01");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, col0::float AS col0, col0::tag AS col0_1, col1::float AS col1, col1::tag AS col1_1, col2::string AS col2, col3::string AS col3 FROM merge_00, merge_01"
+            );

-        // Invalid regex
-        let stmt = parse_select("SELECT usage_idle FROM /(not/");
-        let err = rewrite_statement(&namespace, &stmt).unwrap_err();
-        assert_contains!(err.to_string(), "invalid regular expression");
+            // This should only select merge_01, as col0 is a tag in merge_00
+            let stmt = parse_select("SELECT /col0/ FROM merge_00, merge_01");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, col0::float AS col0, col0::tag AS col0_1 FROM merge_01"
+            );
+        }

-        // Subqueries
+        #[test]
+        fn group_by() {
+            let namespace = MockSchemaProvider::default();

-        // Subquery, exact, match
-        let stmt = parse_select("SELECT usage_idle FROM (SELECT usage_idle FROM cpu)");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, usage_idle::float AS usage_idle FROM (SELECT time::timestamp AS time, usage_idle::float FROM cpu)"
-        );
+            let stmt = parse_select("SELECT usage_idle FROM cpu GROUP BY host");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, usage_idle::float AS usage_idle FROM cpu GROUP BY host"
+            );

-        // Subquery, regex, match
-        let stmt = parse_select("SELECT bytes_free FROM (SELECT bytes_free, bytes_read FROM /d/)");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, bytes_free::integer AS bytes_free FROM (SELECT time::timestamp AS time, bytes_free::integer, bytes_read::integer FROM disk, diskio)"
-        );
+            let stmt = parse_select("SELECT usage_idle FROM cpu GROUP BY *");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, usage_idle::float AS usage_idle FROM cpu GROUP BY cpu, host, region"
+            );

-        // Subquery, exact, no match
-        let stmt = parse_select("SELECT usage_idle FROM (SELECT usage_idle FROM foo)");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert!(stmt.from.is_empty());
+            // Does not include tags in projection when expanded in GROUP BY
+            let stmt = parse_select("SELECT * FROM cpu GROUP BY *");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu GROUP BY cpu, host, region"
+            );

-        // Subquery, regex, no match
-        let stmt = parse_select("SELECT bytes_free FROM (SELECT bytes_free FROM /^d$/)");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert!(stmt.from.is_empty());
+            // Does include explicitly listed tags in projection
+            let stmt = parse_select("SELECT host, * FROM cpu GROUP BY *");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, host::tag AS host, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu GROUP BY cpu, host, region"
+            );
+        }

-        // Correct data type is resolved from subquery
-        let stmt = parse_select("SELECT *::field FROM (SELECT usage_system + usage_idle FROM cpu)");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, usage_system_usage_idle::float AS usage_system_usage_idle FROM (SELECT time::timestamp AS time, usage_system::float + usage_idle::float FROM cpu)"
-        );
+        /// Uncategorized fallible cases
+        #[test]
+        fn fallible() {
+            let namespace = MockSchemaProvider::default();

-        // Subquery, no fields projected should be dropped
-        let stmt = parse_select("SELECT usage_idle FROM cpu, (SELECT usage_system FROM cpu)");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, usage_idle::float AS usage_idle FROM cpu"
-        );
+            // Invalid regex
+            let stmt = parse_select("SELECT usage_idle FROM /(not/");
+            let err = rewrite_statement(&namespace, &stmt).unwrap_err();
+            assert_contains!(err.to_string(), "invalid regular expression");

-        // Outer query are permitted to project tags only, as long as there are other fields
-        // in the subquery
-        let stmt = parse_select("SELECT cpu FROM (SELECT cpu, usage_system FROM cpu)");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, cpu::tag AS cpu FROM (SELECT time::timestamp AS time, cpu::tag, usage_system::float FROM cpu)"
-        );
+            let stmt = parse_select("SELECT *::field + *::tag FROM cpu");
+            let err = rewrite_statement(&namespace, &stmt).unwrap_err();
+            assert_eq!(
+                err.to_string(),
+                "Error during planning: unsupported expression: contains a wildcard or regular expression"
+            );

-        // Outer FROM should be empty, as the subquery does not project any fields
-        let stmt = parse_select("SELECT cpu FROM (SELECT cpu FROM cpu)");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert!(stmt.from.is_empty());
+            let stmt = parse_select("SELECT COUNT(*::tag) FROM cpu");
+            let err = rewrite_statement(&namespace, &stmt).unwrap_err();
+            assert_eq!(
+                err.to_string(),
+                "Error during planning: unable to use tag as wildcard in count()"
+            );

-        // Binary expression
-        let stmt = parse_select("SELECT bytes_free+bytes_used FROM disk");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, bytes_free::integer + bytes_used::integer AS bytes_free_bytes_used FROM disk"
-        );
+            let stmt = parse_select("SELECT usage_idle FROM cpu SLIMIT 1");
+            let err = rewrite_statement(&namespace, &stmt).unwrap_err();
+            assert_eq!(
+                err.to_string(),
+                "This feature is not implemented: SLIMIT or SOFFSET"
+            );

-        // Unary expressions
-        let stmt = parse_select("SELECT -bytes_free FROM disk");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, -1 * bytes_free::integer AS bytes_free FROM disk"
-        );
+            let stmt = parse_select("SELECT usage_idle FROM cpu SOFFSET 1");
+            let err = rewrite_statement(&namespace, &stmt).unwrap_err();
+            assert_eq!(
+                err.to_string(),
+                "This feature is not implemented: SLIMIT or SOFFSET"
+            );
+        }

-        // DISTINCT clause
+        /// Verify subqueries
+        #[test]
+        fn subqueries() {
+            let namespace = MockSchemaProvider::default();

-        // COUNT(DISTINCT)
-        let stmt = parse_select("SELECT COUNT(DISTINCT bytes_free) FROM disk");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, count(distinct(bytes_free::integer)) AS count FROM disk"
-        );
+            // Subquery, exact, match
+            let stmt = parse_select("SELECT usage_idle FROM (SELECT usage_idle FROM cpu)");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, usage_idle::float AS usage_idle FROM (SELECT time::timestamp AS time, usage_idle::float FROM cpu)"
+            );

-        let stmt = parse_select("SELECT DISTINCT bytes_free FROM disk");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, distinct(bytes_free::integer) AS \"distinct\" FROM disk"
-        );
+            // Subquery, regex, match
+            let stmt =
+                parse_select("SELECT bytes_free FROM (SELECT bytes_free, bytes_read FROM /d/)");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, bytes_free::integer AS bytes_free FROM (SELECT time::timestamp AS time, bytes_free::integer, bytes_read::integer FROM disk, diskio)"
+            );

-        // Call expressions
+            // Subquery, exact, no match
+            let stmt = parse_select("SELECT usage_idle FROM (SELECT usage_idle FROM foo)");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert!(stmt.from.is_empty());

-        let stmt = parse_select("SELECT COUNT(field_i64) FROM temp_01");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, count(field_i64::integer) AS count FROM temp_01"
-        );
+            // Subquery, regex, no match
+            let stmt = parse_select("SELECT bytes_free FROM (SELECT bytes_free FROM /^d$/)");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert!(stmt.from.is_empty());

-        // Duplicate aggregate columns
-        let stmt = parse_select("SELECT COUNT(field_i64), COUNT(field_i64) FROM temp_01");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, count(field_i64::integer) AS count, count(field_i64::integer) AS count_1 FROM temp_01"
-        );
+            // Correct data type is resolved from subquery
+            let stmt =
+                parse_select("SELECT *::field FROM (SELECT usage_system + usage_idle FROM cpu)");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, usage_system_usage_idle::float AS usage_system_usage_idle FROM (SELECT time::timestamp AS time, usage_system::float + usage_idle::float FROM cpu)"
+            );

-        let stmt = parse_select("SELECT COUNT(field_f64) FROM temp_01");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, count(field_f64::float) AS count FROM temp_01"
-        );
+            // Subquery, no fields projected should be dropped
+            let stmt = parse_select("SELECT usage_idle FROM cpu, (SELECT usage_system FROM cpu)");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, usage_idle::float AS usage_idle FROM cpu"
+            );

-        // Expands all fields
-        let stmt = parse_select("SELECT COUNT(*) FROM temp_01");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, count(field_f64::float) AS count_field_f64, count(field_i64::integer) AS count_field_i64, count(field_str::string) AS count_field_str, count(field_u64::unsigned) AS count_field_u64, count(shared_field0::float) AS count_shared_field0 FROM temp_01"
-        );
+            // Outer query are permitted to project tags only, as long as there are other fields
+            // in the subquery
+            let stmt = parse_select("SELECT cpu FROM (SELECT cpu, usage_system FROM cpu)");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, cpu::tag AS cpu FROM (SELECT time::timestamp AS time, cpu::tag, usage_system::float FROM cpu)"
+            );

-        // Expands matching fields
-        let stmt = parse_select("SELECT COUNT(/64$/) FROM temp_01");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, count(field_f64::float) AS count_field_f64, count(field_i64::integer) AS count_field_i64, count(field_u64::unsigned) AS count_field_u64 FROM temp_01"
-        );
+            // Outer FROM should be empty, as the subquery does not project any fields
+            let stmt = parse_select("SELECT cpu FROM (SELECT cpu FROM cpu)");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert!(stmt.from.is_empty());
+        }

-        // Expands only numeric fields
-        let stmt = parse_select("SELECT SUM(*) FROM temp_01");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, sum(field_f64::float) AS sum_field_f64, sum(field_i64::integer) AS sum_field_i64, sum(field_u64::unsigned) AS sum_field_u64, sum(shared_field0::float) AS sum_shared_field0 FROM temp_01"
-        );
+        /// `DISTINCT` clause and `distinct` function
+        #[test]
+        fn projection_distinct() {
+            let namespace = MockSchemaProvider::default();

-        let stmt = parse_select("SELECT * FROM merge_00, merge_01");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, col0::float AS col0, col0::tag AS col0_1, col1::float AS col1, col1::tag AS col1_1, col2::string AS col2, col3::string AS col3 FROM merge_00, merge_01"
-        );
+            // COUNT(DISTINCT)
+            let stmt = parse_select("SELECT COUNT(DISTINCT bytes_free) FROM disk");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, count(distinct(bytes_free::integer)) AS count FROM disk"
+            );

-        // This should only select merge_01, as col0 is a tag in merge_00
-        let stmt = parse_select("SELECT /col0/ FROM merge_00, merge_01");
-        let stmt = rewrite_statement(&namespace, &stmt).unwrap();
-        assert_eq!(
-            stmt.to_string(),
-            "SELECT time::timestamp AS time, col0::float AS col0, col0::tag AS col0_1 FROM merge_01"
-        );
+            let stmt = parse_select("SELECT DISTINCT bytes_free FROM disk");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, distinct(bytes_free::integer) AS \"distinct\" FROM disk"
+            );
+        }

-        // Fallible cases
+        /// Projections with unary and binary expressions
+        #[test]
+        fn projection_unary_binary_expr() {
+            let namespace = MockSchemaProvider::default();

-        let stmt = parse_select("SELECT *::field + *::tag FROM cpu");
-        let err = rewrite_statement(&namespace, &stmt).unwrap_err();
-        assert_eq!(
-            err.to_string(),
-            "Error during planning: unsupported expression: contains a wildcard or regular expression"
-        );
+            // Binary expression
+            let stmt = parse_select("SELECT bytes_free+bytes_used FROM disk");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, bytes_free::integer + bytes_used::integer AS bytes_free_bytes_used FROM disk"
+            );

-        let stmt = parse_select("SELECT COUNT(*::tag) FROM cpu");
-        let err = rewrite_statement(&namespace, &stmt).unwrap_err();
-        assert_eq!(
-            err.to_string(),
-            "Error during planning: unable to use tag as wildcard in count()"
-        );
+            // Unary expressions
+            let stmt = parse_select("SELECT -bytes_free FROM disk");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, -1 * bytes_free::integer AS bytes_free FROM disk"
+            );
+        }
+
+        /// Projections which contain function calls
+        #[test]
+        fn projection_call_expr() {
+            let namespace = MockSchemaProvider::default();
+
+            let stmt = parse_select("SELECT COUNT(field_i64) FROM temp_01");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, count(field_i64::integer) AS count FROM temp_01"
+            );
+
+            // Duplicate aggregate columns
+            let stmt = parse_select("SELECT COUNT(field_i64), COUNT(field_i64) FROM temp_01");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, count(field_i64::integer) AS count, count(field_i64::integer) AS count_1 FROM temp_01"
+            );
+
+            let stmt = parse_select("SELECT COUNT(field_f64) FROM temp_01");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, count(field_f64::float) AS count FROM temp_01"
+            );
+
+            // Expands all fields
+            let stmt = parse_select("SELECT COUNT(*) FROM temp_01");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, count(field_f64::float) AS count_field_f64, count(field_i64::integer) AS count_field_i64, count(field_str::string) AS count_field_str, count(field_u64::unsigned) AS count_field_u64, count(shared_field0::float) AS count_shared_field0 FROM temp_01"
+            );
+
+            // Expands matching fields
+            let stmt = parse_select("SELECT COUNT(/64$/) FROM temp_01");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, count(field_f64::float) AS count_field_f64, count(field_i64::integer) AS count_field_i64, count(field_u64::unsigned) AS count_field_u64 FROM temp_01"
+            );
+
+            // Expands only numeric fields
+            let stmt = parse_select("SELECT SUM(*) FROM temp_01");
+            let stmt = rewrite_statement(&namespace, &stmt).unwrap();
+            assert_eq!(
+                stmt.to_string(),
+                "SELECT time::timestamp AS time, sum(field_f64::float) AS sum_field_f64, sum(field_i64::integer) AS sum_field_i64, sum(field_u64::unsigned) AS sum_field_u64, sum(shared_field0::float) AS sum_shared_field0 FROM temp_01"
+            );
+        }
    }

    #[test]