feat: complete InfluxQL subquery compatibility

Closes #7794
pull/24376/head
Stuart Carnie 2023-05-22 16:22:54 +10:00
parent ccd73a0b32
commit af76865b2c
4 changed files with 180 additions and 217 deletions


@ -2,8 +2,6 @@
//! statement after it has been processed
use crate::plan::rewriter::ProjectionType;
use crate::plan::{error, SchemaProvider};
use datafusion::common::Result;
use influxdb_influxql_parser::common::{
LimitClause, MeasurementName, OffsetClause, OrderByClause, QualifiedMeasurementName,
WhereClause,
@ -13,14 +11,14 @@ use influxdb_influxql_parser::select::{
FieldList, FillClause, FromMeasurementClause, GroupByClause, MeasurementSelection,
SelectStatement, TimeZoneClause,
};
use schema::{InfluxColumnType, Schema};
use schema::InfluxColumnType;
use std::collections::HashSet;
use std::fmt::{Display, Formatter};
/// A set of tag keys.
pub(super) type TagSet = HashSet<String>;
/// Represents a validated and normalized top-level [`SelectStatement]`.
/// Represents a validated and normalized top-level [`SelectStatement`].
#[derive(Debug, Default, Clone)]
pub(super) struct SelectQuery {
pub(super) select: Select,
@ -119,36 +117,6 @@ pub(super) enum DataSource {
Subquery(Box<Select>),
}
impl DataSource {
pub(super) fn schema(&self, s: &dyn SchemaProvider) -> Result<DataSourceSchema<'_>> {
match self {
Self::Table(table_name) => s
.table_schema(table_name)
.map(DataSourceSchema::Table)
.ok_or_else(|| error::map::internal("expected table")),
Self::Subquery(q) => Ok(DataSourceSchema::Subquery(q)),
}
}
}
pub(super) enum DataSourceSchema<'a> {
Table(Schema),
Subquery(&'a Select),
}
impl<'a> DataSourceSchema<'a> {
/// Returns `true` if the specified name is a tag field or a projection of a tag field if
/// the `DataSource` is a subquery.
pub(super) fn is_tag_field(&self, name: &str) -> bool {
match self {
DataSourceSchema::Table(s) => {
matches!(s.field_type_by_name(name), Some(InfluxColumnType::Tag))
}
DataSourceSchema::Subquery(q) => q.tag_set.contains(name),
}
}
}
#[derive(Debug, Clone)]
pub(super) struct Field {
pub(super) expr: Expr,


@ -2,12 +2,12 @@ mod select;
use crate::plan::ir::{DataSource, Field, Select, SelectQuery};
use crate::plan::planner::select::{
fields_to_exprs_no_nulls, make_tag_key_column_meta, plan_with_sort,
fields_to_exprs_no_nulls, make_tag_key_column_meta, plan_with_sort, ProjectionInfo,
};
use crate::plan::planner_time_range_expression::{
duration_expr_to_nanoseconds, expr_to_df_interval_dt, time_range_to_df_expr,
};
use crate::plan::rewriter::{find_tables, rewrite_statement, ProjectionType};
use crate::plan::rewriter::{find_table_names, rewrite_statement, ProjectionType};
use crate::plan::util::{binary_operator_to_df_operator, rebase_expr, Schemas};
use crate::plan::var_ref::var_ref_data_type_to_data_type;
use crate::plan::{error, planner_rewrite_expression};
@ -25,11 +25,11 @@ use datafusion::logical_expr::logical_plan::builder::project;
use datafusion::logical_expr::logical_plan::Analyze;
use datafusion::logical_expr::utils::find_aggregate_exprs;
use datafusion::logical_expr::{
binary_expr, col, date_bin, expr, expr::WindowFunction, lit, lit_timestamp_nano, now,
binary_expr, col, date_bin, expr, expr::WindowFunction, lit, lit_timestamp_nano, now, union,
window_function, Aggregate, AggregateFunction, AggregateUDF, Between, BuiltInWindowFunction,
BuiltinScalarFunction, EmptyRelation, Explain, Expr, ExprSchemable, Extension, GetIndexedField,
LogicalPlan, LogicalPlanBuilder, Operator, PlanType, ScalarUDF, TableSource, ToStringifiedPlan,
WindowFrame, WindowFrameBound, WindowFrameUnits,
LogicalPlan, LogicalPlanBuilder, Operator, PlanType, Projection, ScalarUDF, TableSource,
ToStringifiedPlan, WindowFrame, WindowFrameBound, WindowFrameUnits,
};
use datafusion::prelude::{cast, sum, when, Column};
use datafusion_util::{lit_dict, AsExpr};
@ -39,7 +39,6 @@ use influxdb_influxql_parser::explain::{ExplainOption, ExplainStatement};
use influxdb_influxql_parser::expression::walk::{walk_expr, walk_expression, Expression};
use influxdb_influxql_parser::expression::{
Binary, Call, ConditionalBinary, ConditionalExpression, ConditionalOperator, VarRef,
VarRefDataType,
};
use influxdb_influxql_parser::functions::{
is_aggregate_function, is_now_function, is_scalar_math_function,
@ -181,7 +180,7 @@ impl<'a> Context<'a> {
}
}
/// Returns the combined GROUP BY tags clause from the root
/// Returns the combined `GROUP BY` tags clause from the root
/// and current statement. The list is sorted and guaranteed to be unique.
fn group_by_tags(&self) -> Vec<&str> {
match (self.root_group_by_tags.is_empty(), self.group_by) {
@ -315,18 +314,29 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
vec![]
};
let tables = find_tables(select);
let ProjectionInfo {
fields,
group_by_tag_set,
projection_tag_set,
is_projected,
} = ProjectionInfo::new(&select.fields, &group_by_tags);
let table_names = find_table_names(select);
let sort_by_measurement = table_names.len() > 1;
let mut plans = Vec::new();
for table_name in tables {
for table_name in table_names {
let ctx = Context::new(table_name)
.with_projection_type(select.projection_type)
.with_timezone(select.timezone)
.with_group_by_fill(select)
.with_root_group_by_tags(&group_by_tags);
if let Some(plan) = self.select_to_plan(&ctx, select)? {
plans.push((table_name, plan));
}
let Some(plan) = self.union_from(&ctx, select)? else {
continue;
};
let plan = self.project_select(&ctx, plan, &fields, &group_by_tag_set)?;
plans.push((table_name, plan));
}
let plan = {
@ -334,19 +344,26 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
table_name: &str,
input: LogicalPlan,
) -> Result<LogicalPlan> {
// Prepare new projection with measurement
let proj_exprs =
iter::once(lit_dict(table_name).alias(INFLUXQL_MEASUREMENT_COLUMN_NAME))
.chain(
input
.schema()
.fields()
.iter()
.map(|expr| Expr::Column(expr.unqualified_column())),
)
.collect::<Vec<_>>();
project(input, proj_exprs)
if let LogicalPlan::Projection(Projection { expr, input, .. }) = input {
// Rewrite the existing projection with the measurement name column first
project(
input.deref().clone(),
iter::once(lit_dict(table_name).alias(INFLUXQL_MEASUREMENT_COLUMN_NAME))
.chain(expr),
)
} else {
project(
input.clone(),
iter::once(lit_dict(table_name).alias(INFLUXQL_MEASUREMENT_COLUMN_NAME))
.chain(
input
.schema()
.fields()
.iter()
.map(|expr| Expr::Column(expr.unqualified_column())),
),
)
}
}
let mut iter = plans.into_iter();
@ -376,66 +393,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
iter.try_fold(plan, |prev, (table_name, input)| {
let next = project_with_measurement(table_name, input)?;
LogicalPlanBuilder::from(prev).union(next)?.build()
union(prev, next)
})?
};
// Skip the `time` column
let fields_no_time = &select.fields[1..];
// always start with the time column
let mut fields = vec![select.fields.first().cloned().unwrap()];
// group_by_tag_set : a list of tag columns specified in the GROUP BY clause
// projection_tag_set : a list of tag columns specified exclusively in the SELECT projection
// is_projected : a list of booleans indicating whether matching elements in the
// group_by_tag_set are also projected in the query
let (group_by_tag_set, projection_tag_set, is_projected) = if group_by_tags.is_empty() {
let tag_columns = find_tag_and_unknown_columns(fields_no_time)
.sorted()
.collect::<Vec<_>>();
(vec![], tag_columns, vec![])
} else {
let mut tag_columns =
find_tag_and_unknown_columns(fields_no_time).collect::<HashSet<_>>();
// Find the list of tag keys specified in the `GROUP BY` clause, and
// whether any of the tag keys are also projected in the SELECT list.
let (tag_set, is_projected): (Vec<_>, Vec<_>) = group_by_tags
.into_iter()
.map(|s| (s, tag_columns.contains(s)))
.unzip();
// Tags specified in the `GROUP BY` clause that are not already added to the
// projection must be projected, so they can be used in the group key.
//
// At the end of the loop, the `tag_columns` set will contain the tag columns that
// exist in the projection and not in the `GROUP BY`.
fields.extend(
tag_set
.iter()
.filter_map(|col| match tag_columns.remove(*col) {
true => None,
false => Some(Field {
expr: IQLExpr::VarRef(VarRef {
name: (*col).into(),
data_type: Some(VarRefDataType::Tag),
}),
name: col.to_string(),
data_type: Some(InfluxColumnType::Tag),
}),
}),
);
(
tag_set,
tag_columns.into_iter().sorted().collect::<Vec<_>>(),
is_projected,
)
};
fields.extend(fields_no_time.iter().cloned());
let plan = plan_with_metadata(
plan,
&InfluxQlMetadata {
@ -460,77 +421,38 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
false,
);
plan_with_sort(
let plan = plan_with_sort(
plan,
vec![time_sort_expr.clone()],
sort_by_measurement,
&group_by_tag_set,
&projection_tag_set,
)?;
self.limit(
plan,
select.offset,
select.limit,
vec![time_sort_expr],
true,
sort_by_measurement,
&group_by_tag_set,
&projection_tag_set,
)
}
fn select_to_plan(&self, ctx: &Context<'_>, select: &Select) -> Result<Option<LogicalPlan>> {
// Skip the `time` column
let fields_no_time = &select.fields[1..];
// always start with the time column
let mut fields = vec![select.fields.first().cloned().unwrap()];
// group_by_tag_set : a list of tag columns specified in the GROUP BY clause
// projection_tag_set : a list of tag columns specified exclusively in the SELECT projection
// is_projected : a list of booleans indicating whether matching elements in the
// group_by_tag_set are also projected in the query
let group_by_tags = ctx.group_by_tags();
let (group_by_tag_set, projection_tag_set, is_projected) = if group_by_tags.is_empty() {
let tag_columns = find_tag_and_unknown_columns(fields_no_time)
.sorted()
.collect::<Vec<_>>();
(vec![], tag_columns, vec![])
} else {
let mut tag_columns =
find_tag_and_unknown_columns(fields_no_time).collect::<HashSet<_>>();
// Find the list of tag keys specified in the `GROUP BY` clause, and
// whether any of the tag keys are also projected in the SELECT list.
let (tag_set, is_projected): (Vec<_>, Vec<_>) = group_by_tags
.into_iter()
.map(|s| (s, tag_columns.contains(s)))
.unzip();
// Tags specified in the `GROUP BY` clause that are not already added to the
// projection must be projected, so they can be used in the group key.
//
// At the end of the loop, the `tag_columns` set will contain the tag columns that
// exist in the projection and not in the `GROUP BY`.
fields.extend(
tag_set
.iter()
.filter_map(|col| match tag_columns.remove(*col) {
true => None,
false => Some(Field {
expr: IQLExpr::VarRef(VarRef {
name: (*col).into(),
data_type: Some(VarRefDataType::Tag),
}),
name: col.to_string(),
data_type: Some(InfluxColumnType::Tag),
}),
}),
);
(
tag_set,
tag_columns.into_iter().sorted().collect::<Vec<_>>(),
is_projected,
)
};
fields.extend(fields_no_time.iter().cloned());
fn subquery_to_plan(&self, ctx: &Context<'_>, select: &Select) -> Result<Option<LogicalPlan>> {
let Some(plan) = self.union_from(ctx, select)? else {
return Ok(None)
};
let group_by_tags = ctx.group_by_tags();
let ProjectionInfo {
fields,
group_by_tag_set,
projection_tag_set,
..
} = ProjectionInfo::new(&select.fields, &group_by_tags);
let plan = self.project_select(ctx, plan, &fields, &group_by_tag_set)?;
// the sort planner node must refer to the time column using
@ -554,7 +476,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
&projection_tag_set,
)?;
let plan = self.limit(
Ok(Some(self.limit(
plan,
select.offset,
select.limit,
@ -562,12 +484,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
false,
&group_by_tag_set,
&projection_tag_set,
)?;
Ok(Some(plan))
)?))
}
///
/// Returns a `LogicalPlan` that combines the `FROM` clause as a `UNION ALL`.
fn union_from(&self, ctx: &Context<'_>, select: &Select) -> Result<Option<LogicalPlan>> {
let mut plans = Vec::new();
for ds in &select.from {
@ -608,9 +528,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
let plan = iter
.next()
.ok_or_else(|| error::map::internal("expected plan"))?;
iter.try_fold(plan, |prev, next| {
LogicalPlanBuilder::from(prev).union(next)?.build()
})?
iter.try_fold(plan, union)?
};
Some(plan)
}
@ -848,8 +766,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
/// - `offset`: The number of input rows to skip.
/// - `limit`: The maximum number of rows to return in the output plan per group.
/// - `time_sort_expr`: An `Expr::Sort` referring to the `time` column of the input.
/// - `has_multiple_measurements`: `true` if the `input` produces multiple measurements,
/// and therefore the limit should be applied per measurement and any additional group tags.
/// - `sort_by_measurement`: `true` if the `input` must be sorted by the measurement column.
/// - `group_by_tag_set`: Tag columns from the `input` plan that should be used to partition
/// the `input` plan and sort the `output` plan.
/// - `projection_tag_set`: Additional tag columns that should be used to sort the `output`
@ -861,7 +778,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
offset: Option<OffsetClause>,
limit: Option<LimitClause>,
sort_exprs: Vec<Expr>,
has_multiple_measurements: bool,
sort_by_measurement: bool,
group_by_tag_set: &[&str],
projection_tag_set: &[&str],
) -> Result<LogicalPlan> {
@ -869,7 +786,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
return Ok(input);
}
if group_by_tag_set.is_empty() && !has_multiple_measurements {
if group_by_tag_set.is_empty() && !sort_by_measurement {
// If the query is not grouping by tags, and is a single measurement, the DataFusion
// Limit operator is sufficient.
let skip = offset.map_or(0, |v| *v as usize);
@ -893,7 +810,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
// ) AS iox::row
let order_by = sort_exprs.clone();
let partition_by = if has_multiple_measurements {
let partition_by = if sort_by_measurement {
iter::once(INFLUXQL_MEASUREMENT_COLUMN_NAME.as_expr())
.chain(fields_to_exprs_no_nulls(input.schema(), group_by_tag_set))
.collect::<Vec<_>>()
@ -974,7 +891,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
plan_with_sort(
plan,
sort_exprs,
has_multiple_measurements,
sort_by_measurement,
group_by_tag_set,
projection_tag_set,
)
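
To make the per-group behaviour above concrete: the sketch below is not the planner code, but a standalone toy (with hypothetical measurements and tags) showing the OFFSET/LIMIT-per-(measurement, group-tags) semantics that the ROW_NUMBER window plan implements.

use std::collections::HashMap;

// A toy row: (measurement, tag value, time). The values are hypothetical.
type Row = (&'static str, &'static str, i64);

// Keep at most `limit` rows per (measurement, tag) group after skipping
// `offset` rows — the effect the planner achieves with a ROW_NUMBER window
// partitioned by the measurement column and the group-by tags.
fn limit_per_group(rows: &[Row], offset: usize, limit: usize) -> Vec<Row> {
    let mut seen: HashMap<(&str, &str), usize> = HashMap::new();
    rows.iter()
        .copied()
        .filter(|&(m, tag, _)| {
            let n = seen.entry((m, tag)).or_insert(0);
            *n += 1;
            // Row numbers are 1-based, mirroring ROW_NUMBER().
            *n > offset && *n <= offset + limit
        })
        .collect()
}

fn main() {
    let rows = [
        ("cpu", "host1", 1),
        ("cpu", "host1", 2),
        ("cpu", "host1", 3),
        ("cpu", "host2", 1),
        ("disk", "host1", 1),
        ("disk", "host1", 2),
    ];
    // OFFSET 1 LIMIT 1, applied independently to each (measurement, host) group.
    assert_eq!(
        limit_per_group(&rows, 1, 1),
        vec![("cpu", "host1", 2), ("disk", "host1", 2)]
    );
}
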
@ -1354,8 +1271,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
}
}
/// Generate a list of logical plans for each of the tables references in the `FROM`
/// clause.
/// Generate a logical plan for the specified `DataSource`.
fn plan_from_data_source(
&self,
ctx: &Context<'_>,
@ -1378,7 +1294,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
.with_group_by_fill(select)
.with_root_group_by_tags(ctx.root_group_by_tags);
Ok(self.select_to_plan(&ctx, select)?)
Ok(self.subquery_to_plan(&ctx, select)?)
}
}
}
@ -2229,15 +2145,6 @@ fn is_aggregate_field(f: &Field) -> bool {
.is_break()
}
/// Find all the columns where the resolved data type
/// is a tag or is [`None`], which is unknown.
fn find_tag_and_unknown_columns(fields: &[Field]) -> impl Iterator<Item = &str> {
fields.iter().filter_map(|f| match f.data_type {
Some(InfluxColumnType::Tag) | None => Some(f.name.as_str()),
_ => None,
})
}
fn conditional_op_to_operator(op: ConditionalOperator) -> Result<Operator> {
match op {
ConditionalOperator::Eq => Ok(Operator::Eq),
@ -2472,7 +2379,7 @@ mod test {
#[test]
fn test_find_var_refs() {
use VarRefDataType::*;
use influxdb_influxql_parser::expression::VarRefDataType::*;
macro_rules! var_ref {
($NAME: literal) => {

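One way to picture the `project_with_measurement` rewrite earlier in this file, outside of DataFusion: if the per-table plan is already a projection, the measurement column is spliced in as its first expression; otherwise the plan is wrapped in a new projection. A minimal standalone sketch under those assumptions (the `Plan` type here is a toy, not the crate's `LogicalPlan`):

// A stripped-down logical plan, just enough to mirror the branch above
// (these names are illustrative; they are not the crate's types).
enum Plan {
    // A projection over an input plan: the list of output column names.
    Projection(Vec<String>, Box<Plan>),
    // Any other node, carrying the column names it produces.
    Scan(Vec<String>),
}

// Prepend the measurement column. If the input is already a projection,
// splice the column into it instead of stacking a second projection on top.
fn project_with_measurement(table_name: &str, plan: Plan) -> Plan {
    let measurement = format!("'{table_name}' AS iox::measurement");
    match plan {
        Plan::Projection(mut cols, input) => {
            cols.insert(0, measurement);
            Plan::Projection(cols, input)
        }
        Plan::Scan(cols) => {
            // Fall back to wrapping the node in a new projection that selects
            // the measurement literal followed by the existing columns.
            let projected = std::iter::once(measurement)
                .chain(cols.iter().cloned())
                .collect();
            Plan::Projection(projected, Box::new(Plan::Scan(cols)))
        }
    }
}

fn main() {
    let plan = Plan::Projection(
        vec!["time".to_string(), "usage_idle".to_string()],
        Box::new(Plan::Scan(vec!["time".to_string(), "usage_idle".to_string()])),
    );
    match project_with_measurement("cpu", plan) {
        // The measurement column was spliced into the existing projection,
        // so there is still only a single projection node.
        Plan::Projection(cols, _) => assert_eq!(cols[0], "'cpu' AS iox::measurement"),
        Plan::Scan(_) => unreachable!(),
    }
}
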

@ -5,8 +5,9 @@ use datafusion::logical_expr::{Expr, LogicalPlan, LogicalPlanBuilder};
use datafusion_util::AsExpr;
use generated_types::influxdata::iox::querier::v1::influx_ql_metadata::TagKeyColumn;
use influxdb_influxql_parser::expression::{Expr as IQLExpr, VarRef, VarRefDataType};
use schema::INFLUXQL_MEASUREMENT_COLUMN_NAME;
use std::collections::HashMap;
use itertools::Itertools;
use schema::{InfluxColumnType, INFLUXQL_MEASUREMENT_COLUMN_NAME};
use std::collections::{HashMap, HashSet};
use std::ops::Deref;
pub(super) fn make_tag_key_column_meta(
@ -60,10 +61,6 @@ pub(super) fn plan_with_sort(
group_by_tag_set: &[&str],
projection_tag_set: &[&str],
) -> Result<LogicalPlan> {
// If there are multiple measurements, we need to sort by the measurement column
// NOTE: Ideally DataFusion would maintain the order of the UNION ALL, which would eliminate
// the need to sort by measurement.
// See: https://github.com/influxdata/influxdb_iox/issues/7062
let mut series_sort = if sort_by_measurement {
vec![Expr::sort(
INFLUXQL_MEASUREMENT_COLUMN_NAME.as_expr(),
@ -113,3 +110,92 @@ pub(super) fn fields_to_exprs_no_nulls<'a>(
})
.map(|f| f.as_expr())
}
/// Contains an expanded `SELECT` projection
pub(super) struct ProjectionInfo<'a> {
/// A copy of the `SELECT` fields that includes tags from the `GROUP BY` that were not
/// specified in the original `SELECT` projection.
pub(super) fields: Vec<Field>,
/// A list of tag column names specified in the `GROUP BY` clause.
pub(super) group_by_tag_set: Vec<&'a str>,
/// A list of tag column names specified exclusively in the `SELECT` projection.
pub(super) projection_tag_set: Vec<&'a str>,
/// A list of booleans indicating whether matching elements in the
/// `group_by_tag_set` are also projected in the query.
pub(super) is_projected: Vec<bool>,
}
impl<'a> ProjectionInfo<'a> {
/// Computes a `ProjectionInfo` from the specified `fields` and `group_by_tags`.
pub(super) fn new(fields: &'a [Field], group_by_tags: &'a [&'a str]) -> Self {
// Skip the `time` column
let fields_no_time = &fields[1..];
// always start with the time column
let mut fields = vec![fields.first().cloned().unwrap()];
let (group_by_tag_set, projection_tag_set, is_projected) = if group_by_tags.is_empty() {
let tag_columns = find_tag_and_unknown_columns(fields_no_time)
.sorted()
.collect::<Vec<_>>();
(vec![], tag_columns, vec![])
} else {
let mut tag_columns =
find_tag_and_unknown_columns(fields_no_time).collect::<HashSet<_>>();
// Find the list of tag keys specified in the `GROUP BY` clause, and
// whether any of the tag keys are also projected in the SELECT list.
let (tag_set, is_projected): (Vec<_>, Vec<_>) = group_by_tags
.iter()
.map(|s| (*s, tag_columns.contains(s)))
.unzip();
// Tags specified in the `GROUP BY` clause that are not already added to the
// projection must be projected, so they can be used in the group key.
//
// At the end of the loop, the `tag_columns` set will contain the tag columns that
// exist in the projection and not in the `GROUP BY`.
fields.extend(
tag_set
.iter()
.filter_map(|col| match tag_columns.remove(*col) {
true => None,
false => Some(Field {
expr: IQLExpr::VarRef(VarRef {
name: (*col).into(),
data_type: Some(VarRefDataType::Tag),
}),
name: col.to_string(),
data_type: Some(InfluxColumnType::Tag),
}),
}),
);
(
tag_set,
tag_columns.into_iter().sorted().collect::<Vec<_>>(),
is_projected,
)
};
fields.extend(fields_no_time.iter().cloned());
Self {
fields,
group_by_tag_set,
projection_tag_set,
is_projected,
}
}
}
/// Find all the columns where the resolved data type
/// is a tag or is [`None`], which is unknown.
fn find_tag_and_unknown_columns(fields: &[Field]) -> impl Iterator<Item = &str> {
fields.iter().filter_map(|f| match f.data_type {
Some(InfluxColumnType::Tag) | None => Some(f.name.as_str()),
_ => None,
})
}
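
A standalone illustration of the tag bookkeeping that `ProjectionInfo::new` performs, using plain string slices instead of the crate's `Field` type; it omits the step that synthesizes `Field` entries for group-by tags missing from the projection, and all names are hypothetical:

use std::collections::HashSet;

// Split tag columns the way `ProjectionInfo::new` does: tags named in
// `GROUP BY` form the group-by set, tags that appear only in the SELECT
// projection form the projection set, and `is_projected` records which
// group-by tags were also selected explicitly.
fn partition_tags<'a>(
    projected_tags: &[&'a str],
    group_by_tags: &[&'a str],
) -> (Vec<&'a str>, Vec<&'a str>, Vec<bool>) {
    let mut remaining: HashSet<&str> = projected_tags.iter().copied().collect();

    let (group_by_tag_set, is_projected): (Vec<_>, Vec<_>) = group_by_tags
        .iter()
        .map(|t| (*t, remaining.contains(t)))
        .unzip();

    // Tags that appear in both sets belong to the group-by set only.
    for t in &group_by_tag_set {
        remaining.remove(t);
    }

    let mut projection_tag_set: Vec<&str> = remaining.into_iter().collect();
    projection_tag_set.sort_unstable();

    (group_by_tag_set, projection_tag_set, is_projected)
}

fn main() {
    // SELECT projects tags "host" and "region"; GROUP BY names "host" and "cpu".
    let (group_by, projection, is_projected) =
        partition_tags(&["host", "region"], &["host", "cpu"]);
    assert_eq!(group_by, ["host", "cpu"]);
    assert_eq!(projection, ["region"]);
    assert_eq!(is_projected, [true, false]); // "host" is projected, "cpu" is not
}
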


@ -38,7 +38,9 @@ pub(super) fn rewrite_statement(
Ok(SelectQuery { select })
}
pub(super) fn find_tables(s: &Select) -> Vec<&str> {
/// Find the unique list of table names used by `s`, recursively following all `FROM` clauses,
/// and return the results in lexicographically ascending order.
pub(super) fn find_table_names(s: &Select) -> Vec<&str> {
let mut data_sources = vec![s.from.as_slice()];
let mut tables = Vec::new();
while let Some(from) = data_sources.pop() {
@ -1512,7 +1514,7 @@ mod test {
use super::Result;
use crate::plan::ir::{Field, Select};
use crate::plan::rewriter::{
find_tables, has_wildcards, rewrite_select, rewrite_statement, ProjectionType,
find_table_names, has_wildcards, rewrite_select, rewrite_statement, ProjectionType,
SelectStatementInfo,
};
use crate::plan::test_utils::{parse_select, MockSchemaProvider};
@ -1522,7 +1524,7 @@ mod test {
use test_helpers::{assert_contains, assert_error};
#[test]
fn test_find_tables() {
fn test_find_table_names() {
let namespace = MockSchemaProvider::default();
let parse_select = |s: &str| -> Select {
let select = parse_select(s);
@ -1530,21 +1532,21 @@ mod test {
};
let s = parse_select("SELECT usage_idle FROM cpu");
assert_eq!(find_tables(&s), &["cpu"]);
assert_eq!(find_table_names(&s), &["cpu"]);
let s = parse_select("SELECT usage_idle FROM cpu, disk");
assert_eq!(find_tables(&s), &["cpu", "disk"]);
assert_eq!(find_table_names(&s), &["cpu", "disk"]);
let s = parse_select("SELECT usage_idle FROM disk, cpu, disk");
assert_eq!(find_tables(&s), &["cpu", "disk"]);
assert_eq!(find_table_names(&s), &["cpu", "disk"]);
// subqueries
let s = parse_select("SELECT usage_idle FROM (select * from cpu, disk)");
assert_eq!(find_tables(&s), &["cpu", "disk"]);
assert_eq!(find_table_names(&s), &["cpu", "disk"]);
let s = parse_select("SELECT usage_idle FROM cpu, (select * from cpu, disk)");
assert_eq!(find_tables(&s), &["cpu", "disk"]);
assert_eq!(find_table_names(&s), &["cpu", "disk"]);
}
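
The behaviour these tests pin down can be sketched independently of the rewriter: walk nested `FROM` clauses, collect table names, and return them deduplicated in ascending order. A minimal stand-in (not the crate's `DataSource`):

// A stripped-down stand-in for the planner's `DataSource`: a FROM entry is
// either a named table or a subquery with its own FROM list.
enum FromEntry {
    Table(String),
    Subquery(Vec<FromEntry>),
}

// Collect the unique table names reachable from `from`, recursing through
// subqueries, and return them in lexicographically ascending order.
fn find_table_names(from: &[FromEntry]) -> Vec<&str> {
    let mut pending: Vec<&[FromEntry]> = vec![from];
    let mut names = Vec::new();
    while let Some(level) = pending.pop() {
        for entry in level {
            match entry {
                FromEntry::Table(name) => names.push(name.as_str()),
                FromEntry::Subquery(inner) => pending.push(inner),
            }
        }
    }
    names.sort_unstable();
    names.dedup();
    names
}

fn main() {
    // Shaped like: SELECT usage_idle FROM cpu, (SELECT * FROM cpu, disk)
    let from = vec![
        FromEntry::Table("cpu".to_string()),
        FromEntry::Subquery(vec![
            FromEntry::Table("cpu".to_string()),
            FromEntry::Table("disk".to_string()),
        ]),
    ];
    assert_eq!(find_table_names(&from), ["cpu", "disk"]);
}
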
#[test]