Merge pull request #7102 from influxdata/cn/share-normalization

fix: Use the same normalization code for explain tests as e2e tests do
pull/24376/head
kodiakhq[bot] 2023-03-02 03:18:51 +00:00 committed by GitHub
commit f3267f992a
10 changed files with 249 additions and 259 deletions
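The commit moves the `Normalizer` out of the end-to-end snapshot-comparison helpers and into `arrow_util::test_util`, so the querier's explain tests and the e2e tests now share one normalization path. A minimal sketch of the resulting call pattern, mirroring the `format_explain` change further down (the wrapper function name and the `results` variable are illustrative, not part of this commit):

use arrow::record_batch::RecordBatch;
use arrow_util::test_util::Normalizer;

// Hedged sketch: pretty-print query results and replace parquet paths/UUIDs
// with stable placeholders; all other normalization flags stay at their defaults.
fn normalized_explain_lines(results: Vec<RecordBatch>) -> Vec<String> {
    let normalizer = Normalizer {
        normalized_uuids: true,
        ..Default::default()
    };
    normalizer.normalize_results(results)
}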

Cargo.lock generated
View File

@ -340,8 +340,11 @@ dependencies = [
"datafusion",
"hashbrown 0.13.2",
"num-traits",
"once_cell",
"rand",
"regex",
"snafu",
"uuid",
"workspace-hack",
]
@ -4513,7 +4516,6 @@ dependencies = [
"predicate",
"prost",
"rand",
"regex",
"schema",
"service_common",
"service_grpc_catalog",
@ -5656,7 +5658,6 @@ dependencies = [
"parking_lot 0.12.1",
"prost",
"rand",
"regex",
"reqwest",
"snafu",
"sqlx",
@ -5665,7 +5666,6 @@ dependencies = [
"tokio",
"tokio-util",
"tonic",
"uuid",
"workspace-hack",
]

View File

@ -15,7 +15,10 @@ chrono = { version = "0.4", default-features = false }
comfy-table = { version = "6.1", default-features = false }
hashbrown = { workspace = true }
num-traits = "0.2"
once_cell = { version = "1.17", features = ["parking_lot"] }
regex = "1.7.0"
snafu = "0.7"
uuid = "1"
workspace-hack = { version = "0.1", path = "../workspace-hack" }
[dev-dependencies]

View File

@ -1,6 +1,7 @@
//! A collection of testing functions for arrow based code
use std::sync::Arc;
use crate::display::pretty_format_batches;
use arrow::{
array::{new_null_array, ArrayRef, StringArray},
compute::kernels::sort::{lexsort, SortColumn, SortOptions},
@ -8,6 +9,10 @@ use arrow::{
error::ArrowError,
record_batch::RecordBatch,
};
use once_cell::sync::Lazy;
use regex::{Captures, Regex};
use std::{borrow::Cow, collections::HashMap};
use uuid::Uuid;
/// Compares the formatted output with the pretty formatted results of
/// record batches. This is a macro so errors appear on the correct line
@ -181,3 +186,195 @@ pub fn equalize_batch_schemas(batches: Vec<RecordBatch>) -> Result<Vec<RecordBat
})
.collect())
}
/// Match the parquet UUID
///
/// For example, given
/// `32/51/216/13452/1d325760-2b20-48de-ab48-2267b034133d.parquet`
///
/// matches `1d325760-2b20-48de-ab48-2267b034133d`
static REGEX_UUID: Lazy<Regex> = Lazy::new(|| {
Regex::new("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}").expect("UUID regex")
});
/// Match the parquet directory names
/// For example, given
/// `32/51/216/13452/1d325760-2b20-48de-ab48-2267b034133d.parquet`
///
/// matches `32/51/216/13452`
static REGEX_DIRS: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"[0-9]+/[0-9]+/[0-9]+/[0-9]+"#).expect("directory regex"));
/// Replace table row separators of flexible width with fixed width. This is required
/// because the original timing values may differ in "printed width", so the table
/// cells have different widths and hence the separators / borders. E.g.:
///
/// `+--+--+` -> `----------`
/// `+--+------+` -> `----------`
///
/// Note that we're kinda inexact with our regex here, but it gets the job done.
static REGEX_LINESEP: Lazy<Regex> = Lazy::new(|| Regex::new(r#"[+-]{6,}"#).expect("linesep regex"));
/// Similar to the row separator issue above, the table columns are right-padded
/// with spaces. Due to the different "printed width" of the timing values, we need
/// to normalize this padding as well. E.g.:
///
/// `     |` -> ` |`
/// `        |` -> ` |`
static REGEX_COL: Lazy<Regex> = Lazy::new(|| Regex::new(r#"\s+\|"#).expect("col regex"));
/// Matches lines like `metrics=[foo=1, bar=2]`
static REGEX_METRICS: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"metrics=\[([^\]]*)\]"#).expect("metrics regex"));
/// Matches things like `1s`, `1.2ms` and `10.2μs`
static REGEX_TIMING: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"[0-9]+(\.[0-9]+)?.s"#).expect("timing regex"));
/// Matches things like `FilterExec: time@2 < -9223372036854775808 OR time@2 > 1640995204240217000`
static REGEX_FILTER: Lazy<Regex> =
Lazy::new(|| Regex::new("FilterExec: .*").expect("filter regex"));
fn normalize_for_variable_width(s: Cow<'_, str>) -> String {
let s = REGEX_LINESEP.replace_all(&s, "----------");
REGEX_COL.replace_all(&s, " |").to_string()
}
/// Set of normalizations to apply to the results of a query
#[derive(Debug, PartialEq, Eq, Default, Clone, Copy)]
pub struct Normalizer {
/// If true, results are sorted first
pub sorted_compare: bool,
/// If true, replace UUIDs with static placeholders.
pub normalized_uuids: bool,
/// If true, normalize timings in queries by replacing them with
/// static placeholders, for example:
///
/// `1s` -> `1.234ms`
pub normalized_metrics: bool,
/// If true, normalize filter predicates in explain plans by replacing them
/// with `FilterExec: <REDACTED>`
pub normalized_filters: bool,
}
impl Normalizer {
pub fn new() -> Self {
Default::default()
}
/// Take the output of running the query and apply the specified normalizations to them
pub fn normalize_results(&self, mut results: Vec<RecordBatch>) -> Vec<String> {
// compare against sorted results, if requested
if self.sorted_compare && !results.is_empty() {
let schema = results[0].schema();
let batch =
arrow::compute::concat_batches(&schema, &results).expect("concatenating batches");
results = vec![sort_record_batch(batch)];
}
let mut current_results = pretty_format_batches(&results)
.unwrap()
.trim()
.lines()
.map(|s| s.to_string())
.collect::<Vec<_>>();
// normalize UUIDs, if requested
if self.normalized_uuids {
let mut seen: HashMap<String, u128> = HashMap::new();
current_results = current_results
.into_iter()
.map(|s| {
// Rewrite parquet directory names like
// `32/51/216/13452/1d325760-2b20-48de-ab48-2267b034133d.parquet`
//
// to:
// 1/1/1/1/00000000-0000-0000-0000-000000000000.parquet
let s = REGEX_UUID.replace_all(&s, |s: &Captures<'_>| {
let next = seen.len() as u128;
Uuid::from_u128(
*seen
.entry(s.get(0).unwrap().as_str().to_owned())
.or_insert(next),
)
.to_string()
});
let s = normalize_for_variable_width(s);
REGEX_DIRS.replace_all(&s, "1/1/1/1").to_string()
})
.collect();
}
// normalize metrics, if requested
if self.normalized_metrics {
current_results = current_results
.into_iter()
.map(|s| {
// Replace timings with fixed value, e.g.:
//
// `1s` -> `1.234ms`
// `1.2ms` -> `1.234ms`
// `10.2μs` -> `1.234ms`
let s = REGEX_TIMING.replace_all(&s, "1.234ms");
let s = normalize_for_variable_width(s);
// Metrics are currently ordered by value (not by key), so different timings may
// reorder them. We "parse" the list and normalize the sorting. E.g.:
//
// `metrics=[]` => `metrics=[]`
// `metrics=[foo=1, bar=2]` => `metrics=[bar=2, foo=1]`
// `metrics=[foo=2, bar=1]` => `metrics=[bar=1, foo=2]`
REGEX_METRICS
.replace_all(&s, |c: &Captures<'_>| {
let mut metrics: Vec<_> = c[1].split(", ").collect();
metrics.sort();
format!("metrics=[{}]", metrics.join(", "))
})
.to_string()
})
.collect();
}
// normalize Filters, if requested
//
// Converts:
// FilterExec: time@2 < -9223372036854775808 OR time@2 > 1640995204240217000
//
// to
// FilterExec: <REDACTED>
if self.normalized_filters {
current_results = current_results
.into_iter()
.map(|s| {
REGEX_FILTER
.replace_all(&s, |_: &Captures<'_>| "FilterExec: <REDACTED>")
.to_string()
})
.collect();
}
current_results
}
/// Adds information on what normalizations were applied to the input
pub fn add_description(&self, output: &mut Vec<String>) {
if self.sorted_compare {
output.push("-- Results After Sorting".into())
}
if self.normalized_uuids {
output.push("-- Results After Normalizing UUIDs".into())
}
if self.normalized_metrics {
output.push("-- Results After Normalizing Metrics".into())
}
if self.normalized_filters {
output.push("-- Results After Normalizing Filters".into())
}
}
}
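Besides `normalize_results`, the struct exposes `add_description`, which pushes "-- Results After ..." marker lines into a caller-supplied buffer. A rough sketch of combining the two (assuming `batches: Vec<RecordBatch>` from an earlier query run; whether the markers precede or follow the results is the caller's choice):

let normalizer = Normalizer {
    sorted_compare: true,
    normalized_metrics: true,
    ..Default::default()
};
let mut output: Vec<String> = Vec::new();
// Pushes e.g. "-- Results After Sorting" and "-- Results After Normalizing Metrics"
normalizer.add_description(&mut output);
output.extend(normalizer.normalize_results(batches));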

View File

@ -56,5 +56,4 @@ insta = { version = "1.28.0", features = ["yaml"] }
iox_tests = { path = "../iox_tests" }
mutable_batch_lp = { path = "../mutable_batch_lp" }
object_store_metrics = { path = "../object_store_metrics" }
regex = "1.7.1"
test_helpers = { path = "../test_helpers" }

View File

@ -199,13 +199,12 @@ mod tests {
use super::*;
use crate::namespace::test_util::{clear_parquet_cache, querier_namespace};
use arrow::record_batch::RecordBatch;
use arrow_util::test_util::batches_to_sorted_lines;
use arrow_util::test_util::{batches_to_sorted_lines, Normalizer};
use data_types::ColumnType;
use datafusion::common::DataFusionError;
use iox_query::frontend::sql::SqlQueryPlanner;
use iox_tests::{TestCatalog, TestParquetFileBuilder};
use metric::{Observation, RawReporter};
use regex::Regex;
use snafu::{ResultExt, Snafu};
use trace::{span::SpanStatus, RingBufferTraceCollector};
@ -492,13 +491,13 @@ mod tests {
format_explain(&querier_namespace, "EXPLAIN SELECT * FROM cpu").await,
@r###"
---
- +---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
- "| plan_type | plan |"
- +---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
- "| logical_plan | TableScan: cpu projection=[foo, host, load, time] |"
- "| physical_plan | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/<uuid>.parquet, 1/1/1/1/<uuid>.parquet, 1/1/1/1/<uuid>.parquet, 1/1/2/2/<uuid>.parquet, 1/1/1/3/<uuid>.parquet]]}, projection=[foo, host, load, time] |"
- "| | |"
- +---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
- "----------"
- "| plan_type | plan |"
- "----------"
- "| logical_plan | TableScan: cpu projection=[foo, host, load, time] |"
- "| physical_plan | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000001.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000004.parquet]]}, projection=[foo, host, load, time] |"
- "| | |"
- "----------"
"###
);
@ -509,22 +508,22 @@ mod tests {
format_explain(&querier_namespace, "EXPLAIN SELECT * FROM mem ORDER BY host,time").await,
@r###"
---
- +---------------+--------------------------------------------------------------------------------------------------------------------------------------------------+
- "| plan_type | plan |"
- +---------------+--------------------------------------------------------------------------------------------------------------------------------------------------+
- "| logical_plan | Sort: mem.host ASC NULLS LAST, mem.time ASC NULLS LAST |"
- "| | TableScan: mem projection=[host, perc, time] |"
- "| physical_plan | SortExec: expr=[host@0 ASC NULLS LAST,time@2 ASC NULLS LAST] |"
- "| | CoalescePartitionsExec |"
- "| | UnionExec |"
- "| | CoalesceBatchesExec: target_batch_size=8192 |"
- "| | FilterExec: time@2 < 1 OR time@2 > 13 OR NOT host@0 = CAST(d AS Dictionary(Int32, Utf8)) |"
- "| | ParquetExec: limit=None, partitions={1 group: [[1/2/1/4/<uuid>.parquet]]}, projection=[host, perc, time] |"
- "| | CoalesceBatchesExec: target_batch_size=8192 |"
- "| | FilterExec: time@2 < 1 OR time@2 > 13 OR NOT host@0 = CAST(d AS Dictionary(Int32, Utf8)) |"
- "| | ParquetExec: limit=None, partitions={1 group: [[1/2/1/4/<uuid>.parquet]]}, projection=[host, perc, time] |"
- "| | |"
- +---------------+--------------------------------------------------------------------------------------------------------------------------------------------------+
- "----------"
- "| plan_type | plan |"
- "----------"
- "| logical_plan | Sort: mem.host ASC NULLS LAST, mem.time ASC NULLS LAST |"
- "| | TableScan: mem projection=[host, perc, time] |"
- "| physical_plan | SortExec: expr=[host@0 ASC NULLS LAST,time@2 ASC NULLS LAST] |"
- "| | CoalescePartitionsExec |"
- "| | UnionExec |"
- "| | CoalesceBatchesExec: target_batch_size=8192 |"
- "| | FilterExec: time@2 < 1 OR time@2 > 13 OR NOT host@0 = CAST(d AS Dictionary(Int32, Utf8)) |"
- "| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, projection=[host, perc, time] |"
- "| | CoalesceBatchesExec: target_batch_size=8192 |"
- "| | FilterExec: time@2 < 1 OR time@2 > 13 OR NOT host@0 = CAST(d AS Dictionary(Int32, Utf8)) |"
- "| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, projection=[host, perc, time] |"
- "| | |"
- "----------"
"###
);
@ -567,19 +566,19 @@ mod tests {
format_explain(&querier_namespace, "EXPLAIN SELECT * FROM cpu").await,
@r###"
---
- +---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
- "| plan_type | plan |"
- +---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
- "| logical_plan | TableScan: cpu projection=[foo, host, load, time] |"
- "| physical_plan | UnionExec |"
- "| | DeduplicateExec: [host@1 ASC,time@3 ASC] |"
- "| | SortPreservingMergeExec: [host@1 ASC,time@3 ASC] |"
- "| | UnionExec |"
- "| | ParquetExec: limit=None, partitions={1 group: [[1/1/2/2/<uuid>.parquet]]}, output_ordering=[host@1 ASC, time@3 ASC], projection=[foo, host, load, time] |"
- "| | ParquetExec: limit=None, partitions={1 group: [[1/1/2/2/<uuid>.parquet]]}, output_ordering=[host@1 ASC, time@3 ASC], projection=[foo, host, load, time] |"
- "| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/<uuid>.parquet, 1/1/1/1/<uuid>.parquet, 1/1/1/1/<uuid>.parquet, 1/1/1/3/<uuid>.parquet]]}, projection=[foo, host, load, time] |"
- "| | |"
- +---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
- "----------"
- "| plan_type | plan |"
- "----------"
- "| logical_plan | TableScan: cpu projection=[foo, host, load, time] |"
- "| physical_plan | UnionExec |"
- "| | DeduplicateExec: [host@1 ASC,time@3 ASC] |"
- "| | SortPreservingMergeExec: [host@1 ASC,time@3 ASC] |"
- "| | UnionExec |"
- "| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[host@1 ASC, time@3 ASC], projection=[foo, host, load, time] |"
- "| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[host@1 ASC, time@3 ASC], projection=[foo, host, load, time] |"
- "| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000004.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000005.parquet]]}, projection=[foo, host, load, time] |"
- "| | |"
- "----------"
"###
);
}
@ -599,15 +598,11 @@ mod tests {
async fn format_explain(querier_namespace: &Arc<QuerierNamespace>, sql: &str) -> Vec<String> {
let results = run(querier_namespace, sql, None).await;
let formatted = arrow_util::display::pretty_format_batches(&results).unwrap();
let regex = Regex::new("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}")
.expect("UUID regex");
formatted
.trim()
.split('\n')
.map(|s| regex.replace_all(s, "<uuid>").to_string())
.collect::<Vec<_>>()
let normalizer = Normalizer {
normalized_uuids: true,
..Default::default()
};
normalizer.normalize_results(results)
}
async fn run(

View File

@ -26,7 +26,6 @@ once_cell = { version = "1.17", features = ["parking_lot"] }
parking_lot = "0.12"
prost = "0.11"
rand = "0.8.3"
regex = "1.7.0"
reqwest = { version = "0.11", default-features = false, features = ["json", "rustls-tls"] }
snafu = "0.7"
sqlx = { version = "0.6", features = [ "runtime-tokio-rustls" , "postgres", "uuid" ] }
@ -35,5 +34,4 @@ test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
tokio = { version = "1.25", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] }
tokio-util = "0.7"
tonic = "0.8"
uuid = "1"
workspace-hack = { version = "0.1", path = "../workspace-hack" }

View File

@ -13,7 +13,7 @@ mod grpc;
mod mini_cluster;
mod server_fixture;
mod server_type;
mod snapshot_comparison;
pub mod snapshot_comparison;
mod steps;
mod udp_listener;

View File

@ -1,11 +1,9 @@
mod normalization;
mod queries;
use crate::snapshot_comparison::queries::TestQueries;
use crate::{run_influxql, run_sql, MiniCluster};
use crate::{run_influxql, run_sql, snapshot_comparison::queries::TestQueries, MiniCluster};
use snafu::{OptionExt, ResultExt, Snafu};
use std::fmt::{Display, Formatter};
use std::{
fmt::{Display, Formatter},
fs,
path::{Path, PathBuf},
};

View File

@ -1,199 +0,0 @@
use arrow::record_batch::RecordBatch;
use arrow_util::{display::pretty_format_batches, test_util::sort_record_batch};
use once_cell::sync::Lazy;
use regex::{Captures, Regex};
use std::{borrow::Cow, collections::HashMap};
use uuid::Uuid;
/// Match the parquet UUID
///
/// For example, given
/// `32/51/216/13452/1d325760-2b20-48de-ab48-2267b034133d.parquet`
///
/// matches `1d325760-2b20-48de-ab48-2267b034133d`
static REGEX_UUID: Lazy<Regex> = Lazy::new(|| {
Regex::new("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}").expect("UUID regex")
});
/// Match the parquet directory names
/// For example, given
/// `32/51/216/13452/1d325760-2b20-48de-ab48-2267b034133d.parquet`
///
/// matches `32/51/216/13452`
static REGEX_DIRS: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"[0-9]+/[0-9]+/[0-9]+/[0-9]+"#).expect("directory regex"));
/// Replace table row separators of flexible width with fixed width. This is required
/// because the original timing values may differ in "printed width", so the table
/// cells have different widths and hence the separators / borders. E.g.:
///
/// `+--+--+` -> `----------`
/// `+--+------+` -> `----------`
///
/// Note that we're kinda inexact with our regex here, but it gets the job done.
static REGEX_LINESEP: Lazy<Regex> = Lazy::new(|| Regex::new(r#"[+-]{6,}"#).expect("linesep regex"));
/// Similar to the row separator issue above, the table columns are right-padded
/// with spaces. Due to the different "printed width" of the timing values, we need
/// to normalize this padding as well. E.g.:
///
/// `     |` -> ` |`
/// `        |` -> ` |`
static REGEX_COL: Lazy<Regex> = Lazy::new(|| Regex::new(r#"\s+\|"#).expect("col regex"));
/// Matches lines like `metrics=[foo=1, bar=2]`
static REGEX_METRICS: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"metrics=\[([^\]]*)\]"#).expect("metrics regex"));
/// Matches things like `1s`, `1.2ms` and `10.2μs`
static REGEX_TIMING: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"[0-9]+(\.[0-9]+)?.s"#).expect("timing regex"));
/// Matches things like `FilterExec: time@2 < -9223372036854775808 OR time@2 > 1640995204240217000`
static REGEX_FILTER: Lazy<Regex> =
Lazy::new(|| Regex::new("FilterExec: .*").expect("filter regex"));
fn normalize_for_variable_width(s: Cow<str>) -> String {
let s = REGEX_LINESEP.replace_all(&s, "----------");
REGEX_COL.replace_all(&s, " |").to_string()
}
/// Set of normalizations to apply to the results of a query
#[derive(Debug, PartialEq, Eq, Default)]
pub struct Normalizer {
/// If true, results are sorted first
pub sorted_compare: bool,
/// If true, replace UUIDs with static placeholders.
pub normalized_uuids: bool,
/// If true, normalize timings in queries by replacing them with
/// static placeholders, for example:
///
/// `1s` -> `1.234ms`
pub normalized_metrics: bool,
/// If true, normalize filter predicates in explain plans by replacing them
/// with `FilterExec: <REDACTED>`
pub normalized_filters: bool,
}
impl Normalizer {
#[cfg(test)]
pub fn new() -> Self {
Default::default()
}
/// Take the output of running the query and apply the specified normalizations to them
pub fn normalize_results(&self, mut results: Vec<RecordBatch>) -> Vec<String> {
// compare against sorted results, if requested
if self.sorted_compare && !results.is_empty() {
let schema = results[0].schema();
let batch =
arrow::compute::concat_batches(&schema, &results).expect("concatenating batches");
results = vec![sort_record_batch(batch)];
}
let mut current_results = pretty_format_batches(&results)
.unwrap()
.trim()
.lines()
.map(|s| s.to_string())
.collect::<Vec<_>>();
// normalize UUIDs, if requested
if self.normalized_uuids {
let mut seen: HashMap<String, u128> = HashMap::new();
current_results = current_results
.into_iter()
.map(|s| {
// Rewrite parquet directory names like
// `32/51/216/13452/1d325760-2b20-48de-ab48-2267b034133d.parquet`
//
// to:
// 1/1/1/1/00000000-0000-0000-0000-000000000000.parquet
let s = REGEX_UUID.replace_all(&s, |s: &Captures| {
let next = seen.len() as u128;
Uuid::from_u128(
*seen
.entry(s.get(0).unwrap().as_str().to_owned())
.or_insert(next),
)
.to_string()
});
let s = normalize_for_variable_width(s);
REGEX_DIRS.replace_all(&s, "1/1/1/1").to_string()
})
.collect();
}
// normalize metrics, if requested
if self.normalized_metrics {
current_results = current_results
.into_iter()
.map(|s| {
// Replace timings with fixed value, e.g.:
//
// `1s` -> `1.234ms`
// `1.2ms` -> `1.234ms`
// `10.2μs` -> `1.234ms`
let s = REGEX_TIMING.replace_all(&s, "1.234ms");
let s = normalize_for_variable_width(s);
// Metrics are currently ordered by value (not by key), so different timings may
// reorder them. We "parse" the list and normalize the sorting. E.g.:
//
// `metrics=[]` => `metrics=[]`
// `metrics=[foo=1, bar=2]` => `metrics=[bar=2, foo=1]`
// `metrics=[foo=2, bar=1]` => `metrics=[bar=1, foo=2]`
REGEX_METRICS
.replace_all(&s, |c: &Captures| {
let mut metrics: Vec<_> = c[1].split(", ").collect();
metrics.sort();
format!("metrics=[{}]", metrics.join(", "))
})
.to_string()
})
.collect();
}
// normalize Filters, if requested
//
// Converts:
// FilterExec: time@2 < -9223372036854775808 OR time@2 > 1640995204240217000
//
// to
// FilterExec: <REDACTED>
if self.normalized_filters {
current_results = current_results
.into_iter()
.map(|s| {
REGEX_FILTER
.replace_all(&s, |_: &Captures| "FilterExec: <REDACTED>")
.to_string()
})
.collect();
}
current_results
}
/// Adds information on what normalizations were applied to the input
pub fn add_description(&self, output: &mut Vec<String>) {
if self.sorted_compare {
output.push("-- Results After Sorting".into())
}
if self.normalized_uuids {
output.push("-- Results After Normalizing UUIDs".into())
}
if self.normalized_metrics {
output.push("-- Results After Normalizing Metrics".into())
}
if self.normalized_filters {
output.push("-- Results After Normalizing Filters".into())
}
}
}

View File

@ -1,6 +1,5 @@
use arrow::record_batch::RecordBatch;
use super::normalization::Normalizer;
use arrow_util::test_util::Normalizer;
/// A query to run with optional annotations
#[derive(Debug, PartialEq, Eq, Default)]