From f1d0568272ec32f0deab68094e7a4eda204af924 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 3 Feb 2023 19:17:21 +0100 Subject: [PATCH] refactor: Break snapshot_comparison into smaller modules (#6846) Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com> --- .../src/snapshot_comparison.rs | 449 +----------------- .../src/snapshot_comparison/normalization.rs | 114 +++++ .../src/snapshot_comparison/queries.rs | 329 +++++++++++++ 3 files changed, 452 insertions(+), 440 deletions(-) create mode 100644 test_helpers_end_to_end/src/snapshot_comparison/normalization.rs create mode 100644 test_helpers_end_to_end/src/snapshot_comparison/queries.rs diff --git a/test_helpers_end_to_end/src/snapshot_comparison.rs b/test_helpers_end_to_end/src/snapshot_comparison.rs index 642efce6a9..51e4ad2809 100644 --- a/test_helpers_end_to_end/src/snapshot_comparison.rs +++ b/test_helpers_end_to_end/src/snapshot_comparison.rs @@ -1,16 +1,18 @@ +mod normalization; +mod queries; + +use crate::snapshot_comparison::queries::TestQueries; use crate::{run_influxql, run_sql, MiniCluster}; use arrow_util::{display::pretty_format_batches, test_util::sort_record_batch}; -use once_cell::sync::Lazy; -use regex::{Captures, Regex}; use snafu::{OptionExt, ResultExt, Snafu}; use std::fmt::{Display, Formatter}; use std::{ - borrow::Cow, - collections::HashMap, fs, path::{Path, PathBuf}, }; -use uuid::Uuid; + +use self::normalization::normalize_results; +use self::queries::Query; #[derive(Debug, Snafu)] pub enum Error { @@ -224,29 +226,6 @@ fn make_absolute(path: &Path) -> PathBuf { absolute } -/// Replace table row separators of flexible width with fixed with. This is required -/// because the original timing values may differ in "printed width", so the table -/// cells have different widths and hence the separators / borders. E.g.: -/// -/// `+--+--+` -> `----------` -/// `+--+------+` -> `----------` -/// -/// Note that we're kinda inexact with our regex here, but it gets the job done. -static REGEX_LINESEP: Lazy = Lazy::new(|| Regex::new(r#"[+-]{6,}"#).expect("linesep regex")); - -/// Similar to the row separator issue above, the table columns are right-padded -/// with spaces. Due to the different "printed width" of the timing values, we need -/// to normalize this padding as well. E.g.: -/// -/// ` |` -> ` |` -/// ` |` -> ` |` -static REGEX_COL: Lazy = Lazy::new(|| Regex::new(r#"\s+\|"#).expect("col regex")); - -fn normalize_for_variable_width(s: Cow) -> String { - let s = REGEX_LINESEP.replace_all(&s, "----------"); - REGEX_COL.replace_all(&s, " |").to_string() -} - async fn run_query( cluster: &MiniCluster, query: &Query, @@ -281,424 +260,14 @@ async fn run_query( results = vec![sort_record_batch(batch)]; } - let mut current_results = pretty_format_batches(&results) + let current_results = pretty_format_batches(&results) .unwrap() .trim() .lines() .map(|s| s.to_string()) .collect::>(); - // normalize UUIDs, if requested - if query.normalized_uuids() { - let regex_uuid = Regex::new("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}") - .expect("UUID regex"); - let regex_dirs = Regex::new(r#"[0-9]+/[0-9]+/[0-9]+/[0-9]+"#).expect("directory regex"); - - let mut seen: HashMap = HashMap::new(); - current_results = current_results - .into_iter() - .map(|s| { - let s = regex_uuid.replace_all(&s, |s: &Captures| { - let next = seen.len() as u128; - Uuid::from_u128( - *seen - .entry(s.get(0).unwrap().as_str().to_owned()) - .or_insert(next), - ) - .to_string() - }); - - let s = normalize_for_variable_width(s); - - regex_dirs.replace_all(&s, "1/1/1/1").to_string() - }) - .collect(); - } - - // normalize metrics, if requested - if query.normalized_metrics() { - // Parse regex once and apply to all rows. See description around the `replace...` calls on - // why/how the regexes are used. - let regex_metrics = Regex::new(r#"metrics=\[([^\]]*)\]"#).expect("metrics regex"); - let regex_timing = Regex::new(r#"[0-9]+(\.[0-9]+)?.s"#).expect("timing regex"); - - current_results = current_results - .into_iter() - .map(|s| { - // Replace timings with fixed value, e.g.: - // - // `1s` -> `1.234ms` - // `1.2ms` -> `1.234ms` - // `10.2μs` -> `1.234ms` - let s = regex_timing.replace_all(&s, "1.234ms"); - - let s = normalize_for_variable_width(s); - - // Metrics are currently ordered by value (not by key), so different timings may - // reorder them. We "parse" the list and normalize the sorting. E.g.: - // - // `metrics=[]` => `metrics=[]` - // `metrics=[foo=1, bar=2]` => `metrics=[bar=2, foo=1]` - // `metrics=[foo=2, bar=1]` => `metrics=[bar=1, foo=2]` - regex_metrics - .replace_all(&s, |c: &Captures| { - let mut metrics: Vec<_> = c[1].split(", ").collect(); - metrics.sort(); - format!("metrics=[{}]", metrics.join(", ")) - }) - .to_string() - }) - .collect(); - } - - // normalize Filters, if requested - // - // Converts: - // FilterExec: time@2 < -9223372036854775808 OR time@2 > 1640995204240217000 - // - // to - // FilterExec: - if query.normalized_filters() { - let filter_regex = Regex::new("FilterExec: .*").expect("filter regex"); - current_results = current_results - .into_iter() - .map(|s| { - filter_regex - .replace_all(&s, |_: &Captures| "FilterExec: ") - .to_string() - }) - .collect(); - } + let current_results = normalize_results(query, current_results); Ok(current_results) } - -/// A query to run with optional annotations -#[derive(Debug, PartialEq, Eq, Default)] -pub struct Query { - /// If true, results are sorted first prior to comparison, meaning that differences in the - /// output order compared with expected order do not cause a diff - sorted_compare: bool, - - /// If true, replace UUIDs with static placeholders. - normalized_uuids: bool, - - /// If true, normalize timings in queries by replacing them with - /// static placeholders, for example: - /// - /// `1s` -> `1.234ms` - normalized_metrics: bool, - - /// if true, normalize filter predicates for explain plans - /// `FilterExec: ` - normalized_filters: bool, - - /// The query string - text: String, -} - -impl Query { - #[cfg(test)] - fn new(text: impl Into) -> Self { - let text = text.into(); - Self { - sorted_compare: false, - normalized_uuids: false, - normalized_metrics: false, - normalized_filters: false, - text, - } - } - - #[cfg(test)] - fn with_sorted_compare(mut self) -> Self { - self.sorted_compare = true; - self - } - - /// Get a reference to the query text. - pub fn text(&self) -> &str { - self.text.as_ref() - } - - /// Get the query's sorted compare. - pub fn sorted_compare(&self) -> bool { - self.sorted_compare - } - - /// Get queries normalized UUID - pub fn normalized_uuids(&self) -> bool { - self.normalized_uuids - } - - /// Use normalized timing values - pub fn normalized_metrics(&self) -> bool { - self.normalized_metrics - } - - /// Use normalized filter plans - pub fn normalized_filters(&self) -> bool { - self.normalized_filters - } -} - -#[derive(Debug, Default)] -struct QueryBuilder { - query: Query, -} - -impl QueryBuilder { - fn new() -> Self { - Default::default() - } - - fn push_str(&mut self, s: &str) { - self.query.text.push_str(s) - } - - fn push(&mut self, c: char) { - self.query.text.push(c) - } - - fn sorted_compare(&mut self) { - self.query.sorted_compare = true; - } - - fn normalized_uuids(&mut self) { - self.query.normalized_uuids = true; - } - - fn normalize_metrics(&mut self) { - self.query.normalized_metrics = true; - } - - fn normalize_filters(&mut self) { - self.query.normalized_filters = true; - } - - fn is_empty(&self) -> bool { - self.query.text.is_empty() - } - - /// Creates a Query and resets this builder to default - fn build_and_reset(&mut self) -> Option { - (!self.is_empty()).then(|| std::mem::take(&mut self.query)) - } -} - -/// Poor man's parser to find all the SQL queries in an input file -#[derive(Debug, PartialEq, Eq)] -pub struct TestQueries { - queries: Vec, -} - -impl TestQueries { - /// find all queries (more or less a fancy split on `;` - pub fn from_lines(lines: I) -> Self - where - I: IntoIterator, - S: AsRef, - { - let mut queries = vec![]; - let mut builder = QueryBuilder::new(); - - lines.into_iter().for_each(|line| { - let line = line.as_ref().trim(); - const COMPARE_STR: &str = "-- IOX_COMPARE: "; - if line.starts_with(COMPARE_STR) { - let (_, options) = line.split_at(COMPARE_STR.len()); - for option in options.split(',') { - let option = option.trim(); - match option { - "sorted" => { - builder.sorted_compare(); - } - "uuid" => { - builder.normalized_uuids(); - } - "metrics" => { - builder.normalize_metrics(); - } - "filters" => { - builder.normalize_filters(); - } - _ => {} - } - } - } - - if line.starts_with("--") { - return; - } - if line.is_empty() { - return; - } - - // replace newlines - if !builder.is_empty() { - builder.push(' '); - } - builder.push_str(line); - - // declare queries when we see a semicolon at the end of the line - if line.ends_with(';') { - if let Some(q) = builder.build_and_reset() { - queries.push(q); - } - } - }); - - if let Some(q) = builder.build_and_reset() { - queries.push(q); - } - - Self { queries } - } - - // Get an iterator over the queries - pub fn iter(&self) -> impl Iterator { - self.queries.iter() - } -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_parse_queries() { - let input = r#" --- This is a test -select * from foo; --- another comment - -select * from bar; --- This query has been commented out and should not be seen --- select * from baz; -"#; - let queries = TestQueries::from_lines(input.split('\n')); - assert_eq!( - queries, - TestQueries { - queries: vec![ - Query::new("select * from foo;"), - Query::new("select * from bar;"), - ] - } - ) - } - - #[test] - fn test_parse_queries_no_ending_semi() { - let input = r#" -select * from foo; --- no ending semi colon -select * from bar -"#; - let queries = TestQueries::from_lines(input.split('\n')); - assert_eq!( - queries, - TestQueries { - queries: vec![ - Query::new("select * from foo;"), - Query::new("select * from bar") - ] - } - ) - } - - #[test] - fn test_parse_queries_mulit_line() { - let input = r#" -select - * -from - foo; - -select * from bar; - -"#; - let queries = TestQueries::from_lines(input.split('\n')); - assert_eq!( - queries, - TestQueries { - queries: vec![ - Query::new("select * from foo;"), - Query::new("select * from bar;"), - ] - } - ) - } - - #[test] - fn test_parse_queries_empty() { - let input = r#" --- This is a test --- another comment -"#; - let queries = TestQueries::from_lines(input.split('\n')); - assert_eq!(queries, TestQueries { queries: vec![] }) - } - - #[test] - fn test_parse_queries_sorted_compare() { - let input = r#" -select * from foo; - --- The second query should be compared to expected after sorting --- IOX_COMPARE: sorted -select * from bar; - --- Since this query is not annotated, it should not use exected sorted -select * from baz; -select * from baz2; - --- IOX_COMPARE: sorted -select * from waz; --- (But the compare should work subsequently) -"#; - let queries = TestQueries::from_lines(input.split('\n')); - assert_eq!( - queries, - TestQueries { - queries: vec![ - Query::new("select * from foo;"), - Query::new("select * from bar;").with_sorted_compare(), - Query::new("select * from baz;"), - Query::new("select * from baz2;"), - Query::new("select * from waz;").with_sorted_compare(), - ] - } - ) - } - - #[test] - fn test_parse_queries_sorted_compare_after() { - let input = r#" -select * from foo; --- IOX_COMPARE: sorted -"#; - let queries = TestQueries::from_lines(input.split('\n')); - assert_eq!( - queries, - TestQueries { - queries: vec![Query::new("select * from foo;")] - } - ) - } - - #[test] - fn test_parse_queries_sorted_compare_not_match_ignored() { - let input = r#" --- IOX_COMPARE: something_else -select * from foo; -"#; - let queries = TestQueries::from_lines(input.split('\n')); - assert_eq!( - queries, - TestQueries { - queries: vec![Query::new("select * from foo;")] - } - ) - } -} diff --git a/test_helpers_end_to_end/src/snapshot_comparison/normalization.rs b/test_helpers_end_to_end/src/snapshot_comparison/normalization.rs new file mode 100644 index 0000000000..39ac696c58 --- /dev/null +++ b/test_helpers_end_to_end/src/snapshot_comparison/normalization.rs @@ -0,0 +1,114 @@ +use crate::snapshot_comparison::queries::Query; +use once_cell::sync::Lazy; +use regex::{Captures, Regex}; +use std::{borrow::Cow, collections::HashMap}; +use uuid::Uuid; + +/// Replace table row separators of flexible width with fixed with. This is required +/// because the original timing values may differ in "printed width", so the table +/// cells have different widths and hence the separators / borders. E.g.: +/// +/// `+--+--+` -> `----------` +/// `+--+------+` -> `----------` +/// +/// Note that we're kinda inexact with our regex here, but it gets the job done. +static REGEX_LINESEP: Lazy = Lazy::new(|| Regex::new(r#"[+-]{6,}"#).expect("linesep regex")); + +/// Similar to the row separator issue above, the table columns are right-padded +/// with spaces. Due to the different "printed width" of the timing values, we need +/// to normalize this padding as well. E.g.: +/// +/// ` |` -> ` |` +/// ` |` -> ` |` +static REGEX_COL: Lazy = Lazy::new(|| Regex::new(r#"\s+\|"#).expect("col regex")); + +fn normalize_for_variable_width(s: Cow) -> String { + let s = REGEX_LINESEP.replace_all(&s, "----------"); + REGEX_COL.replace_all(&s, " |").to_string() +} + +pub(crate) fn normalize_results(query: &Query, mut current_results: Vec) -> Vec { + // normalize UUIDs, if requested + if query.normalized_uuids() { + let regex_uuid = Regex::new("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}") + .expect("UUID regex"); + let regex_dirs = Regex::new(r#"[0-9]+/[0-9]+/[0-9]+/[0-9]+"#).expect("directory regex"); + + let mut seen: HashMap = HashMap::new(); + current_results = current_results + .into_iter() + .map(|s| { + let s = regex_uuid.replace_all(&s, |s: &Captures| { + let next = seen.len() as u128; + Uuid::from_u128( + *seen + .entry(s.get(0).unwrap().as_str().to_owned()) + .or_insert(next), + ) + .to_string() + }); + + let s = normalize_for_variable_width(s); + + regex_dirs.replace_all(&s, "1/1/1/1").to_string() + }) + .collect(); + } + + // normalize metrics, if requested + if query.normalized_metrics() { + // Parse regex once and apply to all rows. See description around the `replace...` calls on + // why/how the regexes are used. + let regex_metrics = Regex::new(r#"metrics=\[([^\]]*)\]"#).expect("metrics regex"); + let regex_timing = Regex::new(r#"[0-9]+(\.[0-9]+)?.s"#).expect("timing regex"); + + current_results = current_results + .into_iter() + .map(|s| { + // Replace timings with fixed value, e.g.: + // + // `1s` -> `1.234ms` + // `1.2ms` -> `1.234ms` + // `10.2μs` -> `1.234ms` + let s = regex_timing.replace_all(&s, "1.234ms"); + + let s = normalize_for_variable_width(s); + + // Metrics are currently ordered by value (not by key), so different timings may + // reorder them. We "parse" the list and normalize the sorting. E.g.: + // + // `metrics=[]` => `metrics=[]` + // `metrics=[foo=1, bar=2]` => `metrics=[bar=2, foo=1]` + // `metrics=[foo=2, bar=1]` => `metrics=[bar=1, foo=2]` + regex_metrics + .replace_all(&s, |c: &Captures| { + let mut metrics: Vec<_> = c[1].split(", ").collect(); + metrics.sort(); + format!("metrics=[{}]", metrics.join(", ")) + }) + .to_string() + }) + .collect(); + } + + // normalize Filters, if requested + // + // Converts: + // FilterExec: time@2 < -9223372036854775808 OR time@2 > 1640995204240217000 + // + // to + // FilterExec: + if query.normalized_filters() { + let filter_regex = Regex::new("FilterExec: .*").expect("filter regex"); + current_results = current_results + .into_iter() + .map(|s| { + filter_regex + .replace_all(&s, |_: &Captures| "FilterExec: ") + .to_string() + }) + .collect(); + } + + current_results +} diff --git a/test_helpers_end_to_end/src/snapshot_comparison/queries.rs b/test_helpers_end_to_end/src/snapshot_comparison/queries.rs new file mode 100644 index 0000000000..3c76195aba --- /dev/null +++ b/test_helpers_end_to_end/src/snapshot_comparison/queries.rs @@ -0,0 +1,329 @@ +/// A query to run with optional annotations +#[derive(Debug, PartialEq, Eq, Default)] +pub struct Query { + /// If true, results are sorted first prior to comparison, meaning that differences in the + /// output order compared with expected order do not cause a diff + sorted_compare: bool, + + /// If true, replace UUIDs with static placeholders. + normalized_uuids: bool, + + /// If true, normalize timings in queries by replacing them with + /// static placeholders, for example: + /// + /// `1s` -> `1.234ms` + normalized_metrics: bool, + + /// if true, normalize filter predicates for explain plans + /// `FilterExec: ` + normalized_filters: bool, + + /// The query string + text: String, +} + +impl Query { + #[cfg(test)] + fn new(text: impl Into) -> Self { + let text = text.into(); + Self { + sorted_compare: false, + normalized_uuids: false, + normalized_metrics: false, + normalized_filters: false, + text, + } + } + + #[cfg(test)] + fn with_sorted_compare(mut self) -> Self { + self.sorted_compare = true; + self + } + + /// Get a reference to the query text. + pub fn text(&self) -> &str { + self.text.as_ref() + } + + /// Get the query's sorted compare. + pub fn sorted_compare(&self) -> bool { + self.sorted_compare + } + + /// Get queries normalized UUID + pub fn normalized_uuids(&self) -> bool { + self.normalized_uuids + } + + /// Use normalized timing values + pub fn normalized_metrics(&self) -> bool { + self.normalized_metrics + } + + /// Use normalized filter plans + pub fn normalized_filters(&self) -> bool { + self.normalized_filters + } +} + +#[derive(Debug, Default)] +struct QueryBuilder { + query: Query, +} + +impl QueryBuilder { + fn new() -> Self { + Default::default() + } + + fn push_str(&mut self, s: &str) { + self.query.text.push_str(s) + } + + fn push(&mut self, c: char) { + self.query.text.push(c) + } + + fn sorted_compare(&mut self) { + self.query.sorted_compare = true; + } + + fn normalized_uuids(&mut self) { + self.query.normalized_uuids = true; + } + + fn normalize_metrics(&mut self) { + self.query.normalized_metrics = true; + } + + fn normalize_filters(&mut self) { + self.query.normalized_filters = true; + } + + fn is_empty(&self) -> bool { + self.query.text.is_empty() + } + + /// Creates a Query and resets this builder to default + fn build_and_reset(&mut self) -> Option { + (!self.is_empty()).then(|| std::mem::take(&mut self.query)) + } +} + +/// Poor man's parser to find all the SQL queries in an input file +#[derive(Debug, PartialEq, Eq)] +pub struct TestQueries { + queries: Vec, +} + +impl TestQueries { + /// find all queries (more or less a fancy split on `;` + pub fn from_lines(lines: I) -> Self + where + I: IntoIterator, + S: AsRef, + { + let mut queries = vec![]; + let mut builder = QueryBuilder::new(); + + lines.into_iter().for_each(|line| { + let line = line.as_ref().trim(); + const COMPARE_STR: &str = "-- IOX_COMPARE: "; + if line.starts_with(COMPARE_STR) { + let (_, options) = line.split_at(COMPARE_STR.len()); + for option in options.split(',') { + let option = option.trim(); + match option { + "sorted" => { + builder.sorted_compare(); + } + "uuid" => { + builder.normalized_uuids(); + } + "metrics" => { + builder.normalize_metrics(); + } + "filters" => { + builder.normalize_filters(); + } + _ => {} + } + } + } + + if line.starts_with("--") { + return; + } + if line.is_empty() { + return; + } + + // replace newlines + if !builder.is_empty() { + builder.push(' '); + } + builder.push_str(line); + + // declare queries when we see a semicolon at the end of the line + if line.ends_with(';') { + if let Some(q) = builder.build_and_reset() { + queries.push(q); + } + } + }); + + if let Some(q) = builder.build_and_reset() { + queries.push(q); + } + + Self { queries } + } + + // Get an iterator over the queries + pub fn iter(&self) -> impl Iterator { + self.queries.iter() + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_parse_queries() { + let input = r#" +-- This is a test +select * from foo; +-- another comment + +select * from bar; +-- This query has been commented out and should not be seen +-- select * from baz; +"#; + let queries = TestQueries::from_lines(input.split('\n')); + assert_eq!( + queries, + TestQueries { + queries: vec![ + Query::new("select * from foo;"), + Query::new("select * from bar;"), + ] + } + ) + } + + #[test] + fn test_parse_queries_no_ending_semi() { + let input = r#" +select * from foo; +-- no ending semi colon +select * from bar +"#; + let queries = TestQueries::from_lines(input.split('\n')); + assert_eq!( + queries, + TestQueries { + queries: vec![ + Query::new("select * from foo;"), + Query::new("select * from bar") + ] + } + ) + } + + #[test] + fn test_parse_queries_mulit_line() { + let input = r#" +select + * +from + foo; + +select * from bar; + +"#; + let queries = TestQueries::from_lines(input.split('\n')); + assert_eq!( + queries, + TestQueries { + queries: vec![ + Query::new("select * from foo;"), + Query::new("select * from bar;"), + ] + } + ) + } + + #[test] + fn test_parse_queries_empty() { + let input = r#" +-- This is a test +-- another comment +"#; + let queries = TestQueries::from_lines(input.split('\n')); + assert_eq!(queries, TestQueries { queries: vec![] }) + } + + #[test] + fn test_parse_queries_sorted_compare() { + let input = r#" +select * from foo; + +-- The second query should be compared to expected after sorting +-- IOX_COMPARE: sorted +select * from bar; + +-- Since this query is not annotated, it should not use exected sorted +select * from baz; +select * from baz2; + +-- IOX_COMPARE: sorted +select * from waz; +-- (But the compare should work subsequently) +"#; + let queries = TestQueries::from_lines(input.split('\n')); + assert_eq!( + queries, + TestQueries { + queries: vec![ + Query::new("select * from foo;"), + Query::new("select * from bar;").with_sorted_compare(), + Query::new("select * from baz;"), + Query::new("select * from baz2;"), + Query::new("select * from waz;").with_sorted_compare(), + ] + } + ) + } + + #[test] + fn test_parse_queries_sorted_compare_after() { + let input = r#" +select * from foo; +-- IOX_COMPARE: sorted +"#; + let queries = TestQueries::from_lines(input.split('\n')); + assert_eq!( + queries, + TestQueries { + queries: vec![Query::new("select * from foo;")] + } + ) + } + + #[test] + fn test_parse_queries_sorted_compare_not_match_ignored() { + let input = r#" +-- IOX_COMPARE: something_else +select * from foo; +"#; + let queries = TestQueries::from_lines(input.split('\n')); + assert_eq!( + queries, + TestQueries { + queries: vec![Query::new("select * from foo;")] + } + ) + } +}