From f1d0568272ec32f0deab68094e7a4eda204af924 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <alamb@influxdata.com>
Date: Fri, 3 Feb 2023 19:17:21 +0100
Subject: [PATCH] refactor: Break snapshot_comparison into smaller modules
 (#6846)

Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com>
---
 .../src/snapshot_comparison.rs                | 449 +-----------------
 .../src/snapshot_comparison/normalization.rs  | 114 +++++
 .../src/snapshot_comparison/queries.rs        | 329 +++++++++++++
 3 files changed, 452 insertions(+), 440 deletions(-)
 create mode 100644 test_helpers_end_to_end/src/snapshot_comparison/normalization.rs
 create mode 100644 test_helpers_end_to_end/src/snapshot_comparison/queries.rs
diff --git a/test_helpers_end_to_end/src/snapshot_comparison.rs b/test_helpers_end_to_end/src/snapshot_comparison.rs
index 642efce6a9..51e4ad2809 100644
--- a/test_helpers_end_to_end/src/snapshot_comparison.rs
+++ b/test_helpers_end_to_end/src/snapshot_comparison.rs
@@ -1,16 +1,18 @@
+mod normalization;
+mod queries;
+
+use crate::snapshot_comparison::queries::TestQueries;
 use crate::{run_influxql, run_sql, MiniCluster};
 use arrow_util::{display::pretty_format_batches, test_util::sort_record_batch};
-use once_cell::sync::Lazy;
-use regex::{Captures, Regex};
 use snafu::{OptionExt, ResultExt, Snafu};
 use std::fmt::{Display, Formatter};
 use std::{
-    borrow::Cow,
-    collections::HashMap,
     fs,
     path::{Path, PathBuf},
 };
-use uuid::Uuid;
+
+use self::normalization::normalize_results;
+use self::queries::Query;
 
 #[derive(Debug, Snafu)]
 pub enum Error {
@@ -224,29 +226,6 @@ fn make_absolute(path: &Path) -> PathBuf {
     absolute
 }
 
-/// Replace table row separators of flexible width with fixed with. This is required
-/// because the original timing values may differ in "printed width", so the table
-/// cells have different widths and hence the separators / borders. E.g.:
-///
-///   `+--+--+`     -> `----------`
-///   `+--+------+` -> `----------`
-///
-/// Note that we're kinda inexact with our regex here, but it gets the job done.
-static REGEX_LINESEP: Lazy<Regex> = Lazy::new(|| Regex::new(r#"[+-]{6,}"#).expect("linesep regex"));
-
-/// Similar to the row separator issue above, the table columns are right-padded
-/// with spaces. Due to the different "printed width" of the timing values, we need
-/// to normalize this padding as well. E.g.:
-///
-///   `        |`  -> `    |`
-///   `         |` -> `    |`
-static REGEX_COL: Lazy<Regex> = Lazy::new(|| Regex::new(r#"\s+\|"#).expect("col regex"));
-
-fn normalize_for_variable_width(s: Cow<str>) -> String {
-    let s = REGEX_LINESEP.replace_all(&s, "----------");
-    REGEX_COL.replace_all(&s, "    |").to_string()
-}
-
 async fn run_query(
     cluster: &MiniCluster,
     query: &Query,
@@ -281,424 +260,14 @@ async fn run_query(
         results = vec![sort_record_batch(batch)];
     }
 
-    let mut current_results = pretty_format_batches(&results)
+    let current_results = pretty_format_batches(&results)
         .unwrap()
         .trim()
         .lines()
         .map(|s| s.to_string())
         .collect::<Vec<_>>();
 
-    // normalize UUIDs, if requested
-    if query.normalized_uuids() {
-        let regex_uuid = Regex::new("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}")
-            .expect("UUID regex");
-        let regex_dirs = Regex::new(r#"[0-9]+/[0-9]+/[0-9]+/[0-9]+"#).expect("directory regex");
-
-        let mut seen: HashMap<String, u128> = HashMap::new();
-        current_results = current_results
-            .into_iter()
-            .map(|s| {
-                let s = regex_uuid.replace_all(&s, |s: &Captures| {
-                    let next = seen.len() as u128;
-                    Uuid::from_u128(
-                        *seen
-                            .entry(s.get(0).unwrap().as_str().to_owned())
-                            .or_insert(next),
-                    )
-                    .to_string()
-                });
-
-                let s = normalize_for_variable_width(s);
-
-                regex_dirs.replace_all(&s, "1/1/1/1").to_string()
-            })
-            .collect();
-    }
-
-    // normalize metrics, if requested
-    if query.normalized_metrics() {
-        // Parse regex once and apply to all rows. See description around the `replace...` calls on
-        // why/how the regexes are used.
-        let regex_metrics = Regex::new(r#"metrics=\[([^\]]*)\]"#).expect("metrics regex");
-        let regex_timing = Regex::new(r#"[0-9]+(\.[0-9]+)?.s"#).expect("timing regex");
-
-        current_results = current_results
-            .into_iter()
-            .map(|s| {
-                // Replace timings with fixed value, e.g.:
-                //
-                //   `1s`     -> `1.234ms`
-                //   `1.2ms`  -> `1.234ms`
-                //   `10.2μs` -> `1.234ms`
-                let s = regex_timing.replace_all(&s, "1.234ms");
-
-                let s = normalize_for_variable_width(s);
-
-                // Metrics are currently ordered by value (not by key), so different timings may
-                // reorder them. We "parse" the list and normalize the sorting. E.g.:
-                //
-                // `metrics=[]`             => `metrics=[]`
-                // `metrics=[foo=1, bar=2]` => `metrics=[bar=2, foo=1]`
-                // `metrics=[foo=2, bar=1]` => `metrics=[bar=1, foo=2]`
-                regex_metrics
-                    .replace_all(&s, |c: &Captures| {
-                        let mut metrics: Vec<_> = c[1].split(", ").collect();
-                        metrics.sort();
-                        format!("metrics=[{}]", metrics.join(", "))
-                    })
-                    .to_string()
-            })
-            .collect();
-    }
-
-    // normalize Filters, if requested
-    //
-    // Converts:
-    // FilterExec: time@2 < -9223372036854775808 OR time@2 > 1640995204240217000
-    //
-    // to
-    // FilterExec: <REDACTED>
-    if query.normalized_filters() {
-        let filter_regex = Regex::new("FilterExec: .*").expect("filter regex");
-        current_results = current_results
-            .into_iter()
-            .map(|s| {
-                filter_regex
-                    .replace_all(&s, |_: &Captures| "FilterExec: <REDACTED>")
-                    .to_string()
-            })
-            .collect();
-    }
+    let current_results = normalize_results(query, current_results);
 
     Ok(current_results)
 }
-
-/// A query to run with optional annotations
-#[derive(Debug, PartialEq, Eq, Default)]
-pub struct Query {
-    /// If true, results are sorted first prior to comparison, meaning that differences in the
-    /// output order compared with expected order do not cause a diff
-    sorted_compare: bool,
-
-    /// If true, replace UUIDs with static placeholders.
-    normalized_uuids: bool,
-
-    /// If true, normalize timings in queries by replacing them with
-    /// static placeholders, for example:
-    ///
-    /// `1s`     -> `1.234ms`
-    normalized_metrics: bool,
-
-    /// if true, normalize filter predicates for explain plans
-    /// `FilterExec: <REDACTED>`
-    normalized_filters: bool,
-
-    /// The query string
-    text: String,
-}
-
-impl Query {
-    #[cfg(test)]
-    fn new(text: impl Into<String>) -> Self {
-        let text = text.into();
-        Self {
-            sorted_compare: false,
-            normalized_uuids: false,
-            normalized_metrics: false,
-            normalized_filters: false,
-            text,
-        }
-    }
-
-    #[cfg(test)]
-    fn with_sorted_compare(mut self) -> Self {
-        self.sorted_compare = true;
-        self
-    }
-
-    /// Get a reference to the query text.
-    pub fn text(&self) -> &str {
-        self.text.as_ref()
-    }
-
-    /// Get the query's sorted compare.
-    pub fn sorted_compare(&self) -> bool {
-        self.sorted_compare
-    }
-
-    /// Get queries normalized UUID
-    pub fn normalized_uuids(&self) -> bool {
-        self.normalized_uuids
-    }
-
-    /// Use normalized timing values
-    pub fn normalized_metrics(&self) -> bool {
-        self.normalized_metrics
-    }
-
-    /// Use normalized filter plans
-    pub fn normalized_filters(&self) -> bool {
-        self.normalized_filters
-    }
-}
-
-#[derive(Debug, Default)]
-struct QueryBuilder {
-    query: Query,
-}
-
-impl QueryBuilder {
-    fn new() -> Self {
-        Default::default()
-    }
-
-    fn push_str(&mut self, s: &str) {
-        self.query.text.push_str(s)
-    }
-
-    fn push(&mut self, c: char) {
-        self.query.text.push(c)
-    }
-
-    fn sorted_compare(&mut self) {
-        self.query.sorted_compare = true;
-    }
-
-    fn normalized_uuids(&mut self) {
-        self.query.normalized_uuids = true;
-    }
-
-    fn normalize_metrics(&mut self) {
-        self.query.normalized_metrics = true;
-    }
-
-    fn normalize_filters(&mut self) {
-        self.query.normalized_filters = true;
-    }
-
-    fn is_empty(&self) -> bool {
-        self.query.text.is_empty()
-    }
-
-    /// Creates a Query and resets this builder to default
-    fn build_and_reset(&mut self) -> Option<Query> {
-        (!self.is_empty()).then(|| std::mem::take(&mut self.query))
-    }
-}
-
-/// Poor man's parser to find all the SQL queries in an input file
-#[derive(Debug, PartialEq, Eq)]
-pub struct TestQueries {
-    queries: Vec<Query>,
-}
-
-impl TestQueries {
-    /// find all queries (more or less a fancy split on `;`
-    pub fn from_lines<I, S>(lines: I) -> Self
-    where
-        I: IntoIterator<Item = S>,
-        S: AsRef<str>,
-    {
-        let mut queries = vec![];
-        let mut builder = QueryBuilder::new();
-
-        lines.into_iter().for_each(|line| {
-            let line = line.as_ref().trim();
-            const COMPARE_STR: &str = "-- IOX_COMPARE: ";
-            if line.starts_with(COMPARE_STR) {
-                let (_, options) = line.split_at(COMPARE_STR.len());
-                for option in options.split(',') {
-                    let option = option.trim();
-                    match option {
-                        "sorted" => {
-                            builder.sorted_compare();
-                        }
-                        "uuid" => {
-                            builder.normalized_uuids();
-                        }
-                        "metrics" => {
-                            builder.normalize_metrics();
-                        }
-                        "filters" => {
-                            builder.normalize_filters();
-                        }
-                        _ => {}
-                    }
-                }
-            }
-
-            if line.starts_with("--") {
-                return;
-            }
-            if line.is_empty() {
-                return;
-            }
-
-            // replace newlines
-            if !builder.is_empty() {
-                builder.push(' ');
-            }
-            builder.push_str(line);
-
-            // declare queries when we see a semicolon at the end of the line
-            if line.ends_with(';') {
-                if let Some(q) = builder.build_and_reset() {
-                    queries.push(q);
-                }
-            }
-        });
-
-        if let Some(q) = builder.build_and_reset() {
-            queries.push(q);
-        }
-
-        Self { queries }
-    }
-
-    // Get an iterator over the queries
-    pub fn iter(&self) -> impl Iterator<Item = &Query> {
-        self.queries.iter()
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use super::*;
-
-    #[test]
-    fn test_parse_queries() {
-        let input = r#"
--- This is a test
-select * from foo;
--- another comment
-
-select * from bar;
--- This query has been commented out and should not be seen
--- select * from baz;
-"#;
-        let queries = TestQueries::from_lines(input.split('\n'));
-        assert_eq!(
-            queries,
-            TestQueries {
-                queries: vec![
-                    Query::new("select * from foo;"),
-                    Query::new("select * from bar;"),
-                ]
-            }
-        )
-    }
-
-    #[test]
-    fn test_parse_queries_no_ending_semi() {
-        let input = r#"
-select * from foo;
--- no ending semi colon
-select * from bar
-"#;
-        let queries = TestQueries::from_lines(input.split('\n'));
-        assert_eq!(
-            queries,
-            TestQueries {
-                queries: vec![
-                    Query::new("select * from foo;"),
-                    Query::new("select * from bar")
-                ]
-            }
-        )
-    }
-
-    #[test]
-    fn test_parse_queries_mulit_line() {
-        let input = r#"
-select
-  *
-from
-  foo;
-
-select * from bar;
-
-"#;
-        let queries = TestQueries::from_lines(input.split('\n'));
-        assert_eq!(
-            queries,
-            TestQueries {
-                queries: vec![
-                    Query::new("select * from foo;"),
-                    Query::new("select * from bar;"),
-                ]
-            }
-        )
-    }
-
-    #[test]
-    fn test_parse_queries_empty() {
-        let input = r#"
--- This is a test
--- another comment
-"#;
-        let queries = TestQueries::from_lines(input.split('\n'));
-        assert_eq!(queries, TestQueries { queries: vec![] })
-    }
-
-    #[test]
-    fn test_parse_queries_sorted_compare() {
-        let input = r#"
-select * from foo;
-
--- The second query should be compared to expected after sorting
--- IOX_COMPARE: sorted
-select * from bar;
-
--- Since this query is not annotated, it should not use exected sorted
-select * from baz;
-select * from baz2;
-
--- IOX_COMPARE: sorted
-select * from waz;
--- (But the compare should work subsequently)
-"#;
-        let queries = TestQueries::from_lines(input.split('\n'));
-        assert_eq!(
-            queries,
-            TestQueries {
-                queries: vec![
-                    Query::new("select * from foo;"),
-                    Query::new("select * from bar;").with_sorted_compare(),
-                    Query::new("select * from baz;"),
-                    Query::new("select * from baz2;"),
-                    Query::new("select * from waz;").with_sorted_compare(),
-                ]
-            }
-        )
-    }
-
-    #[test]
-    fn test_parse_queries_sorted_compare_after() {
-        let input = r#"
-select * from foo;
--- IOX_COMPARE: sorted
-"#;
-        let queries = TestQueries::from_lines(input.split('\n'));
-        assert_eq!(
-            queries,
-            TestQueries {
-                queries: vec![Query::new("select * from foo;")]
-            }
-        )
-    }
-
-    #[test]
-    fn test_parse_queries_sorted_compare_not_match_ignored() {
-        let input = r#"
--- IOX_COMPARE: something_else
-select * from foo;
-"#;
-        let queries = TestQueries::from_lines(input.split('\n'));
-        assert_eq!(
-            queries,
-            TestQueries {
-                queries: vec![Query::new("select * from foo;")]
-            }
-        )
-    }
-}
diff --git a/test_helpers_end_to_end/src/snapshot_comparison/normalization.rs b/test_helpers_end_to_end/src/snapshot_comparison/normalization.rs
new file mode 100644
index 0000000000..39ac696c58
--- /dev/null
+++ b/test_helpers_end_to_end/src/snapshot_comparison/normalization.rs
@@ -0,0 +1,114 @@
+use crate::snapshot_comparison::queries::Query;
+use once_cell::sync::Lazy;
+use regex::{Captures, Regex};
+use std::{borrow::Cow, collections::HashMap};
+use uuid::Uuid;
+
+/// Replace table row separators of flexible width with fixed with. This is required
+/// because the original timing values may differ in "printed width", so the table
+/// cells have different widths and hence the separators / borders. E.g.:
+///
+///   `+--+--+`     -> `----------`
+///   `+--+------+` -> `----------`
+///
+/// Note that we're kinda inexact with our regex here, but it gets the job done.
+static REGEX_LINESEP: Lazy<Regex> = Lazy::new(|| Regex::new(r#"[+-]{6,}"#).expect("linesep regex"));
+
+/// Similar to the row separator issue above, the table columns are right-padded
+/// with spaces. Due to the different "printed width" of the timing values, we need
+/// to normalize this padding as well. E.g.:
+///
+///   `        |`  -> `    |`
+///   `         |` -> `    |`
+static REGEX_COL: Lazy<Regex> = Lazy::new(|| Regex::new(r#"\s+\|"#).expect("col regex"));
+
+fn normalize_for_variable_width(s: Cow<str>) -> String {
+    let s = REGEX_LINESEP.replace_all(&s, "----------");
+    REGEX_COL.replace_all(&s, "    |").to_string()
+}
+
+pub(crate) fn normalize_results(query: &Query, mut current_results: Vec<String>) -> Vec<String> {
+    // normalize UUIDs, if requested
+    if query.normalized_uuids() {
+        let regex_uuid = Regex::new("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}")
+            .expect("UUID regex");
+        let regex_dirs = Regex::new(r#"[0-9]+/[0-9]+/[0-9]+/[0-9]+"#).expect("directory regex");
+
+        let mut seen: HashMap<String, u128> = HashMap::new();
+        current_results = current_results
+            .into_iter()
+            .map(|s| {
+                let s = regex_uuid.replace_all(&s, |s: &Captures| {
+                    let next = seen.len() as u128;
+                    Uuid::from_u128(
+                        *seen
+                            .entry(s.get(0).unwrap().as_str().to_owned())
+                            .or_insert(next),
+                    )
+                    .to_string()
+                });
+
+                let s = normalize_for_variable_width(s);
+
+                regex_dirs.replace_all(&s, "1/1/1/1").to_string()
+            })
+            .collect();
+    }
+
+    // normalize metrics, if requested
+    if query.normalized_metrics() {
+        // Parse regex once and apply to all rows. See description around the `replace...` calls on
+        // why/how the regexes are used.
+        let regex_metrics = Regex::new(r#"metrics=\[([^\]]*)\]"#).expect("metrics regex");
+        let regex_timing = Regex::new(r#"[0-9]+(\.[0-9]+)?.s"#).expect("timing regex");
+
+        current_results = current_results
+            .into_iter()
+            .map(|s| {
+                // Replace timings with fixed value, e.g.:
+                //
+                //   `1s`     -> `1.234ms`
+                //   `1.2ms`  -> `1.234ms`
+                //   `10.2μs` -> `1.234ms`
+                let s = regex_timing.replace_all(&s, "1.234ms");
+
+                let s = normalize_for_variable_width(s);
+
+                // Metrics are currently ordered by value (not by key), so different timings may
+                // reorder them. We "parse" the list and normalize the sorting. E.g.:
+                //
+                // `metrics=[]`             => `metrics=[]`
+                // `metrics=[foo=1, bar=2]` => `metrics=[bar=2, foo=1]`
+                // `metrics=[foo=2, bar=1]` => `metrics=[bar=1, foo=2]`
+                regex_metrics
+                    .replace_all(&s, |c: &Captures| {
+                        let mut metrics: Vec<_> = c[1].split(", ").collect();
+                        metrics.sort();
+                        format!("metrics=[{}]", metrics.join(", "))
+                    })
+                    .to_string()
+            })
+            .collect();
+    }
+
+    // normalize Filters, if requested
+    //
+    // Converts:
+    // FilterExec: time@2 < -9223372036854775808 OR time@2 > 1640995204240217000
+    //
+    // to
+    // FilterExec: <REDACTED>
+    if query.normalized_filters() {
+        let filter_regex = Regex::new("FilterExec: .*").expect("filter regex");
+        current_results = current_results
+            .into_iter()
+            .map(|s| {
+                filter_regex
+                    .replace_all(&s, |_: &Captures| "FilterExec: <REDACTED>")
+                    .to_string()
+            })
+            .collect();
+    }
+
+    current_results
+}
diff --git a/test_helpers_end_to_end/src/snapshot_comparison/queries.rs b/test_helpers_end_to_end/src/snapshot_comparison/queries.rs
new file mode 100644
index 0000000000..3c76195aba
--- /dev/null
+++ b/test_helpers_end_to_end/src/snapshot_comparison/queries.rs
@@ -0,0 +1,329 @@
+/// A query to run with optional annotations
+#[derive(Debug, PartialEq, Eq, Default)]
+pub struct Query {
+    /// If true, results are sorted first prior to comparison, meaning that differences in the
+    /// output order compared with expected order do not cause a diff
+    sorted_compare: bool,
+
+    /// If true, replace UUIDs with static placeholders.
+    normalized_uuids: bool,
+
+    /// If true, normalize timings in queries by replacing them with
+    /// static placeholders, for example:
+    ///
+    /// `1s`     -> `1.234ms`
+    normalized_metrics: bool,
+
+    /// if true, normalize filter predicates for explain plans
+    /// `FilterExec: <REDACTED>`
+    normalized_filters: bool,
+
+    /// The query string
+    text: String,
+}
+
+impl Query {
+    #[cfg(test)]
+    fn new(text: impl Into<String>) -> Self {
+        let text = text.into();
+        Self {
+            sorted_compare: false,
+            normalized_uuids: false,
+            normalized_metrics: false,
+            normalized_filters: false,
+            text,
+        }
+    }
+
+    #[cfg(test)]
+    fn with_sorted_compare(mut self) -> Self {
+        self.sorted_compare = true;
+        self
+    }
+
+    /// Get a reference to the query text.
+    pub fn text(&self) -> &str {
+        self.text.as_ref()
+    }
+
+    /// Get the query's sorted compare.
+    pub fn sorted_compare(&self) -> bool {
+        self.sorted_compare
+    }
+
+    /// Get queries normalized UUID
+    pub fn normalized_uuids(&self) -> bool {
+        self.normalized_uuids
+    }
+
+    /// Use normalized timing values
+    pub fn normalized_metrics(&self) -> bool {
+        self.normalized_metrics
+    }
+
+    /// Use normalized filter plans
+    pub fn normalized_filters(&self) -> bool {
+        self.normalized_filters
+    }
+}
+
+#[derive(Debug, Default)]
+struct QueryBuilder {
+    query: Query,
+}
+
+impl QueryBuilder {
+    fn new() -> Self {
+        Default::default()
+    }
+
+    fn push_str(&mut self, s: &str) {
+        self.query.text.push_str(s)
+    }
+
+    fn push(&mut self, c: char) {
+        self.query.text.push(c)
+    }
+
+    fn sorted_compare(&mut self) {
+        self.query.sorted_compare = true;
+    }
+
+    fn normalized_uuids(&mut self) {
+        self.query.normalized_uuids = true;
+    }
+
+    fn normalize_metrics(&mut self) {
+        self.query.normalized_metrics = true;
+    }
+
+    fn normalize_filters(&mut self) {
+        self.query.normalized_filters = true;
+    }
+
+    fn is_empty(&self) -> bool {
+        self.query.text.is_empty()
+    }
+
+    /// Creates a Query and resets this builder to default
+    fn build_and_reset(&mut self) -> Option<Query> {
+        (!self.is_empty()).then(|| std::mem::take(&mut self.query))
+    }
+}
+
+/// Poor man's parser to find all the SQL queries in an input file
+#[derive(Debug, PartialEq, Eq)]
+pub struct TestQueries {
+    queries: Vec<Query>,
+}
+
+impl TestQueries {
+    /// find all queries (more or less a fancy split on `;`
+    pub fn from_lines<I, S>(lines: I) -> Self
+    where
+        I: IntoIterator<Item = S>,
+        S: AsRef<str>,
+    {
+        let mut queries = vec![];
+        let mut builder = QueryBuilder::new();
+
+        lines.into_iter().for_each(|line| {
+            let line = line.as_ref().trim();
+            const COMPARE_STR: &str = "-- IOX_COMPARE: ";
+            if line.starts_with(COMPARE_STR) {
+                let (_, options) = line.split_at(COMPARE_STR.len());
+                for option in options.split(',') {
+                    let option = option.trim();
+                    match option {
+                        "sorted" => {
+                            builder.sorted_compare();
+                        }
+                        "uuid" => {
+                            builder.normalized_uuids();
+                        }
+                        "metrics" => {
+                            builder.normalize_metrics();
+                        }
+                        "filters" => {
+                            builder.normalize_filters();
+                        }
+                        _ => {}
+                    }
+                }
+            }
+
+            if line.starts_with("--") {
+                return;
+            }
+            if line.is_empty() {
+                return;
+            }
+
+            // replace newlines
+            if !builder.is_empty() {
+                builder.push(' ');
+            }
+            builder.push_str(line);
+
+            // declare queries when we see a semicolon at the end of the line
+            if line.ends_with(';') {
+                if let Some(q) = builder.build_and_reset() {
+                    queries.push(q);
+                }
+            }
+        });
+
+        if let Some(q) = builder.build_and_reset() {
+            queries.push(q);
+        }
+
+        Self { queries }
+    }
+
+    // Get an iterator over the queries
+    pub fn iter(&self) -> impl Iterator<Item = &Query> {
+        self.queries.iter()
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_parse_queries() {
+        let input = r#"
+-- This is a test
+select * from foo;
+-- another comment
+
+select * from bar;
+-- This query has been commented out and should not be seen
+-- select * from baz;
+"#;
+        let queries = TestQueries::from_lines(input.split('\n'));
+        assert_eq!(
+            queries,
+            TestQueries {
+                queries: vec![
+                    Query::new("select * from foo;"),
+                    Query::new("select * from bar;"),
+                ]
+            }
+        )
+    }
+
+    #[test]
+    fn test_parse_queries_no_ending_semi() {
+        let input = r#"
+select * from foo;
+-- no ending semi colon
+select * from bar
+"#;
+        let queries = TestQueries::from_lines(input.split('\n'));
+        assert_eq!(
+            queries,
+            TestQueries {
+                queries: vec![
+                    Query::new("select * from foo;"),
+                    Query::new("select * from bar")
+                ]
+            }
+        )
+    }
+
+    #[test]
+    fn test_parse_queries_mulit_line() {
+        let input = r#"
+select
+  *
+from
+  foo;
+
+select * from bar;
+
+"#;
+        let queries = TestQueries::from_lines(input.split('\n'));
+        assert_eq!(
+            queries,
+            TestQueries {
+                queries: vec![
+                    Query::new("select * from foo;"),
+                    Query::new("select * from bar;"),
+                ]
+            }
+        )
+    }
+
+    #[test]
+    fn test_parse_queries_empty() {
+        let input = r#"
+-- This is a test
+-- another comment
+"#;
+        let queries = TestQueries::from_lines(input.split('\n'));
+        assert_eq!(queries, TestQueries { queries: vec![] })
+    }
+
+    #[test]
+    fn test_parse_queries_sorted_compare() {
+        let input = r#"
+select * from foo;
+
+-- The second query should be compared to expected after sorting
+-- IOX_COMPARE: sorted
+select * from bar;
+
+-- Since this query is not annotated, it should not use exected sorted
+select * from baz;
+select * from baz2;
+
+-- IOX_COMPARE: sorted
+select * from waz;
+-- (But the compare should work subsequently)
+"#;
+        let queries = TestQueries::from_lines(input.split('\n'));
+        assert_eq!(
+            queries,
+            TestQueries {
+                queries: vec![
+                    Query::new("select * from foo;"),
+                    Query::new("select * from bar;").with_sorted_compare(),
+                    Query::new("select * from baz;"),
+                    Query::new("select * from baz2;"),
+                    Query::new("select * from waz;").with_sorted_compare(),
+                ]
+            }
+        )
+    }
+
+    #[test]
+    fn test_parse_queries_sorted_compare_after() {
+        let input = r#"
+select * from foo;
+-- IOX_COMPARE: sorted
+"#;
+        let queries = TestQueries::from_lines(input.split('\n'));
+        assert_eq!(
+            queries,
+            TestQueries {
+                queries: vec![Query::new("select * from foo;")]
+            }
+        )
+    }
+
+    #[test]
+    fn test_parse_queries_sorted_compare_not_match_ignored() {
+        let input = r#"
+-- IOX_COMPARE: something_else
+select * from foo;
+"#;
+        let queries = TestQueries::from_lines(input.split('\n'));
+        assert_eq!(
+            queries,
+            TestQueries {
+                queries: vec![Query::new("select * from foo;")]
+            }
+        )
+    }
+}