Merge pull request #7102 from influxdata/cn/share-normalization

fix: Use the same normalization code for explain tests as e2e tests do
pull/24376/head
kodiakhq[bot] 2023-03-02 03:18:51 +00:00 committed by GitHub
commit f3267f992a
10 changed files with 249 additions and 259 deletions
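The commit moves the `Normalizer` out of the end-to-end snapshot-comparison helpers and into `arrow_util::test_util`, so the querier's explain tests and the e2e tests now share one normalization path. A minimal sketch of the resulting call pattern, mirroring the `format_explain` change further down (the wrapper function name and the `results` variable are illustrative, not part of this commit):

use arrow::record_batch::RecordBatch;
use arrow_util::test_util::Normalizer;

// Hedged sketch: pretty-print query results and replace parquet paths/UUIDs
// with stable placeholders; all other normalization flags stay at their defaults.
fn normalized_explain_lines(results: Vec<RecordBatch>) -> Vec<String> {
    let normalizer = Normalizer {
        normalized_uuids: true,
        ..Default::default()
    };
    normalizer.normalize_results(results)
}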

Cargo.lock generated
View File

@ -340,8 +340,11 @@ dependencies = [
"datafusion",
"hashbrown 0.13.2",
"num-traits",
"once_cell",
"rand",
"regex",
"snafu",
"uuid",
"workspace-hack",
]
@ -4513,7 +4516,6 @@ dependencies = [
"predicate",
"prost",
"rand",
"regex",
"schema",
"service_common",
"service_grpc_catalog",
@ -5656,7 +5658,6 @@ dependencies = [
"parking_lot 0.12.1",
"prost",
"rand",
"regex",
"reqwest",
"snafu",
"sqlx",
@ -5665,7 +5666,6 @@ dependencies = [
"tokio",
"tokio-util",
"tonic",
"uuid",
"workspace-hack",
]

View File

@ -15,7 +15,10 @@ chrono = { version = "0.4", default-features = false }
comfy-table = { version = "6.1", default-features = false }
hashbrown = { workspace = true }
num-traits = "0.2"
once_cell = { version = "1.17", features = ["parking_lot"] }
regex = "1.7.0"
snafu = "0.7"
uuid = "1"
workspace-hack = { version = "0.1", path = "../workspace-hack" }
[dev-dependencies]

View File

@ -1,6 +1,7 @@
//! A collection of testing functions for arrow based code
use std::sync::Arc;
use crate::display::pretty_format_batches;
use arrow::{
array::{new_null_array, ArrayRef, StringArray},
compute::kernels::sort::{lexsort, SortColumn, SortOptions},
@ -8,6 +9,10 @@ use arrow::{
error::ArrowError,
record_batch::RecordBatch,
};
use once_cell::sync::Lazy;
use regex::{Captures, Regex};
use std::{borrow::Cow, collections::HashMap};
use uuid::Uuid;
/// Compares the formatted output with the pretty formatted results of
/// record batches. This is a macro so errors appear on the correct line
@ -181,3 +186,195 @@ pub fn equalize_batch_schemas(batches: Vec<RecordBatch>) -> Result<Vec<RecordBat
})
.collect())
}
/// Match the parquet UUID
///
/// For example, given
/// `32/51/216/13452/1d325760-2b20-48de-ab48-2267b034133d.parquet`
///
/// matches `1d325760-2b20-48de-ab48-2267b034133d`
static REGEX_UUID: Lazy<Regex> = Lazy::new(|| {
Regex::new("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}").expect("UUID regex")
});
/// Match the parquet directory names
/// For example, given
/// `32/51/216/13452/1d325760-2b20-48de-ab48-2267b034133d.parquet`
///
/// matches `32/51/216/13452`
static REGEX_DIRS: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"[0-9]+/[0-9]+/[0-9]+/[0-9]+"#).expect("directory regex"));
/// Replace table row separators of flexible width with fixed width. This is required
/// because the original timing values may differ in "printed width", so the table
/// cells have different widths and hence the separators / borders. E.g.:
///
/// `+--+--+` -> `----------`
/// `+--+------+` -> `----------`
///
/// Note that we're kinda inexact with our regex here, but it gets the job done.
static REGEX_LINESEP: Lazy<Regex> = Lazy::new(|| Regex::new(r#"[+-]{6,}"#).expect("linesep regex"));
/// Similar to the row separator issue above, the table columns are right-padded
/// with spaces. Due to the different "printed width" of the timing values, we need
/// to normalize this padding as well. E.g.:
///
/// `     |` -> ` |`
/// `        |` -> ` |`
static REGEX_COL: Lazy<Regex> = Lazy::new(|| Regex::new(r#"\s+\|"#).expect("col regex"));
/// Matches lines like `metrics=[foo=1, bar=2]`
static REGEX_METRICS: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"metrics=\[([^\]]*)\]"#).expect("metrics regex"));
/// Matches things like `1s`, `1.2ms` and `10.2μs`
static REGEX_TIMING: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"[0-9]+(\.[0-9]+)?.s"#).expect("timing regex"));
/// Matches things like `FilterExec: time@2 < -9223372036854775808 OR time@2 > 1640995204240217000`
static REGEX_FILTER: Lazy<Regex> =
Lazy::new(|| Regex::new("FilterExec: .*").expect("filter regex"));
fn normalize_for_variable_width(s: Cow<'_, str>) -> String {
let s = REGEX_LINESEP.replace_all(&s, "----------");
REGEX_COL.replace_all(&s, " |").to_string()
}
/// Set of normalizations to apply to the results of a query
#[derive(Debug, PartialEq, Eq, Default, Clone, Copy)]
pub struct Normalizer {
/// If true, results are sorted first
pub sorted_compare: bool,
/// If true, replace UUIDs with static placeholders.
pub normalized_uuids: bool,
/// If true, normalize timings in queries by replacing them with
/// static placeholders, for example:
///
/// `1s` -> `1.234ms`
pub normalized_metrics: bool,
/// If true, normalize filter predicates in explain plans by replacing them
/// with `FilterExec: <REDACTED>`
pub normalized_filters: bool,
}
impl Normalizer {
pub fn new() -> Self {
Default::default()
}
/// Take the output of running the query and apply the specified normalizations to them
pub fn normalize_results(&self, mut results: Vec<RecordBatch>) -> Vec<String> {
// compare against sorted results, if requested
if self.sorted_compare && !results.is_empty() {
let schema = results[0].schema();
let batch =
arrow::compute::concat_batches(&schema, &results).expect("concatenating batches");
results = vec![sort_record_batch(batch)];
}
let mut current_results = pretty_format_batches(&results)
.unwrap()
.trim()
.lines()
.map(|s| s.to_string())
.collect::<Vec<_>>();
// normalize UUIDs, if requested
if self.normalized_uuids {
let mut seen: HashMap<String, u128> = HashMap::new();
current_results = current_results
.into_iter()
.map(|s| {
// Rewrite parquet directory names like
// `32/51/216/13452/1d325760-2b20-48de-ab48-2267b034133d.parquet`
//
// to:
// 1/1/1/1/00000000-0000-0000-0000-000000000000.parquet
let s = REGEX_UUID.replace_all(&s, |s: &Captures<'_>| {
let next = seen.len() as u128;
Uuid::from_u128(
*seen
.entry(s.get(0).unwrap().as_str().to_owned())
.or_insert(next),
)
.to_string()
});
let s = normalize_for_variable_width(s);
REGEX_DIRS.replace_all(&s, "1/1/1/1").to_string()
})
.collect();
}
// normalize metrics, if requested
if self.normalized_metrics {
current_results = current_results
.into_iter()
.map(|s| {
// Replace timings with fixed value, e.g.:
//
// `1s` -> `1.234ms`
// `1.2ms` -> `1.234ms`
// `10.2μs` -> `1.234ms`
let s = REGEX_TIMING.replace_all(&s, "1.234ms");
let s = normalize_for_variable_width(s);
// Metrics are currently ordered by value (not by key), so different timings may
// reorder them. We "parse" the list and normalize the sorting. E.g.:
//
// `metrics=[]` => `metrics=[]`
// `metrics=[foo=1, bar=2]` => `metrics=[bar=2, foo=1]`
// `metrics=[foo=2, bar=1]` => `metrics=[bar=1, foo=2]`
REGEX_METRICS
.replace_all(&s, |c: &Captures<'_>| {
let mut metrics: Vec<_> = c[1].split(", ").collect();
metrics.sort();
format!("metrics=[{}]", metrics.join(", "))
})
.to_string()
})
.collect();
}
// normalize Filters, if requested
//
// Converts:
// FilterExec: time@2 < -9223372036854775808 OR time@2 > 1640995204240217000
//
// to
// FilterExec: <REDACTED>
if self.normalized_filters {
current_results = current_results
.into_iter()
.map(|s| {
REGEX_FILTER
.replace_all(&s, |_: &Captures<'_>| "FilterExec: <REDACTED>")
.to_string()
})
.collect();
}
current_results
}
/// Adds information on what normalizations were applied to the input
pub fn add_description(&self, output: &mut Vec<String>) {
if self.sorted_compare {
output.push("-- Results After Sorting".into())
}
if self.normalized_uuids {
output.push("-- Results After Normalizing UUIDs".into())
}
if self.normalized_metrics {
output.push("-- Results After Normalizing Metrics".into())
}
if self.normalized_filters {
output.push("-- Results After Normalizing Filters".into())
}
}
}
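Besides `normalize_results`, the struct exposes `add_description`, which pushes "-- Results After ..." marker lines into a caller-supplied buffer. A rough sketch of combining the two (assuming `batches: Vec<RecordBatch>` from an earlier query run; whether the markers precede or follow the results is the caller's choice):

let normalizer = Normalizer {
    sorted_compare: true,
    normalized_metrics: true,
    ..Default::default()
};
let mut output: Vec<String> = Vec::new();
// Pushes e.g. "-- Results After Sorting" and "-- Results After Normalizing Metrics"
normalizer.add_description(&mut output);
output.extend(normalizer.normalize_results(batches));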

View File

@ -56,5 +56,4 @@ insta = { version = "1.28.0", features = ["yaml"] }
iox_tests = { path = "../iox_tests" }
mutable_batch_lp = { path = "../mutable_batch_lp" }
object_store_metrics = { path = "../object_store_metrics" }
regex = "1.7.1"
test_helpers = { path = "../test_helpers" }

View File

@ -199,13 +199,12 @@ mod tests {
use super::*;
use crate::namespace::test_util::{clear_parquet_cache, querier_namespace};
use arrow::record_batch::RecordBatch;
use arrow_util::test_util::batches_to_sorted_lines;
use arrow_util::test_util::{batches_to_sorted_lines, Normalizer};
use data_types::ColumnType;
use datafusion::common::DataFusionError;
use iox_query::frontend::sql::SqlQueryPlanner;
use iox_tests::{TestCatalog, TestParquetFileBuilder};
use metric::{Observation, RawReporter};
use regex::Regex;
use snafu::{ResultExt, Snafu};
use trace::{span::SpanStatus, RingBufferTraceCollector};
@ -492,13 +491,13 @@ mod tests {
format_explain(&querier_namespace, "EXPLAIN SELECT * FROM cpu").await,
@r###"
---
- +---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
- "| plan_type | plan |"
- +---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
- "| logical_plan | TableScan: cpu projection=[foo, host, load, time] |"
- "| physical_plan | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/<uuid>.parquet, 1/1/1/1/<uuid>.parquet, 1/1/1/1/<uuid>.parquet, 1/1/2/2/<uuid>.parquet, 1/1/1/3/<uuid>.parquet]]}, projection=[foo, host, load, time] |"
- "| | |"
- +---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
- "----------"
- "| plan_type | plan |"
- "----------"
- "| logical_plan | TableScan: cpu projection=[foo, host, load, time] |"
- "| physical_plan | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000001.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000004.parquet]]}, projection=[foo, host, load, time] |"
- "| | |"
- "----------"
"###
);
@ -509,22 +508,22 @@ mod tests {
format_explain(&querier_namespace, "EXPLAIN SELECT * FROM mem ORDER BY host,time").await,
@r###"
---
- +---------------+--------------------------------------------------------------------------------------------------------------------------------------------------+
- "| plan_type | plan |"
- +---------------+--------------------------------------------------------------------------------------------------------------------------------------------------+
- "| logical_plan | Sort: mem.host ASC NULLS LAST, mem.time ASC NULLS LAST |"
- "| | TableScan: mem projection=[host, perc, time] |"
- "| physical_plan | SortExec: expr=[host@0 ASC NULLS LAST,time@2 ASC NULLS LAST] |"
- "| | CoalescePartitionsExec |"
- "| | UnionExec |"
- "| | CoalesceBatchesExec: target_batch_size=8192 |"
- "| | FilterExec: time@2 < 1 OR time@2 > 13 OR NOT host@0 = CAST(d AS Dictionary(Int32, Utf8)) |"
- "| | ParquetExec: limit=None, partitions={1 group: [[1/2/1/4/<uuid>.parquet]]}, projection=[host, perc, time] |"
- "| | CoalesceBatchesExec: target_batch_size=8192 |"
- "| | FilterExec: time@2 < 1 OR time@2 > 13 OR NOT host@0 = CAST(d AS Dictionary(Int32, Utf8)) |"
- "| | ParquetExec: limit=None, partitions={1 group: [[1/2/1/4/<uuid>.parquet]]}, projection=[host, perc, time] |"
- "| | |"
- +---------------+--------------------------------------------------------------------------------------------------------------------------------------------------+
- "----------"
- "| plan_type | plan |"
- "----------"
- "| logical_plan | Sort: mem.host ASC NULLS LAST, mem.time ASC NULLS LAST |"
- "| | TableScan: mem projection=[host, perc, time] |"
- "| physical_plan | SortExec: expr=[host@0 ASC NULLS LAST,time@2 ASC NULLS LAST] |"
- "| | CoalescePartitionsExec |"
- "| | UnionExec |"
- "| | CoalesceBatchesExec: target_batch_size=8192 |"
- "| | FilterExec: time@2 < 1 OR time@2 > 13 OR NOT host@0 = CAST(d AS Dictionary(Int32, Utf8)) |"
- "| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, projection=[host, perc, time] |"
- "| | CoalesceBatchesExec: target_batch_size=8192 |"
- "| | FilterExec: time@2 < 1 OR time@2 > 13 OR NOT host@0 = CAST(d AS Dictionary(Int32, Utf8)) |"
- "| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, projection=[host, perc, time] |"
- "| | |"
- "----------"
"###
);
@ -567,19 +566,19 @@ mod tests {
format_explain(&querier_namespace, "EXPLAIN SELECT * FROM cpu").await,
@r###"
---
- +---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
- "| plan_type | plan |"
- +---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
- "| logical_plan | TableScan: cpu projection=[foo, host, load, time] |"
- "| physical_plan | UnionExec |"
- "| | DeduplicateExec: [host@1 ASC,time@3 ASC] |"
- "| | SortPreservingMergeExec: [host@1 ASC,time@3 ASC] |"
- "| | UnionExec |"
- "| | ParquetExec: limit=None, partitions={1 group: [[1/1/2/2/<uuid>.parquet]]}, output_ordering=[host@1 ASC, time@3 ASC], projection=[foo, host, load, time] |"
- "| | ParquetExec: limit=None, partitions={1 group: [[1/1/2/2/<uuid>.parquet]]}, output_ordering=[host@1 ASC, time@3 ASC], projection=[foo, host, load, time] |"
- "| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/<uuid>.parquet, 1/1/1/1/<uuid>.parquet, 1/1/1/1/<uuid>.parquet, 1/1/1/3/<uuid>.parquet]]}, projection=[foo, host, load, time] |"
- "| | |"
- +---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
- "----------"
- "| plan_type | plan |"
- "----------"
- "| logical_plan | TableScan: cpu projection=[foo, host, load, time] |"
- "| physical_plan | UnionExec |"
- "| | DeduplicateExec: [host@1 ASC,time@3 ASC] |"
- "| | SortPreservingMergeExec: [host@1 ASC,time@3 ASC] |"
- "| | UnionExec |"
- "| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[host@1 ASC, time@3 ASC], projection=[foo, host, load, time] |"
- "| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[host@1 ASC, time@3 ASC], projection=[foo, host, load, time] |"
- "| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000004.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000005.parquet]]}, projection=[foo, host, load, time] |"
- "| | |"
- "----------"
"###
);
}
@ -599,15 +598,11 @@ mod tests {
async fn format_explain(querier_namespace: &Arc<QuerierNamespace>, sql: &str) -> Vec<String> {
let results = run(querier_namespace, sql, None).await;
let formatted = arrow_util::display::pretty_format_batches(&results).unwrap();
let regex = Regex::new("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}")
.expect("UUID regex");
formatted
.trim()
.split('\n')
.map(|s| regex.replace_all(s, "<uuid>").to_string())
.collect::<Vec<_>>()
let normalizer = Normalizer {
normalized_uuids: true,
..Default::default()
};
normalizer.normalize_results(results)
}
async fn run(

View File

@ -26,7 +26,6 @@ once_cell = { version = "1.17", features = ["parking_lot"] }
parking_lot = "0.12"
prost = "0.11"
rand = "0.8.3"
regex = "1.7.0"
reqwest = { version = "0.11", default-features = false, features = ["json", "rustls-tls"] }
snafu = "0.7"
sqlx = { version = "0.6", features = [ "runtime-tokio-rustls" , "postgres", "uuid" ] }
@ -35,5 +34,4 @@ test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
tokio = { version = "1.25", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] }
tokio-util = "0.7"
tonic = "0.8"
uuid = "1"
workspace-hack = { version = "0.1", path = "../workspace-hack" }

View File

@ -13,7 +13,7 @@ mod grpc;
mod mini_cluster;
mod server_fixture;
mod server_type;
mod snapshot_comparison;
pub mod snapshot_comparison;
mod steps;
mod udp_listener;

View File

@ -1,11 +1,9 @@
mod normalization;
mod queries;
use crate::snapshot_comparison::queries::TestQueries;
use crate::{run_influxql, run_sql, MiniCluster};
use crate::{run_influxql, run_sql, snapshot_comparison::queries::TestQueries, MiniCluster};
use snafu::{OptionExt, ResultExt, Snafu};
use std::fmt::{Display, Formatter};
use std::{
fmt::{Display, Formatter},
fs,
path::{Path, PathBuf},
};

View File

@ -1,199 +0,0 @@
use arrow::record_batch::RecordBatch;
use arrow_util::{display::pretty_format_batches, test_util::sort_record_batch};
use once_cell::sync::Lazy;
use regex::{Captures, Regex};
use std::{borrow::Cow, collections::HashMap};
use uuid::Uuid;
/// Match the parquet UUID
///
/// For example, given
/// `32/51/216/13452/1d325760-2b20-48de-ab48-2267b034133d.parquet`
///
/// matches `1d325760-2b20-48de-ab48-2267b034133d`
static REGEX_UUID: Lazy<Regex> = Lazy::new(|| {
Regex::new("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}").expect("UUID regex")
});
/// Match the parquet directory names
/// For example, given
/// `32/51/216/13452/1d325760-2b20-48de-ab48-2267b034133d.parquet`
///
/// matches `32/51/216/13452`
static REGEX_DIRS: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"[0-9]+/[0-9]+/[0-9]+/[0-9]+"#).expect("directory regex"));
/// Replace table row separators of flexible width with fixed width. This is required
/// because the original timing values may differ in "printed width", so the table
/// cells have different widths and hence the separators / borders. E.g.:
///
/// `+--+--+` -> `----------`
/// `+--+------+` -> `----------`
///
/// Note that we're kinda inexact with our regex here, but it gets the job done.
static REGEX_LINESEP: Lazy<Regex> = Lazy::new(|| Regex::new(r#"[+-]{6,}"#).expect("linesep regex"));
/// Similar to the row separator issue above, the table columns are right-padded
/// with spaces. Due to the different "printed width" of the timing values, we need
/// to normalize this padding as well. E.g.:
///
/// `     |` -> ` |`
/// `        |` -> ` |`
static REGEX_COL: Lazy<Regex> = Lazy::new(|| Regex::new(r#"\s+\|"#).expect("col regex"));
/// Matches lines like `metrics=[foo=1, bar=2]`
static REGEX_METRICS: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"metrics=\[([^\]]*)\]"#).expect("metrics regex"));
/// Matches things like `1s`, `1.2ms` and `10.2μs`
static REGEX_TIMING: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"[0-9]+(\.[0-9]+)?.s"#).expect("timing regex"));
/// Matches things like `FilterExec: time@2 < -9223372036854775808 OR time@2 > 1640995204240217000`
static REGEX_FILTER: Lazy<Regex> =
Lazy::new(|| Regex::new("FilterExec: .*").expect("filter regex"));
fn normalize_for_variable_width(s: Cow<str>) -> String {
let s = REGEX_LINESEP.replace_all(&s, "----------");
REGEX_COL.replace_all(&s, " |").to_string()
}
/// Set of normalizations to apply to the results of a query
#[derive(Debug, PartialEq, Eq, Default)]
pub struct Normalizer {
/// If true, results are sorted first
pub sorted_compare: bool,
/// If true, replace UUIDs with static placeholders.
pub normalized_uuids: bool,
/// If true, normalize timings in queries by replacing them with
/// static placeholders, for example:
///
/// `1s` -> `1.234ms`
pub normalized_metrics: bool,
/// If true, normalize filter predicates in explain plans by replacing them
/// with `FilterExec: <REDACTED>`
pub normalized_filters: bool,
}
impl Normalizer {
#[cfg(test)]
pub fn new() -> Self {
Default::default()
}
/// Take the output of running the query and apply the specified normalizations to them
pub fn normalize_results(&self, mut results: Vec<RecordBatch>) -> Vec<String> {
// compare against sorted results, if requested
if self.sorted_compare && !results.is_empty() {
let schema = results[0].schema();
let batch =
arrow::compute::concat_batches(&schema, &results).expect("concatenating batches");
results = vec![sort_record_batch(batch)];
}
let mut current_results = pretty_format_batches(&results)
.unwrap()
.trim()
.lines()
.map(|s| s.to_string())
.collect::<Vec<_>>();
// normalize UUIDs, if requested
if self.normalized_uuids {
let mut seen: HashMap<String, u128> = HashMap::new();
current_results = current_results
.into_iter()
.map(|s| {
// Rewrite parquet directory names like
// `32/51/216/13452/1d325760-2b20-48de-ab48-2267b034133d.parquet`
//
// to:
// 1/1/1/1/00000000-0000-0000-0000-000000000000.parquet
let s = REGEX_UUID.replace_all(&s, |s: &Captures| {
let next = seen.len() as u128;
Uuid::from_u128(
*seen
.entry(s.get(0).unwrap().as_str().to_owned())
.or_insert(next),
)
.to_string()
});
let s = normalize_for_variable_width(s);
REGEX_DIRS.replace_all(&s, "1/1/1/1").to_string()
})
.collect();
}
// normalize metrics, if requested
if self.normalized_metrics {
current_results = current_results
.into_iter()
.map(|s| {
// Replace timings with fixed value, e.g.:
//
// `1s` -> `1.234ms`
// `1.2ms` -> `1.234ms`
// `10.2μs` -> `1.234ms`
let s = REGEX_TIMING.replace_all(&s, "1.234ms");
let s = normalize_for_variable_width(s);
// Metrics are currently ordered by value (not by key), so different timings may
// reorder them. We "parse" the list and normalize the sorting. E.g.:
//
// `metrics=[]` => `metrics=[]`
// `metrics=[foo=1, bar=2]` => `metrics=[bar=2, foo=1]`
// `metrics=[foo=2, bar=1]` => `metrics=[bar=1, foo=2]`
REGEX_METRICS
.replace_all(&s, |c: &Captures| {
let mut metrics: Vec<_> = c[1].split(", ").collect();
metrics.sort();
format!("metrics=[{}]", metrics.join(", "))
})
.to_string()
})
.collect();
}
// normalize Filters, if requested
//
// Converts:
// FilterExec: time@2 < -9223372036854775808 OR time@2 > 1640995204240217000
//
// to
// FilterExec: <REDACTED>
if self.normalized_filters {
current_results = current_results
.into_iter()
.map(|s| {
REGEX_FILTER
.replace_all(&s, |_: &Captures| "FilterExec: <REDACTED>")
.to_string()
})
.collect();
}
current_results
}
/// Adds information on what normalizations were applied to the input
pub fn add_description(&self, output: &mut Vec<String>) {
if self.sorted_compare {
output.push("-- Results After Sorting".into())
}
if self.normalized_uuids {
output.push("-- Results After Normalizing UUIDs".into())
}
if self.normalized_metrics {
output.push("-- Results After Normalizing Metrics".into())
}
if self.normalized_filters {
output.push("-- Results After Normalizing Filters".into())
}
}
}

View File

@ -1,6 +1,5 @@
use arrow::record_batch::RecordBatch;
use super::normalization::Normalizer;
use arrow_util::test_util::Normalizer;
/// A query to run with optional annotations
#[derive(Debug, PartialEq, Eq, Default)]