From 67712b595c1e2203e83770127746b99878aac1a2 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 16 Nov 2022 16:14:55 -0500 Subject: [PATCH] Revert "chore: Update datafusion again (#6108)" (#6159) This reverts commit fbe9f27f107434623862c87626d8ad691eae495d. --- Cargo.lock | 49 ++-- Cargo.toml | 4 +- iox_query/src/exec/non_null_checker.rs | 4 +- iox_query/src/exec/schema_pivot.rs | 4 +- iox_query/src/exec/split.rs | 4 +- iox_query/src/provider/deduplicate.rs | 7 +- iox_query/src/provider/physical.rs | 147 ++--------- parquet_file/src/storage.rs | 2 - parquet_to_line_protocol/src/lib.rs | 1 - querier/src/ingester/flight_client.rs | 73 +----- querier/src/namespace/query_access.rs | 64 ++--- .../cases/in/duplicates_ingester.expected | 162 ++++++------ .../cases/in/duplicates_parquet.expected | 136 +++++----- .../cases/in/duplicates_parquet_many.expected | 20 +- query_tests/cases/in/pushdown.expected | 236 +++++++++--------- query_tests/cases/in/several_chunks.expected | 128 +++++----- query_tests/cases/in/two_chunks.expected | 58 ++--- workspace-hack/Cargo.toml | 2 +- 18 files changed, 460 insertions(+), 641 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 65ded38b6d..8151cc735e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1135,19 +1135,6 @@ dependencies = [ "syn", ] -[[package]] -name = "dashmap" -version = "5.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "907076dfda823b0b36d2a1bb5f90c96660a5bbcd7729e10727f07858f22c4edc" -dependencies = [ - "cfg-if", - "hashbrown 0.12.3", - "lock_api", - "once_cell", - "parking_lot_core 0.9.4", -] - [[package]] name = "data_types" version = "0.1.0" @@ -1168,8 +1155,8 @@ dependencies = [ [[package]] name = "datafusion" -version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=d2814c960168b45c4a0f5d7bbb72d9f412cb08bd#d2814c960168b45c4a0f5d7bbb72d9f412cb08bd" +version = "13.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=dd081d64a2fba8574e63bdd0662c14aec5852b48#dd081d64a2fba8574e63bdd0662c14aec5852b48" dependencies = [ "ahash 0.8.2", "arrow", @@ -1178,7 +1165,6 @@ dependencies = [ "bytes", "bzip2", "chrono", - "dashmap", "datafusion-common", "datafusion-expr", "datafusion-optimizer", @@ -1194,6 +1180,7 @@ dependencies = [ "log", "num_cpus", "object_store", + "ordered-float 3.4.0", "parking_lot 0.12.1", "parquet", "paste", @@ -1212,20 +1199,21 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=d2814c960168b45c4a0f5d7bbb72d9f412cb08bd#d2814c960168b45c4a0f5d7bbb72d9f412cb08bd" +version = "13.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=dd081d64a2fba8574e63bdd0662c14aec5852b48#dd081d64a2fba8574e63bdd0662c14aec5852b48" dependencies = [ "arrow", "chrono", "object_store", + "ordered-float 3.4.0", "parquet", "sqlparser 0.26.0", ] [[package]] name = "datafusion-expr" -version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=d2814c960168b45c4a0f5d7bbb72d9f412cb08bd#d2814c960168b45c4a0f5d7bbb72d9f412cb08bd" +version = "13.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=dd081d64a2fba8574e63bdd0662c14aec5852b48#dd081d64a2fba8574e63bdd0662c14aec5852b48" dependencies = [ "ahash 0.8.2", "arrow", @@ -1236,8 +1224,8 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=d2814c960168b45c4a0f5d7bbb72d9f412cb08bd#d2814c960168b45c4a0f5d7bbb72d9f412cb08bd" +version = "13.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=dd081d64a2fba8574e63bdd0662c14aec5852b48#dd081d64a2fba8574e63bdd0662c14aec5852b48" dependencies = [ "arrow", "async-trait", @@ -1251,8 +1239,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=d2814c960168b45c4a0f5d7bbb72d9f412cb08bd#d2814c960168b45c4a0f5d7bbb72d9f412cb08bd" +version = "13.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=dd081d64a2fba8574e63bdd0662c14aec5852b48#dd081d64a2fba8574e63bdd0662c14aec5852b48" dependencies = [ "ahash 0.8.2", "arrow", @@ -1270,6 +1258,7 @@ dependencies = [ "lazy_static", "md-5", "num-traits", + "ordered-float 3.4.0", "paste", "rand", "regex", @@ -1280,8 +1269,8 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=d2814c960168b45c4a0f5d7bbb72d9f412cb08bd#d2814c960168b45c4a0f5d7bbb72d9f412cb08bd" +version = "13.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=dd081d64a2fba8574e63bdd0662c14aec5852b48#dd081d64a2fba8574e63bdd0662c14aec5852b48" dependencies = [ "arrow", "datafusion", @@ -1294,8 +1283,8 @@ dependencies = [ [[package]] name = "datafusion-row" -version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=d2814c960168b45c4a0f5d7bbb72d9f412cb08bd#d2814c960168b45c4a0f5d7bbb72d9f412cb08bd" +version = "13.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=dd081d64a2fba8574e63bdd0662c14aec5852b48#dd081d64a2fba8574e63bdd0662c14aec5852b48" dependencies = [ "arrow", "datafusion-common", @@ -1305,8 +1294,8 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=d2814c960168b45c4a0f5d7bbb72d9f412cb08bd#d2814c960168b45c4a0f5d7bbb72d9f412cb08bd" +version = "13.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=dd081d64a2fba8574e63bdd0662c14aec5852b48#dd081d64a2fba8574e63bdd0662c14aec5852b48" dependencies = [ "arrow", "datafusion-common", diff --git a/Cargo.toml b/Cargo.toml index f7b6cab4b5..091ac86bd8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -111,8 +111,8 @@ license = "MIT OR Apache-2.0" [workspace.dependencies] arrow = { version = "26.0.0" } arrow-flight = { version = "26.0.0" } -datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="d2814c960168b45c4a0f5d7bbb72d9f412cb08bd", default-features = false } -datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="d2814c960168b45c4a0f5d7bbb72d9f412cb08bd" } +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="dd081d64a2fba8574e63bdd0662c14aec5852b48", default-features = false } +datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="dd081d64a2fba8574e63bdd0662c14aec5852b48" } hashbrown = { version = "0.13.1" } parquet = { version = "26.0.0" } diff --git a/iox_query/src/exec/non_null_checker.rs b/iox_query/src/exec/non_null_checker.rs index 08fd7bc817..ce6ed0a0f4 100644 --- a/iox_query/src/exec/non_null_checker.rs +++ b/iox_query/src/exec/non_null_checker.rs @@ -218,8 +218,8 @@ impl ExecutionPlan for NonNullCheckerExec { None } - fn required_input_distribution(&self) -> Vec { - vec![Distribution::UnspecifiedDistribution] + fn required_child_distribution(&self) -> Distribution { + Distribution::UnspecifiedDistribution } fn children(&self) -> Vec> { diff --git a/iox_query/src/exec/schema_pivot.rs b/iox_query/src/exec/schema_pivot.rs index 293afe1356..5d2a2696ae 100644 --- a/iox_query/src/exec/schema_pivot.rs +++ b/iox_query/src/exec/schema_pivot.rs @@ -189,8 +189,8 @@ impl ExecutionPlan for SchemaPivotExec { None } - fn required_input_distribution(&self) -> Vec { - vec![Distribution::UnspecifiedDistribution] + fn required_child_distribution(&self) -> Distribution { + Distribution::UnspecifiedDistribution } fn children(&self) -> Vec> { diff --git a/iox_query/src/exec/split.rs b/iox_query/src/exec/split.rs index 1a5639e603..efe0212fa7 100644 --- a/iox_query/src/exec/split.rs +++ b/iox_query/src/exec/split.rs @@ -207,8 +207,8 @@ impl ExecutionPlan for StreamSplitExec { /// Always require a single input (eventually we might imagine /// running this on multiple partitions concurrently to compute /// the splits in parallel, but not now) - fn required_input_distribution(&self) -> Vec { - vec![Distribution::SinglePartition] + fn required_child_distribution(&self) -> Distribution { + Distribution::SinglePartition } fn children(&self) -> Vec> { diff --git a/iox_query/src/provider/deduplicate.rs b/iox_query/src/provider/deduplicate.rs index f75bb7c7ba..2f765e552d 100644 --- a/iox_query/src/provider/deduplicate.rs +++ b/iox_query/src/provider/deduplicate.rs @@ -219,11 +219,8 @@ impl ExecutionPlan for DeduplicateExec { Ok(AdapterStream::adapt(self.schema(), rx, handle)) } - fn required_input_distribution(&self) -> Vec { - // For now use a single input -- it might be helpful - // eventually to deduplicate in parallel by hash partitioning - // the inputs (based on sort keys) - vec![Distribution::SinglePartition] + fn required_child_distribution(&self) -> Distribution { + Distribution::SinglePartition } fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result { diff --git a/iox_query/src/provider/physical.rs b/iox_query/src/provider/physical.rs index 945fcaa3e4..b2977e3ee2 100644 --- a/iox_query/src/provider/physical.rs +++ b/iox_query/src/provider/physical.rs @@ -1,9 +1,6 @@ //! Implementation of a DataFusion PhysicalPlan node across partition chunks -use crate::{ - provider::record_batch_exec::RecordBatchesExec, util::arrow_sort_key_exprs, QueryChunk, - QueryChunkData, -}; +use crate::{provider::record_batch_exec::RecordBatchesExec, QueryChunk, QueryChunkData}; use arrow::{datatypes::SchemaRef, record_batch::RecordBatch}; use data_types::TableSummary; use datafusion::{ @@ -18,65 +15,12 @@ use datafusion::{ }; use object_store::ObjectMeta; use predicate::Predicate; -use schema::{sort::SortKey, Schema}; +use schema::Schema; use std::{ collections::{hash_map::Entry, HashMap}, sync::Arc, }; -/// Holds a list of chunks that all have the same "URL" -#[derive(Debug)] -struct ParquetChunkList { - object_store_url: ObjectStoreUrl, - object_metas: Vec, - sort_key: Option, -} - -impl ParquetChunkList { - fn new(object_store_url: ObjectStoreUrl, chunk: &dyn QueryChunk, meta: ObjectMeta) -> Self { - Self { - object_store_url, - object_metas: vec![meta], - sort_key: chunk.sort_key().cloned(), - } - } - - /// Add the chunk to the list of files - fn add_parquet_file(&mut self, chunk: &dyn QueryChunk, meta: ObjectMeta) { - self.object_metas.push(meta); - - self.sort_key = combine_sort_key(self.sort_key.take(), chunk.sort_key()); - } -} - -/// Combines the existing sort key with the sort key of the chunk, -/// returning the new combined compatible sort key that describes both -/// chunks. -/// -/// If it is not possible to find a compatible sort key, None is -/// returned signifying "unknown sort order" -fn combine_sort_key( - existing_sort_key: Option, - chunk_sort_key: Option<&SortKey>, -) -> Option { - if let (Some(existing_sort_key), Some(chunk_sort_key)) = (existing_sort_key, chunk_sort_key) { - let combined_sort_key = SortKey::try_merge_key(&existing_sort_key, chunk_sort_key); - - // Avoid cloning the sort key when possible, as the sort key - // is likely to commonly be the same - match combined_sort_key { - Some(combined_sort_key) if combined_sort_key == &existing_sort_key => { - Some(existing_sort_key) - } - Some(combined_sort_key) => Some(combined_sort_key.clone()), - None => None, - } - } else { - // no existing sort key means the data wasn't consistently sorted so leave it alone - None - } -} - /// Place [chunk](QueryChunk)s into physical nodes. /// /// This will group chunks into [record batch](QueryChunkData::RecordBatches) and [parquet @@ -108,7 +52,7 @@ pub fn chunks_to_physical_nodes( } let mut record_batch_chunks: Vec<(SchemaRef, Vec, Arc)> = vec![]; - let mut parquet_chunks: HashMap = HashMap::new(); + let mut parquet_chunks: HashMap)> = HashMap::new(); for chunk in &chunks { match chunk.data() { @@ -119,14 +63,12 @@ pub fn chunks_to_physical_nodes( let url_str = parquet_input.object_store_url.as_str().to_owned(); match parquet_chunks.entry(url_str) { Entry::Occupied(mut o) => { - o.get_mut() - .add_parquet_file(chunk.as_ref(), parquet_input.object_meta); + o.get_mut().1.push(parquet_input.object_meta); } Entry::Vacant(v) => { - v.insert(ParquetChunkList::new( + v.insert(( parquet_input.object_store_url, - chunk.as_ref(), - parquet_input.object_meta, + vec![parquet_input.object_meta], )); } } @@ -144,15 +86,9 @@ pub fn chunks_to_physical_nodes( let mut parquet_chunks: Vec<_> = parquet_chunks.into_iter().collect(); parquet_chunks.sort_by_key(|(url_str, _)| url_str.clone()); let target_partitions = context.session_config().target_partitions; - for (_url_str, chunk_list) in parquet_chunks { - let ParquetChunkList { - object_store_url, - object_metas, - sort_key, - } = chunk_list; - + for (_url_str, (url, chunks)) in parquet_chunks { let file_groups = distribute( - object_metas.into_iter().map(|object_meta| PartitionedFile { + chunks.into_iter().map(|object_meta| PartitionedFile { object_meta, partition_values: vec![], range: None, @@ -160,26 +96,21 @@ pub fn chunks_to_physical_nodes( }), target_partitions, ); - - // Tell datafusion about the sort key, if any - let file_schema = iox_schema.as_arrow(); - let output_ordering = - sort_key.map(|sort_key| arrow_sort_key_exprs(&sort_key, &file_schema)); - let base_config = FileScanConfig { - object_store_url, - file_schema, + object_store_url: url, + file_schema: iox_schema.as_arrow(), file_groups, statistics: Statistics::default(), projection: None, limit: None, table_partition_cols: vec![], config_options: context.session_config().config_options(), - output_ordering, }; - let meta_size_hint = None; - let parquet_exec = ParquetExec::new(base_config, predicate.filter_expr(), meta_size_hint); - output_nodes.push(Arc::new(parquet_exec)); + output_nodes.push(Arc::new(ParquetExec::new( + base_config, + predicate.filter_expr(), + None, + ))); } assert!(!output_nodes.is_empty()); @@ -213,8 +144,6 @@ where #[cfg(test)] mod tests { - use schema::sort::SortKeyBuilder; - use super::*; #[test] @@ -227,50 +156,4 @@ mod tests { assert_eq!(distribute(0..3u8, 10), vec![vec![0], vec![1], vec![2]],); } - - #[test] - fn test_combine_sort_key() { - let skey_t1 = SortKeyBuilder::new() - .with_col("t1") - .with_col("time") - .build(); - - let skey_t1_t2 = SortKeyBuilder::new() - .with_col("t1") - .with_col("t2") - .with_col("time") - .build(); - - let skey_t2_t1 = SortKeyBuilder::new() - .with_col("t2") - .with_col("t1") - .with_col("time") - .build(); - - assert_eq!(combine_sort_key(None, None), None); - assert_eq!(combine_sort_key(Some(skey_t1.clone()), None), None); - assert_eq!(combine_sort_key(None, Some(&skey_t1)), None); - - assert_eq!( - combine_sort_key(Some(skey_t1.clone()), Some(&skey_t1)), - Some(skey_t1.clone()) - ); - - assert_eq!( - combine_sort_key(Some(skey_t1.clone()), Some(&skey_t1_t2)), - Some(skey_t1_t2.clone()) - ); - - assert_eq!( - combine_sort_key(Some(skey_t1_t2.clone()), Some(&skey_t1)), - Some(skey_t1_t2.clone()) - ); - - assert_eq!( - combine_sort_key(Some(skey_t2_t1.clone()), Some(&skey_t1)), - Some(skey_t2_t1.clone()) - ); - - assert_eq!(combine_sort_key(Some(skey_t2_t1), Some(&skey_t1_t2)), None); - } } diff --git a/parquet_file/src/storage.rs b/parquet_file/src/storage.rs index a41d8fda4e..963bab4618 100644 --- a/parquet_file/src/storage.rs +++ b/parquet_file/src/storage.rs @@ -123,8 +123,6 @@ impl ParquetExecInput { table_partition_cols: vec![], // TODO avoid this `copied_config` when config_options are directly available on context config_options: session_ctx.copied_config().config_options(), - // Parquet files ARE actually sorted but we don't care here since we just construct a `collect` plan. - output_ordering: None, }; let exec = ParquetExec::new(base_config, None, None); let exec_schema = exec.schema(); diff --git a/parquet_to_line_protocol/src/lib.rs b/parquet_to_line_protocol/src/lib.rs index 46710d5db1..0d8ff70210 100644 --- a/parquet_to_line_protocol/src/lib.rs +++ b/parquet_to_line_protocol/src/lib.rs @@ -213,7 +213,6 @@ impl ParquetFileReader { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: None, config_options: ConfigOptions::new().into_shareable(), }; diff --git a/querier/src/ingester/flight_client.rs b/querier/src/ingester/flight_client.rs index c621802a7f..fae40e0646 100644 --- a/querier/src/ingester/flight_client.rs +++ b/querier/src/ingester/flight_client.rs @@ -217,32 +217,11 @@ impl CachedConnection { #[cfg(test)] mod tests { use data_types::{NamespaceId, TableId}; - use datafusion::prelude::{col, lit, when, Expr}; + use datafusion::prelude::{col, lit}; use predicate::Predicate; use super::*; - #[test] - fn serialize_deeply_nested_and() { - // we need more stack space so this doesn't overflow in dev builds - std::thread::Builder::new() - .stack_size(10_000_000) - .spawn(|| { - let n = 100; - println!("testing: {n}"); - - // build a deeply nested (a < 5) AND (a < 5) AND .... tree - let expr_base = col("a").lt(lit(5i32)); - let expr = (0..n).fold(expr_base.clone(), |expr, _| expr.and(expr_base.clone())); - - let (request, request2) = serialize_roundtrip(expr); - assert_eq!(request, request2); - }) - .expect("spawning thread") - .join() - .expect("joining thread"); - } - #[test] fn serialize_deeply_nested_predicate() { // see https://github.com/influxdata/influxdb_iox/issues/5974 @@ -255,54 +234,28 @@ mod tests { for n in [1, 2, n_max] { println!("testing: {n}"); + let expr_base = col("a").lt(lit(5i32)); + let expr = (0..n).fold(expr_base.clone(), |expr, _| expr.and(expr_base.clone())); - // build a deeply recursive nested expression: - // - // CASE - // WHEN TRUE - // THEN (WHEN ...) - // ELSE FALSE - // - let expr = (0..n).fold(lit(false), |expr, _|{ - when(lit(true), expr) - .end() - .unwrap() - }); + let predicate = Predicate {exprs: vec![expr], ..Default::default()}; - let (request1, request2) = serialize_roundtrip(expr); + let request = IngesterQueryRequest { + namespace_id: NamespaceId::new(42), + table_id: TableId::new(1337), + columns: vec![String::from("col1"), String::from("col2")], + predicate: Some(predicate), + }; + + let proto = serialize_ingester_query_request(request.clone()).expect("serialization"); + let request2 = IngesterQueryRequest::try_from(proto).expect("deserialization"); - // expect that the self preservation mechanism has - // kicked in and the predicate has been ignored. if request2.predicate.is_none() { assert!(n > 2, "not really deeply nested"); return; - } else { - assert_eq!(request1, request2); } } panic!("did not find a 'too deeply nested' expression, tested up to a depth of {n_max}") }).expect("spawning thread").join().expect("joining thread"); } - - /// Creates a [`IngesterQueryRequest`] and round trips it through - /// serialization, returning both the original and the serialized - /// request - fn serialize_roundtrip(expr: Expr) -> (IngesterQueryRequest, IngesterQueryRequest) { - let predicate = Predicate { - exprs: vec![expr], - ..Default::default() - }; - - let request = IngesterQueryRequest { - namespace_id: NamespaceId::new(42), - table_id: TableId::new(1337), - columns: vec![String::from("col1"), String::from("col2")], - predicate: Some(predicate), - }; - - let proto = serialize_ingester_query_request(request.clone()).expect("serialization"); - let request2 = IngesterQueryRequest::try_from(proto).expect("deserialization"); - (request, request2) - } } diff --git a/querier/src/namespace/query_access.rs b/querier/src/namespace/query_access.rs index 7dd36ad112..1d6df8fe5c 100644 --- a/querier/src/namespace/query_access.rs +++ b/querier/src/namespace/query_access.rs @@ -508,24 +508,24 @@ mod tests { &querier_namespace, "EXPLAIN SELECT * FROM mem ORDER BY host,time", &[ - "+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+", - "| plan_type | plan |", - "+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+", - "| logical_plan | Sort: mem.host ASC NULLS LAST, mem.time ASC NULLS LAST |", - "| | Projection: mem.host, mem.perc, mem.time |", - "| | TableScan: mem projection=[host, perc, time] |", - "| physical_plan | SortExec: [host@0 ASC NULLS LAST,time@2 ASC NULLS LAST] |", - "| | CoalescePartitionsExec |", - "| | ProjectionExec: expr=[host@0 as host, perc@1 as perc, time@2 as time] |", - "| | UnionExec |", - "| | CoalesceBatchesExec: target_batch_size=4096 |", - "| | FilterExec: time@2 < 1 OR time@2 > 13 OR NOT host@0 = CAST(d AS Dictionary(Int32, Utf8)) |", - "| | ParquetExec: limit=None, partitions=[1/2/1/4/.parquet], output_ordering=[host@0 ASC, time@2 ASC], projection=[host, perc, time] |", - "| | CoalesceBatchesExec: target_batch_size=4096 |", - "| | FilterExec: time@2 < 1 OR time@2 > 13 OR NOT host@0 = CAST(d AS Dictionary(Int32, Utf8)) |", - "| | ParquetExec: limit=None, partitions=[1/2/1/4/.parquet], output_ordering=[host@0 ASC, time@2 ASC], projection=[host, perc, time] |", - "| | |", - "+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+", + "+---------------+---------------------------------------------------------------------------------------------------------------------------------------+", + "| plan_type | plan |", + "+---------------+---------------------------------------------------------------------------------------------------------------------------------------+", + "| logical_plan | Sort: mem.host ASC NULLS LAST, mem.time ASC NULLS LAST |", + "| | Projection: mem.host, mem.perc, mem.time |", + "| | TableScan: mem projection=[host, perc, time] |", + "| physical_plan | SortExec: [host@0 ASC NULLS LAST,time@2 ASC NULLS LAST] |", + "| | CoalescePartitionsExec |", + "| | ProjectionExec: expr=[host@0 as host, perc@1 as perc, time@2 as time] |", + "| | UnionExec |", + "| | CoalesceBatchesExec: target_batch_size=4096 |", + "| | FilterExec: time@2 < 1 OR time@2 > 13 OR NOT host@0 = CAST(d AS Dictionary(Int32, Utf8)) |", + "| | ParquetExec: limit=None, partitions=[1/2/1/4/.parquet], projection=[host, perc, time] |", + "| | CoalesceBatchesExec: target_batch_size=4096 |", + "| | FilterExec: time@2 < 1 OR time@2 > 13 OR NOT host@0 = CAST(d AS Dictionary(Int32, Utf8)) |", + "| | ParquetExec: limit=None, partitions=[1/2/1/4/.parquet], projection=[host, perc, time] |", + "| | |", + "+---------------+---------------------------------------------------------------------------------------------------------------------------------------+", ], ) .await; @@ -569,20 +569,20 @@ mod tests { "EXPLAIN SELECT * FROM cpu", &[ "+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+", - "| plan_type | plan |", - "+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+", - "| logical_plan | Projection: cpu.foo, cpu.host, cpu.load, cpu.time |", - "| | TableScan: cpu projection=[foo, host, load, time] |", - "| physical_plan | ProjectionExec: expr=[foo@0 as foo, host@1 as host, load@2 as load, time@3 as time] |", - "| | UnionExec |", - "| | DeduplicateExec: [host@1 ASC,time@3 ASC] |", - "| | SortPreservingMergeExec: [host@1 ASC,time@3 ASC] |", - "| | UnionExec |", - "| | ParquetExec: limit=None, partitions=[1/1/2/2/.parquet], output_ordering=[host@1 ASC, time@3 ASC], projection=[foo, host, load, time] |", - "| | ParquetExec: limit=None, partitions=[1/1/2/2/.parquet], output_ordering=[host@1 ASC, time@3 ASC], projection=[foo, host, load, time] |", - "| | ParquetExec: limit=None, partitions=[1/1/1/1/.parquet, 1/1/1/1/.parquet, 1/1/1/1/.parquet, 1/1/1/3/.parquet], projection=[foo, host, load, time] |", - "| | |", - "+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+", + "| plan_type | plan |", + "+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+", + "| logical_plan | Projection: cpu.foo, cpu.host, cpu.load, cpu.time |", + "| | TableScan: cpu projection=[foo, host, load, time] |", + "| physical_plan | ProjectionExec: expr=[foo@0 as foo, host@1 as host, load@2 as load, time@3 as time] |", + "| | UnionExec |", + "| | DeduplicateExec: [host@1 ASC,time@3 ASC] |", + "| | SortPreservingMergeExec: [host@1 ASC,time@3 ASC] |", + "| | UnionExec |", + "| | ParquetExec: limit=None, partitions=[1/1/2/2/.parquet], projection=[foo, host, load, time] |", + "| | ParquetExec: limit=None, partitions=[1/1/2/2/.parquet], projection=[foo, host, load, time] |", + "| | ParquetExec: limit=None, partitions=[1/1/1/1/.parquet, 1/1/1/1/.parquet, 1/1/1/1/.parquet, 1/1/1/3/.parquet], projection=[foo, host, load, time] |", + "| | |", + "+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+", ], ) .await; diff --git a/query_tests/cases/in/duplicates_ingester.expected b/query_tests/cases/in/duplicates_ingester.expected index 5aa8b0cc41..560145efe9 100644 --- a/query_tests/cases/in/duplicates_ingester.expected +++ b/query_tests/cases/in/duplicates_ingester.expected @@ -1,91 +1,91 @@ -- Test Setup: OneMeasurementFourChunksWithDuplicatesWithIngester -- SQL: explain select time, state, city, min_temp, max_temp, area from h2o order by time, state, city; -- Results After Normalizing UUIDs -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: h2o.time ASC NULLS LAST, h2o.state ASC NULLS LAST, h2o.city ASC NULLS LAST | -| | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area | -| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] | -| physical_plan | SortExec: [time@0 ASC NULLS LAST,state@1 ASC NULLS LAST,city@2 ASC NULLS LAST] | -| | CoalescePartitionsExec | -| | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | UnionExec | -| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] | -| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | -| | DeduplicateExec: [city@1 ASC,state@4 ASC,time@5 ASC] | -| | SortExec: [city@1 ASC,state@4 ASC,time@5 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | -| | | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: h2o.time ASC NULLS LAST, h2o.state ASC NULLS LAST, h2o.city ASC NULLS LAST | +| | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area | +| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] | +| physical_plan | SortExec: [time@0 ASC NULLS LAST,state@1 ASC NULLS LAST,city@2 ASC NULLS LAST] | +| | CoalescePartitionsExec | +| | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | UnionExec | +| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] | +| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[area, city, max_temp, min_temp, state, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], projection=[area, city, max_temp, min_temp, state, time] | +| | DeduplicateExec: [city@1 ASC,state@4 ASC,time@5 ASC] | +| | SortExec: [city@1 ASC,state@4 ASC,time@5 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], projection=[area, city, max_temp, min_temp, state, time] | +| | | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: EXPLAIN select time, state, city, min_temp, max_temp, area from h2o; -- Results After Normalizing UUIDs -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area | -| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] | -| physical_plan | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] | -| | UnionExec | -| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] | -| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | -| | DeduplicateExec: [city@1 ASC,state@4 ASC,time@5 ASC] | -| | SortExec: [city@1 ASC,state@4 ASC,time@5 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | -| | | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area | +| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] | +| physical_plan | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] | +| | UnionExec | +| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] | +| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[area, city, max_temp, min_temp, state, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], projection=[area, city, max_temp, min_temp, state, time] | +| | DeduplicateExec: [city@1 ASC,state@4 ASC,time@5 ASC] | +| | SortExec: [city@1 ASC,state@4 ASC,time@5 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], projection=[area, city, max_temp, min_temp, state, time] | +| | | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: EXPLAIN select state as name from h2o UNION ALL select city as name from h2o; -- Results After Normalizing UUIDs -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Union | -| | Projection: h2o.state AS name | -| | TableScan: h2o projection=[state] | -| | Projection: h2o.city AS name | -| | TableScan: h2o projection=[city] | -| physical_plan | UnionExec | -| | ProjectionExec: expr=[state@0 as name] | -| | UnionExec | -| | ProjectionExec: expr=[state@1 as state] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] | -| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] | -| | ProjectionExec: expr=[state@1 as state] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [city@0 ASC,state@1 ASC,time@2 ASC] | -| | SortExec: [city@0 ASC,state@1 ASC,time@2 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], output_ordering=[state@0 ASC], projection=[state] | -| | ProjectionExec: expr=[city@0 as name] | -| | UnionExec | -| | ProjectionExec: expr=[city@0 as city] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] | -| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] | -| | ProjectionExec: expr=[city@0 as city] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [city@0 ASC,state@1 ASC,time@2 ASC] | -| | SortExec: [city@0 ASC,state@1 ASC,time@2 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], output_ordering=[city@0 ASC], projection=[city] | -| | | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Union | +| | Projection: h2o.state AS name | +| | TableScan: h2o projection=[state] | +| | Projection: h2o.city AS name | +| | TableScan: h2o projection=[city] | +| physical_plan | UnionExec | +| | ProjectionExec: expr=[state@0 as name] | +| | UnionExec | +| | ProjectionExec: expr=[state@1 as state] | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] | +| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[city, state, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], projection=[city, state, time] | +| | ProjectionExec: expr=[state@1 as state] | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [city@0 ASC,state@1 ASC,time@2 ASC] | +| | SortExec: [city@0 ASC,state@1 ASC,time@2 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], projection=[state] | +| | ProjectionExec: expr=[city@0 as name] | +| | UnionExec | +| | ProjectionExec: expr=[city@0 as city] | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] | +| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[city, state, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], projection=[city, state, time] | +| | ProjectionExec: expr=[city@0 as city] | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [city@0 ASC,state@1 ASC,time@2 ASC] | +| | SortExec: [city@0 ASC,state@1 ASC,time@2 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], projection=[city] | +| | | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: select count(*) from h2o; +-----------------+ | COUNT(UInt8(1)) | diff --git a/query_tests/cases/in/duplicates_parquet.expected b/query_tests/cases/in/duplicates_parquet.expected index bde1410942..57751bb248 100644 --- a/query_tests/cases/in/duplicates_parquet.expected +++ b/query_tests/cases/in/duplicates_parquet.expected @@ -1,75 +1,75 @@ -- Test Setup: OneMeasurementFourChunksWithDuplicatesParquetOnly -- SQL: explain select time, state, city, min_temp, max_temp, area from h2o order by time, state, city; -- Results After Normalizing UUIDs -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: h2o.time ASC NULLS LAST, h2o.state ASC NULLS LAST, h2o.city ASC NULLS LAST | -| | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area | -| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] | -| physical_plan | SortExec: [time@0 ASC NULLS LAST,state@1 ASC NULLS LAST,city@2 ASC NULLS LAST] | -| | CoalescePartitionsExec | -| | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | UnionExec | -| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] | -| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | -| | | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: h2o.time ASC NULLS LAST, h2o.state ASC NULLS LAST, h2o.city ASC NULLS LAST | +| | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area | +| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] | +| physical_plan | SortExec: [time@0 ASC NULLS LAST,state@1 ASC NULLS LAST,city@2 ASC NULLS LAST] | +| | CoalescePartitionsExec | +| | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | UnionExec | +| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] | +| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[area, city, max_temp, min_temp, state, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], projection=[area, city, max_temp, min_temp, state, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[area, city, max_temp, min_temp, state, time] | +| | | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: EXPLAIN select time, state, city, min_temp, max_temp, area from h2o; -- Results After Normalizing UUIDs -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area | -| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] | -| physical_plan | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] | -| | UnionExec | -| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] | -| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | -| | | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area | +| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] | +| physical_plan | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] | +| | UnionExec | +| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] | +| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[area, city, max_temp, min_temp, state, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], projection=[area, city, max_temp, min_temp, state, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[area, city, max_temp, min_temp, state, time] | +| | | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: EXPLAIN select state as name from h2o UNION ALL select city as name from h2o; -- Results After Normalizing UUIDs -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Union | -| | Projection: h2o.state AS name | -| | TableScan: h2o projection=[state] | -| | Projection: h2o.city AS name | -| | TableScan: h2o projection=[city] | -| physical_plan | UnionExec | -| | ProjectionExec: expr=[state@0 as name] | -| | UnionExec | -| | ProjectionExec: expr=[state@1 as state] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] | -| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], output_ordering=[state@0 ASC], projection=[state] | -| | ProjectionExec: expr=[city@0 as name] | -| | UnionExec | -| | ProjectionExec: expr=[city@0 as city] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] | -| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], output_ordering=[city@0 ASC], projection=[city] | -| | | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Union | +| | Projection: h2o.state AS name | +| | TableScan: h2o projection=[state] | +| | Projection: h2o.city AS name | +| | TableScan: h2o projection=[city] | +| physical_plan | UnionExec | +| | ProjectionExec: expr=[state@0 as name] | +| | UnionExec | +| | ProjectionExec: expr=[state@1 as state] | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] | +| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[city, state, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], projection=[city, state, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[state] | +| | ProjectionExec: expr=[city@0 as name] | +| | UnionExec | +| | ProjectionExec: expr=[city@0 as city] | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] | +| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[city, state, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], projection=[city, state, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[city] | +| | | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: select count(*) from h2o; +-----------------+ | COUNT(UInt8(1)) | @@ -91,8 +91,8 @@ | | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC], metrics=[elapsed_compute=1.234ms, mem_used=0, num_dupes=2, output_rows=5, spill_count=0, spilled_bytes=0] | | | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC], metrics=[elapsed_compute=1.234ms, mem_used=0, output_rows=7, spill_count=0, spilled_bytes=0] | | | UnionExec, metrics=[elapsed_compute=1.234ms, mem_used=0, output_rows=7, spill_count=0, spilled_bytes=0] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=state_min@0 <= MA AND MA <= state_max@1, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=474, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=4, page_index_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=1.234ms, page_index_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=0, predicate_evaluation_errors{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=0, pushdown_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=1.234ms, pushdown_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=0, row_groups_pruned{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=state_min@0 <= MA AND MA <= state_max@1, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=632, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=3, page_index_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=1.234ms, page_index_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=0, predicate_evaluation_errors{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=0, pushdown_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=1.234ms, pushdown_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=3, row_groups_pruned{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], predicate=state_min@0 <= MA AND MA <= state_max@1, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=591, bytes_scanned{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=628, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=5, page_index_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=1.234ms, page_index_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=1.234ms, page_index_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=0, page_index_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=0, predicate_evaluation_errors{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=0, predicate_evaluation_errors{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=0, pushdown_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=1.234ms, pushdown_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=1.234ms, pushdown_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=2, pushdown_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=3, row_groups_pruned{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=0, row_groups_pruned{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=state_min@0 <= MA AND MA <= state_max@1, projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=474, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=4, predicate_evaluation_errors{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=0, pushdown_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=1.234ms, pushdown_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=0, row_groups_pruned{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=state_min@0 <= MA AND MA <= state_max@1, projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=632, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=3, predicate_evaluation_errors{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=0, pushdown_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=1.234ms, pushdown_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=3, row_groups_pruned{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], predicate=state_min@0 <= MA AND MA <= state_max@1, projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=591, bytes_scanned{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=628, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=5, predicate_evaluation_errors{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=0, predicate_evaluation_errors{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=0, pushdown_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=1.234ms, pushdown_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=1.234ms, pushdown_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=2, pushdown_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=3, row_groups_pruned{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=0, row_groups_pruned{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] | | | | ---------- diff --git a/query_tests/cases/in/duplicates_parquet_many.expected b/query_tests/cases/in/duplicates_parquet_many.expected index 679b685697..3de03d3c29 100644 --- a/query_tests/cases/in/duplicates_parquet_many.expected +++ b/query_tests/cases/in/duplicates_parquet_many.expected @@ -23,16 +23,16 @@ | | DeduplicateExec: [tag@1 ASC,time@2 ASC] | | | SortPreservingMergeExec: [tag@1 ASC,time@2 ASC] | | | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000004.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000005.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000006.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000007.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000008.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000009.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[f, tag, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], projection=[f, tag, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], projection=[f, tag, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[f, tag, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000004.parquet], projection=[f, tag, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000005.parquet], projection=[f, tag, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000006.parquet], projection=[f, tag, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000007.parquet], projection=[f, tag, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000008.parquet], projection=[f, tag, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000009.parquet], projection=[f, tag, time] | | | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-00000000000a.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000b.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000c.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000d.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000e.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000f.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000010.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000011.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000012.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000013.parquet], projection=[f] | | | | +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/query_tests/cases/in/pushdown.expected b/query_tests/cases/in/pushdown.expected index ae21077c1b..8347c216aa 100644 --- a/query_tests/cases/in/pushdown.expected +++ b/query_tests/cases/in/pushdown.expected @@ -14,15 +14,15 @@ +-------+--------+--------------------------------+-----------+ -- SQL: EXPLAIN SELECT * from restaurant; -- Results After Normalizing UUIDs -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | TableScan: restaurant projection=[count, system, time, town] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[town@3 ASC, time@2 ASC], projection=[count, system, time, town] | -| | | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+--------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | TableScan: restaurant projection=[count, system, time, town] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[count, system, time, town] | +| | | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where count > 200; -- Results After Sorting +-------+--------+--------------------------------+-----------+ @@ -37,49 +37,49 @@ +-------+--------+--------------------------------+-----------+ -- SQL: EXPLAIN SELECT * from restaurant where count > 200; -- Results After Normalizing UUIDs -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: CAST(restaurant.count AS Int64) > Int64(200) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200)] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: CAST(count@0 AS Int64) > 200 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[town@3 ASC, time@2 ASC], projection=[count, system, time, town] | -| | | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: CAST(restaurant.count AS Int64) > Int64(200) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200)] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: CAST(count@0 AS Int64) > 200 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[count, system, time, town] | +| | | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: EXPLAIN SELECT * from restaurant where count > 200.0; -- Results After Normalizing UUIDs -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: CAST(restaurant.count AS Float64) > Float64(200) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Float64) > Float64(200)] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: CAST(count@0 AS Float64) > 200 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[town@3 ASC, time@2 ASC], projection=[count, system, time, town] | -| | | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: CAST(restaurant.count AS Float64) > Float64(200) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Float64) > Float64(200)] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: CAST(count@0 AS Float64) > 200 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[count, system, time, town] | +| | | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: EXPLAIN SELECT * from restaurant where system > 4.0; -- Results After Normalizing UUIDs -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: restaurant.system > Float64(4) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(4)] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: system@1 > 4 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 4, output_ordering=[town@3 ASC, time@2 ASC], projection=[count, system, time, town] | -| | | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.system > Float64(4) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(4)] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: system@1 > 4 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 4, projection=[count, system, time, town] | +| | | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where count > 200 and town != 'tewsbury'; -- Results After Sorting +-------+--------+--------------------------------+-----------+ @@ -93,19 +93,19 @@ +-------+--------+--------------------------------+-----------+ -- SQL: EXPLAIN SELECT * from restaurant where count > 200 and town != 'tewsbury'; -- Results After Normalizing UUIDs -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: CAST(restaurant.count AS Int64) > Int64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury"))] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: CAST(count@0 AS Int64) > 200 AND town@3 != tewsbury | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=town_min@0 != tewsbury OR tewsbury != town_max@1, output_ordering=[town@3 ASC, time@2 ASC], projection=[count, system, time, town] | -| | | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: CAST(restaurant.count AS Int64) > Int64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury"))] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: CAST(count@0 AS Int64) > 200 AND town@3 != tewsbury | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=town_min@0 != tewsbury OR tewsbury != town_max@1, projection=[count, system, time, town] | +| | | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence'); -- Results After Sorting +-------+--------+--------------------------------+-----------+ @@ -128,7 +128,7 @@ | | CoalesceBatchesExec: target_batch_size=4096 | | | FilterExec: CAST(count@0 AS Int64) > 200 AND town@3 != tewsbury AND system@1 = 5 OR town@3 = lawrence | | | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=town_min@0 != tewsbury OR tewsbury != town_max@1, output_ordering=[town@3 ASC, time@2 ASC], projection=[count, system, time, town] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=town_min@0 != tewsbury OR tewsbury != town_max@1, projection=[count, system, time, town] | | | | +---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence') and count < 40000; @@ -154,7 +154,7 @@ | | FilterExec: CAST(restaurant.count AS Int64)restaurant.count@0 > 200 AND town@4 != tewsbury AND system@2 = 5 OR town@4 = lawrence AND CAST(restaurant.count AS Int64)restaurant.count@0 < 40000 | | | ProjectionExec: expr=[CAST(count@0 AS Int64) as CAST(restaurant.count AS Int64)restaurant.count, count@0 as count, system@1 as system, time@2 as time, town@3 as town] | | | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=town_min@0 != tewsbury OR tewsbury != town_max@1, output_ordering=[town@3 ASC, time@2 ASC], projection=[count, system, time, town] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=town_min@0 != tewsbury OR tewsbury != town_max@1, projection=[count, system, time, town] | | | | +---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where count > 200 and count < 40000; @@ -170,21 +170,21 @@ +-------+--------+--------------------------------+-----------+ -- SQL: EXPLAIN SELECT * from restaurant where count > 200 and count < 40000; -- Results After Normalizing UUIDs -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: CAST(restaurant.count AS Int64)restaurant.count > Int64(200) AND CAST(restaurant.count AS Int64)restaurant.count < Int64(40000) | -| | Projection: CAST(restaurant.count AS Int64) AS CAST(restaurant.count AS Int64)restaurant.count, restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200), CAST(restaurant.count AS Int64) < Int64(40000)] | -| physical_plan | ProjectionExec: expr=[count@1 as count, system@2 as system, time@3 as time, town@4 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: CAST(restaurant.count AS Int64)restaurant.count@0 > 200 AND CAST(restaurant.count AS Int64)restaurant.count@0 < 40000 | -| | ProjectionExec: expr=[CAST(count@0 AS Int64) as CAST(restaurant.count AS Int64)restaurant.count, count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[town@3 ASC, time@2 ASC], projection=[count, system, time, town] | -| | | -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: CAST(restaurant.count AS Int64)restaurant.count > Int64(200) AND CAST(restaurant.count AS Int64)restaurant.count < Int64(40000) | +| | Projection: CAST(restaurant.count AS Int64) AS CAST(restaurant.count AS Int64)restaurant.count, restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200), CAST(restaurant.count AS Int64) < Int64(40000)] | +| physical_plan | ProjectionExec: expr=[count@1 as count, system@2 as system, time@3 as time, town@4 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: CAST(restaurant.count AS Int64)restaurant.count@0 > 200 AND CAST(restaurant.count AS Int64)restaurant.count@0 < 40000 | +| | ProjectionExec: expr=[CAST(count@0 AS Int64) as CAST(restaurant.count AS Int64)restaurant.count, count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[count, system, time, town] | +| | | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where system > 4.0 and system < 7.0; -- Results After Sorting +-------+--------+--------------------------------+-----------+ @@ -199,19 +199,19 @@ +-------+--------+--------------------------------+-----------+ -- SQL: EXPLAIN SELECT * from restaurant where system > 4.0 and system < 7.0; -- Results After Normalizing UUIDs -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: restaurant.system > Float64(4) AND restaurant.system < Float64(7) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(4), restaurant.system < Float64(7)] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: system@1 > 4 AND system@1 < 7 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 4 AND system_min@1 < 7, output_ordering=[town@3 ASC, time@2 ASC], projection=[count, system, time, town] | -| | | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.system > Float64(4) AND restaurant.system < Float64(7) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(4), restaurant.system < Float64(7)] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: system@1 > 4 AND system@1 < 7 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 4 AND system_min@1 < 7, projection=[count, system, time, town] | +| | | ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where system > 5.0 and system < 7.0; -- Results After Sorting +-------+--------+--------------------------------+----------+ @@ -223,19 +223,19 @@ +-------+--------+--------------------------------+----------+ -- SQL: EXPLAIN SELECT * from restaurant where system > 5.0 and system < 7.0; -- Results After Normalizing UUIDs -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: restaurant.system > Float64(5) AND restaurant.system < Float64(7) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), restaurant.system < Float64(7)] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: system@1 > 5 AND system@1 < 7 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 5 AND system_min@1 < 7, output_ordering=[town@3 ASC, time@2 ASC], projection=[count, system, time, town] | -| | | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.system > Float64(5) AND restaurant.system < Float64(7) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), restaurant.system < Float64(7)] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: system@1 > 5 AND system@1 < 7 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 5 AND system_min@1 < 7, projection=[count, system, time, town] | +| | | ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where system > 5.0 and town != 'tewsbury' and 7.0 > system; -- Results After Sorting +-------+--------+--------------------------------+----------+ @@ -246,19 +246,19 @@ +-------+--------+--------------------------------+----------+ -- SQL: EXPLAIN SELECT * from restaurant where system > 5.0 and town != 'tewsbury' and 7.0 > system; -- Results After Normalizing UUIDs -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: restaurant.system > Float64(5) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND Float64(7) > restaurant.system | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), Float64(7) > restaurant.system] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: system@1 > 5 AND town@3 != tewsbury AND 7 > system@1 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7, output_ordering=[town@3 ASC, time@2 ASC], projection=[count, system, time, town] | -| | | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.system > Float64(5) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND Float64(7) > restaurant.system | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), Float64(7) > restaurant.system] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: system@1 > 5 AND town@3 != tewsbury AND 7 > system@1 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7, projection=[count, system, time, town] | +| | | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where system > 5.0 and 'tewsbury' != town and system < 7.0 and (count = 632 or town = 'reading'); -- Results After Sorting +-------+--------+--------------------------------+---------+ @@ -278,7 +278,7 @@ | | CoalesceBatchesExec: target_batch_size=4096 | | | FilterExec: system@1 > 5 AND tewsbury != town@3 AND system@1 < 7 AND CAST(count@0 AS Int64) = 632 OR town@3 = reading | | | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7, output_ordering=[town@3 ASC, time@2 ASC], projection=[count, system, time, town] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7, projection=[count, system, time, town] | | | | +---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where 5.0 < system and town != 'tewsbury' and system < 7.0 and (count = 632 or town = 'reading') and time > to_timestamp('1970-01-01T00:00:00.000000130+00:00'); diff --git a/query_tests/cases/in/several_chunks.expected b/query_tests/cases/in/several_chunks.expected index 11adaf06f4..48a28fac67 100644 --- a/query_tests/cases/in/several_chunks.expected +++ b/query_tests/cases/in/several_chunks.expected @@ -14,25 +14,25 @@ +---------+------------+-------+------+--------------------------------+ -- SQL: EXPLAIN SELECT * from h2o; -- Results After Normalizing UUIDs -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time | -| | TableScan: h2o projection=[city, other_temp, state, temp, time] | -| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] | -| | UnionExec | -| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | -| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | -| | | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time | +| | TableScan: h2o projection=[city, other_temp, state, temp, time] | +| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] | +| | UnionExec | +| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[city, other_temp, state, temp, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], projection=[city, other_temp, state, temp, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], projection=[city, other_temp, state, temp, time] | +| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[city, other_temp, state, temp, time] | +| | | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: select temp, other_temp, time from h2o; -- Results After Sorting +------+------------+--------------------------------+ @@ -48,50 +48,50 @@ +------+------------+--------------------------------+ -- SQL: EXPLAIN select temp, other_temp, time from h2o; -- Results After Normalizing UUIDs -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: h2o.temp, h2o.other_temp, h2o.time | -| | TableScan: h2o projection=[other_temp, temp, time] | -| physical_plan | ProjectionExec: expr=[temp@1 as temp, other_temp@0 as other_temp, time@2 as time] | -| | UnionExec | -| | ProjectionExec: expr=[other_temp@1 as other_temp, temp@3 as temp, time@4 as time] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | -| | ProjectionExec: expr=[other_temp@1 as other_temp, temp@3 as temp, time@4 as time] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], output_ordering=[time@2 ASC], projection=[other_temp, temp, time] | -| | | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: h2o.temp, h2o.other_temp, h2o.time | +| | TableScan: h2o projection=[other_temp, temp, time] | +| physical_plan | ProjectionExec: expr=[temp@1 as temp, other_temp@0 as other_temp, time@2 as time] | +| | UnionExec | +| | ProjectionExec: expr=[other_temp@1 as other_temp, temp@3 as temp, time@4 as time] | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[city, other_temp, state, temp, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], projection=[city, other_temp, state, temp, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], projection=[city, other_temp, state, temp, time] | +| | ProjectionExec: expr=[other_temp@1 as other_temp, temp@3 as temp, time@4 as time] | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[other_temp, temp, time] | +| | | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: EXPLAIN SELECT * from h2o where time >= to_timestamp('1970-01-01T00:00:00.000000250+00:00'); -- Results After Normalizing UUIDs -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time | -| | Filter: h2o.time >= TimestampNanosecond(250, None) | -| | TableScan: h2o projection=[city, other_temp, state, temp, time], partial_filters=[h2o.time >= TimestampNanosecond(250, None)] | -| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: time@4 >= 250 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | UnionExec | -| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=time_max@0 >= 250, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=time_max@0 >= 250, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | -| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], predicate=time_max@0 >= 250, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | -| | | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time | +| | Filter: h2o.time >= TimestampNanosecond(250, None) | +| | TableScan: h2o projection=[city, other_temp, state, temp, time], partial_filters=[h2o.time >= TimestampNanosecond(250, None)] | +| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: time@4 >= 250 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | UnionExec | +| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=time_max@0 >= 250, projection=[city, other_temp, state, temp, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=time_max@0 >= 250, projection=[city, other_temp, state, temp, time] | +| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], predicate=time_max@0 >= 250, projection=[city, other_temp, state, temp, time] | +| | | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/query_tests/cases/in/two_chunks.expected b/query_tests/cases/in/two_chunks.expected index 661ffcefa0..7ccfd44fd7 100644 --- a/query_tests/cases/in/two_chunks.expected +++ b/query_tests/cases/in/two_chunks.expected @@ -10,20 +10,20 @@ +--------+------------+-------+------+--------------------------------+ -- SQL: EXPLAIN SELECT * from h2o; -- Results After Normalizing UUIDs -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time | -| | TableScan: h2o projection=[city, other_temp, state, temp, time] | -| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] | -| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | -| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time | +| | TableScan: h2o projection=[city, other_temp, state, temp, time] | +| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] | +| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[city, other_temp, state, temp, time] | +| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: select temp, other_temp, time from h2o; +------+------------+--------------------------------+ | temp | other_temp | time | @@ -34,18 +34,18 @@ +------+------------+--------------------------------+ -- SQL: EXPLAIN select temp, other_temp, time from h2o; -- Results After Normalizing UUIDs -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: h2o.temp, h2o.other_temp, h2o.time | -| | TableScan: h2o projection=[other_temp, temp, time] | -| physical_plan | ProjectionExec: expr=[temp@1 as temp, other_temp@0 as other_temp, time@2 as time] | -| | ProjectionExec: expr=[other_temp@1 as other_temp, temp@3 as temp, time@4 as time] | -| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | -| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: h2o.temp, h2o.other_temp, h2o.time | +| | TableScan: h2o projection=[other_temp, temp, time] | +| physical_plan | ProjectionExec: expr=[temp@1 as temp, other_temp@0 as other_temp, time@2 as time] | +| | ProjectionExec: expr=[other_temp@1 as other_temp, temp@3 as temp, time@4 as time] | +| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[city, other_temp, state, temp, time] | +| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 884c9131b9..9e30ea0f9c 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -26,7 +26,7 @@ bytes = { version = "1", features = ["std"] } chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "iana-time-zone", "serde", "std", "winapi"] } crossbeam-utils = { version = "0.8", features = ["std"] } crypto-common = { version = "0.1", default-features = false, features = ["std"] } -datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "d2814c960168b45c4a0f5d7bbb72d9f412cb08bd", features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "dd081d64a2fba8574e63bdd0662c14aec5852b48", features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } digest = { version = "0.10", features = ["alloc", "block-buffer", "core-api", "mac", "std", "subtle"] } either = { version = "1", features = ["use_std"] } fixedbitset = { version = "0.4", features = ["std"] }