diff --git a/Cargo.lock b/Cargo.lock index a7e0d28be3..f40a65b5f3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1237,8 +1237,8 @@ dependencies = [ [[package]] name = "datafusion" -version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=799dd747152f6574638a844986b8ea8470d3f4d6#799dd747152f6574638a844986b8ea8470d3f4d6" +version = "15.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=fbadebb894672f61327a30f77cda2ee88a343b2a#fbadebb894672f61327a30f77cda2ee88a343b2a" dependencies = [ "ahash 0.8.2", "arrow", @@ -1270,6 +1270,7 @@ dependencies = [ "pin-project-lite", "rand", "smallvec", + "sqllogictest", "sqlparser 0.27.0", "tempfile", "tokio", @@ -1282,8 +1283,8 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=799dd747152f6574638a844986b8ea8470d3f4d6#799dd747152f6574638a844986b8ea8470d3f4d6" +version = "15.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=fbadebb894672f61327a30f77cda2ee88a343b2a#fbadebb894672f61327a30f77cda2ee88a343b2a" dependencies = [ "arrow", "chrono", @@ -1294,8 +1295,8 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=799dd747152f6574638a844986b8ea8470d3f4d6#799dd747152f6574638a844986b8ea8470d3f4d6" +version = "15.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=fbadebb894672f61327a30f77cda2ee88a343b2a#fbadebb894672f61327a30f77cda2ee88a343b2a" dependencies = [ "ahash 0.8.2", "arrow", @@ -1306,8 +1307,8 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=799dd747152f6574638a844986b8ea8470d3f4d6#799dd747152f6574638a844986b8ea8470d3f4d6" +version = "15.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=fbadebb894672f61327a30f77cda2ee88a343b2a#fbadebb894672f61327a30f77cda2ee88a343b2a" dependencies = [ "arrow", "async-trait", @@ -1321,8 +1322,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=799dd747152f6574638a844986b8ea8470d3f4d6#799dd747152f6574638a844986b8ea8470d3f4d6" +version = "15.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=fbadebb894672f61327a30f77cda2ee88a343b2a#fbadebb894672f61327a30f77cda2ee88a343b2a" dependencies = [ "ahash 0.8.2", "arrow", @@ -1350,8 +1351,8 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=799dd747152f6574638a844986b8ea8470d3f4d6#799dd747152f6574638a844986b8ea8470d3f4d6" +version = "15.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=fbadebb894672f61327a30f77cda2ee88a343b2a#fbadebb894672f61327a30f77cda2ee88a343b2a" dependencies = [ "arrow", "chrono", @@ -1367,8 +1368,8 @@ dependencies = [ [[package]] name = "datafusion-row" -version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=799dd747152f6574638a844986b8ea8470d3f4d6#799dd747152f6574638a844986b8ea8470d3f4d6" +version = "15.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=fbadebb894672f61327a30f77cda2ee88a343b2a#fbadebb894672f61327a30f77cda2ee88a343b2a" dependencies = [ "arrow", "datafusion-common", @@ -1378,10 +1379,10 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=799dd747152f6574638a844986b8ea8470d3f4d6#799dd747152f6574638a844986b8ea8470d3f4d6" +version = "15.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=fbadebb894672f61327a30f77cda2ee88a343b2a#fbadebb894672f61327a30f77cda2ee88a343b2a" dependencies = [ - "arrow", + "arrow-schema", "datafusion-common", "datafusion-expr", "sqlparser 0.27.0", @@ -1417,6 +1418,12 @@ version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" +[[package]] +name = "difference" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" + [[package]] name = "difflib" version = "0.4.0" @@ -3017,6 +3024,17 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb" +[[package]] +name = "libtest-mimic" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7b603516767d1ab23d0de09d023e62966c3322f7148297c35cf3d97aa8b37fa" +dependencies = [ + "clap 4.0.29", + "termcolor", + "threadpool", +] + [[package]] name = "link-cplusplus" version = "1.0.7" @@ -3707,6 +3725,7 @@ name = "parquet_to_line_protocol" version = "0.1.0" dependencies = [ "datafusion", + "datafusion_util", "futures", "influxdb_line_protocol", "mutable_batch", @@ -5075,6 +5094,25 @@ dependencies = [ "unicode_categories", ] +[[package]] +name = "sqllogictest" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba41e01d229d7725401de371e323851f82d839d68732a06162405362b60852fe" +dependencies = [ + "async-trait", + "difference", + "futures", + "glob", + "humantime", + "itertools", + "libtest-mimic", + "regex", + "tempfile", + "thiserror", + "tracing", +] + [[package]] name = "sqlparser" version = "0.27.0" @@ -6327,6 +6365,7 @@ dependencies = [ "bytes", "cc", "chrono", + "clap 4.0.29", "crossbeam-utils", "crypto-common", "datafusion", @@ -6346,7 +6385,6 @@ dependencies = [ "hashbrown 0.13.1", "heck", "indexmap", - "io-lifetimes", "libc", "lock_api", "log", diff --git a/Cargo.toml b/Cargo.toml index fa0a68d6a4..9f4ed11dd9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -114,8 +114,8 @@ license = "MIT OR Apache-2.0" [workspace.dependencies] arrow = { version = "28.0.0" } arrow-flight = { version = "28.0.0" } -datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="799dd747152f6574638a844986b8ea8470d3f4d6", default-features = false } -datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="799dd747152f6574638a844986b8ea8470d3f4d6" } +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="fbadebb894672f61327a30f77cda2ee88a343b2a", default-features = false } +datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="fbadebb894672f61327a30f77cda2ee88a343b2a" } hashbrown = { version = "0.13.1" } parquet = { version = "28.0.0" } diff --git a/datafusion_util/src/config.rs b/datafusion_util/src/config.rs index d16a9a001e..b3ac8bae01 100644 --- a/datafusion_util/src/config.rs +++ b/datafusion_util/src/config.rs @@ -26,7 +26,7 @@ pub fn iox_session_config() -> SessionConfig { // Enable parquet predicate pushdown optimization .set_bool(OPT_PARQUET_PUSHDOWN_FILTERS, true) .set_bool(OPT_PARQUET_REORDER_FILTERS, true) - .create_default_catalog_and_schema(true) + .with_create_default_catalog_and_schema(true) .with_information_schema(true) .with_default_catalog_and_schema(DEFAULT_CATALOG, DEFAULT_SCHEMA) } diff --git a/iox_query/src/frontend/influxrpc.rs b/iox_query/src/frontend/influxrpc.rs index 21d793fbee..6bbd697608 100644 --- a/iox_query/src/frontend/influxrpc.rs +++ b/iox_query/src/frontend/influxrpc.rs @@ -1445,7 +1445,7 @@ fn table_chunk_stream<'a>( .chunks( table_name, predicate, - &projection, + projection.as_ref(), ctx.child_ctx("table chunks"), ) .await diff --git a/iox_query/src/lib.rs b/iox_query/src/lib.rs index 8570ab5b8b..29e080def4 100644 --- a/iox_query/src/lib.rs +++ b/iox_query/src/lib.rs @@ -154,7 +154,7 @@ pub trait QueryNamespace: QueryNamespaceMeta + Debug + Send + Sync { &self, table_name: &str, predicate: &Predicate, - projection: &Option>, + projection: Option<&Vec>, ctx: IOxSessionContext, ) -> Result>, DataFusionError>; diff --git a/iox_query/src/provider.rs b/iox_query/src/provider.rs index 589d06893c..5801099c59 100644 --- a/iox_query/src/provider.rs +++ b/iox_query/src/provider.rs @@ -221,7 +221,7 @@ impl TableProvider for ChunkTableProvider { async fn scan( &self, _ctx: &SessionState, - projection: &Option>, + projection: Option<&Vec>, filters: &[Expr], _limit: Option, ) -> std::result::Result, DataFusionError> { diff --git a/iox_query/src/provider/physical.rs b/iox_query/src/provider/physical.rs index 40779e3e22..bd2fbcc575 100644 --- a/iox_query/src/provider/physical.rs +++ b/iox_query/src/provider/physical.rs @@ -166,7 +166,7 @@ pub fn chunks_to_physical_nodes( } let mut parquet_chunks: Vec<_> = parquet_chunks.into_iter().collect(); parquet_chunks.sort_by_key(|(url_str, _)| url_str.clone()); - let target_partitions = context.session_config().target_partitions; + let target_partitions = context.session_config().target_partitions(); for (_url_str, chunk_list) in parquet_chunks { let ParquetChunkList { object_store_url, diff --git a/iox_query/src/test.rs b/iox_query/src/test.rs index b189f0565a..670224f334 100644 --- a/iox_query/src/test.rs +++ b/iox_query/src/test.rs @@ -104,7 +104,7 @@ impl QueryNamespace for TestDatabase { &self, table_name: &str, predicate: &Predicate, - _projection: &Option>, + _projection: Option<&Vec>, _ctx: IOxSessionContext, ) -> Result>, DataFusionError> { // save last predicate diff --git a/parquet_to_line_protocol/Cargo.toml b/parquet_to_line_protocol/Cargo.toml index 966184c8bb..0d4af4df25 100644 --- a/parquet_to_line_protocol/Cargo.toml +++ b/parquet_to_line_protocol/Cargo.toml @@ -7,6 +7,7 @@ license.workspace = true [dependencies] datafusion = { workspace = true } +datafusion_util = { path = "../datafusion_util" } influxdb_line_protocol = { path = "../influxdb_line_protocol" } futures = {version = "0.3"} num_cpus = "1.13.1" diff --git a/parquet_to_line_protocol/src/lib.rs b/parquet_to_line_protocol/src/lib.rs index 46710d5db1..05704f571a 100644 --- a/parquet_to_line_protocol/src/lib.rs +++ b/parquet_to_line_protocol/src/lib.rs @@ -2,7 +2,6 @@ use datafusion::{ arrow::datatypes::SchemaRef as ArrowSchemaRef, - config::ConfigOptions, datasource::{ file_format::{parquet::ParquetFormat, FileFormat}, listing::PartitionedFile, @@ -16,6 +15,7 @@ use datafusion::{ }, prelude::{SessionConfig, SessionContext}, }; +use datafusion_util::config::iox_session_config; use futures::StreamExt; use object_store::{ local::LocalFileSystem, path::Path as ObjectStorePath, ObjectMeta, ObjectStore, @@ -162,9 +162,8 @@ pub struct ParquetFileReader { /// Parquet file metadata schema: ArrowSchemaRef, - /// number of rows to read in each batch (can pick small to - /// increase parallelism). Defaults to 1000 - batch_size: usize, + /// DataFusion configuration, such as the target batchsize, etc + session_config: SessionConfig, } impl ParquetFileReader { @@ -174,8 +173,11 @@ impl ParquetFileReader { object_store_url: ObjectStoreUrl, object_meta: ObjectMeta, ) -> Result { + let session_config = iox_session_config(); + // Keep metadata so we can find the measurement name - let format = ParquetFormat::default().with_skip_metadata(false); + let format = + ParquetFormat::new(session_config.config_options()).with_skip_metadata(Some(false)); // Use datafusion parquet reader to read the metadata from the // file. @@ -189,7 +191,7 @@ impl ParquetFileReader { object_store_url, object_meta, schema, - batch_size: 1000, + session_config, }) } @@ -214,15 +216,14 @@ impl ParquetFileReader { limit: None, table_partition_cols: vec![], output_ordering: None, - config_options: ConfigOptions::new().into_shareable(), + config_options: self.session_config.config_options(), }; // set up enough datafusion context to do the real read session let predicate = None; let metadata_size_hint = None; let exec = ParquetExec::new(base_config, predicate, metadata_size_hint); - let session_config = SessionConfig::new().with_batch_size(self.batch_size); - let session_ctx = SessionContext::with_config(session_config); + let session_ctx = SessionContext::with_config(self.session_config.clone()); let object_store = Arc::clone(&self.object_store); let task_ctx = Arc::new(TaskContext::from(&session_ctx)); diff --git a/querier/src/namespace/query_access.rs b/querier/src/namespace/query_access.rs index 7bbefefb19..0cb71ce49a 100644 --- a/querier/src/namespace/query_access.rs +++ b/querier/src/namespace/query_access.rs @@ -42,7 +42,7 @@ impl QueryNamespace for QuerierNamespace { &self, table_name: &str, predicate: &Predicate, - projection: &Option>, + projection: Option<&Vec>, ctx: IOxSessionContext, ) -> Result>, DataFusionError> { debug!(%table_name, %predicate, "Finding chunks for table"); @@ -487,15 +487,15 @@ mod tests { &querier_namespace, "EXPLAIN SELECT * FROM cpu", &[ - "+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+", - "| plan_type | plan |", - "+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+", - "| logical_plan | Projection: cpu.foo, cpu.host, cpu.load, cpu.time |", - "| | TableScan: cpu projection=[foo, host, load, time] |", - "| physical_plan | ProjectionExec: expr=[foo@0 as foo, host@1 as host, load@2 as load, time@3 as time] |", - "| | ParquetExec: limit=None, partitions=[1/1/1/1/.parquet, 1/1/1/1/.parquet, 1/1/1/1/.parquet, 1/1/2/2/.parquet, 1/1/1/3/.parquet], projection=[foo, host, load, time] |", - "| | |", - "+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+", + "+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+", + "| plan_type | plan |", + "+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+", + "| logical_plan | Projection: cpu.foo, cpu.host, cpu.load, cpu.time |", + "| | TableScan: cpu projection=[foo, host, load, time] |", + "| physical_plan | ProjectionExec: expr=[foo@0 as foo, host@1 as host, load@2 as load, time@3 as time] |", + "| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/.parquet, 1/1/1/1/.parquet, 1/1/1/1/.parquet, 1/1/2/2/.parquet, 1/1/1/3/.parquet]]}, projection=[foo, host, load, time] |", + "| | |", + "+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+", ], ) .await; @@ -507,24 +507,24 @@ mod tests { &querier_namespace, "EXPLAIN SELECT * FROM mem ORDER BY host,time", &[ - "+---------------+---------------------------------------------------------------------------------------------------------------------------------------+", - "| plan_type | plan |", - "+---------------+---------------------------------------------------------------------------------------------------------------------------------------+", - "| logical_plan | Sort: mem.host ASC NULLS LAST, mem.time ASC NULLS LAST |", - "| | Projection: mem.host, mem.perc, mem.time |", - "| | TableScan: mem projection=[host, perc, time] |", - "| physical_plan | SortExec: [host@0 ASC NULLS LAST,time@2 ASC NULLS LAST] |", - "| | CoalescePartitionsExec |", - "| | ProjectionExec: expr=[host@0 as host, perc@1 as perc, time@2 as time] |", - "| | UnionExec |", - "| | CoalesceBatchesExec: target_batch_size=4096 |", - "| | FilterExec: time@2 < 1 OR time@2 > 13 OR NOT host@0 = CAST(d AS Dictionary(Int32, Utf8)) |", - "| | ParquetExec: limit=None, partitions=[1/2/1/4/.parquet], projection=[host, perc, time] |", - "| | CoalesceBatchesExec: target_batch_size=4096 |", - "| | FilterExec: time@2 < 1 OR time@2 > 13 OR NOT host@0 = CAST(d AS Dictionary(Int32, Utf8)) |", - "| | ParquetExec: limit=None, partitions=[1/2/1/4/.parquet], projection=[host, perc, time] |", - "| | |", - "+---------------+---------------------------------------------------------------------------------------------------------------------------------------+", + "+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------+", + "| plan_type | plan |", + "+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------+", + "| logical_plan | Sort: mem.host ASC NULLS LAST, mem.time ASC NULLS LAST |", + "| | Projection: mem.host, mem.perc, mem.time |", + "| | TableScan: mem projection=[host, perc, time] |", + "| physical_plan | SortExec: [host@0 ASC NULLS LAST,time@2 ASC NULLS LAST] |", + "| | CoalescePartitionsExec |", + "| | ProjectionExec: expr=[host@0 as host, perc@1 as perc, time@2 as time] |", + "| | UnionExec |", + "| | CoalesceBatchesExec: target_batch_size=4096 |", + "| | FilterExec: time@2 < 1 OR time@2 > 13 OR NOT host@0 = CAST(d AS Dictionary(Int32, Utf8)) |", + "| | ParquetExec: limit=None, partitions={1 group: [[1/2/1/4/.parquet]]}, projection=[host, perc, time] |", + "| | CoalesceBatchesExec: target_batch_size=4096 |", + "| | FilterExec: time@2 < 1 OR time@2 > 13 OR NOT host@0 = CAST(d AS Dictionary(Int32, Utf8)) |", + "| | ParquetExec: limit=None, partitions={1 group: [[1/2/1/4/.parquet]]}, projection=[host, perc, time] |", + "| | |", + "+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------+", ], ) .await; @@ -567,21 +567,21 @@ mod tests { &querier_namespace, "EXPLAIN SELECT * FROM cpu", &[ - "+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+", - "| plan_type | plan |", - "+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+", - "| logical_plan | Projection: cpu.foo, cpu.host, cpu.load, cpu.time |", - "| | TableScan: cpu projection=[foo, host, load, time] |", - "| physical_plan | ProjectionExec: expr=[foo@0 as foo, host@1 as host, load@2 as load, time@3 as time] |", - "| | UnionExec |", - "| | DeduplicateExec: [host@1 ASC,time@3 ASC] |", - "| | SortPreservingMergeExec: [host@1 ASC,time@3 ASC] |", - "| | UnionExec |", - "| | ParquetExec: limit=None, partitions=[1/1/2/2/.parquet], output_ordering=[host@1 ASC, time@3 ASC], projection=[foo, host, load, time] |", - "| | ParquetExec: limit=None, partitions=[1/1/2/2/.parquet], output_ordering=[host@1 ASC, time@3 ASC], projection=[foo, host, load, time] |", - "| | ParquetExec: limit=None, partitions=[1/1/1/1/.parquet, 1/1/1/1/.parquet, 1/1/1/1/.parquet, 1/1/1/3/.parquet], projection=[foo, host, load, time] |", - "| | |", - "+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+", + "+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+", + "| plan_type | plan |", + "+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+", + "| logical_plan | Projection: cpu.foo, cpu.host, cpu.load, cpu.time |", + "| | TableScan: cpu projection=[foo, host, load, time] |", + "| physical_plan | ProjectionExec: expr=[foo@0 as foo, host@1 as host, load@2 as load, time@3 as time] |", + "| | UnionExec |", + "| | DeduplicateExec: [host@1 ASC,time@3 ASC] |", + "| | SortPreservingMergeExec: [host@1 ASC,time@3 ASC] |", + "| | UnionExec |", + "| | ParquetExec: limit=None, partitions={1 group: [[1/1/2/2/.parquet]]}, output_ordering=[host@1 ASC, time@3 ASC], projection=[foo, host, load, time] |", + "| | ParquetExec: limit=None, partitions={1 group: [[1/1/2/2/.parquet]]}, output_ordering=[host@1 ASC, time@3 ASC], projection=[foo, host, load, time] |", + "| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/.parquet, 1/1/1/1/.parquet, 1/1/1/1/.parquet, 1/1/1/3/.parquet]]}, projection=[foo, host, load, time] |", + "| | |", + "+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+", ], ) .await; diff --git a/querier/src/system_tables/mod.rs b/querier/src/system_tables/mod.rs index 55faed8429..0429b55a1f 100644 --- a/querier/src/system_tables/mod.rs +++ b/querier/src/system_tables/mod.rs @@ -101,7 +101,7 @@ where async fn scan( &self, _ctx: &SessionState, - projection: &Option>, + projection: Option<&Vec>, // It would be cool to push projection and limit down _filters: &[Expr], _limit: Option, @@ -114,7 +114,7 @@ where Ok(Arc::new(SystemTableExecutionPlan { table: Arc::clone(&self.table), - projection: projection.clone(), + projection: projection.cloned(), projected_schema, })) } diff --git a/querier/src/table/mod.rs b/querier/src/table/mod.rs index 5f86bd3433..be25956c11 100644 --- a/querier/src/table/mod.rs +++ b/querier/src/table/mod.rs @@ -193,7 +193,7 @@ impl QuerierTable { &self, predicate: &Predicate, span: Option, - projection: &Option>, + projection: Option<&Vec>, ) -> Result>> { let mut span_recorder = SpanRecorder::new(span); match self @@ -215,7 +215,7 @@ impl QuerierTable { &self, predicate: &Predicate, span_recorder: &SpanRecorder, - projection: &Option>, + projection: Option<&Vec>, ) -> Result>> { debug!( ?predicate, @@ -429,7 +429,7 @@ impl QuerierTable { &self, predicate: &Predicate, span: Option, - projection: &Option>, + projection: Option<&Vec>, ) -> Result> { let mut span_recorder = SpanRecorder::new(span); @@ -464,7 +464,7 @@ impl QuerierTable { ingester_connection: Arc, predicate: &Predicate, span_recorder: &SpanRecorder, - projection: &Option>, + projection: Option<&Vec>, ) -> Result> { // If the projection is provided, use it. Otherwise, use all columns of the table // The provided projection should include all columns needed by the query @@ -826,7 +826,7 @@ mod tests { // Expect one chunk from the ingester let pred = Predicate::new().with_range(0, 100); let chunks = querier_table - .chunks_with_predicate_and_projection(&pred, &Some(vec![1])) // only select `foo` column + .chunks_with_predicate_and_projection(&pred, Some(&vec![1])) // only select `foo` column .await .unwrap(); assert_eq!(chunks.len(), 1); @@ -1369,14 +1369,14 @@ mod tests { &self, pred: &Predicate, ) -> Result>> { - self.chunks_with_predicate_and_projection(pred, &None).await + self.chunks_with_predicate_and_projection(pred, None).await } /// Invokes querier_table.chunks modeling the ingester sending the partitions in this table async fn chunks_with_predicate_and_projection( &self, pred: &Predicate, - projection: &Option>, + projection: Option<&Vec>, ) -> Result>> { self.querier_table .ingester_connection diff --git a/querier/src/table/query_access/mod.rs b/querier/src/table/query_access/mod.rs index a0702dac67..e82af6dac3 100644 --- a/querier/src/table/query_access/mod.rs +++ b/querier/src/table/query_access/mod.rs @@ -44,7 +44,7 @@ impl TableProvider for QuerierTable { async fn scan( &self, ctx: &SessionState, - projection: &Option>, + projection: Option<&Vec>, filters: &[Expr], limit: Option, ) -> Result, DataFusionError> { diff --git a/query_tests/cases/in/dedup_and_predicates_parquet.expected b/query_tests/cases/in/dedup_and_predicates_parquet.expected index 80b4c85b52..f4067049ad 100644 --- a/query_tests/cases/in/dedup_and_predicates_parquet.expected +++ b/query_tests/cases/in/dedup_and_predicates_parquet.expected @@ -8,23 +8,23 @@ +-----+-----+-----+----------------------+ -- SQL: EXPLAIN SELECT * FROM "table" ORDER BY tag; -- Results After Normalizing UUIDs -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: table.tag ASC NULLS LAST | -| | Projection: table.bar, table.foo, table.tag, table.time | -| | TableScan: table projection=[bar, foo, tag, time] | -| physical_plan | SortExec: [tag@2 ASC NULLS LAST] | -| | CoalescePartitionsExec | -| | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | -| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | -| | | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: table.tag ASC NULLS LAST | +| | Projection: table.bar, table.foo, table.tag, table.time | +| | TableScan: table projection=[bar, foo, tag, time] | +| physical_plan | SortExec: [tag@2 ASC NULLS LAST] | +| | CoalescePartitionsExec | +| | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | +| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | +| | | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * FROM "table" WHERE tag='A'; +-----+-----+-----+----------------------+ | bar | foo | tag | time | @@ -33,23 +33,23 @@ +-----+-----+-----+----------------------+ -- SQL: EXPLAIN SELECT * FROM "table" WHERE tag='A'; -- Results After Normalizing UUIDs -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: table.bar, table.foo, table.tag, table.time | -| | Filter: table.tag = Dictionary(Int32, Utf8("A")) | -| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.tag = Dictionary(Int32, Utf8("A"))] | -| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: tag@2 = A | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | -| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=tag = Dictionary(Int32, Utf8("A")), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=tag = Dictionary(Int32, Utf8("A")), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | -| | | -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: table.bar, table.foo, table.tag, table.time | +| | Filter: table.tag = Dictionary(Int32, Utf8("A")) | +| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.tag = Dictionary(Int32, Utf8("A"))] | +| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: tag@2 = A | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | +| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=tag = Dictionary(Int32, Utf8("A")), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, predicate=tag = Dictionary(Int32, Utf8("A")), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | +| | | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * FROM "table" WHERE foo=1 AND bar=2; +-----+-----+-----+----------------------+ | bar | foo | tag | time | @@ -58,23 +58,23 @@ +-----+-----+-----+----------------------+ -- SQL: EXPLAIN SELECT * FROM "table" WHERE foo=1 AND bar=2; -- Results After Normalizing UUIDs -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: table.bar, table.foo, table.tag, table.time | -| | Filter: table.foo = Float64(1) AND table.bar = Float64(2) | -| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.foo = Float64(1), table.bar = Float64(2)] | -| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: foo@1 = 1 AND bar@0 = 2 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | -| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | -| | | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: table.bar, table.foo, table.tag, table.time | +| | Filter: table.foo = Float64(1) AND table.bar = Float64(2) | +| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.foo = Float64(1), table.bar = Float64(2)] | +| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: foo@1 = 1 AND bar@0 = 2 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | +| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | +| | | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * FROM "table" WHERE time=to_timestamp('1970-01-01T00:00:00.000000000+00:00') ORDER BY tag; +-----+-----+-----+----------------------+ | bar | foo | tag | time | @@ -84,26 +84,26 @@ +-----+-----+-----+----------------------+ -- SQL: EXPLAIN SELECT * FROM "table" WHERE time=to_timestamp('1970-01-01T00:00:00.000000000+00:00') ORDER BY tag; -- Results After Normalizing UUIDs -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: table.tag ASC NULLS LAST | -| | Projection: table.bar, table.foo, table.tag, table.time | -| | Filter: table.time = TimestampNanosecond(0, None) | -| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.time = TimestampNanosecond(0, None)] | -| physical_plan | SortExec: [tag@2 ASC NULLS LAST] | -| | CoalescePartitionsExec | -| | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: time@3 = 0 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | -| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=time = TimestampNanosecond(0, None), pruning_predicate=time_min@0 <= 0 AND 0 <= time_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=time = TimestampNanosecond(0, None), pruning_predicate=time_min@0 <= 0 AND 0 <= time_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | -| | | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: table.tag ASC NULLS LAST | +| | Projection: table.bar, table.foo, table.tag, table.time | +| | Filter: table.time = TimestampNanosecond(0, None) | +| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.time = TimestampNanosecond(0, None)] | +| physical_plan | SortExec: [tag@2 ASC NULLS LAST] | +| | CoalescePartitionsExec | +| | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: time@3 = 0 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | +| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=time = TimestampNanosecond(0, None), pruning_predicate=time_min@0 <= 0 AND 0 <= time_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, predicate=time = TimestampNanosecond(0, None), pruning_predicate=time_min@0 <= 0 AND 0 <= time_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | +| | | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * FROM "table" WHERE tag='A' AND foo=1 AND time=to_timestamp('1970-01-01T00:00:00.000000000+00:00'); +-----+-----+-----+----------------------+ | bar | foo | tag | time | @@ -112,20 +112,20 @@ +-----+-----+-----+----------------------+ -- SQL: EXPLAIN SELECT * FROM "table" WHERE tag='A' AND foo=1 AND time=to_timestamp('1970-01-01T00:00:00.000000000+00:00'); -- Results After Normalizing UUIDs -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: table.bar, table.foo, table.tag, table.time | -| | Filter: table.tag = Dictionary(Int32, Utf8("A")) AND table.foo = Float64(1) AND table.time = TimestampNanosecond(0, None) | -| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.tag = Dictionary(Int32, Utf8("A")), table.foo = Float64(1), table.time = TimestampNanosecond(0, None)] | -| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: tag@2 = A AND foo@1 = 1 AND time@3 = 0 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | -| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=tag = Dictionary(Int32, Utf8("A")) AND time = TimestampNanosecond(0, None), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1 AND time_min@2 <= 0 AND 0 <= time_max@3, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=tag = Dictionary(Int32, Utf8("A")) AND time = TimestampNanosecond(0, None), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1 AND time_min@2 <= 0 AND 0 <= time_max@3, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | -| | | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: table.bar, table.foo, table.tag, table.time | +| | Filter: table.tag = Dictionary(Int32, Utf8("A")) AND table.foo = Float64(1) AND table.time = TimestampNanosecond(0, None) | +| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.tag = Dictionary(Int32, Utf8("A")), table.foo = Float64(1), table.time = TimestampNanosecond(0, None)] | +| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: tag@2 = A AND foo@1 = 1 AND time@3 = 0 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | +| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=tag = Dictionary(Int32, Utf8("A")) AND time = TimestampNanosecond(0, None), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1 AND time_min@2 <= 0 AND 0 <= time_max@3, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, predicate=tag = Dictionary(Int32, Utf8("A")) AND time = TimestampNanosecond(0, None), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1 AND time_min@2 <= 0 AND 0 <= time_max@3, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | +| | | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/query_tests/cases/in/dedup_and_predicates_parquet_ingester.expected b/query_tests/cases/in/dedup_and_predicates_parquet_ingester.expected index 3cb766c556..c7a05cd987 100644 --- a/query_tests/cases/in/dedup_and_predicates_parquet_ingester.expected +++ b/query_tests/cases/in/dedup_and_predicates_parquet_ingester.expected @@ -8,24 +8,24 @@ +-----+-----+-----+----------------------+ -- SQL: EXPLAIN SELECT * FROM "table" ORDER BY tag; -- Results After Normalizing UUIDs -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: table.tag ASC NULLS LAST | -| | Projection: table.bar, table.foo, table.tag, table.time | -| | TableScan: table projection=[bar, foo, tag, time] | -| physical_plan | SortExec: [tag@2 ASC NULLS LAST] | -| | CoalescePartitionsExec | -| | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | -| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | -| | SortExec: [tag@2 ASC,time@3 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: table.tag ASC NULLS LAST | +| | Projection: table.bar, table.foo, table.tag, table.time | +| | TableScan: table projection=[bar, foo, tag, time] | +| physical_plan | SortExec: [tag@2 ASC NULLS LAST] | +| | CoalescePartitionsExec | +| | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | +| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | +| | SortExec: [tag@2 ASC,time@3 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * FROM "table" WHERE tag='A'; +-----+-----+-----+----------------------+ | bar | foo | tag | time | @@ -34,24 +34,24 @@ +-----+-----+-----+----------------------+ -- SQL: EXPLAIN SELECT * FROM "table" WHERE tag='A'; -- Results After Normalizing UUIDs -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: table.bar, table.foo, table.tag, table.time | -| | Filter: table.tag = Dictionary(Int32, Utf8("A")) | -| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.tag = Dictionary(Int32, Utf8("A"))] | -| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: tag@2 = A | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | -| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=tag = Dictionary(Int32, Utf8("A")), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | -| | SortExec: [tag@2 ASC,time@3 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | | -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: table.bar, table.foo, table.tag, table.time | +| | Filter: table.tag = Dictionary(Int32, Utf8("A")) | +| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.tag = Dictionary(Int32, Utf8("A"))] | +| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: tag@2 = A | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | +| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=tag = Dictionary(Int32, Utf8("A")), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | +| | SortExec: [tag@2 ASC,time@3 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * FROM "table" WHERE foo=1 AND bar=2; +-----+-----+-----+----------------------+ | bar | foo | tag | time | @@ -60,24 +60,24 @@ +-----+-----+-----+----------------------+ -- SQL: EXPLAIN SELECT * FROM "table" WHERE foo=1 AND bar=2; -- Results After Normalizing UUIDs -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: table.bar, table.foo, table.tag, table.time | -| | Filter: table.foo = Float64(1) AND table.bar = Float64(2) | -| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.foo = Float64(1), table.bar = Float64(2)] | -| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: foo@1 = 1 AND bar@0 = 2 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | -| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | -| | SortExec: [tag@2 ASC,time@3 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: table.bar, table.foo, table.tag, table.time | +| | Filter: table.foo = Float64(1) AND table.bar = Float64(2) | +| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.foo = Float64(1), table.bar = Float64(2)] | +| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: foo@1 = 1 AND bar@0 = 2 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | +| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | +| | SortExec: [tag@2 ASC,time@3 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * FROM "table" WHERE time=to_timestamp('1970-01-01T00:00:00.000000000+00:00') ORDER BY tag; +-----+-----+-----+----------------------+ | bar | foo | tag | time | @@ -87,27 +87,27 @@ +-----+-----+-----+----------------------+ -- SQL: EXPLAIN SELECT * FROM "table" WHERE time=to_timestamp('1970-01-01T00:00:00.000000000+00:00') ORDER BY tag; -- Results After Normalizing UUIDs -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: table.tag ASC NULLS LAST | -| | Projection: table.bar, table.foo, table.tag, table.time | -| | Filter: table.time = TimestampNanosecond(0, None) | -| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.time = TimestampNanosecond(0, None)] | -| physical_plan | SortExec: [tag@2 ASC NULLS LAST] | -| | CoalescePartitionsExec | -| | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: time@3 = 0 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | -| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=time = TimestampNanosecond(0, None), pruning_predicate=time_min@0 <= 0 AND 0 <= time_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | -| | SortExec: [tag@2 ASC,time@3 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: table.tag ASC NULLS LAST | +| | Projection: table.bar, table.foo, table.tag, table.time | +| | Filter: table.time = TimestampNanosecond(0, None) | +| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.time = TimestampNanosecond(0, None)] | +| physical_plan | SortExec: [tag@2 ASC NULLS LAST] | +| | CoalescePartitionsExec | +| | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: time@3 = 0 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | +| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=time = TimestampNanosecond(0, None), pruning_predicate=time_min@0 <= 0 AND 0 <= time_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | +| | SortExec: [tag@2 ASC,time@3 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * FROM "table" WHERE tag='A' AND foo=1 AND time=to_timestamp('1970-01-01T00:00:00.000000000+00:00'); +-----+-----+-----+----------------------+ | bar | foo | tag | time | @@ -116,21 +116,21 @@ +-----+-----+-----+----------------------+ -- SQL: EXPLAIN SELECT * FROM "table" WHERE tag='A' AND foo=1 AND time=to_timestamp('1970-01-01T00:00:00.000000000+00:00'); -- Results After Normalizing UUIDs -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: table.bar, table.foo, table.tag, table.time | -| | Filter: table.tag = Dictionary(Int32, Utf8("A")) AND table.foo = Float64(1) AND table.time = TimestampNanosecond(0, None) | -| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.tag = Dictionary(Int32, Utf8("A")), table.foo = Float64(1), table.time = TimestampNanosecond(0, None)] | -| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: tag@2 = A AND foo@1 = 1 AND time@3 = 0 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | -| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=tag = Dictionary(Int32, Utf8("A")) AND time = TimestampNanosecond(0, None), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1 AND time_min@2 <= 0 AND 0 <= time_max@3, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | -| | SortExec: [tag@2 ASC,time@3 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: table.bar, table.foo, table.tag, table.time | +| | Filter: table.tag = Dictionary(Int32, Utf8("A")) AND table.foo = Float64(1) AND table.time = TimestampNanosecond(0, None) | +| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.tag = Dictionary(Int32, Utf8("A")), table.foo = Float64(1), table.time = TimestampNanosecond(0, None)] | +| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: tag@2 = A AND foo@1 = 1 AND time@3 = 0 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | +| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=tag = Dictionary(Int32, Utf8("A")) AND time = TimestampNanosecond(0, None), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1 AND time_min@2 <= 0 AND 0 <= time_max@3, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | +| | SortExec: [tag@2 ASC,time@3 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/query_tests/cases/in/duplicates_ingester.expected b/query_tests/cases/in/duplicates_ingester.expected index 17c5b66ac6..3c5fa34bbe 100644 --- a/query_tests/cases/in/duplicates_ingester.expected +++ b/query_tests/cases/in/duplicates_ingester.expected @@ -1,91 +1,91 @@ -- Test Setup: OneMeasurementFourChunksWithDuplicatesWithIngester -- SQL: explain select time, state, city, min_temp, max_temp, area from h2o order by time, state, city; -- Results After Normalizing UUIDs -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: h2o.time ASC NULLS LAST, h2o.state ASC NULLS LAST, h2o.city ASC NULLS LAST | -| | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area | -| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] | -| physical_plan | SortExec: [time@0 ASC NULLS LAST,state@1 ASC NULLS LAST,city@2 ASC NULLS LAST] | -| | CoalescePartitionsExec | -| | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | UnionExec | -| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] | -| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | -| | DeduplicateExec: [city@1 ASC,state@4 ASC,time@5 ASC] | -| | SortExec: [city@1 ASC,state@4 ASC,time@5 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], projection=[area, city, max_temp, min_temp, state, time] | -| | | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: h2o.time ASC NULLS LAST, h2o.state ASC NULLS LAST, h2o.city ASC NULLS LAST | +| | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area | +| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] | +| physical_plan | SortExec: [time@0 ASC NULLS LAST,state@1 ASC NULLS LAST,city@2 ASC NULLS LAST] | +| | CoalescePartitionsExec | +| | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | UnionExec | +| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] | +| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | +| | DeduplicateExec: [city@1 ASC,state@4 ASC,time@5 ASC] | +| | SortExec: [city@1 ASC,state@4 ASC,time@5 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet]]}, projection=[area, city, max_temp, min_temp, state, time] | +| | | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: EXPLAIN select time, state, city, min_temp, max_temp, area from h2o; -- Results After Normalizing UUIDs -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area | -| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] | -| physical_plan | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] | -| | UnionExec | -| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] | -| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | -| | DeduplicateExec: [city@1 ASC,state@4 ASC,time@5 ASC] | -| | SortExec: [city@1 ASC,state@4 ASC,time@5 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], projection=[area, city, max_temp, min_temp, state, time] | -| | | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area | +| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] | +| physical_plan | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] | +| | UnionExec | +| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] | +| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | +| | DeduplicateExec: [city@1 ASC,state@4 ASC,time@5 ASC] | +| | SortExec: [city@1 ASC,state@4 ASC,time@5 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet]]}, projection=[area, city, max_temp, min_temp, state, time] | +| | | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: EXPLAIN select state as name from h2o UNION ALL select city as name from h2o; -- Results After Normalizing UUIDs -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Union | -| | Projection: h2o.state AS name | -| | TableScan: h2o projection=[state] | -| | Projection: h2o.city AS name | -| | TableScan: h2o projection=[city] | -| physical_plan | UnionExec | -| | ProjectionExec: expr=[state@0 as name] | -| | UnionExec | -| | ProjectionExec: expr=[state@1 as state] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] | -| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] | -| | ProjectionExec: expr=[state@1 as state] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [city@0 ASC,state@1 ASC,time@2 ASC] | -| | SortExec: [city@0 ASC,state@1 ASC,time@2 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], projection=[state] | -| | ProjectionExec: expr=[city@0 as name] | -| | UnionExec | -| | ProjectionExec: expr=[city@0 as city] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] | -| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] | -| | ProjectionExec: expr=[city@0 as city] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [city@0 ASC,state@1 ASC,time@2 ASC] | -| | SortExec: [city@0 ASC,state@1 ASC,time@2 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], projection=[city] | -| | | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Union | +| | Projection: h2o.state AS name | +| | TableScan: h2o projection=[state] | +| | Projection: h2o.city AS name | +| | TableScan: h2o projection=[city] | +| physical_plan | UnionExec | +| | ProjectionExec: expr=[state@0 as name] | +| | UnionExec | +| | ProjectionExec: expr=[state@1 as state] | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] | +| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] | +| | ProjectionExec: expr=[state@1 as state] | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [city@0 ASC,state@1 ASC,time@2 ASC] | +| | SortExec: [city@0 ASC,state@1 ASC,time@2 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet]]}, projection=[state] | +| | ProjectionExec: expr=[city@0 as name] | +| | UnionExec | +| | ProjectionExec: expr=[city@0 as city] | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] | +| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] | +| | ProjectionExec: expr=[city@0 as city] | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [city@0 ASC,state@1 ASC,time@2 ASC] | +| | SortExec: [city@0 ASC,state@1 ASC,time@2 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet]]}, projection=[city] | +| | | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: select count(*) from h2o; +-----------------+ | COUNT(UInt8(1)) | diff --git a/query_tests/cases/in/duplicates_parquet.expected b/query_tests/cases/in/duplicates_parquet.expected index 81f7a0b091..57f3aee984 100644 --- a/query_tests/cases/in/duplicates_parquet.expected +++ b/query_tests/cases/in/duplicates_parquet.expected @@ -1,75 +1,75 @@ -- Test Setup: OneMeasurementFourChunksWithDuplicatesParquetOnly -- SQL: explain select time, state, city, min_temp, max_temp, area from h2o order by time, state, city; -- Results After Normalizing UUIDs -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: h2o.time ASC NULLS LAST, h2o.state ASC NULLS LAST, h2o.city ASC NULLS LAST | -| | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area | -| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] | -| physical_plan | SortExec: [time@0 ASC NULLS LAST,state@1 ASC NULLS LAST,city@2 ASC NULLS LAST] | -| | CoalescePartitionsExec | -| | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | UnionExec | -| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] | -| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[area, city, max_temp, min_temp, state, time] | -| | | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: h2o.time ASC NULLS LAST, h2o.state ASC NULLS LAST, h2o.city ASC NULLS LAST | +| | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area | +| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] | +| physical_plan | SortExec: [time@0 ASC NULLS LAST,state@1 ASC NULLS LAST,city@2 ASC NULLS LAST] | +| | CoalescePartitionsExec | +| | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | UnionExec | +| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] | +| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | +| | ParquetExec: limit=None, partitions={2 groups: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], [1/1/1/1/00000000-0000-0000-0000-000000000003.parquet]]}, projection=[area, city, max_temp, min_temp, state, time] | +| | | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: EXPLAIN select time, state, city, min_temp, max_temp, area from h2o; -- Results After Normalizing UUIDs -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area | -| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] | -| physical_plan | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] | -| | UnionExec | -| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] | -| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[area, city, max_temp, min_temp, state, time] | -| | | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: h2o.time, h2o.state, h2o.city, h2o.min_temp, h2o.max_temp, h2o.area | +| | TableScan: h2o projection=[area, city, max_temp, min_temp, state, time] | +| physical_plan | ProjectionExec: expr=[time@5 as time, state@4 as state, city@1 as city, min_temp@3 as min_temp, max_temp@2 as max_temp, area@0 as area] | +| | UnionExec | +| | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC] | +| | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time] | +| | ParquetExec: limit=None, partitions={2 groups: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], [1/1/1/1/00000000-0000-0000-0000-000000000003.parquet]]}, projection=[area, city, max_temp, min_temp, state, time] | +| | | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: EXPLAIN select state as name from h2o UNION ALL select city as name from h2o; -- Results After Normalizing UUIDs -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Union | -| | Projection: h2o.state AS name | -| | TableScan: h2o projection=[state] | -| | Projection: h2o.city AS name | -| | TableScan: h2o projection=[city] | -| physical_plan | UnionExec | -| | ProjectionExec: expr=[state@0 as name] | -| | UnionExec | -| | ProjectionExec: expr=[state@1 as state] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] | -| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[state] | -| | ProjectionExec: expr=[city@0 as name] | -| | UnionExec | -| | ProjectionExec: expr=[city@0 as city] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] | -| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[city] | -| | | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Union | +| | Projection: h2o.state AS name | +| | TableScan: h2o projection=[state] | +| | Projection: h2o.city AS name | +| | TableScan: h2o projection=[city] | +| physical_plan | UnionExec | +| | ProjectionExec: expr=[state@0 as name] | +| | UnionExec | +| | ProjectionExec: expr=[state@1 as state] | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] | +| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] | +| | ParquetExec: limit=None, partitions={2 groups: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], [1/1/1/1/00000000-0000-0000-0000-000000000003.parquet]]}, projection=[state] | +| | ProjectionExec: expr=[city@0 as name] | +| | UnionExec | +| | ProjectionExec: expr=[city@0 as city] | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [state@1 ASC,city@0 ASC,time@2 ASC] | +| | SortPreservingMergeExec: [state@1 ASC,city@0 ASC,time@2 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[state@1 ASC, city@0 ASC, time@2 ASC], projection=[city, state, time] | +| | ParquetExec: limit=None, partitions={2 groups: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], [1/1/1/1/00000000-0000-0000-0000-000000000003.parquet]]}, projection=[city] | +| | | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: select count(*) from h2o; +-----------------+ | COUNT(UInt8(1)) | @@ -91,8 +91,8 @@ | | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC], metrics=[elapsed_compute=1.234ms, mem_used=0, num_dupes=2, output_rows=5, spill_count=0, spilled_bytes=0] | | | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC], metrics=[elapsed_compute=1.234ms, mem_used=0, output_rows=7, spill_count=0, spilled_bytes=0] | | | UnionExec, metrics=[elapsed_compute=1.234ms, mem_used=0, output_rows=7, spill_count=0, spilled_bytes=0] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=state = Dictionary(Int32, Utf8("MA")), pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned=474, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=4, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=0, row_groups_pruned=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=state = Dictionary(Int32, Utf8("MA")), pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned=632, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=3, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=3, row_groups_pruned=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], predicate=state = Dictionary(Int32, Utf8("MA")), pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned=1219, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=5, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=5, row_groups_pruned=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=state = Dictionary(Int32, Utf8("MA")), pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned=474, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=4, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=0, row_groups_pruned=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, predicate=state = Dictionary(Int32, Utf8("MA")), pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned=632, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=3, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=3, row_groups_pruned=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] | +| | ParquetExec: limit=None, partitions={2 groups: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], [1/1/1/1/00000000-0000-0000-0000-000000000003.parquet]]}, predicate=state = Dictionary(Int32, Utf8("MA")), pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned=1219, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=5, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=5, row_groups_pruned=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] | | | | ---------- diff --git a/query_tests/cases/in/duplicates_parquet_many.expected b/query_tests/cases/in/duplicates_parquet_many.expected index 679b685697..899284c45a 100644 --- a/query_tests/cases/in/duplicates_parquet_many.expected +++ b/query_tests/cases/in/duplicates_parquet_many.expected @@ -7,32 +7,32 @@ +-----------------+----------+ -- SQL: EXPLAIN select count(*), sum(f) from m; -- Results After Normalizing UUIDs -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: COUNT(UInt8(1)), SUM(m.f) | -| | Aggregate: groupBy=[[]], aggr=[[COUNT(UInt8(1)), SUM(m.f)]] | -| | TableScan: m projection=[f] | -| physical_plan | ProjectionExec: expr=[COUNT(UInt8(1))@0 as COUNT(UInt8(1)), SUM(m.f)@1 as SUM(m.f)] | -| | AggregateExec: mode=Final, gby=[], aggr=[COUNT(UInt8(1)), SUM(m.f)] | -| | CoalescePartitionsExec | -| | AggregateExec: mode=Partial, gby=[], aggr=[COUNT(UInt8(1)), SUM(m.f)] | -| | UnionExec | -| | ProjectionExec: expr=[f@0 as f] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [tag@1 ASC,time@2 ASC] | -| | SortPreservingMergeExec: [tag@1 ASC,time@2 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000004.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000005.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000006.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000007.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000008.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000009.parquet], output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-00000000000a.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000b.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000c.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000d.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000e.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000f.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000010.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000011.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000012.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000013.parquet], projection=[f] | -| | | -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: COUNT(UInt8(1)), SUM(m.f) | +| | Aggregate: groupBy=[[]], aggr=[[COUNT(UInt8(1)), SUM(m.f)]] | +| | TableScan: m projection=[f] | +| physical_plan | ProjectionExec: expr=[COUNT(UInt8(1))@0 as COUNT(UInt8(1)), SUM(m.f)@1 as SUM(m.f)] | +| | AggregateExec: mode=Final, gby=[], aggr=[COUNT(UInt8(1)), SUM(m.f)] | +| | CoalescePartitionsExec | +| | AggregateExec: mode=Partial, gby=[], aggr=[COUNT(UInt8(1)), SUM(m.f)] | +| | UnionExec | +| | ProjectionExec: expr=[f@0 as f] | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [tag@1 ASC,time@2 ASC] | +| | SortPreservingMergeExec: [tag@1 ASC,time@2 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet]]}, output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000003.parquet]]}, output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000004.parquet]]}, output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000005.parquet]]}, output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000006.parquet]]}, output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000007.parquet]]}, output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000008.parquet]]}, output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000009.parquet]]}, output_ordering=[tag@1 ASC, time@2 ASC], projection=[f, tag, time] | +| | ParquetExec: limit=None, partitions={4 groups: [[1/1/1/1/00000000-0000-0000-0000-00000000000a.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000b.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000c.parquet], [1/1/1/1/00000000-0000-0000-0000-00000000000d.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000e.parquet, 1/1/1/1/00000000-0000-0000-0000-00000000000f.parquet], [1/1/1/1/00000000-0000-0000-0000-000000000010.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000011.parquet], [1/1/1/1/00000000-0000-0000-0000-000000000012.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000013.parquet]]}, projection=[f] | +| | | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/query_tests/cases/in/periods.expected b/query_tests/cases/in/periods.expected new file mode 100644 index 0000000000..47a9dd7349 --- /dev/null +++ b/query_tests/cases/in/periods.expected @@ -0,0 +1,21 @@ +-- Test Setup: PeriodsInNames +-- SQL: SELECT * from "measurement.one"; ++-----------+-----------+---------+---------+--------------------------------+ +| field.one | field.two | tag.one | tag.two | time | ++-----------+-----------+---------+---------+--------------------------------+ +| 1 | true | value | other | 2021-01-01T00:00:01.000000001Z | +| 1 | false | value2 | other2 | 2021-01-01T00:00:01.000000002Z | ++-----------+-----------+---------+---------+--------------------------------+ +-- SQL: SELECT "tag.one" from "measurement.one"; ++---------+ +| tag.one | ++---------+ +| value | +| value2 | ++---------+ +-- SQL: SELECT "tag.one" from "measurement.one" where "field.two" is TRUE; ++---------+ +| tag.one | ++---------+ +| value | ++---------+ diff --git a/query_tests/cases/in/periods.sql b/query_tests/cases/in/periods.sql new file mode 100644 index 0000000000..886897decb --- /dev/null +++ b/query_tests/cases/in/periods.sql @@ -0,0 +1,13 @@ +-- Basic query tests for measurement names that have periods in their names +-- IOX_SETUP: PeriodsInNames + +-- query data +SELECT * from "measurement.one"; + + + +-- projection +SELECT "tag.one" from "measurement.one"; + +-- predicate +SELECT "tag.one" from "measurement.one" where "field.two" is TRUE; diff --git a/query_tests/cases/in/pushdown.expected b/query_tests/cases/in/pushdown.expected index 2fa10056e9..196fac6170 100644 --- a/query_tests/cases/in/pushdown.expected +++ b/query_tests/cases/in/pushdown.expected @@ -14,15 +14,15 @@ +-------+--------+--------------------------------+-----------+ -- SQL: EXPLAIN SELECT * from restaurant; -- Results After Normalizing UUIDs -+---------------+--------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | TableScan: restaurant projection=[count, system, time, town] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[count, system, time, town] | -| | | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | TableScan: restaurant projection=[count, system, time, town] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, projection=[count, system, time, town] | +| | | ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where count > 200; -- Results After Sorting +-------+--------+--------------------------------+-----------+ @@ -37,49 +37,49 @@ +-------+--------+--------------------------------+-----------+ -- SQL: EXPLAIN SELECT * from restaurant where count > 200; -- Results After Normalizing UUIDs -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: restaurant.count > UInt64(200) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200)] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: count@0 > 200 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=count > UInt64(200), pruning_predicate=count_max@0 > 200, projection=[count, system, time, town] | -| | | -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.count > UInt64(200) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200)] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: count@0 > 200 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=count > UInt64(200), pruning_predicate=count_max@0 > 200, projection=[count, system, time, town] | +| | | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: EXPLAIN SELECT * from restaurant where count > 200.0; -- Results After Normalizing UUIDs -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: CAST(restaurant.count AS Float64) > Float64(200) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Float64) > Float64(200)] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: CAST(count@0 AS Float64) > 200 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=CAST(count AS Float64) > Float64(200), projection=[count, system, time, town] | -| | | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: CAST(restaurant.count AS Float64) > Float64(200) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Float64) > Float64(200)] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: CAST(count@0 AS Float64) > 200 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=CAST(count AS Float64) > Float64(200), projection=[count, system, time, town] | +| | | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: EXPLAIN SELECT * from restaurant where system > 4.0; -- Results After Normalizing UUIDs -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: restaurant.system > Float64(4) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(4)] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: system@1 > 4 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system > Float64(4), pruning_predicate=system_max@0 > 4, projection=[count, system, time, town] | -| | | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.system > Float64(4) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(4)] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: system@1 > 4 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=system > Float64(4), pruning_predicate=system_max@0 > 4, projection=[count, system, time, town] | +| | | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where count > 200 and town != 'tewsbury'; -- Results After Sorting +-------+--------+--------------------------------+-----------+ @@ -93,19 +93,19 @@ +-------+--------+--------------------------------+-----------+ -- SQL: EXPLAIN SELECT * from restaurant where count > 200 and town != 'tewsbury'; -- Results After Normalizing UUIDs -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: restaurant.count > UInt64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury"))] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: count@0 > 200 AND town@3 != tewsbury | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=count > UInt64(200) AND town != Dictionary(Int32, Utf8("tewsbury")), pruning_predicate=count_max@0 > 200 AND town_min@1 != tewsbury OR tewsbury != town_max@2, projection=[count, system, time, town] | -| | | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.count > UInt64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury"))] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: count@0 > 200 AND town@3 != tewsbury | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=count > UInt64(200) AND town != Dictionary(Int32, Utf8("tewsbury")), pruning_predicate=count_max@0 > 200 AND town_min@1 != tewsbury OR tewsbury != town_max@2, projection=[count, system, time, town] | +| | | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence'); -- Results After Sorting +-------+--------+--------------------------------+-----------+ @@ -118,19 +118,19 @@ +-------+--------+--------------------------------+-----------+ -- SQL: EXPLAIN SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence'); -- Results After Normalizing UUIDs -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: restaurant.count > UInt64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND (restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: count@0 > 200 AND town@3 != tewsbury AND system@1 = 5 OR town@3 = lawrence | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=count > UInt64(200) AND town != Dictionary(Int32, Utf8("tewsbury")) AND (system = Float64(5) OR town = Dictionary(Int32, Utf8("lawrence"))), pruning_predicate=count_max@0 > 200 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 <= 5 AND 5 <= system_max@4 OR town_min@1 <= lawrence AND lawrence <= town_max@2, projection=[count, system, time, town] | -| | | -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.count > UInt64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND (restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: count@0 > 200 AND town@3 != tewsbury AND system@1 = 5 OR town@3 = lawrence | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=count > UInt64(200) AND town != Dictionary(Int32, Utf8("tewsbury")) AND (system = Float64(5) OR town = Dictionary(Int32, Utf8("lawrence"))), pruning_predicate=count_max@0 > 200 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 <= 5 AND 5 <= system_max@4 OR town_min@1 <= lawrence AND lawrence <= town_max@2, projection=[count, system, time, town] | +| | | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence') and count < 40000; -- Results After Sorting +-------+--------+--------------------------------+-----------+ @@ -142,19 +142,19 @@ +-------+--------+--------------------------------+-----------+ -- SQL: EXPLAIN SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence') and count < 40000; -- Results After Normalizing UUIDs -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: restaurant.count > UInt64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND (restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))) AND restaurant.count < UInt64(40000) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence")), restaurant.count < UInt64(40000)] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: count@0 > 200 AND town@3 != tewsbury AND system@1 = 5 OR town@3 = lawrence AND count@0 < 40000 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=count > UInt64(200) AND town != Dictionary(Int32, Utf8("tewsbury")) AND (system = Float64(5) OR town = Dictionary(Int32, Utf8("lawrence"))) AND count < UInt64(40000), pruning_predicate=count_max@0 > 200 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 <= 5 AND 5 <= system_max@4 OR town_min@1 <= lawrence AND lawrence <= town_max@2 AND count_min@5 < 40000, projection=[count, system, time, town] | -| | | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.count > UInt64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND (restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))) AND restaurant.count < UInt64(40000) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence")), restaurant.count < UInt64(40000)] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: count@0 > 200 AND town@3 != tewsbury AND system@1 = 5 OR town@3 = lawrence AND count@0 < 40000 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=count > UInt64(200) AND town != Dictionary(Int32, Utf8("tewsbury")) AND (system = Float64(5) OR town = Dictionary(Int32, Utf8("lawrence"))) AND count < UInt64(40000), pruning_predicate=count_max@0 > 200 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 <= 5 AND 5 <= system_max@4 OR town_min@1 <= lawrence AND lawrence <= town_max@2 AND count_min@5 < 40000, projection=[count, system, time, town] | +| | | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where count > 200 and count < 40000; -- Results After Sorting +-------+--------+--------------------------------+-----------+ @@ -168,19 +168,19 @@ +-------+--------+--------------------------------+-----------+ -- SQL: EXPLAIN SELECT * from restaurant where count > 200 and count < 40000; -- Results After Normalizing UUIDs -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: restaurant.count > UInt64(200) AND restaurant.count < UInt64(40000) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.count < UInt64(40000)] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: count@0 > 200 AND count@0 < 40000 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=count > UInt64(200) AND count < UInt64(40000), pruning_predicate=count_max@0 > 200 AND count_min@1 < 40000, projection=[count, system, time, town] | -| | | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.count > UInt64(200) AND restaurant.count < UInt64(40000) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.count < UInt64(40000)] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: count@0 > 200 AND count@0 < 40000 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=count > UInt64(200) AND count < UInt64(40000), pruning_predicate=count_max@0 > 200 AND count_min@1 < 40000, projection=[count, system, time, town] | +| | | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where system > 4.0 and system < 7.0; -- Results After Sorting +-------+--------+--------------------------------+-----------+ @@ -195,19 +195,19 @@ +-------+--------+--------------------------------+-----------+ -- SQL: EXPLAIN SELECT * from restaurant where system > 4.0 and system < 7.0; -- Results After Normalizing UUIDs -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: restaurant.system > Float64(4) AND restaurant.system < Float64(7) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(4), restaurant.system < Float64(7)] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: system@1 > 4 AND system@1 < 7 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system > Float64(4) AND system < Float64(7), pruning_predicate=system_max@0 > 4 AND system_min@1 < 7, projection=[count, system, time, town] | -| | | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.system > Float64(4) AND restaurant.system < Float64(7) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(4), restaurant.system < Float64(7)] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: system@1 > 4 AND system@1 < 7 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=system > Float64(4) AND system < Float64(7), pruning_predicate=system_max@0 > 4 AND system_min@1 < 7, projection=[count, system, time, town] | +| | | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where system > 5.0 and system < 7.0; -- Results After Sorting +-------+--------+--------------------------------+----------+ @@ -219,19 +219,19 @@ +-------+--------+--------------------------------+----------+ -- SQL: EXPLAIN SELECT * from restaurant where system > 5.0 and system < 7.0; -- Results After Normalizing UUIDs -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: restaurant.system > Float64(5) AND restaurant.system < Float64(7) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), restaurant.system < Float64(7)] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: system@1 > 5 AND system@1 < 7 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system > Float64(5) AND system < Float64(7), pruning_predicate=system_max@0 > 5 AND system_min@1 < 7, projection=[count, system, time, town] | -| | | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.system > Float64(5) AND restaurant.system < Float64(7) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), restaurant.system < Float64(7)] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: system@1 > 5 AND system@1 < 7 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=system > Float64(5) AND system < Float64(7), pruning_predicate=system_max@0 > 5 AND system_min@1 < 7, projection=[count, system, time, town] | +| | | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where system > 5.0 and town != 'tewsbury' and 7.0 > system; -- Results After Sorting +-------+--------+--------------------------------+----------+ @@ -242,19 +242,19 @@ +-------+--------+--------------------------------+----------+ -- SQL: EXPLAIN SELECT * from restaurant where system > 5.0 and town != 'tewsbury' and 7.0 > system; -- Results After Normalizing UUIDs -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: restaurant.system > Float64(5) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND Float64(7) > restaurant.system | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), Float64(7) > restaurant.system] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: system@1 > 5 AND town@3 != tewsbury AND 7 > system@1 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system > Float64(5) AND town != Dictionary(Int32, Utf8("tewsbury")) AND Float64(7) > system, pruning_predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7, projection=[count, system, time, town] | -| | | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.system > Float64(5) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND Float64(7) > restaurant.system | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), Float64(7) > restaurant.system] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: system@1 > 5 AND town@3 != tewsbury AND 7 > system@1 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=system > Float64(5) AND town != Dictionary(Int32, Utf8("tewsbury")) AND Float64(7) > system, pruning_predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7, projection=[count, system, time, town] | +| | | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where system > 5.0 and 'tewsbury' != town and system < 7.0 and (count = 632 or town = 'reading'); -- Results After Sorting +-------+--------+--------------------------------+---------+ @@ -264,19 +264,19 @@ +-------+--------+--------------------------------+---------+ -- SQL: EXPLAIN SELECT * from restaurant where system > 5.0 and 'tewsbury' != town and system < 7.0 and (count = 632 or town = 'reading'); -- Results After Normalizing UUIDs -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: restaurant.system > Float64(5) AND Dictionary(Int32, Utf8("tewsbury")) != restaurant.town AND restaurant.system < Float64(7) AND (restaurant.count = UInt64(632) OR restaurant.town = Dictionary(Int32, Utf8("reading"))) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), Dictionary(Int32, Utf8("tewsbury")) != restaurant.town, restaurant.system < Float64(7), restaurant.count = UInt64(632) OR restaurant.town = Dictionary(Int32, Utf8("reading"))] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: system@1 > 5 AND tewsbury != town@3 AND system@1 < 7 AND count@0 = 632 OR town@3 = reading | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system > Float64(5) AND Dictionary(Int32, Utf8("tewsbury")) != town AND system < Float64(7) AND (count = UInt64(632) OR town = Dictionary(Int32, Utf8("reading"))), pruning_predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7 AND count_min@4 <= 632 AND 632 <= count_max@5 OR town_min@1 <= reading AND reading <= town_max@2, projection=[count, system, time, town] | -| | | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.system > Float64(5) AND Dictionary(Int32, Utf8("tewsbury")) != restaurant.town AND restaurant.system < Float64(7) AND (restaurant.count = UInt64(632) OR restaurant.town = Dictionary(Int32, Utf8("reading"))) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), Dictionary(Int32, Utf8("tewsbury")) != restaurant.town, restaurant.system < Float64(7), restaurant.count = UInt64(632) OR restaurant.town = Dictionary(Int32, Utf8("reading"))] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: system@1 > 5 AND tewsbury != town@3 AND system@1 < 7 AND count@0 = 632 OR town@3 = reading | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=system > Float64(5) AND Dictionary(Int32, Utf8("tewsbury")) != town AND system < Float64(7) AND (count = UInt64(632) OR town = Dictionary(Int32, Utf8("reading"))), pruning_predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7 AND count_min@4 <= 632 AND 632 <= count_max@5 OR town_min@1 <= reading AND reading <= town_max@2, projection=[count, system, time, town] | +| | | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where 5.0 < system and town != 'tewsbury' and system < 7.0 and (count = 632 or town = 'reading') and time > to_timestamp('1970-01-01T00:00:00.000000130+00:00'); -- Results After Sorting ++ diff --git a/query_tests/cases/in/retention.expected b/query_tests/cases/in/retention.expected index 7bfbcdec45..ec8856fb1f 100644 --- a/query_tests/cases/in/retention.expected +++ b/query_tests/cases/in/retention.expected @@ -9,30 +9,30 @@ +------+------+----------------------+ -- SQL: EXPLAIN SELECT * FROM cpu order by host, load, time; -- Results After Normalizing UUIDs -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: cpu.host ASC NULLS LAST, cpu.load ASC NULLS LAST, cpu.time ASC NULLS LAST | -| | Projection: cpu.host, cpu.load, cpu.time | -| | TableScan: cpu projection=[host, load, time] | -| physical_plan | SortExec: [host@0 ASC NULLS LAST,load@1 ASC NULLS LAST,time@2 ASC NULLS LAST] | -| | CoalescePartitionsExec | -| | ProjectionExec: expr=[host@0 as host, load@1 as load, time@2 as time] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [host@0 ASC,time@2 ASC] | -| | SortPreservingMergeExec: [host@0 ASC,time@2 ASC] | -| | SortExec: [host@0 ASC,time@2 ASC] | -| | UnionExec | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: time@2 < -9223372036854775808 OR time@2 > -3600000000000 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[host@0 ASC, time@2 ASC], projection=[host, load, time] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: time@2 < -9223372036854775808 OR time@2 > -3600000000000 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[host@0 ASC, time@2 ASC], projection=[host, load, time] | -| | | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: cpu.host ASC NULLS LAST, cpu.load ASC NULLS LAST, cpu.time ASC NULLS LAST | +| | Projection: cpu.host, cpu.load, cpu.time | +| | TableScan: cpu projection=[host, load, time] | +| physical_plan | SortExec: [host@0 ASC NULLS LAST,load@1 ASC NULLS LAST,time@2 ASC NULLS LAST] | +| | CoalescePartitionsExec | +| | ProjectionExec: expr=[host@0 as host, load@1 as load, time@2 as time] | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [host@0 ASC,time@2 ASC] | +| | SortPreservingMergeExec: [host@0 ASC,time@2 ASC] | +| | SortExec: [host@0 ASC,time@2 ASC] | +| | UnionExec | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: time@2 < -9223372036854775808 OR time@2 > -3600000000000 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[host@0 ASC, time@2 ASC], projection=[host, load, time] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: time@2 < -9223372036854775808 OR time@2 > -3600000000000 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[host@0 ASC, time@2 ASC], projection=[host, load, time] | +| | | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * FROM cpu WHERE host != 'b' ORDER BY host,time; +------+------+----------------------+ | host | load | time | @@ -42,30 +42,30 @@ +------+------+----------------------+ -- SQL: EXPLAIN SELECT * FROM cpu WHERE host != 'b' ORDER BY host,time; -- Results After Normalizing UUIDs -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: cpu.host ASC NULLS LAST, cpu.time ASC NULLS LAST | -| | Projection: cpu.host, cpu.load, cpu.time | -| | Filter: cpu.host != Dictionary(Int32, Utf8("b")) | -| | TableScan: cpu projection=[host, load, time], partial_filters=[cpu.host != Dictionary(Int32, Utf8("b"))] | -| physical_plan | SortExec: [host@0 ASC NULLS LAST,time@2 ASC NULLS LAST] | -| | CoalescePartitionsExec | -| | ProjectionExec: expr=[host@0 as host, load@1 as load, time@2 as time] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: host@0 != b | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [host@0 ASC,time@2 ASC] | -| | SortPreservingMergeExec: [host@0 ASC,time@2 ASC] | -| | SortExec: [host@0 ASC,time@2 ASC] | -| | UnionExec | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: time@2 < -9223372036854775808 OR time@2 > -3600000000000 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=host != Dictionary(Int32, Utf8("b")), pruning_predicate=host_min@0 != b OR b != host_max@1, output_ordering=[host@0 ASC, time@2 ASC], projection=[host, load, time] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: time@2 < -9223372036854775808 OR time@2 > -3600000000000 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=host != Dictionary(Int32, Utf8("b")), pruning_predicate=host_min@0 != b OR b != host_max@1, output_ordering=[host@0 ASC, time@2 ASC], projection=[host, load, time] | -| | | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: cpu.host ASC NULLS LAST, cpu.time ASC NULLS LAST | +| | Projection: cpu.host, cpu.load, cpu.time | +| | Filter: cpu.host != Dictionary(Int32, Utf8("b")) | +| | TableScan: cpu projection=[host, load, time], partial_filters=[cpu.host != Dictionary(Int32, Utf8("b"))] | +| physical_plan | SortExec: [host@0 ASC NULLS LAST,time@2 ASC NULLS LAST] | +| | CoalescePartitionsExec | +| | ProjectionExec: expr=[host@0 as host, load@1 as load, time@2 as time] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: host@0 != b | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [host@0 ASC,time@2 ASC] | +| | SortPreservingMergeExec: [host@0 ASC,time@2 ASC] | +| | SortExec: [host@0 ASC,time@2 ASC] | +| | UnionExec | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: time@2 < -9223372036854775808 OR time@2 > -3600000000000 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=host != Dictionary(Int32, Utf8("b")), pruning_predicate=host_min@0 != b OR b != host_max@1, output_ordering=[host@0 ASC, time@2 ASC], projection=[host, load, time] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: time@2 < -9223372036854775808 OR time@2 > -3600000000000 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, predicate=host != Dictionary(Int32, Utf8("b")), pruning_predicate=host_min@0 != b OR b != host_max@1, output_ordering=[host@0 ASC, time@2 ASC], projection=[host, load, time] | +| | | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/query_tests/cases/in/several_chunks.expected b/query_tests/cases/in/several_chunks.expected index b1f8640a5a..62848cefb2 100644 --- a/query_tests/cases/in/several_chunks.expected +++ b/query_tests/cases/in/several_chunks.expected @@ -14,25 +14,25 @@ +---------+------------+-------+------+--------------------------------+ -- SQL: EXPLAIN SELECT * from h2o; -- Results After Normalizing UUIDs -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time | -| | TableScan: h2o projection=[city, other_temp, state, temp, time] | -| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] | -| | UnionExec | -| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | -| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[city, other_temp, state, temp, time] | -| | | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time | +| | TableScan: h2o projection=[city, other_temp, state, temp, time] | +| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] | +| | UnionExec | +| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet]]}, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | +| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000003.parquet]]}, projection=[city, other_temp, state, temp, time] | +| | | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: select temp, other_temp, time from h2o; -- Results After Sorting +------+------------+--------------------------------+ @@ -48,50 +48,50 @@ +------+------------+--------------------------------+ -- SQL: EXPLAIN select temp, other_temp, time from h2o; -- Results After Normalizing UUIDs -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: h2o.temp, h2o.other_temp, h2o.time | -| | TableScan: h2o projection=[other_temp, temp, time] | -| physical_plan | ProjectionExec: expr=[temp@1 as temp, other_temp@0 as other_temp, time@2 as time] | -| | UnionExec | -| | ProjectionExec: expr=[other_temp@1 as other_temp, temp@3 as temp, time@4 as time] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | -| | ProjectionExec: expr=[other_temp@1 as other_temp, temp@3 as temp, time@4 as time] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], projection=[other_temp, temp, time] | -| | | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: h2o.temp, h2o.other_temp, h2o.time | +| | TableScan: h2o projection=[other_temp, temp, time] | +| physical_plan | ProjectionExec: expr=[temp@1 as temp, other_temp@0 as other_temp, time@2 as time] | +| | UnionExec | +| | ProjectionExec: expr=[other_temp@1 as other_temp, temp@3 as temp, time@4 as time] | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet]]}, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | +| | ProjectionExec: expr=[other_temp@1 as other_temp, temp@3 as temp, time@4 as time] | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000003.parquet]]}, projection=[other_temp, temp, time] | +| | | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: EXPLAIN SELECT * from h2o where time >= to_timestamp('1970-01-01T00:00:00.000000250+00:00'); -- Results After Normalizing UUIDs -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time | -| | Filter: h2o.time >= TimestampNanosecond(250, None) | -| | TableScan: h2o projection=[city, other_temp, state, temp, time], partial_filters=[h2o.time >= TimestampNanosecond(250, None)] | -| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: time@4 >= 250 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | UnionExec | -| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=time >= TimestampNanosecond(250, None), pruning_predicate=time_max@0 >= 250, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=time >= TimestampNanosecond(250, None), pruning_predicate=time_max@0 >= 250, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | -| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], predicate=time >= TimestampNanosecond(250, None), pruning_predicate=time_max@0 >= 250, projection=[city, other_temp, state, temp, time] | -| | | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time | +| | Filter: h2o.time >= TimestampNanosecond(250, None) | +| | TableScan: h2o projection=[city, other_temp, state, temp, time], partial_filters=[h2o.time >= TimestampNanosecond(250, None)] | +| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: time@4 >= 250 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | UnionExec | +| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=time >= TimestampNanosecond(250, None), pruning_predicate=time_max@0 >= 250, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, predicate=time >= TimestampNanosecond(250, None), pruning_predicate=time_max@0 >= 250, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | +| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet]]}, predicate=time >= TimestampNanosecond(250, None), pruning_predicate=time_max@0 >= 250, projection=[city, other_temp, state, temp, time] | +| | | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/query_tests/cases/in/two_chunks.expected b/query_tests/cases/in/two_chunks.expected index 661ffcefa0..33faba6e84 100644 --- a/query_tests/cases/in/two_chunks.expected +++ b/query_tests/cases/in/two_chunks.expected @@ -10,20 +10,20 @@ +--------+------------+-------+------+--------------------------------+ -- SQL: EXPLAIN SELECT * from h2o; -- Results After Normalizing UUIDs -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time | -| | TableScan: h2o projection=[city, other_temp, state, temp, time] | -| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] | -| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | -| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time | +| | TableScan: h2o projection=[city, other_temp, state, temp, time] | +| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] | +| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | +| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: select temp, other_temp, time from h2o; +------+------------+--------------------------------+ | temp | other_temp | time | @@ -34,18 +34,18 @@ +------+------------+--------------------------------+ -- SQL: EXPLAIN select temp, other_temp, time from h2o; -- Results After Normalizing UUIDs -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: h2o.temp, h2o.other_temp, h2o.time | -| | TableScan: h2o projection=[other_temp, temp, time] | -| physical_plan | ProjectionExec: expr=[temp@1 as temp, other_temp@0 as other_temp, time@2 as time] | -| | ProjectionExec: expr=[other_temp@1 as other_temp, temp@3 as temp, time@4 as time] | -| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | -| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: h2o.temp, h2o.other_temp, h2o.time | +| | TableScan: h2o projection=[other_temp, temp, time] | +| physical_plan | ProjectionExec: expr=[temp@1 as temp, other_temp@0 as other_temp, time@2 as time] | +| | ProjectionExec: expr=[other_temp@1 as other_temp, temp@3 as temp, time@4 as time] | +| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | +| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/query_tests/src/cases.rs b/query_tests/src/cases.rs index 183ec69e51..f305fd4ee5 100644 --- a/query_tests/src/cases.rs +++ b/query_tests/src/cases.rs @@ -116,6 +116,22 @@ async fn test_cases_new_sql_system_tables_sql() { .expect("flush worked"); } +#[tokio::test] +// Tests from "periods.sql", +async fn test_cases_periods_sql() { + test_helpers::maybe_start_logging(); + + let input_path = Path::new("cases").join("in").join("periods.sql"); + let mut runner = Runner::new(); + runner + .run(input_path) + .await + .expect("test failed"); + runner + .flush() + .expect("flush worked"); +} + #[tokio::test] // Tests from "pushdown.sql", async fn test_cases_pushdown_sql() { diff --git a/query_tests/src/scenarios.rs b/query_tests/src/scenarios.rs index fb477d868c..c299042f0b 100644 --- a/query_tests/src/scenarios.rs +++ b/query_tests/src/scenarios.rs @@ -66,6 +66,7 @@ pub fn get_all_setups() -> &'static HashMap> { register_setup!(ManyFieldsSeveralChunks), register_setup!(TwoChunksMissingColumns), register_setup!(AllTypes), + register_setup!(PeriodsInNames), register_setup!(TwoChunksDedupWeirdnessParquet), register_setup!(TwoChunksDedupWeirdnessParquetIngester), register_setup!(ThreeChunksWithRetention), diff --git a/query_tests/src/table_schema.rs b/query_tests/src/table_schema.rs index 7ef8d40b7e..748bb81d41 100644 --- a/query_tests/src/table_schema.rs +++ b/query_tests/src/table_schema.rs @@ -35,7 +35,7 @@ async fn run_table_schema_test_case( let ctx = db.new_query_context(None); let chunks = db - .chunks(table_name, &Default::default(), &None, ctx) + .chunks(table_name, &Default::default(), None, ctx) .await .expect("error getting chunks"); for chunk in chunks { diff --git a/schema/src/lib.rs b/schema/src/lib.rs index 9773ebb1e8..9e711dc0a3 100644 --- a/schema/src/lib.rs +++ b/schema/src/lib.rs @@ -375,7 +375,7 @@ impl Schema { /// Return names of the columns of given indexes with all PK columns (tags and time) /// If the columns are not provided, return all columns - pub fn select_given_and_pk_columns(&self, cols: &Option>) -> Vec { + pub fn select_given_and_pk_columns(&self, cols: Option<&Vec>) -> Vec { match cols { Some(cols) => { let mut columns = cols diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 057447362a..153ec9fd8c 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -23,9 +23,10 @@ bitflags = { version = "1" } byteorder = { version = "1", features = ["std"] } bytes = { version = "1", features = ["std"] } chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "iana-time-zone", "serde", "std", "winapi"] } +clap = { version = "4", features = ["color", "derive", "env", "error-context", "help", "std", "suggestions", "usage"] } crossbeam-utils = { version = "0.8", features = ["std"] } crypto-common = { version = "0.1", default-features = false, features = ["std"] } -datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "799dd747152f6574638a844986b8ea8470d3f4d6", features = ["async-compression", "bzip2", "compression", "crypto_expressions", "flate2", "regex_expressions", "unicode_expressions", "xz2"] } +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fbadebb894672f61327a30f77cda2ee88a343b2a", features = ["async-compression", "bzip2", "compression", "crypto_expressions", "flate2", "regex_expressions", "unicode_expressions", "xz2"] } digest = { version = "0.10", features = ["alloc", "block-buffer", "core-api", "mac", "std", "subtle"] } either = { version = "1", features = ["use_std"] } fixedbitset = { version = "0.4", features = ["std"] } @@ -136,7 +137,6 @@ url = { version = "2" } uuid = { version = "1", features = ["getrandom", "rng", "std", "v4"] } [target.x86_64-unknown-linux-gnu.dependencies] -io-lifetimes = { version = "1", features = ["close", "libc", "windows-sys"] } once_cell = { version = "1", default-features = false, features = ["unstable"] } rustix = { version = "0.36", features = ["fs", "io-lifetimes", "libc", "std", "termios", "use-libc-auxv"] } @@ -144,7 +144,6 @@ rustix = { version = "0.36", features = ["fs", "io-lifetimes", "libc", "std", "t once_cell = { version = "1", default-features = false, features = ["unstable"] } [target.x86_64-apple-darwin.dependencies] -io-lifetimes = { version = "1", features = ["close", "libc", "windows-sys"] } once_cell = { version = "1", default-features = false, features = ["unstable"] } rustix = { version = "0.36", features = ["fs", "io-lifetimes", "libc", "std", "termios", "use-libc-auxv"] } @@ -152,7 +151,6 @@ rustix = { version = "0.36", features = ["fs", "io-lifetimes", "libc", "std", "t once_cell = { version = "1", default-features = false, features = ["unstable"] } [target.aarch64-apple-darwin.dependencies] -io-lifetimes = { version = "1", features = ["close", "libc", "windows-sys"] } once_cell = { version = "1", default-features = false, features = ["unstable"] } rustix = { version = "0.36", features = ["fs", "io-lifetimes", "libc", "std", "termios", "use-libc-auxv"] }