diff --git a/Cargo.lock b/Cargo.lock index ecd291aa30..6b1f4241e9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -329,6 +329,7 @@ dependencies = [ "memchr", "pin-project-lite", "tokio", + "xz2", ] [[package]] @@ -1236,7 +1237,7 @@ dependencies = [ [[package]] name = "datafusion" version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ebb24c5bf46f2af362aebffba2012875b328e799#ebb24c5bf46f2af362aebffba2012875b328e799" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=a61615b2949bea9027eefe686613605e135780f2#a61615b2949bea9027eefe686613605e135780f2" dependencies = [ "ahash 0.8.2", "arrow", @@ -1255,7 +1256,7 @@ dependencies = [ "flate2", "futures", "glob", - "hashbrown 0.12.3", + "hashbrown 0.13.1", "itertools", "lazy_static", "log", @@ -1268,43 +1269,44 @@ dependencies = [ "pin-project-lite", "rand", "smallvec", - "sqlparser 0.26.0", + "sqlparser", "tempfile", "tokio", "tokio-stream", "tokio-util", "url", "uuid", + "xz2", ] [[package]] name = "datafusion-common" version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ebb24c5bf46f2af362aebffba2012875b328e799#ebb24c5bf46f2af362aebffba2012875b328e799" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=a61615b2949bea9027eefe686613605e135780f2#a61615b2949bea9027eefe686613605e135780f2" dependencies = [ "arrow", "chrono", "object_store", "parquet", - "sqlparser 0.26.0", + "sqlparser", ] [[package]] name = "datafusion-expr" version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ebb24c5bf46f2af362aebffba2012875b328e799#ebb24c5bf46f2af362aebffba2012875b328e799" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=a61615b2949bea9027eefe686613605e135780f2#a61615b2949bea9027eefe686613605e135780f2" dependencies = [ "ahash 0.8.2", "arrow", "datafusion-common", "log", - "sqlparser 0.26.0", + "sqlparser", ] [[package]] name = "datafusion-optimizer" version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ebb24c5bf46f2af362aebffba2012875b328e799#ebb24c5bf46f2af362aebffba2012875b328e799" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=a61615b2949bea9027eefe686613605e135780f2#a61615b2949bea9027eefe686613605e135780f2" dependencies = [ "arrow", "async-trait", @@ -1312,14 +1314,14 @@ dependencies = [ "datafusion-common", "datafusion-expr", "datafusion-physical-expr", - "hashbrown 0.12.3", + "hashbrown 0.13.1", "log", ] [[package]] name = "datafusion-physical-expr" version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ebb24c5bf46f2af362aebffba2012875b328e799#ebb24c5bf46f2af362aebffba2012875b328e799" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=a61615b2949bea9027eefe686613605e135780f2#a61615b2949bea9027eefe686613605e135780f2" dependencies = [ "ahash 0.8.2", "arrow", @@ -1332,7 +1334,7 @@ dependencies = [ "datafusion-expr", "datafusion-row", "half 2.1.0", - "hashbrown 0.12.3", + "hashbrown 0.13.1", "itertools", "lazy_static", "md-5", @@ -1348,7 +1350,7 @@ dependencies = [ [[package]] name = "datafusion-proto" version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ebb24c5bf46f2af362aebffba2012875b328e799#ebb24c5bf46f2af362aebffba2012875b328e799" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=a61615b2949bea9027eefe686613605e135780f2#a61615b2949bea9027eefe686613605e135780f2" dependencies = [ "arrow", "datafusion", @@ -1362,7 +1364,7 @@ dependencies = [ [[package]] name = "datafusion-row" version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ebb24c5bf46f2af362aebffba2012875b328e799#ebb24c5bf46f2af362aebffba2012875b328e799" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=a61615b2949bea9027eefe686613605e135780f2#a61615b2949bea9027eefe686613605e135780f2" dependencies = [ "arrow", "datafusion-common", @@ -1373,12 +1375,12 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ebb24c5bf46f2af362aebffba2012875b328e799#ebb24c5bf46f2af362aebffba2012875b328e799" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=a61615b2949bea9027eefe686613605e135780f2#a61615b2949bea9027eefe686613605e135780f2" dependencies = [ "arrow", "datafusion-common", "datafusion-expr", - "sqlparser 0.26.0", + "sqlparser", ] [[package]] @@ -2395,7 +2397,7 @@ version = "0.1.0" dependencies = [ "generated_types", "snafu", - "sqlparser 0.27.0", + "sqlparser", "workspace-hack", ] @@ -3066,6 +3068,17 @@ dependencies = [ "libc", ] +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "matchers" version = "0.1.0" @@ -3903,7 +3916,7 @@ dependencies = [ "query_functions", "schema", "snafu", - "sqlparser 0.27.0", + "sqlparser", "test_helpers", "workspace-hack", ] @@ -5052,15 +5065,6 @@ dependencies = [ "unicode_categories", ] -[[package]] -name = "sqlparser" -version = "0.26.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86be66ea0b2b22749cfa157d16e2e84bf793e626a3375f4d378dc289fa03affb" -dependencies = [ - "log", -] - [[package]] name = "sqlparser" version = "0.27.0" @@ -6288,7 +6292,6 @@ dependencies = [ name = "workspace-hack" version = "0.1.0" dependencies = [ - "ahash 0.7.6", "ahash 0.8.2", "arrow", "base64", @@ -6304,6 +6307,7 @@ dependencies = [ "either", "fixedbitset", "flatbuffers", + "flate2", "futures-channel", "futures-core", "futures-io", @@ -6416,6 +6420,15 @@ dependencies = [ "workspace-hack", ] +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + [[package]] name = "yaml-rust" version = "0.4.5" diff --git a/Cargo.toml b/Cargo.toml index 49d95fdb14..b62da9cf3d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -113,8 +113,8 @@ license = "MIT OR Apache-2.0" [workspace.dependencies] arrow = { version = "27.0.0" } arrow-flight = { version = "27.0.0" } -datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="ebb24c5bf46f2af362aebffba2012875b328e799", default-features = false } -datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="ebb24c5bf46f2af362aebffba2012875b328e799" } +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="a61615b2949bea9027eefe686613605e135780f2", default-features = false } +datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="a61615b2949bea9027eefe686613605e135780f2" } hashbrown = { version = "0.13.1" } parquet = { version = "27.0.0" } diff --git a/iox_query/src/frontend.rs b/iox_query/src/frontend.rs index d442b448c0..a289660429 100644 --- a/iox_query/src/frontend.rs +++ b/iox_query/src/frontend.rs @@ -314,7 +314,7 @@ mod test { if !(self.pred)(plan) { return Ok(true); } - let metrics = plan.metrics().unwrap().aggregate_by_partition(); + let metrics = plan.metrics().unwrap().aggregate_by_name(); let mut elapsed_compute: Option = None; let mut output_rows: Option = None; let mut start_timestamp: Option = None; diff --git a/query_functions/src/selectors.rs b/query_functions/src/selectors.rs index 5feb4087be..48020f5c4a 100644 --- a/query_functions/src/selectors.rs +++ b/query_functions/src/selectors.rs @@ -393,6 +393,12 @@ trait Selector: Debug + Default + Send + Sync { /// Update this selector's state based on values in value_arr and time_arr fn update_batch(&mut self, value_arr: &ArrayRef, time_arr: &ArrayRef) -> DataFusionResult<()>; + + /// Allocated size required for this selector, in bytes, + /// including `Self`. Allocated means that for internal + /// containers such as `Vec`, the `capacity` should be used not + /// the `len` + fn size(&self) -> usize; } /// Describes which part of the selector to return: the timestamp or @@ -527,6 +533,14 @@ where self.selector.datafusion_state() } + /// Allocated size required for this accumulator, in bytes, + /// including `Self`. Allocated means that for internal + /// containers such as `Vec`, the `capacity` should be used not + /// the `len` + fn size(&self) -> usize { + std::mem::size_of_val(self) - std::mem::size_of_val(&self.selector) + self.selector.size() + } + // Return the final value of this aggregator. fn evaluate(&self) -> DataFusionResult { self.selector.evaluate(&self.output) diff --git a/query_functions/src/selectors/internal.rs b/query_functions/src/selectors/internal.rs index 25beff73e2..bfe5b3c570 100644 --- a/query_functions/src/selectors/internal.rs +++ b/query_functions/src/selectors/internal.rs @@ -225,6 +225,11 @@ macro_rules! make_first_selector { Ok(()) } + + fn size(&self) -> usize { + // no nested types + std::mem::size_of_val(self) + } } }; } @@ -335,6 +340,11 @@ macro_rules! make_last_selector { Ok(()) } + + fn size(&self) -> usize { + // no nested types + std::mem::size_of_val(self) + } } }; } @@ -476,6 +486,11 @@ macro_rules! make_min_selector { } Ok(()) } + + fn size(&self) -> usize { + // no nested types + std::mem::size_of_val(self) + } } }; } @@ -593,6 +608,11 @@ macro_rules! make_max_selector { } Ok(()) } + + fn size(&self) -> usize { + // no nested types + std::mem::size_of_val(self) + } } }; } diff --git a/query_tests/cases/in/dedup_and_predicates_parquet.expected b/query_tests/cases/in/dedup_and_predicates_parquet.expected index aa4926c2a5..80b4c85b52 100644 --- a/query_tests/cases/in/dedup_and_predicates_parquet.expected +++ b/query_tests/cases/in/dedup_and_predicates_parquet.expected @@ -33,23 +33,23 @@ +-----+-----+-----+----------------------+ -- SQL: EXPLAIN SELECT * FROM "table" WHERE tag='A'; -- Results After Normalizing UUIDs -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: table.bar, table.foo, table.tag, table.time | -| | Filter: table.tag = Dictionary(Int32, Utf8("A")) | -| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.tag = Dictionary(Int32, Utf8("A"))] | -| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: tag@2 = A | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | -| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=tag_min@0 <= A AND A <= tag_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=tag_min@0 <= A AND A <= tag_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | -| | | -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: table.bar, table.foo, table.tag, table.time | +| | Filter: table.tag = Dictionary(Int32, Utf8("A")) | +| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.tag = Dictionary(Int32, Utf8("A"))] | +| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: tag@2 = A | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | +| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=tag = Dictionary(Int32, Utf8("A")), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=tag = Dictionary(Int32, Utf8("A")), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | +| | | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * FROM "table" WHERE foo=1 AND bar=2; +-----+-----+-----+----------------------+ | bar | foo | tag | time | @@ -84,26 +84,26 @@ +-----+-----+-----+----------------------+ -- SQL: EXPLAIN SELECT * FROM "table" WHERE time=to_timestamp('1970-01-01T00:00:00.000000000+00:00') ORDER BY tag; -- Results After Normalizing UUIDs -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: table.tag ASC NULLS LAST | -| | Projection: table.bar, table.foo, table.tag, table.time | -| | Filter: table.time = TimestampNanosecond(0, None) | -| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.time = TimestampNanosecond(0, None)] | -| physical_plan | SortExec: [tag@2 ASC NULLS LAST] | -| | CoalescePartitionsExec | -| | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: time@3 = 0 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | -| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=time_min@0 <= 0 AND 0 <= time_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=time_min@0 <= 0 AND 0 <= time_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | -| | | -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: table.tag ASC NULLS LAST | +| | Projection: table.bar, table.foo, table.tag, table.time | +| | Filter: table.time = TimestampNanosecond(0, None) | +| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.time = TimestampNanosecond(0, None)] | +| physical_plan | SortExec: [tag@2 ASC NULLS LAST] | +| | CoalescePartitionsExec | +| | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: time@3 = 0 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | +| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=time = TimestampNanosecond(0, None), pruning_predicate=time_min@0 <= 0 AND 0 <= time_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=time = TimestampNanosecond(0, None), pruning_predicate=time_min@0 <= 0 AND 0 <= time_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | +| | | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * FROM "table" WHERE tag='A' AND foo=1 AND time=to_timestamp('1970-01-01T00:00:00.000000000+00:00'); +-----+-----+-----+----------------------+ | bar | foo | tag | time | @@ -112,20 +112,20 @@ +-----+-----+-----+----------------------+ -- SQL: EXPLAIN SELECT * FROM "table" WHERE tag='A' AND foo=1 AND time=to_timestamp('1970-01-01T00:00:00.000000000+00:00'); -- Results After Normalizing UUIDs -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: table.bar, table.foo, table.tag, table.time | -| | Filter: table.tag = Dictionary(Int32, Utf8("A")) AND table.foo = Float64(1) AND table.time = TimestampNanosecond(0, None) | -| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.tag = Dictionary(Int32, Utf8("A")), table.foo = Float64(1), table.time = TimestampNanosecond(0, None)] | -| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: tag@2 = A AND foo@1 = 1 AND time@3 = 0 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | -| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=tag_min@0 <= A AND A <= tag_max@1 AND time_min@2 <= 0 AND 0 <= time_max@3, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=tag_min@0 <= A AND A <= tag_max@1 AND time_min@2 <= 0 AND 0 <= time_max@3, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | -| | | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: table.bar, table.foo, table.tag, table.time | +| | Filter: table.tag = Dictionary(Int32, Utf8("A")) AND table.foo = Float64(1) AND table.time = TimestampNanosecond(0, None) | +| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.tag = Dictionary(Int32, Utf8("A")), table.foo = Float64(1), table.time = TimestampNanosecond(0, None)] | +| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: tag@2 = A AND foo@1 = 1 AND time@3 = 0 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | +| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=tag = Dictionary(Int32, Utf8("A")) AND time = TimestampNanosecond(0, None), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1 AND time_min@2 <= 0 AND 0 <= time_max@3, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=tag = Dictionary(Int32, Utf8("A")) AND time = TimestampNanosecond(0, None), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1 AND time_min@2 <= 0 AND 0 <= time_max@3, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | +| | | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/query_tests/cases/in/dedup_and_predicates_parquet_ingester.expected b/query_tests/cases/in/dedup_and_predicates_parquet_ingester.expected index b32c6d9fbb..3cb766c556 100644 --- a/query_tests/cases/in/dedup_and_predicates_parquet_ingester.expected +++ b/query_tests/cases/in/dedup_and_predicates_parquet_ingester.expected @@ -34,24 +34,24 @@ +-----+-----+-----+----------------------+ -- SQL: EXPLAIN SELECT * FROM "table" WHERE tag='A'; -- Results After Normalizing UUIDs -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: table.bar, table.foo, table.tag, table.time | -| | Filter: table.tag = Dictionary(Int32, Utf8("A")) | -| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.tag = Dictionary(Int32, Utf8("A"))] | -| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: tag@2 = A | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | -| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=tag_min@0 <= A AND A <= tag_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | -| | SortExec: [tag@2 ASC,time@3 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | | -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: table.bar, table.foo, table.tag, table.time | +| | Filter: table.tag = Dictionary(Int32, Utf8("A")) | +| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.tag = Dictionary(Int32, Utf8("A"))] | +| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: tag@2 = A | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | +| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=tag = Dictionary(Int32, Utf8("A")), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | +| | SortExec: [tag@2 ASC,time@3 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * FROM "table" WHERE foo=1 AND bar=2; +-----+-----+-----+----------------------+ | bar | foo | tag | time | @@ -87,27 +87,27 @@ +-----+-----+-----+----------------------+ -- SQL: EXPLAIN SELECT * FROM "table" WHERE time=to_timestamp('1970-01-01T00:00:00.000000000+00:00') ORDER BY tag; -- Results After Normalizing UUIDs -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: table.tag ASC NULLS LAST | -| | Projection: table.bar, table.foo, table.tag, table.time | -| | Filter: table.time = TimestampNanosecond(0, None) | -| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.time = TimestampNanosecond(0, None)] | -| physical_plan | SortExec: [tag@2 ASC NULLS LAST] | -| | CoalescePartitionsExec | -| | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: time@3 = 0 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | -| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=time_min@0 <= 0 AND 0 <= time_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | -| | SortExec: [tag@2 ASC,time@3 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | | -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: table.tag ASC NULLS LAST | +| | Projection: table.bar, table.foo, table.tag, table.time | +| | Filter: table.time = TimestampNanosecond(0, None) | +| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.time = TimestampNanosecond(0, None)] | +| physical_plan | SortExec: [tag@2 ASC NULLS LAST] | +| | CoalescePartitionsExec | +| | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: time@3 = 0 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | +| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=time = TimestampNanosecond(0, None), pruning_predicate=time_min@0 <= 0 AND 0 <= time_max@1, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | +| | SortExec: [tag@2 ASC,time@3 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * FROM "table" WHERE tag='A' AND foo=1 AND time=to_timestamp('1970-01-01T00:00:00.000000000+00:00'); +-----+-----+-----+----------------------+ | bar | foo | tag | time | @@ -116,21 +116,21 @@ +-----+-----+-----+----------------------+ -- SQL: EXPLAIN SELECT * FROM "table" WHERE tag='A' AND foo=1 AND time=to_timestamp('1970-01-01T00:00:00.000000000+00:00'); -- Results After Normalizing UUIDs -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: table.bar, table.foo, table.tag, table.time | -| | Filter: table.tag = Dictionary(Int32, Utf8("A")) AND table.foo = Float64(1) AND table.time = TimestampNanosecond(0, None) | -| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.tag = Dictionary(Int32, Utf8("A")), table.foo = Float64(1), table.time = TimestampNanosecond(0, None)] | -| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: tag@2 = A AND foo@1 = 1 AND time@3 = 0 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | -| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=tag_min@0 <= A AND A <= tag_max@1 AND time_min@2 <= 0 AND 0 <= time_max@3, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | -| | SortExec: [tag@2 ASC,time@3 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: table.bar, table.foo, table.tag, table.time | +| | Filter: table.tag = Dictionary(Int32, Utf8("A")) AND table.foo = Float64(1) AND table.time = TimestampNanosecond(0, None) | +| | TableScan: table projection=[bar, foo, tag, time], partial_filters=[table.tag = Dictionary(Int32, Utf8("A")), table.foo = Float64(1), table.time = TimestampNanosecond(0, None)] | +| physical_plan | ProjectionExec: expr=[bar@0 as bar, foo@1 as foo, tag@2 as tag, time@3 as time] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: tag@2 = A AND foo@1 = 1 AND time@3 = 0 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [tag@2 ASC,time@3 ASC] | +| | SortPreservingMergeExec: [tag@2 ASC,time@3 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=tag = Dictionary(Int32, Utf8("A")) AND time = TimestampNanosecond(0, None), pruning_predicate=tag_min@0 <= A AND A <= tag_max@1 AND time_min@2 <= 0 AND 0 <= time_max@3, output_ordering=[tag@2 ASC, time@3 ASC], projection=[bar, foo, tag, time] | +| | SortExec: [tag@2 ASC,time@3 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/query_tests/cases/in/duplicates_parquet.expected b/query_tests/cases/in/duplicates_parquet.expected index c48378bfa3..81f7a0b091 100644 --- a/query_tests/cases/in/duplicates_parquet.expected +++ b/query_tests/cases/in/duplicates_parquet.expected @@ -86,13 +86,13 @@ | | ProjectionExec: expr=[area@0 as area, city@1 as city, max_temp@2 as max_temp, min_temp@3 as min_temp, state@4 as state, time@5 as time], metrics=[elapsed_compute=1.234ms, mem_used=0, output_rows=10, spill_count=0, spilled_bytes=0] | | | CoalesceBatchesExec: target_batch_size=4096, metrics=[elapsed_compute=1.234ms, mem_used=0, output_rows=10, spill_count=0, spilled_bytes=0] | | | FilterExec: state@4 = MA, metrics=[elapsed_compute=1.234ms, mem_used=0, output_rows=10, spill_count=0, spilled_bytes=0] | -| | RepartitionExec: partitioning=RoundRobinBatch(4), metrics=[fetch_time{inputPartition=0}=1.234ms, repart_time{inputPartition=0}=1.234ms, send_time{inputPartition=0}=1.234ms] | +| | RepartitionExec: partitioning=RoundRobinBatch(4), metrics=[fetch_time=1.234ms, repart_time=1.234ms, send_time=1.234ms] | | | UnionExec, metrics=[elapsed_compute=1.234ms, mem_used=0, output_rows=10, spill_count=0, spilled_bytes=0] | | | DeduplicateExec: [state@4 ASC,city@1 ASC,time@5 ASC], metrics=[elapsed_compute=1.234ms, mem_used=0, num_dupes=2, output_rows=5, spill_count=0, spilled_bytes=0] | | | SortPreservingMergeExec: [state@4 ASC,city@1 ASC,time@5 ASC], metrics=[elapsed_compute=1.234ms, mem_used=0, output_rows=7, spill_count=0, spilled_bytes=0] | | | UnionExec, metrics=[elapsed_compute=1.234ms, mem_used=0, output_rows=7, spill_count=0, spilled_bytes=0] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=state_min@0 <= MA AND MA <= state_max@1, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=474, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=4, page_index_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=1.234ms, page_index_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=0, predicate_evaluation_errors{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=0, pushdown_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=1.234ms, pushdown_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=0, row_groups_pruned{filename=1/1/1/1/00000000-0000-0000-0000-000000000000.parquet}=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=state_min@0 <= MA AND MA <= state_max@1, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=632, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=3, page_index_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=1.234ms, page_index_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=0, predicate_evaluation_errors{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=0, pushdown_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=1.234ms, pushdown_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=3, row_groups_pruned{filename=1/1/1/1/00000000-0000-0000-0000-000000000001.parquet}=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], predicate=state_min@0 <= MA AND MA <= state_max@1, projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=591, bytes_scanned{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=628, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=5, page_index_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=1.234ms, page_index_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=1.234ms, page_index_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=0, page_index_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=0, predicate_evaluation_errors{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=0, predicate_evaluation_errors{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=0, pushdown_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=1.234ms, pushdown_eval_time{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=1.234ms, pushdown_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=2, pushdown_rows_filtered{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=3, row_groups_pruned{filename=1/1/1/1/00000000-0000-0000-0000-000000000002.parquet}=0, row_groups_pruned{filename=1/1/1/1/00000000-0000-0000-0000-000000000003.parquet}=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=state = Dictionary(Int32, Utf8("MA")), pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned=474, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=4, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=0, row_groups_pruned=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=state = Dictionary(Int32, Utf8("MA")), pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned=632, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=3, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=3, row_groups_pruned=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/1/00000000-0000-0000-0000-000000000003.parquet], predicate=state = Dictionary(Int32, Utf8("MA")), pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, projection=[area, city, max_temp, min_temp, state, time], metrics=[bytes_scanned=1219, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=5, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=5, row_groups_pruned=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning=1.234ms] | | | | ---------- diff --git a/query_tests/cases/in/pushdown.expected b/query_tests/cases/in/pushdown.expected index 3c1c6001ef..2fa10056e9 100644 --- a/query_tests/cases/in/pushdown.expected +++ b/query_tests/cases/in/pushdown.expected @@ -37,49 +37,49 @@ +-------+--------+--------------------------------+-----------+ -- SQL: EXPLAIN SELECT * from restaurant where count > 200; -- Results After Normalizing UUIDs -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: restaurant.count > UInt64(200) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200)] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: count@0 > 200 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=count_max@0 > 200, projection=[count, system, time, town] | -| | | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.count > UInt64(200) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200)] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: count@0 > 200 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=count > UInt64(200), pruning_predicate=count_max@0 > 200, projection=[count, system, time, town] | +| | | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: EXPLAIN SELECT * from restaurant where count > 200.0; -- Results After Normalizing UUIDs -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: CAST(restaurant.count AS Float64) > Float64(200) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Float64) > Float64(200)] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: CAST(count@0 AS Float64) > 200 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=true, projection=[count, system, time, town] | -| | | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: CAST(restaurant.count AS Float64) > Float64(200) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Float64) > Float64(200)] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: CAST(count@0 AS Float64) > 200 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=CAST(count AS Float64) > Float64(200), projection=[count, system, time, town] | +| | | ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: EXPLAIN SELECT * from restaurant where system > 4.0; -- Results After Normalizing UUIDs -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: restaurant.system > Float64(4) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(4)] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: system@1 > 4 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 4, projection=[count, system, time, town] | -| | | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.system > Float64(4) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(4)] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: system@1 > 4 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system > Float64(4), pruning_predicate=system_max@0 > 4, projection=[count, system, time, town] | +| | | ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where count > 200 and town != 'tewsbury'; -- Results After Sorting +-------+--------+--------------------------------+-----------+ @@ -93,19 +93,19 @@ +-------+--------+--------------------------------+-----------+ -- SQL: EXPLAIN SELECT * from restaurant where count > 200 and town != 'tewsbury'; -- Results After Normalizing UUIDs -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: restaurant.count > UInt64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury"))] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: count@0 > 200 AND town@3 != tewsbury | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=count_max@0 > 200 AND town_min@1 != tewsbury OR tewsbury != town_max@2, projection=[count, system, time, town] | -| | | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.count > UInt64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury"))] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: count@0 > 200 AND town@3 != tewsbury | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=count > UInt64(200) AND town != Dictionary(Int32, Utf8("tewsbury")), pruning_predicate=count_max@0 > 200 AND town_min@1 != tewsbury OR tewsbury != town_max@2, projection=[count, system, time, town] | +| | | ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence'); -- Results After Sorting +-------+--------+--------------------------------+-----------+ @@ -118,19 +118,19 @@ +-------+--------+--------------------------------+-----------+ -- SQL: EXPLAIN SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence'); -- Results After Normalizing UUIDs -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: restaurant.count > UInt64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND (restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: count@0 > 200 AND town@3 != tewsbury AND system@1 = 5 OR town@3 = lawrence | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=count_max@0 > 200 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 <= 5 AND 5 <= system_max@4 OR town_min@1 <= lawrence AND lawrence <= town_max@2, projection=[count, system, time, town] | -| | | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.count > UInt64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND (restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: count@0 > 200 AND town@3 != tewsbury AND system@1 = 5 OR town@3 = lawrence | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=count > UInt64(200) AND town != Dictionary(Int32, Utf8("tewsbury")) AND (system = Float64(5) OR town = Dictionary(Int32, Utf8("lawrence"))), pruning_predicate=count_max@0 > 200 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 <= 5 AND 5 <= system_max@4 OR town_min@1 <= lawrence AND lawrence <= town_max@2, projection=[count, system, time, town] | +| | | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence') and count < 40000; -- Results After Sorting +-------+--------+--------------------------------+-----------+ @@ -142,19 +142,19 @@ +-------+--------+--------------------------------+-----------+ -- SQL: EXPLAIN SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence') and count < 40000; -- Results After Normalizing UUIDs -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: restaurant.count > UInt64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND (restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))) AND restaurant.count < UInt64(40000) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence")), restaurant.count < UInt64(40000)] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: count@0 > 200 AND town@3 != tewsbury AND system@1 = 5 OR town@3 = lawrence AND count@0 < 40000 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=count_max@0 > 200 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 <= 5 AND 5 <= system_max@4 OR town_min@1 <= lawrence AND lawrence <= town_max@2 AND count_min@5 < 40000, projection=[count, system, time, town] | -| | | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.count > UInt64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND (restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))) AND restaurant.count < UInt64(40000) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence")), restaurant.count < UInt64(40000)] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: count@0 > 200 AND town@3 != tewsbury AND system@1 = 5 OR town@3 = lawrence AND count@0 < 40000 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=count > UInt64(200) AND town != Dictionary(Int32, Utf8("tewsbury")) AND (system = Float64(5) OR town = Dictionary(Int32, Utf8("lawrence"))) AND count < UInt64(40000), pruning_predicate=count_max@0 > 200 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 <= 5 AND 5 <= system_max@4 OR town_min@1 <= lawrence AND lawrence <= town_max@2 AND count_min@5 < 40000, projection=[count, system, time, town] | +| | | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where count > 200 and count < 40000; -- Results After Sorting +-------+--------+--------------------------------+-----------+ @@ -168,19 +168,19 @@ +-------+--------+--------------------------------+-----------+ -- SQL: EXPLAIN SELECT * from restaurant where count > 200 and count < 40000; -- Results After Normalizing UUIDs -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: restaurant.count > UInt64(200) AND restaurant.count < UInt64(40000) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.count < UInt64(40000)] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: count@0 > 200 AND count@0 < 40000 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=count_max@0 > 200 AND count_min@1 < 40000, projection=[count, system, time, town] | -| | | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.count > UInt64(200) AND restaurant.count < UInt64(40000) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.count < UInt64(40000)] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: count@0 > 200 AND count@0 < 40000 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=count > UInt64(200) AND count < UInt64(40000), pruning_predicate=count_max@0 > 200 AND count_min@1 < 40000, projection=[count, system, time, town] | +| | | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where system > 4.0 and system < 7.0; -- Results After Sorting +-------+--------+--------------------------------+-----------+ @@ -195,19 +195,19 @@ +-------+--------+--------------------------------+-----------+ -- SQL: EXPLAIN SELECT * from restaurant where system > 4.0 and system < 7.0; -- Results After Normalizing UUIDs -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: restaurant.system > Float64(4) AND restaurant.system < Float64(7) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(4), restaurant.system < Float64(7)] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: system@1 > 4 AND system@1 < 7 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 4 AND system_min@1 < 7, projection=[count, system, time, town] | -| | | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.system > Float64(4) AND restaurant.system < Float64(7) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(4), restaurant.system < Float64(7)] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: system@1 > 4 AND system@1 < 7 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system > Float64(4) AND system < Float64(7), pruning_predicate=system_max@0 > 4 AND system_min@1 < 7, projection=[count, system, time, town] | +| | | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where system > 5.0 and system < 7.0; -- Results After Sorting +-------+--------+--------------------------------+----------+ @@ -219,19 +219,19 @@ +-------+--------+--------------------------------+----------+ -- SQL: EXPLAIN SELECT * from restaurant where system > 5.0 and system < 7.0; -- Results After Normalizing UUIDs -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: restaurant.system > Float64(5) AND restaurant.system < Float64(7) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), restaurant.system < Float64(7)] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: system@1 > 5 AND system@1 < 7 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 5 AND system_min@1 < 7, projection=[count, system, time, town] | -| | | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.system > Float64(5) AND restaurant.system < Float64(7) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), restaurant.system < Float64(7)] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: system@1 > 5 AND system@1 < 7 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system > Float64(5) AND system < Float64(7), pruning_predicate=system_max@0 > 5 AND system_min@1 < 7, projection=[count, system, time, town] | +| | | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where system > 5.0 and town != 'tewsbury' and 7.0 > system; -- Results After Sorting +-------+--------+--------------------------------+----------+ @@ -242,19 +242,19 @@ +-------+--------+--------------------------------+----------+ -- SQL: EXPLAIN SELECT * from restaurant where system > 5.0 and town != 'tewsbury' and 7.0 > system; -- Results After Normalizing UUIDs -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: restaurant.system > Float64(5) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND Float64(7) > restaurant.system | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), Float64(7) > restaurant.system] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: system@1 > 5 AND town@3 != tewsbury AND 7 > system@1 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7, projection=[count, system, time, town] | -| | | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.system > Float64(5) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND Float64(7) > restaurant.system | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), Float64(7) > restaurant.system] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: system@1 > 5 AND town@3 != tewsbury AND 7 > system@1 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system > Float64(5) AND town != Dictionary(Int32, Utf8("tewsbury")) AND Float64(7) > system, pruning_predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7, projection=[count, system, time, town] | +| | | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where system > 5.0 and 'tewsbury' != town and system < 7.0 and (count = 632 or town = 'reading'); -- Results After Sorting +-------+--------+--------------------------------+---------+ @@ -264,19 +264,19 @@ +-------+--------+--------------------------------+---------+ -- SQL: EXPLAIN SELECT * from restaurant where system > 5.0 and 'tewsbury' != town and system < 7.0 and (count = 632 or town = 'reading'); -- Results After Normalizing UUIDs -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: restaurant.system > Float64(5) AND Dictionary(Int32, Utf8("tewsbury")) != restaurant.town AND restaurant.system < Float64(7) AND (restaurant.count = UInt64(632) OR restaurant.town = Dictionary(Int32, Utf8("reading"))) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), Dictionary(Int32, Utf8("tewsbury")) != restaurant.town, restaurant.system < Float64(7), restaurant.count = UInt64(632) OR restaurant.town = Dictionary(Int32, Utf8("reading"))] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: system@1 > 5 AND tewsbury != town@3 AND system@1 < 7 AND count@0 = 632 OR town@3 = reading | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7 AND count_min@4 <= 632 AND 632 <= count_max@5 OR town_min@1 <= reading AND reading <= town_max@2, projection=[count, system, time, town] | -| | | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.system > Float64(5) AND Dictionary(Int32, Utf8("tewsbury")) != restaurant.town AND restaurant.system < Float64(7) AND (restaurant.count = UInt64(632) OR restaurant.town = Dictionary(Int32, Utf8("reading"))) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), Dictionary(Int32, Utf8("tewsbury")) != restaurant.town, restaurant.system < Float64(7), restaurant.count = UInt64(632) OR restaurant.town = Dictionary(Int32, Utf8("reading"))] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: system@1 > 5 AND tewsbury != town@3 AND system@1 < 7 AND count@0 = 632 OR town@3 = reading | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system > Float64(5) AND Dictionary(Int32, Utf8("tewsbury")) != town AND system < Float64(7) AND (count = UInt64(632) OR town = Dictionary(Int32, Utf8("reading"))), pruning_predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7 AND count_min@4 <= 632 AND 632 <= count_max@5 OR town_min@1 <= reading AND reading <= town_max@2, projection=[count, system, time, town] | +| | | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where 5.0 < system and town != 'tewsbury' and system < 7.0 and (count = 632 or town = 'reading') and time > to_timestamp('1970-01-01T00:00:00.000000130+00:00'); -- Results After Sorting ++ diff --git a/query_tests/cases/in/retention.expected b/query_tests/cases/in/retention.expected index 7adfb68667..7bfbcdec45 100644 --- a/query_tests/cases/in/retention.expected +++ b/query_tests/cases/in/retention.expected @@ -42,30 +42,30 @@ +------+------+----------------------+ -- SQL: EXPLAIN SELECT * FROM cpu WHERE host != 'b' ORDER BY host,time; -- Results After Normalizing UUIDs -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: cpu.host ASC NULLS LAST, cpu.time ASC NULLS LAST | -| | Projection: cpu.host, cpu.load, cpu.time | -| | Filter: cpu.host != Dictionary(Int32, Utf8("b")) | -| | TableScan: cpu projection=[host, load, time], partial_filters=[cpu.host != Dictionary(Int32, Utf8("b"))] | -| physical_plan | SortExec: [host@0 ASC NULLS LAST,time@2 ASC NULLS LAST] | -| | CoalescePartitionsExec | -| | ProjectionExec: expr=[host@0 as host, load@1 as load, time@2 as time] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: host@0 != b | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | DeduplicateExec: [host@0 ASC,time@2 ASC] | -| | SortPreservingMergeExec: [host@0 ASC,time@2 ASC] | -| | SortExec: [host@0 ASC,time@2 ASC] | -| | UnionExec | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: time@2 < -9223372036854775808 OR time@2 > -3600000000000 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=host_min@0 != b OR b != host_max@1, output_ordering=[host@0 ASC, time@2 ASC], projection=[host, load, time] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: time@2 < -9223372036854775808 OR time@2 > -3600000000000 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=host_min@0 != b OR b != host_max@1, output_ordering=[host@0 ASC, time@2 ASC], projection=[host, load, time] | -| | | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: cpu.host ASC NULLS LAST, cpu.time ASC NULLS LAST | +| | Projection: cpu.host, cpu.load, cpu.time | +| | Filter: cpu.host != Dictionary(Int32, Utf8("b")) | +| | TableScan: cpu projection=[host, load, time], partial_filters=[cpu.host != Dictionary(Int32, Utf8("b"))] | +| physical_plan | SortExec: [host@0 ASC NULLS LAST,time@2 ASC NULLS LAST] | +| | CoalescePartitionsExec | +| | ProjectionExec: expr=[host@0 as host, load@1 as load, time@2 as time] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: host@0 != b | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | DeduplicateExec: [host@0 ASC,time@2 ASC] | +| | SortPreservingMergeExec: [host@0 ASC,time@2 ASC] | +| | SortExec: [host@0 ASC,time@2 ASC] | +| | UnionExec | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: time@2 < -9223372036854775808 OR time@2 > -3600000000000 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=host != Dictionary(Int32, Utf8("b")), pruning_predicate=host_min@0 != b OR b != host_max@1, output_ordering=[host@0 ASC, time@2 ASC], projection=[host, load, time] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: time@2 < -9223372036854775808 OR time@2 > -3600000000000 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=host != Dictionary(Int32, Utf8("b")), pruning_predicate=host_min@0 != b OR b != host_max@1, output_ordering=[host@0 ASC, time@2 ASC], projection=[host, load, time] | +| | | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/query_tests/cases/in/several_chunks.expected b/query_tests/cases/in/several_chunks.expected index 808413a60b..b1f8640a5a 100644 --- a/query_tests/cases/in/several_chunks.expected +++ b/query_tests/cases/in/several_chunks.expected @@ -73,25 +73,25 @@ +---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: EXPLAIN SELECT * from h2o where time >= to_timestamp('1970-01-01T00:00:00.000000250+00:00'); -- Results After Normalizing UUIDs -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time | -| | Filter: h2o.time >= TimestampNanosecond(250, None) | -| | TableScan: h2o projection=[city, other_temp, state, temp, time], partial_filters=[h2o.time >= TimestampNanosecond(250, None)] | -| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: time@4 >= 250 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | UnionExec | -| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | UnionExec | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=time_max@0 >= 250, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=time_max@0 >= 250, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | -| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] | -| | RecordBatchesExec: batches_groups=1 batches=1 | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], predicate=time_max@0 >= 250, projection=[city, other_temp, state, temp, time] | -| | | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: h2o.city, h2o.other_temp, h2o.state, h2o.temp, h2o.time | +| | Filter: h2o.time >= TimestampNanosecond(250, None) | +| | TableScan: h2o projection=[city, other_temp, state, temp, time], partial_filters=[h2o.time >= TimestampNanosecond(250, None)] | +| physical_plan | ProjectionExec: expr=[city@0 as city, other_temp@1 as other_temp, state@2 as state, temp@3 as temp, time@4 as time] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: time@4 >= 250 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | UnionExec | +| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | SortPreservingMergeExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | UnionExec | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=time >= TimestampNanosecond(250, None), pruning_predicate=time_max@0 >= 250, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000001.parquet], predicate=time >= TimestampNanosecond(250, None), pruning_predicate=time_max@0 >= 250, output_ordering=[city@0 ASC, state@2 ASC, time@4 ASC], projection=[city, other_temp, state, temp, time] | +| | DeduplicateExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | SortExec: [city@0 ASC,state@2 ASC,time@4 ASC] | +| | RecordBatchesExec: batches_groups=1 batches=1 | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000002.parquet], predicate=time >= TimestampNanosecond(250, None), pruning_predicate=time_max@0 >= 250, projection=[city, other_temp, state, temp, time] | +| | | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 1030140964..e08575a8db 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -16,8 +16,7 @@ license.workspace = true ### BEGIN HAKARI SECTION [dependencies] -ahash-ca01ad9e24f5d932 = { package = "ahash", version = "0.7", features = ["std"] } -ahash-c38e5c1d305a1b54 = { package = "ahash", version = "0.8", default-features = false, features = ["compile-time-rng", "const-random", "getrandom", "runtime-rng"] } +ahash = { version = "0.8", default-features = false, features = ["compile-time-rng", "const-random", "getrandom", "runtime-rng"] } arrow = { version = "27", features = ["arrow-csv", "arrow-ipc", "arrow-json", "comfy-table", "csv", "dyn_cmp_dict", "ipc", "json", "prettyprint"] } base64 = { version = "0.13", features = ["std"] } bitflags = { version = "1" } @@ -26,11 +25,12 @@ bytes = { version = "1", features = ["std"] } chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "iana-time-zone", "serde", "std", "winapi"] } crossbeam-utils = { version = "0.8", features = ["std"] } crypto-common = { version = "0.1", default-features = false, features = ["std"] } -datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ebb24c5bf46f2af362aebffba2012875b328e799", features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "a61615b2949bea9027eefe686613605e135780f2", features = ["async-compression", "bzip2", "compression", "crypto_expressions", "flate2", "regex_expressions", "unicode_expressions", "xz2"] } digest = { version = "0.10", features = ["alloc", "block-buffer", "core-api", "mac", "std", "subtle"] } either = { version = "1", features = ["use_std"] } fixedbitset = { version = "0.4", features = ["std"] } flatbuffers = { version = "22", features = ["thiserror"] } +flate2 = { version = "1", features = ["miniz_oxide", "rust_backend"] } futures-channel = { version = "0.3", features = ["alloc", "futures-sink", "sink", "std"] } futures-core = { version = "0.3", features = ["alloc", "std"] } futures-io = { version = "0.3", features = ["std"] } @@ -39,7 +39,7 @@ futures-task = { version = "0.3", default-features = false, features = ["alloc", futures-util = { version = "0.3", features = ["alloc", "async-await", "async-await-macro", "channel", "futures-channel", "futures-io", "futures-macro", "futures-sink", "io", "memchr", "sink", "slab", "std"] } getrandom = { version = "0.2", default-features = false, features = ["std"] } hashbrown-5ef9efb8ec2df382 = { package = "hashbrown", version = "0.12", features = ["ahash", "inline-more", "raw"] } -hashbrown-594e8ee84c453af0 = { package = "hashbrown", version = "0.13", features = ["ahash", "inline-more"] } +hashbrown-594e8ee84c453af0 = { package = "hashbrown", version = "0.13", features = ["ahash", "inline-more", "raw"] } indexmap = { version = "1", default-features = false, features = ["std"] } libc = { version = "0.2", features = ["extra_traits", "std"] } lock_api = { version = "0.4", default-features = false, features = ["arc_lock"] } @@ -87,7 +87,6 @@ zstd-safe = { version = "5", default-features = false, features = ["arrays", "le zstd-sys = { version = "2", default-features = false, features = ["legacy", "std", "zdict_builder"] } [build-dependencies] -ahash-ca01ad9e24f5d932 = { package = "ahash", version = "0.7", features = ["std"] } base64 = { version = "0.13", features = ["std"] } bitflags = { version = "1" } byteorder = { version = "1", features = ["std"] }