From 9fb1de04287e2078011c78286ca42a5d3b3ea030 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 23 Nov 2022 06:04:14 -0500 Subject: [PATCH] chore: Update datafusion (2 of N) right before arrow 27 upgrade (#6207) * chore: Update datafusion (2 of N) right before arrow 27 upgrade * fix: Update tests for better unsigned pushdown * chore: Run cargo hakari tasks Co-authored-by: CircleCI[bot] --- Cargo.lock | 16 +-- Cargo.toml | 4 +- query_tests/cases/in/pushdown.expected | 186 ++++++++++++------------- workspace-hack/Cargo.toml | 2 +- 4 files changed, 102 insertions(+), 106 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ac328a66db..a58ccfe613 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1169,7 +1169,7 @@ dependencies = [ [[package]] name = "datafusion" version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=d2814c960168b45c4a0f5d7bbb72d9f412cb08bd#d2814c960168b45c4a0f5d7bbb72d9f412cb08bd" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=4dcf9857c0aad3d11050037bd46889a54f83a394#4dcf9857c0aad3d11050037bd46889a54f83a394" dependencies = [ "ahash 0.8.2", "arrow", @@ -1213,7 +1213,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=d2814c960168b45c4a0f5d7bbb72d9f412cb08bd#d2814c960168b45c4a0f5d7bbb72d9f412cb08bd" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=4dcf9857c0aad3d11050037bd46889a54f83a394#4dcf9857c0aad3d11050037bd46889a54f83a394" dependencies = [ "arrow", "chrono", @@ -1225,7 +1225,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=d2814c960168b45c4a0f5d7bbb72d9f412cb08bd#d2814c960168b45c4a0f5d7bbb72d9f412cb08bd" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=4dcf9857c0aad3d11050037bd46889a54f83a394#4dcf9857c0aad3d11050037bd46889a54f83a394" dependencies = [ "ahash 0.8.2", "arrow", @@ -1237,7 +1237,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=d2814c960168b45c4a0f5d7bbb72d9f412cb08bd#d2814c960168b45c4a0f5d7bbb72d9f412cb08bd" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=4dcf9857c0aad3d11050037bd46889a54f83a394#4dcf9857c0aad3d11050037bd46889a54f83a394" dependencies = [ "arrow", "async-trait", @@ -1252,7 +1252,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=d2814c960168b45c4a0f5d7bbb72d9f412cb08bd#d2814c960168b45c4a0f5d7bbb72d9f412cb08bd" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=4dcf9857c0aad3d11050037bd46889a54f83a394#4dcf9857c0aad3d11050037bd46889a54f83a394" dependencies = [ "ahash 0.8.2", "arrow", @@ -1281,7 +1281,7 @@ dependencies = [ [[package]] name = "datafusion-proto" version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=d2814c960168b45c4a0f5d7bbb72d9f412cb08bd#d2814c960168b45c4a0f5d7bbb72d9f412cb08bd" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=4dcf9857c0aad3d11050037bd46889a54f83a394#4dcf9857c0aad3d11050037bd46889a54f83a394" dependencies = [ "arrow", "datafusion", @@ -1295,7 +1295,7 @@ dependencies = [ [[package]] name = "datafusion-row" version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=d2814c960168b45c4a0f5d7bbb72d9f412cb08bd#d2814c960168b45c4a0f5d7bbb72d9f412cb08bd" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=4dcf9857c0aad3d11050037bd46889a54f83a394#4dcf9857c0aad3d11050037bd46889a54f83a394" dependencies = [ "arrow", "datafusion-common", @@ -1306,7 +1306,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "14.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=d2814c960168b45c4a0f5d7bbb72d9f412cb08bd#d2814c960168b45c4a0f5d7bbb72d9f412cb08bd" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=4dcf9857c0aad3d11050037bd46889a54f83a394#4dcf9857c0aad3d11050037bd46889a54f83a394" dependencies = [ "arrow", "datafusion-common", diff --git a/Cargo.toml b/Cargo.toml index f7b6cab4b5..a3d860860b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -111,8 +111,8 @@ license = "MIT OR Apache-2.0" [workspace.dependencies] arrow = { version = "26.0.0" } arrow-flight = { version = "26.0.0" } -datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="d2814c960168b45c4a0f5d7bbb72d9f412cb08bd", default-features = false } -datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="d2814c960168b45c4a0f5d7bbb72d9f412cb08bd" } +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="4dcf9857c0aad3d11050037bd46889a54f83a394", default-features = false } +datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="4dcf9857c0aad3d11050037bd46889a54f83a394" } hashbrown = { version = "0.13.1" } parquet = { version = "26.0.0" } diff --git a/query_tests/cases/in/pushdown.expected b/query_tests/cases/in/pushdown.expected index 6ec292ab8d..3c1c6001ef 100644 --- a/query_tests/cases/in/pushdown.expected +++ b/query_tests/cases/in/pushdown.expected @@ -37,19 +37,19 @@ +-------+--------+--------------------------------+-----------+ -- SQL: EXPLAIN SELECT * from restaurant where count > 200; -- Results After Normalizing UUIDs -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: CAST(restaurant.count AS Int64) > Int64(200) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200)] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: CAST(count@0 AS Int64) > 200 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=true, projection=[count, system, time, town] | -| | | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.count > UInt64(200) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200)] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: count@0 > 200 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=count_max@0 > 200, projection=[count, system, time, town] | +| | | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: EXPLAIN SELECT * from restaurant where count > 200.0; -- Results After Normalizing UUIDs +---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -93,19 +93,19 @@ +-------+--------+--------------------------------+-----------+ -- SQL: EXPLAIN SELECT * from restaurant where count > 200 and town != 'tewsbury'; -- Results After Normalizing UUIDs -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: CAST(restaurant.count AS Int64) > Int64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury"))] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: CAST(count@0 AS Int64) > 200 AND town@3 != tewsbury | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=town_min@0 != tewsbury OR tewsbury != town_max@1, projection=[count, system, time, town] | -| | | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.count > UInt64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury"))] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: count@0 > 200 AND town@3 != tewsbury | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=count_max@0 > 200 AND town_min@1 != tewsbury OR tewsbury != town_max@2, projection=[count, system, time, town] | +| | | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence'); -- Results After Sorting +-------+--------+--------------------------------+-----------+ @@ -118,19 +118,19 @@ +-------+--------+--------------------------------+-----------+ -- SQL: EXPLAIN SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence'); -- Results After Normalizing UUIDs -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: CAST(restaurant.count AS Int64) > Int64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND (restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: CAST(count@0 AS Int64) > 200 AND town@3 != tewsbury AND system@1 = 5 OR town@3 = lawrence | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=town_min@0 != tewsbury OR tewsbury != town_max@1 AND system_min@2 <= 5 AND 5 <= system_max@3 OR town_min@0 <= lawrence AND lawrence <= town_max@1, projection=[count, system, time, town] | -| | | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.count > UInt64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND (restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: count@0 > 200 AND town@3 != tewsbury AND system@1 = 5 OR town@3 = lawrence | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=count_max@0 > 200 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 <= 5 AND 5 <= system_max@4 OR town_min@1 <= lawrence AND lawrence <= town_max@2, projection=[count, system, time, town] | +| | | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence') and count < 40000; -- Results After Sorting +-------+--------+--------------------------------+-----------+ @@ -142,21 +142,19 @@ +-------+--------+--------------------------------+-----------+ -- SQL: EXPLAIN SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence') and count < 40000; -- Results After Normalizing UUIDs -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: CAST(restaurant.count AS Int64)restaurant.count > Int64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND (restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))) AND CAST(restaurant.count AS Int64)restaurant.count < Int64(40000) | -| | Projection: CAST(restaurant.count AS Int64) AS CAST(restaurant.count AS Int64)restaurant.count, restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence")), CAST(restaurant.count AS Int64) < Int64(40000)] | -| physical_plan | ProjectionExec: expr=[count@1 as count, system@2 as system, time@3 as time, town@4 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: CAST(restaurant.count AS Int64)restaurant.count@0 > 200 AND town@4 != tewsbury AND system@2 = 5 OR town@4 = lawrence AND CAST(restaurant.count AS Int64)restaurant.count@0 < 40000 | -| | ProjectionExec: expr=[CAST(count@0 AS Int64) as CAST(restaurant.count AS Int64)restaurant.count, count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=town_min@0 != tewsbury OR tewsbury != town_max@1 AND system_min@2 <= 5 AND 5 <= system_max@3 OR town_min@0 <= lawrence AND lawrence <= town_max@1, projection=[count, system, time, town] | -| | | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.count > UInt64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND (restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))) AND restaurant.count < UInt64(40000) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence")), restaurant.count < UInt64(40000)] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: count@0 > 200 AND town@3 != tewsbury AND system@1 = 5 OR town@3 = lawrence AND count@0 < 40000 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=count_max@0 > 200 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 <= 5 AND 5 <= system_max@4 OR town_min@1 <= lawrence AND lawrence <= town_max@2 AND count_min@5 < 40000, projection=[count, system, time, town] | +| | | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where count > 200 and count < 40000; -- Results After Sorting +-------+--------+--------------------------------+-----------+ @@ -170,21 +168,19 @@ +-------+--------+--------------------------------+-----------+ -- SQL: EXPLAIN SELECT * from restaurant where count > 200 and count < 40000; -- Results After Normalizing UUIDs -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: CAST(restaurant.count AS Int64)restaurant.count > Int64(200) AND CAST(restaurant.count AS Int64)restaurant.count < Int64(40000) | -| | Projection: CAST(restaurant.count AS Int64) AS CAST(restaurant.count AS Int64)restaurant.count, restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200), CAST(restaurant.count AS Int64) < Int64(40000)] | -| physical_plan | ProjectionExec: expr=[count@1 as count, system@2 as system, time@3 as time, town@4 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: CAST(restaurant.count AS Int64)restaurant.count@0 > 200 AND CAST(restaurant.count AS Int64)restaurant.count@0 < 40000 | -| | ProjectionExec: expr=[CAST(count@0 AS Int64) as CAST(restaurant.count AS Int64)restaurant.count, count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=true, projection=[count, system, time, town] | -| | | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.count > UInt64(200) AND restaurant.count < UInt64(40000) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.count > UInt64(200), restaurant.count < UInt64(40000)] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: count@0 > 200 AND count@0 < 40000 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=count_max@0 > 200 AND count_min@1 < 40000, projection=[count, system, time, town] | +| | | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where system > 4.0 and system < 7.0; -- Results After Sorting +-------+--------+--------------------------------+-----------+ @@ -268,38 +264,38 @@ +-------+--------+--------------------------------+---------+ -- SQL: EXPLAIN SELECT * from restaurant where system > 5.0 and 'tewsbury' != town and system < 7.0 and (count = 632 or town = 'reading'); -- Results After Normalizing UUIDs -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: restaurant.system > Float64(5) AND Dictionary(Int32, Utf8("tewsbury")) != restaurant.town AND restaurant.system < Float64(7) AND (CAST(restaurant.count AS Int64) = Int64(632) OR restaurant.town = Dictionary(Int32, Utf8("reading"))) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), Dictionary(Int32, Utf8("tewsbury")) != restaurant.town, restaurant.system < Float64(7), CAST(restaurant.count AS Int64) = Int64(632) OR restaurant.town = Dictionary(Int32, Utf8("reading"))] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: system@1 > 5 AND tewsbury != town@3 AND system@1 < 7 AND CAST(count@0 AS Int64) = 632 OR town@3 = reading | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7, projection=[count, system, time, town] | -| | | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: restaurant.system > Float64(5) AND Dictionary(Int32, Utf8("tewsbury")) != restaurant.town AND restaurant.system < Float64(7) AND (restaurant.count = UInt64(632) OR restaurant.town = Dictionary(Int32, Utf8("reading"))) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[restaurant.system > Float64(5), Dictionary(Int32, Utf8("tewsbury")) != restaurant.town, restaurant.system < Float64(7), restaurant.count = UInt64(632) OR restaurant.town = Dictionary(Int32, Utf8("reading"))] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: system@1 > 5 AND tewsbury != town@3 AND system@1 < 7 AND count@0 = 632 OR town@3 = reading | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7 AND count_min@4 <= 632 AND 632 <= count_max@5 OR town_min@1 <= reading AND reading <= town_max@2, projection=[count, system, time, town] | +| | | ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where 5.0 < system and town != 'tewsbury' and system < 7.0 and (count = 632 or town = 'reading') and time > to_timestamp('1970-01-01T00:00:00.000000130+00:00'); -- Results After Sorting ++ ++ -- SQL: EXPLAIN SELECT * from restaurant where 5.0 < system and town != 'tewsbury' and system < 7.0 and (count = 632 or town = 'reading') and time > to_timestamp('1970-01-01T00:00:00.000000130+00:00'); -- Results After Normalizing UUIDs -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | -| | Filter: Float64(5) < restaurant.system AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND restaurant.system < Float64(7) AND (CAST(restaurant.count AS Int64) = Int64(632) OR restaurant.town = Dictionary(Int32, Utf8("reading"))) AND restaurant.time > TimestampNanosecond(130, None) | -| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[Float64(5) < restaurant.system, restaurant.town != Dictionary(Int32, Utf8("tewsbury")), restaurant.system < Float64(7), CAST(restaurant.count AS Int64) = Int64(632) OR restaurant.town = Dictionary(Int32, Utf8("reading")), restaurant.time > TimestampNanosecond(130, None)] | -| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | -| | CoalesceBatchesExec: target_batch_size=4096 | -| | FilterExec: 5 < system@1 AND town@3 != tewsbury AND system@1 < 7 AND CAST(count@0 AS Int64) = 632 OR town@3 = reading AND time@2 > 130 | -| | RepartitionExec: partitioning=RoundRobinBatch(4) | -| | EmptyExec: produce_one_row=false | -| | | -+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town | +| | Filter: Float64(5) < restaurant.system AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND restaurant.system < Float64(7) AND (restaurant.count = UInt64(632) OR restaurant.town = Dictionary(Int32, Utf8("reading"))) AND restaurant.time > TimestampNanosecond(130, None) | +| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[Float64(5) < restaurant.system, restaurant.town != Dictionary(Int32, Utf8("tewsbury")), restaurant.system < Float64(7), restaurant.count = UInt64(632) OR restaurant.town = Dictionary(Int32, Utf8("reading")), restaurant.time > TimestampNanosecond(130, None)] | +| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] | +| | CoalesceBatchesExec: target_batch_size=4096 | +| | FilterExec: 5 < system@1 AND town@3 != tewsbury AND system@1 < 7 AND count@0 = 632 OR town@3 = reading AND time@2 > 130 | +| | RepartitionExec: partitioning=RoundRobinBatch(4) | +| | EmptyExec: produce_one_row=false | +| | | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -- SQL: SELECT * from restaurant where system > 5.0 and 'tewsbury' != town and system < 7.0 and town = 'reading'; -- Results After Sorting +-------+--------+--------------------------------+---------+ diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 884c9131b9..8c02281151 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -26,7 +26,7 @@ bytes = { version = "1", features = ["std"] } chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "iana-time-zone", "serde", "std", "winapi"] } crossbeam-utils = { version = "0.8", features = ["std"] } crypto-common = { version = "0.1", default-features = false, features = ["std"] } -datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "d2814c960168b45c4a0f5d7bbb72d9f412cb08bd", features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4dcf9857c0aad3d11050037bd46889a54f83a394", features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } digest = { version = "0.10", features = ["alloc", "block-buffer", "core-api", "mac", "std", "subtle"] } either = { version = "1", features = ["use_std"] } fixedbitset = { version = "0.4", features = ["std"] }