chore: Update DataFusion pin (#8390)

* chore: Update DataFusion pin

* chore: Update for API

* fix: update plans

---------

Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com>
pull/24376/head
Andrew Lamb 2023-08-02 09:58:16 -05:00 committed by GitHub
parent 3969b40925
commit 6e13ff8cb8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 88 additions and 86 deletions

31
Cargo.lock generated
View File

@ -699,7 +699,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6798148dccfbff0fae41c7574d2fa8f1ef3492fba0face179de5d8d447d67b05"
dependencies = [
"memchr",
"regex-automata 0.3.4",
"regex-automata 0.3.3",
"serde",
]
@ -778,12 +778,11 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "cc"
version = "1.0.80"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51f1226cd9da55587234753d1245dd5b132343ea240f26b6a9003d68706141ba"
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
dependencies = [
"jobserver",
"libc",
]
[[package]]
@ -1376,7 +1375,7 @@ dependencies = [
[[package]]
name = "datafusion"
version = "28.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=2cf5f5b5bb824598de185d64c541c52c930728cf#2cf5f5b5bb824598de185d64c541c52c930728cf"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=5faa10b2911ecca4c2199f78ae675363c7d8230e#5faa10b2911ecca4c2199f78ae675363c7d8230e"
dependencies = [
"ahash",
"arrow",
@ -1424,7 +1423,7 @@ dependencies = [
[[package]]
name = "datafusion-common"
version = "28.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=2cf5f5b5bb824598de185d64c541c52c930728cf#2cf5f5b5bb824598de185d64c541c52c930728cf"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=5faa10b2911ecca4c2199f78ae675363c7d8230e#5faa10b2911ecca4c2199f78ae675363c7d8230e"
dependencies = [
"arrow",
"arrow-array",
@ -1438,7 +1437,7 @@ dependencies = [
[[package]]
name = "datafusion-execution"
version = "28.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=2cf5f5b5bb824598de185d64c541c52c930728cf#2cf5f5b5bb824598de185d64c541c52c930728cf"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=5faa10b2911ecca4c2199f78ae675363c7d8230e#5faa10b2911ecca4c2199f78ae675363c7d8230e"
dependencies = [
"dashmap",
"datafusion-common",
@ -1455,7 +1454,7 @@ dependencies = [
[[package]]
name = "datafusion-expr"
version = "28.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=2cf5f5b5bb824598de185d64c541c52c930728cf#2cf5f5b5bb824598de185d64c541c52c930728cf"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=5faa10b2911ecca4c2199f78ae675363c7d8230e#5faa10b2911ecca4c2199f78ae675363c7d8230e"
dependencies = [
"ahash",
"arrow",
@ -1469,7 +1468,7 @@ dependencies = [
[[package]]
name = "datafusion-optimizer"
version = "28.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=2cf5f5b5bb824598de185d64c541c52c930728cf#2cf5f5b5bb824598de185d64c541c52c930728cf"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=5faa10b2911ecca4c2199f78ae675363c7d8230e#5faa10b2911ecca4c2199f78ae675363c7d8230e"
dependencies = [
"arrow",
"async-trait",
@ -1486,7 +1485,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
version = "28.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=2cf5f5b5bb824598de185d64c541c52c930728cf#2cf5f5b5bb824598de185d64c541c52c930728cf"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=5faa10b2911ecca4c2199f78ae675363c7d8230e#5faa10b2911ecca4c2199f78ae675363c7d8230e"
dependencies = [
"ahash",
"arrow",
@ -1520,7 +1519,7 @@ dependencies = [
[[package]]
name = "datafusion-proto"
version = "28.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=2cf5f5b5bb824598de185d64c541c52c930728cf#2cf5f5b5bb824598de185d64c541c52c930728cf"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=5faa10b2911ecca4c2199f78ae675363c7d8230e#5faa10b2911ecca4c2199f78ae675363c7d8230e"
dependencies = [
"arrow",
"chrono",
@ -1534,7 +1533,7 @@ dependencies = [
[[package]]
name = "datafusion-sql"
version = "28.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=2cf5f5b5bb824598de185d64c541c52c930728cf#2cf5f5b5bb824598de185d64c541c52c930728cf"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=5faa10b2911ecca4c2199f78ae675363c7d8230e#5faa10b2911ecca4c2199f78ae675363c7d8230e"
dependencies = [
"arrow",
"arrow-schema",
@ -4569,7 +4568,7 @@ checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata 0.3.4",
"regex-automata 0.3.3",
"regex-syntax 0.7.4",
]
@ -4584,9 +4583,9 @@ dependencies = [
[[package]]
name = "regex-automata"
version = "0.3.4"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7b6d6190b7594385f61bd3911cd1be99dfddcfc365a4160cc2ab5bff4aed294"
checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310"
dependencies = [
"aho-corasick",
"memchr",
@ -6866,7 +6865,7 @@ dependencies = [
"rand",
"rand_core",
"regex",
"regex-automata 0.3.4",
"regex-automata 0.3.3",
"regex-syntax 0.7.4",
"reqwest",
"ring",

View File

@ -121,8 +121,8 @@ license = "MIT OR Apache-2.0"
[workspace.dependencies]
arrow = { version = "43.0.0" }
arrow-flight = { version = "43.0.0" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2cf5f5b5bb824598de185d64c541c52c930728cf", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2cf5f5b5bb824598de185d64c541c52c930728cf" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "5faa10b2911ecca4c2199f78ae675363c7d8230e", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "5faa10b2911ecca4c2199f78ae675363c7d8230e" }
hashbrown = { version = "0.14.0" }
object_store = { version = "0.6.0" }

View File

@ -1,11 +1,11 @@
-- Test Setup: OneMeasurementWithTags
-- SQL: SELECT count(time), count(*), count(bar), min(bar), max(bar), min(time), max(time) FROM cpu;
-- Results After Sorting
+-----------------+-----------------+----------------+--------------+--------------+--------------------------------+--------------------------------+
| COUNT(cpu.time) | COUNT(UInt8(1)) | COUNT(cpu.bar) | MIN(cpu.bar) | MAX(cpu.bar) | MIN(cpu.time) | MAX(cpu.time) |
+-----------------+-----------------+----------------+--------------+--------------+--------------------------------+--------------------------------+
| 4 | 4 | 4 | 1.0 | 2.0 | 1970-01-01T00:00:00.000000010Z | 1970-01-01T00:00:00.000000040Z |
+-----------------+-----------------+----------------+--------------+--------------+--------------------------------+--------------------------------+
+-----------------+----------+----------------+--------------+--------------+--------------------------------+--------------------------------+
| COUNT(cpu.time) | COUNT(*) | COUNT(cpu.bar) | MIN(cpu.bar) | MAX(cpu.bar) | MIN(cpu.time) | MAX(cpu.time) |
+-----------------+----------+----------------+--------------+--------------+--------------------------------+--------------------------------+
| 4 | 4 | 4 | 1.0 | 2.0 | 1970-01-01T00:00:00.000000010Z | 1970-01-01T00:00:00.000000040Z |
+-----------------+----------+----------------+--------------+--------------+--------------------------------+--------------------------------+
-- SQL: SELECT max(foo) FROM cpu;
-- Results After Sorting
+--------------+

View File

@ -8,10 +8,10 @@
+-------------------------+
-- SQL: SELECT count(*), city FROM o2 GROUP BY city;
-- Results After Sorting
+-----------------+--------+
| COUNT(UInt8(1)) | city |
+-----------------+--------+
| 1 | Boston |
| 2 | NYC |
| 2 | |
+-----------------+--------+
+----------+--------+
| COUNT(*) | city |
+----------+--------+
| 1 | Boston |
| 2 | NYC |
| 2 | |
+----------+--------+

View File

@ -46,11 +46,11 @@
| 21.0 | west |
+------+--------+
-- SQL: SELECT count(*) from cpu group by region;
+-----------------+
| COUNT(UInt8(1)) |
+-----------------+
| 2 |
+-----------------+
+----------+
| COUNT(*) |
+----------+
| 2 |
+----------+
-- SQL: SELECT * from disk;
+-------+--------+--------------------------------+
| bytes | region | time |

View File

@ -102,8 +102,8 @@
| | |
----------
-- SQL: select count(*) from h2o;
+-----------------+
| COUNT(UInt8(1)) |
+-----------------+
| 18 |
+-----------------+
+----------+
| COUNT(*) |
+----------+
| 18 |
+----------+

View File

@ -85,11 +85,11 @@
| | |
----------
-- SQL: select count(*) from h2o;
+-----------------+
| COUNT(UInt8(1)) |
+-----------------+
| 18 |
+-----------------+
+----------+
| COUNT(*) |
+----------+
| 18 |
+----------+
-- SQL: EXPLAIN ANALYZE SELECT * from h2o where state = 'MA'
-- Results After Normalizing UUIDs
-- Results After Normalizing Metrics

View File

@ -1,20 +1,20 @@
-- Test Setup: TwentySortedParquetFiles
-- SQL: select count(*), sum(f) from m;
+-----------------+----------+
| COUNT(UInt8(1)) | SUM(m.f) |
+-----------------+----------+
| 21 | 33.0 |
+-----------------+----------+
+----------+----------+
| COUNT(*) | SUM(m.f) |
+----------+----------+
| 21 | 33.0 |
+----------+----------+
-- SQL: EXPLAIN select count(*), sum(f) from m;
-- Results After Normalizing UUIDs
----------
| plan_type | plan |
----------
| logical_plan | Aggregate: groupBy=[[]], aggr=[[COUNT(UInt8(1)), SUM(m.f)]] |
| logical_plan | Aggregate: groupBy=[[]], aggr=[[COUNT(UInt8(1)) AS COUNT(*), SUM(m.f)]] |
| | TableScan: m projection=[f] |
| physical_plan | AggregateExec: mode=Final, gby=[], aggr=[COUNT(UInt8(1)), SUM(m.f)] |
| physical_plan | AggregateExec: mode=Final, gby=[], aggr=[COUNT(*), SUM(m.f)] |
| | CoalescePartitionsExec |
| | AggregateExec: mode=Partial, gby=[], aggr=[COUNT(UInt8(1)), SUM(m.f)] |
| | AggregateExec: mode=Partial, gby=[], aggr=[COUNT(*), SUM(m.f)] |
| | UnionExec |
| | ParquetExec: file_groups={4 groups: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet, 1/1/1/00000000-0000-0000-0000-000000000001.parquet, 1/1/1/00000000-0000-0000-0000-000000000002.parquet], [1/1/1/00000000-0000-0000-0000-000000000003.parquet, 1/1/1/00000000-0000-0000-0000-000000000004.parquet, 1/1/1/00000000-0000-0000-0000-000000000005.parquet], [1/1/1/00000000-0000-0000-0000-000000000006.parquet, 1/1/1/00000000-0000-0000-0000-000000000007.parquet], [1/1/1/00000000-0000-0000-0000-000000000008.parquet, 1/1/1/00000000-0000-0000-0000-000000000009.parquet]]}, projection=[f] |
| | ProjectionExec: expr=[f@1 as f] |

View File

@ -1,20 +1,20 @@
-- Test Setup: TwentySortedParquetFilesAndIngester
-- SQL: select count(*), sum(f) from m;
+-----------------+----------+
| COUNT(UInt8(1)) | SUM(m.f) |
+-----------------+----------+
| 21 | 33.0 |
+-----------------+----------+
+----------+----------+
| COUNT(*) | SUM(m.f) |
+----------+----------+
| 21 | 33.0 |
+----------+----------+
-- SQL: EXPLAIN select count(*), sum(f) from m;
-- Results After Normalizing UUIDs
----------
| plan_type | plan |
----------
| logical_plan | Aggregate: groupBy=[[]], aggr=[[COUNT(UInt8(1)), SUM(m.f)]] |
| logical_plan | Aggregate: groupBy=[[]], aggr=[[COUNT(UInt8(1)) AS COUNT(*), SUM(m.f)]] |
| | TableScan: m projection=[f] |
| physical_plan | AggregateExec: mode=Final, gby=[], aggr=[COUNT(UInt8(1)), SUM(m.f)] |
| physical_plan | AggregateExec: mode=Final, gby=[], aggr=[COUNT(*), SUM(m.f)] |
| | CoalescePartitionsExec |
| | AggregateExec: mode=Partial, gby=[], aggr=[COUNT(UInt8(1)), SUM(m.f)] |
| | AggregateExec: mode=Partial, gby=[], aggr=[COUNT(*), SUM(m.f)] |
| | UnionExec |
| | ParquetExec: file_groups={4 groups: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet, 1/1/1/00000000-0000-0000-0000-000000000001.parquet, 1/1/1/00000000-0000-0000-0000-000000000002.parquet], [1/1/1/00000000-0000-0000-0000-000000000003.parquet, 1/1/1/00000000-0000-0000-0000-000000000004.parquet, 1/1/1/00000000-0000-0000-0000-000000000005.parquet], [1/1/1/00000000-0000-0000-0000-000000000006.parquet, 1/1/1/00000000-0000-0000-0000-000000000007.parquet], [1/1/1/00000000-0000-0000-0000-000000000008.parquet, 1/1/1/00000000-0000-0000-0000-000000000009.parquet]]}, projection=[f] |
| | ProjectionExec: expr=[f@1 as f] |

View File

@ -183,29 +183,29 @@ Error during planning: gap-filling query is missing lower time bound
| 2000-05-05T12:40:00Z | 60.0 |
+----------------------+---------------------+
-- SQL: SELECT date_bin_gapfill(interval '4 minutes', time) as four_minute, interpolate(min(cpu.idle)), interpolate(min(cpu."user")), count(*) from cpu where time between timestamp '2000-05-05T12:19:00Z' and timestamp '2000-05-05T12:40:00Z' group by four_minute;
+----------------------+----------------------------+----------------------------+-----------------+
| four_minute | interpolate(MIN(cpu.idle)) | interpolate(MIN(cpu.user)) | COUNT(UInt8(1)) |
+----------------------+----------------------------+----------------------------+-----------------+
| 2000-05-05T12:16:00Z | | | |
| 2000-05-05T12:20:00Z | 70.0 | 23.2 | 1 |
| 2000-05-05T12:24:00Z | 67.5 | 24.2 | |
| 2000-05-05T12:28:00Z | 65.0 | 25.2 | 1 |
| 2000-05-05T12:32:00Z | 62.5 | 27.05 | |
| 2000-05-05T12:36:00Z | 60.0 | 28.9 | 1 |
| 2000-05-05T12:40:00Z | | 21.0 | 1 |
+----------------------+----------------------------+----------------------------+-----------------+
+----------------------+----------------------------+----------------------------+----------+
| four_minute | interpolate(MIN(cpu.idle)) | interpolate(MIN(cpu.user)) | COUNT(*) |
+----------------------+----------------------------+----------------------------+----------+
| 2000-05-05T12:16:00Z | | | |
| 2000-05-05T12:20:00Z | 70.0 | 23.2 | 1 |
| 2000-05-05T12:24:00Z | 67.5 | 24.2 | |
| 2000-05-05T12:28:00Z | 65.0 | 25.2 | 1 |
| 2000-05-05T12:32:00Z | 62.5 | 27.05 | |
| 2000-05-05T12:36:00Z | 60.0 | 28.9 | 1 |
| 2000-05-05T12:40:00Z | | 21.0 | 1 |
+----------------------+----------------------------+----------------------------+----------+
-- SQL: SELECT date_bin_gapfill(interval '4 minutes 1 nanosecond', time, timestamp '2000-05-05T12:15:59.999999999') as four_minute, interpolate(min(cpu.idle)), interpolate(min(cpu."user")), count(*) from cpu where time between timestamp '2000-05-05T12:19:00Z' and timestamp '2000-05-05T12:44:00Z' group by four_minute;
+--------------------------------+----------------------------+----------------------------+-----------------+
| four_minute | interpolate(MIN(cpu.idle)) | interpolate(MIN(cpu.user)) | COUNT(UInt8(1)) |
+--------------------------------+----------------------------+----------------------------+-----------------+
| 2000-05-05T12:15:59.999999999Z | | | |
| 2000-05-05T12:20:00Z | 70.0 | 23.2 | 1 |
| 2000-05-05T12:24:00.000000001Z | 67.5 | 24.2 | |
| 2000-05-05T12:28:00.000000002Z | 65.0 | 25.2 | 1 |
| 2000-05-05T12:32:00.000000003Z | 62.5 | 23.1 | |
| 2000-05-05T12:36:00.000000004Z | 60.0 | 21.0 | 2 |
| 2000-05-05T12:40:00.000000005Z | | | |
+--------------------------------+----------------------------+----------------------------+-----------------+
+--------------------------------+----------------------------+----------------------------+----------+
| four_minute | interpolate(MIN(cpu.idle)) | interpolate(MIN(cpu.user)) | COUNT(*) |
+--------------------------------+----------------------------+----------------------------+----------+
| 2000-05-05T12:15:59.999999999Z | | | |
| 2000-05-05T12:20:00Z | 70.0 | 23.2 | 1 |
| 2000-05-05T12:24:00.000000001Z | 67.5 | 24.2 | |
| 2000-05-05T12:28:00.000000002Z | 65.0 | 25.2 | 1 |
| 2000-05-05T12:32:00.000000003Z | 62.5 | 23.1 | |
| 2000-05-05T12:36:00.000000004Z | 60.0 | 21.0 | 2 |
| 2000-05-05T12:40:00.000000005Z | | | |
+--------------------------------+----------------------------+----------------------------+----------+
-- SQL: SELECT region, date_bin_gapfill('10 minute', time) as minute, locf(avg(cpu.user)) as locf_avg_user from cpu where time between timestamp '2000-05-05T12:00:00Z' and timestamp '2000-05-05T12:59:00Z' group by region, minute;
+--------+----------------------+--------------------+
| region | minute | locf_avg_user |

View File

@ -159,6 +159,9 @@ fn is_comparison(op: Operator) -> bool {
Operator::RegexNotMatch => true,
Operator::RegexNotIMatch => true,
Operator::StringConcat => false,
// array containment operators
Operator::ArrowAt => true,
Operator::AtArrow => true,
}
}

View File

@ -28,9 +28,9 @@ bytes = { version = "1" }
chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "serde"] }
crossbeam-utils = { version = "0.8" }
crypto-common = { version = "0.1", default-features = false, features = ["std"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2cf5f5b5bb824598de185d64c541c52c930728cf" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2cf5f5b5bb824598de185d64c541c52c930728cf", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2cf5f5b5bb824598de185d64c541c52c930728cf", default-features = false, features = ["crypto_expressions", "encoding_expressions", "regex_expressions", "unicode_expressions"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "5faa10b2911ecca4c2199f78ae675363c7d8230e" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "5faa10b2911ecca4c2199f78ae675363c7d8230e", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "5faa10b2911ecca4c2199f78ae675363c7d8230e", default-features = false, features = ["crypto_expressions", "encoding_expressions", "regex_expressions", "unicode_expressions"] }
digest = { version = "0.10", features = ["mac", "std"] }
either = { version = "1", features = ["serde"] }
fixedbitset = { version = "0.4" }