chore: Update DataFusion (get median fix and automatic string to timestamp coercion) (#6363)

* chore: Update DataFusion pin to get median fix

* chore: Update for new Expr node

* test: add test for median

* test: add test for coercion of strings to timestamps

* chore: Run cargo hakari tasks

Co-authored-by: CircleCI[bot] <circleci@influxdata.com>
Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com>
pull/24376/head
Andrew Lamb 2022-12-12 07:14:00 -05:00 committed by GitHub
parent 95969ad24f
commit e0ecacf6cc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 46 additions and 51 deletions

56
Cargo.lock generated
View File

@ -1244,7 +1244,7 @@ dependencies = [
[[package]]
name = "datafusion"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fbadebb894672f61327a30f77cda2ee88a343b2a#fbadebb894672f61327a30f77cda2ee88a343b2a"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=f2eb3b2bebffe75df06f3e55f2413728e7e19f2c#f2eb3b2bebffe75df06f3e55f2413728e7e19f2c"
dependencies = [
"ahash 0.8.2",
"arrow",
@ -1276,7 +1276,6 @@ dependencies = [
"pin-project-lite",
"rand",
"smallvec",
"sqllogictest",
"sqlparser 0.27.0",
"tempfile",
"tokio",
@ -1290,7 +1289,7 @@ dependencies = [
[[package]]
name = "datafusion-common"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fbadebb894672f61327a30f77cda2ee88a343b2a#fbadebb894672f61327a30f77cda2ee88a343b2a"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=f2eb3b2bebffe75df06f3e55f2413728e7e19f2c#f2eb3b2bebffe75df06f3e55f2413728e7e19f2c"
dependencies = [
"arrow",
"chrono",
@ -1302,7 +1301,7 @@ dependencies = [
[[package]]
name = "datafusion-expr"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fbadebb894672f61327a30f77cda2ee88a343b2a#fbadebb894672f61327a30f77cda2ee88a343b2a"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=f2eb3b2bebffe75df06f3e55f2413728e7e19f2c#f2eb3b2bebffe75df06f3e55f2413728e7e19f2c"
dependencies = [
"ahash 0.8.2",
"arrow",
@ -1314,7 +1313,7 @@ dependencies = [
[[package]]
name = "datafusion-optimizer"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fbadebb894672f61327a30f77cda2ee88a343b2a#fbadebb894672f61327a30f77cda2ee88a343b2a"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=f2eb3b2bebffe75df06f3e55f2413728e7e19f2c#f2eb3b2bebffe75df06f3e55f2413728e7e19f2c"
dependencies = [
"arrow",
"async-trait",
@ -1329,7 +1328,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fbadebb894672f61327a30f77cda2ee88a343b2a#fbadebb894672f61327a30f77cda2ee88a343b2a"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=f2eb3b2bebffe75df06f3e55f2413728e7e19f2c#f2eb3b2bebffe75df06f3e55f2413728e7e19f2c"
dependencies = [
"ahash 0.8.2",
"arrow",
@ -1358,7 +1357,7 @@ dependencies = [
[[package]]
name = "datafusion-proto"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fbadebb894672f61327a30f77cda2ee88a343b2a#fbadebb894672f61327a30f77cda2ee88a343b2a"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=f2eb3b2bebffe75df06f3e55f2413728e7e19f2c#f2eb3b2bebffe75df06f3e55f2413728e7e19f2c"
dependencies = [
"arrow",
"chrono",
@ -1375,7 +1374,7 @@ dependencies = [
[[package]]
name = "datafusion-row"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fbadebb894672f61327a30f77cda2ee88a343b2a#fbadebb894672f61327a30f77cda2ee88a343b2a"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=f2eb3b2bebffe75df06f3e55f2413728e7e19f2c#f2eb3b2bebffe75df06f3e55f2413728e7e19f2c"
dependencies = [
"arrow",
"datafusion-common",
@ -1386,11 +1385,12 @@ dependencies = [
[[package]]
name = "datafusion-sql"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fbadebb894672f61327a30f77cda2ee88a343b2a#fbadebb894672f61327a30f77cda2ee88a343b2a"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=f2eb3b2bebffe75df06f3e55f2413728e7e19f2c#f2eb3b2bebffe75df06f3e55f2413728e7e19f2c"
dependencies = [
"arrow-schema",
"datafusion-common",
"datafusion-expr",
"log",
"sqlparser 0.27.0",
]
@ -1424,12 +1424,6 @@ version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
[[package]]
name = "difference"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198"
[[package]]
name = "difflib"
version = "0.4.0"
@ -3030,17 +3024,6 @@ version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb"
[[package]]
name = "libtest-mimic"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7b603516767d1ab23d0de09d023e62966c3322f7148297c35cf3d97aa8b37fa"
dependencies = [
"clap 4.0.29",
"termcolor",
"threadpool",
]
[[package]]
name = "link-cplusplus"
version = "1.0.7"
@ -5099,25 +5082,6 @@ dependencies = [
"unicode_categories",
]
[[package]]
name = "sqllogictest"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba41e01d229d7725401de371e323851f82d839d68732a06162405362b60852fe"
dependencies = [
"async-trait",
"difference",
"futures",
"glob",
"humantime",
"itertools",
"libtest-mimic",
"regex",
"tempfile",
"thiserror",
"tracing",
]
[[package]]
name = "sqlparser"
version = "0.27.0"
@ -6371,7 +6335,6 @@ dependencies = [
"bytes",
"cc",
"chrono",
"clap 4.0.29",
"crossbeam-utils",
"crypto-common",
"datafusion",
@ -6391,6 +6354,7 @@ dependencies = [
"hashbrown 0.13.1",
"heck",
"indexmap",
"io-lifetimes",
"libc",
"lock_api",
"log",

View File

@ -114,8 +114,8 @@ license = "MIT OR Apache-2.0"
[workspace.dependencies]
arrow = { version = "28.0.0" }
arrow-flight = { version = "28.0.0" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="fbadebb894672f61327a30f77cda2ee88a343b2a", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="fbadebb894672f61327a30f77cda2ee88a343b2a" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="f2eb3b2bebffe75df06f3e55f2413728e7e19f2c", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="f2eb3b2bebffe75df06f3e55f2413728e7e19f2c" }
hashbrown = { version = "0.13.1" }
parquet = { version = "28.0.0" }

View File

@ -635,6 +635,7 @@ impl ExpressionVisitor for RowBasedVisitor {
| Expr::Literal(_)
| Expr::Negative(_)
| Expr::Not(_)
| Expr::Placeholder { .. }
| Expr::QualifiedWildcard { .. }
| Expr::ScalarFunction { .. }
| Expr::ScalarSubquery(_)

View File

@ -25,6 +25,20 @@
+--------+--------------------------------+------+
| west | 1970-01-01T00:00:00.000000150Z | 21 |
+--------+--------------------------------+------+
-- SQL: SELECT * from cpu where time > '1970-01-01T00:00:00'::timestamp ORDER BY time;
+--------+--------------------------------+------+
| region | time | user |
+--------+--------------------------------+------+
| west | 1970-01-01T00:00:00.000000100Z | 23.2 |
| west | 1970-01-01T00:00:00.000000150Z | 21 |
+--------+--------------------------------+------+
-- SQL: SELECT * from cpu where time > '1970-01-01T00:00:00' ORDER BY time;
+--------+--------------------------------+------+
| region | time | user |
+--------+--------------------------------+------+
| west | 1970-01-01T00:00:00.000000100Z | 23.2 |
| west | 1970-01-01T00:00:00.000000150Z | 21 |
+--------+--------------------------------+------+
-- SQL: SELECT "user", region from cpu where time > to_timestamp('1970-01-01T00:00:00.000000120+00:00');
+------+--------+
| user | region |
@ -43,3 +57,9 @@
+-------+--------+--------------------------------+
| 99 | east | 1970-01-01T00:00:00.000000200Z |
+-------+--------+--------------------------------+
-- SQL: select MEDIAN("user"), region from cpu group by region;
+------------------+--------+
| MEDIAN(cpu.user) | region |
+------------------+--------+
| 22.1 | west |
+------------------+--------+

View File

@ -13,6 +13,12 @@ SELECT "user", region from cpu;
-- predicate on CPU
SELECT * from cpu where time > to_timestamp('1970-01-01T00:00:00.000000120+00:00');
-- predicate on CPU with explicit coercion (cast string to timestamp)
SELECT * from cpu where time > '1970-01-01T00:00:00'::timestamp ORDER BY time;
-- predicate on CPU with automatic coercion (comparing time to string)
SELECT * from cpu where time > '1970-01-01T00:00:00' ORDER BY time;
-- projection and predicate
-- expect to get a subset of the columns, in the order specified
SELECT "user", region from cpu where time > to_timestamp('1970-01-01T00:00:00.000000120+00:00');
@ -20,6 +26,8 @@ SELECT "user", region from cpu where time > to_timestamp('1970-01-01T00:00:00.00
-- basic grouping
SELECT count(*) from cpu group by region;
-- select from a different measurement
SELECT * from disk;
-- MEDIAN should work
select MEDIAN("user"), region from cpu group by region;

View File

@ -23,10 +23,9 @@ bitflags = { version = "1" }
byteorder = { version = "1", features = ["std"] }
bytes = { version = "1", features = ["std"] }
chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "iana-time-zone", "serde", "std", "winapi"] }
clap = { version = "4", features = ["color", "derive", "env", "error-context", "help", "std", "suggestions", "usage"] }
crossbeam-utils = { version = "0.8", features = ["std"] }
crypto-common = { version = "0.1", default-features = false, features = ["std"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fbadebb894672f61327a30f77cda2ee88a343b2a", features = ["async-compression", "bzip2", "compression", "crypto_expressions", "flate2", "regex_expressions", "unicode_expressions", "xz2"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "f2eb3b2bebffe75df06f3e55f2413728e7e19f2c", features = ["async-compression", "bzip2", "compression", "crypto_expressions", "flate2", "regex_expressions", "unicode_expressions", "xz2"] }
digest = { version = "0.10", features = ["alloc", "block-buffer", "core-api", "mac", "std", "subtle"] }
either = { version = "1", features = ["use_std"] }
fixedbitset = { version = "0.4", features = ["std"] }
@ -137,6 +136,7 @@ url = { version = "2" }
uuid = { version = "1", features = ["getrandom", "rng", "std", "v4"] }
[target.x86_64-unknown-linux-gnu.dependencies]
io-lifetimes = { version = "1", features = ["close", "libc", "windows-sys"] }
once_cell = { version = "1", default-features = false, features = ["unstable"] }
rustix = { version = "0.36", features = ["fs", "io-lifetimes", "libc", "std", "termios", "use-libc-auxv"] }
@ -144,6 +144,7 @@ rustix = { version = "0.36", features = ["fs", "io-lifetimes", "libc", "std", "t
once_cell = { version = "1", default-features = false, features = ["unstable"] }
[target.x86_64-apple-darwin.dependencies]
io-lifetimes = { version = "1", features = ["close", "libc", "windows-sys"] }
once_cell = { version = "1", default-features = false, features = ["unstable"] }
rustix = { version = "0.36", features = ["fs", "io-lifetimes", "libc", "std", "termios", "use-libc-auxv"] }
@ -151,6 +152,7 @@ rustix = { version = "0.36", features = ["fs", "io-lifetimes", "libc", "std", "t
once_cell = { version = "1", default-features = false, features = ["unstable"] }
[target.aarch64-apple-darwin.dependencies]
io-lifetimes = { version = "1", features = ["close", "libc", "windows-sys"] }
once_cell = { version = "1", default-features = false, features = ["unstable"] }
rustix = { version = "0.36", features = ["fs", "io-lifetimes", "libc", "std", "termios", "use-libc-auxv"] }