From de79619e714e7cda3d220a4f8fc1518f2b07c4d7 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 31 Jul 2023 10:41:00 -0500 Subject: [PATCH] chore: Update datafusion (#8355) * chore: Update datafusion pin * fix: Update for change in API * chore: Update plan --------- Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com> --- Cargo.lock | 70 +++++++------------ Cargo.toml | 4 +- .../query_tests/cases/in/gapfill.sql.expected | 2 +- .../src/logical_optimizer/handle_gapfill.rs | 4 +- iox_query_influxql/src/plan/planner.rs | 8 ++- workspace-hack/Cargo.toml | 7 +- 6 files changed, 40 insertions(+), 55 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2403afb813..21e684f036 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1374,8 +1374,8 @@ dependencies = [ [[package]] name = "datafusion" -version = "27.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=44008d71180f2d03e9d21944788e61cb8845abc7#44008d71180f2d03e9d21944788e61cb8845abc7" +version = "28.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2cf5f5b5bb824598de185d64c541c52c930728cf#2cf5f5b5bb824598de185d64c541c52c930728cf" dependencies = [ "ahash", "arrow", @@ -1392,11 +1392,11 @@ dependencies = [ "datafusion-expr", "datafusion-optimizer", "datafusion-physical-expr", - "datafusion-row", "datafusion-sql", "flate2", "futures", "glob", + "half 2.3.1", "hashbrown 0.14.0", "indexmap 2.0.0", "itertools 0.11.0", @@ -1410,7 +1410,7 @@ dependencies = [ "pin-project-lite", "rand", "smallvec", - "sqlparser 0.35.0", + "sqlparser", "tempfile", "tokio", "tokio-util", @@ -1422,8 +1422,8 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "27.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=44008d71180f2d03e9d21944788e61cb8845abc7#44008d71180f2d03e9d21944788e61cb8845abc7" +version = "28.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2cf5f5b5bb824598de185d64c541c52c930728cf#2cf5f5b5bb824598de185d64c541c52c930728cf" dependencies = [ "arrow", "arrow-array", @@ -1431,13 +1431,13 @@ dependencies = [ "num_cpus", "object_store", "parquet", - "sqlparser 0.35.0", + "sqlparser", ] [[package]] name = "datafusion-execution" -version = "27.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=44008d71180f2d03e9d21944788e61cb8845abc7#44008d71180f2d03e9d21944788e61cb8845abc7" +version = "28.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2cf5f5b5bb824598de185d64c541c52c930728cf#2cf5f5b5bb824598de185d64c541c52c930728cf" dependencies = [ "dashmap", "datafusion-common", @@ -1453,22 +1453,22 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "27.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=44008d71180f2d03e9d21944788e61cb8845abc7#44008d71180f2d03e9d21944788e61cb8845abc7" +version = "28.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2cf5f5b5bb824598de185d64c541c52c930728cf#2cf5f5b5bb824598de185d64c541c52c930728cf" dependencies = [ "ahash", "arrow", "datafusion-common", "lazy_static", - "sqlparser 0.35.0", + "sqlparser", "strum 0.25.0", "strum_macros 0.25.1", ] [[package]] name = "datafusion-optimizer" -version = "27.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=44008d71180f2d03e9d21944788e61cb8845abc7#44008d71180f2d03e9d21944788e61cb8845abc7" +version = "28.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2cf5f5b5bb824598de185d64c541c52c930728cf#2cf5f5b5bb824598de185d64c541c52c930728cf" dependencies = [ "arrow", "async-trait", @@ -1484,8 +1484,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "27.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=44008d71180f2d03e9d21944788e61cb8845abc7#44008d71180f2d03e9d21944788e61cb8845abc7" +version = "28.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2cf5f5b5bb824598de185d64c541c52c930728cf#2cf5f5b5bb824598de185d64c541c52c930728cf" dependencies = [ "ahash", "arrow", @@ -1498,7 +1498,6 @@ dependencies = [ "chrono", "datafusion-common", "datafusion-expr", - "datafusion-row", "half 2.3.1", "hashbrown 0.14.0", "hex", @@ -1519,8 +1518,8 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "27.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=44008d71180f2d03e9d21944788e61cb8845abc7#44008d71180f2d03e9d21944788e61cb8845abc7" +version = "28.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2cf5f5b5bb824598de185d64c541c52c930728cf#2cf5f5b5bb824598de185d64c541c52c930728cf" dependencies = [ "arrow", "chrono", @@ -1531,28 +1530,17 @@ dependencies = [ "prost", ] -[[package]] -name = "datafusion-row" -version = "27.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=44008d71180f2d03e9d21944788e61cb8845abc7#44008d71180f2d03e9d21944788e61cb8845abc7" -dependencies = [ - "arrow", - "datafusion-common", - "paste", - "rand", -] - [[package]] name = "datafusion-sql" -version = "27.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=44008d71180f2d03e9d21944788e61cb8845abc7#44008d71180f2d03e9d21944788e61cb8845abc7" +version = "28.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2cf5f5b5bb824598de185d64c541c52c930728cf#2cf5f5b5bb824598de185d64c541c52c930728cf" dependencies = [ "arrow", "arrow-schema", "datafusion-common", "datafusion-expr", "log", - "sqlparser 0.35.0", + "sqlparser", ] [[package]] @@ -2639,7 +2627,7 @@ version = "0.1.0" dependencies = [ "generated_types", "snafu", - "sqlparser 0.36.1", + "sqlparser", "workspace-hack", ] @@ -4232,7 +4220,7 @@ dependencies = [ "query_functions", "schema", "snafu", - "sqlparser 0.36.1", + "sqlparser", "test_helpers", "workspace-hack", ] @@ -5304,16 +5292,6 @@ dependencies = [ "unicode_categories", ] -[[package]] -name = "sqlparser" -version = "0.35.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca597d77c98894be1f965f2e4e2d2a61575d4998088e655476c73715c54b2b43" -dependencies = [ - "log", - "sqlparser_derive", -] - [[package]] name = "sqlparser" version = "0.36.1" @@ -5321,6 +5299,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2eaa1e88e78d2c2460d78b7dc3f0c08dbb606ab4222f9aff36f420d36e307d87" dependencies = [ "log", + "sqlparser_derive", ] [[package]] @@ -6896,6 +6875,7 @@ dependencies = [ "sha2", "similar", "smallvec", + "sqlparser", "sqlx", "sqlx-core", "sqlx-macros", diff --git a/Cargo.toml b/Cargo.toml index 95909dfe86..93bd0fc959 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -121,8 +121,8 @@ license = "MIT OR Apache-2.0" [workspace.dependencies] arrow = { version = "43.0.0" } arrow-flight = { version = "43.0.0" } -datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "44008d71180f2d03e9d21944788e61cb8845abc7", default-features = false } -datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "44008d71180f2d03e9d21944788e61cb8845abc7" } +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2cf5f5b5bb824598de185d64c541c52c930728cf", default-features = false } +datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2cf5f5b5bb824598de185d64c541c52c930728cf" } hashbrown = { version = "0.14.0" } object_store = { version = "0.6.0" } diff --git a/influxdb_iox/tests/query_tests/cases/in/gapfill.sql.expected b/influxdb_iox/tests/query_tests/cases/in/gapfill.sql.expected index f0d0654db6..f540b17fdf 100644 --- a/influxdb_iox/tests/query_tests/cases/in/gapfill.sql.expected +++ b/influxdb_iox/tests/query_tests/cases/in/gapfill.sql.expected @@ -225,7 +225,7 @@ Error during planning: gap-filling query is missing lower time bound +--------+----------------------+--------------------+ -- SQL: SELECT date_bin_gapfill(INTERVAL '1 minute', time) as _time, pod, locf(selector_last(image, time)) FROM (VALUES ('2023-06-10T12:00:00Z'::timestamp, 'pod1', 'imageA'), ('2023-06-10T12:00:00Z'::timestamp, 'pod2', 'imageA'), ('2023-06-10T12:00:01Z'::timestamp, 'pod1', 'imageB'), ('2023-06-10T12:00:02Z'::timestamp, 'pod1', 'imageB'), ('2023-06-10T12:00:02Z'::timestamp, 'pod2', 'imageB') ) AS data(time, pod, image) WHERE time >= timestamp '2023-06-10T11:55:00Z' AND time < timestamp '2023-06-10T12:05:00Z' GROUP BY _time, pod; +----------------------+------+--------------------------------------------+ -| _time | pod | locf(selector_last(image,time)) | +| _time | pod | locf(selector_last(data.image,data.time)) | +----------------------+------+--------------------------------------------+ | 2023-06-10T11:55:00Z | pod1 | | | 2023-06-10T11:56:00Z | pod1 | | diff --git a/iox_query/src/logical_optimizer/handle_gapfill.rs b/iox_query/src/logical_optimizer/handle_gapfill.rs index efc5f987c9..d4290f7ea7 100644 --- a/iox_query/src/logical_optimizer/handle_gapfill.rs +++ b/iox_query/src/logical_optimizer/handle_gapfill.rs @@ -198,7 +198,9 @@ fn build_gapfill_node( let time_column = col(new_aggr_plan.schema().fields()[date_bin_gapfill_index].qualified_column()); - let aggr = Aggregate::try_from_plan(&new_aggr_plan)?; + let LogicalPlan::Aggregate(aggr) = &new_aggr_plan else { + return Err(DataFusionError::Internal(format!("Expected Aggregate plan, got {}", new_aggr_plan.display()))); + }; let mut new_group_expr: Vec<_> = aggr .schema .fields() diff --git a/iox_query_influxql/src/plan/planner.rs b/iox_query_influxql/src/plan/planner.rs index 7ebfcc8be4..8529a97475 100644 --- a/iox_query_influxql/src/plan/planner.rs +++ b/iox_query_influxql/src/plan/planner.rs @@ -27,7 +27,7 @@ use arrow::record_batch::RecordBatch; use chrono_tz::Tz; use datafusion::catalog::TableReference; use datafusion::common::tree_node::{TreeNode, VisitRecursion}; -use datafusion::common::{DFSchema, DFSchemaRef, Result, ScalarValue, ToDFSchema}; +use datafusion::common::{DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, ToDFSchema}; use datafusion::datasource::{provider_as_source, MemTable}; use datafusion::logical_expr::expr::{Alias, ScalarFunction}; use datafusion::logical_expr::expr_rewriter::normalize_col; @@ -36,7 +36,7 @@ use datafusion::logical_expr::logical_plan::Analyze; use datafusion::logical_expr::utils::{expr_as_column_expr, find_aggregate_exprs}; use datafusion::logical_expr::{ binary_expr, col, date_bin, expr, expr::WindowFunction, lit, lit_timestamp_nano, now, union, - window_function, Aggregate, AggregateFunction, AggregateUDF, Between, BuiltInWindowFunction, + window_function, AggregateFunction, AggregateUDF, Between, BuiltInWindowFunction, BuiltinScalarFunction, EmptyRelation, Explain, Expr, ExprSchemable, Extension, GetIndexedField, LogicalPlan, LogicalPlanBuilder, Operator, PlanType, Projection, ScalarUDF, TableSource, ToStringifiedPlan, WindowFrame, WindowFrameBound, WindowFrameUnits, @@ -2924,7 +2924,9 @@ fn build_gap_fill_node( } }; - let aggr = Aggregate::try_from_plan(&input)?; + let LogicalPlan::Aggregate(aggr) = &input else { + return Err(DataFusionError::Internal(format!("Expected Aggregate plan, got {}", input.display()))); + }; let mut new_group_expr: Vec<_> = aggr .schema .fields() diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 217a01eb44..cf612bea13 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -28,9 +28,9 @@ bytes = { version = "1" } chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "serde"] } crossbeam-utils = { version = "0.8" } crypto-common = { version = "0.1", default-features = false, features = ["std"] } -datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "44008d71180f2d03e9d21944788e61cb8845abc7" } -datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "44008d71180f2d03e9d21944788e61cb8845abc7", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } -datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "44008d71180f2d03e9d21944788e61cb8845abc7", default-features = false, features = ["crypto_expressions", "encoding_expressions", "regex_expressions", "unicode_expressions"] } +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2cf5f5b5bb824598de185d64c541c52c930728cf" } +datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2cf5f5b5bb824598de185d64c541c52c930728cf", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } +datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2cf5f5b5bb824598de185d64c541c52c930728cf", default-features = false, features = ["crypto_expressions", "encoding_expressions", "regex_expressions", "unicode_expressions"] } digest = { version = "0.10", features = ["mac", "std"] } either = { version = "1", features = ["serde"] } fixedbitset = { version = "0.4" } @@ -77,6 +77,7 @@ serde_json = { version = "1", features = ["raw_value"] } sha2 = { version = "0.10" } similar = { version = "2", features = ["inline"] } smallvec = { version = "1", default-features = false, features = ["union"] } +sqlparser = { version = "0.36", features = ["visitor"] } sqlx = { version = "0.7", features = ["postgres", "runtime-tokio-rustls", "sqlite", "uuid"] } sqlx-core = { version = "0.7", features = ["_rt-tokio", "_tls-rustls", "any", "json", "migrate", "offline", "uuid"] } sqlx-postgres = { version = "0.7", default-features = false, features = ["any", "json", "migrate", "offline", "uuid"] }