From f3913f89e3b0b6a52eec4c57b18d87788c683969 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 8 Aug 2022 08:28:21 -0400 Subject: [PATCH] chore: Update datafusion (to get fix for pruning bug) (#5339) * chore: Update datafusion * chore: Update AggregateSelector API --- Cargo.lock | 38 +++++++++-------------- datafusion/Cargo.toml | 4 +-- query_functions/src/selectors.rs | 6 ++-- query_functions/src/selectors/internal.rs | 29 +++++++++-------- 4 files changed, 36 insertions(+), 41 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 20bac82b4c..fe3b209128 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1089,7 +1089,7 @@ dependencies = [ [[package]] name = "datafusion" version = "10.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2#9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=6e6f3bf9dec82b332e21f11699552c34f72493ac#6e6f3bf9dec82b332e21f11699552c34f72493ac" dependencies = [ "ahash", "arrow", @@ -1117,7 +1117,7 @@ dependencies = [ "pin-project-lite", "rand", "smallvec", - "sqlparser 0.19.0", + "sqlparser", "tempfile", "tokio", "tokio-stream", @@ -1128,30 +1128,31 @@ dependencies = [ [[package]] name = "datafusion-common" version = "10.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2#9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=6e6f3bf9dec82b332e21f11699552c34f72493ac#6e6f3bf9dec82b332e21f11699552c34f72493ac" dependencies = [ "arrow", "object_store", "ordered-float 3.0.0", "parquet", - "sqlparser 0.19.0", + "serde_json", + "sqlparser", ] [[package]] name = "datafusion-expr" version = "10.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2#9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2" +source = 
"git+https://github.com/apache/arrow-datafusion.git?rev=6e6f3bf9dec82b332e21f11699552c34f72493ac#6e6f3bf9dec82b332e21f11699552c34f72493ac" dependencies = [ "ahash", "arrow", "datafusion-common", - "sqlparser 0.19.0", + "sqlparser", ] [[package]] name = "datafusion-optimizer" version = "10.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2#9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=6e6f3bf9dec82b332e21f11699552c34f72493ac#6e6f3bf9dec82b332e21f11699552c34f72493ac" dependencies = [ "arrow", "async-trait", @@ -1166,7 +1167,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "10.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2#9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=6e6f3bf9dec82b332e21f11699552c34f72493ac#6e6f3bf9dec82b332e21f11699552c34f72493ac" dependencies = [ "ahash", "arrow", @@ -1190,7 +1191,7 @@ dependencies = [ [[package]] name = "datafusion-proto" version = "10.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2#9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=6e6f3bf9dec82b332e21f11699552c34f72493ac#6e6f3bf9dec82b332e21f11699552c34f72493ac" dependencies = [ "arrow", "datafusion 10.0.0", @@ -1203,7 +1204,7 @@ dependencies = [ [[package]] name = "datafusion-row" version = "10.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2#9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=6e6f3bf9dec82b332e21f11699552c34f72493ac#6e6f3bf9dec82b332e21f11699552c34f72493ac" dependencies = [ "arrow", "datafusion-common", @@ -1214,14 +1215,14 @@ dependencies = [ 
[[package]] name = "datafusion-sql" version = "10.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2#9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=6e6f3bf9dec82b332e21f11699552c34f72493ac#6e6f3bf9dec82b332e21f11699552c34f72493ac" dependencies = [ "ahash", "arrow", "datafusion-common", "datafusion-expr", "hashbrown", - "sqlparser 0.19.0", + "sqlparser", "tokio", ] @@ -2143,7 +2144,7 @@ version = "0.1.0" dependencies = [ "generated_types", "snafu", - "sqlparser 0.20.0", + "sqlparser", "workspace-hack", ] @@ -3601,7 +3602,7 @@ dependencies = [ "schema", "serde_json", "snafu", - "sqlparser 0.20.0", + "sqlparser", "test_helpers", "workspace-hack", ] @@ -4866,15 +4867,6 @@ dependencies = [ "unicode_categories", ] -[[package]] -name = "sqlparser" -version = "0.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8ec7ef1bad82a2453dbaef7218b6f036e545edcce1ffd55f6e7af7bea43cce2" -dependencies = [ - "log", -] - [[package]] name = "sqlparser" version = "0.20.0" diff --git a/datafusion/Cargo.toml b/datafusion/Cargo.toml index baf6088443..cf50355827 100644 --- a/datafusion/Cargo.toml +++ b/datafusion/Cargo.toml @@ -9,6 +9,6 @@ description = "Re-exports datafusion at a specific version" # Rename to workaround doctest bug # Turn off optional datafusion features (e.g. 
don't get support for crypto functions or avro) -upstream = { git = "https://github.com/apache/arrow-datafusion.git", rev="9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2", default-features = false, package = "datafusion" } -datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2" } +upstream = { git = "https://github.com/apache/arrow-datafusion.git", rev="6e6f3bf9dec82b332e21f11699552c34f72493ac", default-features = false, package = "datafusion" } +datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="6e6f3bf9dec82b332e21f11699552c34f72493ac" } workspace-hack = { path = "../workspace-hack"} diff --git a/query_functions/src/selectors.rs b/query_functions/src/selectors.rs index 0ae6829800..f8a81f0db6 100644 --- a/query_functions/src/selectors.rs +++ b/query_functions/src/selectors.rs @@ -19,7 +19,7 @@ use std::{fmt::Debug, sync::Arc}; use arrow::{array::ArrayRef, datatypes::DataType}; use datafusion::{ error::{DataFusionError, Result as DataFusionResult}, - logical_expr::{Signature, Volatility}, + logical_expr::{AggregateState, Signature, Volatility}, physical_plan::{udaf::AggregateUDF, Accumulator}, scalar::ScalarValue, }; @@ -163,7 +163,7 @@ trait Selector: Debug + Default + Send + Sync { fn value_data_type() -> DataType; /// return state in a form that DataFusion can store during execution - fn datafusion_state(&self) -> DataFusionResult<Vec<ScalarValue>>; + fn datafusion_state(&self) -> DataFusionResult<Vec<AggregateState>>; /// produces the final value of this selector for the specified output type fn evaluate(&self, output: &SelectorOutput) -> DataFusionResult; @@ -259,7 +259,7 @@ where // this function serializes our state to a vector of // `ScalarValue`s, which DataFusion uses to pass this state // between execution stages.
- fn state(&self) -> DataFusionResult<Vec<ScalarValue>> { + fn state(&self) -> DataFusionResult<Vec<AggregateState>> { self.selector.datafusion_state() } diff --git a/query_functions/src/selectors/internal.rs b/query_functions/src/selectors/internal.rs index bc98ef43c5..4d37f81a53 100644 --- a/query_functions/src/selectors/internal.rs +++ b/query_functions/src/selectors/internal.rs @@ -19,7 +19,10 @@ use arrow::{ }, datatypes::DataType, }; -use datafusion::{error::Result as DataFusionResult, scalar::ScalarValue}; +use datafusion::{ + error::Result as DataFusionResult, logical_expr::AggregateState, scalar::ScalarValue, +}; + use observability_deps::tracing::debug; use super::{Selector, SelectorOutput}; @@ -116,10 +119,10 @@ macro_rules! make_first_selector { $ARROWTYPE } - fn datafusion_state(&self) -> DataFusionResult<Vec<ScalarValue>> { + fn datafusion_state(&self) -> DataFusionResult<Vec<AggregateState>> { Ok(vec![ - $TO_SCALARVALUE(self.value.clone()), - ScalarValue::TimestampNanosecond(self.time, None), + AggregateState::Scalar($TO_SCALARVALUE(self.value.clone())), + AggregateState::Scalar(ScalarValue::TimestampNanosecond(self.time, None)), ]) } @@ -223,10 +226,10 @@ macro_rules! make_last_selector { $ARROWTYPE } - fn datafusion_state(&self) -> DataFusionResult<Vec<ScalarValue>> { + fn datafusion_state(&self) -> DataFusionResult<Vec<AggregateState>> { Ok(vec![ - $TO_SCALARVALUE(self.value.clone()), - ScalarValue::TimestampNanosecond(self.time, None), + AggregateState::Scalar($TO_SCALARVALUE(self.value.clone())), + AggregateState::Scalar(ScalarValue::TimestampNanosecond(self.time, None)), ]) } @@ -354,10 +357,10 @@ macro_rules! make_min_selector { $ARROWTYPE } - fn datafusion_state(&self) -> DataFusionResult<Vec<ScalarValue>> { + fn datafusion_state(&self) -> DataFusionResult<Vec<AggregateState>> { Ok(vec![ - $TO_SCALARVALUE(self.value.clone()), - ScalarValue::TimestampNanosecond(self.time, None), + AggregateState::Scalar($TO_SCALARVALUE(self.value.clone())), + AggregateState::Scalar(ScalarValue::TimestampNanosecond(self.time, None)), ]) } @@ -466,10 +469,10 @@ macro_rules! 
make_max_selector { $ARROWTYPE } - fn datafusion_state(&self) -> DataFusionResult<Vec<ScalarValue>> { + fn datafusion_state(&self) -> DataFusionResult<Vec<AggregateState>> { Ok(vec![ - $TO_SCALARVALUE(self.value.clone()), - ScalarValue::TimestampNanosecond(self.time, None), + AggregateState::Scalar($TO_SCALARVALUE(self.value.clone())), + AggregateState::Scalar(ScalarValue::TimestampNanosecond(self.time, None)), ]) }