chore: Update datafusion (to get fix for pruning bug) (#5339)

* chore: Update datafusion

* chore: Update AggregateSelector API
pull/24376/head
Andrew Lamb 2022-08-08 08:28:21 -04:00 committed by GitHub
parent 5f407ec8cd
commit f3913f89e3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 36 additions and 41 deletions

38
Cargo.lock generated
View File

@ -1089,7 +1089,7 @@ dependencies = [
[[package]] [[package]]
name = "datafusion" name = "datafusion"
version = "10.0.0" version = "10.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2#9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2" source = "git+https://github.com/apache/arrow-datafusion.git?rev=6e6f3bf9dec82b332e21f11699552c34f72493ac#6e6f3bf9dec82b332e21f11699552c34f72493ac"
dependencies = [ dependencies = [
"ahash", "ahash",
"arrow", "arrow",
@ -1117,7 +1117,7 @@ dependencies = [
"pin-project-lite", "pin-project-lite",
"rand", "rand",
"smallvec", "smallvec",
"sqlparser 0.19.0", "sqlparser",
"tempfile", "tempfile",
"tokio", "tokio",
"tokio-stream", "tokio-stream",
@ -1128,30 +1128,31 @@ dependencies = [
[[package]] [[package]]
name = "datafusion-common" name = "datafusion-common"
version = "10.0.0" version = "10.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2#9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2" source = "git+https://github.com/apache/arrow-datafusion.git?rev=6e6f3bf9dec82b332e21f11699552c34f72493ac#6e6f3bf9dec82b332e21f11699552c34f72493ac"
dependencies = [ dependencies = [
"arrow", "arrow",
"object_store", "object_store",
"ordered-float 3.0.0", "ordered-float 3.0.0",
"parquet", "parquet",
"sqlparser 0.19.0", "serde_json",
"sqlparser",
] ]
[[package]] [[package]]
name = "datafusion-expr" name = "datafusion-expr"
version = "10.0.0" version = "10.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2#9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2" source = "git+https://github.com/apache/arrow-datafusion.git?rev=6e6f3bf9dec82b332e21f11699552c34f72493ac#6e6f3bf9dec82b332e21f11699552c34f72493ac"
dependencies = [ dependencies = [
"ahash", "ahash",
"arrow", "arrow",
"datafusion-common", "datafusion-common",
"sqlparser 0.19.0", "sqlparser",
] ]
[[package]] [[package]]
name = "datafusion-optimizer" name = "datafusion-optimizer"
version = "10.0.0" version = "10.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2#9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2" source = "git+https://github.com/apache/arrow-datafusion.git?rev=6e6f3bf9dec82b332e21f11699552c34f72493ac#6e6f3bf9dec82b332e21f11699552c34f72493ac"
dependencies = [ dependencies = [
"arrow", "arrow",
"async-trait", "async-trait",
@ -1166,7 +1167,7 @@ dependencies = [
[[package]] [[package]]
name = "datafusion-physical-expr" name = "datafusion-physical-expr"
version = "10.0.0" version = "10.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2#9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2" source = "git+https://github.com/apache/arrow-datafusion.git?rev=6e6f3bf9dec82b332e21f11699552c34f72493ac#6e6f3bf9dec82b332e21f11699552c34f72493ac"
dependencies = [ dependencies = [
"ahash", "ahash",
"arrow", "arrow",
@ -1190,7 +1191,7 @@ dependencies = [
[[package]] [[package]]
name = "datafusion-proto" name = "datafusion-proto"
version = "10.0.0" version = "10.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2#9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2" source = "git+https://github.com/apache/arrow-datafusion.git?rev=6e6f3bf9dec82b332e21f11699552c34f72493ac#6e6f3bf9dec82b332e21f11699552c34f72493ac"
dependencies = [ dependencies = [
"arrow", "arrow",
"datafusion 10.0.0", "datafusion 10.0.0",
@ -1203,7 +1204,7 @@ dependencies = [
[[package]] [[package]]
name = "datafusion-row" name = "datafusion-row"
version = "10.0.0" version = "10.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2#9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2" source = "git+https://github.com/apache/arrow-datafusion.git?rev=6e6f3bf9dec82b332e21f11699552c34f72493ac#6e6f3bf9dec82b332e21f11699552c34f72493ac"
dependencies = [ dependencies = [
"arrow", "arrow",
"datafusion-common", "datafusion-common",
@ -1214,14 +1215,14 @@ dependencies = [
[[package]] [[package]]
name = "datafusion-sql" name = "datafusion-sql"
version = "10.0.0" version = "10.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2#9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2" source = "git+https://github.com/apache/arrow-datafusion.git?rev=6e6f3bf9dec82b332e21f11699552c34f72493ac#6e6f3bf9dec82b332e21f11699552c34f72493ac"
dependencies = [ dependencies = [
"ahash", "ahash",
"arrow", "arrow",
"datafusion-common", "datafusion-common",
"datafusion-expr", "datafusion-expr",
"hashbrown", "hashbrown",
"sqlparser 0.19.0", "sqlparser",
"tokio", "tokio",
] ]
@ -2143,7 +2144,7 @@ version = "0.1.0"
dependencies = [ dependencies = [
"generated_types", "generated_types",
"snafu", "snafu",
"sqlparser 0.20.0", "sqlparser",
"workspace-hack", "workspace-hack",
] ]
@ -3601,7 +3602,7 @@ dependencies = [
"schema", "schema",
"serde_json", "serde_json",
"snafu", "snafu",
"sqlparser 0.20.0", "sqlparser",
"test_helpers", "test_helpers",
"workspace-hack", "workspace-hack",
] ]
@ -4866,15 +4867,6 @@ dependencies = [
"unicode_categories", "unicode_categories",
] ]
[[package]]
name = "sqlparser"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8ec7ef1bad82a2453dbaef7218b6f036e545edcce1ffd55f6e7af7bea43cce2"
dependencies = [
"log",
]
[[package]] [[package]]
name = "sqlparser" name = "sqlparser"
version = "0.20.0" version = "0.20.0"

View File

@ -9,6 +9,6 @@ description = "Re-exports datafusion at a specific version"
# Rename to workaround doctest bug # Rename to workaround doctest bug
# Turn off optional datafusion features (e.g. don't get support for crypto functions or avro) # Turn off optional datafusion features (e.g. don't get support for crypto functions or avro)
upstream = { git = "https://github.com/apache/arrow-datafusion.git", rev="9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2", default-features = false, package = "datafusion" } upstream = { git = "https://github.com/apache/arrow-datafusion.git", rev="6e6f3bf9dec82b332e21f11699552c34f72493ac", default-features = false, package = "datafusion" }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2" } datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="6e6f3bf9dec82b332e21f11699552c34f72493ac" }
workspace-hack = { path = "../workspace-hack"} workspace-hack = { path = "../workspace-hack"}

View File

@ -19,7 +19,7 @@ use std::{fmt::Debug, sync::Arc};
use arrow::{array::ArrayRef, datatypes::DataType}; use arrow::{array::ArrayRef, datatypes::DataType};
use datafusion::{ use datafusion::{
error::{DataFusionError, Result as DataFusionResult}, error::{DataFusionError, Result as DataFusionResult},
logical_expr::{Signature, Volatility}, logical_expr::{AggregateState, Signature, Volatility},
physical_plan::{udaf::AggregateUDF, Accumulator}, physical_plan::{udaf::AggregateUDF, Accumulator},
scalar::ScalarValue, scalar::ScalarValue,
}; };
@ -163,7 +163,7 @@ trait Selector: Debug + Default + Send + Sync {
fn value_data_type() -> DataType; fn value_data_type() -> DataType;
/// return state in a form that DataFusion can store during execution /// return state in a form that DataFusion can store during execution
fn datafusion_state(&self) -> DataFusionResult<Vec<ScalarValue>>; fn datafusion_state(&self) -> DataFusionResult<Vec<AggregateState>>;
/// produces the final value of this selector for the specified output type /// produces the final value of this selector for the specified output type
fn evaluate(&self, output: &SelectorOutput) -> DataFusionResult<ScalarValue>; fn evaluate(&self, output: &SelectorOutput) -> DataFusionResult<ScalarValue>;
@ -259,7 +259,7 @@ where
// this function serializes our state to a vector of // this function serializes our state to a vector of
// `ScalarValue`s, which DataFusion uses to pass this state // `AggregateState`s, which DataFusion uses to pass this state
// between execution stages. // between execution stages.
fn state(&self) -> DataFusionResult<Vec<ScalarValue>> { fn state(&self) -> DataFusionResult<Vec<AggregateState>> {
self.selector.datafusion_state() self.selector.datafusion_state()
} }

View File

@ -19,7 +19,10 @@ use arrow::{
}, },
datatypes::DataType, datatypes::DataType,
}; };
use datafusion::{error::Result as DataFusionResult, scalar::ScalarValue}; use datafusion::{
error::Result as DataFusionResult, logical_expr::AggregateState, scalar::ScalarValue,
};
use observability_deps::tracing::debug; use observability_deps::tracing::debug;
use super::{Selector, SelectorOutput}; use super::{Selector, SelectorOutput};
@ -116,10 +119,10 @@ macro_rules! make_first_selector {
$ARROWTYPE $ARROWTYPE
} }
fn datafusion_state(&self) -> DataFusionResult<Vec<ScalarValue>> { fn datafusion_state(&self) -> DataFusionResult<Vec<AggregateState>> {
Ok(vec![ Ok(vec![
$TO_SCALARVALUE(self.value.clone()), AggregateState::Scalar($TO_SCALARVALUE(self.value.clone())),
ScalarValue::TimestampNanosecond(self.time, None), AggregateState::Scalar(ScalarValue::TimestampNanosecond(self.time, None)),
]) ])
} }
@ -223,10 +226,10 @@ macro_rules! make_last_selector {
$ARROWTYPE $ARROWTYPE
} }
fn datafusion_state(&self) -> DataFusionResult<Vec<ScalarValue>> { fn datafusion_state(&self) -> DataFusionResult<Vec<AggregateState>> {
Ok(vec![ Ok(vec![
$TO_SCALARVALUE(self.value.clone()), AggregateState::Scalar($TO_SCALARVALUE(self.value.clone())),
ScalarValue::TimestampNanosecond(self.time, None), AggregateState::Scalar(ScalarValue::TimestampNanosecond(self.time, None)),
]) ])
} }
@ -354,10 +357,10 @@ macro_rules! make_min_selector {
$ARROWTYPE $ARROWTYPE
} }
fn datafusion_state(&self) -> DataFusionResult<Vec<ScalarValue>> { fn datafusion_state(&self) -> DataFusionResult<Vec<AggregateState>> {
Ok(vec![ Ok(vec![
$TO_SCALARVALUE(self.value.clone()), AggregateState::Scalar($TO_SCALARVALUE(self.value.clone())),
ScalarValue::TimestampNanosecond(self.time, None), AggregateState::Scalar(ScalarValue::TimestampNanosecond(self.time, None)),
]) ])
} }
@ -466,10 +469,10 @@ macro_rules! make_max_selector {
$ARROWTYPE $ARROWTYPE
} }
fn datafusion_state(&self) -> DataFusionResult<Vec<ScalarValue>> { fn datafusion_state(&self) -> DataFusionResult<Vec<AggregateState>> {
Ok(vec![ Ok(vec![
$TO_SCALARVALUE(self.value.clone()), AggregateState::Scalar($TO_SCALARVALUE(self.value.clone())),
ScalarValue::TimestampNanosecond(self.time, None), AggregateState::Scalar(ScalarValue::TimestampNanosecond(self.time, None)),
]) ])
} }