chore: Update datafusion (to get fix for pruning bug) (#5339)

* chore: Update datafusion

* chore: Update AggregateSelector API
pull/24376/head
Andrew Lamb 2022-08-08 08:28:21 -04:00 committed by GitHub
parent 5f407ec8cd
commit f3913f89e3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 36 additions and 41 deletions

38
Cargo.lock generated
View File

@ -1089,7 +1089,7 @@ dependencies = [
[[package]]
name = "datafusion"
version = "10.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2#9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=6e6f3bf9dec82b332e21f11699552c34f72493ac#6e6f3bf9dec82b332e21f11699552c34f72493ac"
dependencies = [
"ahash",
"arrow",
@ -1117,7 +1117,7 @@ dependencies = [
"pin-project-lite",
"rand",
"smallvec",
"sqlparser 0.19.0",
"sqlparser",
"tempfile",
"tokio",
"tokio-stream",
@ -1128,30 +1128,31 @@ dependencies = [
[[package]]
name = "datafusion-common"
version = "10.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2#9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=6e6f3bf9dec82b332e21f11699552c34f72493ac#6e6f3bf9dec82b332e21f11699552c34f72493ac"
dependencies = [
"arrow",
"object_store",
"ordered-float 3.0.0",
"parquet",
"sqlparser 0.19.0",
"serde_json",
"sqlparser",
]
[[package]]
name = "datafusion-expr"
version = "10.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2#9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=6e6f3bf9dec82b332e21f11699552c34f72493ac#6e6f3bf9dec82b332e21f11699552c34f72493ac"
dependencies = [
"ahash",
"arrow",
"datafusion-common",
"sqlparser 0.19.0",
"sqlparser",
]
[[package]]
name = "datafusion-optimizer"
version = "10.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2#9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=6e6f3bf9dec82b332e21f11699552c34f72493ac#6e6f3bf9dec82b332e21f11699552c34f72493ac"
dependencies = [
"arrow",
"async-trait",
@ -1166,7 +1167,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
version = "10.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2#9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=6e6f3bf9dec82b332e21f11699552c34f72493ac#6e6f3bf9dec82b332e21f11699552c34f72493ac"
dependencies = [
"ahash",
"arrow",
@ -1190,7 +1191,7 @@ dependencies = [
[[package]]
name = "datafusion-proto"
version = "10.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2#9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=6e6f3bf9dec82b332e21f11699552c34f72493ac#6e6f3bf9dec82b332e21f11699552c34f72493ac"
dependencies = [
"arrow",
"datafusion 10.0.0",
@ -1203,7 +1204,7 @@ dependencies = [
[[package]]
name = "datafusion-row"
version = "10.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2#9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=6e6f3bf9dec82b332e21f11699552c34f72493ac#6e6f3bf9dec82b332e21f11699552c34f72493ac"
dependencies = [
"arrow",
"datafusion-common",
@ -1214,14 +1215,14 @@ dependencies = [
[[package]]
name = "datafusion-sql"
version = "10.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2#9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=6e6f3bf9dec82b332e21f11699552c34f72493ac#6e6f3bf9dec82b332e21f11699552c34f72493ac"
dependencies = [
"ahash",
"arrow",
"datafusion-common",
"datafusion-expr",
"hashbrown",
"sqlparser 0.19.0",
"sqlparser",
"tokio",
]
@ -2143,7 +2144,7 @@ version = "0.1.0"
dependencies = [
"generated_types",
"snafu",
"sqlparser 0.20.0",
"sqlparser",
"workspace-hack",
]
@ -3601,7 +3602,7 @@ dependencies = [
"schema",
"serde_json",
"snafu",
"sqlparser 0.20.0",
"sqlparser",
"test_helpers",
"workspace-hack",
]
@ -4866,15 +4867,6 @@ dependencies = [
"unicode_categories",
]
[[package]]
name = "sqlparser"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8ec7ef1bad82a2453dbaef7218b6f036e545edcce1ffd55f6e7af7bea43cce2"
dependencies = [
"log",
]
[[package]]
name = "sqlparser"
version = "0.20.0"

View File

@ -9,6 +9,6 @@ description = "Re-exports datafusion at a specific version"
# Rename to workaround doctest bug
# Turn off optional datafusion features (e.g. don't get support for crypto functions or avro)
upstream = { git = "https://github.com/apache/arrow-datafusion.git", rev="9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2", default-features = false, package = "datafusion" }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="9a5f17e6bbc9b8acbb0d0e584ebc9ade291622f2" }
upstream = { git = "https://github.com/apache/arrow-datafusion.git", rev="6e6f3bf9dec82b332e21f11699552c34f72493ac", default-features = false, package = "datafusion" }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="6e6f3bf9dec82b332e21f11699552c34f72493ac" }
workspace-hack = { path = "../workspace-hack"}

View File

@ -19,7 +19,7 @@ use std::{fmt::Debug, sync::Arc};
use arrow::{array::ArrayRef, datatypes::DataType};
use datafusion::{
error::{DataFusionError, Result as DataFusionResult},
logical_expr::{Signature, Volatility},
logical_expr::{AggregateState, Signature, Volatility},
physical_plan::{udaf::AggregateUDF, Accumulator},
scalar::ScalarValue,
};
@ -163,7 +163,7 @@ trait Selector: Debug + Default + Send + Sync {
fn value_data_type() -> DataType;
/// return state in a form that DataFusion can store during execution
fn datafusion_state(&self) -> DataFusionResult<Vec<ScalarValue>>;
fn datafusion_state(&self) -> DataFusionResult<Vec<AggregateState>>;
/// produces the final value of this selector for the specified output type
fn evaluate(&self, output: &SelectorOutput) -> DataFusionResult<ScalarValue>;
@ -259,7 +259,7 @@ where
// this function serializes our state to a vector of
// `AggregateState`s, which DataFusion uses to pass this state
// between execution stages.
fn state(&self) -> DataFusionResult<Vec<ScalarValue>> {
fn state(&self) -> DataFusionResult<Vec<AggregateState>> {
self.selector.datafusion_state()
}

View File

@ -19,7 +19,10 @@ use arrow::{
},
datatypes::DataType,
};
use datafusion::{error::Result as DataFusionResult, scalar::ScalarValue};
use datafusion::{
error::Result as DataFusionResult, logical_expr::AggregateState, scalar::ScalarValue,
};
use observability_deps::tracing::debug;
use super::{Selector, SelectorOutput};
@ -116,10 +119,10 @@ macro_rules! make_first_selector {
$ARROWTYPE
}
fn datafusion_state(&self) -> DataFusionResult<Vec<ScalarValue>> {
fn datafusion_state(&self) -> DataFusionResult<Vec<AggregateState>> {
Ok(vec![
$TO_SCALARVALUE(self.value.clone()),
ScalarValue::TimestampNanosecond(self.time, None),
AggregateState::Scalar($TO_SCALARVALUE(self.value.clone())),
AggregateState::Scalar(ScalarValue::TimestampNanosecond(self.time, None)),
])
}
@ -223,10 +226,10 @@ macro_rules! make_last_selector {
$ARROWTYPE
}
fn datafusion_state(&self) -> DataFusionResult<Vec<ScalarValue>> {
fn datafusion_state(&self) -> DataFusionResult<Vec<AggregateState>> {
Ok(vec![
$TO_SCALARVALUE(self.value.clone()),
ScalarValue::TimestampNanosecond(self.time, None),
AggregateState::Scalar($TO_SCALARVALUE(self.value.clone())),
AggregateState::Scalar(ScalarValue::TimestampNanosecond(self.time, None)),
])
}
@ -354,10 +357,10 @@ macro_rules! make_min_selector {
$ARROWTYPE
}
fn datafusion_state(&self) -> DataFusionResult<Vec<ScalarValue>> {
fn datafusion_state(&self) -> DataFusionResult<Vec<AggregateState>> {
Ok(vec![
$TO_SCALARVALUE(self.value.clone()),
ScalarValue::TimestampNanosecond(self.time, None),
AggregateState::Scalar($TO_SCALARVALUE(self.value.clone())),
AggregateState::Scalar(ScalarValue::TimestampNanosecond(self.time, None)),
])
}
@ -466,10 +469,10 @@ macro_rules! make_max_selector {
$ARROWTYPE
}
fn datafusion_state(&self) -> DataFusionResult<Vec<ScalarValue>> {
fn datafusion_state(&self) -> DataFusionResult<Vec<AggregateState>> {
Ok(vec![
$TO_SCALARVALUE(self.value.clone()),
ScalarValue::TimestampNanosecond(self.time, None),
AggregateState::Scalar($TO_SCALARVALUE(self.value.clone())),
AggregateState::Scalar(ScalarValue::TimestampNanosecond(self.time, None)),
])
}