chore: Upgrade datafusion (#6407)

* chore: Update datafusion

* chore: Update for API change
pull/24376/head
Andrew Lamb 2022-12-15 01:51:35 -05:00 committed by GitHub
parent 8e1bf9cdf7
commit be45889be1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 28 additions and 34 deletions

16
Cargo.lock generated
View File

@ -1274,7 +1274,7 @@ dependencies = [
[[package]]
name = "datafusion"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=119f90f0a1e681639d630fe616bb4599e1d20dee#119f90f0a1e681639d630fe616bb4599e1d20dee"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=40e6a67604514124332bf132020a026bbe5b5903#40e6a67604514124332bf132020a026bbe5b5903"
dependencies = [
"ahash 0.8.2",
"arrow",
@ -1319,7 +1319,7 @@ dependencies = [
[[package]]
name = "datafusion-common"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=119f90f0a1e681639d630fe616bb4599e1d20dee#119f90f0a1e681639d630fe616bb4599e1d20dee"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=40e6a67604514124332bf132020a026bbe5b5903#40e6a67604514124332bf132020a026bbe5b5903"
dependencies = [
"arrow",
"chrono",
@ -1331,7 +1331,7 @@ dependencies = [
[[package]]
name = "datafusion-expr"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=119f90f0a1e681639d630fe616bb4599e1d20dee#119f90f0a1e681639d630fe616bb4599e1d20dee"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=40e6a67604514124332bf132020a026bbe5b5903#40e6a67604514124332bf132020a026bbe5b5903"
dependencies = [
"ahash 0.8.2",
"arrow",
@ -1343,7 +1343,7 @@ dependencies = [
[[package]]
name = "datafusion-optimizer"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=119f90f0a1e681639d630fe616bb4599e1d20dee#119f90f0a1e681639d630fe616bb4599e1d20dee"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=40e6a67604514124332bf132020a026bbe5b5903#40e6a67604514124332bf132020a026bbe5b5903"
dependencies = [
"arrow",
"async-trait",
@ -1358,7 +1358,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=119f90f0a1e681639d630fe616bb4599e1d20dee#119f90f0a1e681639d630fe616bb4599e1d20dee"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=40e6a67604514124332bf132020a026bbe5b5903#40e6a67604514124332bf132020a026bbe5b5903"
dependencies = [
"ahash 0.8.2",
"arrow",
@ -1387,7 +1387,7 @@ dependencies = [
[[package]]
name = "datafusion-proto"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=119f90f0a1e681639d630fe616bb4599e1d20dee#119f90f0a1e681639d630fe616bb4599e1d20dee"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=40e6a67604514124332bf132020a026bbe5b5903#40e6a67604514124332bf132020a026bbe5b5903"
dependencies = [
"arrow",
"chrono",
@ -1404,7 +1404,7 @@ dependencies = [
[[package]]
name = "datafusion-row"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=119f90f0a1e681639d630fe616bb4599e1d20dee#119f90f0a1e681639d630fe616bb4599e1d20dee"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=40e6a67604514124332bf132020a026bbe5b5903#40e6a67604514124332bf132020a026bbe5b5903"
dependencies = [
"arrow",
"datafusion-common",
@ -1415,7 +1415,7 @@ dependencies = [
[[package]]
name = "datafusion-sql"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=119f90f0a1e681639d630fe616bb4599e1d20dee#119f90f0a1e681639d630fe616bb4599e1d20dee"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=40e6a67604514124332bf132020a026bbe5b5903#40e6a67604514124332bf132020a026bbe5b5903"
dependencies = [
"arrow-schema",
"datafusion-common",

View File

@ -114,8 +114,8 @@ license = "MIT OR Apache-2.0"
[workspace.dependencies]
arrow = { version = "29.0.0" }
arrow-flight = { version = "29.0.0" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="119f90f0a1e681639d630fe616bb4599e1d20dee", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="119f90f0a1e681639d630fe616bb4599e1d20dee" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="40e6a67604514124332bf132020a026bbe5b5903", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="40e6a67604514124332bf132020a026bbe5b5903" }
hashbrown = { version = "0.13.1" }
parquet = { version = "29.0.0" }

View File

@ -231,8 +231,6 @@ impl ParquetFileReader {
.runtime_env()
.register_object_store("iox", "iox", object_store);
execute_stream(Arc::new(exec), task_ctx)
.await
.context(ExecutingStreamSnafu)
execute_stream(Arc::new(exec), task_ctx).context(ExecutingStreamSnafu)
}
}

View File

@ -23,9 +23,7 @@ use arrow::{
use datafusion::{
error::{DataFusionError, Result as DataFusionResult},
execution::context::SessionState,
logical_expr::{
AccumulatorFunctionImplementation, AggregateState, Signature, TypeSignature, Volatility,
},
logical_expr::{AccumulatorFunctionImplementation, Signature, TypeSignature, Volatility},
physical_plan::{udaf::AggregateUDF, Accumulator},
scalar::ScalarValue,
};
@ -386,7 +384,7 @@ trait Selector: Debug + Default + Send + Sync {
fn value_data_type() -> DataType;
/// return state in a form that DataFusion can store during execution
fn datafusion_state(&self) -> DataFusionResult<Vec<AggregateState>>;
fn datafusion_state(&self) -> DataFusionResult<Vec<ScalarValue>>;
/// produces the final value of this selector for the specified output type
fn evaluate(&self, output: &SelectorOutput) -> DataFusionResult<ScalarValue>;
@ -529,7 +527,7 @@ where
// this function serializes our state to a vector of
// `ScalarValue`s, which DataFusion uses to pass this state
// between execution stages.
fn state(&self) -> DataFusionResult<Vec<AggregateState>> {
fn state(&self) -> DataFusionResult<Vec<ScalarValue>> {
self.selector.datafusion_state()
}

View File

@ -19,9 +19,7 @@ use arrow::{
},
datatypes::{DataType, Field},
};
use datafusion::{
error::Result as DataFusionResult, logical_expr::AggregateState, scalar::ScalarValue,
};
use datafusion::{error::Result as DataFusionResult, scalar::ScalarValue};
use observability_deps::tracing::debug;
@ -140,10 +138,10 @@ macro_rules! make_first_selector {
$ARROWTYPE
}
fn datafusion_state(&self) -> DataFusionResult<Vec<AggregateState>> {
fn datafusion_state(&self) -> DataFusionResult<Vec<ScalarValue>> {
Ok(vec![
AggregateState::Scalar($TO_SCALARVALUE(self.value.clone())),
AggregateState::Scalar(ScalarValue::TimestampNanosecond(self.time, None)),
$TO_SCALARVALUE(self.value.clone()),
ScalarValue::TimestampNanosecond(self.time, None),
])
}
@ -256,10 +254,10 @@ macro_rules! make_last_selector {
$ARROWTYPE
}
fn datafusion_state(&self) -> DataFusionResult<Vec<AggregateState>> {
fn datafusion_state(&self) -> DataFusionResult<Vec<ScalarValue>> {
Ok(vec![
AggregateState::Scalar($TO_SCALARVALUE(self.value.clone())),
AggregateState::Scalar(ScalarValue::TimestampNanosecond(self.time, None)),
$TO_SCALARVALUE(self.value.clone()),
ScalarValue::TimestampNanosecond(self.time, None),
])
}
@ -396,10 +394,10 @@ macro_rules! make_min_selector {
$ARROWTYPE
}
fn datafusion_state(&self) -> DataFusionResult<Vec<AggregateState>> {
fn datafusion_state(&self) -> DataFusionResult<Vec<ScalarValue>> {
Ok(vec![
AggregateState::Scalar($TO_SCALARVALUE(self.value.clone())),
AggregateState::Scalar(ScalarValue::TimestampNanosecond(self.time, None)),
$TO_SCALARVALUE(self.value.clone()),
ScalarValue::TimestampNanosecond(self.time, None),
])
}
@ -517,10 +515,10 @@ macro_rules! make_max_selector {
$ARROWTYPE
}
fn datafusion_state(&self) -> DataFusionResult<Vec<AggregateState>> {
fn datafusion_state(&self) -> DataFusionResult<Vec<ScalarValue>> {
Ok(vec![
AggregateState::Scalar($TO_SCALARVALUE(self.value.clone())),
AggregateState::Scalar(ScalarValue::TimestampNanosecond(self.time, None)),
$TO_SCALARVALUE(self.value.clone()),
ScalarValue::TimestampNanosecond(self.time, None),
])
}

View File

@ -27,7 +27,7 @@ bytes = { version = "1", features = ["std"] }
chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "iana-time-zone", "serde", "std", "winapi"] }
crossbeam-utils = { version = "0.8", features = ["std"] }
crypto-common = { version = "0.1", default-features = false, features = ["std"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "119f90f0a1e681639d630fe616bb4599e1d20dee", features = ["async-compression", "bzip2", "compression", "crypto_expressions", "flate2", "regex_expressions", "unicode_expressions", "xz2"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "40e6a67604514124332bf132020a026bbe5b5903", features = ["async-compression", "bzip2", "compression", "crypto_expressions", "flate2", "regex_expressions", "unicode_expressions", "xz2"] }
digest = { version = "0.10", features = ["alloc", "block-buffer", "core-api", "mac", "std", "subtle"] }
either = { version = "1", features = ["use_std"] }
fixedbitset = { version = "0.4", features = ["std"] }