chore: Update datafusion (again) (#4518)
* chore: Update datafusion (again)
* refactor: Update ExecutionPlan::execute to not be async
parent bc5725b1fc
commit 37c7ce793c

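The headline change in this upgrade is that `ExecutionPlan::execute` is no longer an `async fn`: it now returns the record batch stream synchronously, and only polling that stream is asynchronous. A minimal caller-side sketch of the difference, assuming the DataFusion API at the new pinned revision (the plan argument and row printing are illustrative):

use std::sync::Arc;

use datafusion::error::Result;
use datafusion::execution::context::{SessionContext, TaskContext};
use datafusion::physical_plan::ExecutionPlan;
use futures::StreamExt;

async fn consume(plan: Arc<dyn ExecutionPlan>) -> Result<()> {
    let session_ctx = SessionContext::new();
    let task_ctx = Arc::new(TaskContext::from(&session_ctx));

    // Old API: let mut stream = plan.execute(0, task_ctx).await?;
    // New API: execute() itself is synchronous.
    let mut stream = plan.execute(0, task_ctx)?;

    // Consuming the returned stream is still async.
    while let Some(batch) = stream.next().await {
        println!("{} rows", batch?.num_rows());
    }
    Ok(())
}
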
@@ -1195,7 +1195,7 @@ dependencies = [
 [[package]]
 name = "datafusion"
 version = "7.0.0"
-source = "git+https://github.com/apache/arrow-datafusion.git?rev=b7bb2cfba13cc04a08c2f687102dd14a8dedc7b6#b7bb2cfba13cc04a08c2f687102dd14a8dedc7b6"
+source = "git+https://github.com/apache/arrow-datafusion.git?rev=7304719bb4830c873af32f873ce22f205fef4c77#7304719bb4830c873af32f873ce22f205fef4c77"
 dependencies = [
  "ahash",
  "arrow",
@@ -1227,7 +1227,7 @@ dependencies = [
 [[package]]
 name = "datafusion-common"
 version = "7.0.0"
-source = "git+https://github.com/apache/arrow-datafusion.git?rev=b7bb2cfba13cc04a08c2f687102dd14a8dedc7b6#b7bb2cfba13cc04a08c2f687102dd14a8dedc7b6"
+source = "git+https://github.com/apache/arrow-datafusion.git?rev=7304719bb4830c873af32f873ce22f205fef4c77#7304719bb4830c873af32f873ce22f205fef4c77"
 dependencies = [
  "arrow",
  "ordered-float 3.0.0",
@@ -1238,7 +1238,7 @@ dependencies = [
 [[package]]
 name = "datafusion-data-access"
 version = "1.0.0"
-source = "git+https://github.com/apache/arrow-datafusion.git?rev=b7bb2cfba13cc04a08c2f687102dd14a8dedc7b6#b7bb2cfba13cc04a08c2f687102dd14a8dedc7b6"
+source = "git+https://github.com/apache/arrow-datafusion.git?rev=7304719bb4830c873af32f873ce22f205fef4c77#7304719bb4830c873af32f873ce22f205fef4c77"
 dependencies = [
  "async-trait",
  "chrono",
@@ -1251,7 +1251,7 @@ dependencies = [
 [[package]]
 name = "datafusion-expr"
 version = "7.0.0"
-source = "git+https://github.com/apache/arrow-datafusion.git?rev=b7bb2cfba13cc04a08c2f687102dd14a8dedc7b6#b7bb2cfba13cc04a08c2f687102dd14a8dedc7b6"
+source = "git+https://github.com/apache/arrow-datafusion.git?rev=7304719bb4830c873af32f873ce22f205fef4c77#7304719bb4830c873af32f873ce22f205fef4c77"
 dependencies = [
  "ahash",
  "arrow",
@@ -1262,7 +1262,7 @@ dependencies = [
 [[package]]
 name = "datafusion-physical-expr"
 version = "7.0.0"
-source = "git+https://github.com/apache/arrow-datafusion.git?rev=b7bb2cfba13cc04a08c2f687102dd14a8dedc7b6#b7bb2cfba13cc04a08c2f687102dd14a8dedc7b6"
+source = "git+https://github.com/apache/arrow-datafusion.git?rev=7304719bb4830c873af32f873ce22f205fef4c77#7304719bb4830c873af32f873ce22f205fef4c77"
 dependencies = [
  "ahash",
  "arrow",
@@ -1285,7 +1285,7 @@ dependencies = [
 [[package]]
 name = "datafusion-proto"
 version = "7.0.0"
-source = "git+https://github.com/apache/arrow-datafusion.git?rev=b7bb2cfba13cc04a08c2f687102dd14a8dedc7b6#b7bb2cfba13cc04a08c2f687102dd14a8dedc7b6"
+source = "git+https://github.com/apache/arrow-datafusion.git?rev=7304719bb4830c873af32f873ce22f205fef4c77#7304719bb4830c873af32f873ce22f205fef4c77"
 dependencies = [
  "datafusion 7.0.0",
  "prost",

@@ -9,6 +9,6 @@ description = "Re-exports datafusion at a specific version"

 # Rename to work around doctest bug
 # Turn off optional datafusion features (e.g. don't get support for crypto functions or avro)
-upstream = { git = "https://github.com/apache/arrow-datafusion.git", rev="b7bb2cfba13cc04a08c2f687102dd14a8dedc7b6", default-features = false, package = "datafusion" }
-datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="b7bb2cfba13cc04a08c2f687102dd14a8dedc7b6" }
+upstream = { git = "https://github.com/apache/arrow-datafusion.git", rev="7304719bb4830c873af32f873ce22f205fef4c77", default-features = false, package = "datafusion" }
+datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="7304719bb4830c873af32f873ce22f205fef4c77" }
 workspace-hack = { path = "../workspace-hack"}

@@ -277,7 +277,7 @@ pub async fn test_execute_partition(
 ) -> SendableRecordBatchStream {
     let session_ctx = SessionContext::new();
     let task_ctx = Arc::new(TaskContext::from(&session_ctx));
-    plan.execute(partition, task_ctx).await.unwrap()
+    plan.execute(partition, task_ctx).unwrap()
 }

 /// Execute the specified partition of the [ExecutionPlan] with a

@@ -198,7 +198,6 @@ impl<T> std::fmt::Debug for SystemTableExecutionPlan<T> {
     }
 }

-#[async_trait]
 impl<T: IoxSystemTable + 'static> ExecutionPlan for SystemTableExecutionPlan<T> {
     fn as_any(&self) -> &dyn Any {
         self
@@ -226,7 +225,7 @@ impl<T: IoxSystemTable + 'static> ExecutionPlan for SystemTableExecutionPlan<T>
         unimplemented!()
     }

-    async fn execute(
+    fn execute(
         &self,
         _partition: usize,
         context: Arc<TaskContext>,

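On the implementation side, every operator drops its `#[async_trait]` attribute along with the `async` keyword, since the macro is only needed when a trait method is async. A toy sketch of why (the traits and names here are illustrative, not DataFusion's):

use async_trait::async_trait;

#[async_trait]
trait OldStyle {
    // async methods in traits need the macro (pre async-fn-in-trait Rust)
    async fn execute(&self) -> String;
}

trait NewStyle {
    // a synchronous method needs no macro at all
    fn execute(&self) -> String;
}

struct Node;

#[async_trait]
impl OldStyle for Node {
    async fn execute(&self) -> String {
        "old: awaited".to_string()
    }
}

impl NewStyle for Node {
    fn execute(&self) -> String {
        "new: returns immediately".to_string()
    }
}

#[tokio::main]
async fn main() {
    let node = Node;
    println!("{}", OldStyle::execute(&node).await);
    println!("{}", NewStyle::execute(&node));
}
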
@@ -133,7 +133,6 @@ impl<T> std::fmt::Debug for SystemTableExecutionPlan<T> {
     }
 }

-#[async_trait]
 impl<T: IoxSystemTable + 'static> ExecutionPlan for SystemTableExecutionPlan<T> {
     fn as_any(&self) -> &dyn Any {
         self
@@ -161,7 +160,7 @@ impl<T: IoxSystemTable + 'static> ExecutionPlan for SystemTableExecutionPlan<T>
         unimplemented!()
     }

-    async fn execute(
+    fn execute(
         &self,
         _partition: usize,
         context: Arc<TaskContext>,

@@ -358,7 +358,7 @@ impl IOxSessionContext {
         let task_context = Arc::new(TaskContext::from(self.inner()));

         self.run(async move {
-            let stream = physical_plan.execute(partition, task_context).await?;
+            let stream = physical_plan.execute(partition, task_context)?;
             let stream = TracedStream::new(stream, span, physical_plan);
             Ok(Box::pin(stream) as _)
         })

@@ -42,8 +42,6 @@ use std::{
     sync::Arc,
 };

-use async_trait::async_trait;
-
 use arrow::{
     array::{new_empty_array, StringArray},
     datatypes::{DataType, Field, Schema, SchemaRef},
@@ -195,7 +193,6 @@ impl Debug for NonNullCheckerExec {
     }
 }

-#[async_trait]
 impl ExecutionPlan for NonNullCheckerExec {
     fn as_any(&self) -> &(dyn std::any::Any + 'static) {
         self
@@ -246,7 +243,7 @@ impl ExecutionPlan for NonNullCheckerExec {
     }

     /// Execute one partition and return an iterator over RecordBatch
-    async fn execute(
+    fn execute(
         &self,
         partition: usize,
         context: Arc<TaskContext>,
@@ -260,7 +257,7 @@ impl ExecutionPlan for NonNullCheckerExec {
         }

         let baseline_metrics = BaselineMetrics::new(&self.metrics, partition);
-        let input_stream = self.input.execute(partition, context).await?;
+        let input_stream = self.input.execute(partition, context)?;

         let (tx, rx) = mpsc::channel(1);

@@ -312,7 +312,6 @@ fn get_timestamps(metrics: &MetricsSet) -> (Option<DateTime<Utc>>, Option<DateTime<Utc>>) {

 #[cfg(test)]
 mod tests {
-    use async_trait::async_trait;
     use chrono::TimeZone;
     use datafusion::{
         execution::context::TaskContext,
@@ -619,7 +618,6 @@ mod tests {
         }
     }

-    #[async_trait]
     impl ExecutionPlan for TestExec {
         fn as_any(&self) -> &dyn std::any::Any {
             self
@@ -648,7 +646,7 @@ mod tests {
             unimplemented!()
         }

-        async fn execute(
+        fn execute(
             &self,
             _partition: usize,
             _context: Arc<TaskContext>,

@@ -25,8 +25,6 @@ use std::{
     sync::Arc,
 };

-use async_trait::async_trait;
-
 use arrow::{
     array::StringArray,
     datatypes::{DataType, Field, Schema, SchemaRef},
@@ -166,7 +164,6 @@ impl Debug for SchemaPivotExec {
     }
 }

-#[async_trait]
 impl ExecutionPlan for SchemaPivotExec {
     fn as_any(&self) -> &(dyn std::any::Any + 'static) {
         self
@@ -216,7 +213,7 @@ impl ExecutionPlan for SchemaPivotExec {
     }

     /// Execute one partition and return an iterator over RecordBatch
-    async fn execute(
+    fn execute(
         &self,
         partition: usize,
         context: Arc<TaskContext>,
@@ -232,7 +229,7 @@ impl ExecutionPlan for SchemaPivotExec {

         let baseline_metrics = BaselineMetrics::new(&self.metrics, partition);
         let input_schema = self.input.schema();
-        let input_stream = self.input.execute(partition, context).await?;
+        let input_stream = self.input.execute(partition, context)?;

         // the operation performed in a separate task which is
         // then sent via a channel to the output

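The body comment above ("the operation performed in a separate task which is then sent via a channel to the output") describes the pattern that lets a now-synchronous execute() keep doing async work: spawn the work onto a tokio task and immediately return a channel-backed stream (IOx wraps the receiver in datafusion_util's `AdapterStream`, as the diff shows). A self-contained toy version of the same shape, using integers instead of `RecordBatch`es and assuming tokio and tokio-stream:

use tokio_stream::wrappers::UnboundedReceiverStream;
use tokio_stream::StreamExt;

// Synchronous "execute": no async, no await; the work happens after return.
fn execute_toy() -> UnboundedReceiverStream<i32> {
    let (tx, rx) = tokio::sync::mpsc::unbounded_channel();
    tokio::spawn(async move {
        for i in 0..3 {
            // the spawned task feeds the stream the caller is polling
            let _ = tx.send(i);
        }
    });
    UnboundedReceiverStream::new(rx)
}

#[tokio::main]
async fn main() {
    let mut stream = execute_toy();
    while let Some(v) = stream.next().await {
        println!("got {v}");
    }
}
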
@@ -6,8 +6,6 @@ use std::{
     sync::Arc,
 };

-use async_trait::async_trait;
-
 use arrow::{
     array::{Array, ArrayRef, BooleanArray},
     compute::{self, filter_record_batch},
@@ -32,7 +30,8 @@ use datafusion::{
 use datafusion_util::AdapterStream;
 use futures::StreamExt;
 use observability_deps::tracing::*;
-use tokio::sync::{mpsc::UnboundedSender, Mutex};
+use parking_lot::Mutex;
+use tokio::sync::mpsc::UnboundedSender;

 /// Implements stream splitting described in `make_stream_split`
 ///
@@ -145,7 +144,6 @@ impl Debug for StreamSplitExec {
     }
 }

-#[async_trait]
 impl ExecutionPlan for StreamSplitExec {
     fn as_any(&self) -> &(dyn std::any::Any + 'static) {
         self
@@ -194,15 +192,15 @@ impl ExecutionPlan for StreamSplitExec {
     ///
     /// * partition 0 are the rows for which the split_expr evaluates to true
     /// * partition 1 are the rows for which the split_expr does not evaluate to true (e.g. Null or false)
-    async fn execute(
+    fn execute(
         &self,
         partition: usize,
         context: Arc<TaskContext>,
     ) -> Result<SendableRecordBatchStream> {
         debug!(partition, "Start SplitExec::execute");
-        self.start_if_needed(context).await?;
+        self.start_if_needed(context)?;

-        let mut state = self.state.lock().await;
+        let mut state = self.state.lock();
         match &mut (*state) {
             State::New => panic!("should have been initialized"),
             State::Running { stream0, stream1 } => {
@@ -241,8 +239,8 @@ impl ExecutionPlan for StreamSplitExec {

 impl StreamSplitExec {
     /// if in State::New, sets up the output running and sets self.state --> `Running`
-    async fn start_if_needed(&self, context: Arc<TaskContext>) -> Result<()> {
-        let mut state = self.state.lock().await;
+    fn start_if_needed(&self, context: Arc<TaskContext>) -> Result<()> {
+        let mut state = self.state.lock();
         if matches!(*state, State::Running { .. }) {
             return Ok(());
         }
@@ -258,7 +256,7 @@ impl StreamSplitExec {

         trace!("Setting up SplitStreamExec state");

-        let input_stream = self.input.execute(0, context).await?;
+        let input_stream = self.input.execute(0, context)?;
         let (tx0, rx0) = tokio::sync::mpsc::unbounded_channel();
         let (tx1, rx1) = tokio::sync::mpsc::unbounded_channel();
         let split_expr = Arc::clone(&self.split_expr);

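The `Mutex` swap in this file follows from the same change: a synchronous execute() cannot call `.lock().await` on a `tokio::sync::Mutex`, so the state guard becomes a `parking_lot::Mutex`, whose `lock()` blocks briefly instead of awaiting. A toy sketch of the resulting pattern (the `State` enum mirrors the one in the diff; the setup body is elided):

use std::sync::Arc;

use parking_lot::Mutex;

#[derive(Debug)]
enum State {
    New,
    Running,
}

struct SplitExecToy {
    state: Arc<Mutex<State>>,
}

impl SplitExecToy {
    // Old: async fn start_if_needed(&self) { let mut state = self.state.lock().await; ... }
    // New: no await anywhere; parking_lot's lock() returns the guard directly.
    fn start_if_needed(&self) {
        let mut state = self.state.lock();
        if matches!(*state, State::Running) {
            return;
        }
        // ... set up the output streams here ...
        *state = State::Running;
    }
}

fn main() {
    let exec = SplitExecToy {
        state: Arc::new(Mutex::new(State::New)),
    };
    exec.start_if_needed();
    println!("{:?}", *exec.state.lock()); // Running
}
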
@@ -8,7 +8,6 @@ use arrow::{
     error::{ArrowError, Result as ArrowResult},
     record_batch::RecordBatch,
 };
-use async_trait::async_trait;
 use datafusion_util::{watch::watch_task, AdapterStream};

 pub use self::algo::RecordBatchDeduplicator;
@@ -140,7 +139,6 @@ impl DeduplicateMetrics {
     }
 }

-#[async_trait]
 impl ExecutionPlan for DeduplicateExec {
     fn as_any(&self) -> &dyn std::any::Any {
         self
@@ -183,7 +181,7 @@ impl ExecutionPlan for DeduplicateExec {
         Ok(Arc::new(Self::new(input, self.sort_keys.clone())))
     }

-    async fn execute(
+    fn execute(
         &self,
         partition: usize,
         context: Arc<TaskContext>,
@@ -197,7 +195,7 @@ impl ExecutionPlan for DeduplicateExec {
         }
         let deduplicate_metrics = DeduplicateMetrics::new(&self.metrics, partition);

-        let input_stream = self.input.execute(0, context).await?;
+        let input_stream = self.input.execute(0, context)?;

         // the deduplication is performed in a separate task which is
         // then sent via a channel to the output
@@ -1119,7 +1117,6 @@ mod test {
         batches: Vec<ArrowResult<RecordBatch>>,
     }

-    #[async_trait]
     impl ExecutionPlan for DummyExec {
         fn as_any(&self) -> &dyn std::any::Any {
             self
@@ -1148,7 +1145,7 @@ mod test {
             unimplemented!()
         }

-        async fn execute(
+        fn execute(
             &self,
             partition: usize,
             _context: Arc<TaskContext>,
@@ -1157,19 +1154,19 @@ mod test {

             debug!(partition, "Start DummyExec::execute");

-            // ensure there is space to queue up the channel
-            let (tx, rx) = mpsc::channel(self.batches.len());
-            // queue them all up
+            let (tx, rx) = mpsc::unbounded_channel();
+
+            // queue up all the results
             for r in &self.batches {
                 match r {
-                    Ok(batch) => tx.send(Ok(batch.clone())).await.unwrap(),
-                    Err(e) => tx.send(Err(clone_error(e))).await.unwrap(),
+                    Ok(batch) => tx.send(Ok(batch.clone())).unwrap(),
+                    Err(e) => tx.send(Err(clone_error(e))).unwrap(),
                 }
             }

             debug!(partition, "End DummyExec::execute");
-            Ok(AdapterStream::adapt(self.schema(), rx))
+            Ok(AdapterStream::adapt_unbounded(self.schema(), rx))
         }

         fn statistics(&self) -> Statistics {

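Similarly, the test `DummyExec` moves from a bounded tokio channel, whose `send` must be awaited, to an unbounded one whose `send` is synchronous and can therefore run inside the non-async execute() (paired with `AdapterStream::adapt_unbounded` above). A standalone sketch of that difference:

use tokio::sync::mpsc;

#[tokio::main]
async fn main() {
    // Bounded: send() is async because it may have to wait for capacity.
    let (tx, mut rx) = mpsc::channel::<i32>(1);
    tx.send(1).await.unwrap();
    assert_eq!(rx.recv().await, Some(1));

    // Unbounded: send() never waits, so no .await is needed; this is
    // what lets the sync execute() queue up results before returning.
    let (utx, mut urx) = mpsc::unbounded_channel::<i32>();
    utx.send(2).unwrap();
    assert_eq!(urx.recv().await, Some(2));
}
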
@@ -20,8 +20,6 @@ use schema::Schema;
 use crate::{exec::IOxSessionContext, QueryChunk};
 use predicate::Predicate;

-use async_trait::async_trait;
-
 use super::adapter::SchemaAdapterStream;

 /// Implements the DataFusion physical plan interface
@@ -62,7 +60,6 @@ impl IOxReadFilterNode {
     }
 }

-#[async_trait]
 impl ExecutionPlan for IOxReadFilterNode {
     fn as_any(&self) -> &dyn std::any::Any {
         self
@@ -108,7 +105,7 @@ impl ExecutionPlan for IOxReadFilterNode {
         Ok(Arc::new(new_self))
     }

-    async fn execute(
+    fn execute(
         &self,
         partition: usize,
         _context: Arc<TaskContext>,