refactor: upgrade DataFusion to `19b03240920ad63cac916b42951754c0337bdac8` (#7813)

This upgrade is needed to pick up:

- https://github.com/apache/arrow-datafusion/pull/6226.

Code changes were required due to:

- https://github.com/apache/arrow-datafusion/pull/6332

Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com>
pull/24376/head
Marco Neumann 2023-05-17 15:57:12 +02:00 committed by GitHub
parent 1ff11d0856
commit 62fed73bcd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 43 additions and 35 deletions

18
Cargo.lock generated
View File

@ -1441,7 +1441,7 @@ dependencies = [
[[package]] [[package]]
name = "datafusion" name = "datafusion"
version = "24.0.0" version = "24.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=e6d7e46dedbe5046e4606bfd3d7a1199dd0aaae2#e6d7e46dedbe5046e4606bfd3d7a1199dd0aaae2" source = "git+https://github.com/apache/arrow-datafusion.git?rev=19b03240920ad63cac916b42951754c0337bdac8#19b03240920ad63cac916b42951754c0337bdac8"
dependencies = [ dependencies = [
"ahash 0.8.3", "ahash 0.8.3",
"arrow", "arrow",
@ -1490,7 +1490,7 @@ dependencies = [
[[package]] [[package]]
name = "datafusion-common" name = "datafusion-common"
version = "24.0.0" version = "24.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=e6d7e46dedbe5046e4606bfd3d7a1199dd0aaae2#e6d7e46dedbe5046e4606bfd3d7a1199dd0aaae2" source = "git+https://github.com/apache/arrow-datafusion.git?rev=19b03240920ad63cac916b42951754c0337bdac8#19b03240920ad63cac916b42951754c0337bdac8"
dependencies = [ dependencies = [
"arrow", "arrow",
"arrow-array", "arrow-array",
@ -1504,7 +1504,7 @@ dependencies = [
[[package]] [[package]]
name = "datafusion-execution" name = "datafusion-execution"
version = "24.0.0" version = "24.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=e6d7e46dedbe5046e4606bfd3d7a1199dd0aaae2#e6d7e46dedbe5046e4606bfd3d7a1199dd0aaae2" source = "git+https://github.com/apache/arrow-datafusion.git?rev=19b03240920ad63cac916b42951754c0337bdac8#19b03240920ad63cac916b42951754c0337bdac8"
dependencies = [ dependencies = [
"dashmap", "dashmap",
"datafusion-common", "datafusion-common",
@ -1521,7 +1521,7 @@ dependencies = [
[[package]] [[package]]
name = "datafusion-expr" name = "datafusion-expr"
version = "24.0.0" version = "24.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=e6d7e46dedbe5046e4606bfd3d7a1199dd0aaae2#e6d7e46dedbe5046e4606bfd3d7a1199dd0aaae2" source = "git+https://github.com/apache/arrow-datafusion.git?rev=19b03240920ad63cac916b42951754c0337bdac8#19b03240920ad63cac916b42951754c0337bdac8"
dependencies = [ dependencies = [
"ahash 0.8.3", "ahash 0.8.3",
"arrow", "arrow",
@ -1532,7 +1532,7 @@ dependencies = [
[[package]] [[package]]
name = "datafusion-optimizer" name = "datafusion-optimizer"
version = "24.0.0" version = "24.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=e6d7e46dedbe5046e4606bfd3d7a1199dd0aaae2#e6d7e46dedbe5046e4606bfd3d7a1199dd0aaae2" source = "git+https://github.com/apache/arrow-datafusion.git?rev=19b03240920ad63cac916b42951754c0337bdac8#19b03240920ad63cac916b42951754c0337bdac8"
dependencies = [ dependencies = [
"arrow", "arrow",
"async-trait", "async-trait",
@ -1549,7 +1549,7 @@ dependencies = [
[[package]] [[package]]
name = "datafusion-physical-expr" name = "datafusion-physical-expr"
version = "24.0.0" version = "24.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=e6d7e46dedbe5046e4606bfd3d7a1199dd0aaae2#e6d7e46dedbe5046e4606bfd3d7a1199dd0aaae2" source = "git+https://github.com/apache/arrow-datafusion.git?rev=19b03240920ad63cac916b42951754c0337bdac8#19b03240920ad63cac916b42951754c0337bdac8"
dependencies = [ dependencies = [
"ahash 0.8.3", "ahash 0.8.3",
"arrow", "arrow",
@ -1581,7 +1581,7 @@ dependencies = [
[[package]] [[package]]
name = "datafusion-proto" name = "datafusion-proto"
version = "24.0.0" version = "24.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=e6d7e46dedbe5046e4606bfd3d7a1199dd0aaae2#e6d7e46dedbe5046e4606bfd3d7a1199dd0aaae2" source = "git+https://github.com/apache/arrow-datafusion.git?rev=19b03240920ad63cac916b42951754c0337bdac8#19b03240920ad63cac916b42951754c0337bdac8"
dependencies = [ dependencies = [
"arrow", "arrow",
"chrono", "chrono",
@ -1595,7 +1595,7 @@ dependencies = [
[[package]] [[package]]
name = "datafusion-row" name = "datafusion-row"
version = "24.0.0" version = "24.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=e6d7e46dedbe5046e4606bfd3d7a1199dd0aaae2#e6d7e46dedbe5046e4606bfd3d7a1199dd0aaae2" source = "git+https://github.com/apache/arrow-datafusion.git?rev=19b03240920ad63cac916b42951754c0337bdac8#19b03240920ad63cac916b42951754c0337bdac8"
dependencies = [ dependencies = [
"arrow", "arrow",
"datafusion-common", "datafusion-common",
@ -1606,7 +1606,7 @@ dependencies = [
[[package]] [[package]]
name = "datafusion-sql" name = "datafusion-sql"
version = "24.0.0" version = "24.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=e6d7e46dedbe5046e4606bfd3d7a1199dd0aaae2#e6d7e46dedbe5046e4606bfd3d7a1199dd0aaae2" source = "git+https://github.com/apache/arrow-datafusion.git?rev=19b03240920ad63cac916b42951754c0337bdac8#19b03240920ad63cac916b42951754c0337bdac8"
dependencies = [ dependencies = [
"arrow", "arrow",
"arrow-schema", "arrow-schema",

View File

@ -117,8 +117,8 @@ license = "MIT OR Apache-2.0"
[workspace.dependencies] [workspace.dependencies]
arrow = { version = "39.0.0" } arrow = { version = "39.0.0" }
arrow-flight = { version = "39.0.0" } arrow-flight = { version = "39.0.0" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="e6d7e46dedbe5046e4606bfd3d7a1199dd0aaae2", default-features = false } datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="19b03240920ad63cac916b42951754c0337bdac8", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="e6d7e46dedbe5046e4606bfd3d7a1199dd0aaae2" } datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="19b03240920ad63cac916b42951754c0337bdac8" }
hashbrown = { version = "0.13.2" } hashbrown = { version = "0.13.2" }
parquet = { version = "39.0.0" } parquet = { version = "39.0.0" }
tonic = { version = "0.9.2", features = ["tls", "tls-webpki-roots"] } tonic = { version = "0.9.2", features = ["tls", "tls-webpki-roots"] }

View File

@ -96,16 +96,16 @@
---------- ----------
| plan_type | plan | | plan_type | plan |
---------- ----------
| Plan with Metrics | CoalescePartitionsExec, metrics=[elapsed_compute=1.234ms, mem_used=0, output_rows=10, spill_count=0, spilled_bytes=0] | | Plan with Metrics | CoalescePartitionsExec, metrics=[elapsed_compute=1.234ms, output_rows=10] |
| | UnionExec, metrics=[elapsed_compute=1.234ms, mem_used=0, output_rows=10, spill_count=0, spilled_bytes=0] | | | UnionExec, metrics=[elapsed_compute=1.234ms, output_rows=10] |
| | CoalesceBatchesExec: target_batch_size=8192, metrics=[elapsed_compute=1.234ms, mem_used=0, output_rows=5, spill_count=0, spilled_bytes=0] | | | CoalesceBatchesExec: target_batch_size=8192, metrics=[elapsed_compute=1.234ms, output_rows=5] |
| | FilterExec: state@4 = MA, metrics=[elapsed_compute=1.234ms, mem_used=0, output_rows=5, spill_count=0, spilled_bytes=0] | | | FilterExec: state@4 = MA, metrics=[elapsed_compute=1.234ms, output_rows=5] |
| | ParquetExec: file_groups={2 groups: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet], [1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, projection=[area, city, max_temp, min_temp, state, time], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], predicate=state@4 = MA, pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, metrics=[bytes_scanned=1219, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=5, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=5, row_groups_pruned=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning_total=1.234ms, time_elapsed_scanning_until_data=1.234ms] | | | ParquetExec: file_groups={2 groups: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet], [1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, projection=[area, city, max_temp, min_temp, state, time], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], predicate=state@4 = MA, pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, metrics=[bytes_scanned=1683, elapsed_compute=1.234ms, num_predicate_creation_errors=0, output_rows=5, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=5, row_groups_pruned=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning_total=1.234ms, time_elapsed_scanning_until_data=1.234ms] |
| | ProjectionExec: expr=[area@1 as area, city@2 as city, max_temp@3 as max_temp, min_temp@4 as min_temp, state@5 as state, time@6 as time], metrics=[elapsed_compute=1.234ms, mem_used=0, output_rows=5, spill_count=0, spilled_bytes=0] | | | ProjectionExec: expr=[area@1 as area, city@2 as city, max_temp@3 as max_temp, min_temp@4 as min_temp, state@5 as state, time@6 as time], metrics=[elapsed_compute=1.234ms, output_rows=5] |
| | DeduplicateExec: [state@5 ASC,city@2 ASC,time@6 ASC], metrics=[elapsed_compute=1.234ms, mem_used=0, num_dupes=2, output_rows=5, spill_count=0, spilled_bytes=0] | | | DeduplicateExec: [state@5 ASC,city@2 ASC,time@6 ASC], metrics=[elapsed_compute=1.234ms, num_dupes=2, output_rows=5] |
| | SortPreservingMergeExec: [state@5 ASC,city@2 ASC,time@6 ASC,__chunk_order@0 ASC], metrics=[elapsed_compute=1.234ms, mem_used=0, output_rows=7, spill_count=0, spilled_bytes=0] | | | SortPreservingMergeExec: [state@5 ASC,city@2 ASC,time@6 ASC,__chunk_order@0 ASC], metrics=[elapsed_compute=1.234ms, output_rows=7] |
| | CoalesceBatchesExec: target_batch_size=8192, metrics=[elapsed_compute=1.234ms, mem_used=0, output_rows=7, spill_count=0, spilled_bytes=0] | | | CoalesceBatchesExec: target_batch_size=8192, metrics=[elapsed_compute=1.234ms, output_rows=7] |
| | FilterExec: state@5 = MA, metrics=[elapsed_compute=1.234ms, mem_used=0, output_rows=7, spill_count=0, spilled_bytes=0] | | | FilterExec: state@5 = MA, metrics=[elapsed_compute=1.234ms, output_rows=7] |
| | ParquetExec: file_groups={2 groups: [[1/1/1/00000000-0000-0000-0000-000000000002.parquet], [1/1/1/00000000-0000-0000-0000-000000000003.parquet]]}, projection=[__chunk_order, area, city, max_temp, min_temp, state, time], output_ordering=[state@5 ASC, city@2 ASC, time@6 ASC, __chunk_order@0 ASC], predicate=state@4 = MA, pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, metrics=[bytes_scanned=1106, elapsed_compute=1.234ms, mem_used=0, num_predicate_creation_errors=0, output_rows=7, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=3, row_groups_pruned=0, spill_count=0, spilled_bytes=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning_total=1.234ms, time_elapsed_scanning_until_data=1.234ms] | | | ParquetExec: file_groups={2 groups: [[1/1/1/00000000-0000-0000-0000-000000000002.parquet], [1/1/1/00000000-0000-0000-0000-000000000003.parquet]]}, projection=[__chunk_order, area, city, max_temp, min_temp, state, time], output_ordering=[state@5 ASC, city@2 ASC, time@6 ASC, __chunk_order@0 ASC], predicate=state@4 = MA, pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, metrics=[bytes_scanned=1532, elapsed_compute=1.234ms, num_predicate_creation_errors=0, output_rows=7, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=3, row_groups_pruned=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning_total=1.234ms, time_elapsed_scanning_until_data=1.234ms] |
| | | | | |
---------- ----------

View File

@ -1118,6 +1118,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
vec![expr], vec![expr],
distinct, distinct,
None, None,
None,
))) )))
} }
"sum" | "stddev" | "mean" | "median" => { "sum" | "stddev" | "mean" | "median" => {
@ -1132,6 +1133,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
vec![expr], vec![expr],
false, false,
None, None,
None,
))) )))
} }
name @ ("first" | "last" | "min" | "max") => { name @ ("first" | "last" | "min" | "max") => {

View File

@ -58,6 +58,7 @@ where
args, args,
distinct, distinct,
filter, filter,
order_by,
}) => Ok(Expr::AggregateFunction(AggregateFunction::new( }) => Ok(Expr::AggregateFunction(AggregateFunction::new(
fun.clone(), fun.clone(),
args.iter() args.iter()
@ -65,6 +66,7 @@ where
.collect::<Result<Vec<Expr>>>()?, .collect::<Result<Vec<Expr>>>()?,
*distinct, *distinct,
filter.clone(), filter.clone(),
order_by.clone(),
))), ))),
Expr::WindowFunction(WindowFunction { Expr::WindowFunction(WindowFunction {
fun, fun,
@ -87,16 +89,20 @@ where
.collect::<Result<Vec<_>>>()?, .collect::<Result<Vec<_>>>()?,
window_frame.clone(), window_frame.clone(),
))), ))),
Expr::AggregateUDF(AggregateUDF { fun, args, filter }) => { Expr::AggregateUDF(AggregateUDF {
Ok(Expr::AggregateUDF(AggregateUDF { fun,
args,
filter,
order_by,
}) => Ok(Expr::AggregateUDF(AggregateUDF {
fun: fun.clone(), fun: fun.clone(),
args: args args: args
.iter() .iter()
.map(|e| clone_with_replacement(e, replacement_fn)) .map(|e| clone_with_replacement(e, replacement_fn))
.collect::<Result<Vec<Expr>>>()?, .collect::<Result<Vec<Expr>>>()?,
filter: filter.clone(), filter: filter.clone(),
})) order_by: order_by.clone(),
} })),
Expr::Alias(nested_expr, alias_name) => Ok(Expr::Alias( Expr::Alias(nested_expr, alias_name) => Ok(Expr::Alias(
Box::new(clone_with_replacement(nested_expr, replacement_fn)?), Box::new(clone_with_replacement(nested_expr, replacement_fn)?),
alias_name.clone(), alias_name.clone(),

View File

@ -30,9 +30,9 @@ bytes = { version = "1" }
chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "serde"] } chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "serde"] }
crossbeam-utils = { version = "0.8" } crossbeam-utils = { version = "0.8" }
crypto-common = { version = "0.1", default-features = false, features = ["std"] } crypto-common = { version = "0.1", default-features = false, features = ["std"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "e6d7e46dedbe5046e4606bfd3d7a1199dd0aaae2" } datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "19b03240920ad63cac916b42951754c0337bdac8" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "e6d7e46dedbe5046e4606bfd3d7a1199dd0aaae2", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "19b03240920ad63cac916b42951754c0337bdac8", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "e6d7e46dedbe5046e4606bfd3d7a1199dd0aaae2", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "19b03240920ad63cac916b42951754c0337bdac8", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
digest = { version = "0.10", features = ["mac", "std"] } digest = { version = "0.10", features = ["mac", "std"] }
either = { version = "1" } either = { version = "1" }
fixedbitset = { version = "0.4" } fixedbitset = { version = "0.4" }