chore: Update datafusion and arrow / parquet / arrow-flight 25.0.0 (#5900)

* chore: Update datafusion and `arrow` / `parquet` / `arrow-flight` 25.0.0

* chore: Update for structure changes

* chore: Update for new projection pushdown

* chore: Run cargo hakari tasks

* fix: fmt

Co-authored-by: CircleCI[bot] <circleci@influxdata.com>
Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com>
pull/24376/head
Andrew Lamb 2022-10-18 16:58:47 -04:00 committed by GitHub
parent c28ac4a3c3
commit d706f8221d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
27 changed files with 135 additions and 175 deletions

45
Cargo.lock generated
View File

@ -100,9 +100,9 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
[[package]]
name = "arrow"
version = "24.0.0"
version = "25.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d68391300d5237f6725f0f869ae7cb65d45fcf8a6d18f6ceecd328fb803bef93"
checksum = "76312eb67808c67341f4234861c4fcd2f9868f55e88fa2186ab3b357a6c5830b"
dependencies = [
"ahash 0.8.0",
"arrow-array",
@ -128,9 +128,9 @@ dependencies = [
[[package]]
name = "arrow-array"
version = "24.0.0"
version = "25.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0bb00c5862b5eea683812083c495bef01a9a5149da46ad2f4c0e4aa8800f64d"
checksum = "69dd2c257fa76de0bcc63cabe8c81d34c46ef6fa7651e3e497922c3c9878bd67"
dependencies = [
"ahash 0.8.0",
"arrow-buffer",
@ -144,18 +144,19 @@ dependencies = [
[[package]]
name = "arrow-buffer"
version = "24.0.0"
version = "25.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e594d0fe0026a8bc2459bdc5ac9623e5fb666724a715e0acbc96ba30c5d4cc7"
checksum = "af963e71bdbbf928231d521083ddc8e8068cf5c8d45d4edcfeaf7eb5cdd779a9"
dependencies = [
"half 2.1.0",
"num",
]
[[package]]
name = "arrow-data"
version = "24.0.0"
version = "25.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8500df05060d86fdc53e9b5cb32e51bfeaacc040fdeced3eb99ac0d59200ff45"
checksum = "52554ffff560c366d7210c2621a3cf1dc408f9969a0c7688a3ba0a62248a945d"
dependencies = [
"arrow-buffer",
"arrow-schema",
@ -165,9 +166,9 @@ dependencies = [
[[package]]
name = "arrow-flight"
version = "24.0.0"
version = "25.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5438183d2f3404f57418847a06a3925037916f80ed8aefc904276b41e7f90322"
checksum = "ded4fc9e8a4c7afe216cdd92989f089d7ce47810d974da898b2b42a4b9cec9f3"
dependencies = [
"arrow",
"base64",
@ -183,9 +184,9 @@ dependencies = [
[[package]]
name = "arrow-schema"
version = "24.0.0"
version = "25.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86d1fef01f25e1452c86fa6887f078de8e0aaeeb828370feab205944cfc30e27"
checksum = "1a5518f2bd7775057391f88257627cbb760ba3e1c2f2444a005ba79158624654"
[[package]]
name = "arrow_util"
@ -1194,7 +1195,7 @@ dependencies = [
[[package]]
name = "datafusion"
version = "13.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fc5081d48ef59e39c1b353dd45fcd13af6186676#fc5081d48ef59e39c1b353dd45fcd13af6186676"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=feff5dc805449afa4a7ecb5ca88b979ad5739cce#feff5dc805449afa4a7ecb5ca88b979ad5739cce"
dependencies = [
"ahash 0.8.0",
"arrow",
@ -1238,7 +1239,7 @@ dependencies = [
[[package]]
name = "datafusion-common"
version = "13.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fc5081d48ef59e39c1b353dd45fcd13af6186676#fc5081d48ef59e39c1b353dd45fcd13af6186676"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=feff5dc805449afa4a7ecb5ca88b979ad5739cce#feff5dc805449afa4a7ecb5ca88b979ad5739cce"
dependencies = [
"arrow",
"object_store",
@ -1250,7 +1251,7 @@ dependencies = [
[[package]]
name = "datafusion-expr"
version = "13.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fc5081d48ef59e39c1b353dd45fcd13af6186676#fc5081d48ef59e39c1b353dd45fcd13af6186676"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=feff5dc805449afa4a7ecb5ca88b979ad5739cce#feff5dc805449afa4a7ecb5ca88b979ad5739cce"
dependencies = [
"ahash 0.8.0",
"arrow",
@ -1262,7 +1263,7 @@ dependencies = [
[[package]]
name = "datafusion-optimizer"
version = "13.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fc5081d48ef59e39c1b353dd45fcd13af6186676#fc5081d48ef59e39c1b353dd45fcd13af6186676"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=feff5dc805449afa4a7ecb5ca88b979ad5739cce#feff5dc805449afa4a7ecb5ca88b979ad5739cce"
dependencies = [
"arrow",
"async-trait",
@ -1277,7 +1278,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
version = "13.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fc5081d48ef59e39c1b353dd45fcd13af6186676#fc5081d48ef59e39c1b353dd45fcd13af6186676"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=feff5dc805449afa4a7ecb5ca88b979ad5739cce#feff5dc805449afa4a7ecb5ca88b979ad5739cce"
dependencies = [
"ahash 0.8.0",
"arrow",
@ -1301,7 +1302,7 @@ dependencies = [
[[package]]
name = "datafusion-proto"
version = "13.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fc5081d48ef59e39c1b353dd45fcd13af6186676#fc5081d48ef59e39c1b353dd45fcd13af6186676"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=feff5dc805449afa4a7ecb5ca88b979ad5739cce#feff5dc805449afa4a7ecb5ca88b979ad5739cce"
dependencies = [
"arrow",
"datafusion 13.0.0",
@ -1314,7 +1315,7 @@ dependencies = [
[[package]]
name = "datafusion-row"
version = "13.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fc5081d48ef59e39c1b353dd45fcd13af6186676#fc5081d48ef59e39c1b353dd45fcd13af6186676"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=feff5dc805449afa4a7ecb5ca88b979ad5739cce#feff5dc805449afa4a7ecb5ca88b979ad5739cce"
dependencies = [
"arrow",
"datafusion-common",
@ -1325,7 +1326,7 @@ dependencies = [
[[package]]
name = "datafusion-sql"
version = "13.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fc5081d48ef59e39c1b353dd45fcd13af6186676#fc5081d48ef59e39c1b353dd45fcd13af6186676"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=feff5dc805449afa4a7ecb5ca88b979ad5739cce#feff5dc805449afa4a7ecb5ca88b979ad5739cce"
dependencies = [
"arrow",
"datafusion-common",
@ -3457,9 +3458,9 @@ dependencies = [
[[package]]
name = "parquet"
version = "24.0.0"
version = "25.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74fd590f0672998df84503d1bcbebc69732583d03cc3495c7dd8d3e5a1d8437f"
checksum = "f7758803135c32b243e52832473fc8f7c768a0a170b0851fb1bb37904c6b3550"
dependencies = [
"ahash 0.8.0",
"arrow",

View File

@ -8,7 +8,7 @@ description = "Apache Arrow utilities"
[dependencies]
ahash = { version = "0.8.0", default-features = false, features = ["runtime-rng"] }
# need dyn_cmp_dict feature for comparing dictionary arrays
arrow = { version = "24.0.0", features = ["prettyprint", "dyn_cmp_dict"] }
arrow = { version = "25.0.0", features = ["prettyprint", "dyn_cmp_dict"] }
# used by arrow anyway (needed for printing workaround)
chrono = { version = "0.4", default-features = false }
comfy-table = { version = "6.1", default-features = false }
@ -19,5 +19,5 @@ snafu = "0.7"
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]
arrow-flight = "24.0.0"
arrow-flight = "25.0.0"
rand = "0.8.3"

View File

@ -5,7 +5,7 @@ authors = ["Luke Bond <luke.n.bond@gmail.com>"]
edition = "2021"
[dependencies]
arrow = { version = "24.0.0", features = ["prettyprint"] }
arrow = { version = "25.0.0", features = ["prettyprint"] }
async-trait = "0.1.57"
backoff = { path = "../backoff" }
bytes = "1.2"

View File

@ -9,6 +9,6 @@ description = "Re-exports datafusion at a specific version"
# Rename to workaround doctest bug
# Turn off optional datafusion features (e.g. don't get support for crypto functions or avro)
upstream = { git = "https://github.com/apache/arrow-datafusion.git", rev="fc5081d48ef59e39c1b353dd45fcd13af6186676", default-features = false, package = "datafusion" }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="fc5081d48ef59e39c1b353dd45fcd13af6186676" }
upstream = { git = "https://github.com/apache/arrow-datafusion.git", rev="feff5dc805449afa4a7ecb5ca88b979ad5739cce", default-features = false, package = "datafusion" }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="feff5dc805449afa4a7ecb5ca88b979ad5739cce" }
workspace-hack = { path = "../workspace-hack"}

View File

@ -40,7 +40,7 @@ trogging = { path = "../trogging", default-features = false, features = ["clap"]
# Crates.io dependencies, in alphabetical order
nu-ansi-term = "0.46.0"
arrow = { version = "24.0.0", features = ["prettyprint"] }
arrow = { version = "25.0.0", features = ["prettyprint"] }
backtrace = "0.3"
bytes = "1.2"
clap = { version = "4", features = ["derive", "env"] }

View File

@ -17,8 +17,8 @@ influxdb_line_protocol = { path = "../influxdb_line_protocol"}
generated_types = { path = "../generated_types", default-features = false, features = ["data_types_conversions"] }
# Crates.io dependencies, in alphabetical order
arrow = { version = "24.0.0", optional = true }
arrow-flight = { version = "24.0.0", optional = true }
arrow = { version = "25.0.0", optional = true }
arrow-flight = { version = "25.0.0", optional = true }
bytes = "1.2"
futures-util = { version = "0.3", optional = true }
prost = "0.11"

View File

@ -5,8 +5,8 @@ authors = ["Nga Tran <nga-tran@live.com>"]
edition = "2021"
[dependencies]
arrow = { version = "24.0.0", features = ["prettyprint"] }
arrow-flight = "24.0.0"
arrow = { version = "25.0.0", features = ["prettyprint"] }
arrow-flight = "25.0.0"
arrow_util = { path = "../arrow_util" }
async-trait = "0.1.57"
backoff = { path = "../backoff" }

View File

@ -14,7 +14,7 @@ description = "IOx Query Interface and Executor"
# 2. Allow for query logic testing without bringing in all the storage systems.
[dependencies] # In alphabetical order
arrow = { version = "24.0.0", features = ["prettyprint"] }
arrow = { version = "25.0.0", features = ["prettyprint"] }
arrow_util = { path = "../arrow_util" }
async-trait = "0.1"
chrono = { version = "0.4", default-features = false }

View File

@ -20,14 +20,16 @@ use datafusion::{
datasource::{provider_as_source, MemTable},
error::{DataFusionError, Result as DatafusionResult},
execution::context::ExecutionProps,
logical_expr::{expr_rewriter::ExprRewriter, ExprSchemable, LogicalPlan, LogicalPlanBuilder},
logical_expr::{
expr_rewriter::ExprRewriter, BinaryExpr, ExprSchemable, LogicalPlan, LogicalPlanBuilder,
},
optimizer::expr_simplifier::{ExprSimplifier, SimplifyContext},
physical_expr::create_physical_expr,
physical_plan::{
expressions::{col as physical_col, PhysicalSortExpr},
ExecutionPlan, PhysicalExpr,
},
prelude::{lit, Expr},
prelude::{binary_expr, lit, Expr},
scalar::ScalarValue,
};
@ -211,14 +213,10 @@ impl<'a> ExprRewriter for MissingColumnsToNull<'a> {
// Until then, we need to know what type of expr the column is
// being compared with, so workaround by finding the datatype of the other arg
match expr {
Expr::BinaryExpr { left, op, right } => {
Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
let left = self.rewrite_op_arg(*left, &right)?;
let right = self.rewrite_op_arg(*right, &left)?;
Ok(Expr::BinaryExpr {
left: Box::new(left),
op,
right: Box::new(right),
})
Ok(binary_expr(left, op, right))
}
Expr::IsNull(expr) if self.is_null_column(&expr) => Ok(lit(true)),
Expr::IsNotNull(expr) if self.is_null_column(&expr) => Ok(lit(false)),

View File

@ -6,7 +6,7 @@ edition = "2021"
description = "IOx test utils and tests"
[dependencies]
arrow = "24.0.0"
arrow = "25.0.0"
bytes = "1.2"
data_types = { path = "../data_types" }
datafusion = { path = "../datafusion" }

View File

@ -22,7 +22,7 @@ iox_time = { path = "../iox_time" }
trace = { path = "../trace" }
# Crates.io dependencies, in alphabetical order
arrow-flight = "24.0.0"
arrow-flight = "25.0.0"
async-trait = "0.1"
hyper = "0.14"
thiserror = "1.0.37"

View File

@ -5,7 +5,7 @@ edition = "2021"
description = "A mutable arrow RecordBatch"
[dependencies]
arrow = { version = "24.0.0", features = ["prettyprint"] }
arrow = { version = "25.0.0", features = ["prettyprint"] }
arrow_util = { path = "../arrow_util" }
chrono = { version = "0.4", default-features = false }
data_types = { path = "../data_types" }

View File

@ -5,7 +5,7 @@ authors = ["Nga Tran <nga-tran@live.com>"]
edition = "2021"
[dependencies] # In alphabetical order
arrow = { version = "24.0.0", features = ["prettyprint"] }
arrow = { version = "25.0.0", features = ["prettyprint"] }
base64 = "0.13"
bytes = "1.2"
data_types = { path = "../data_types" }
@ -17,7 +17,7 @@ iox_time = { path = "../iox_time" }
object_store = "0.5.1"
observability_deps = { path = "../observability_deps" }
parking_lot = "0.12"
parquet = {version = "24.0.0", features = ["experimental"]}
parquet = {version = "25.0.0", features = ["experimental"]}
pbjson-types = "0.5"
predicate = { path = "../predicate" }
prost = "0.11"

View File

@ -4,7 +4,7 @@ version = "0.1.0"
edition = "2021"
[dependencies]
arrow = { version = "24.0.0", features = ["prettyprint"] }
arrow = { version = "25.0.0", features = ["prettyprint"] }
chrono = { version = "0.4", default-features = false }
data_types = { path = "../data_types" }
datafusion = { path = "../datafusion" }

View File

@ -1,5 +1,8 @@
use data_types::{DeleteExpr, Op, Scalar};
use datafusion::prelude::Expr;
use datafusion::{
logical_expr::BinaryExpr,
prelude::{binary_expr, lit, Expr},
};
use snafu::{ResultExt, Snafu};
use std::ops::Deref;
@ -9,11 +12,11 @@ pub(crate) fn expr_to_df(expr: DeleteExpr) -> Expr {
name: expr.column,
};
Expr::BinaryExpr {
left: Box::new(Expr::Column(column)),
op: op_to_df(expr.op),
right: Box::new(Expr::Literal(scalar_to_df(expr.scalar))),
}
binary_expr(
Expr::Column(column),
op_to_df(expr.op),
lit(scalar_to_df(expr.scalar)),
)
}
#[derive(Debug, Snafu)]
@ -37,7 +40,7 @@ pub enum DataFusionToExprError {
pub(crate) fn df_to_expr(expr: Expr) -> Result<DeleteExpr, DataFusionToExprError> {
match expr {
Expr::BinaryExpr { left, op, right } => {
Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
let (column, scalar) = match (left.deref(), right.deref()) {
// The delete predicate parser currently only supports `<column><op><value>`, not `<value><op><column>`,
// however this could can easily be extended to support the latter case as well.
@ -126,6 +129,7 @@ mod tests {
use test_helpers::assert_contains;
use super::*;
use datafusion::prelude::col;
#[test]
fn test_roundtrips() {
@ -173,33 +177,14 @@ mod tests {
#[test]
fn test_unsupported_expression() {
let expr = Expr::Not(Box::new(Expr::BinaryExpr {
left: Box::new(Expr::Column(datafusion::prelude::Column {
relation: None,
name: "foo".to_string(),
})),
op: datafusion::logical_expr::Operator::Eq,
right: Box::new(Expr::Literal(datafusion::scalar::ScalarValue::Utf8(Some(
"x".to_string(),
)))),
}));
let expr = (col("foo").eq(lit("x"))).not();
let res = df_to_expr(expr);
assert_contains!(res.unwrap_err().to_string(), "unsupported expression:");
}
#[test]
fn test_unsupported_operants() {
let expr = Expr::BinaryExpr {
left: Box::new(Expr::Column(datafusion::prelude::Column {
relation: None,
name: "foo".to_string(),
})),
op: datafusion::logical_expr::Operator::Eq,
right: Box::new(Expr::Column(datafusion::prelude::Column {
relation: None,
name: "bar".to_string(),
})),
};
let expr = col("foo").eq(col("bar"));
let res = df_to_expr(expr);
assert_contains!(res.unwrap_err().to_string(), "unsupported operants:");
}
@ -220,21 +205,10 @@ mod tests {
#[test]
fn test_unsupported_scalar_value_in_expr() {
let expr = Expr::BinaryExpr {
left: Box::new(Expr::Column(datafusion::prelude::Column {
relation: None,
name: "foo".to_string(),
})),
op: datafusion::logical_expr::Operator::Eq,
right: Box::new(Expr::Literal(datafusion::scalar::ScalarValue::List(
Some(vec![]),
Box::new(Field::new(
"field",
arrow::datatypes::DataType::Float64,
true,
)),
))),
};
let expr = col("foo").eq(lit(datafusion::scalar::ScalarValue::new_list(
Some(vec![]),
arrow::datatypes::DataType::Float64,
)));
let res = df_to_expr(expr);
assert_contains!(res.unwrap_err().to_string(), "unsupported scalar value:");
}
@ -247,16 +221,7 @@ mod tests {
#[test]
fn test_unsupported_operator_in_expr() {
let expr = Expr::BinaryExpr {
left: Box::new(Expr::Column(datafusion::prelude::Column {
relation: None,
name: "foo".to_string(),
})),
op: datafusion::logical_expr::Operator::Like,
right: Box::new(Expr::Literal(datafusion::scalar::ScalarValue::Utf8(Some(
"x".to_string(),
)))),
};
let expr = col("foo").like(lit("x"));
let res = df_to_expr(expr);
assert_contains!(res.unwrap_err().to_string(), "unsupported operator:");
}

View File

@ -2,7 +2,7 @@ use crate::delete_expr::{df_to_expr, expr_to_df};
use chrono::DateTime;
use data_types::{DeleteExpr, DeletePredicate, TimestampRange, Tombstone};
use datafusion::logical_expr::Operator;
use datafusion::prelude::{lit, Column, Expr};
use datafusion::prelude::{binary_expr, lit, Column, Expr};
use snafu::Snafu;
use sqlparser::{
ast::{BinaryOperator, Expr as SqlParserExpr, Ident, Statement, Value},
@ -203,11 +203,7 @@ fn split_members(predicate: &SqlParserExpr, predicates: &mut Vec<DeleteExpr>) ->
_ => return false, // not a literal
};
let expr = Expr::BinaryExpr {
left: Box::new(column),
op,
right: Box::new(value),
};
let expr = binary_expr(column, op, value);
let expr: Result<DeleteExpr, _> = df_to_expr(expr);
match expr {
Ok(expr) => {

View File

@ -23,7 +23,7 @@ use arrow::{
use data_types::{InfluxDbType, TableSummary, TimestampRange};
use datafusion::{
error::DataFusionError,
logical_expr::{binary_expr, utils::expr_to_columns, Operator},
logical_expr::{binary_expr, utils::expr_to_columns, BinaryExpr, Operator},
optimizer::utils::split_conjunction,
physical_optimizer::pruning::{PruningPredicate, PruningStatistics},
prelude::{col, lit_timestamp_nano, Expr},
@ -520,7 +520,7 @@ impl Predicate {
// and op must be a comparison one
pub fn primitive_binary_expr(expr: &Expr) -> bool {
match expr {
Expr::BinaryExpr { left, op, right } => {
Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
matches!(
(&**left, &**right),
(Expr::Column(_), Expr::Literal(_)) | (Expr::Literal(_), Expr::Column(_))
@ -553,11 +553,11 @@ impl TryFrom<Expr> for ValueExpr {
/// tries to create a new ValueExpr. If `expr` follows the
/// expected pattern, returns Ok(Self). If not, returns Err(expr)
fn try_from(expr: Expr) -> Result<Self, Self::Error> {
if let Expr::BinaryExpr {
if let Expr::BinaryExpr(BinaryExpr {
left,
op: _,
right: _,
} = &expr
}) = &expr
{
if let Expr::Column(inner) = left.as_ref() {
if inner.name == VALUE_COLUMN_NAME {
@ -573,7 +573,7 @@ impl ValueExpr {
/// Returns a new [`Expr`] with the reference to the `_value`
/// column replaced with the specified column name
pub fn replace_col(&self, name: &str) -> Expr {
if let Expr::BinaryExpr { left: _, op, right } = &self.expr {
if let Expr::BinaryExpr(BinaryExpr { left: _, op, right }) = &self.expr {
binary_expr(col(name), *op, right.as_ref().clone())
} else {
unreachable!("Unexpected content in ValueExpr")

View File

@ -4,7 +4,7 @@ use datafusion::{
binary_expr,
expr::Case,
expr_rewriter::{ExprRewritable, ExprRewriter},
Operator,
BinaryExpr, Operator,
},
prelude::Expr,
};
@ -114,10 +114,14 @@ fn is_comparison(op: Operator) -> bool {
impl ExprRewriter for IOxExprRewriter {
fn mutate(&mut self, expr: Expr) -> Result<Expr> {
match expr {
Expr::BinaryExpr { left, op, right } if is_case(&left) && is_comparison(op) => {
Expr::BinaryExpr(BinaryExpr { left, op, right })
if is_case(&left) && is_comparison(op) =>
{
Ok(inline_case(true, *left, *right, op))
}
Expr::BinaryExpr { left, op, right } if is_case(&right) && is_comparison(op) => {
Expr::BinaryExpr(BinaryExpr { left, op, right })
if is_case(&right) && is_comparison(op) =>
{
Ok(inline_case(false, *left, *right, op))
}
expr => Ok(expr),
@ -218,8 +222,8 @@ fn is_lit(expr: &Expr) -> bool {
/// returns the column name for an expression like `col = <lit>`
fn is_col_op_lit(expr: &Expr) -> Option<&str> {
match expr {
Expr::BinaryExpr { left, op: _, right } if is_lit(right) => is_col(left),
Expr::BinaryExpr { left, op: _, right } if is_lit(left) => is_col(right),
Expr::BinaryExpr(BinaryExpr { left, op: _, right }) if is_lit(right) => is_col(left),
Expr::BinaryExpr(BinaryExpr { left, op: _, right }) if is_lit(left) => is_col(right),
_ => None,
}
}
@ -251,11 +255,11 @@ impl ExprRewriter for IOxPredicateRewriter {
// true AND true
// true
match expr {
Expr::BinaryExpr {
Expr::BinaryExpr(BinaryExpr {
left,
op: Operator::And,
right,
} => {
}) => {
if let (Some(coll), Some(colr)) = (is_col_not_null(&left), is_col_op_lit(&right)) {
if colr == coll {
return Ok(*right);
@ -268,11 +272,11 @@ impl ExprRewriter for IOxPredicateRewriter {
}
};
Ok(Expr::BinaryExpr {
Ok(Expr::BinaryExpr(BinaryExpr {
left,
op: Operator::And,
right,
})
}))
}
expr => Ok(expr),
}
@ -387,26 +391,18 @@ mod tests {
fn run_case(op: Operator, expect_rewrite: bool, lit1: Expr, lit2: Expr) {
// CASE WHEN tag IS NULL then '' ELSE tag END = 'bar'
let expr = Expr::BinaryExpr {
left: Box::new(make_case(col("tag").is_null(), lit1.clone(), col("tag"))),
let expr = binary_expr(
make_case(col("tag").is_null(), lit1.clone(), col("tag")),
op,
right: Box::new(lit2.clone()),
};
lit2.clone(),
);
// CASE WHEN tag IS NULL then '' = 'bar' ELSE tag = 'bar' END
let expected = if expect_rewrite {
make_case(
col("tag").is_null(),
Expr::BinaryExpr {
left: Box::new(lit1),
op,
right: Box::new(lit2.clone()),
},
Expr::BinaryExpr {
left: Box::new(col("tag")),
op,
right: Box::new(lit2),
},
binary_expr(lit1, op, lit2.clone()),
binary_expr(col("tag"), op, lit2),
)
} else {
expr.clone()

View File

@ -4,7 +4,7 @@ version = "0.1.0"
edition = "2021"
[dependencies]
arrow = "24.0.0"
arrow = "25.0.0"
async-trait = "0.1.57"
backoff = { path = "../backoff" }
bytes = "1.2"

View File

@ -6,7 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
arrow = { version = "24.0.0", features = ["prettyprint"] }
arrow = { version = "25.0.0", features = ["prettyprint"] }
chrono = { version = "0.4", default-features = false }
datafusion = { path = "../datafusion" }
itertools = "0.10.5"

View File

@ -6,7 +6,7 @@ edition = "2021"
description = "Tests of the query engine against different database configurations"
[dependencies]
arrow = { version = "24.0.0", features = ["prettyprint"] }
arrow = { version = "25.0.0", features = ["prettyprint"] }
arrow_util = { path = "../arrow_util" }
async-trait = "0.1"
backoff = { path = "../backoff" }
@ -34,7 +34,7 @@ workspace-hack = { path = "../workspace-hack"}
parquet_file = { version = "0.1.0", path = "../parquet_file" }
[dev-dependencies]
arrow = { version = "24.0.0", features = ["prettyprint"] }
arrow = { version = "25.0.0", features = ["prettyprint"] }
arrow_util = { path = "../arrow_util" }
snafu = "0.7"
tempfile = "3.1.0"

View File

@ -135,21 +135,23 @@
| 872 | 6 | 1970-01-01T00:00:00.000000110Z | lawrence |
+-------+--------+--------------------------------+-----------+
-- SQL: EXPLAIN SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence') and count < 40000;
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Projection: CAST(restaurant.count AS Int64) AS CAST(restaurant.count AS Int64)restaurant.count, restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: CAST(restaurant.count AS Int64) > Int64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence")) AND CAST(restaurant.count AS Int64) < Int64(40000) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence")), CAST(restaurant.count AS Int64) < Int64(40000)] |
| physical_plan | ProjectionExec: expr=[count@1 as count, system@2 as system, time@3 as time, town@4 as town] |
| | ProjectionExec: expr=[CAST(count@0 AS Int64) as CAST(restaurant.count AS Int64)restaurant.count, count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: CAST(count@0 AS Int64) > 200 AND town@3 != tewsbury AND system@1 = 5 OR town@3 = lawrence AND CAST(count@0 AS Int64) < 40000 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | IOxReadFilterNode: table_name=restaurant, chunks=1 predicate=Predicate exprs: [town != Dictionary(Int32, Utf8("tewsbury"))] |
| | |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: CAST(restaurant.count AS Int64)restaurant.count > Int64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence")) AND CAST(restaurant.count AS Int64)restaurant.count < Int64(40000) |
| | Projection: CAST(restaurant.count AS Int64) AS CAST(restaurant.count AS Int64)restaurant.count, restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence")), CAST(restaurant.count AS Int64) < Int64(40000)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | ProjectionExec: expr=[count@1 as count, system@2 as system, time@3 as time, town@4 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: CAST(restaurant.count AS Int64)restaurant.count@0 > 200 AND town@4 != tewsbury AND system@2 = 5 OR town@4 = lawrence AND CAST(restaurant.count AS Int64)restaurant.count@0 < 40000 |
| | ProjectionExec: expr=[CAST(count@0 AS Int64) as CAST(restaurant.count AS Int64)restaurant.count, count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | IOxReadFilterNode: table_name=restaurant, chunks=1 predicate=Predicate exprs: [town != Dictionary(Int32, Utf8("tewsbury"))] |
| | |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where count > 200 and count < 40000;
-- Results After Sorting
+-------+--------+--------------------------------+-----------+
@ -162,21 +164,23 @@
| 872 | 6 | 1970-01-01T00:00:00.000000110Z | lawrence |
+-------+--------+--------------------------------+-----------+
-- SQL: EXPLAIN SELECT * from restaurant where count > 200 and count < 40000;
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Projection: CAST(restaurant.count AS Int64) AS CAST(restaurant.count AS Int64)restaurant.count, restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: CAST(restaurant.count AS Int64) > Int64(200) AND CAST(restaurant.count AS Int64) < Int64(40000) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200), CAST(restaurant.count AS Int64) < Int64(40000)] |
| physical_plan | ProjectionExec: expr=[count@1 as count, system@2 as system, time@3 as time, town@4 as town] |
| | ProjectionExec: expr=[CAST(count@0 AS Int64) as CAST(restaurant.count AS Int64)restaurant.count, count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: CAST(count@0 AS Int64) > 200 AND CAST(count@0 AS Int64) < 40000 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | IOxReadFilterNode: table_name=restaurant, chunks=1 predicate=Predicate |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: CAST(restaurant.count AS Int64)restaurant.count > Int64(200) AND CAST(restaurant.count AS Int64)restaurant.count < Int64(40000) |
| | Projection: CAST(restaurant.count AS Int64) AS CAST(restaurant.count AS Int64)restaurant.count, restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200), CAST(restaurant.count AS Int64) < Int64(40000)] |
| physical_plan | ProjectionExec: expr=[count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | ProjectionExec: expr=[count@1 as count, system@2 as system, time@3 as time, town@4 as town] |
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: CAST(restaurant.count AS Int64)restaurant.count@0 > 200 AND CAST(restaurant.count AS Int64)restaurant.count@0 < 40000 |
| | ProjectionExec: expr=[CAST(count@0 AS Int64) as CAST(restaurant.count AS Int64)restaurant.count, count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | IOxReadFilterNode: table_name=restaurant, chunks=1 predicate=Predicate |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where system > 4.0 and system < 7.0;
-- Results After Sorting
+-------+--------+--------------------------------+-----------+

View File

@ -6,7 +6,7 @@ edition = "2021"
description = "IOx Schema definition"
[dependencies]
arrow = { version = "24.0.0", features = ["prettyprint"] }
arrow = { version = "25.0.0", features = ["prettyprint"] }
hashbrown = "0.12"
indexmap = { version = "1.9", features = ["std"] }
itertools = "0.10.5"

View File

@ -19,8 +19,8 @@ trace_http = { path = "../trace_http"}
tracker = { path = "../tracker" }
# Crates.io dependencies, in alphabetical order
arrow = { version = "24.0.0", features = ["prettyprint"] }
arrow-flight = "24.0.0"
arrow = { version = "25.0.0", features = ["prettyprint"] }
arrow-flight = "25.0.0"
bytes = "1.2"
futures = "0.3"
pin-project = "1.0"

View File

@ -19,7 +19,7 @@ trace_http = { path = "../trace_http"}
tracker = { path = "../tracker" }
# Crates.io dependencies, in alphabetical order
arrow = { version = "24.0.0", features = ["prettyprint"] }
arrow = { version = "25.0.0", features = ["prettyprint"] }
async-trait = "0.1"
futures = "0.3"
pin-project = "1.0"

View File

@ -4,7 +4,7 @@ version = "0.1.0"
edition = "2021"
[dependencies] # In alphabetical order
arrow = { version = "24.0.0", features = ["prettyprint"] }
arrow = { version = "25.0.0", features = ["prettyprint"] }
arrow_util = { path = "../arrow_util" }
assert_cmd = "2.0.2"
bytes = "1.2"

View File

@ -14,7 +14,7 @@ publish = false
### BEGIN HAKARI SECTION
[dependencies]
ahash-ca01ad9e24f5d932 = { package = "ahash", version = "0.7", features = ["std"] }
arrow = { version = "24", features = ["comfy-table", "csv", "csv_crate", "dyn_cmp_dict", "flatbuffers", "ipc", "json", "prettyprint", "serde_json"] }
arrow = { version = "25", features = ["comfy-table", "csv", "csv_crate", "dyn_cmp_dict", "flatbuffers", "ipc", "json", "prettyprint", "serde_json"] }
base64 = { version = "0.13", features = ["std"] }
bitflags = { version = "1" }
byteorder = { version = "1", features = ["std"] }
@ -45,7 +45,7 @@ num-traits = { version = "0.2", features = ["i128", "libm", "std"] }
object_store = { version = "0.5", default-features = false, features = ["aws", "azure", "base64", "cloud", "gcp", "quick-xml", "rand", "reqwest", "ring", "rustls-pemfile", "serde", "serde_json"] }
once_cell = { version = "1", features = ["alloc", "parking_lot", "parking_lot_core", "race", "std"] }
parking_lot = { version = "0.12", features = ["arc_lock"] }
parquet = { version = "24", features = ["arrow", "async", "base64", "brotli", "experimental", "flate2", "futures", "lz4", "snap", "tokio", "zstd"] }
parquet = { version = "25", features = ["arrow", "async", "base64", "brotli", "experimental", "flate2", "futures", "lz4", "snap", "tokio", "zstd"] }
predicates = { version = "2", features = ["diff", "difflib", "float-cmp", "normalize-line-endings", "regex"] }
prost = { version = "0.11", features = ["prost-derive", "std"] }
prost-types = { version = "0.11", features = ["std"] }