chore: Update datafusion, arrow, flight and parquet (#4000)

* chore: Update datafusion, arrow, flight and parquet

* fix: api change

* fix: fmt

* fix: update test metadata size

* fix: Update sizes in parquet test

* fix: more metadata size update
pull/24376/head
Andrew Lamb 2022-03-10 07:24:47 -05:00 committed by GitHub
parent 12771a7061
commit 2c3d30ca32
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
24 changed files with 77 additions and 69 deletions

43
Cargo.lock generated
View File

@ -96,9 +96,9 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
[[package]]
name = "arrow"
version = "9.1.0"
version = "10.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9864ca2fdcd3d4883259495b4517879877c5991d9928cc9713794d8076d3e78b"
checksum = "1328dbc6d5d76a08b13df3ac630f61a6a31276d9e9d08eb813e98efa624c2382"
dependencies = [
"bitflags",
"chrono",
@ -121,9 +121,9 @@ dependencies = [
[[package]]
name = "arrow-flight"
version = "9.1.0"
version = "10.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae6fadf09e154455a27259f45ee4c462ca76ae453ce70f2f76220b7cfd140da8"
checksum = "63ffffdef0e51a8fc57f25f75c06cbe0a1f4f047251047bebc0318212fb7d386"
dependencies = [
"arrow",
"base64 0.13.0",
@ -1084,7 +1084,7 @@ dependencies = [
[[package]]
name = "datafusion"
version = "7.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=3261e2bcdfa13e460d7a11a64dd72f272c7d872b#3261e2bcdfa13e460d7a11a64dd72f272c7d872b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=0e440eaa5ca52c0261e88590b846808539198241#0e440eaa5ca52c0261e88590b846808539198241"
dependencies = [
"ahash",
"arrow",
@ -1105,7 +1105,7 @@ dependencies = [
"pin-project-lite",
"rand",
"smallvec",
"sqlparser",
"sqlparser 0.15.0",
"tempfile",
"tokio",
"tokio-stream",
@ -1114,29 +1114,29 @@ dependencies = [
[[package]]
name = "datafusion-common"
version = "7.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=3261e2bcdfa13e460d7a11a64dd72f272c7d872b#3261e2bcdfa13e460d7a11a64dd72f272c7d872b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=0e440eaa5ca52c0261e88590b846808539198241#0e440eaa5ca52c0261e88590b846808539198241"
dependencies = [
"arrow",
"ordered-float 2.10.0",
"parquet",
"sqlparser",
"sqlparser 0.15.0",
]
[[package]]
name = "datafusion-expr"
version = "7.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=3261e2bcdfa13e460d7a11a64dd72f272c7d872b#3261e2bcdfa13e460d7a11a64dd72f272c7d872b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=0e440eaa5ca52c0261e88590b846808539198241#0e440eaa5ca52c0261e88590b846808539198241"
dependencies = [
"ahash",
"arrow",
"datafusion-common",
"sqlparser",
"sqlparser 0.15.0",
]
[[package]]
name = "datafusion-physical-expr"
version = "7.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=3261e2bcdfa13e460d7a11a64dd72f272c7d872b#3261e2bcdfa13e460d7a11a64dd72f272c7d872b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=0e440eaa5ca52c0261e88590b846808539198241#0e440eaa5ca52c0261e88590b846808539198241"
dependencies = [
"ahash",
"arrow",
@ -1477,9 +1477,9 @@ checksum = "279fb028e20b3c4c320317955b77c5e0c9701f05a1d309905d6fc702cdc5053e"
[[package]]
name = "flatbuffers"
version = "2.0.0"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef4c5738bcd7fad10315029c50026f83c9da5e4a21f8ed66826f43e0e2bde5f6"
checksum = "6ea97b4fe4b84e2f2765449bcea21cbdb3ee28cecb88afbf38a0c2e1639f5eb5"
dependencies = [
"bitflags",
"smallvec",
@ -2335,7 +2335,7 @@ version = "0.1.0"
dependencies = [
"generated_types",
"snafu",
"sqlparser",
"sqlparser 0.14.0",
"workspace-hack",
]
@ -3555,9 +3555,9 @@ dependencies = [
[[package]]
name = "parquet"
version = "9.1.0"
version = "10.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1697d963e6319c19099adcf131a5440685053d4902890f9e4bb272cbd0dc6532"
checksum = "53e9c8fc20af9b92d85d42ec86e5217b2eaf1340fbba75c4b4296de764ea7921"
dependencies = [
"arrow",
"base64 0.13.0",
@ -3912,7 +3912,7 @@ dependencies = [
"schema",
"serde_json",
"snafu",
"sqlparser",
"sqlparser 0.14.0",
"test_helpers",
"tokio",
"workspace-hack",
@ -5187,6 +5187,15 @@ dependencies = [
"log",
]
[[package]]
name = "sqlparser"
version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adbbea2526ad0d02ad9414a07c396078a5b944bbf9ca4fbab8f01bb4cb579081"
dependencies = [
"log",
]
[[package]]
name = "sqlx"
version = "0.5.11"

View File

@ -7,7 +7,7 @@ description = "Apache Arrow utilities"
[dependencies]
ahash = { version = "0.7.5", default-features = false }
arrow = { version = "9.1", features = ["prettyprint"] }
arrow = { version = "10", features = ["prettyprint"] }
# used by arrow anyway (needed for printing workaround)
chrono = { version = "0.4", default-features = false }
comfy-table = { version = "5.0", default-features = false }

View File

@ -9,5 +9,5 @@ description = "Re-exports datafusion at a specific version"
# Rename to work around doctest bug
# Turn off optional datafusion features (e.g. don't get support for crypto functions or avro)
upstream = { git = "https://github.com/apache/arrow-datafusion.git", rev="3261e2bcdfa13e460d7a11a64dd72f272c7d872b", default-features = false, package = "datafusion" }
upstream = { git = "https://github.com/apache/arrow-datafusion.git", rev="0e440eaa5ca52c0261e88590b846808539198241", default-features = false, package = "datafusion" }
workspace-hack = { path = "../workspace-hack"}

View File

@ -5,7 +5,7 @@ authors = ["pauldix <paul@pauldix.net>"]
edition = "2021"
[dependencies] # In alphabetical order
arrow = { version = "9.1", features = ["prettyprint"] }
arrow = { version = "10", features = ["prettyprint"] }
async-trait = "0.1"
data_types = { path = "../data_types" }
datafusion = { path = "../datafusion" }

View File

@ -1698,7 +1698,7 @@ mod tests {
.id();
// A chunk is now in the object store and still in read buffer
let expected_parquet_size = 1257;
let expected_parquet_size = 1258;
catalog_chunk_size_bytes_metric_eq(registry, "read_buffer", expected_read_buffer_size);
// now also in OS
catalog_chunk_size_bytes_metric_eq(registry, "object_store", expected_parquet_size);
@ -2730,8 +2730,8 @@ mod tests {
id: chunk_summaries[0].id,
storage: ChunkStorage::ReadBufferAndObjectStore,
lifecycle_action,
memory_bytes: 4102, // size of RB and OS chunks
object_store_bytes: 1573, // size of parquet file
memory_bytes: 4103, // size of RB and OS chunks
object_store_bytes: 1574, // size of parquet file
row_count: 2,
time_of_last_access: None,
time_of_first_write: Time::from_timestamp_nanos(1),
@ -2781,7 +2781,7 @@ mod tests {
assert_eq!(db.catalog.metrics().memory().mutable_buffer(), 2486 + 1463);
assert_eq!(db.catalog.metrics().memory().read_buffer(), 2550);
assert_eq!(db.catalog.metrics().memory().object_store(), 1552);
assert_eq!(db.catalog.metrics().memory().object_store(), 1553);
}
#[tokio::test]

View File

@ -55,8 +55,8 @@ write_buffer = { path = "../write_buffer" }
# Crates.io dependencies, in alphabetical order
ansi_term = "0.12"
arrow = { version = "9.1", features = ["prettyprint"] }
arrow-flight = "9.1"
arrow = { version = "10", features = ["prettyprint"] }
arrow-flight = "10"
async-trait = "0.1"
backtrace = "0.3"
byteorder = "1.3.4"
@ -79,7 +79,7 @@ log = "0.4"
num_cpus = "1.13.0"
once_cell = { version = "1.10.0", features = ["parking_lot"] }
parking_lot = "0.12"
parquet = "9.1"
parquet = "10"
pin-project = "1.0"
pprof = { version = "0.6", default-features = false, features = ["flamegraph", "protobuf"], optional = true }
prost = "0.9"

View File

@ -17,8 +17,8 @@ client_util = { path = "../client_util" }
generated_types = { path = "../generated_types", default-features = false }
# Crates.io dependencies, in alphabetical order
arrow = { version = "9.1", optional = true }
arrow-flight = { version = "9.1", optional = true }
arrow = { version = "10", optional = true }
arrow-flight = { version = "10", optional = true }
bytes = "1.0"
futures-util = { version = "0.3", optional = true }
dml = { path = "../dml", optional = true }

View File

@ -52,8 +52,8 @@ write_buffer = { path = "../write_buffer" }
# Crates.io dependencies, in alphabetical order
ansi_term = "0.12"
arrow = { version = "9.1", features = ["prettyprint"] }
arrow-flight = "9.1"
arrow = { version = "10", features = ["prettyprint"] }
arrow-flight = "10"
async-trait = "0.1"
byteorder = "1.3.4"
bytes = "1.0"
@ -73,7 +73,7 @@ log = "0.4"
num_cpus = "1.13.0"
once_cell = { version = "1.10.0", features = ["parking_lot"] }
parking_lot = "0.12"
parquet = "9.1"
parquet = "10"
pin-project = "1.0"
pprof = { version = "0.6", default-features = false, features = ["flamegraph", "protobuf"], optional = true }
prost = "0.9"

View File

@ -5,8 +5,8 @@ authors = ["Nga Tran <nga-tran@live.com>"]
edition = "2021"
[dependencies]
arrow = { version = "9.1", features = ["prettyprint"] }
arrow-flight = "9.1"
arrow = { version = "10", features = ["prettyprint"] }
arrow-flight = "10"
arrow_util = { path = "../arrow_util" }
async-trait = "0.1.42"
backoff = { path = "../backoff" }
@ -28,7 +28,7 @@ mutable_batch_lp = { path = "../mutable_batch_lp" }
object_store = { path = "../object_store" }
observability_deps = { path = "../observability_deps" }
parking_lot = "0.12"
parquet = "9.1"
parquet = "10"
parquet_file = { path = "../parquet_file" }
pin-project = "1.0"
predicate = { path = "../predicate" }
@ -50,4 +50,3 @@ trace = { path = "../trace" }
[dev-dependencies]
bitflags = {version = "1.3.2"}
test_helpers = { path = "../test_helpers" }

View File

@ -5,7 +5,7 @@ edition = "2021"
description = "A mutable arrow RecordBatch"
[dependencies]
arrow = { version = "9.1", features = ["prettyprint"] }
arrow = { version = "10", features = ["prettyprint"] }
arrow_util = { path = "../arrow_util" }
chrono = { version = "0.4", default-features = false }
data_types = { path = "../data_types" }

View File

@ -5,7 +5,7 @@ authors = ["Andrew Lamb <andrew@nerdnetworks.org>"]
edition = "2021"
[dependencies] # In alphabetical order
arrow = { version = "9.1", features = ["prettyprint"] }
arrow = { version = "10", features = ["prettyprint"] }
data_types = { path = "../data_types" }
schema = { path = "../schema" }
metric = { path = "../metric" }

View File

@ -5,12 +5,12 @@ authors = ["Andrew Lamb <andrew@nerdnetworks.org>"]
edition = "2021"
[dependencies] # In alphabetical order
arrow = { version = "9.1", features = ["prettyprint"] }
arrow = { version = "10", features = ["prettyprint"] }
influxdb_tsm = { path = "../influxdb_tsm" }
schema = { path = "../schema" }
snafu = "0.7"
observability_deps = { path = "../observability_deps" }
parquet = "9.1"
parquet = "10"
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies] # In alphabetical order

View File

@ -4,7 +4,7 @@ version = "0.1.0"
edition = "2021"
[dependencies]
arrow = { version = "9.1", features = ["prettyprint"] }
arrow = { version = "10", features = ["prettyprint"] }
base64 = "0.13"
bytes = "1.0"
data_types = { path = "../data_types" }
@ -16,7 +16,7 @@ iox_object_store = { path = "../iox_object_store" }
metric = { path = "../metric" }
object_store = { path = "../object_store" }
observability_deps = { path = "../observability_deps" }
parquet = "9.1"
parquet = "10"
parquet_file = { path = "../parquet_file" }
parquet-format = "4.0"
parking_lot = "0.12"

View File

@ -301,8 +301,8 @@ File {
file_name: "00000000-0000-0000-0000-000000000001.parquet",
},
),
file_size_bytes: 3660,
metadata: b"metadata omitted (952 bytes)",
file_size_bytes: 3661,
metadata: b"metadata omitted (953 bytes)",
},
),
),
@ -333,7 +333,7 @@ File {
assert_eq!(
actual, expected,
"actual:\n{}\n\nexpected:\n{}",
"\n\nactual:\n{}\n\nexpected:\n{}",
actual, expected
);
}
@ -417,8 +417,8 @@ File {
file_name: "00000000-0000-0000-0000-000000000001.parquet",
},
),
file_size_bytes: 3660,
metadata: b"metadata omitted (952 bytes)",
file_size_bytes: 3661,
metadata: b"metadata omitted (953 bytes)",
},
),
),
@ -617,7 +617,7 @@ File {
assert_eq!(
actual, expected,
"actual:\n{}\n\nexpected:\n{}",
"\n\nactual:\n{}\n\nexpected:\n{}",
actual, expected
);
}

View File

@ -5,7 +5,7 @@ authors = ["Nga Tran <nga-tran@live.com>"]
edition = "2021"
[dependencies] # In alphabetical order
arrow = { version = "9.1", features = ["prettyprint"] }
arrow = { version = "10", features = ["prettyprint"] }
base64 = "0.13"
bytes = "1.0"
data_types = { path = "../data_types" }
@ -18,7 +18,7 @@ iox_object_store = { path = "../iox_object_store" }
metric = { path = "../metric" }
object_store = { path = "../object_store" }
observability_deps = { path = "../observability_deps" }
parquet = {version = "9.1", features = ["experimental"]}
parquet = {version = "10", features = ["experimental"]}
parquet-format = "4.0"
parking_lot = "0.12"
pbjson-types = "0.2"

View File

@ -1262,7 +1262,7 @@ mod tests {
let mut generator = ChunkGenerator::new().await;
let (chunk, _) = generator.generate().await.unwrap();
let parquet_metadata = chunk.parquet_metadata();
assert_eq!(parquet_metadata.size(), 4068);
assert_eq!(parquet_metadata.size(), 4069);
}
#[test]

View File

@ -4,7 +4,7 @@ version = "0.1.0"
edition = "2021"
[dependencies]
arrow = { version = "9.1", features = ["prettyprint"] }
arrow = { version = "10", features = ["prettyprint"] }
chrono = { version = "0.4", default-features = false }
data_types = { path = "../data_types" }
datafusion = { path = "../datafusion" }

View File

@ -4,8 +4,8 @@ version = "0.1.0"
edition = "2021"
[dependencies]
arrow = "9.1"
arrow-flight = "9.1"
arrow = "10"
arrow-flight = "10"
async-trait = "0.1.42"
backoff = { path = "../backoff" }
bytes = "1.0"
@ -40,7 +40,6 @@ uuid = { version = "0.8", features = ["v4"] }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]
arrow = { version = "9.1", features = ["prettyprint"] }
arrow_util = { path = "../arrow_util" }
bytes = "1.0"
mutable_batch_lp = { path = "../mutable_batch_lp" }

View File

@ -14,7 +14,7 @@ description = "IOx Query Interface and Executor"
# 2. Allow for query logic testing without bringing in all the storage systems.
[dependencies] # In alphabetical order
arrow = { version = "9.1", features = ["prettyprint"] }
arrow = { version = "10", features = ["prettyprint"] }
arrow_util = { path = "../arrow_util" }
async-trait = "0.1"
chrono = { version = "0.4", default-features = false }

View File

@ -57,14 +57,15 @@ where
};
trace!(%filter_expr, "Filter_expr of pruning chunks");
let pruning_predicate = match PruningPredicate::try_new(&filter_expr, table_schema.as_arrow()) {
Ok(p) => p,
Err(e) => {
observer.could_not_prune("Can not create pruning predicate");
trace!(%e, ?filter_expr, "Can not create pruning predicate");
return chunks;
}
};
let pruning_predicate =
match PruningPredicate::try_new(filter_expr.clone(), table_schema.as_arrow()) {
Ok(p) => p,
Err(e) => {
observer.could_not_prune("Can not create pruning predicate");
trace!(%e, ?filter_expr, "Can not create pruning predicate");
return chunks;
}
};
let statistics = ChunkPruningStatistics {
table_schema: table_schema.as_ref(),

View File

@ -18,7 +18,7 @@ query = { path = "../query" }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]
arrow = { version = "9.1", features = ["prettyprint"] }
arrow = { version = "10", features = ["prettyprint"] }
arrow_util = { path = "../arrow_util" }
metric = { path = "../metric" }
object_store = { path = "../object_store" }

View File

@ -11,7 +11,7 @@ edition = "2021"
# 2. Keep change/compile/link time down during development when working on just this crate
[dependencies] # In alphabetical order
arrow = { version = "9.1", features = ["prettyprint"] }
arrow = { version = "10", features = ["prettyprint"] }
arrow_util = { path = "../arrow_util" }
croaring = "0.5"
data_types = { path = "../data_types" }

View File

@ -6,7 +6,7 @@ edition = "2021"
description = "IOx Schema definition"
[dependencies]
arrow = { version = "9.1", features = ["prettyprint"] }
arrow = { version = "10", features = ["prettyprint"] }
hashbrown = "0.12"
indexmap = { version = "1.7", features = ["std"] }
itertools = "0.10.1"

View File

@ -14,7 +14,7 @@ publish = false
### BEGIN HAKARI SECTION
[dependencies]
ahash = { version = "0.7", features = ["std"] }
arrow = { version = "9", features = ["comfy-table", "csv", "csv_crate", "flatbuffers", "ipc", "prettyprint", "rand", "test_utils"] }
arrow = { version = "10", features = ["comfy-table", "csv", "csv_crate", "flatbuffers", "ipc", "prettyprint", "rand", "test_utils"] }
base64 = { version = "0.13", features = ["alloc", "std"] }
bitflags = { version = "1" }
byteorder = { version = "1", features = ["std"] }
@ -41,7 +41,7 @@ num-bigint = { version = "0.4", features = ["std"] }
num-integer = { version = "0.1", default-features = false, features = ["i128", "std"] }
num-traits = { version = "0.2", features = ["i128", "libm", "std"] }
once_cell = { version = "1", features = ["alloc", "parking_lot", "race", "std"] }
parquet = { version = "9", features = ["arrow", "base64", "brotli", "experimental", "flate2", "lz4", "snap", "zstd"] }
parquet = { version = "10", features = ["arrow", "base64", "brotli", "experimental", "flate2", "lz4", "snap", "zstd"] }
rand = { version = "0.8", features = ["alloc", "getrandom", "libc", "rand_chacha", "rand_hc", "small_rng", "std", "std_rng"] }
regex = { version = "1", features = ["aho-corasick", "memchr", "perf", "perf-cache", "perf-dfa", "perf-inline", "perf-literal", "std", "unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] }
regex-automata = { version = "0.1", features = ["regex-syntax", "std"] }