From 4da8a16c1808804ba349292ba2622b1afac8926b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 19 Jul 2021 08:49:51 -0400 Subject: [PATCH] chore: update to arrow 5.0 and master datafusion (#2049) * chore: update to arrow 5.0 and master datafusion * fix: Update test for change in object size --- Cargo.lock | 22 ++++++++++++---------- Cargo.toml | 12 +++--------- arrow_util/Cargo.toml | 2 +- datafusion/Cargo.toml | 2 +- influxdb_iox_client/Cargo.toml | 4 ++-- internal_types/Cargo.toml | 2 +- mem_qe/Cargo.toml | 4 ++-- mutable_buffer/Cargo.toml | 2 +- packers/Cargo.toml | 4 ++-- parquet_file/Cargo.toml | 4 ++-- query/Cargo.toml | 2 +- query_tests/Cargo.toml | 2 +- read_buffer/Cargo.toml | 2 +- server/Cargo.toml | 2 +- server/src/db.rs | 2 +- 15 files changed, 32 insertions(+), 36 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 36eb430f39..fda86e8a9a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -126,14 +126,14 @@ checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" [[package]] name = "arrow" -version = "4.4.0" -source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/perf_integration#d416e9158275148e2be5e64a1c8a6689c7a83fac" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06d2bd50fddbdcecd832742b388228eec02eb3aabd33144ef46f6bc5420bf662" dependencies = [ "bitflags", "chrono", "csv", "flatbuffers", - "getrandom 0.2.3", "hex", "indexmap", "lazy_static", @@ -150,8 +150,9 @@ dependencies = [ [[package]] name = "arrow-flight" -version = "4.4.0" -source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/perf_integration#d416e9158275148e2be5e64a1c8a6689c7a83fac" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "628b4735031e1d5e41c7c1f210ad233ab942adcf155edcba05c3e6d762062349" dependencies = [ "arrow", "base64 0.13.0", @@ -841,7 +842,7 @@ dependencies = [ [[package]] name = "datafusion" version = "4.0.0-SNAPSHOT" -source = "git+https://github.com/alamb/arrow-datafusion.git?branch=alamb/perf_integration_df_2#d201ebf323a532ac858fe33083639df4a8d321ee" +source = "git+https://github.com/apache/arrow-datafusion.git#bd3ee23520a3e6f135891ec32d96fcea7ee2bb55" dependencies = [ "ahash 0.7.4", "arrow", @@ -2593,8 +2594,9 @@ dependencies = [ [[package]] name = "parquet" -version = "4.4.0" -source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/perf_integration#d416e9158275148e2be5e64a1c8a6689c7a83fac" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9297cb17ef7287f6105685d230abbb2b37247657edf2b4a99271088e7d5b0ddd" dependencies = [ "arrow", "base64 0.13.0", @@ -4980,9 +4982,9 @@ checksum = "b07db065a5cf61a7e4ba64f29e67db906fb1787316516c4e6e5ff0fea1efcd8a" [[package]] name = "zeroize" -version = "1.3.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4756f7db3f7b5574938c3eb1c117038b8e07f95ee6718c0efad4ac21508f1efd" +checksum = "eeafe61337cb2c879d328b74aa6cd9d794592c82da6be559fdf11493f02a2d18" [[package]] name = "zstd" diff --git a/Cargo.toml b/Cargo.toml index b86ddd5c62..6f5111b77e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -70,8 +70,8 @@ tracker = { path = "tracker" } trogging = { path = "trogging", features = ["structopt"] } # Crates.io dependencies, in alphabetical order -arrow = { version = "4.0", features = ["prettyprint"] } -arrow-flight = "4.0" +arrow = { version = "5.0", features = ["prettyprint"] } +arrow-flight = "5.0" byteorder = "1.3.4" bytes = "1.0" chrono = "0.4" @@ -88,7 +88,7 @@ opentelemetry-jaeger = { version = "0.12", features = ["tokio"] } opentelemetry-otlp = "0.6" parking_lot = "0.11.1" itertools = "0.10.1" -parquet = "4.0" +parquet = "5.0" # used by arrow/datafusion anyway prettytable-rs = "0.8" pprof = { version = "^0.4", default-features = false, features = ["flamegraph", "protobuf"] } @@ -138,9 +138,3 @@ tempfile = "3.1.0" azure = ["object_store/azure"] gcp = ["object_store/gcp"] aws = ["object_store/aws"] - - -[patch.crates-io] -arrow = { git="https://github.com/alamb/arrow-rs.git", branch = "alamb/perf_integration" } -parquet = { git="https://github.com/alamb/arrow-rs.git", branch = "alamb/perf_integration" } -arrow-flight= { git="https://github.com/alamb/arrow-rs.git", branch = "alamb/perf_integration" } diff --git a/arrow_util/Cargo.toml b/arrow_util/Cargo.toml index 3a0baae30a..23ff39ef37 100644 --- a/arrow_util/Cargo.toml +++ b/arrow_util/Cargo.toml @@ -7,7 +7,7 @@ description = "Apache Arrow utilities" [dependencies] -arrow = { version = "4.0", features = ["prettyprint"] } +arrow = { version = "5.0", features = ["prettyprint"] } ahash = "0.7.2" num-traits = "0.2" snafu = "0.6" diff --git a/datafusion/Cargo.toml b/datafusion/Cargo.toml index e780e1c993..ace0470c2e 100644 --- a/datafusion/Cargo.toml +++ b/datafusion/Cargo.toml @@ -9,4 +9,4 @@ description = "Re-exports datafusion at a specific version" # Rename to workaround doctest bug # Turn off optional datafusion features (function packages) -upstream = { git = "https://github.com/alamb/arrow-datafusion.git", branch = "alamb/perf_integration_df_2", default-features = false, package = "datafusion" } +upstream = { git = "https://github.com/apache/arrow-datafusion.git", ref="bd3ee23520a3e6f135891ec32d96fcea7ee2bb55", default-features = false, package = "datafusion" } diff --git a/influxdb_iox_client/Cargo.toml b/influxdb_iox_client/Cargo.toml index 2d9f39bb4f..df6fe83ad4 100644 --- a/influxdb_iox_client/Cargo.toml +++ b/influxdb_iox_client/Cargo.toml @@ -10,8 +10,8 @@ format = ["arrow"] [dependencies] # Workspace dependencies, in alphabetical order -arrow = { version = "4.0", optional = true } -arrow-flight = { version = "4.0", optional = true} +arrow = { version = "5.0", optional = true } +arrow-flight = { version = "5.0", optional = true} generated_types = { path = "../generated_types" } # Crates.io dependencies, in alphabetical order diff --git a/internal_types/Cargo.toml b/internal_types/Cargo.toml index 7b60daa2eb..2eee530504 100644 --- a/internal_types/Cargo.toml +++ b/internal_types/Cargo.toml @@ -7,7 +7,7 @@ description = "InfluxDB IOx internal types, shared between IOx instances" readme = "README.md" [dependencies] -arrow = { version = "4.0", features = ["prettyprint"] } +arrow = { version = "5.0", features = ["prettyprint"] } hashbrown = "0.11" indexmap = "1.6" itertools = "0.10.1" diff --git a/mem_qe/Cargo.toml b/mem_qe/Cargo.toml index b26ab0ac06..c111c9ea7d 100644 --- a/mem_qe/Cargo.toml +++ b/mem_qe/Cargo.toml @@ -5,7 +5,7 @@ authors = ["Edd Robinson "] edition = "2018" [dependencies] # In alphabetical order -arrow = { version = "4.0", features = ["prettyprint"] } +arrow = { version = "5.0", features = ["prettyprint"] } chrono = "0.4" croaring = "0.5" crossbeam = "0.8" @@ -14,7 +14,7 @@ human_format = "1.0.3" packers = { path = "../packers" } snafu = "0.6.8" observability_deps = { path = "../observability_deps" } -parquet = "4.0" +parquet = "5.0" [dev-dependencies] # In alphabetical order criterion = "0.3" diff --git a/mutable_buffer/Cargo.toml b/mutable_buffer/Cargo.toml index 500382bda0..4ab0e7fc01 100644 --- a/mutable_buffer/Cargo.toml +++ b/mutable_buffer/Cargo.toml @@ -14,7 +14,7 @@ edition = "2018" # 2. Keep change/compile/link time down during development when working on just this crate [dependencies] # In alphabetical order -arrow = { version = "4.0", features = ["prettyprint"] } +arrow = { version = "5.0", features = ["prettyprint"] } arrow_util = { path = "../arrow_util" } async-trait = "0.1" chrono = "0.4" diff --git a/packers/Cargo.toml b/packers/Cargo.toml index 48b2d06668..928c5e36d1 100644 --- a/packers/Cargo.toml +++ b/packers/Cargo.toml @@ -5,13 +5,13 @@ authors = ["Andrew Lamb "] edition = "2018" [dependencies] # In alphabetical order -arrow = { version = "4.0", features = ["prettyprint"] } +arrow = { version = "5.0", features = ["prettyprint"] } human_format = "1.0.3" influxdb_tsm = { path = "../influxdb_tsm" } internal_types = { path = "../internal_types" } snafu = "0.6.2" observability_deps = { path = "../observability_deps" } -parquet = "4.0" +parquet = "5.0" [dev-dependencies] # In alphabetical order rand = "0.8.3" diff --git a/parquet_file/Cargo.toml b/parquet_file/Cargo.toml index 41c87a19aa..98177ffc62 100644 --- a/parquet_file/Cargo.toml +++ b/parquet_file/Cargo.toml @@ -5,7 +5,7 @@ authors = ["Nga Tran "] edition = "2018" [dependencies] # In alphabetical order -arrow = { version = "4.0", features = ["prettyprint"] } +arrow = { version = "5.0", features = ["prettyprint"] } base64 = "0.13" bytes = "1.0" chrono = "0.4" @@ -20,7 +20,7 @@ object_store = {path = "../object_store"} observability_deps = { path = "../observability_deps" } # Turn off the "arrow" feature; it currently has a bug that causes the crate to rebuild every time # and we're not currently using it anyway -parquet = "4.0" +parquet = "5.0" parquet-format = "2.6" parking_lot = "0.11.1" persistence_windows = { path = "../persistence_windows" } diff --git a/query/Cargo.toml b/query/Cargo.toml index 0ac4d91b47..325b99b0cd 100644 --- a/query/Cargo.toml +++ b/query/Cargo.toml @@ -14,7 +14,7 @@ description = "IOx Query Interface and Executor" # 2. Allow for query logic testing without bringing in all the storage systems. [dependencies] # In alphabetical order -arrow = { version = "4.0", features = ["prettyprint"] } +arrow = { version = "5.0", features = ["prettyprint"] } arrow_util = { path = "../arrow_util" } async-trait = "0.1" chrono = "0.4" diff --git a/query_tests/Cargo.toml b/query_tests/Cargo.toml index b55347d5b9..5ef5c8af73 100644 --- a/query_tests/Cargo.toml +++ b/query_tests/Cargo.toml @@ -15,7 +15,7 @@ query = { path = "../query" } server = { path = "../server" } [dev-dependencies] -arrow = { version = "4.0", features = ["prettyprint"] } +arrow = { version = "5.0", features = ["prettyprint"] } arrow_util = { path = "../arrow_util" } datafusion = { path = "../datafusion" } data_types = { path = "../data_types" } diff --git a/read_buffer/Cargo.toml b/read_buffer/Cargo.toml index cb18c85903..3d18f0b24f 100644 --- a/read_buffer/Cargo.toml +++ b/read_buffer/Cargo.toml @@ -11,7 +11,7 @@ edition = "2018" # 2. Keep change/compile/link time down during development when working on just this crate [dependencies] # In alphabetical order -arrow = { version = "4.0", features = ["prettyprint"] } +arrow = { version = "5.0", features = ["prettyprint"] } arrow_util = { path = "../arrow_util" } croaring = "0.5" data_types = { path = "../data_types" } diff --git a/server/Cargo.toml b/server/Cargo.toml index 7bf4077e8b..2a77eaa0ba 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -5,7 +5,7 @@ authors = ["pauldix "] edition = "2018" [dependencies] # In alphabetical order -arrow = { version = "4.0", features = ["prettyprint"] } +arrow = { version = "5.0", features = ["prettyprint"] } arrow_util = { path = "../arrow_util" } async-trait = "0.1" bytes = { version = "1.0" } diff --git a/server/src/db.rs b/server/src/db.rs index b6792835da..afe9ad03fa 100644 --- a/server/src/db.rs +++ b/server/src/db.rs @@ -2489,7 +2489,7 @@ mod tests { ChunkStorage::ReadBufferAndObjectStore, lifecycle_action, 3236, - 1528, + 1479, 2, ), ChunkSummary::new_without_timestamps(