chore: update to arrow 5.0 and master datafusion (#2049)

* chore: update to arrow 5.0 and master datafusion

* fix: Update test for change in object size
pull/24376/head
Andrew Lamb 2021-07-19 08:49:51 -04:00 committed by GitHub
parent e2a23c7ac3
commit 4da8a16c18
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 32 additions and 36 deletions

22
Cargo.lock generated
View File

@ -126,14 +126,14 @@ checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
[[package]]
name = "arrow"
version = "4.4.0"
source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/perf_integration#d416e9158275148e2be5e64a1c8a6689c7a83fac"
version = "5.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06d2bd50fddbdcecd832742b388228eec02eb3aabd33144ef46f6bc5420bf662"
dependencies = [
"bitflags",
"chrono",
"csv",
"flatbuffers",
"getrandom 0.2.3",
"hex",
"indexmap",
"lazy_static",
@ -150,8 +150,9 @@ dependencies = [
[[package]]
name = "arrow-flight"
version = "4.4.0"
source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/perf_integration#d416e9158275148e2be5e64a1c8a6689c7a83fac"
version = "5.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "628b4735031e1d5e41c7c1f210ad233ab942adcf155edcba05c3e6d762062349"
dependencies = [
"arrow",
"base64 0.13.0",
@ -841,7 +842,7 @@ dependencies = [
[[package]]
name = "datafusion"
version = "4.0.0-SNAPSHOT"
source = "git+https://github.com/alamb/arrow-datafusion.git?branch=alamb/perf_integration_df_2#d201ebf323a532ac858fe33083639df4a8d321ee"
source = "git+https://github.com/apache/arrow-datafusion.git#bd3ee23520a3e6f135891ec32d96fcea7ee2bb55"
dependencies = [
"ahash 0.7.4",
"arrow",
@ -2593,8 +2594,9 @@ dependencies = [
[[package]]
name = "parquet"
version = "4.4.0"
source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/perf_integration#d416e9158275148e2be5e64a1c8a6689c7a83fac"
version = "5.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9297cb17ef7287f6105685d230abbb2b37247657edf2b4a99271088e7d5b0ddd"
dependencies = [
"arrow",
"base64 0.13.0",
@ -4980,9 +4982,9 @@ checksum = "b07db065a5cf61a7e4ba64f29e67db906fb1787316516c4e6e5ff0fea1efcd8a"
[[package]]
name = "zeroize"
version = "1.3.0"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4756f7db3f7b5574938c3eb1c117038b8e07f95ee6718c0efad4ac21508f1efd"
checksum = "eeafe61337cb2c879d328b74aa6cd9d794592c82da6be559fdf11493f02a2d18"
[[package]]
name = "zstd"

View File

@ -70,8 +70,8 @@ tracker = { path = "tracker" }
trogging = { path = "trogging", features = ["structopt"] }
# Crates.io dependencies, in alphabetical order
arrow = { version = "4.0", features = ["prettyprint"] }
arrow-flight = "4.0"
arrow = { version = "5.0", features = ["prettyprint"] }
arrow-flight = "5.0"
byteorder = "1.3.4"
bytes = "1.0"
chrono = "0.4"
@ -88,7 +88,7 @@ opentelemetry-jaeger = { version = "0.12", features = ["tokio"] }
opentelemetry-otlp = "0.6"
parking_lot = "0.11.1"
itertools = "0.10.1"
parquet = "4.0"
parquet = "5.0"
# used by arrow/datafusion anyway
prettytable-rs = "0.8"
pprof = { version = "^0.4", default-features = false, features = ["flamegraph", "protobuf"] }
@ -138,9 +138,3 @@ tempfile = "3.1.0"
azure = ["object_store/azure"]
gcp = ["object_store/gcp"]
aws = ["object_store/aws"]
[patch.crates-io]
arrow = { git="https://github.com/alamb/arrow-rs.git", branch = "alamb/perf_integration" }
parquet = { git="https://github.com/alamb/arrow-rs.git", branch = "alamb/perf_integration" }
arrow-flight= { git="https://github.com/alamb/arrow-rs.git", branch = "alamb/perf_integration" }

View File

@ -7,7 +7,7 @@ description = "Apache Arrow utilities"
[dependencies]
arrow = { version = "4.0", features = ["prettyprint"] }
arrow = { version = "5.0", features = ["prettyprint"] }
ahash = "0.7.2"
num-traits = "0.2"
snafu = "0.6"

View File

@ -9,4 +9,4 @@ description = "Re-exports datafusion at a specific version"
# Rename to workaround doctest bug
# Turn off optional datafusion features (function packages)
upstream = { git = "https://github.com/alamb/arrow-datafusion.git", branch = "alamb/perf_integration_df_2", default-features = false, package = "datafusion" }
upstream = { git = "https://github.com/apache/arrow-datafusion.git", rev = "bd3ee23520a3e6f135891ec32d96fcea7ee2bb55", default-features = false, package = "datafusion" }

View File

@ -10,8 +10,8 @@ format = ["arrow"]
[dependencies]
# Workspace dependencies, in alphabetical order
arrow = { version = "4.0", optional = true }
arrow-flight = { version = "4.0", optional = true}
arrow = { version = "5.0", optional = true }
arrow-flight = { version = "5.0", optional = true}
generated_types = { path = "../generated_types" }
# Crates.io dependencies, in alphabetical order

View File

@ -7,7 +7,7 @@ description = "InfluxDB IOx internal types, shared between IOx instances"
readme = "README.md"
[dependencies]
arrow = { version = "4.0", features = ["prettyprint"] }
arrow = { version = "5.0", features = ["prettyprint"] }
hashbrown = "0.11"
indexmap = "1.6"
itertools = "0.10.1"

View File

@ -5,7 +5,7 @@ authors = ["Edd Robinson <me@edd.io>"]
edition = "2018"
[dependencies] # In alphabetical order
arrow = { version = "4.0", features = ["prettyprint"] }
arrow = { version = "5.0", features = ["prettyprint"] }
chrono = "0.4"
croaring = "0.5"
crossbeam = "0.8"
@ -14,7 +14,7 @@ human_format = "1.0.3"
packers = { path = "../packers" }
snafu = "0.6.8"
observability_deps = { path = "../observability_deps" }
parquet = "4.0"
parquet = "5.0"
[dev-dependencies] # In alphabetical order
criterion = "0.3"

View File

@ -14,7 +14,7 @@ edition = "2018"
# 2. Keep change/compile/link time down during development when working on just this crate
[dependencies] # In alphabetical order
arrow = { version = "4.0", features = ["prettyprint"] }
arrow = { version = "5.0", features = ["prettyprint"] }
arrow_util = { path = "../arrow_util" }
async-trait = "0.1"
chrono = "0.4"

View File

@ -5,13 +5,13 @@ authors = ["Andrew Lamb <andrew@nerdnetworks.org>"]
edition = "2018"
[dependencies] # In alphabetical order
arrow = { version = "4.0", features = ["prettyprint"] }
arrow = { version = "5.0", features = ["prettyprint"] }
human_format = "1.0.3"
influxdb_tsm = { path = "../influxdb_tsm" }
internal_types = { path = "../internal_types" }
snafu = "0.6.2"
observability_deps = { path = "../observability_deps" }
parquet = "4.0"
parquet = "5.0"
[dev-dependencies] # In alphabetical order
rand = "0.8.3"

View File

@ -5,7 +5,7 @@ authors = ["Nga Tran <nga-tran@live.com>"]
edition = "2018"
[dependencies] # In alphabetical order
arrow = { version = "4.0", features = ["prettyprint"] }
arrow = { version = "5.0", features = ["prettyprint"] }
base64 = "0.13"
bytes = "1.0"
chrono = "0.4"
@ -20,7 +20,7 @@ object_store = {path = "../object_store"}
observability_deps = { path = "../observability_deps" }
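# Turn off the "arrow" feature; it currently has a bug that causes the crate to rebuild every time
# and we're not currently using it anyway
# NOTE(review): the `parquet` entry below specifies only a version, so default features are
# not actually disabled here; confirm whether this comment is still accurate.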
parquet = "4.0"
parquet = "5.0"
parquet-format = "2.6"
parking_lot = "0.11.1"
persistence_windows = { path = "../persistence_windows" }

View File

@ -14,7 +14,7 @@ description = "IOx Query Interface and Executor"
# 2. Allow for query logic testing without bringing in all the storage systems.
[dependencies] # In alphabetical order
arrow = { version = "4.0", features = ["prettyprint"] }
arrow = { version = "5.0", features = ["prettyprint"] }
arrow_util = { path = "../arrow_util" }
async-trait = "0.1"
chrono = "0.4"

View File

@ -15,7 +15,7 @@ query = { path = "../query" }
server = { path = "../server" }
[dev-dependencies]
arrow = { version = "4.0", features = ["prettyprint"] }
arrow = { version = "5.0", features = ["prettyprint"] }
arrow_util = { path = "../arrow_util" }
datafusion = { path = "../datafusion" }
data_types = { path = "../data_types" }

View File

@ -11,7 +11,7 @@ edition = "2018"
# 2. Keep change/compile/link time down during development when working on just this crate
[dependencies] # In alphabetical order
arrow = { version = "4.0", features = ["prettyprint"] }
arrow = { version = "5.0", features = ["prettyprint"] }
arrow_util = { path = "../arrow_util" }
croaring = "0.5"
data_types = { path = "../data_types" }

View File

@ -5,7 +5,7 @@ authors = ["pauldix <paul@pauldix.net>"]
edition = "2018"
[dependencies] # In alphabetical order
arrow = { version = "4.0", features = ["prettyprint"] }
arrow = { version = "5.0", features = ["prettyprint"] }
arrow_util = { path = "../arrow_util" }
async-trait = "0.1"
bytes = { version = "1.0" }

View File

@ -2489,7 +2489,7 @@ mod tests {
ChunkStorage::ReadBufferAndObjectStore,
lifecycle_action,
3236,
1528,
1479,
2,
),
ChunkSummary::new_without_timestamps(