Merge branch 'main' into crepererum/issue3030

commit a6a0eda142

@@ -40,6 +40,7 @@ commands:
rustup show
cargo fmt --version
cargo clippy --version
cargo install cargo-hakari && cargo hakari --version

cache_restore:
description: Restore Cargo Cache

@@ -137,6 +138,19 @@ jobs:
command: tar -cvzf rustdoc.tar.gz target/doc/
- store_artifacts:
path: rustdoc.tar.gz
workspace_hack_checks:
docker:
- image: quay.io/influxdb/rust:ci
steps:
- checkout
- rust_components
- cache_restore
- run:
name: Check that the workspace hack crate contains all features in use
command: cargo hakari generate --diff || echo "If this fails, fix it by running \`cargo hakari generate\` locally and committing the changes"
- run:
name: Check that all crates in the workspace depend on the workspace hack crate
command: cargo hakari manage-deps --dry-run || echo "If this fails, fix it by running \`cargo hakari manage-deps\` locally and committing the changes"

test:
docker:

@@ -414,6 +428,7 @@ workflows:
- test_perf
- build
- doc
- workspace_hack_checks
- perf_image:
filters:
branches:
@@ -0,0 +1,40 @@
# This file contains settings for `cargo hakari`.
# See https://docs.rs/cargo-hakari/*/cargo_hakari/config for a full list of options.

hakari-package = "workspace-hack"

# Setting workspace.resolver = "2" in the root Cargo.toml is HIGHLY recommended.
# Hakari works much better with the new feature resolver.
# For more about the new feature resolver, see:
# https://blog.rust-lang.org/2021/03/25/Rust-1.51.0.html#cargos-new-feature-resolver
resolver = "2"

# Add triples corresponding to platforms commonly used by developers here.
# https://doc.rust-lang.org/rustc/platform-support.html
platforms = [
# "x86_64-unknown-linux-gnu",
# "x86_64-apple-darwin",
# "x86_64-pc-windows-msvc",
]

# Write out exact versions rather than a semver range. (Defaults to false.)
# exact-versions = true

# Don't search in these crates for dependencies, and don't have these crates depend on the
# workspace-hack crate.
#
# Includes most bench- or test-only crates except for query_tests, as that crate is built often
# and should share as many dependencies as possible.
[traversal-excludes]
workspace-members = [
"grpc-router",
"grpc-router-test-gen",
"influxdb_iox_client",
"iox_data_generator",
"mutable_batch_tests",
"server_benchmarks",
"trogging",
]
third-party = [
{ name = "tikv-jemalloc-sys" },
]
@@ -144,6 +144,7 @@ dependencies = [
"num-traits",
"rand",
"snafu",
"workspace-hack",
]

[[package]]

@@ -535,6 +536,7 @@ dependencies = [
"tokio",
"tonic",
"tower",
"workspace-hack",
]

[[package]]

@@ -797,6 +799,7 @@ dependencies = [
"test_helpers",
"time 0.1.0",
"uuid",
"workspace-hack",
]

[[package]]

@@ -804,6 +807,7 @@ name = "datafusion"
version = "0.1.0"
dependencies = [
"datafusion 6.0.0",
"workspace-hack",
]

[[package]]

@@ -839,6 +843,7 @@ dependencies = [
"futures",
"tokio",
"tokio-stream",
"workspace-hack",
]

[[package]]

@@ -924,6 +929,7 @@ dependencies = [
"schema",
"time 0.1.0",
"trace",
"workspace-hack",
]

[[package]]

@@ -1215,6 +1221,7 @@ dependencies = [
"time 0.1.0",
"tonic",
"tonic-build",
"workspace-hack",
]

[[package]]

@@ -1321,9 +1328,9 @@ checksum = "ac5956d4e63858efaec57e0d6c1c2f6a41e1487f830314a324ccd7e2223a7ca0"

[[package]]
name = "handlebars"
version = "4.1.4"
version = "4.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1874024f4a29f47d609014caec0b1c866f1c1eb0661a09c9733ecc4757f5f88"
checksum = "8ad84da8f63da982543fc85fcabaee2ad1fdd809d99d64a48887e2e942ddfe46"
dependencies = [
"log",
"pest",

@@ -1434,9 +1441,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"

[[package]]
name = "hyper"
version = "0.14.14"
version = "0.14.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b91bb1f221b6ea1f1e4371216b70f40748774c2fb5971b450c07773fb92d26b"
checksum = "436ec0091e4f20e655156a30a0df3770fe2900aa301e548e08446ec794b6953c"
dependencies = [
"bytes",
"futures-channel",

@@ -1551,6 +1558,7 @@ dependencies = [
"test_helpers",
"tokio",
"url",
"workspace-hack",
]

[[package]]

@@ -1644,6 +1652,7 @@ dependencies = [
"tracker",
"trogging",
"uuid",
"workspace-hack",
"write_buffer",
]

@@ -1681,6 +1690,7 @@ dependencies = [
"smallvec",
"snafu",
"test_helpers",
"workspace-hack",
]

[[package]]

@@ -1692,6 +1702,7 @@ dependencies = [
"generated_types",
"prost",
"tonic",
"workspace-hack",
]

[[package]]

@@ -1706,6 +1717,7 @@ dependencies = [
"snafu",
"snap",
"test_helpers",
"workspace-hack",
]

[[package]]

@@ -1737,6 +1749,7 @@ dependencies = [
"parking_lot",
"time 0.1.0",
"tokio",
"workspace-hack",
]

[[package]]

@@ -1781,6 +1794,7 @@ dependencies = [
"tokio",
"tokio-stream",
"uuid",
"workspace-hack",
]

[[package]]

@@ -1914,9 +1928,9 @@ dependencies = [

[[package]]
name = "libc"
version = "0.2.106"
version = "0.2.108"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a60553f9a9e039a333b4e9b20573b9e9b9c0bb3a11e201ccc48ef4283456d673"
checksum = "8521a1b57e76b1ec69af7599e75e38e7b7fad6610f037db8c79b127201b5d119"

[[package]]
name = "libloading"

@@ -1959,6 +1973,7 @@ dependencies = [
"time 0.1.0",
"tokio",
"tracker",
"workspace-hack",
]

[[package]]

@@ -1988,6 +2003,7 @@ dependencies = [
"parking_lot",
"regex",
"tracing-subscriber",
"workspace-hack",
]

[[package]]

@@ -2078,6 +2094,7 @@ name = "metric"
version = "0.1.0"
dependencies = [
"parking_lot",
"workspace-hack",
]

[[package]]

@@ -2088,6 +2105,7 @@ dependencies = [
"observability_deps",
"prometheus",
"test_helpers",
"workspace-hack",
]

[[package]]

@@ -2191,6 +2209,7 @@ dependencies = [
"rand",
"schema",
"snafu",
"workspace-hack",
]

[[package]]

@@ -2203,6 +2222,7 @@ dependencies = [
"mutable_batch",
"schema",
"snafu",
"workspace-hack",
]

[[package]]

@@ -2217,6 +2237,7 @@ dependencies = [
"mutable_batch_lp",
"schema",
"snafu",
"workspace-hack",
]

[[package]]

@@ -2250,6 +2271,7 @@ dependencies = [
"snafu",
"test_helpers",
"tokio",
"workspace-hack",
]

[[package]]

@@ -2534,6 +2556,7 @@ dependencies = [
"tokio",
"tokio-util",
"walkdir",
"workspace-hack",
]

[[package]]

@@ -2541,6 +2564,7 @@ name = "observability_deps"
version = "0.1.0"
dependencies = [
"tracing",
"workspace-hack",
]

[[package]]

@@ -2634,6 +2658,7 @@ dependencies = [
"schema",
"snafu",
"test_helpers",
"workspace-hack",
]

[[package]]

@@ -2641,6 +2666,7 @@ name = "panic_logging"
version = "0.1.0"
dependencies = [
"observability_deps",
"workspace-hack",
]

[[package]]

@@ -2730,6 +2756,7 @@ dependencies = [
"tokio",
"tokio-stream",
"uuid",
"workspace-hack",
"zstd",
]

@@ -2766,6 +2793,7 @@ dependencies = [
"tokio",
"tokio-stream",
"uuid",
"workspace-hack",
"zstd",
]

@@ -2851,6 +2879,7 @@ dependencies = [
"snafu",
"test_helpers",
"time 0.1.0",
"workspace-hack",
]

[[package]]

@@ -3017,6 +3046,7 @@ dependencies = [
"sqlparser",
"test_helpers",
"tokio",
"workspace-hack",
]

[[package]]

@@ -3197,6 +3227,7 @@ dependencies = [
"tokio-stream",
"tokio-util",
"trace",
"workspace-hack",
]

[[package]]

@@ -3219,6 +3250,7 @@ dependencies = [
"tempfile",
"test_helpers",
"tokio",
"workspace-hack",
]

[[package]]

@@ -3388,6 +3420,7 @@ dependencies = [
"schema",
"snafu",
"test_helpers",
"workspace-hack",
]

[[package]]

@@ -3528,6 +3561,7 @@ dependencies = [
"time 0.1.0",
"tokio",
"trace",
"workspace-hack",
"write_buffer",
]

@@ -3710,6 +3744,7 @@ dependencies = [
"indexmap",
"itertools",
"snafu",
"workspace-hack",
]

[[package]]

@@ -3801,9 +3836,9 @@ dependencies = [

[[package]]
name = "serde_json"
version = "1.0.70"
version = "1.0.71"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e277c495ac6cd1a01a58d0a0c574568b4d1ddf14f59965c6a58b8d96400b54f3"
checksum = "063bf466a64011ac24040a49009724ee60a57da1b437617ceb32e53ad61bfb19"
dependencies = [
"indexmap",
"itoa",

@@ -3887,6 +3922,7 @@ dependencies = [
"trace",
"tracker",
"uuid",
"workspace-hack",
"write_buffer",
]

@@ -4232,6 +4268,7 @@ dependencies = [
"parking_lot",
"tempfile",
"tracing-subscriber",
"workspace-hack",
]

[[package]]

@@ -4322,6 +4359,7 @@ version = "0.1.0"
dependencies = [
"chrono",
"parking_lot",
"workspace-hack",
]

[[package]]

@@ -4530,9 +4568,9 @@ dependencies = [

[[package]]
name = "tower"
version = "0.4.10"
version = "0.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c00e500fff5fa1131c866b246041a6bf96da9c965f8fe4128cb1421f23e93c00"
checksum = "5651b5f6860a99bd1adb59dbfe1db8beb433e73709d9032b413a77e2fb7c066a"
dependencies = [
"futures-core",
"futures-util",

@@ -4569,6 +4607,7 @@ dependencies = [
"observability_deps",
"parking_lot",
"rand",
"workspace-hack",
]

[[package]]

@@ -4584,6 +4623,7 @@ dependencies = [
"thrift",
"tokio",
"trace",
"workspace-hack",
]

[[package]]

@@ -4602,6 +4642,7 @@ dependencies = [
"snafu",
"tower",
"trace",
"workspace-hack",
]

[[package]]

@@ -4670,9 +4711,9 @@ dependencies = [

[[package]]
name = "tracing-subscriber"
version = "0.3.1"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80a4ddde70311d8da398062ecf6fc2c309337de6b0f77d6c27aff8d53f6fca52"
checksum = "7507ec620f809cdf07cccb5bc57b13069a88031b795efd4079b1c71b66c1613d"
dependencies = [
"ansi_term 0.12.1",
"lazy_static",

@@ -4703,6 +4744,7 @@ dependencies = [
"time 0.1.0",
"tokio",
"tokio-util",
"workspace-hack",
]

[[package]]

@@ -5014,6 +5056,55 @@ dependencies = [
"winapi",
]

[[package]]
name = "workspace-hack"
version = "0.1.0"
dependencies = [
"ahash",
"bytes",
"cc",
"chrono",
"clap",
"either",
"futures",
"futures-channel",
"futures-core",
"futures-io",
"futures-sink",
"futures-task",
"futures-util",
"getrandom",
"hashbrown",
"hyper",
"indexmap",
"itoa",
"libc",
"log",
"memchr",
"num-bigint 0.4.3",
"num-integer",
"num-traits",
"once_cell",
"rand",
"regex",
"regex-automata",
"regex-syntax",
"reqwest",
"serde",
"serde_json",
"smallvec",
"syn",
"tokio",
"tokio-stream",
"tokio-util",
"tower",
"tracing",
"tracing-core",
"tracing-subscriber",
"url",
"uuid",
]

[[package]]
name = "write_buffer"
version = "0.1.0"

@@ -5040,6 +5131,7 @@ dependencies = [
"trace",
"trace_http",
"uuid",
"workspace-hack",
]

[[package]]
@@ -50,6 +50,7 @@ members = [
"trace_http",
"tracker",
"trogging",
"workspace-hack",
"write_buffer",
]
default-members = ["influxdb_iox"]

@@ -14,6 +14,7 @@ comfy-table = { version = "5.0", default-features = false }
hashbrown = "0.11"
num-traits = "0.2"
snafu = "0.6"
workspace-hack = { path = "../workspace-hack"}

[dev-dependencies]
rand = "0.8.3"

@@ -11,6 +11,7 @@ prost = "0.8"
thiserror = "1.0.30"
tonic = { version = "0.5.0" }
tower = "0.4"
workspace-hack = { path = "../workspace-hack"}

[dev-dependencies]
tokio = { version = "1.13", features = ["macros", "rt-multi-thread"] }

@@ -17,6 +17,7 @@ siphasher = "0.3"
snafu = "0.6"
time = { path = "../time" }
uuid = { version = "0.8", features = ["v4"] }
workspace-hack = { path = "../workspace-hack"}

[dev-dependencies] # In alphabetical order
test_helpers = { path = "../test_helpers" }
@@ -97,6 +97,9 @@ pub enum ChunkLifecycleAction {
/// Chunk is in the process of being compacted
Compacting,

/// Object Store Chunk is in the process of being compacted
CompactingObjectStore,

/// Chunk is about to be dropped from memory and (if persisted) from object store
Dropping,
}

@@ -112,6 +115,7 @@ impl ChunkLifecycleAction {
match self {
Self::Persisting => "Persisting to Object Storage",
Self::Compacting => "Compacting",
Self::CompactingObjectStore => "Compacting Object Store",
Self::Dropping => "Dropping",
}
}
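A minimal standalone sketch of the change above (it mirrors, rather than imports, the `data_types` enum): adding the `CompactingObjectStore` variant forces every exhaustive `match`, such as the `name()` mapping, to account for it, which is what drives the rest of the hunks in this diff.

```rust
// Standalone sketch; the variant/string pairs mirror the hunk above, not the real crate.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ChunkLifecycleAction {
    Persisting,
    Compacting,
    CompactingObjectStore,
    Dropping,
}

impl ChunkLifecycleAction {
    /// Human-readable name, as in the diffed `name()` method.
    fn name(&self) -> &'static str {
        match self {
            Self::Persisting => "Persisting to Object Storage",
            Self::Compacting => "Compacting",
            Self::CompactingObjectStore => "Compacting Object Store",
            Self::Dropping => "Dropping",
        }
    }
}

fn main() {
    assert_eq!(
        ChunkLifecycleAction::CompactingObjectStore.name(),
        "Compacting Object Store"
    );
}
```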
@@ -24,6 +24,12 @@ pub enum Job {
chunks: Vec<ChunkId>,
},

/// Compact a set of object store chunks
CompactObjectStoreChunks {
partition: PartitionAddr,
chunks: Vec<ChunkId>,
},

/// Split and persist a set of chunks
PersistChunks {
partition: PartitionAddr,

@@ -47,6 +53,7 @@ impl Job {
Self::Dummy { db_name, .. } => db_name.as_ref(),
Self::WriteChunk { chunk, .. } => Some(&chunk.db_name),
Self::CompactChunks { partition, .. } => Some(&partition.db_name),
Self::CompactObjectStoreChunks { partition, .. } => Some(&partition.db_name),
Self::PersistChunks { partition, .. } => Some(&partition.db_name),
Self::DropChunk { chunk, .. } => Some(&chunk.db_name),
Self::DropPartition { partition, .. } => Some(&partition.db_name),

@@ -60,6 +67,7 @@ impl Job {
Self::Dummy { .. } => None,
Self::WriteChunk { chunk, .. } => Some(&chunk.partition_key),
Self::CompactChunks { partition, .. } => Some(&partition.partition_key),
Self::CompactObjectStoreChunks { partition, .. } => Some(&partition.partition_key),
Self::PersistChunks { partition, .. } => Some(&partition.partition_key),
Self::DropChunk { chunk, .. } => Some(&chunk.partition_key),
Self::DropPartition { partition, .. } => Some(&partition.partition_key),

@@ -73,6 +81,7 @@ impl Job {
Self::Dummy { .. } => None,
Self::WriteChunk { chunk, .. } => Some(&chunk.table_name),
Self::CompactChunks { partition, .. } => Some(&partition.table_name),
Self::CompactObjectStoreChunks { partition, .. } => Some(&partition.table_name),
Self::PersistChunks { partition, .. } => Some(&partition.table_name),
Self::DropChunk { chunk, .. } => Some(&chunk.table_name),
Self::DropPartition { partition, .. } => Some(&partition.table_name),

@@ -86,6 +95,7 @@ impl Job {
Self::Dummy { .. } => None,
Self::WriteChunk { chunk, .. } => Some(vec![chunk.chunk_id]),
Self::CompactChunks { chunks, .. } => Some(chunks.clone()),
Self::CompactObjectStoreChunks { chunks, .. } => Some(chunks.clone()),
Self::PersistChunks { chunks, .. } => Some(chunks.clone()),
Self::DropChunk { chunk, .. } => Some(vec![chunk.chunk_id]),
Self::DropPartition { .. } => None,

@@ -99,6 +109,9 @@ impl Job {
Self::Dummy { .. } => "Dummy Job, for testing",
Self::WriteChunk { .. } => "Writing chunk to Object Storage",
Self::CompactChunks { .. } => "Compacting chunks to ReadBuffer",
Self::CompactObjectStoreChunks { .. } => {
"Compacting Object Store chunks to an Object Store chunk"
}
Self::PersistChunks { .. } => "Persisting chunks to object storage",
Self::DropChunk { .. } => "Drop chunk from memory and (if persisted) from object store",
Self::DropPartition { .. } => {

@@ -115,6 +128,9 @@ impl std::fmt::Display for Job {
Job::Dummy { .. } => write!(f, "Job::Dummy"),
Job::WriteChunk { chunk } => write!(f, "Job::WriteChunk({}))", chunk),
Job::CompactChunks { partition, .. } => write!(f, "Job::CompactChunks({})", partition),
Job::CompactObjectStoreChunks { partition, .. } => {
write!(f, "Job::CompactObjectStoreChunks({})", partition)
}
Job::PersistChunks { partition, .. } => write!(f, "Job::PersistChunks({})", partition),
Job::DropChunk { chunk } => write!(f, "Job::DropChunk({})", chunk),
Job::DropPartition { partition } => write!(f, "Job::DropPartition({})", partition),
@@ -11,7 +11,8 @@ impl NonEmptyString {
/// Create a new `NonEmptyString` from the provided `String`
///
/// Returns None if empty
pub fn new(s: String) -> Option<Self> {
pub fn new(s: impl Into<String>) -> Option<Self> {
let s = s.into();
match s.is_empty() {
true => None,
false => Some(Self(s.into_boxed_str())),
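A standalone sketch of the API relaxation above (mirroring, not importing, the real type): taking `impl Into<String>` lets callers pass `&str` literals directly instead of building a `String` first, while the empty-string check is unchanged.

```rust
// Sketch of the signature change in the hunk above; the struct is a stand-in.
#[derive(Debug)]
struct NonEmptyString(Box<str>);

impl NonEmptyString {
    /// Returns None if the (converted) string is empty.
    fn new(s: impl Into<String>) -> Option<Self> {
        let s = s.into();
        match s.is_empty() {
            true => None,
            false => Some(Self(s.into_boxed_str())),
        }
    }
}

fn main() {
    // A &str is now accepted directly; previously callers needed String::from(...).
    assert!(NonEmptyString::new("reason").is_some());
    assert!(NonEmptyString::new(String::new()).is_none());
}
```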
@@ -10,3 +10,4 @@ description = "Re-exports datafusion at a specific version"
# Rename to workaround doctest bug
# Turn off optional datafusion features (e.g. don't get support for crypto functions or avro)
upstream = { git = "https://github.com/apache/arrow-datafusion.git", rev="79f129d048667a4552e44ef740e1b1cf9de306a1", default-features = false, package = "datafusion" }
workspace-hack = { path = "../workspace-hack"}

@@ -10,3 +10,4 @@ datafusion = { path = "../datafusion" }
futures = "0.3"
tokio = { version = "1.13", features = ["macros"] }
tokio-stream = "0.1.8"
workspace-hack = { path = "../workspace-hack"}

@@ -13,3 +13,4 @@ ordered-float = "2"
schema = { path = "../schema" }
time = { path = "../time" }
trace = { path = "../trace" }
workspace-hack = { path = "../workspace-hack"}

@@ -15,6 +15,7 @@ regex = "1.4"
serde = { version = "1.0", features = ["derive"] }
tonic = "0.5"
time = { path = "../time" }
workspace-hack = { path = "../workspace-hack"}

[dev-dependencies]
data_types = { path = "../data_types" }
@@ -18,7 +18,6 @@ fn main() -> Result<()> {
///
/// Creates:
///
/// - `com.github.influxdata.idpe.storage.read.rs`
/// - `influxdata.iox.delete.v1.rs`
/// - `influxdata.iox.deployment.v1.rs`
/// - `influxdata.iox.management.v1.rs`

@@ -31,7 +30,6 @@ fn main() -> Result<()> {
fn generate_grpc_types(root: &Path) -> Result<()> {
let delete_path = root.join("influxdata/iox/delete/v1");
let deployment_path = root.join("influxdata/iox/deployment/v1");
let idpe_path = root.join("com/github/influxdata/idpe/storage/read");
let management_path = root.join("influxdata/iox/management/v1");
let predicate_path = root.join("influxdata/iox/predicate/v1");
let preserved_catalog_path = root.join("influxdata/iox/preserved_catalog/v1");

@@ -43,7 +41,6 @@ fn generate_grpc_types(root: &Path) -> Result<()> {
let proto_files = vec![
delete_path.join("service.proto"),
deployment_path.join("service.proto"),
idpe_path.join("source.proto"),
management_path.join("chunk.proto"),
management_path.join("database_rules.proto"),
management_path.join("jobs.proto"),

@@ -67,8 +64,8 @@ fn generate_grpc_types(root: &Path) -> Result<()> {
router_path.join("shard.proto"),
storage_path.join("predicate.proto"),
storage_path.join("service.proto"),
storage_path.join("source.proto"),
storage_path.join("storage_common.proto"),
storage_path.join("storage_common_idpe.proto"),
storage_path.join("test.proto"),
write_buffer_path.join("write_buffer.proto"),
];

@@ -88,6 +85,7 @@ fn generate_grpc_types(root: &Path) -> Result<()> {
".influxdata.iox.management.v1.Chunk.id",
".influxdata.iox.management.v1.ClosePartitionChunkRequest.chunk_id",
".influxdata.iox.management.v1.CompactChunks.chunks",
".influxdata.iox.management.v1.CompactObjectStoreChunks.chunks",
".influxdata.iox.management.v1.DropChunk.chunk_id",
".influxdata.iox.management.v1.PersistChunks.chunks",
".influxdata.iox.management.v1.WriteChunk.chunk_id",

@@ -114,7 +112,12 @@ fn generate_grpc_types(root: &Path) -> Result<()> {

pbjson_build::Builder::new()
.register_descriptors(&descriptor_set)?
.build(&[".influxdata", ".google.longrunning", ".google.rpc"])?;
.build(&[
".influxdata.iox",
".influxdata.pbdata",
".google.longrunning",
".google.rpc",
])?;

Ok(())
}
@@ -42,6 +42,9 @@ enum ChunkLifecycleAction {

/// Chunk is about to be dropped from memory and (if persisted) from object store.
CHUNK_LIFECYCLE_ACTION_DROPPING = 4;

/// Chunk is in the process of being compacted
CHUNK_LIFECYCLE_ACTION_COMPACTING_OBJECT_STORE = 5;
}

@@ -39,6 +39,7 @@ message OperationMetadata {
PersistChunks persist_chunks = 11;
DropChunk drop_chunk = 12;
DropPartition drop_partition = 17;
CompactObjectStoreChunks compact_object_store_chunks = 18;
}
}

@@ -91,6 +92,23 @@ message CompactChunks {
repeated bytes chunks = 5;
}

// Compact chunks into a single chunk
message CompactObjectStoreChunks {
// name of the database
string db_name = 1;

// partition key
string partition_key = 2;

// table name
string table_name = 3;

// chunk_id
// UUID is stored as 16 bytes in big-endian order.
repeated bytes chunks = 4;
}


// Split and write chunks to object store
message PersistChunks {
// name of the database
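The `chunks` field of the new `CompactObjectStoreChunks` message carries each chunk UUID as 16 big-endian bytes, per the comment above. A small hedged sketch of how one such value could be produced, assuming the `uuid` 0.8 crate (with the `v4` feature) that appears elsewhere in this diff:

```rust
// Sketch only: encode a chunk UUID for the `repeated bytes chunks` proto field.
use uuid::Uuid;

fn main() {
    let chunk_id = Uuid::new_v4();
    // `as_bytes` yields the UUID's 16-byte big-endian representation.
    let wire_bytes: Vec<u8> = chunk_id.as_bytes().to_vec();
    assert_eq!(wire_bytes.len(), 16);
}
```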
@@ -9,7 +9,6 @@ package influxdata.platform.storage;

import "google/protobuf/empty.proto";
import "influxdata/platform/storage/storage_common.proto";
import "influxdata/platform/storage/storage_common_idpe.proto";

service Storage {
// ReadFilter performs a filter operation at storage

@@ -1,5 +1,5 @@
syntax = "proto3";
package com.github.influxdata.idpe.storage.read;
package influxdata.platform.storage.read;

message ReadSource {
// OrgID specifies the organization identifier for this request.
@@ -9,59 +9,91 @@ package influxdata.platform.storage;

import "google/protobuf/any.proto";
import "influxdata/platform/storage/predicate.proto";
import "influxdata/platform/storage/source.proto";


message OffsetsResponse {
message PartitionOffsetResponse {
int64 id = 1;
int64 offset = 2;
}
repeated PartitionOffsetResponse partitions = 1;
}

enum TagKeyMetaNames {
// option (gogoproto.goproto_enum_prefix) = false;

// TagKeyMetaNamesText means the tag keys for measurement and field will
// be returned as _measurement and _field respectively.
TagKeyMetaNamesText = 0;

// TagKeyMetaNames means the tag keys for measurement and field will
// be returned as \x00 and \xff respectively.
TagKeyMetaNamesBinary = 1;
}

message ReadFilterRequest {
google.protobuf.Any read_source = 1;
TimestampRange range = 2;
google.protobuf.Any ReadSource = 1;
TimestampRange range = 2; // [(gogoproto.nullable) = false];
Predicate predicate = 3;
// KeySort determines the ordering of series keys from the server.
KeySort key_sort = 4;

// TagKeyMetaNames determines the key format used for the measurement and field
// tags.
TagKeyMetaNames tag_key_meta_names = 5;

enum KeySort {
// option (gogoproto.goproto_enum_prefix) = false;

// KeySortUnspecified means the key order is unspecified.
KeySortUnspecified = 0;

// KeySortAscending means the key order should be lexicographically ascending.
//
// NOTE: In order to preserve sort order, canonical tag keys are not
// transformed from 0x00 → _measurement and 0xff → _field.
KeySortAscending = 1;
}
}

message ReadGroupRequest {
google.protobuf.Any read_source = 1;
TimestampRange range = 2;
google.protobuf.Any ReadSource = 1;
TimestampRange range = 2; // [(gogoproto.nullable) = false];
Predicate predicate = 3;

enum Group {
// option (gogoproto.goproto_enum_prefix) = false;

// GroupNone returns all series as a single group.
// The single GroupFrame.TagKeys will be the union of all tag keys.
GROUP_NONE = 0;
GroupNone = 0;

// GroupBy returns a group for each unique value of the specified GroupKeys.
GROUP_BY = 2;
GroupBy = 2;
}

// GroupKeys specifies a list of tag keys used to order the data.
// It is dependent on the Group property to determine its behavior.
repeated string group_keys = 4;
repeated string GroupKeys = 4;

Group group = 5;
Aggregate aggregate = 6;

// TODO(jlapacik): This field is only used in unit tests.
// Specifically the two tests in group_resultset_test.go.
// This field should be removed and the tests that depend
// on it refactored.
enum HintFlags {
HINT_NONE = 0x00;
HINT_NO_POINTS = 0x01;
HINT_NO_SERIES = 0x02;
// HintSchemaAllTime performs schema queries without using time ranges
HINT_SCHEMA_ALL_TIME = 0x04;
}
fixed32 hints = 7;
// Deprecated field only used in TSM storage-related tests.
reserved "Hints";
}

message Aggregate {
enum AggregateType {
NONE = 0;
SUM = 1;
COUNT = 2;
MIN = 3;
MAX = 4;
FIRST = 5;
LAST = 6;
MEAN = 7;
AggregateTypeNone = 0;
AggregateTypeSum = 1;
AggregateTypeCount = 2;
AggregateTypeMin = 3;
AggregateTypeMax = 4;
AggregateTypeFirst = 5;
AggregateTypeLast = 6;
AggregateTypeMean = 7;
}

AggregateType type = 1;

@@ -77,39 +109,39 @@ message Tag {
// Response message for ReadFilter and ReadGroup
message ReadResponse {
enum FrameType {
SERIES = 0;
POINTS = 1;
FrameTypeSeries = 0;
FrameTypePoints = 1;
}

enum DataType {
FLOAT = 0;
INTEGER = 1;
UNSIGNED = 2;
BOOLEAN = 3;
STRING = 4;
DataTypeFloat = 0;
DataTypeInteger = 1;
DataTypeUnsigned = 2;
DataTypeBoolean = 3;
DataTypeString = 4;
}

message Frame {
oneof data {
GroupFrame group = 7;
SeriesFrame series = 1;
FloatPointsFrame float_points = 2;
IntegerPointsFrame integer_points = 3;
UnsignedPointsFrame unsigned_points = 4;
BooleanPointsFrame boolean_points = 5;
StringPointsFrame string_points = 6;
FloatPointsFrame FloatPoints = 2;
IntegerPointsFrame IntegerPoints = 3;
UnsignedPointsFrame UnsignedPoints = 4;
BooleanPointsFrame BooleanPoints = 5;
StringPointsFrame StringPoints = 6;
}
}

message GroupFrame {
// TagKeys
repeated bytes tag_keys = 1;
repeated bytes TagKeys = 1;
// PartitionKeyVals is the values of the partition key for this group, order matching ReadGroupRequest.GroupKeys
repeated bytes partition_key_vals = 2;
repeated bytes PartitionKeyVals = 2;
}

message SeriesFrame {
repeated Tag tags = 1;
repeated Tag tags = 1; // [(gogoproto.nullable) = false];
DataType data_type = 2;
}

@@ -138,7 +170,7 @@ message ReadResponse {
repeated string values = 2;
}

repeated Frame frames = 1;
repeated Frame frames = 1; // [(gogoproto.nullable) = false];
}

message Capability {

@@ -165,32 +197,47 @@ message TimestampRange {

// TagKeysRequest is the request message for Storage.TagKeys.
message TagKeysRequest {
google.protobuf.Any tags_source = 1;
TimestampRange range = 2;
google.protobuf.Any TagsSource = 1;
TimestampRange range = 2; // [(gogoproto.nullable) = false];
Predicate predicate = 3;
}

// TagValuesRequest is the request message for Storage.TagValues.
message TagValuesRequest {
google.protobuf.Any tags_source = 1 ;
google.protobuf.Any TagsSource = 1;
TimestampRange range = 2; // [(gogoproto.nullable) = false];
Predicate predicate = 3;

// string tag_key = 4;
// AAL changed from string --> bytes to handle \xff literals in Rust which are not valid UTF-8
bytes tag_key = 4;
}

message ReadSeriesCardinalityRequest {
google.protobuf.Any ReadSeriesCardinalitySource = 1;
TimestampRange range = 2; // [(gogoproto.nullable) = false];
Predicate predicate = 3;
}

// Response message for Storage.TagKeys, Storage.TagValues Storage.MeasurementNames,
// Storage.MeasurementTagKeys and Storage.MeasurementTagValues.
message StringValuesResponse {
repeated bytes values = 1;
}

// Response message for Storage.TagValuesGroupedByMeasurementAndTagKey.
message TagValuesResponse {
string measurement = 1;
string key = 2;
repeated string values = 3;
}

// Response message for Storage.SeriesCardinality
message Int64ValuesResponse {
repeated int64 values = 1;
}

// MeasurementNamesRequest is the request message for Storage.MeasurementNames.
message MeasurementNamesRequest {
google.protobuf.Any source = 1;
TimestampRange range = 2; // [(gogoproto.nullable) = false]
TimestampRange range = 2; // [(gogoproto.nullable) = false];
Predicate predicate = 3;
}

@@ -198,7 +245,7 @@ message MeasurementNamesRequest {
message MeasurementTagKeysRequest {
google.protobuf.Any source = 1;
string measurement = 2;
TimestampRange range = 3; // [(gogoproto.nullable) = false]
TimestampRange range = 3; // [(gogoproto.nullable) = false];
Predicate predicate = 4;
}

@@ -222,12 +269,12 @@ message MeasurementFieldsRequest {
// MeasurementFieldsResponse is the response message for Storage.MeasurementFields.
message MeasurementFieldsResponse {
enum FieldType {
FLOAT = 0;
INTEGER = 1;
UNSIGNED = 2;
STRING = 3;
BOOLEAN = 4;
UNDEFINED = 5;
FieldTypeFloat = 0;
FieldTypeInteger = 1;
FieldTypeUnsigned = 2;
FieldTypeString = 3;
FieldTypeBoolean = 4;
FieldTypeUndefined = 5;
}

message MessageField {

@@ -236,11 +283,11 @@ message MeasurementFieldsResponse {
sfixed64 timestamp = 3;
}

repeated MessageField fields = 1;// [(gogoproto.nullable) = false];
repeated MessageField fields = 1; // [(gogoproto.nullable) = false];
}

message ReadWindowAggregateRequest {
google.protobuf.Any read_source = 1;
google.protobuf.Any ReadSource = 1;
TimestampRange range = 2; // [(gogoproto.nullable) = false];
Predicate predicate = 3;
int64 WindowEvery = 4;

@@ -249,6 +296,48 @@ message ReadWindowAggregateRequest {
Window window = 7;
}

message TagValuesGroupedByMeasurementAndTagKeyRequest {
google.protobuf.Any source = 1;

// MeasurementPatterns holds the patterns to match the measurements
// against (the "FROM" part of the SHOW TAG VALUES statement).
repeated LiteralOrRegex MeasurementPatterns = 2;

// TagKeyPredicate holds a predicate for the tags to find values on.
// (the "WITH KEY" part of the SHOW TAG VALUES statement.
// It's in one of the forms:
// OR(IDENT, OR(IDENT, ...))
// EQ(IDENT)
// NEQ(IDENT)
// EQREGEX(REGEX)
// NEQREGEX(REGEX)
TagKeyPredicate TagKeyPredicate = 3;

// Condition holds any additional condition to evaluate on the results.
Predicate Condition = 4;
}

message TagKeyPredicate {
oneof value {
string Eq = 1;
string Neq = 2;
string EqRegex = 3;
string NeqRegex = 4;
StringList In = 5;
}
}

message StringList {
repeated string Vals = 1;
}

message LiteralOrRegex {
oneof value {
string literal_value = 1;
string regex_value = 2;
}
}

message Window {
Duration every = 1;
Duration offset = 2;
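Because `ReadFilterRequest` gains `key_sort` and `tag_key_meta_names`, the Rust test code later in this diff fills the new fields with `..Default::default()` rather than naming them. A standalone sketch of that pattern (the struct below is a stand-in, not the prost-generated type):

```rust
// Sketch: prost-generated messages derive `Default`, so adding fields stays
// source-compatible for callers that use struct-update syntax.
#[derive(Debug, Default)]
struct ReadFilterRequest {
    range_start: i64,
    range_end: i64,
    // Newly added fields; callers below need no change to keep compiling.
    key_sort: i32,
    tag_key_meta_names: i32,
}

fn main() {
    let request = ReadFilterRequest {
        range_start: 0,
        range_end: 10_000,
        ..Default::default()
    };
    println!("{:?}", request);
}
```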
@@ -1,25 +0,0 @@
// This file defines extensions to the InfluxDB storage gRPC common message types
// that have not yet made it into influxdb.

// It is, effectively, the delta between these two files:
// https://github.com/influxdata/influxdb/blob/master/storage/reads/datatypes/storage_common.proto
// https://github.com/influxdata/idpe/blob/master/storage/storageproto/storage_common.proto


syntax = "proto3";
package influxdata.platform.storage;

import "google/protobuf/any.proto";
import "influxdata/platform/storage/predicate.proto";
import "influxdata/platform/storage/storage_common.proto";

message ReadSeriesCardinalityRequest {
google.protobuf.Any read_series_cardinality_source = 1;
TimestampRange range = 2; // [(gogoproto.nullable) = false];
Predicate predicate = 3;
}

// Response message for Storage.SeriesCardinality
message Int64ValuesResponse {
repeated int64 values = 1;
}
@@ -64,6 +64,7 @@ impl From<Option<ChunkLifecycleAction>> for management::ChunkLifecycleAction {
match lifecycle_action {
Some(ChunkLifecycleAction::Persisting) => Self::Persisting,
Some(ChunkLifecycleAction::Compacting) => Self::Compacting,
Some(ChunkLifecycleAction::CompactingObjectStore) => Self::CompactingObjectStore,
Some(ChunkLifecycleAction::Dropping) => Self::Dropping,
None => Self::Unspecified,
}

@@ -153,6 +154,9 @@ impl TryFrom<management::ChunkLifecycleAction> for Option<ChunkLifecycleAction>
management::ChunkLifecycleAction::Compacting => {
Ok(Some(ChunkLifecycleAction::Compacting))
}
management::ChunkLifecycleAction::CompactingObjectStore => {
Ok(Some(ChunkLifecycleAction::CompactingObjectStore))
}
management::ChunkLifecycleAction::Dropping => Ok(Some(ChunkLifecycleAction::Dropping)),
management::ChunkLifecycleAction::Unspecified => Ok(None),
}

@@ -27,6 +27,14 @@ impl From<Job> for management::operation_metadata::Job {
chunks: chunks.into_iter().map(|chunk_id| chunk_id.into()).collect(),
})
}
Job::CompactObjectStoreChunks { partition, chunks } => {
Self::CompactObjectStoreChunks(management::CompactObjectStoreChunks {
db_name: partition.db_name.to_string(),
partition_key: partition.partition_key.to_string(),
table_name: partition.table_name.to_string(),
chunks: chunks.into_iter().map(|chunk_id| chunk_id.into()).collect(),
})
}
Job::PersistChunks { partition, chunks } => {
Self::PersistChunks(management::PersistChunks {
db_name: partition.db_name.to_string(),
@@ -9,11 +9,11 @@
pub mod influxdata {
pub mod platform {
pub mod storage {
include!(concat!(env!("OUT_DIR"), "/influxdata.platform.storage.rs"));
include!(concat!(
env!("OUT_DIR"),
"/influxdata.platform.storage.serde.rs"
"/influxdata.platform.storage.read.rs"
));
include!(concat!(env!("OUT_DIR"), "/influxdata.platform.storage.rs"));

// Can't implement `Default` because `prost::Message` implements `Default`
impl TimestampRange {

@@ -127,23 +127,6 @@ pub mod influxdata {
}
}

pub mod com {
pub mod github {
pub mod influxdata {
pub mod idpe {
pub mod storage {
pub mod read {
include!(concat!(
env!("OUT_DIR"),
"/com.github.influxdata.idpe.storage.read.rs"
));
}
}
}
}
}
}

// Needed because of https://github.com/hyperium/tonic/issues/471
pub mod grpc {
pub mod health {

@@ -199,7 +182,6 @@ pub fn protobuf_type_url_eq(url: &str, protobuf_type: &str) -> bool {
}

// TODO: Remove these (#2419)
pub use com::github::influxdata::idpe::storage::read::*;
pub use influxdata::platform::storage::*;

pub mod google;
@@ -9,9 +9,10 @@ bytes = "1.0"
futures = { version = "0.3", default-features = false }
reqwest = { version = "0.11", features = ["stream", "json"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.70"
serde_json = "1.0.71"
snafu = "0.6.6"
url = "2.1.1"
workspace-hack = { path = "../workspace-hack"}

[dev-dependencies] # In alphabetical order
mockito = "0.30"

@@ -70,7 +70,7 @@ pprof = { version = "^0.5", default-features = false, features = ["flamegraph",
prost = "0.8"
rustyline = { version = "9.0", default-features = false }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.70"
serde_json = "1.0.71"
serde_urlencoded = "0.7.0"
snafu = "0.6.9"
structopt = "0.3.25"

@@ -88,6 +88,7 @@ uuid = { version = "0.8", features = ["v4"] }
# jemalloc-sys with unprefixed_malloc_on_supported_platforms feature and heappy are mutually exclusive
tikv-jemalloc-sys = { version = "0.4.0", optional = true, features = ["unprefixed_malloc_on_supported_platforms"] }
heappy = { git = "https://github.com/mkmik/heappy", rev = "20aa466524ac9ce34a4bae29f27ec11869b50e21", features = ["enable_heap_profiler", "jemalloc_shim", "measure_free"], optional = true }
workspace-hack = { path = "../workspace-hack"}


[dev-dependencies]
@@ -220,6 +220,7 @@ where
read_source: _read_source,
range,
predicate,
..
} = read_filter_request;

info!(%db_name, ?range, predicate=%predicate.loggable(),"read filter");

@@ -251,15 +252,10 @@ where
group_keys,
group,
aggregate,
hints,
} = read_group_request;

info!(%db_name, ?range, ?group_keys, ?group, ?aggregate,predicate=%predicate.loggable(),"read_group");

if hints != 0 {
InternalHintsFieldNotSupported { hints }.fail()?
}

let aggregate_string = format!(
"aggregate: {:?}, group: {:?}, group_keys: {:?}",
aggregate, group, group_keys

@@ -1772,6 +1768,7 @@ mod tests {
read_source: source.clone(),
range: Some(make_timestamp_range(0, 10000)),
predicate: Some(make_state_ma_predicate()),
..Default::default()
};

let frames = fixture.storage_client.read_filter(request).await.unwrap();

@@ -1812,6 +1809,7 @@ mod tests {
read_source: source.clone(),
range: None,
predicate: None,
..Default::default()
};

// Note we don't set the response on the test database, so we expect an error

@@ -1855,7 +1853,6 @@ mod tests {
aggregate: Some(Aggregate {
r#type: aggregate::AggregateType::Sum as i32,
}),
hints: 0,
};

let frames = fixture.storage_client.read_group(request).await.unwrap();

@@ -1890,34 +1887,6 @@ mod tests {

let group = generated_types::read_group_request::Group::By as i32;

// ---
// test error hit in request processing
// ---
let request = ReadGroupRequest {
read_source: source.clone(),
range: None,
predicate: None,
group_keys: vec!["tag1".into()],
group,
aggregate: Some(Aggregate {
r#type: aggregate::AggregateType::Sum as i32,
}),
hints: 42,
};

let response_string = fixture
.storage_client
.read_group(request)
.await
.unwrap_err()
.to_string();
assert_contains!(
response_string,
"Unexpected hint value on read_group request. Expected 0, got 42"
);

grpc_request_metric_has_count(&fixture, "ReadGroup", "server_error", 1);

// ---
// test error returned in database processing
// ---

@@ -1930,7 +1899,6 @@ mod tests {
aggregate: Some(Aggregate {
r#type: aggregate::AggregateType::Sum as i32,
}),
hints: 0,
};

// Note we don't set the response on the test database, so we expect an error
@@ -1349,7 +1349,7 @@ async fn test_get_server_status_db_error() {
// create valid owner info but malformed DB rules that will put DB in an error state
let my_db_uuid = Uuid::new_v4();
let mut path = server_fixture.dir().to_path_buf();
path.push("42");
path.push("dbs");
path.push(my_db_uuid.to_string());
std::fs::create_dir_all(path.clone()).unwrap();
let mut owner_info_path = path.clone();

@@ -1360,11 +1360,13 @@ async fn test_get_server_status_db_error() {

// create the server config listing the ownership of this database
let mut path = server_fixture.dir().to_path_buf();
path.push("nodes");
path.push("42");
std::fs::create_dir_all(path.clone()).unwrap();
path.push("config.pb");

let data = ServerConfig {
databases: vec![(String::from("my_db"), format!("42/{}", my_db_uuid))]
databases: vec![(String::from("my_db"), format!("dbs/{}", my_db_uuid))]
.into_iter()
.collect(),
};
@@ -63,6 +63,7 @@ async fn read_filter_endpoint(storage_client: &mut StorageClient<Connection>, sc
read_source,
range,
predicate,
..Default::default()
});
let read_response = storage_client
.read_filter(read_filter_request)

@@ -316,6 +317,7 @@ pub async fn regex_operator_test() {
end: 2001, // include all data
}),
predicate: Some(make_regex_match_predicate("host", "^b.+")),
..Default::default()
};

let expected_frames = vec![

@@ -391,7 +393,6 @@ async fn test_read_group_none_agg() {
aggregate: Some(Aggregate {
r#type: AggregateType::None as i32,
}),
hints: 0,
};

let expected_group_frames = vec![

@@ -442,7 +443,6 @@ async fn test_read_group_none_agg_with_predicate() {
aggregate: Some(Aggregate {
r#type: AggregateType::None as i32,
}),
hints: 0,
};

let expected_group_frames = vec![

@@ -488,7 +488,6 @@ async fn test_read_group_sum_agg() {
aggregate: Some(Aggregate {
r#type: AggregateType::Sum as i32,
}),
hints: 0,
};

let expected_group_frames = vec![

@@ -541,7 +540,6 @@ async fn test_read_group_count_agg() {
aggregate: Some(Aggregate {
r#type: AggregateType::Count as i32,
}),
hints: 0,
};

let expected_group_frames = vec![

@@ -595,7 +593,6 @@ async fn test_read_group_last_agg() {
aggregate: Some(Aggregate {
r#type: AggregateType::Last as i32,
}),
hints: 0,
};

let expected_group_frames = vec![

@@ -85,6 +85,7 @@ pub async fn test_tracing_storage_api() {
read_source,
range,
predicate,
..Default::default()
});
let mut storage_client = StorageClient::new(server_fixture.grpc_channel());
let read_response = storage_client
@@ -27,7 +27,7 @@ mutable_batch_pb = { path = "../mutable_batch_pb", optional = true }
prost = "0.8"
rand = "0.8.3"
serde = "1.0.128"
serde_json = { version = "1.0.70", optional = true }
serde_json = { version = "1.0.71", optional = true }
thiserror = "1.0.30"
tonic = { version = "0.5.0" }
uuid = { version = "0.8", features = ["v4"] }

@@ -9,6 +9,7 @@ nom = "7"
smallvec = "1.7.0"
snafu = "0.6.2"
observability_deps = { path = "../observability_deps" }
workspace-hack = { path = "../workspace-hack"}

[dev-dependencies] # In alphabetical order
test_helpers = { path = "../test_helpers" }

@@ -10,5 +10,6 @@ generated_types = { path = "../generated_types" }
prost = "0.8"
tonic = { version = "0.5.0" }
futures-util = { version = "0.3.1" }
workspace-hack = { path = "../workspace-hack"}

[dev-dependencies]

@@ -21,7 +21,6 @@ use std::collections::HashMap;

/// Re-export generated_types
pub mod generated_types {
pub use generated_types::com::github::influxdata::idpe::storage::read::*;
pub use generated_types::influxdata::platform::storage::*;
}

@@ -97,8 +96,7 @@ impl Client {
.encode(&mut d)
.expect("encoded read source appropriately");
Any {
type_url: "type.googleapis.com/com.github.influxdata.idpe.storage.read.ReadSource"
.to_string(),
type_url: "type.googleapis.com/influxdata.platform.storage.read.ReadSource".to_string(),
value: d.freeze(),
}
}

@@ -9,6 +9,7 @@ integer-encoding = "3.0.2"
snafu = "0.6.2"
snap = "1.0.0"
observability_deps = { path = "../observability_deps" }
workspace-hack = { path = "../workspace-hack"}

[dev-dependencies] # In alphabetical order
flate2 = "1.0"

@@ -10,6 +10,7 @@ readme = "README.md"
parking_lot = "0.11"
time = { path = "../time" }
tokio = { version = "1.13", features = ["sync"] }
workspace-hack = { path = "../workspace-hack"}

[dev-dependencies]
futures = "0.3"

@@ -10,7 +10,7 @@ chrono = "0.4.13"
chrono-english = "0.1.4"
clap = "2.33.1"
futures = "0.3.5"
handlebars = "4.1.4"
handlebars = "4.1.5"
humantime = "2.1.0"
data_types = { path = "../data_types" }
generated_types = { path = "../generated_types" }

@@ -19,12 +19,12 @@ influxdb_iox_client = { path = "../influxdb_iox_client" }
itertools = "0.10.0"
rand = { version = "0.8.3", features = ["small_rng"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.70"
serde_json = "1.0.71"
snafu = "0.6.8"
tokio = { version = "1.13", features = ["macros", "rt-multi-thread"] }
toml = "0.5.6"
tracing = "0.1"
tracing-subscriber = "0.3.1"
tracing-subscriber = "0.3.2"
uuid = { version = "0.8.1", default_features = false }

[dev-dependencies]

@@ -14,6 +14,7 @@ snafu = "0.6"
tokio = { version = "1.13", features = ["macros", "time"] }
tokio-stream = "0.1"
uuid = { version = "0.8", features = ["serde", "v4"] }
workspace-hack = { path = "../workspace-hack"}

[dev-dependencies] # In alphabetical order
test_helpers = { path = "../test_helpers" }
@@ -59,9 +59,23 @@ pub struct IoxObjectStore {
impl IoxObjectStore {
/// Get the data for the server config to determine the names and locations of the databases
/// that this server owns.
///
/// TEMPORARY: Server config used to be at the top level instead of beneath `/nodes/`. Until
/// all deployments have transitioned, check both locations before reporting that the server
/// config is not found.
pub async fn get_server_config_file(inner: &ObjectStore, server_id: ServerId) -> Result<Bytes> {
let path = paths::server_config_path(inner, server_id);
let mut stream = inner.get(&path).await?;
let mut stream = match inner.get(&path).await {
Err(object_store::Error::NotFound { .. }) => {
use object_store::path::ObjectStorePath;
let mut legacy_path = inner.new_path();
legacy_path.push_dir(server_id.to_string());
legacy_path.set_file_name(paths::SERVER_CONFIG_FILE_NAME);

inner.get(&legacy_path).await
}
other => other,
}?;
let mut bytes = BytesMut::new();

while let Some(buf) = stream.next().await {
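A standalone sketch of the fallback pattern introduced above: only a NotFound error on the new `nodes/<server_id>/config.pb` location triggers a second lookup at the legacy `<server_id>/config.pb` path; any other error propagates. The enum and `get` helper below are simplified stand-ins, not the `object_store` crate's API.

```rust
// Sketch of "try new path, fall back to legacy path on NotFound".
#[derive(Debug, PartialEq)]
enum GetError {
    NotFound,
    Other(String), // illustrative: any non-NotFound error skips the fallback
}

// Stand-in for an object store fetch; pretend only the legacy location exists.
fn get(path: &str) -> Result<Vec<u8>, GetError> {
    if path == "42/config.pb" {
        Ok(b"server config".to_vec())
    } else {
        Err(GetError::NotFound)
    }
}

fn get_server_config(server_id: u32) -> Result<Vec<u8>, GetError> {
    let new_path = format!("nodes/{}/config.pb", server_id);
    match get(&new_path) {
        Err(GetError::NotFound) => get(&format!("{}/config.pb", server_id)),
        other => other,
    }
}

fn main() {
    assert_eq!(get_server_config(42).unwrap(), b"server config".to_vec());
}
```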
@@ -15,13 +15,15 @@ pub mod transaction_file;
use transaction_file::TransactionFilePath;

pub(crate) const ALL_DATABASES_DIRECTORY: &str = "dbs";
const SERVER_CONFIG_FILE_NAME: &str = "config.pb";
const ALL_SERVERS_DIRECTORY: &str = "nodes";
pub(crate) const SERVER_CONFIG_FILE_NAME: &str = "config.pb";
const DATABASE_OWNER_FILE_NAME: &str = "owner.pb";

/// The path to the server file containing the list of databases this server owns.
// TODO: this is in the process of replacing all_databases_path for the floating databases design
pub(crate) fn server_config_path(object_store: &ObjectStore, server_id: ServerId) -> Path {
let mut path = object_store.new_path();
path.push_dir(ALL_SERVERS_DIRECTORY);
path.push_dir(server_id.to_string());
path.set_file_name(SERVER_CONFIG_FILE_NAME);
path

@@ -15,6 +15,7 @@ parking_lot = "0.11"
time = { path = "../time" }
tokio = { version = "1.13", features = ["macros", "time"] }
tracker = { path = "../tracker" }
workspace-hack = { path = "../workspace-hack"}

[dev-dependencies]
tokio = { version = "1.13", features = ["macros", "time", "rt"] }

@@ -79,6 +79,12 @@ pub trait LockablePartition: Sized + std::fmt::Display {
chunks: Vec<LifecycleWriteGuard<'_, <Self::Chunk as LockableChunk>::Chunk, Self::Chunk>>,
) -> Result<TaskTracker<<Self::Chunk as LockableChunk>::Job>, Self::Error>;

/// Compact object store chunks into a single object store chunk
fn compact_object_store_chunks(
partition: LifecycleWriteGuard<'_, Self::Partition, Self>,
chunks: Vec<LifecycleWriteGuard<'_, <Self::Chunk as LockableChunk>::Chunk, Self::Chunk>>,
) -> Result<TaskTracker<<Self::Chunk as LockableChunk>::Job>, Self::Error>;

/// Returns a PersistHandle for the provided partition, and the
/// timestamp up to which to to flush
///

@@ -908,6 +908,13 @@ mod tests {
Ok(db.registry.lock().complete(()))
}

fn compact_object_store_chunks(
_partition: LifecycleWriteGuard<'_, TestPartition, Self>,
_chunks: Vec<LifecycleWriteGuard<'_, TestChunk, Self::Chunk>>,
) -> Result<TaskTracker<()>, Self::Error> {
unimplemented!("The test does not need compact os chunks");
}

fn prepare_persist(
partition: &mut LifecycleWriteGuard<'_, Self::Partition, Self>,
_force: bool,
@ -8,6 +8,7 @@ edition = "2021"
|
|||
[dependencies] # In alphabetical order
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
tracing-subscriber = "0.3"
|
||||
workspace-hack = { path = "../workspace-hack"}
|
||||
|
||||
[dev-dependencies] # In alphabetical order
|
||||
once_cell = { version = "1.4.0", features = ["parking_lot"] }
|
||||
|
|
|
@ -7,5 +7,6 @@ edition = "2021"
|
|||
[dependencies] # In alphabetical order
|
||||
|
||||
parking_lot = "0.11"
|
||||
workspace-hack = { path = "../workspace-hack"}
|
||||
|
||||
[dev-dependencies] # In alphabetical order
|
||||
|
|
|
@ -9,6 +9,7 @@ edition = "2021"
|
|||
observability_deps = { path = "../observability_deps" }
|
||||
metric = { path = "../metric" }
|
||||
prometheus = { version = "0.13", default-features = false }
|
||||
workspace-hack = { path = "../workspace-hack"}
|
||||
|
||||
[dev-dependencies] # In alphabetical order
|
||||
test_helpers = { path = "../test_helpers" }
|
||||
|
|
|
@ -13,6 +13,7 @@ schema = { path = "../schema" }
|
|||
snafu = "0.6"
|
||||
hashbrown = "0.11"
|
||||
itertools = "0.10"
|
||||
workspace-hack = { path = "../workspace-hack"}
|
||||
|
||||
[dev-dependencies]
|
||||
rand = "0.8"
|
||||
|
|
|
@ -10,6 +10,7 @@ influxdb_line_protocol = { path = "../influxdb_line_protocol" }
|
|||
mutable_batch = { path = "../mutable_batch" }
|
||||
schema = { path = "../schema" }
|
||||
snafu = "0.6"
|
||||
workspace-hack = { path = "../workspace-hack"}
|
||||
|
||||
[dev-dependencies]
|
||||
arrow_util = { path = "../arrow_util" }
|
||||
|
|
|
@ -12,6 +12,7 @@ hashbrown = "0.11"
|
|||
mutable_batch = { path = "../mutable_batch" }
|
||||
schema = { path = "../schema" }
|
||||
snafu = "0.6"
|
||||
workspace-hack = { path = "../workspace-hack"}
|
||||
|
||||
[dev-dependencies]
|
||||
mutable_batch_lp = { path = "../mutable_batch_lp" }
|
||||
|
|
|
@ -14,6 +14,7 @@ mutable_batch_lp = { path = "../mutable_batch_lp" }
observability_deps = { path = "../observability_deps" }
parking_lot = "0.11.2"
snafu = "0.6.2"
workspace-hack = { path = "../workspace-hack"}

[dev-dependencies] # In alphabetical order
tokio = { version = "1.13", features = ["macros"] }
@ -31,6 +31,7 @@ reqwest = { version = "0.11", optional = true }
# Filesystem integration
walkdir = "2"
tempfile = "3.1.0"
workspace-hack = { path = "../workspace-hack"}

[features]
azure = ["azure_core", "azure_storage", "indexmap", "reqwest"]
@ -7,3 +7,4 @@ description = "Observability ecosystem dependencies for InfluxDB IOx, to ensure

[dependencies] # In alphabetical order
tracing = { version = "0.1", features = ["max_level_trace", "release_max_level_debug"] }
workspace-hack = { path = "../workspace-hack"}
@ -11,6 +11,7 @@ schema = { path = "../schema" }
snafu = "0.6.2"
observability_deps = { path = "../observability_deps" }
parquet = "6.0"
workspace-hack = { path = "../workspace-hack"}

[dev-dependencies] # In alphabetical order
criterion = "0.3.3"
@ -6,3 +6,4 @@ edition = "2021"

[dependencies] # In alphabetical order
observability_deps = { path = "../observability_deps" }
workspace-hack = { path = "../workspace-hack"}
@ -33,3 +33,4 @@ tokio = { version = "1.13", features = ["macros", "rt", "rt-multi-thread", "sync
tokio-stream = "0.1"
uuid = { version = "0.8", features = ["serde", "v4"] }
zstd = "0.9"
workspace-hack = { path = "../workspace-hack"}
@ -155,7 +155,7 @@ impl CatalogState for TracerCatalogState {
mod tests {
    use super::*;
    use crate::test_helpers::{make_config, new_empty};
    use parquet_file::test_utils::{chunk_addr, make_metadata, TestSize};
    use parquet_file::test_utils::generator::ChunkGenerator;
    use std::{collections::HashSet, sync::Arc};
    use tokio::sync::RwLock;
@ -176,6 +176,7 @@ mod tests {
|
|||
async fn test_cleanup_rules() {
|
||||
let config = make_config().await;
|
||||
let iox_object_store = &config.iox_object_store;
|
||||
let mut generator = ChunkGenerator::new_with_store(Arc::clone(iox_object_store));
|
||||
|
||||
let catalog = new_empty(config.clone()).await;
|
||||
|
||||
|
@ -186,36 +187,20 @@ mod tests {
|
|||
let mut transaction = catalog.open_transaction().await;
|
||||
|
||||
// an ordinary tracked parquet file => keep
|
||||
let (path, metadata) =
|
||||
make_metadata(iox_object_store, "foo", chunk_addr(1), TestSize::Full).await;
|
||||
let metadata = Arc::new(metadata);
|
||||
let info = CatalogParquetInfo {
|
||||
path,
|
||||
file_size_bytes: 33,
|
||||
metadata,
|
||||
};
|
||||
|
||||
transaction.add_parquet(&info);
|
||||
paths_keep.push(info.path);
|
||||
let (chunk, _) = generator.generate().await;
|
||||
transaction.add_parquet(&CatalogParquetInfo::from_chunk(&chunk));
|
||||
paths_keep.push(chunk.path().clone());
|
||||
|
||||
// another ordinary tracked parquet file that was added and removed => keep (for time
|
||||
// travel)
|
||||
let (path, metadata) =
|
||||
make_metadata(iox_object_store, "foo", chunk_addr(2), TestSize::Full).await;
|
||||
let metadata = Arc::new(metadata);
|
||||
let info = CatalogParquetInfo {
|
||||
path,
|
||||
file_size_bytes: 33,
|
||||
metadata,
|
||||
};
|
||||
transaction.add_parquet(&info);
|
||||
transaction.remove_parquet(&info.path);
|
||||
paths_keep.push(info.path);
|
||||
let (chunk, _) = generator.generate().await;
|
||||
transaction.add_parquet(&CatalogParquetInfo::from_chunk(&chunk));
|
||||
transaction.remove_parquet(chunk.path());
|
||||
paths_keep.push(chunk.path().clone());
|
||||
|
||||
// an untracked parquet file => delete
|
||||
let (path, _md) =
|
||||
make_metadata(iox_object_store, "foo", chunk_addr(3), TestSize::Full).await;
|
||||
paths_delete.push(path);
|
||||
let (chunk, _) = generator.generate().await;
|
||||
paths_delete.push(chunk.path().clone());
|
||||
|
||||
transaction.commit().await.unwrap();
|
||||
}
|
||||
|
@ -224,6 +209,7 @@ mod tests {
|
|||
let files = get_unreferenced_parquet_files(&catalog, 1_000)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
delete_files(&catalog, &files).await.unwrap();
|
||||
|
||||
// deleting a second time should just work
|
||||
|
@ -243,39 +229,33 @@ mod tests {
|
|||
async fn test_cleanup_with_parallel_transaction() {
|
||||
let config = make_config().await;
|
||||
let iox_object_store = &config.iox_object_store;
|
||||
let mut generator = ChunkGenerator::new_with_store(Arc::clone(iox_object_store));
|
||||
let lock: RwLock<()> = Default::default();
|
||||
|
||||
let catalog = new_empty(config.clone()).await;
|
||||
|
||||
// try multiple times to provoke a conflict
|
||||
for i in 0..100 {
|
||||
for i in 1..100 {
|
||||
// Every so often try to create a file with the same ChunkAddr beforehand. This should
|
||||
// not trick the cleanup logic into removing the actual file because file paths contain a
|
||||
// UUIDv4 part.
|
||||
if i % 2 == 0 {
|
||||
make_metadata(iox_object_store, "foo", chunk_addr(i), TestSize::Full).await;
|
||||
generator.generate_id(i).await;
|
||||
}
|
||||
|
||||
let (path, _) = tokio::join!(
|
||||
let (chunk, _) = tokio::join!(
|
||||
async {
|
||||
let guard = lock.read().await;
|
||||
let (path, md) =
|
||||
make_metadata(iox_object_store, "foo", chunk_addr(i), TestSize::Full).await;
|
||||
|
||||
let metadata = Arc::new(md);
|
||||
let info = CatalogParquetInfo {
|
||||
path,
|
||||
file_size_bytes: 33,
|
||||
metadata,
|
||||
};
|
||||
let (chunk, _) = generator.generate_id(i).await;
|
||||
|
||||
let mut transaction = catalog.open_transaction().await;
|
||||
transaction.add_parquet(&info);
|
||||
transaction.add_parquet(&CatalogParquetInfo::from_chunk(&chunk));
|
||||
transaction.commit().await.unwrap();
|
||||
|
||||
drop(guard);
|
||||
|
||||
info.path
|
||||
chunk
|
||||
},
|
||||
async {
|
||||
let guard = lock.write().await;
|
||||
|
@ -289,7 +269,7 @@ mod tests {
|
|||
);
|
||||
|
||||
let all_files = list_all_files(iox_object_store).await;
|
||||
assert!(dbg!(all_files).contains(dbg!(&path)));
|
||||
assert!(dbg!(all_files).contains(dbg!(chunk.path())));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -297,20 +277,15 @@ mod tests {
|
|||
async fn test_cleanup_max_files() {
|
||||
let config = make_config().await;
|
||||
let iox_object_store = &config.iox_object_store;
|
||||
let mut generator = ChunkGenerator::new_with_store(Arc::clone(iox_object_store));
|
||||
|
||||
let catalog = new_empty(config.clone()).await;
|
||||
|
||||
// create some files
|
||||
let mut to_remove = HashSet::default();
|
||||
for chunk_id in 0..3 {
|
||||
let (path, _md) = make_metadata(
|
||||
iox_object_store,
|
||||
"foo",
|
||||
chunk_addr(chunk_id),
|
||||
TestSize::Full,
|
||||
)
|
||||
.await;
|
||||
to_remove.insert(path);
|
||||
for _ in 0..3 {
|
||||
let (chunk, _) = generator.generate().await;
|
||||
to_remove.insert(chunk.path().clone());
|
||||
}
|
||||
|
||||
// run clean-up
|
||||
|
|
|
@ -1064,7 +1064,10 @@ mod tests {
    use std::vec;

    use bytes::Bytes;
    use parquet_file::test_utils::{chunk_addr, make_iox_object_store, make_metadata, TestSize};
    use data_types::chunk_metadata::ChunkAddr;
    use parquet_file::chunk::ParquetChunk;
    use parquet_file::test_utils::generator::ChunkGenerator;
    use parquet_file::test_utils::make_iox_object_store;

    use super::*;
    use crate::test_helpers::{
@ -1642,6 +1645,7 @@ mod tests {
|
|||
async fn test_checkpoint() {
|
||||
let config = make_config().await;
|
||||
let mut trace = assert_single_catalog_inmem_works(config.clone()).await;
|
||||
let mut generator = ChunkGenerator::new_with_store(Arc::clone(&config.iox_object_store));
|
||||
|
||||
// re-open catalog
|
||||
let (catalog, mut state) = load_ok(config.clone()).await.unwrap();
|
||||
|
@ -1659,21 +1663,10 @@ mod tests {
|
|||
|
||||
// create another transaction on-top that adds a file (this transaction will be required to load the full state)
|
||||
{
|
||||
let addr = chunk_addr(1337);
|
||||
let (path, metadata) = make_metadata(
|
||||
&config.iox_object_store,
|
||||
"foo",
|
||||
addr.clone(),
|
||||
TestSize::Full,
|
||||
)
|
||||
.await;
|
||||
let (chunk, _) = generator.generate_id(1337).await;
|
||||
|
||||
let mut transaction = catalog.open_transaction().await;
|
||||
let info = CatalogParquetInfo {
|
||||
path,
|
||||
file_size_bytes: 33,
|
||||
metadata: Arc::new(metadata),
|
||||
};
|
||||
let info = CatalogParquetInfo::from_chunk(&chunk);
|
||||
state.insert(info.clone()).unwrap();
|
||||
transaction.add_parquet(&info);
|
||||
let ckpt_handle = transaction.commit().await.unwrap();
|
||||
|
@ -1713,6 +1706,7 @@ mod tests {
|
|||
async fn test_delete_predicates() {
|
||||
let config = make_config().await;
|
||||
let iox_object_store = &config.iox_object_store;
|
||||
let mut generator = ChunkGenerator::new_with_store(Arc::clone(iox_object_store));
|
||||
|
||||
let catalog = new_empty(config.clone()).await;
|
||||
let mut state = TestCatalogState::default();
|
||||
|
@ -1722,16 +1716,11 @@ mod tests {
|
|||
|
||||
// create 3 chunks
|
||||
let mut chunk_addrs = vec![];
|
||||
for id in 0..3 {
|
||||
let chunk_addr = chunk_addr(id);
|
||||
let (path, metadata) =
|
||||
make_metadata(iox_object_store, "foo", chunk_addr.clone(), TestSize::Full)
|
||||
.await;
|
||||
let info = CatalogParquetInfo {
|
||||
path,
|
||||
file_size_bytes: 33,
|
||||
metadata: Arc::new(metadata),
|
||||
};
|
||||
for _ in 0..3 {
|
||||
let (chunk, metadata) = generator.generate().await;
|
||||
let chunk_addr = ChunkAddr::new(generator.partition(), metadata.chunk_id);
|
||||
|
||||
let info = CatalogParquetInfo::from_chunk(&chunk);
|
||||
state.insert(info.clone()).unwrap();
|
||||
t.add_parquet(&info);
|
||||
|
||||
|
@ -1819,6 +1808,29 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
/// Assert that the set of parquet files tracked by a catalog is identical to the given sorted list.
|
||||
fn assert_catalog_chunks(state: &TestCatalogState, expected: &[ParquetChunk]) {
|
||||
let actual = get_catalog_parquet_files(state);
|
||||
let mut expected: Vec<_> = expected.iter().collect();
|
||||
expected.sort_by(|a, b| a.path().cmp(b.path()));
|
||||
|
||||
for ((actual_path, actual_md), chunk) in actual.iter().zip(expected.iter()) {
|
||||
assert_eq!(actual_path, chunk.path());
|
||||
|
||||
let actual_md = actual_md.decode().unwrap();
|
||||
|
||||
let actual_schema = actual_md.read_schema().unwrap();
|
||||
let expected_schema = chunk.schema();
|
||||
assert_eq!(actual_schema, expected_schema);
|
||||
|
||||
// NOTE: the actual table name is not important here as long as it is the same for both calls, since it is
|
||||
// only used to generate our statistics struct (not to read / dispatch anything).
|
||||
let actual_stats = actual_md.read_statistics(&actual_schema).unwrap();
|
||||
let expected_stats = &chunk.table_summary().columns;
|
||||
assert_eq!(&actual_stats, expected_stats);
|
||||
}
|
||||
}
|
||||
|
||||
async fn checked_delete(iox_object_store: &IoxObjectStore, path: &TransactionFilePath) {
|
||||
// issue a full GET operation to check if the object is present
|
||||
iox_object_store
|
||||
|
@ -1872,6 +1884,7 @@ mod tests {
|
|||
|
||||
async fn assert_single_catalog_inmem_works(config: PreservedCatalogConfig) -> TestTrace {
|
||||
let iox_object_store = &config.iox_object_store;
|
||||
let mut generator = ChunkGenerator::new_with_store(Arc::clone(iox_object_store));
|
||||
let catalog = new_empty(config.clone()).await;
|
||||
let mut state = TestCatalogState::default();
|
||||
|
||||
|
@ -1889,102 +1902,56 @@ mod tests {
|
|||
{
|
||||
let mut t = catalog.open_transaction().await;
|
||||
|
||||
let (path, metadata) =
|
||||
make_metadata(iox_object_store, "foo", chunk_addr(0), TestSize::Full).await;
|
||||
expected.push((path.clone(), metadata.clone()));
|
||||
let info = CatalogParquetInfo {
|
||||
path,
|
||||
file_size_bytes: 33,
|
||||
metadata: Arc::new(metadata),
|
||||
};
|
||||
state.insert(info.clone()).unwrap();
|
||||
t.add_parquet(&info);
|
||||
|
||||
let (path, metadata) =
|
||||
make_metadata(iox_object_store, "bar", chunk_addr(1), TestSize::Full).await;
|
||||
expected.push((path.clone(), metadata.clone()));
|
||||
let info = CatalogParquetInfo {
|
||||
path,
|
||||
file_size_bytes: 33,
|
||||
metadata: Arc::new(metadata),
|
||||
};
|
||||
state.insert(info.clone()).unwrap();
|
||||
t.add_parquet(&info);
|
||||
|
||||
let (path, metadata) =
|
||||
make_metadata(iox_object_store, "bar", chunk_addr(2), TestSize::Full).await;
|
||||
expected.push((path.clone(), metadata.clone()));
|
||||
let info = CatalogParquetInfo {
|
||||
path,
|
||||
file_size_bytes: 33,
|
||||
metadata: Arc::new(metadata),
|
||||
};
|
||||
state.insert(info.clone()).unwrap();
|
||||
t.add_parquet(&info);
|
||||
|
||||
let (path, metadata) =
|
||||
make_metadata(iox_object_store, "foo", chunk_addr(3), TestSize::Full).await;
|
||||
expected.push((path.clone(), metadata.clone()));
|
||||
let info = CatalogParquetInfo {
|
||||
path,
|
||||
file_size_bytes: 33,
|
||||
metadata: Arc::new(metadata),
|
||||
};
|
||||
state.insert(info.clone()).unwrap();
|
||||
t.add_parquet(&info);
|
||||
for _ in 0..4 {
|
||||
let (chunk, _) = generator.generate().await;
|
||||
let info = CatalogParquetInfo::from_chunk(&chunk);
|
||||
expected.push(chunk);
|
||||
state.insert(info.clone()).unwrap();
|
||||
t.add_parquet(&info);
|
||||
}
|
||||
|
||||
t.commit().await.unwrap();
|
||||
}
|
||||
assert_eq!(catalog.revision_counter(), 1);
|
||||
assert_catalog_parquet_files(&state, &expected);
|
||||
assert_catalog_chunks(&state, &expected);
|
||||
trace.record(&catalog, &state, false);
|
||||
|
||||
// modify catalog with examples
|
||||
{
|
||||
let (path, metadata) =
|
||||
make_metadata(iox_object_store, "foo", chunk_addr(4), TestSize::Full).await;
|
||||
expected.push((path.clone(), metadata.clone()));
|
||||
let (chunk, _) = generator.generate().await;
|
||||
let info = CatalogParquetInfo::from_chunk(&chunk);
|
||||
expected.push(chunk);
|
||||
|
||||
let mut t = catalog.open_transaction().await;
|
||||
|
||||
// "real" modifications
|
||||
let info = CatalogParquetInfo {
|
||||
path,
|
||||
file_size_bytes: 33,
|
||||
metadata: Arc::new(metadata),
|
||||
};
|
||||
state.insert(info.clone()).unwrap();
|
||||
t.add_parquet(&info);
|
||||
|
||||
let (path, _) = expected.remove(0);
|
||||
state.remove(&path).unwrap();
|
||||
t.remove_parquet(&path);
|
||||
let chunk = expected.remove(0);
|
||||
state.remove(chunk.path()).unwrap();
|
||||
t.remove_parquet(chunk.path());
|
||||
|
||||
t.commit().await.unwrap();
|
||||
}
|
||||
assert_eq!(catalog.revision_counter(), 2);
|
||||
assert_catalog_parquet_files(&state, &expected);
|
||||
assert_catalog_chunks(&state, &expected);
|
||||
trace.record(&catalog, &state, false);
|
||||
|
||||
// uncommitted modifications have no effect
|
||||
{
|
||||
let mut t = catalog.open_transaction().await;
|
||||
|
||||
let (path, metadata) =
|
||||
make_metadata(iox_object_store, "foo", chunk_addr(1), TestSize::Full).await;
|
||||
let info = CatalogParquetInfo {
|
||||
path,
|
||||
file_size_bytes: 33,
|
||||
metadata: Arc::new(metadata),
|
||||
};
|
||||
let (chunk, _) = generator.generate().await;
|
||||
let info = CatalogParquetInfo::from_chunk(&chunk);
|
||||
|
||||
t.add_parquet(&info);
|
||||
t.remove_parquet(&expected[0].0);
|
||||
t.remove_parquet(expected[0].path());
|
||||
|
||||
// NO commit here!
|
||||
}
|
||||
assert_eq!(catalog.revision_counter(), 2);
|
||||
assert_catalog_parquet_files(&state, &expected);
|
||||
assert_catalog_chunks(&state, &expected);
|
||||
trace.record(&catalog, &state, true);
|
||||
|
||||
trace
|
||||
|
|
|
@ -222,7 +222,7 @@ impl Debug for Metadata {
mod tests {
    use super::*;
    use crate::{core::PreservedCatalog, interface::CatalogParquetInfo, test_helpers::make_config};
    use parquet_file::test_utils::{chunk_addr, make_metadata, TestSize};
    use parquet_file::test_utils::generator::{ChunkGenerator, GeneratorConfig};
    use time::Time;
    use uuid::Uuid;
@ -235,21 +235,15 @@ mod tests {
|
|||
.with_time_provider(time_provider);
|
||||
|
||||
let iox_object_store = &config.iox_object_store;
|
||||
let mut generator = ChunkGenerator::new_with_store(Arc::clone(iox_object_store));
|
||||
generator.set_config(GeneratorConfig::Simple);
|
||||
|
||||
// build catalog with some data
|
||||
let catalog = PreservedCatalog::new_empty(config.clone()).await.unwrap();
|
||||
{
|
||||
let (chunk, _) = generator.generate().await;
|
||||
let mut transaction = catalog.open_transaction().await;
|
||||
|
||||
let (path, metadata) =
|
||||
make_metadata(iox_object_store, "foo", chunk_addr(0), TestSize::Minimal).await;
|
||||
let info = CatalogParquetInfo {
|
||||
path,
|
||||
file_size_bytes: 33,
|
||||
metadata: Arc::new(metadata),
|
||||
};
|
||||
transaction.add_parquet(&info);
|
||||
|
||||
transaction.add_parquet(&CatalogParquetInfo::from_chunk(&chunk));
|
||||
transaction.commit().await.unwrap();
|
||||
}
|
||||
|
||||
|
@ -304,11 +298,11 @@ File {
|
|||
"table1",
|
||||
"part1",
|
||||
],
|
||||
file_name: "00000000-0000-0000-0000-000000000000.parquet",
|
||||
file_name: "00000000-0000-0000-0000-000000000001.parquet",
|
||||
},
|
||||
),
|
||||
file_size_bytes: 33,
|
||||
metadata: b"metadata omitted (937 bytes)",
|
||||
file_size_bytes: 3052,
|
||||
metadata: b"metadata omitted (935 bytes)",
|
||||
},
|
||||
),
|
||||
),
|
||||
|
@ -352,21 +346,15 @@ File {
|
|||
.with_fixed_uuid(Uuid::nil())
|
||||
.with_time_provider(time_provider);
|
||||
let iox_object_store = &config.iox_object_store;
|
||||
let mut generator = ChunkGenerator::new_with_store(Arc::clone(iox_object_store));
|
||||
generator.set_config(GeneratorConfig::Simple);
|
||||
|
||||
// build catalog with some data
|
||||
let catalog = PreservedCatalog::new_empty(config.clone()).await.unwrap();
|
||||
{
|
||||
let (chunk, _) = generator.generate().await;
|
||||
let mut transaction = catalog.open_transaction().await;
|
||||
|
||||
let (path, metadata) =
|
||||
make_metadata(iox_object_store, "foo", chunk_addr(0), TestSize::Minimal).await;
|
||||
let info = CatalogParquetInfo {
|
||||
path,
|
||||
file_size_bytes: 33,
|
||||
metadata: Arc::new(metadata),
|
||||
};
|
||||
transaction.add_parquet(&info);
|
||||
|
||||
transaction.add_parquet(&CatalogParquetInfo::from_chunk(&chunk));
|
||||
transaction.commit().await.unwrap();
|
||||
}
|
||||
|
||||
|
@ -426,11 +414,11 @@ File {
|
|||
"table1",
|
||||
"part1",
|
||||
],
|
||||
file_name: "00000000-0000-0000-0000-000000000000.parquet",
|
||||
file_name: "00000000-0000-0000-0000-000000000001.parquet",
|
||||
},
|
||||
),
|
||||
file_size_bytes: 33,
|
||||
metadata: b"metadata omitted (937 bytes)",
|
||||
file_size_bytes: 3052,
|
||||
metadata: b"metadata omitted (935 bytes)",
|
||||
},
|
||||
),
|
||||
),
|
||||
|
@ -460,7 +448,7 @@ File {
|
|||
table_name: "table1",
|
||||
partition_key: "part1",
|
||||
chunk_id: ChunkId(
|
||||
0,
|
||||
1,
|
||||
),
|
||||
partition_checkpoint: PartitionCheckpoint {
|
||||
table_name: "table1",
|
||||
|
@ -500,7 +488,7 @@ File {
|
|||
},
|
||||
},
|
||||
chunk_order: ChunkOrder(
|
||||
5,
|
||||
1,
|
||||
),
|
||||
},
|
||||
),
|
||||
|
|
|
@ -7,6 +7,7 @@ use std::{
use data_types::chunk_metadata::{ChunkAddr, ChunkId};
use data_types::delete_predicate::DeletePredicate;
use iox_object_store::{IoxObjectStore, ParquetFilePath};
use parquet_file::chunk::ParquetChunk;
use snafu::Snafu;

use parquet_file::metadata::IoxParquetMetaData;
@ -24,6 +25,17 @@ pub struct CatalogParquetInfo {
    pub metadata: Arc<IoxParquetMetaData>,
}

impl CatalogParquetInfo {
    /// Creates a [`CatalogParquetInfo`] from a [`ParquetChunk`]
    pub fn from_chunk(chunk: &ParquetChunk) -> Self {
        Self {
            path: chunk.path().clone(),
            file_size_bytes: chunk.file_size_bytes(),
            metadata: chunk.parquet_metadata(),
        }
    }
}

/// Same as [ChunkAddr] but w/o the database part.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ChunkAddrWithoutDatabase {
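In the test updates elsewhere in this diff, hand-assembled `CatalogParquetInfo` literals (a hard-coded `file_size_bytes: 33` plus separately produced metadata) give way to this constructor. A minimal, hypothetical sketch of the new pattern, assuming it lives in the preserved-catalog test code where `make_config`, `new_empty`, and `CatalogParquetInfo` are already in scope (test name is illustrative, not part of this change):

    use parquet_file::test_utils::generator::ChunkGenerator;

    #[tokio::test]
    async fn add_parquet_via_from_chunk() {
        // Persist a chunk with the test generator and derive the catalog info
        // from it instead of building CatalogParquetInfo by hand.
        let config = make_config().await;
        let mut generator = ChunkGenerator::new_with_store(Arc::clone(&config.iox_object_store));
        let catalog = new_empty(config.clone()).await;

        let mut transaction = catalog.open_transaction().await;
        let (chunk, _metadata) = generator.generate().await;
        transaction.add_parquet(&CatalogParquetInfo::from_chunk(&chunk));
        transaction.commit().await.unwrap();
    }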
@ -10,11 +10,14 @@ use crate::{
|
|||
},
|
||||
};
|
||||
use data_types::delete_predicate::{DeleteExpr, DeletePredicate, Op, Scalar};
|
||||
use data_types::{chunk_metadata::ChunkId, timestamp::TimestampRange};
|
||||
use data_types::{
|
||||
chunk_metadata::{ChunkAddr, ChunkId},
|
||||
timestamp::TimestampRange,
|
||||
};
|
||||
use iox_object_store::{IoxObjectStore, ParquetFilePath, TransactionFilePath};
|
||||
use parquet_file::{
|
||||
metadata::IoxParquetMetaData,
|
||||
test_utils::{chunk_addr, make_iox_object_store, make_metadata, TestSize},
|
||||
chunk::ParquetChunk,
|
||||
test_utils::{generator::ChunkGenerator, make_iox_object_store},
|
||||
};
|
||||
use snafu::ResultExt;
|
||||
use std::{
|
||||
|
@ -259,158 +262,107 @@ where
|
|||
F: Fn(&S) -> CheckpointData + Send,
|
||||
{
|
||||
let config = make_config().await;
|
||||
let iox_object_store = &config.iox_object_store;
|
||||
let mut generator = ChunkGenerator::new_with_store(Arc::clone(iox_object_store));
|
||||
|
||||
// The expected state of the catalog
|
||||
let mut expected_files: HashMap<ChunkId, (ParquetFilePath, Arc<IoxParquetMetaData>)> =
|
||||
HashMap::new();
|
||||
let mut expected_chunks: HashMap<u32, ParquetChunk> = HashMap::new();
|
||||
let mut expected_predicates: HashMap<Arc<DeletePredicate>, HashSet<ChunkAddrWithoutDatabase>> =
|
||||
HashMap::new();
|
||||
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
|
||||
assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);
|
||||
|
||||
// add files
|
||||
{
|
||||
for chunk_id in 0..5 {
|
||||
let (path, metadata) = make_metadata(
|
||||
&config.iox_object_store,
|
||||
"ok",
|
||||
chunk_addr(chunk_id),
|
||||
TestSize::Full,
|
||||
)
|
||||
.await;
|
||||
for chunk_id in 1..5 {
|
||||
let (chunk, _) = generator.generate_id(chunk_id).await;
|
||||
state
|
||||
.add(
|
||||
Arc::clone(&config.iox_object_store),
|
||||
CatalogParquetInfo {
|
||||
path: path.clone(),
|
||||
file_size_bytes: 33,
|
||||
metadata: Arc::new(metadata.clone()),
|
||||
},
|
||||
Arc::clone(iox_object_store),
|
||||
CatalogParquetInfo::from_chunk(&chunk),
|
||||
)
|
||||
.unwrap();
|
||||
expected_files.insert(ChunkId::new_test(chunk_id), (path, Arc::new(metadata)));
|
||||
expected_chunks.insert(chunk_id, chunk);
|
||||
}
|
||||
}
|
||||
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
|
||||
assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);
|
||||
|
||||
// remove files
|
||||
{
|
||||
let (path, _) = expected_files.remove(&ChunkId::new_test(1)).unwrap();
|
||||
state.remove(&path).unwrap();
|
||||
let chunk = expected_chunks.remove(&1).unwrap();
|
||||
state.remove(chunk.path()).unwrap();
|
||||
}
|
||||
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
|
||||
assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);
|
||||
|
||||
// add and remove in the same transaction
|
||||
{
|
||||
let (path, metadata) = make_metadata(
|
||||
&config.iox_object_store,
|
||||
"ok",
|
||||
chunk_addr(5),
|
||||
TestSize::Full,
|
||||
)
|
||||
.await;
|
||||
let (chunk, _) = generator.generate_id(5).await;
|
||||
state
|
||||
.add(
|
||||
Arc::clone(&config.iox_object_store),
|
||||
CatalogParquetInfo {
|
||||
path: path.clone(),
|
||||
file_size_bytes: 33,
|
||||
metadata: Arc::new(metadata),
|
||||
},
|
||||
Arc::clone(iox_object_store),
|
||||
CatalogParquetInfo::from_chunk(&chunk),
|
||||
)
|
||||
.unwrap();
|
||||
state.remove(&path).unwrap();
|
||||
state.remove(chunk.path()).unwrap();
|
||||
}
|
||||
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
|
||||
assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);
|
||||
|
||||
// remove and add in the same transaction
|
||||
{
|
||||
let (path, metadata) = expected_files.get(&ChunkId::new_test(3)).unwrap();
|
||||
state.remove(path).unwrap();
|
||||
let chunk = expected_chunks.get(&3).unwrap();
|
||||
state.remove(chunk.path()).unwrap();
|
||||
state
|
||||
.add(
|
||||
Arc::clone(&config.iox_object_store),
|
||||
CatalogParquetInfo {
|
||||
path: path.clone(),
|
||||
file_size_bytes: 33,
|
||||
metadata: Arc::clone(metadata),
|
||||
},
|
||||
Arc::clone(iox_object_store),
|
||||
CatalogParquetInfo::from_chunk(chunk),
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
|
||||
assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);
|
||||
|
||||
// add, remove, add in the same transaction
|
||||
{
|
||||
let (path, metadata) = make_metadata(
|
||||
&config.iox_object_store,
|
||||
"ok",
|
||||
chunk_addr(6),
|
||||
TestSize::Full,
|
||||
)
|
||||
.await;
|
||||
let (chunk, _) = generator.generate_id(6).await;
|
||||
state
|
||||
.add(
|
||||
Arc::clone(&config.iox_object_store),
|
||||
CatalogParquetInfo {
|
||||
path: path.clone(),
|
||||
file_size_bytes: 33,
|
||||
metadata: Arc::new(metadata.clone()),
|
||||
},
|
||||
Arc::clone(iox_object_store),
|
||||
CatalogParquetInfo::from_chunk(&chunk),
|
||||
)
|
||||
.unwrap();
|
||||
state.remove(&path).unwrap();
|
||||
state.remove(chunk.path()).unwrap();
|
||||
state
|
||||
.add(
|
||||
Arc::clone(&config.iox_object_store),
|
||||
CatalogParquetInfo {
|
||||
path: path.clone(),
|
||||
file_size_bytes: 33,
|
||||
metadata: Arc::new(metadata.clone()),
|
||||
},
|
||||
Arc::clone(iox_object_store),
|
||||
CatalogParquetInfo::from_chunk(&chunk),
|
||||
)
|
||||
.unwrap();
|
||||
expected_files.insert(ChunkId::new_test(6), (path, Arc::new(metadata)));
|
||||
expected_chunks.insert(6, chunk);
|
||||
}
|
||||
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
|
||||
assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);
|
||||
|
||||
// remove, add, remove in same transaction
|
||||
{
|
||||
let (path, metadata) = expected_files.remove(&ChunkId::new_test(4)).unwrap();
|
||||
state.remove(&path).unwrap();
|
||||
let chunk = expected_chunks.remove(&4).unwrap();
|
||||
state.remove(chunk.path()).unwrap();
|
||||
state
|
||||
.add(
|
||||
Arc::clone(&config.iox_object_store),
|
||||
CatalogParquetInfo {
|
||||
path: path.clone(),
|
||||
file_size_bytes: 33,
|
||||
metadata: Arc::clone(&metadata),
|
||||
},
|
||||
Arc::clone(iox_object_store),
|
||||
CatalogParquetInfo::from_chunk(&chunk),
|
||||
)
|
||||
.unwrap();
|
||||
state.remove(&path).unwrap();
|
||||
state.remove(chunk.path()).unwrap();
|
||||
}
|
||||
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
|
||||
assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);
|
||||
|
||||
// error handling, no real opt
|
||||
{
|
||||
// TODO: Error handling should disambiguate between chunk collision and filename collision
|
||||
|
||||
// chunk with same ID already exists (should also not change the metadata)
|
||||
let (path, metadata) = make_metadata(
|
||||
&config.iox_object_store,
|
||||
"fail",
|
||||
chunk_addr(0),
|
||||
TestSize::Full,
|
||||
)
|
||||
.await;
|
||||
let (chunk, _) = generator.generate_id(2).await;
|
||||
let err = state
|
||||
.add(
|
||||
Arc::clone(&config.iox_object_store),
|
||||
CatalogParquetInfo {
|
||||
path,
|
||||
file_size_bytes: 33,
|
||||
metadata: Arc::new(metadata),
|
||||
},
|
||||
Arc::clone(iox_object_store),
|
||||
CatalogParquetInfo::from_chunk(&chunk),
|
||||
)
|
||||
.unwrap_err();
|
||||
assert!(matches!(
|
||||
|
@ -418,21 +370,16 @@ where
|
|||
CatalogStateAddError::ParquetFileAlreadyExists { .. }
|
||||
));
|
||||
}
|
||||
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
|
||||
assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);
|
||||
|
||||
// error handling, still something works
|
||||
{
|
||||
// already exists (should also not change the metadata)
|
||||
let (_, metadata) = expected_files.get(&ChunkId::new_test(0)).unwrap();
|
||||
let (chunk, _) = generator.generate_id(2).await;
|
||||
let err = state
|
||||
.add(
|
||||
Arc::clone(&config.iox_object_store),
|
||||
CatalogParquetInfo {
|
||||
// Intentionally "incorrect" path
|
||||
path: ParquetFilePath::new(&chunk_addr(10)),
|
||||
file_size_bytes: 33,
|
||||
metadata: Arc::clone(metadata),
|
||||
},
|
||||
Arc::clone(iox_object_store),
|
||||
CatalogParquetInfo::from_chunk(&chunk),
|
||||
)
|
||||
.unwrap_err();
|
||||
assert!(matches!(
|
||||
|
@ -441,97 +388,57 @@ where
|
|||
));
|
||||
|
||||
// this transaction will still work
|
||||
let (path, metadata) = make_metadata(
|
||||
&config.iox_object_store,
|
||||
"ok",
|
||||
chunk_addr(7),
|
||||
TestSize::Full,
|
||||
)
|
||||
.await;
|
||||
let metadata = Arc::new(metadata);
|
||||
let (chunk, _) = generator.generate_id(7).await;
|
||||
let info = CatalogParquetInfo::from_chunk(&chunk);
|
||||
state
|
||||
.add(
|
||||
Arc::clone(&config.iox_object_store),
|
||||
CatalogParquetInfo {
|
||||
path: path.clone(),
|
||||
file_size_bytes: 33,
|
||||
metadata: Arc::clone(&metadata),
|
||||
},
|
||||
)
|
||||
.add(Arc::clone(iox_object_store), info.clone())
|
||||
.unwrap();
|
||||
expected_files.insert(ChunkId::new_test(7), (path.clone(), Arc::clone(&metadata)));
|
||||
expected_chunks.insert(7, chunk);
|
||||
|
||||
// recently added
|
||||
let err = state
|
||||
.add(
|
||||
Arc::clone(&config.iox_object_store),
|
||||
CatalogParquetInfo {
|
||||
path,
|
||||
file_size_bytes: 33,
|
||||
metadata: Arc::clone(&metadata),
|
||||
},
|
||||
)
|
||||
.unwrap_err();
|
||||
let err = state.add(Arc::clone(iox_object_store), info).unwrap_err();
|
||||
assert!(matches!(
|
||||
err,
|
||||
CatalogStateAddError::ParquetFileAlreadyExists { .. }
|
||||
));
|
||||
|
||||
// this still works
|
||||
let (path, _) = expected_files.remove(&ChunkId::new_test(7)).unwrap();
|
||||
state.remove(&path).unwrap();
|
||||
let chunk = expected_chunks.remove(&7).unwrap();
|
||||
state.remove(chunk.path()).unwrap();
|
||||
|
||||
// recently removed
|
||||
let err = state.remove(&path).unwrap_err();
|
||||
let err = state.remove(chunk.path()).unwrap_err();
|
||||
assert!(matches!(
|
||||
err,
|
||||
CatalogStateRemoveError::ParquetFileDoesNotExist { .. }
|
||||
));
|
||||
}
|
||||
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
|
||||
assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);
|
||||
|
||||
// add predicates
|
||||
{
|
||||
// create two chunks that we can use for delete predicate
|
||||
let chunk_addr_1 = chunk_addr(8);
|
||||
let (path, metadata) = make_metadata(
|
||||
&config.iox_object_store,
|
||||
"ok",
|
||||
chunk_addr_1.clone(),
|
||||
TestSize::Full,
|
||||
)
|
||||
.await;
|
||||
state
|
||||
.add(
|
||||
Arc::clone(&config.iox_object_store),
|
||||
CatalogParquetInfo {
|
||||
path: path.clone(),
|
||||
file_size_bytes: 33,
|
||||
metadata: Arc::new(metadata.clone()),
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
expected_files.insert(chunk_addr_1.chunk_id, (path, Arc::new(metadata)));
|
||||
let (chunk, metadata) = generator.generate_id(8).await;
|
||||
let chunk_addr_1 = ChunkAddr::new(generator.partition(), metadata.chunk_id);
|
||||
|
||||
let chunk_addr_2 = chunk_addr(9);
|
||||
let (path, metadata) = make_metadata(
|
||||
&config.iox_object_store,
|
||||
"ok",
|
||||
chunk_addr_2.clone(),
|
||||
TestSize::Full,
|
||||
)
|
||||
.await;
|
||||
state
|
||||
.add(
|
||||
Arc::clone(&config.iox_object_store),
|
||||
CatalogParquetInfo {
|
||||
path: path.clone(),
|
||||
file_size_bytes: 33,
|
||||
metadata: Arc::new(metadata.clone()),
|
||||
},
|
||||
Arc::clone(iox_object_store),
|
||||
CatalogParquetInfo::from_chunk(&chunk),
|
||||
)
|
||||
.unwrap();
|
||||
expected_files.insert(chunk_addr_2.chunk_id, (path, Arc::new(metadata)));
|
||||
expected_chunks.insert(8, chunk);
|
||||
|
||||
let (chunk, metadata) = generator.generate_id(9).await;
|
||||
let chunk_addr_2 = ChunkAddr::new(generator.partition(), metadata.chunk_id);
|
||||
|
||||
state
|
||||
.add(
|
||||
Arc::clone(iox_object_store),
|
||||
CatalogParquetInfo::from_chunk(&chunk),
|
||||
)
|
||||
.unwrap();
|
||||
expected_chunks.insert(9, chunk);
|
||||
|
||||
// first predicate used only a single chunk
|
||||
let predicate_1 = create_delete_predicate(1);
|
||||
|
@ -546,32 +453,21 @@ where
|
|||
expected_predicates.insert(predicate_2, chunks_2.into_iter().collect());
|
||||
|
||||
// chunks created afterwards are unaffected
|
||||
let chunk_addr_3 = chunk_addr(10);
|
||||
let (path, metadata) = make_metadata(
|
||||
&config.iox_object_store,
|
||||
"ok",
|
||||
chunk_addr_3.clone(),
|
||||
TestSize::Full,
|
||||
)
|
||||
.await;
|
||||
let (chunk, _) = generator.generate_id(10).await;
|
||||
state
|
||||
.add(
|
||||
Arc::clone(&config.iox_object_store),
|
||||
CatalogParquetInfo {
|
||||
path: path.clone(),
|
||||
file_size_bytes: 33,
|
||||
metadata: Arc::new(metadata.clone()),
|
||||
},
|
||||
Arc::clone(iox_object_store),
|
||||
CatalogParquetInfo::from_chunk(&chunk),
|
||||
)
|
||||
.unwrap();
|
||||
expected_files.insert(chunk_addr_3.chunk_id, (path, Arc::new(metadata)));
|
||||
expected_chunks.insert(10, chunk);
|
||||
}
|
||||
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
|
||||
assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);
|
||||
|
||||
// removing a chunk will also remove its predicates
|
||||
{
|
||||
let (path, _) = expected_files.remove(&ChunkId::new_test(8)).unwrap();
|
||||
state.remove(&path).unwrap();
|
||||
let chunk = expected_chunks.remove(&8).unwrap();
|
||||
state.remove(chunk.path()).unwrap();
|
||||
expected_predicates = expected_predicates
|
||||
.into_iter()
|
||||
.filter_map(|(predicate, chunks)| {
|
||||
|
@ -583,7 +479,7 @@ where
|
|||
})
|
||||
.collect();
|
||||
}
|
||||
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
|
||||
assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);
|
||||
|
||||
// Registering predicates for unknown chunks is just ignored because chunks might have been in a "persisting" intermediate
|
||||
// state while the predicate was reported.
|
||||
|
@ -596,30 +492,30 @@ where
|
|||
}];
|
||||
state.delete_predicate(Arc::clone(&predicate), chunks);
|
||||
}
|
||||
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
|
||||
assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);
|
||||
}
|
||||
|
||||
/// Assert that tracked files and their linked metadata are equal.
|
||||
fn assert_checkpoint<S, F>(
|
||||
state: &S,
|
||||
f: &F,
|
||||
expected_files: &HashMap<ChunkId, (ParquetFilePath, Arc<IoxParquetMetaData>)>,
|
||||
expected_chunks: &HashMap<u32, ParquetChunk>,
|
||||
expected_predicates: &HashMap<Arc<DeletePredicate>, HashSet<ChunkAddrWithoutDatabase>>,
|
||||
) where
|
||||
F: Fn(&S) -> CheckpointData,
|
||||
{
|
||||
let data = f(state);
|
||||
let data: CheckpointData = f(state);
|
||||
let actual_files = data.files;
|
||||
|
||||
let sorted_keys_actual = get_sorted_keys(actual_files.keys());
|
||||
let sorted_keys_expected = get_sorted_keys(expected_files.values().map(|(path, _)| path));
|
||||
let sorted_keys_expected = get_sorted_keys(expected_chunks.values().map(|chunk| chunk.path()));
|
||||
assert_eq!(sorted_keys_actual, sorted_keys_expected);
|
||||
|
||||
for (path, md_expected) in expected_files.values() {
|
||||
let md_actual = &actual_files[path].metadata;
|
||||
for chunk in expected_chunks.values() {
|
||||
let md_actual = &actual_files[chunk.path()].metadata;
|
||||
|
||||
let md_actual = md_actual.decode().unwrap();
|
||||
let md_expected = md_expected.decode().unwrap();
|
||||
let md_expected = chunk.parquet_metadata().decode().unwrap();
|
||||
|
||||
let iox_md_actual = md_actual.read_iox_metadata().unwrap();
|
||||
let iox_md_expected = md_expected.read_iox_metadata().unwrap();
|
||||
|
|
|
@ -33,6 +33,7 @@ tokio = { version = "1.13", features = ["macros", "rt", "rt-multi-thread", "sync
tokio-stream = "0.1"
uuid = { version = "0.8", features = ["serde", "v4"] }
zstd = "0.9"
workspace-hack = { path = "../workspace-hack"}

[dev-dependencies]
arrow_util = { path = "../arrow_util" }
@ -870,26 +870,15 @@ mod tests {
|
|||
|
||||
use schema::TIME_COLUMN_NAME;
|
||||
|
||||
use crate::test_utils::{
|
||||
chunk_addr, create_partition_and_database_checkpoint, load_parquet_from_store, make_chunk,
|
||||
make_chunk_no_row_group, make_iox_object_store, TestSize,
|
||||
};
|
||||
use crate::test_utils::create_partition_and_database_checkpoint;
|
||||
use crate::test_utils::generator::{ChunkGenerator, GeneratorConfig};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_restore_from_file() {
|
||||
// setup: preserve chunk to object store
|
||||
let iox_object_store = make_iox_object_store().await;
|
||||
let chunk = make_chunk(
|
||||
Arc::clone(&iox_object_store),
|
||||
"foo",
|
||||
chunk_addr(1),
|
||||
TestSize::Full,
|
||||
)
|
||||
.await;
|
||||
let parquet_data = load_parquet_from_store(&chunk, iox_object_store)
|
||||
.await
|
||||
.unwrap();
|
||||
let parquet_metadata = IoxParquetMetaData::from_file_bytes(parquet_data).unwrap();
|
||||
let mut generator = ChunkGenerator::new().await;
|
||||
let (chunk, _) = generator.generate().await;
|
||||
let parquet_metadata = chunk.parquet_metadata();
|
||||
let decoded = parquet_metadata.decode().unwrap();
|
||||
|
||||
// step 1: read back schema
|
||||
|
@ -911,18 +900,9 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn test_restore_from_thrift() {
|
||||
// setup: write chunk to object store and only keep thrift-encoded metadata
|
||||
let iox_object_store = make_iox_object_store().await;
|
||||
let chunk = make_chunk(
|
||||
Arc::clone(&iox_object_store),
|
||||
"foo",
|
||||
chunk_addr(1),
|
||||
TestSize::Full,
|
||||
)
|
||||
.await;
|
||||
let parquet_data = load_parquet_from_store(&chunk, iox_object_store)
|
||||
.await
|
||||
.unwrap();
|
||||
let parquet_metadata = IoxParquetMetaData::from_file_bytes(parquet_data).unwrap();
|
||||
let mut generator = ChunkGenerator::new().await;
|
||||
let (chunk, _) = generator.generate().await;
|
||||
let parquet_metadata = chunk.parquet_metadata();
|
||||
let data = parquet_metadata.thrift_bytes().to_vec();
|
||||
let parquet_metadata = IoxParquetMetaData::from_thrift_bytes(data);
|
||||
let decoded = parquet_metadata.decode().unwrap();
|
||||
|
@ -941,18 +921,10 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn test_restore_from_file_no_row_group() {
|
||||
// setup: preserve chunk to object store
|
||||
let iox_object_store = make_iox_object_store().await;
|
||||
let chunk = make_chunk_no_row_group(
|
||||
Arc::clone(&iox_object_store),
|
||||
"foo",
|
||||
chunk_addr(1),
|
||||
TestSize::Full,
|
||||
)
|
||||
.await;
|
||||
let parquet_data = load_parquet_from_store(&chunk, iox_object_store)
|
||||
.await
|
||||
.unwrap();
|
||||
let parquet_metadata = IoxParquetMetaData::from_file_bytes(parquet_data).unwrap();
|
||||
let mut generator = ChunkGenerator::new().await;
|
||||
generator.set_config(GeneratorConfig::NoData);
|
||||
let (chunk, _) = generator.generate().await;
|
||||
let parquet_metadata = chunk.parquet_metadata();
|
||||
let decoded = parquet_metadata.decode().unwrap();
|
||||
|
||||
// step 1: read back schema
|
||||
|
@ -971,18 +943,11 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn test_restore_from_thrift_no_row_group() {
|
||||
// setup: write chunk to object store and only keep thrift-encoded metadata
|
||||
let iox_object_store = make_iox_object_store().await;
|
||||
let chunk = make_chunk_no_row_group(
|
||||
Arc::clone(&iox_object_store),
|
||||
"foo",
|
||||
chunk_addr(1),
|
||||
TestSize::Full,
|
||||
)
|
||||
.await;
|
||||
let parquet_data = load_parquet_from_store(&chunk, iox_object_store)
|
||||
.await
|
||||
.unwrap();
|
||||
let parquet_metadata = IoxParquetMetaData::from_file_bytes(parquet_data).unwrap();
|
||||
let mut generator = ChunkGenerator::new().await;
|
||||
generator.set_config(GeneratorConfig::NoData);
|
||||
let (chunk, _) = generator.generate().await;
|
||||
let parquet_metadata = chunk.parquet_metadata();
|
||||
|
||||
let data = parquet_metadata.thrift_bytes().to_vec();
|
||||
let parquet_metadata = IoxParquetMetaData::from_thrift_bytes(data);
|
||||
let decoded = parquet_metadata.decode().unwrap();
|
||||
|
@ -1002,18 +967,9 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn test_make_chunk() {
|
||||
let iox_object_store = make_iox_object_store().await;
|
||||
let chunk = make_chunk(
|
||||
Arc::clone(&iox_object_store),
|
||||
"foo",
|
||||
chunk_addr(1),
|
||||
TestSize::Full,
|
||||
)
|
||||
.await;
|
||||
let parquet_data = load_parquet_from_store(&chunk, iox_object_store)
|
||||
.await
|
||||
.unwrap();
|
||||
let parquet_metadata = IoxParquetMetaData::from_file_bytes(parquet_data).unwrap();
|
||||
let mut generator = ChunkGenerator::new().await;
|
||||
let (chunk, _) = generator.generate().await;
|
||||
let parquet_metadata = chunk.parquet_metadata();
|
||||
let decoded = parquet_metadata.decode().unwrap();
|
||||
|
||||
assert!(decoded.md.num_row_groups() > 1);
|
||||
|
@ -1040,18 +996,10 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn test_make_chunk_no_row_group() {
|
||||
let iox_object_store = make_iox_object_store().await;
|
||||
let chunk = make_chunk_no_row_group(
|
||||
Arc::clone(&iox_object_store),
|
||||
"foo",
|
||||
chunk_addr(1),
|
||||
TestSize::Full,
|
||||
)
|
||||
.await;
|
||||
let parquet_data = load_parquet_from_store(&chunk, iox_object_store)
|
||||
.await
|
||||
.unwrap();
|
||||
let parquet_metadata = IoxParquetMetaData::from_file_bytes(parquet_data).unwrap();
|
||||
let mut generator = ChunkGenerator::new().await;
|
||||
generator.set_config(GeneratorConfig::NoData);
|
||||
let (chunk, _) = generator.generate().await;
|
||||
let parquet_metadata = chunk.parquet_metadata();
|
||||
let decoded = parquet_metadata.decode().unwrap();
|
||||
|
||||
assert_eq!(decoded.md.num_row_groups(), 0);
|
||||
|
@ -1113,18 +1061,9 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn test_parquet_metadata_size() {
|
||||
// setup: preserve chunk to object store
|
||||
let iox_object_store = make_iox_object_store().await;
|
||||
let chunk = make_chunk(
|
||||
Arc::clone(&iox_object_store),
|
||||
"foo",
|
||||
chunk_addr(1),
|
||||
TestSize::Full,
|
||||
)
|
||||
.await;
|
||||
let parquet_data = load_parquet_from_store(&chunk, iox_object_store)
|
||||
.await
|
||||
.unwrap();
|
||||
let parquet_metadata = IoxParquetMetaData::from_file_bytes(parquet_data).unwrap();
|
||||
assert_eq!(parquet_metadata.size(), 3730);
|
||||
let mut generator = ChunkGenerator::new().await;
|
||||
let (chunk, _) = generator.generate().await;
|
||||
let parquet_metadata = chunk.parquet_metadata();
|
||||
assert_eq!(parquet_metadata.size(), 3729);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -429,17 +429,14 @@ impl TryClone for MemWriter {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::test_utils::generator::ChunkGenerator;
|
||||
use crate::test_utils::{
|
||||
chunk_addr, create_partition_and_database_checkpoint, load_parquet_from_store,
|
||||
make_chunk_given_record_batch, make_iox_object_store, make_record_batch,
|
||||
read_data_from_parquet_data, TestSize,
|
||||
create_partition_and_database_checkpoint, load_parquet_from_store, make_iox_object_store,
|
||||
make_record_batch, read_data_from_parquet_data, TestSize,
|
||||
};
|
||||
use arrow::array::{ArrayRef, StringArray};
|
||||
use arrow_util::assert_batches_eq;
|
||||
use data_types::{
|
||||
chunk_metadata::{ChunkId, ChunkOrder},
|
||||
partition_metadata::TableSummary,
|
||||
};
|
||||
use data_types::chunk_metadata::{ChunkId, ChunkOrder};
|
||||
use datafusion::physical_plan::common::SizedRecordBatchStream;
|
||||
use datafusion_util::MemoryStream;
|
||||
use parquet::schema::types::ColumnPath;
|
||||
|
@ -584,37 +581,17 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn test_write_read() {
|
||||
////////////////////
|
||||
// Create test data which is also the expected data
|
||||
let addr = chunk_addr(1);
|
||||
let table = Arc::clone(&addr.table_name);
|
||||
let (record_batches, schema, column_summaries, num_rows) =
|
||||
make_record_batch("foo", TestSize::Full);
|
||||
let mut table_summary = TableSummary::new(table.to_string());
|
||||
table_summary.columns = column_summaries.clone();
|
||||
let record_batch = record_batches[0].clone(); // Get the first one to compare key-value meta data that would be the same for all batches
|
||||
let key_value_metadata = record_batch.schema().metadata().clone();
|
||||
|
||||
////////////////////
|
||||
// Make an OS in memory
|
||||
let store = make_iox_object_store().await;
|
||||
|
||||
////////////////////
|
||||
// Store the data as a chunk and write it to in the object store
|
||||
// This test Storage::write_to_object_store
|
||||
let chunk = make_chunk_given_record_batch(
|
||||
Arc::clone(&store),
|
||||
record_batches.clone(),
|
||||
schema.clone(),
|
||||
addr,
|
||||
column_summaries.clone(),
|
||||
)
|
||||
.await;
|
||||
// This tests Storage::write_to_object_store
|
||||
let mut generator = ChunkGenerator::new().await;
|
||||
let (chunk, _) = generator.generate().await;
|
||||
let key_value_metadata = chunk.schema().as_arrow().metadata().clone();
|
||||
|
||||
////////////////////
|
||||
// Now let's read it back
|
||||
//
|
||||
let parquet_data = load_parquet_from_store(&chunk, Arc::clone(&store))
|
||||
let parquet_data = load_parquet_from_store(&chunk, Arc::clone(generator.store()))
|
||||
.await
|
||||
.unwrap();
|
||||
let parquet_metadata = IoxParquetMetaData::from_file_bytes(parquet_data.clone()).unwrap();
|
||||
|
@ -622,7 +599,7 @@ mod tests {
|
|||
//
|
||||
// 1. Check metadata at file level: Everything is correct
|
||||
let schema_actual = decoded.read_schema().unwrap();
|
||||
assert_eq!(Arc::new(schema.clone()), schema_actual);
|
||||
assert_eq!(chunk.schema(), schema_actual);
|
||||
assert_eq!(
|
||||
key_value_metadata.clone(),
|
||||
schema_actual.as_arrow().metadata().clone()
|
||||
|
@ -630,22 +607,19 @@ mod tests {
|
|||
|
||||
// 2. Check statistics
|
||||
let table_summary_actual = decoded.read_statistics(&schema_actual).unwrap();
|
||||
assert_eq!(table_summary_actual, table_summary.columns);
|
||||
assert_eq!(table_summary_actual, chunk.table_summary().columns);
|
||||
|
||||
// 3. Check data
|
||||
// Note that the read_data_from_parquet_data function fixes the row-group/batches' level metadata bug in arrow
|
||||
let actual_record_batches =
|
||||
read_data_from_parquet_data(Arc::clone(&schema.as_arrow()), parquet_data);
|
||||
read_data_from_parquet_data(chunk.schema().as_arrow(), parquet_data);
|
||||
let mut actual_num_rows = 0;
|
||||
for batch in actual_record_batches.clone() {
|
||||
actual_num_rows += batch.num_rows();
|
||||
|
||||
// Check if record batch has meta data
|
||||
let batch_key_value_metadata = batch.schema().metadata().clone();
|
||||
assert_eq!(
|
||||
schema.as_arrow().metadata().clone(),
|
||||
batch_key_value_metadata
|
||||
);
|
||||
assert_eq!(key_value_metadata, batch_key_value_metadata);
|
||||
}
|
||||
|
||||
// Now verify return results. This assert_batches_eq still works correctly without the metadata
|
||||
|
@ -660,8 +634,7 @@ mod tests {
|
|||
"| foo | | | | foo | | | | 4 | 9223372036854775807 | | | 4 | 18446744073709551615 | | | 40.1 | 1 | -0 | NaN | NaN | | | false | | | 1970-01-01T00:00:00.000004Z |",
|
||||
"+----------------+---------------+-------------------+------------------+-------------------------+------------------------+----------------------------+---------------------------+----------------------+----------------------+-------------------------+------------------------+----------------------+----------------------+-------------------------+------------------------+----------------------+-------------------+--------------------+------------------------+-----------------------+-------------------------+------------------------+-----------------------+--------------------------+-------------------------+-----------------------------+",
|
||||
];
|
||||
assert_eq!(num_rows, actual_num_rows);
|
||||
assert_batches_eq!(expected.clone(), &record_batches);
|
||||
assert_eq!(chunk.rows(), actual_num_rows);
|
||||
assert_batches_eq!(expected, &actual_record_batches);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
use crate::{
|
||||
chunk::{self, ChunkMetrics, ParquetChunk},
|
||||
metadata::{IoxMetadata, IoxParquetMetaData},
|
||||
chunk::{self, ParquetChunk},
|
||||
storage::Storage,
|
||||
};
|
||||
use arrow::{
|
||||
|
@ -12,12 +11,9 @@ use arrow::{
|
|||
record_batch::RecordBatch,
|
||||
};
|
||||
use data_types::{
|
||||
chunk_metadata::{ChunkAddr, ChunkId, ChunkOrder},
|
||||
partition_metadata::{ColumnSummary, InfluxDbType, StatValues, Statistics, TableSummary},
|
||||
partition_metadata::{ColumnSummary, InfluxDbType, StatValues, Statistics},
|
||||
server_id::ServerId,
|
||||
};
|
||||
use datafusion::physical_plan::SendableRecordBatchStream;
|
||||
use datafusion_util::MemoryStream;
|
||||
use futures::TryStreamExt;
|
||||
use iox_object_store::{IoxObjectStore, ParquetFilePath};
|
||||
use object_store::ObjectStore;
|
||||
|
@ -36,6 +32,8 @@ use std::{collections::BTreeMap, num::NonZeroU32, sync::Arc};
|
|||
use time::Time;
|
||||
use uuid::Uuid;
|
||||
|
||||
pub mod generator;
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
#[snafu(display("Error getting data from object store: {}", source))]
|
||||
|
@ -98,109 +96,6 @@ pub async fn load_parquet_from_store_for_path(
|
|||
Ok(parquet_data)
|
||||
}
|
||||
|
||||
/// The db name to use for testing
|
||||
pub fn db_name() -> &'static str {
|
||||
"db1"
|
||||
}
|
||||
|
||||
/// Creates a test chunk address for a given chunk id
|
||||
pub fn chunk_addr(id: u128) -> ChunkAddr {
|
||||
ChunkAddr {
|
||||
db_name: Arc::from(db_name()),
|
||||
table_name: Arc::from("table1"),
|
||||
partition_key: Arc::from("part1"),
|
||||
chunk_id: ChunkId::new_test(id),
|
||||
}
|
||||
}
|
||||
|
||||
/// Same as [`make_chunk`] but parquet file does not contain any row group.
|
||||
pub async fn make_chunk(
|
||||
iox_object_store: Arc<IoxObjectStore>,
|
||||
column_prefix: &str,
|
||||
addr: ChunkAddr,
|
||||
test_size: TestSize,
|
||||
) -> ParquetChunk {
|
||||
let (record_batches, schema, column_summaries, _num_rows) =
|
||||
make_record_batch(column_prefix, test_size);
|
||||
make_chunk_given_record_batch(
|
||||
iox_object_store,
|
||||
record_batches,
|
||||
schema,
|
||||
addr,
|
||||
column_summaries,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Same as [`make_chunk`] but parquet file does not contain any row group.
|
||||
pub async fn make_chunk_no_row_group(
|
||||
store: Arc<IoxObjectStore>,
|
||||
column_prefix: &str,
|
||||
addr: ChunkAddr,
|
||||
test_size: TestSize,
|
||||
) -> ParquetChunk {
|
||||
let (_, schema, column_summaries, _num_rows) = make_record_batch(column_prefix, test_size);
|
||||
make_chunk_given_record_batch(store, vec![], schema, addr, column_summaries).await
|
||||
}
|
||||
|
||||
/// Create a test chunk by writing data to object store.
|
||||
///
|
||||
/// TODO: This code creates a chunk that isn't hooked up with metrics
|
||||
pub async fn make_chunk_given_record_batch(
|
||||
iox_object_store: Arc<IoxObjectStore>,
|
||||
record_batches: Vec<RecordBatch>,
|
||||
schema: Schema,
|
||||
addr: ChunkAddr,
|
||||
column_summaries: Vec<ColumnSummary>,
|
||||
) -> ParquetChunk {
|
||||
let storage = Storage::new(Arc::clone(&iox_object_store));
|
||||
|
||||
let table_summary = TableSummary {
|
||||
name: addr.table_name.to_string(),
|
||||
columns: column_summaries,
|
||||
};
|
||||
let stream: SendableRecordBatchStream = if record_batches.is_empty() {
|
||||
Box::pin(MemoryStream::new_with_schema(
|
||||
record_batches,
|
||||
Arc::clone(schema.inner()),
|
||||
))
|
||||
} else {
|
||||
Box::pin(MemoryStream::new(record_batches))
|
||||
};
|
||||
let (partition_checkpoint, database_checkpoint) = create_partition_and_database_checkpoint(
|
||||
Arc::clone(&addr.table_name),
|
||||
Arc::clone(&addr.partition_key),
|
||||
);
|
||||
let metadata = IoxMetadata {
|
||||
creation_timestamp: Time::from_timestamp(10, 20),
|
||||
table_name: Arc::clone(&addr.table_name),
|
||||
partition_key: Arc::clone(&addr.partition_key),
|
||||
chunk_id: addr.chunk_id,
|
||||
partition_checkpoint,
|
||||
database_checkpoint,
|
||||
time_of_first_write: Time::from_timestamp(30, 40),
|
||||
time_of_last_write: Time::from_timestamp(50, 60),
|
||||
chunk_order: ChunkOrder::new(5).unwrap(),
|
||||
};
|
||||
let (path, file_size_bytes, parquet_metadata) = storage
|
||||
.write_to_object_store(addr.clone(), stream, metadata)
|
||||
.await
|
||||
.unwrap();
|
||||
let rows = parquet_metadata.decode().unwrap().row_count();
|
||||
|
||||
ParquetChunk::new_from_parts(
|
||||
addr.partition_key,
|
||||
Arc::new(table_summary),
|
||||
Arc::new(schema),
|
||||
&path,
|
||||
Arc::clone(&iox_object_store),
|
||||
file_size_bytes,
|
||||
Arc::new(parquet_metadata),
|
||||
rows,
|
||||
ChunkMetrics::new_unregistered(),
|
||||
)
|
||||
}
|
||||
|
||||
fn create_column_tag(
|
||||
name: &str,
|
||||
data: Vec<Vec<Option<&str>>>,
|
||||
|
@ -893,25 +788,6 @@ pub fn read_data_from_parquet_data(schema: SchemaRef, parquet_data: Vec<u8>) ->
|
|||
record_batches
|
||||
}
|
||||
|
||||
/// Create test metadata by creating a parquet file and reading it back into memory.
|
||||
///
|
||||
/// See [`make_chunk`] for details.
|
||||
pub async fn make_metadata(
|
||||
iox_object_store: &Arc<IoxObjectStore>,
|
||||
column_prefix: &str,
|
||||
addr: ChunkAddr,
|
||||
test_size: TestSize,
|
||||
) -> (ParquetFilePath, IoxParquetMetaData) {
|
||||
let chunk = make_chunk(Arc::clone(iox_object_store), column_prefix, addr, test_size).await;
|
||||
let parquet_data = load_parquet_from_store(&chunk, Arc::clone(iox_object_store))
|
||||
.await
|
||||
.unwrap();
|
||||
(
|
||||
chunk.path().clone(),
|
||||
IoxParquetMetaData::from_file_bytes(parquet_data).unwrap(),
|
||||
)
|
||||
}
|
||||
|
||||
/// Create [`PartitionCheckpoint`] and [`DatabaseCheckpoint`] for testing.
|
||||
pub fn create_partition_and_database_checkpoint(
|
||||
table_name: Arc<str>,
|
||||
|
|
|
@ -0,0 +1,139 @@
|
|||
use crate::chunk::{ChunkMetrics, ParquetChunk};
|
||||
use crate::metadata::IoxMetadata;
|
||||
use crate::storage::Storage;
|
||||
use crate::test_utils::{
|
||||
create_partition_and_database_checkpoint, make_iox_object_store, make_record_batch, TestSize,
|
||||
};
|
||||
use data_types::chunk_metadata::{ChunkAddr, ChunkId, ChunkOrder};
|
||||
use data_types::partition_metadata::{PartitionAddr, TableSummary};
|
||||
use datafusion_util::MemoryStream;
|
||||
use iox_object_store::IoxObjectStore;
|
||||
use std::sync::Arc;
|
||||
use time::Time;
|
||||
|
||||
/// Controls the number of row groups to generate for chunks
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub enum GeneratorConfig {
|
||||
/// Generates schema but skips generating data
|
||||
NoData,
|
||||
/// Generates 3 row groups with a limited selection of columns
|
||||
Simple,
|
||||
/// Generates 3 row groups with a wide variety of different columns
|
||||
Full,
|
||||
}
|
||||
|
||||
/// A generator of persisted chunks for use in tests
|
||||
#[derive(Debug)]
|
||||
pub struct ChunkGenerator {
|
||||
iox_object_store: Arc<IoxObjectStore>,
|
||||
storage: Storage,
|
||||
column_prefix: String,
|
||||
config: GeneratorConfig,
|
||||
partition: PartitionAddr,
|
||||
next_chunk: u32,
|
||||
}
|
||||
|
||||
impl ChunkGenerator {
|
||||
pub async fn new() -> Self {
|
||||
Self::new_with_store(make_iox_object_store().await)
|
||||
}
|
||||
|
||||
pub fn new_with_store(iox_object_store: Arc<IoxObjectStore>) -> Self {
|
||||
let storage = Storage::new(Arc::clone(&iox_object_store));
|
||||
Self {
|
||||
iox_object_store,
|
||||
storage,
|
||||
column_prefix: "foo".to_string(),
|
||||
config: GeneratorConfig::Full,
|
||||
partition: PartitionAddr {
|
||||
db_name: Arc::from("db1"),
|
||||
table_name: Arc::from("table1"),
|
||||
partition_key: Arc::from("part1"),
|
||||
},
|
||||
next_chunk: 1,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn store(&self) -> &Arc<IoxObjectStore> {
|
||||
&self.iox_object_store
|
||||
}
|
||||
|
||||
pub fn set_config(&mut self, config: GeneratorConfig) {
|
||||
self.config = config;
|
||||
}
|
||||
|
||||
pub fn partition(&self) -> &PartitionAddr {
|
||||
&self.partition
|
||||
}
|
||||
|
||||
pub async fn generate(&mut self) -> (ParquetChunk, IoxMetadata) {
|
||||
let id = self.next_chunk;
|
||||
self.next_chunk += 1;
|
||||
self.generate_id(id).await
|
||||
}
|
||||
|
||||
pub async fn generate_id(&mut self, id: u32) -> (ParquetChunk, IoxMetadata) {
|
||||
let (partition_checkpoint, database_checkpoint) = create_partition_and_database_checkpoint(
|
||||
Arc::clone(&self.partition.table_name),
|
||||
Arc::clone(&self.partition.partition_key),
|
||||
);
|
||||
|
||||
let chunk_id = ChunkId::new_test(id as _);
|
||||
let chunk_order = ChunkOrder::new(id).unwrap();
|
||||
let chunk_addr = ChunkAddr::new(&self.partition, chunk_id);
|
||||
|
||||
let metadata = IoxMetadata {
|
||||
creation_timestamp: Time::from_timestamp(10, 20),
|
||||
table_name: Arc::clone(&self.partition.table_name),
|
||||
partition_key: Arc::clone(&self.partition.partition_key),
|
||||
chunk_id,
|
||||
chunk_order,
|
||||
partition_checkpoint,
|
||||
database_checkpoint,
|
||||
time_of_first_write: Time::from_timestamp(30, 40),
|
||||
time_of_last_write: Time::from_timestamp(50, 60),
|
||||
};
|
||||
|
||||
let (record_batches, schema, column_summaries, rows) = match self.config {
|
||||
GeneratorConfig::NoData => {
|
||||
// Generating an entire row group just for its metadata seems wasteful
|
||||
let (_, schema, column_summaries, _) =
|
||||
make_record_batch(&self.column_prefix, TestSize::Minimal);
|
||||
// Note: the column summaries here are inconsistent with the actual (empty) data
|
||||
(vec![], schema, column_summaries, 0)
|
||||
}
|
||||
GeneratorConfig::Simple => make_record_batch(&self.column_prefix, TestSize::Minimal),
|
||||
GeneratorConfig::Full => make_record_batch(&self.column_prefix, TestSize::Full),
|
||||
};
|
||||
|
||||
let table_summary = TableSummary {
|
||||
name: self.partition.table_name.to_string(),
|
||||
columns: column_summaries,
|
||||
};
|
||||
|
||||
let stream = Box::pin(MemoryStream::new_with_schema(
|
||||
record_batches,
|
||||
Arc::clone(schema.inner()),
|
||||
));
|
||||
|
||||
let (path, file_size_bytes, parquet_metadata) = self
|
||||
.storage
|
||||
.write_to_object_store(chunk_addr, stream, metadata.clone())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let chunk = ParquetChunk::new_from_parts(
|
||||
Arc::clone(&self.partition.partition_key),
|
||||
Arc::new(table_summary),
|
||||
Arc::new(schema),
|
||||
&path,
|
||||
Arc::clone(&self.iox_object_store),
|
||||
file_size_bytes,
|
||||
Arc::new(parquet_metadata),
|
||||
rows,
|
||||
ChunkMetrics::new_unregistered(),
|
||||
);
|
||||
|
||||
(chunk, metadata)
|
||||
}
|
||||
}
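// A minimal usage sketch of the generator above, assuming the tokio test runtime and only
// the `ChunkGenerator`/`GeneratorConfig` methods shown in this module; the assertion relies
// on the generator's default partition ("db1"/"table1"/"part1").
#[cfg(test)]
mod generator_usage_sketch {
    use super::*;

    #[tokio::test]
    async fn generate_minimal_chunk() {
        // Generator backed by a fresh in-memory object store
        let mut generator = ChunkGenerator::new().await;
        // Produce schema and metadata only, without writing row groups
        generator.set_config(GeneratorConfig::NoData);
        let (_chunk, metadata) = generator.generate().await;
        assert_eq!(metadata.table_name.as_ref(), "table1");
    }
}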
|
|
@ -9,6 +9,7 @@ internal_types = { path = "../internal_types" }
|
|||
observability_deps = { path = "../observability_deps" }
|
||||
snafu = "0.6.2"
|
||||
time = { path = "../time" }
|
||||
workspace-hack = { path = "../workspace-hack"}
|
||||
|
||||
[dev-dependencies]
|
||||
test_helpers = { path = "../test_helpers" }
|
||||
|
|
|
@ -13,9 +13,10 @@ schema = { path = "../schema" }
|
|||
observability_deps = { path = "../observability_deps" }
|
||||
ordered-float = "2"
|
||||
regex = "1"
|
||||
serde_json = "1.0.70"
|
||||
serde_json = "1.0.71"
|
||||
snafu = "0.6.9"
|
||||
sqlparser = "0.12.0"
|
||||
workspace-hack = { path = "../workspace-hack"}
|
||||
|
||||
[dev-dependencies]
|
||||
test_helpers = { path = "../test_helpers" }
|
||||
|
|
|
@ -35,6 +35,7 @@ tokio-stream = "0.1.8"
|
|||
tokio-util = { version = "0.6.9" }
|
||||
trace = { path = "../trace" }
|
||||
predicate = { path = "../predicate" }
|
||||
workspace-hack = { path = "../workspace-hack"}
|
||||
|
||||
|
||||
# use libc on unix like platforms to set worker priority in DedicatedExecutor
|
||||
|
|
|
@ -18,7 +18,7 @@ use datafusion::{
|
|||
ExecutionPlan,
|
||||
},
|
||||
};
|
||||
use observability_deps::tracing::{debug, info, trace};
|
||||
use observability_deps::tracing::{debug, trace};
|
||||
use predicate::predicate::{Predicate, PredicateBuilder};
|
||||
use schema::{merge::SchemaMerger, sort::SortKey, Schema};
|
||||
|
||||
|
@ -236,7 +236,7 @@ impl<C: QueryChunk + 'static> TableProvider for ChunkTableProvider<C> {
|
|||
filters: &[Expr],
|
||||
_limit: Option<usize>,
|
||||
) -> std::result::Result<Arc<dyn ExecutionPlan>, DataFusionError> {
|
||||
info!(" = Inside ChunkTableProvider Scan");
|
||||
trace!(" = Inside ChunkTableProvider Scan");
|
||||
|
||||
// Note that `filters` don't actually need to be evaluated in
|
||||
// the scan for the plans to be correct, they are an extra
|
||||
|
|
|
@ -15,6 +15,7 @@ once_cell = { version = "1.4.0", features = ["parking_lot"] }
|
|||
predicate = { path = "../predicate" }
|
||||
query = { path = "../query" }
|
||||
server = { path = "../server" }
|
||||
workspace-hack = { path = "../workspace-hack"}
|
||||
|
||||
[dev-dependencies]
|
||||
arrow = { version = "6.0", features = ["prettyprint"] }
|
||||
|
|
|
@ -25,6 +25,7 @@ parking_lot = "0.11"
|
|||
permutation = "0.2.5"
|
||||
snafu = "0.6"
|
||||
schema = { path = "../schema" }
|
||||
workspace-hack = { path = "../workspace-hack"}
|
||||
|
||||
[dev-dependencies] # In alphabetical order
|
||||
criterion = "0.3.3"
|
||||
|
|
|
@ -5,7 +5,7 @@ edition = "2021"
|
|||
|
||||
[dependencies]
|
||||
async-trait = "0.1"
|
||||
cache_loader_async = "0.1.2"
|
||||
cache_loader_async = { version = "0.1.2", features = ["ttl-cache"] }
|
||||
data_types = { path = "../data_types" }
|
||||
dml = { path = "../dml" }
|
||||
hashbrown = "0.11"
|
||||
|
@ -19,6 +19,7 @@ parking_lot = "0.11.2"
|
|||
snafu = "0.6"
|
||||
time = { path = "../time" }
|
||||
write_buffer = { path = "../write_buffer" }
|
||||
workspace-hack = { path = "../workspace-hack"}
|
||||
|
||||
[dev-dependencies]
|
||||
mutable_batch_lp = { path = "../mutable_batch_lp" }
|
||||
|
|
|
@ -11,3 +11,4 @@ hashbrown = "0.11"
|
|||
indexmap = "1.7"
|
||||
itertools = "0.10.1"
|
||||
snafu = "0.6"
|
||||
workspace-hack = { path = "../workspace-hack"}
|
||||
|
|
|
@ -56,6 +56,7 @@ tokio-util = { version = "0.6.9" }
|
|||
tracker = { path = "../tracker" }
|
||||
uuid = { version = "0.8", features = ["serde", "v4"] }
|
||||
write_buffer = { path = "../write_buffer" }
|
||||
workspace-hack = { path = "../workspace-hack"}
|
||||
|
||||
[dev-dependencies] # In alphabetical order
|
||||
test_helpers = { path = "../test_helpers" }
|
||||
|
|
|
@ -53,7 +53,7 @@ use write_buffer::core::{WriteBufferReading, WriteBufferWriting};
|
|||
|
||||
pub(crate) use crate::db::chunk::DbChunk;
|
||||
pub(crate) use crate::db::lifecycle::ArcDb;
|
||||
use crate::db::write::{WriteFilter, WriteFilterNone};
|
||||
use crate::db::write::{DeleteFilter, DeleteFilterNone, WriteFilter, WriteFilterNone};
|
||||
use crate::{
|
||||
db::{
|
||||
access::QueryCatalogAccess,
|
||||
|
@ -522,25 +522,43 @@ impl Db {
|
|||
|
||||
/// Store a delete
|
||||
pub fn store_delete(&self, delete: &DmlDelete) -> Result<()> {
|
||||
self.store_filtered_delete(delete, DeleteFilterNone::default())
|
||||
}
|
||||
|
||||
/// Store a delete with the provided [`DeleteFilter`]
|
||||
pub fn store_filtered_delete(
|
||||
&self,
|
||||
delete: &DmlDelete,
|
||||
filter: impl DeleteFilter,
|
||||
) -> Result<()> {
|
||||
let predicate = Arc::new(delete.predicate().clone());
|
||||
match delete.table_name() {
|
||||
None => {
|
||||
// Note: This assumes tables cannot be removed from the catalog and therefore
|
||||
// this lock gap is not problematic
|
||||
for table_name in self.catalog.table_names() {
|
||||
self.delete(&table_name, Arc::clone(&predicate))
|
||||
self.delete_filtered(&table_name, Arc::clone(&predicate), filter)
|
||||
.expect("table exists")
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
Some(table_name) => self.delete(table_name, predicate),
|
||||
Some(table_name) => self.delete_filtered(table_name, predicate, filter),
|
||||
}
|
||||
}
|
||||
|
||||
/// Delete data from a table on a specified predicate
|
||||
/// Delete data from a table on a specified predicate
|
||||
///
|
||||
/// Returns an error if the table cannot be found in the catalog
|
||||
pub fn delete(&self, table_name: &str, delete_predicate: Arc<DeletePredicate>) -> Result<()> {
|
||||
self.delete_filtered(table_name, delete_predicate, DeleteFilterNone::default())
|
||||
}
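    // A minimal sketch relating the two entry points above: `store_delete` is simply
    // `store_filtered_delete` with the no-op filter, while replay passes its own
    // `DeleteFilter`. This helper is an illustration only, not part of the public API.
    fn store_delete_sketch(&self, delete: &DmlDelete) -> Result<()> {
        // Identical to self.store_delete(delete)
        self.store_filtered_delete(delete, DeleteFilterNone::default())
    }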
|
||||
|
||||
fn delete_filtered(
|
||||
&self,
|
||||
table_name: &str,
|
||||
delete_predicate: Arc<DeletePredicate>,
|
||||
filter: impl DeleteFilter,
|
||||
) -> Result<()> {
|
||||
// collect delete predicates on preserved partitions for a catalog transaction
|
||||
let mut affected_persisted_chunks = vec![];
|
||||
|
||||
|
@ -558,6 +576,10 @@ impl Db {
|
|||
for chunk in chunks {
|
||||
// save the delete predicate in the chunk
|
||||
let mut chunk = chunk.write();
|
||||
if !filter.filter_chunk(&chunk) {
|
||||
continue;
|
||||
}
|
||||
|
||||
chunk.add_delete_predicate(Arc::clone(&delete_predicate));
|
||||
|
||||
// We should only report persisted chunks or chunks that are currently being persisted, because the
|
||||
|
@ -652,6 +674,40 @@ impl Db {
|
|||
fut.await.context(TaskCancelled)?.context(LifecycleError)
|
||||
}
|
||||
|
||||
/// Compact all provided persisted chunks
|
||||
pub async fn compact_object_store_chunks(
|
||||
self: &Arc<Self>,
|
||||
table_name: &str,
|
||||
partition_key: &str,
|
||||
chunk_ids: Vec<ChunkId>,
|
||||
) -> Result<Option<Arc<DbChunk>>> {
|
||||
if chunk_ids.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
// Use explicit scope to ensure the async generator doesn't
|
||||
// assume the locks have to possibly live across the `await`
|
||||
let fut = {
|
||||
let partition = self.partition(table_name, partition_key)?;
|
||||
let partition = LockableCatalogPartition::new(Arc::clone(self), partition);
|
||||
let partition = partition.read();
|
||||
|
||||
// todo: set these chunks
|
||||
let chunks = vec![];
|
||||
|
||||
// Lock partition for write
|
||||
let partition = partition.upgrade();
|
||||
|
||||
// invoke compact
|
||||
let (_, fut) =
|
||||
lifecycle::compact_object_store::compact_object_store_chunks(partition, chunks)
|
||||
.context(LifecycleError)?;
|
||||
fut
|
||||
};
|
||||
|
||||
fut.await.context(TaskCancelled)?.context(LifecycleError)
|
||||
}
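    // A minimal caller sketch for the method above; the table name, partition key and the
    // helper name are illustrative assumptions, not part of the API.
    async fn compact_persisted_chunks_sketch(
        self: &Arc<Self>,
        chunk_ids: Vec<ChunkId>,
    ) -> Result<()> {
        // With an empty id list this is a no-op returning Ok(None); otherwise the result is
        // the (optional) chunk produced by the compaction job once its todos are filled in.
        let _compacted: Option<Arc<DbChunk>> = self
            .compact_object_store_chunks("cpu", "1970-01-01T00", chunk_ids)
            .await?;
        Ok(())
    }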
|
||||
|
||||
/// Persist given partition.
|
||||
///
|
||||
/// If `force` is `true` will persist all unpersisted data regardless of arrival time
|
||||
|
|
|
@ -175,6 +175,10 @@ impl ChunkStage {
|
|||
pub fn is_open(&self) -> bool {
|
||||
matches!(self, ChunkStage::Open { .. })
|
||||
}
|
||||
|
||||
pub fn is_persisted(&self) -> bool {
|
||||
matches!(self, ChunkStage::Persisted { .. })
|
||||
}
|
||||
}
|
||||
|
||||
/// The catalog representation of a Chunk in IOx. Note that a chunk
|
||||
|
@ -398,6 +402,10 @@ impl CatalogChunk {
|
|||
&self.stage
|
||||
}
|
||||
|
||||
pub fn is_persisted(&self) -> bool {
|
||||
self.stage.is_persisted()
|
||||
}
|
||||
|
||||
/// Returns the AccessRecorder used to record access to this chunk's data by queries
|
||||
pub fn access_recorder(&self) -> &AccessRecorder {
|
||||
&self.access_recorder
|
||||
|
@ -724,6 +732,27 @@ impl CatalogChunk {
|
|||
}
|
||||
}
|
||||
|
||||
/// Set the persisted chunk to be compacting
|
||||
pub fn set_compacting_object_store(&mut self, registration: &TaskRegistration) -> Result<()> {
|
||||
match &self.stage {
|
||||
ChunkStage::Open { .. } | ChunkStage::Frozen { .. } => {
|
||||
unexpected_state!(
|
||||
self,
|
||||
"setting compacting object store",
|
||||
"Persisted",
|
||||
&self.stage
|
||||
)
|
||||
}
|
||||
ChunkStage::Persisted { .. } => {
|
||||
self.set_lifecycle_action(
|
||||
ChunkLifecycleAction::CompactingObjectStore,
|
||||
registration,
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Start lifecycle action that should move the chunk into the _persisted_ stage.
|
||||
pub fn set_writing_to_object_store(&mut self, registration: &TaskRegistration) -> Result<()> {
|
||||
// This ensures the closing logic is consistent but doesn't break code that
|
||||
|
@ -888,12 +917,7 @@ mod tests {
|
|||
use data_types::{delete_predicate::DeleteExpr, timestamp::TimestampRange};
|
||||
|
||||
use mutable_buffer::test_helpers::write_lp_to_new_chunk;
|
||||
use parquet_file::{
|
||||
chunk::ParquetChunk,
|
||||
test_utils::{
|
||||
make_chunk as make_parquet_chunk_with_store, make_iox_object_store, TestSize,
|
||||
},
|
||||
};
|
||||
use parquet_file::test_utils::generator::{ChunkGenerator, GeneratorConfig};
|
||||
|
||||
#[test]
|
||||
fn test_new_open() {
|
||||
|
@ -917,7 +941,7 @@ mod tests {
|
|||
let mut chunk = make_persisted_chunk().await;
|
||||
assert_eq!(
|
||||
chunk.freeze().unwrap_err().to_string(),
|
||||
"Internal Error: unexpected chunk state for Chunk('db':'table1':'part1':00000000-0000-0000-0000-000000000000) \
|
||||
"Internal Error: unexpected chunk state for Chunk('db1':'table1':'part1':00000000-0000-0000-0000-000000000001) \
|
||||
during setting closed. Expected Open or Frozen, got Persisted"
|
||||
);
|
||||
}
|
||||
|
@ -1103,11 +1127,6 @@ mod tests {
|
|||
write_lp_to_new_chunk(&format!("{} bar=1 10", table_name))
|
||||
}
|
||||
|
||||
async fn make_parquet_chunk(addr: ChunkAddr) -> ParquetChunk {
|
||||
let iox_object_store = make_iox_object_store().await;
|
||||
make_parquet_chunk_with_store(iox_object_store, "foo", addr, TestSize::Full).await
|
||||
}
|
||||
|
||||
fn chunk_addr() -> ChunkAddr {
|
||||
ChunkAddr {
|
||||
db_name: Arc::from("db"),
|
||||
|
@ -1131,11 +1150,12 @@ mod tests {
|
|||
}
|
||||
|
||||
async fn make_persisted_chunk() -> CatalogChunk {
|
||||
let addr = chunk_addr();
|
||||
let now = Time::from_timestamp_nanos(43564);
|
||||
let mut generator = ChunkGenerator::new().await;
|
||||
generator.set_config(GeneratorConfig::NoData);
|
||||
let (parquet_chunk, metadata) = generator.generate().await;
|
||||
let addr = ChunkAddr::new(generator.partition(), metadata.chunk_id);
|
||||
|
||||
// assemble ParquetChunk
|
||||
let parquet_chunk = make_parquet_chunk(addr.clone()).await;
|
||||
let now = Time::from_timestamp_nanos(43564);
|
||||
|
||||
CatalogChunk::new_object_store_only(
|
||||
addr,
|
||||
|
|
|
@ -14,7 +14,11 @@ use persistence_windows::{
|
|||
};
|
||||
use schema::Schema;
|
||||
use snafu::{OptionExt, Snafu};
|
||||
use std::{collections::BTreeMap, fmt::Display, sync::Arc};
|
||||
use std::{
|
||||
collections::{BTreeMap, BTreeSet},
|
||||
fmt::Display,
|
||||
sync::Arc,
|
||||
};
|
||||
use time::{Time, TimeProvider};
|
||||
use tracker::RwLock;
|
||||
|
||||
|
@ -368,6 +372,35 @@ impl Partition {
|
|||
self.chunks.iter()
|
||||
}
|
||||
|
||||
/// Return true if no other persisted chunk falls in the middle of
|
||||
/// the provided chunk orders
|
||||
// NGA todo: There is test_compact_os_non_contiguous_chunks in
|
||||
// compact_object_store.rs to test this but I will add more unit tests right here
|
||||
// when PR #3167 ChunkGenerator is merged
|
||||
pub fn contiguous_object_store_chunks(&self, chunk_orders: &BTreeSet<ChunkOrder>) -> bool {
|
||||
// Last order in the chunk_orders for comparison
|
||||
let last_order_element = chunk_orders.iter().rev().next();
|
||||
let last_order = match last_order_element {
|
||||
Some(last_order) => last_order,
|
||||
None => {
|
||||
return true;
|
||||
} // provided chunk_orders is empty
|
||||
};
|
||||
|
||||
let chunks = self.chunks();
|
||||
for chunk in chunks {
|
||||
let chunk = chunk.read();
|
||||
if chunk.is_persisted() {
|
||||
let order = chunk.order();
|
||||
// this chunk does not belong to chunk_orders but falls in the middle of them
|
||||
if !chunk_orders.contains(&order) && order < *last_order {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
true
|
||||
}
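    // A sketch of what the check above accepts and rejects, assuming `self` currently holds
    // persisted chunks with orders 1, 2 and 3; the order values are illustrative only.
    #[cfg(test)]
    fn contiguity_examples(&self) {
        let orders = |ids: &[u32]| -> BTreeSet<ChunkOrder> {
            ids.iter().map(|id| ChunkOrder::new(*id).unwrap()).collect()
        };

        // {1, 2}: no persisted chunk below the selection's maximum order is missing from the set
        assert!(self.contiguous_object_store_chunks(&orders(&[1, 2])));
        // {1, 3}: the persisted chunk with order 2 sits in the middle, so the selection is rejected
        assert!(!self.contiguous_object_store_chunks(&orders(&[1, 3])));
        // an empty selection is trivially contiguous
        assert!(self.contiguous_object_store_chunks(&BTreeSet::new()));
    }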
|
||||
|
||||
/// Return a PartitionSummary for this partition. If the partition
|
||||
/// has no chunks, returns None.
|
||||
pub fn summary(&self) -> Option<PartitionSummary> {
|
||||
|
|
|
@ -33,6 +33,7 @@ pub(crate) use persist::persist_chunks;
|
|||
pub(crate) use unload::unload_read_buffer_chunk;
|
||||
|
||||
mod compact;
|
||||
pub(crate) mod compact_object_store;
|
||||
mod drop;
|
||||
mod error;
|
||||
mod persist;
|
||||
|
@ -201,6 +202,17 @@ impl LockablePartition for LockableCatalogPartition {
|
|||
Ok(tracker)
|
||||
}
|
||||
|
||||
fn compact_object_store_chunks(
|
||||
partition: LifecycleWriteGuard<'_, Partition, Self>,
|
||||
chunks: Vec<LifecycleWriteGuard<'_, CatalogChunk, Self::Chunk>>,
|
||||
) -> Result<TaskTracker<Job>, Self::Error> {
|
||||
info!(table=%partition.table_name(), partition=%partition.partition_key(), "compacting object store chunks");
|
||||
let (tracker, fut) = compact_object_store::compact_object_store_chunks(partition, chunks)?;
|
||||
let _ =
|
||||
tokio::spawn(async move { fut.await.log_if_error("compacting object store chunks") });
|
||||
Ok(tracker)
|
||||
}
|
||||
|
||||
fn prepare_persist(
|
||||
partition: &mut LifecycleWriteGuard<'_, Self::Partition, Self>,
|
||||
force: bool,
|
||||
|
|
|
@ -0,0 +1,483 @@
|
|||
//! This module compacts object store chunks (aka persisted chunks)
|
||||
|
||||
use super::{
|
||||
error::{
|
||||
ChunksNotContiguous, ChunksNotInPartition, EmptyChunks, ParquetChunkError,
|
||||
WritingToObjectStore,
|
||||
},
|
||||
LockableCatalogChunk, LockableCatalogPartition, Result,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
db::{
|
||||
catalog::{chunk::CatalogChunk, partition::Partition},
|
||||
lifecycle::merge_schemas,
|
||||
DbChunk,
|
||||
},
|
||||
Db,
|
||||
};
|
||||
use data_types::{
|
||||
chunk_metadata::{ChunkAddr, ChunkId, ChunkOrder},
|
||||
delete_predicate::DeletePredicate,
|
||||
job::Job,
|
||||
partition_metadata::PartitionAddr,
|
||||
};
|
||||
use datafusion::physical_plan::SendableRecordBatchStream;
|
||||
use futures::Future;
|
||||
use lifecycle::LifecycleWriteGuard;
|
||||
use observability_deps::tracing::info;
|
||||
use parquet_file::{
|
||||
chunk::{ChunkMetrics as ParquetChunkMetrics, ParquetChunk},
|
||||
metadata::IoxMetadata,
|
||||
storage::Storage,
|
||||
};
|
||||
use persistence_windows::checkpoint::{DatabaseCheckpoint, PartitionCheckpoint};
|
||||
use query::{compute_sort_key, exec::ExecutorType, frontend::reorg::ReorgPlanner, QueryChunkMeta};
|
||||
use schema::Schema;
|
||||
use snafu::ResultExt;
|
||||
use std::{
|
||||
collections::{BTreeSet, HashSet},
|
||||
sync::Arc,
|
||||
};
|
||||
use time::Time;
|
||||
use tracker::{TaskRegistration, TaskTracker, TrackedFuture, TrackedFutureExt};
|
||||
|
||||
/// Compact the provided object store chunks into a single object store chunk,
|
||||
/// returning the newly created chunk
|
||||
///
|
||||
/// The function will error if
|
||||
/// . No chunks are provided
|
||||
/// . the provided chunk(s) do not belong to the provided partition
|
||||
/// . not all provided chunks are persisted
|
||||
/// . the provided chunks are not contiguous
|
||||
/// Implementation steps
|
||||
/// . Verify the eligibility of the input OS chunks and mark them as ready to compact
|
||||
/// . Compact the chunks
|
||||
/// . Persist the compacted output into an OS chunk
|
||||
/// . Drop old chunks and make the new chunk available in one transaction
|
||||
pub(crate) fn compact_object_store_chunks(
|
||||
partition: LifecycleWriteGuard<'_, Partition, LockableCatalogPartition>,
|
||||
chunks: Vec<LifecycleWriteGuard<'_, CatalogChunk, LockableCatalogChunk>>,
|
||||
) -> Result<(
|
||||
TaskTracker<Job>,
|
||||
TrackedFuture<impl Future<Output = Result<Option<Arc<DbChunk>>>> + Send>,
|
||||
)> {
|
||||
// Track compaction duration
|
||||
let now = std::time::Instant::now();
|
||||
// Register the compacting job
|
||||
let db = Arc::clone(&partition.data().db);
|
||||
let partition_addr = partition.addr().clone();
|
||||
let chunk_ids: Vec<_> = chunks.iter().map(|x| x.id()).collect();
|
||||
info!(%partition_addr, ?chunk_ids, "compacting object store chunks");
|
||||
let (tracker, registration) = db.jobs.register(Job::CompactObjectStoreChunks {
|
||||
partition: partition.addr().clone(),
|
||||
chunks: chunk_ids.clone(),
|
||||
});
|
||||
|
||||
// Step 1: Verify input while marking and snapshotting the chunks for compacting
|
||||
let compacting_os_chunks = mark_chunks_to_compact(partition, chunks, ®istration)?;
|
||||
let _delete_predicates_before = compacting_os_chunks.delete_predicates;
|
||||
|
||||
let fut = async move {
|
||||
// track future runtime
|
||||
let fut_now = std::time::Instant::now();
|
||||
|
||||
// Step 2: Compact the os chunks into a stream
|
||||
let compacted_stream = compact_chunks(&db, &compacting_os_chunks.os_chunks).await?;
|
||||
let compacted_rows;
|
||||
let _schema = compacted_stream.schema;
|
||||
let sort_key = compacted_stream.sort_key;
|
||||
|
||||
// Step 3: Start to persist files and update the preserved catalog accordingly
|
||||
// This process needs to hold the cleanup lock so the persisted file is not deleted right after
|
||||
// it is created and before it is recorded in the preserved catalog
|
||||
{
|
||||
// fetch shared (= read) guard preventing the cleanup job from deleting our files
|
||||
let _guard = db.cleanup_lock.read().await;
|
||||
|
||||
// Step 3.1: Write the chunk as a parquet file into the object store
|
||||
let iox_metadata = IoxMetadata {
|
||||
creation_timestamp: db.time_provider.now(),
|
||||
table_name: Arc::clone(&partition_addr.table_name),
|
||||
partition_key: Arc::clone(&partition_addr.partition_key),
|
||||
chunk_id: ChunkId::new(),
|
||||
partition_checkpoint: compacting_os_chunks.partition_checkpoint.clone(),
|
||||
database_checkpoint: compacting_os_chunks.database_checkpoint.clone(),
|
||||
time_of_first_write: compacting_os_chunks.time_of_first_write,
|
||||
time_of_last_write: compacting_os_chunks.time_of_last_write,
|
||||
chunk_order: compacting_os_chunks.min_order,
|
||||
};
|
||||
|
||||
let compacted_and_persisted_chunk = persist_stream_to_chunk(
|
||||
&db,
|
||||
&partition_addr,
|
||||
compacted_stream.stream,
|
||||
iox_metadata,
|
||||
)
|
||||
.await?;
|
||||
compacted_rows = compacted_and_persisted_chunk.rows();
|
||||
|
||||
// Step 3.2: Update the preserved catalogs to use the newly created os_chunk
|
||||
// Todo: This will be done in a sub-function that creates a single transaction that:
|
||||
// . Drop all os_chunks from the preserved catalog
|
||||
// . Add the newly created os_chunk into the preserved catalog
|
||||
// Extra: delete_predicates_after must be included here or below (detail will be figured out)
|
||||
} // End of cleanup locking
|
||||
|
||||
// Step 4: Update the in-memory catalogs to use the newly created os_chunk
|
||||
// . Drop all os_chunks from the in-memory catalog
|
||||
// . Add the new created os_chunk in the in-memory catalog
|
||||
// This step can be done outside a transaction because the in-memory catalog
|
||||
// was designed to be fault tolerant
|
||||
|
||||
// - Extra note: If there is a risk that the parquet files of os_chunks are
|
||||
// permanently deleted from the Object Store between step 3 and step 4,
|
||||
// we might need to put steps 3 and 4 in the same transaction
|
||||
|
||||
// Log the summary
|
||||
let elapsed = now.elapsed();
|
||||
// input rows per second
|
||||
let throughput =
|
||||
(compacting_os_chunks.input_rows as u128 * 1_000_000_000) / elapsed.as_nanos();
|
||||
info!(input_chunks=chunk_ids.len(),
|
||||
%compacting_os_chunks.input_rows, %compacted_rows,
|
||||
%sort_key,
|
||||
compaction_took = ?elapsed,
|
||||
fut_execution_duration= ?fut_now.elapsed(),
|
||||
rows_per_sec=?throughput,
|
||||
"object store chunk(s) compacted");
|
||||
|
||||
Ok(None) // todo: will be a real chunk when all todos done
|
||||
};
|
||||
|
||||
Ok((tracker, fut.track(registration)))
|
||||
}
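// A minimal caller sketch, mirroring LockableCatalogPartition::compact_object_store_chunks:
// the pair returned above separates job tracking from execution, so the caller decides where
// the tracked future runs. Guard acquisition is elided and assumed to have happened already.
fn spawn_compaction_sketch(
    partition: LifecycleWriteGuard<'_, Partition, LockableCatalogPartition>,
    chunks: Vec<LifecycleWriteGuard<'_, CatalogChunk, LockableCatalogChunk>>,
) -> Result<TaskTracker<Job>> {
    let (tracker, fut) = compact_object_store_chunks(partition, chunks)?;
    // The future is registered with the job registry but not yet running; hand it to tokio
    // (or `.await` it directly, as Db::compact_object_store_chunks does).
    let _ = tokio::spawn(async move {
        let _ = fut.await;
    });
    Ok(tracker)
}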
|
||||
|
||||
/// Verify eligible compacting chunks, mark and snapshot them to get ready for compacting
|
||||
/// Returns an error if
|
||||
/// . provided chunks do not belong to the provided partition
|
||||
/// . not all provided chunks are persisted
|
||||
/// . the provided chunks are not contiguous
|
||||
/// Returns:
|
||||
/// . min (time_of_first_write) of provided chunks
|
||||
/// . max (time_of_last_write) of provided chunks
|
||||
/// . total rows of the provided chunks to be compacted
|
||||
/// . all delete predicates of the provided chunks
|
||||
/// . snapshot of the provided chunks
|
||||
/// . min(order) of the provided chunks
|
||||
/// . max(database_checkpoint) of the provided chunks
|
||||
/// . max(partition_checkpoint) of the provided chunks
|
||||
fn mark_chunks_to_compact(
|
||||
partition: LifecycleWriteGuard<'_, Partition, LockableCatalogPartition>,
|
||||
chunks: Vec<LifecycleWriteGuard<'_, CatalogChunk, LockableCatalogChunk>>,
|
||||
registration: &TaskRegistration,
|
||||
) -> Result<CompactingOsChunks> {
|
||||
// no chunks provided
|
||||
if chunks.is_empty() {
|
||||
return EmptyChunks {}.fail();
|
||||
}
|
||||
|
||||
let db = Arc::clone(&partition.data().db);
|
||||
let partition_addr = partition.addr().clone();
|
||||
|
||||
// Mark and snapshot chunks, then drop locks
|
||||
let mut time_of_first_write = Time::MAX;
|
||||
let mut time_of_last_write = Time::MIN;
|
||||
let mut chunk_orders = BTreeSet::new();
|
||||
let mut input_rows = 0;
|
||||
let mut delete_predicates: HashSet<Arc<DeletePredicate>> = HashSet::new();
|
||||
let mut min_order = ChunkOrder::MAX;
|
||||
|
||||
// initialize checkpoints
|
||||
let database_checkpoint = DatabaseCheckpoint::new(Default::default());
|
||||
let partition_checkpoint = PartitionCheckpoint::new(
|
||||
Arc::clone(&partition_addr.table_name),
|
||||
Arc::clone(&partition_addr.partition_key),
|
||||
Default::default(),
|
||||
Time::from_timestamp_nanos(0),
|
||||
);
|
||||
|
||||
let os_chunks = chunks
|
||||
.into_iter()
|
||||
.map(|mut chunk| {
|
||||
// Sanity-check
|
||||
assert!(Arc::ptr_eq(&db, &chunk.data().db));
|
||||
assert_eq!(
|
||||
chunk.table_name().as_ref(),
|
||||
partition_addr.table_name.as_ref()
|
||||
);
|
||||
|
||||
// provided chunks not in the provided partition
|
||||
if chunk.key() != partition_addr.partition_key.as_ref() {
|
||||
return ChunksNotInPartition {}.fail();
|
||||
}
|
||||
|
||||
input_rows += chunk.table_summary().total_count();
|
||||
|
||||
let candidate_first = chunk.time_of_first_write();
|
||||
time_of_first_write = std::cmp::min(time_of_first_write, candidate_first);
|
||||
|
||||
let candidate_last = chunk.time_of_last_write();
|
||||
time_of_last_write = std::cmp::max(time_of_last_write, candidate_last);
|
||||
|
||||
delete_predicates.extend(chunk.delete_predicates().iter().cloned());
|
||||
|
||||
min_order = min_order.min(chunk.order());
|
||||
chunk_orders.insert(chunk.order());
|
||||
|
||||
// Todo: get the chunk's database_checkpoint and partition_checkpoint and keep the max
|
||||
|
||||
// Set chunk in the right action which is compacting object store
|
||||
// This function will also error out if the chunk is not yet persisted
|
||||
chunk.set_compacting_object_store(registration)?;
|
||||
Ok(DbChunk::snapshot(&*chunk))
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
// Verify that all the provided chunks are contiguous
|
||||
if !partition.contiguous_object_store_chunks(&chunk_orders) {
|
||||
return ChunksNotContiguous {}.fail();
|
||||
}
|
||||
|
||||
// drop partition lock
|
||||
std::mem::drop(partition);
|
||||
|
||||
Ok(CompactingOsChunks {
|
||||
time_of_first_write,
|
||||
time_of_last_write,
|
||||
input_rows,
|
||||
delete_predicates,
|
||||
os_chunks,
|
||||
min_order,
|
||||
database_checkpoint,
|
||||
partition_checkpoint,
|
||||
})
|
||||
}
|
||||
|
||||
/// Data gathered from the OS chunks that were marked for compacting
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct CompactingOsChunks {
|
||||
time_of_first_write: Time,
|
||||
time_of_last_write: Time,
|
||||
input_rows: u64,
|
||||
delete_predicates: HashSet<Arc<DeletePredicate>>,
|
||||
os_chunks: Vec<Arc<DbChunk>>,
|
||||
min_order: ChunkOrder,
|
||||
database_checkpoint: DatabaseCheckpoint,
|
||||
partition_checkpoint: PartitionCheckpoint,
|
||||
}
|
||||
|
||||
/// Create query plan to compact the given DbChunks and return its output stream
|
||||
/// Return:
|
||||
/// . stream of output record batches of the scanned chunks (`Result<SendableRecordBatchStream>`)
|
||||
/// Deleted and duplicated data will be eliminated during the scan
|
||||
/// . Output schema of the compact plan
|
||||
/// . Sort Key of the output data
|
||||
async fn compact_chunks(db: &Db, query_chunks: &[Arc<DbChunk>]) -> Result<CompactedStream> {
|
||||
// Executor context for running the reorg (compaction) plan
|
||||
let ctx = db.exec.new_context(ExecutorType::Reorg);
|
||||
|
||||
// Compute the sorted output of the compacting result
|
||||
let sort_key = compute_sort_key(query_chunks.iter().map(|x| x.summary()));
|
||||
let sort_key_str = format!("\"{}\"", sort_key); // for logging
|
||||
|
||||
// Merge schema of the compacting chunks
|
||||
let merged_schema = merge_schemas(query_chunks);
|
||||
|
||||
// Build compact query plan
|
||||
let (plan_schema, plan) = ReorgPlanner::new().compact_plan(
|
||||
Arc::clone(&merged_schema),
|
||||
query_chunks.iter().map(Arc::clone),
|
||||
sort_key,
|
||||
)?;
|
||||
let physical_plan = ctx.prepare_plan(&plan).await?;
|
||||
|
||||
// run the plan
|
||||
let stream = ctx.execute_stream(physical_plan).await?;
|
||||
|
||||
Ok(CompactedStream {
|
||||
stream,
|
||||
schema: plan_schema,
|
||||
sort_key: sort_key_str,
|
||||
})
|
||||
}
|
||||
|
||||
/// Struct holding output of a compacted stream
|
||||
struct CompactedStream {
|
||||
stream: SendableRecordBatchStream,
|
||||
schema: Arc<Schema>,
|
||||
sort_key: String,
|
||||
}
|
||||
|
||||
/// Persist a provided stream to a new OS chunk
|
||||
async fn persist_stream_to_chunk<'a>(
|
||||
db: &'a Db,
|
||||
partition_addr: &'a PartitionAddr,
|
||||
stream: SendableRecordBatchStream,
|
||||
iox_metadata: IoxMetadata,
|
||||
) -> Result<Arc<ParquetChunk>> {
|
||||
// Create a storage to save data of this chunk
|
||||
let storage = Storage::new(Arc::clone(&db.iox_object_store));
|
||||
|
||||
// Write the chunk stream data into a parquet file in the storage
|
||||
let chunk_addr = ChunkAddr::new(partition_addr, iox_metadata.chunk_id);
|
||||
let (path, file_size_bytes, parquet_metadata) = storage
|
||||
.write_to_object_store(chunk_addr, stream, iox_metadata)
|
||||
.await
|
||||
.context(WritingToObjectStore)?;
|
||||
|
||||
// Create parquet chunk for the parquet file
|
||||
let parquet_metadata = Arc::new(parquet_metadata);
|
||||
let metrics = ParquetChunkMetrics::new(db.metric_registry.as_ref());
|
||||
let parquet_chunk = Arc::new(
|
||||
ParquetChunk::new(
|
||||
&path,
|
||||
Arc::clone(&db.iox_object_store),
|
||||
file_size_bytes,
|
||||
Arc::clone(&parquet_metadata),
|
||||
Arc::clone(&partition_addr.table_name),
|
||||
Arc::clone(&partition_addr.partition_key),
|
||||
metrics,
|
||||
)
|
||||
.context(ParquetChunkError)?,
|
||||
);
|
||||
|
||||
Ok(parquet_chunk)
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::{db::test_helpers::write_lp, utils::make_db};
|
||||
use lifecycle::{LockableChunk, LockablePartition};
|
||||
use query::QueryChunk;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compact_os_no_chunks() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
||||
let db = make_db().await.db;
|
||||
let partition_key = "1970-01-01T00";
|
||||
write_lp(&db, "cpu,tag1=cupcakes bar=1 10").await;
|
||||
|
||||
let db_partition = db.partition("cpu", partition_key).unwrap();
|
||||
let partition = LockableCatalogPartition::new(Arc::clone(&db), Arc::clone(&db_partition));
|
||||
let partition = partition.write();
|
||||
|
||||
let (_, registration) = db.jobs.register(Job::CompactObjectStoreChunks {
|
||||
partition: partition.addr().clone(),
|
||||
chunks: vec![],
|
||||
});
|
||||
let compact_no_chunks = mark_chunks_to_compact(partition, vec![], ®istration);
|
||||
|
||||
let err = compact_no_chunks.unwrap_err();
|
||||
assert!(
|
||||
err.to_string()
|
||||
.contains("No object store chunks provided for compacting"),
|
||||
"No object store chunks provided for compacting"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compact_os_non_os_chunks() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
||||
let db = make_db().await.db;
|
||||
let partition_key = "1970-01-01T00";
|
||||
write_lp(&db, "cpu,tag1=cupcakes bar=1 10").await;
|
||||
|
||||
let db_partition = db.partition("cpu", partition_key).unwrap();
|
||||
|
||||
// try to compact chunks that are not yet persisted
|
||||
let partition = LockableCatalogPartition::new(Arc::clone(&db), Arc::clone(&db_partition));
|
||||
let partition = partition.read();
|
||||
let chunks = LockablePartition::chunks(&partition);
|
||||
assert_eq!(chunks.len(), 1);
|
||||
let partition = partition.upgrade();
|
||||
let chunk = chunks[0].write();
|
||||
|
||||
let (_, registration) = db.jobs.register(Job::CompactObjectStoreChunks {
|
||||
partition: partition.addr().clone(),
|
||||
chunks: vec![chunk.id()],
|
||||
});
|
||||
|
||||
let compact_non_persisted_chunks =
|
||||
mark_chunks_to_compact(partition, vec![chunk], ®istration);
|
||||
let err = compact_non_persisted_chunks.unwrap_err();
|
||||
assert!(
|
||||
err.to_string().contains("Expected Persisted, got Open"),
|
||||
"Expected Persisted, got Open"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compact_os_non_contiguous_chunks() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
||||
let db = make_db().await.db;
|
||||
let partition_key = "1970-01-01T00";
|
||||
write_lp(&db, "cpu,tag1=cupcakes bar=1 10").await;
|
||||
|
||||
let db_partition = db.partition("cpu", partition_key).unwrap();
|
||||
|
||||
// persist chunk 1
|
||||
db.persist_partition("cpu", partition_key, true)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap()
|
||||
.id();
|
||||
//
|
||||
// persist chunk 2
|
||||
write_lp(db.as_ref(), "cpu,tag1=chunk2,tag2=a bar=2 10").await;
|
||||
db.persist_partition("cpu", partition_key, true)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap()
|
||||
.id();
|
||||
//
|
||||
// persist chunk 3
|
||||
write_lp(db.as_ref(), "cpu,tag1=chunk3,tag2=a bar=2 30").await;
|
||||
db.persist_partition("cpu", partition_key, true)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap()
|
||||
.id();
|
||||
//
|
||||
// Add a MUB
|
||||
write_lp(db.as_ref(), "cpu,tag1=chunk4,tag2=a bar=2 40").await;
|
||||
|
||||
// compact 2 non-contiguous chunks: chunk 1 and chunk 3
|
||||
let partition = LockableCatalogPartition::new(Arc::clone(&db), Arc::clone(&db_partition));
|
||||
let partition = partition.read();
|
||||
let chunks = LockablePartition::chunks(&partition);
|
||||
assert_eq!(chunks.len(), 4);
|
||||
let partition = partition.upgrade();
|
||||
let chunk1 = chunks[0].write();
|
||||
let chunk3 = chunks[2].write();
|
||||
|
||||
let (_, registration) = db.jobs.register(Job::CompactObjectStoreChunks {
|
||||
partition: partition.addr().clone(),
|
||||
chunks: vec![chunk1.id(), chunk3.id()],
|
||||
});
|
||||
|
||||
let compact_non_contiguous_persisted_chunks =
|
||||
mark_chunks_to_compact(partition, vec![chunk1, chunk3], ®istration);
|
||||
let err = compact_non_contiguous_persisted_chunks.unwrap_err();
|
||||
assert!(
|
||||
err.to_string()
|
||||
.contains("Cannot compact the provided persisted chunks. They are not contiguous"),
|
||||
"Cannot compact the provided persisted chunks. They are not contiguous"
|
||||
);
|
||||
}
|
||||
|
||||
// todo: add tests
|
||||
// . compact 2 contiguous OS chunks
|
||||
// . compact 3 chunks with duplicated data
|
||||
// . compact with deletes before compacting
|
||||
// . compact with deletes happening during compaction
|
||||
// . verify checkpoints
|
||||
// . replay
|
||||
}
|
|
@ -39,6 +39,11 @@ pub enum Error {
|
|||
chunk_id: u32,
|
||||
},
|
||||
|
||||
#[snafu(display("Error reading from object store: {}", source))]
|
||||
ReadingObjectStore {
|
||||
source: parquet_file::storage::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Error writing to object store: {}", source))]
|
||||
WritingToObjectStore {
|
||||
source: parquet_file::storage::Error,
|
||||
|
@ -57,6 +62,17 @@ pub enum Error {
|
|||
|
||||
#[snafu(display("Cannot drop unpersisted chunk: {}", addr))]
|
||||
CannotDropUnpersistedChunk { addr: ChunkAddr },
|
||||
|
||||
#[snafu(display("No object store chunks provided for compacting"))]
|
||||
EmptyChunks {},
|
||||
|
||||
#[snafu(display(
|
||||
"Cannot compact chunks because at least one does not belong to the given partition"
|
||||
))]
|
||||
ChunksNotInPartition {},
|
||||
|
||||
#[snafu(display("Cannot compact the provided persisted chunks. They are not contiguous"))]
|
||||
ChunksNotContiguous {},
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
|
|
@ -44,6 +44,8 @@ use super::{
|
|||
/// Returns a future registered with the tracker registry, and the corresponding tracker
|
||||
///
|
||||
/// The caller can either spawn this future to tokio, or block directly on it
|
||||
///
|
||||
/// NB: This function is tightly coupled with the semantics of persist_chunks
|
||||
pub(super) fn write_chunk_to_object_store(
|
||||
partition: LifecycleWriteGuard<'_, Partition, LockableCatalogPartition>,
|
||||
mut chunk: LifecycleWriteGuard<'_, CatalogChunk, LockableCatalogChunk>,
|
||||
|
@ -155,6 +157,13 @@ pub(super) fn write_chunk_to_object_store(
|
|||
.context(ParquetChunkError)?,
|
||||
);
|
||||
|
||||
// Collect any pending delete predicate from any partitions and include them in
|
||||
// the transaction. This MUST be done after the DatabaseCheckpoint is computed
|
||||
//
|
||||
// This ensures that any deletes encountered during or prior to the replay window
|
||||
// must have been made durable within the catalog for any persisted chunks
|
||||
let delete_handle = db.delete_predicates_mailbox.consume().await;
|
||||
|
||||
// IMPORTANT: Start transaction AFTER writing the actual parquet file so we do not hold
|
||||
// the transaction lock (that is part of the PreservedCatalog) for too long.
|
||||
// By using the cleanup lock (see above) it is ensured that the file that we
|
||||
|
@ -169,7 +178,7 @@ pub(super) fn write_chunk_to_object_store(
|
|||
};
|
||||
transaction.add_parquet(&info);
|
||||
|
||||
// add delete predicates
|
||||
// add delete predicates for this chunk
|
||||
//
|
||||
// Delete predicates are handled in the following way
|
||||
// 1. Predicates added before this chunk was created (aka before the DataFusion split plan was running):
|
||||
|
@ -182,9 +191,16 @@ pub(super) fn write_chunk_to_object_store(
|
|||
transaction.delete_predicate(&predicate, &[addr.clone().into()]);
|
||||
}
|
||||
|
||||
for (predicate, chunks) in delete_handle.outbox() {
|
||||
transaction.delete_predicate(predicate, chunks);
|
||||
}
|
||||
|
||||
// preserved commit
|
||||
let ckpt_handle = transaction.commit().await.context(CommitError)?;
|
||||
|
||||
// Deletes persisted correctly
|
||||
delete_handle.flush();
|
||||
|
||||
// in-mem commit
|
||||
{
|
||||
let mut guard = chunk.write();
|
||||
|
@ -195,6 +211,7 @@ pub(super) fn write_chunk_to_object_store(
|
|||
|
||||
let create_checkpoint =
|
||||
ckpt_handle.revision_counter() % catalog_transactions_until_checkpoint == 0;
|
||||
|
||||
if create_checkpoint {
|
||||
// Commit is already done, so we can just scan the catalog for the state.
|
||||
//
|
||||
|
|
|
@ -18,7 +18,8 @@ use snafu::{ResultExt, Snafu};
|
|||
use time::Time;
|
||||
use write_buffer::core::WriteBufferReading;
|
||||
|
||||
use crate::db::write::WriteFilter;
|
||||
use crate::db::catalog::chunk::{CatalogChunk, ChunkStage};
|
||||
use crate::db::write::{DeleteFilter, WriteFilter};
|
||||
use crate::Db;
|
||||
|
||||
#[allow(clippy::enum_variant_names)]
|
||||
|
@ -243,8 +244,7 @@ pub async fn perform_replay(
|
|||
for n_try in 1..=n_tries {
|
||||
let result = match &dml_operation {
|
||||
DmlOperation::Write(write) => db.store_filtered_write(write, filter),
|
||||
// TODO: Only apply delete to unpersisted chunks (#3125)
|
||||
DmlOperation::Delete(delete) => db.store_delete(delete),
|
||||
DmlOperation::Delete(delete) => db.store_filtered_delete(delete, filter),
|
||||
};
|
||||
|
||||
match result {
|
||||
|
@ -370,6 +370,19 @@ impl<'a> WriteFilter for ReplayFilter<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a> DeleteFilter for ReplayFilter<'a> {
|
||||
fn filter_chunk(&self, chunk: &CatalogChunk) -> bool {
|
||||
// The persist lifecycle action MUST persist any outstanding delete predicates
|
||||
//
|
||||
// As such deletes should only be applied to unpersisted chunks - i.e.
|
||||
// those containing data from the in-progress replay operation
|
||||
//
|
||||
// This avoids a situation where a delete could be applied to a chunk containing
|
||||
// data from writes sequenced after the delete being replayed
|
||||
!matches!(chunk.stage(), ChunkStage::Persisted { .. })
|
||||
}
|
||||
}
|
||||
|
||||
/// Where is a given sequence number and the entire data batch associated with it compared to the range of persisted and
|
||||
/// partially persisted sequence numbers (extracted from partition checkpoint).
|
||||
#[derive(Debug, PartialEq)]
|
||||
|
@ -431,9 +444,13 @@ mod tests {
|
|||
use arrow_util::assert_batches_eq;
|
||||
use data_types::{
|
||||
database_rules::{PartitionTemplate, TemplatePart},
|
||||
delete_predicate::DeletePredicate,
|
||||
non_empty::NonEmptyString,
|
||||
sequence::Sequence,
|
||||
server_id::ServerId,
|
||||
timestamp::TimestampRange,
|
||||
};
|
||||
use dml::{DmlDelete, DmlMeta};
|
||||
use object_store::ObjectStore;
|
||||
use persistence_windows::{
|
||||
checkpoint::{PartitionCheckpoint, PersistCheckpointBuilder, ReplayPlanner},
|
||||
|
@ -459,6 +476,14 @@ mod tests {
|
|||
lp: &'static str,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct TestDelete {
|
||||
sequencer_id: u32,
|
||||
sequence_number: u64,
|
||||
table_name: Option<&'static str>,
|
||||
predicate: DeletePredicate,
|
||||
}
|
||||
|
||||
/// Different checks for replay tests
|
||||
#[derive(Debug)]
|
||||
enum Check {
|
||||
|
@ -514,6 +539,9 @@ mod tests {
|
|||
///
|
||||
/// Persistence and write buffer reads are enabled in preparation to this step.
|
||||
Await(Vec<Check>),
|
||||
|
||||
/// Performs a delete to the given table
|
||||
Delete(Vec<TestDelete>),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
|
@ -724,6 +752,21 @@ mod tests {
|
|||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
}
|
||||
}
|
||||
Step::Delete(deletes) => {
|
||||
for delete in deletes {
|
||||
let delete = DmlDelete::new(
|
||||
delete.predicate,
|
||||
delete.table_name.and_then(NonEmptyString::new),
|
||||
DmlMeta::sequenced(
|
||||
Sequence::new(delete.sequencer_id, delete.sequence_number),
|
||||
time::Time::from_timestamp_nanos(0),
|
||||
None,
|
||||
0,
|
||||
),
|
||||
);
|
||||
write_buffer_state.push_delete(delete)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2568,6 +2611,126 @@ mod tests {
|
|||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn replay_delete() {
|
||||
ReplayTest {
|
||||
steps: vec![
|
||||
Step::Ingest(vec![TestSequencedEntry {
|
||||
sequencer_id: 0,
|
||||
sequence_number: 0,
|
||||
lp: "table_1,tag_partition_by=a bar=10 10",
|
||||
}]),
|
||||
Step::Await(vec![Check::Query(
|
||||
"select * from table_1 order by bar",
|
||||
vec![
|
||||
"+-----+------------------+--------------------------------+",
|
||||
"| bar | tag_partition_by | time |",
|
||||
"+-----+------------------+--------------------------------+",
|
||||
"| 10 | a | 1970-01-01T00:00:00.000000010Z |",
|
||||
"+-----+------------------+--------------------------------+",
|
||||
],
|
||||
)]),
|
||||
Step::MakeWritesPersistable,
|
||||
Step::Persist(vec![("table_1", "tag_partition_by_a")]),
|
||||
Step::Delete(vec![TestDelete {
|
||||
sequencer_id: 0,
|
||||
sequence_number: 1,
|
||||
table_name: None,
|
||||
predicate: DeletePredicate {
|
||||
range: TimestampRange { start: 0, end: 20 },
|
||||
exprs: vec![],
|
||||
},
|
||||
}]),
|
||||
Step::Ingest(vec![TestSequencedEntry {
|
||||
sequencer_id: 0,
|
||||
sequence_number: 2,
|
||||
lp: "table_1,tag_partition_by=b bar=15 15",
|
||||
}]),
|
||||
Step::Await(vec![Check::Query(
|
||||
"select * from table_1 order by bar",
|
||||
vec![
|
||||
"+-----+------------------+--------------------------------+",
|
||||
"| bar | tag_partition_by | time |",
|
||||
"+-----+------------------+--------------------------------+",
|
||||
"| 15 | b | 1970-01-01T00:00:00.000000015Z |",
|
||||
"+-----+------------------+--------------------------------+",
|
||||
],
|
||||
)]),
|
||||
Step::MakeWritesPersistable,
|
||||
Step::Persist(vec![("table_1", "tag_partition_by_b")]),
|
||||
Step::Restart,
|
||||
Step::Replay,
|
||||
Step::Assert(vec![Check::Query(
|
||||
"select * from table_1 order by bar",
|
||||
vec![
|
||||
"+-----+------------------+--------------------------------+",
|
||||
"| bar | tag_partition_by | time |",
|
||||
"+-----+------------------+--------------------------------+",
|
||||
"| 15 | b | 1970-01-01T00:00:00.000000015Z |",
|
||||
"+-----+------------------+--------------------------------+",
|
||||
],
|
||||
)]),
|
||||
],
|
||||
..Default::default()
|
||||
}
|
||||
.run()
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn replay_delete_persisted_chunks() {
|
||||
ReplayTest {
|
||||
steps: vec![
|
||||
Step::Ingest(vec![TestSequencedEntry {
|
||||
sequencer_id: 0,
|
||||
sequence_number: 0,
|
||||
lp: "table_1,tag_partition_by=a bar=10 10",
|
||||
}]),
|
||||
Step::Delete(vec![TestDelete {
|
||||
sequencer_id: 0,
|
||||
sequence_number: 1,
|
||||
table_name: None,
|
||||
predicate: DeletePredicate {
|
||||
range: TimestampRange { start: 0, end: 11 },
|
||||
exprs: vec![],
|
||||
},
|
||||
}]),
|
||||
Step::Ingest(vec![TestSequencedEntry {
|
||||
sequencer_id: 0,
|
||||
sequence_number: 2,
|
||||
lp: "table_1,tag_partition_by=b bar=20 10",
|
||||
}]),
|
||||
Step::Await(vec![Check::Query(
|
||||
"select * from table_1 order by bar",
|
||||
vec![
|
||||
"+-----+------------------+--------------------------------+",
|
||||
"| bar | tag_partition_by | time |",
|
||||
"+-----+------------------+--------------------------------+",
|
||||
"| 20 | b | 1970-01-01T00:00:00.000000010Z |",
|
||||
"+-----+------------------+--------------------------------+",
|
||||
],
|
||||
)]),
|
||||
Step::MakeWritesPersistable,
|
||||
Step::Persist(vec![("table_1", "tag_partition_by_b")]),
|
||||
Step::Restart,
|
||||
Step::Replay,
|
||||
Step::Assert(vec![Check::Query(
|
||||
"select * from table_1 order by bar",
|
||||
vec![
|
||||
"+-----+------------------+--------------------------------+",
|
||||
"| bar | tag_partition_by | time |",
|
||||
"+-----+------------------+--------------------------------+",
|
||||
"| 20 | b | 1970-01-01T00:00:00.000000010Z |",
|
||||
"+-----+------------------+--------------------------------+",
|
||||
],
|
||||
)]),
|
||||
],
|
||||
..Default::default()
|
||||
}
|
||||
.run()
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn replay_fail_sequencers_change() {
|
||||
// create write buffer w/ sequencer 0 and 1
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use crate::db::{catalog::Catalog, system_tables::IoxSystemTable};
|
||||
use arrow::{
|
||||
array::{ArrayRef, StringBuilder, UInt64Builder},
|
||||
array::{ArrayRef, StringArray, StringBuilder, UInt64Array},
|
||||
datatypes::{DataType, Field, Schema, SchemaRef},
|
||||
error::Result,
|
||||
record_batch::RecordBatch,
|
||||
|
@ -8,7 +8,7 @@ use arrow::{
|
|||
use data_types::{
|
||||
chunk_metadata::DetailedChunkSummary,
|
||||
error::ErrorLogger,
|
||||
partition_metadata::{PartitionSummary, TableSummary},
|
||||
partition_metadata::{ColumnSummary, PartitionSummary, TableSummary},
|
||||
};
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
|
@ -91,7 +91,7 @@ fn from_partition_summaries(
|
|||
)
|
||||
}
|
||||
|
||||
/// Implementation of system.column_chunks table
|
||||
/// Implementation of `system.chunk_columns` table
|
||||
#[derive(Debug)]
|
||||
pub(super) struct ChunkColumnsTable {
|
||||
schema: SchemaRef,
|
||||
|
@ -137,79 +137,118 @@ fn assemble_chunk_columns(
|
|||
schema: SchemaRef,
|
||||
chunk_summaries: Vec<(Arc<TableSummary>, DetailedChunkSummary)>,
|
||||
) -> Result<RecordBatch> {
|
||||
/// Builds an index from column_name -> size
|
||||
fn make_column_index(summary: &DetailedChunkSummary) -> HashMap<&str, u64> {
|
||||
summary
|
||||
.columns
|
||||
.iter()
|
||||
.map(|column_summary| {
|
||||
(
|
||||
column_summary.name.as_ref(),
|
||||
column_summary.memory_bytes as u64,
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
// Create an iterator over each column in each table in each chunk
|
||||
// so we can build `chunk_columns` column by column
|
||||
struct EachColumn<'a> {
|
||||
chunk_summary: &'a DetailedChunkSummary,
|
||||
column_summary: &'a ColumnSummary,
|
||||
}
|
||||
|
||||
// Assume each chunk has roughly 5 columns
|
||||
let row_estimate = chunk_summaries.len() * 5;
|
||||
let rows = chunk_summaries
|
||||
.iter()
|
||||
.map(|(table_summary, chunk_summary)| {
|
||||
table_summary
|
||||
.columns
|
||||
.iter()
|
||||
.map(move |column_summary| EachColumn {
|
||||
chunk_summary,
|
||||
column_summary,
|
||||
})
|
||||
})
|
||||
.flatten()
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let mut partition_key = StringBuilder::new(row_estimate);
|
||||
let mut chunk_id = StringBuilder::new(row_estimate);
|
||||
let mut table_name = StringBuilder::new(row_estimate);
|
||||
let mut column_name = StringBuilder::new(row_estimate);
|
||||
let mut storage = StringBuilder::new(row_estimate);
|
||||
let mut row_count = UInt64Builder::new(row_estimate);
|
||||
let mut null_count = UInt64Builder::new(row_estimate);
|
||||
let mut min_values = StringBuilder::new(row_estimate);
|
||||
let mut max_values = StringBuilder::new(row_estimate);
|
||||
let mut memory_bytes = UInt64Builder::new(row_estimate);
|
||||
let partition_key = rows
|
||||
.iter()
|
||||
.map(|each| each.chunk_summary.inner.partition_key.as_ref())
|
||||
.map(Some)
|
||||
.collect::<StringArray>();
|
||||
|
||||
// Note no rows are produced for partitions with no chunks, or
|
||||
// tables with no partitions: There are other tables to list tables
|
||||
// and columns
|
||||
for (table_summary, chunk_summary) in chunk_summaries {
|
||||
let mut column_index = make_column_index(&chunk_summary);
|
||||
let storage_value = chunk_summary.inner.storage.as_str();
|
||||
let chunk_id = rows
|
||||
.iter()
|
||||
.map(|each| each.chunk_summary.inner.id.get().to_string())
|
||||
.map(Some)
|
||||
.collect::<StringArray>();
|
||||
|
||||
for column in &table_summary.columns {
|
||||
partition_key.append_value(chunk_summary.inner.partition_key.as_ref())?;
|
||||
chunk_id.append_value(chunk_summary.inner.id.get().to_string())?;
|
||||
table_name.append_value(&chunk_summary.inner.table_name)?;
|
||||
column_name.append_value(&column.name)?;
|
||||
storage.append_value(storage_value)?;
|
||||
row_count.append_value(column.total_count())?;
|
||||
null_count.append_value(column.null_count())?;
|
||||
if let Some(v) = column.stats.min_as_str() {
|
||||
min_values.append_value(v)?;
|
||||
} else {
|
||||
min_values.append(false)?;
|
||||
}
|
||||
if let Some(v) = column.stats.max_as_str() {
|
||||
max_values.append_value(v)?;
|
||||
} else {
|
||||
max_values.append(false)?;
|
||||
}
|
||||
let table_name = rows
|
||||
.iter()
|
||||
.map(|each| each.chunk_summary.inner.table_name.as_ref())
|
||||
.map(Some)
|
||||
.collect::<StringArray>();
|
||||
|
||||
let size = column_index.remove(column.name.as_str());
|
||||
let column_name = rows
|
||||
.iter()
|
||||
.map(|each| each.column_summary.name.as_str())
|
||||
.map(Some)
|
||||
.collect::<StringArray>();
|
||||
|
||||
memory_bytes.append_option(size)?;
|
||||
}
|
||||
}
|
||||
let storage = rows
|
||||
.iter()
|
||||
.map(|each| each.chunk_summary.inner.storage.as_str())
|
||||
.map(Some)
|
||||
.collect::<StringArray>();
|
||||
|
||||
let row_count = rows
|
||||
.iter()
|
||||
.map(|each| each.column_summary.total_count())
|
||||
.map(Some)
|
||||
.collect::<UInt64Array>();
|
||||
|
||||
let null_count = rows
|
||||
.iter()
|
||||
.map(|each| each.column_summary.null_count())
|
||||
.map(Some)
|
||||
.collect::<UInt64Array>();
|
||||
|
||||
let min_values = rows
|
||||
.iter()
|
||||
.map(|each| each.column_summary.stats.min_as_str())
|
||||
.collect::<StringArray>();
|
||||
|
||||
let max_values = rows
|
||||
.iter()
|
||||
.map(|each| each.column_summary.stats.max_as_str())
|
||||
.collect::<StringArray>();
|
||||
|
||||
// handle memory bytes specially to avoid having to search for
|
||||
// each column in ColumnSummary
|
||||
let memory_bytes = chunk_summaries
|
||||
.iter()
|
||||
.map(|(table_summary, chunk_summary)| {
|
||||
// Don't assume column order in DetailedColumnSummary are
|
||||
// consistent with ColumnSummary
|
||||
let mut column_sizes = chunk_summary
|
||||
.columns
|
||||
.iter()
|
||||
.map(|column_summary| {
|
||||
(
|
||||
column_summary.name.as_ref(),
|
||||
column_summary.memory_bytes as u64,
|
||||
)
|
||||
})
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
table_summary
|
||||
.columns
|
||||
.iter()
|
||||
.map(move |column_summary| column_sizes.remove(column_summary.name.as_str()))
|
||||
})
|
||||
.flatten()
|
||||
.collect::<UInt64Array>();
|
||||
|
||||
RecordBatch::try_new(
|
||||
schema,
|
||||
vec![
|
||||
Arc::new(partition_key.finish()) as ArrayRef,
|
||||
Arc::new(chunk_id.finish()),
|
||||
Arc::new(table_name.finish()),
|
||||
Arc::new(column_name.finish()),
|
||||
Arc::new(storage.finish()),
|
||||
Arc::new(row_count.finish()),
|
||||
Arc::new(null_count.finish()),
|
||||
Arc::new(min_values.finish()),
|
||||
Arc::new(max_values.finish()),
|
||||
Arc::new(memory_bytes.finish()),
|
||||
Arc::new(partition_key) as ArrayRef,
|
||||
Arc::new(chunk_id),
|
||||
Arc::new(table_name),
|
||||
Arc::new(column_name),
|
||||
Arc::new(storage),
|
||||
Arc::new(row_count),
|
||||
Arc::new(null_count),
|
||||
Arc::new(min_values),
|
||||
Arc::new(max_values),
|
||||
Arc::new(memory_bytes),
|
||||
],
|
||||
)
|
||||
}
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
use crate::db::catalog::chunk::CatalogChunk;
|
||||
use mutable_batch::PartitionWrite;
|
||||
|
||||
/// A [`WriteFilter`] provides the ability to mask rows from a [`PartitionWrite`]
|
||||
|
@ -27,3 +28,21 @@ impl WriteFilter for WriteFilterNone {
|
|||
Some(write)
|
||||
}
|
||||
}
|
||||
|
||||
/// A [`DeleteFilter`] provides the ability to exclude chunks from having a delete applied
|
||||
///
|
||||
/// This is important for replay where it needs to prevent deletes from being applied to chunks
|
||||
/// containing writes sequenced after the delete
|
||||
pub trait DeleteFilter: Copy {
|
||||
/// Returns true if the delete should be applied to this chunk
|
||||
fn filter_chunk(&self, chunk: &CatalogChunk) -> bool;
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Copy, Clone)]
|
||||
pub struct DeleteFilterNone {}
|
||||
|
||||
impl DeleteFilter for DeleteFilterNone {
|
||||
fn filter_chunk(&self, _chunk: &CatalogChunk) -> bool {
|
||||
true
|
||||
}
|
||||
}
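// A sketch of a non-trivial filter, shaped like the replay filter elsewhere in this change:
// it skips chunks that have already been persisted. `ChunkStage` comes from the catalog
// chunk module referenced above; the struct name is an illustrative assumption.
#[derive(Debug, Copy, Clone)]
pub struct SkipPersistedChunks;

impl DeleteFilter for SkipPersistedChunks {
    fn filter_chunk(&self, chunk: &CatalogChunk) -> bool {
        use crate::db::catalog::chunk::ChunkStage;
        // Apply the delete only to chunks that have not been persisted yet
        !matches!(chunk.stage(), ChunkStage::Persisted { .. })
    }
}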
|
||||
|
|
|
@ -127,29 +127,11 @@ impl JobRegistryMetrics {
|
|||
|
||||
fn duration_histogram_options() -> metric::DurationHistogramOptions {
|
||||
metric::DurationHistogramOptions::new(vec![
|
||||
Duration::from_millis(5),
|
||||
Duration::from_millis(10),
|
||||
Duration::from_millis(25),
|
||||
Duration::from_millis(50),
|
||||
Duration::from_millis(100),
|
||||
Duration::from_millis(250),
|
||||
Duration::from_millis(500),
|
||||
Duration::from_millis(1000),
|
||||
Duration::from_millis(2500),
|
||||
Duration::from_millis(5000),
|
||||
Duration::from_millis(10000),
|
||||
Duration::from_millis(1_000),
|
||||
Duration::from_millis(2_500),
|
||||
Duration::from_millis(5_000),
|
||||
Duration::from_millis(10_000),
|
||||
Duration::from_millis(25_000),
|
||||
Duration::from_millis(50_000),
|
||||
Duration::from_millis(100_000),
|
||||
Duration::from_millis(250_000),
|
||||
Duration::from_millis(500_000),
|
||||
Duration::from_millis(1_000_000),
|
||||
Duration::from_millis(2_500_000),
|
||||
Duration::from_millis(5_000_000),
|
||||
Duration::from_secs(1),
|
||||
Duration::from_secs(10),
|
||||
Duration::from_secs(100),
|
||||
metric::DURATION_MAX,
|
||||
])
|
||||
}
|
||||
|
@ -213,9 +195,6 @@ impl JobRegistryMetrics {
|
|||
if let Some(db_name) = metadata.db_name() {
|
||||
attributes.insert("db_name", db_name.to_string());
|
||||
}
|
||||
if let Some(table) = metadata.table_name() {
|
||||
attributes.insert("table", table.to_string());
|
||||
}
|
||||
|
||||
attributes
|
||||
}
|
||||
|
|
|
@ -1875,6 +1875,95 @@ mod tests {
        new_loc_db.wait_for_init().await.unwrap();
    }

    #[tokio::test]
    async fn old_server_config_object_store_path() {
        let application = make_application();
        let server_id = ServerId::try_from(1).unwrap();
        let object_store = application.object_store();

        // Server config used to be stored under /[server id]/config.pb. Construct a config in that
        // old location that points to a database
        let mut old_server_config_path = object_store.new_path();
        old_server_config_path.push_dir(&server_id.to_string());
        old_server_config_path.set_file_name("config.pb");

        // Create database rules and database owner info for a database in object storage
        let db_uuid = Uuid::new_v4();
        let db_name = DatabaseName::new("mydb").unwrap();
        let db_rules = DatabaseRules::new(db_name.clone());

        let mut db_path = object_store.new_path();
        db_path.push_dir("dbs");
        db_path.push_dir(db_uuid.to_string());
        let mut db_rules_path = db_path.clone();
        db_rules_path.set_file_name("rules.pb");

        let persisted_database_rules = management::v1::PersistedDatabaseRules {
            uuid: db_uuid.as_bytes().to_vec(),
            rules: Some(db_rules.into()),
        };
        let mut encoded_rules = bytes::BytesMut::new();
        generated_types::database_rules::encode_persisted_database_rules(
            &persisted_database_rules,
            &mut encoded_rules,
        )
        .unwrap();
        let encoded_rules = encoded_rules.freeze();
        object_store
            .put(&db_rules_path, encoded_rules)
            .await
            .unwrap();

        let mut db_owner_info_path = db_path.clone();
        db_owner_info_path.set_file_name("owner.pb");
        let owner_info = management::v1::OwnerInfo {
            id: server_id.get_u32(),
            location: old_server_config_path.to_string(),
            transactions: vec![],
        };
        let mut encoded_owner_info = bytes::BytesMut::new();
        generated_types::server_config::encode_database_owner_info(
            &owner_info,
            &mut encoded_owner_info,
        )
        .unwrap();
        let encoded_owner_info = encoded_owner_info.freeze();
        object_store
            .put(&db_owner_info_path, encoded_owner_info)
            .await
            .unwrap();

        let config = management::v1::ServerConfig {
            databases: [(db_name.to_string(), db_path.to_raw())]
                .into_iter()
                .collect(),
        };
        let mut encoded_server_config = bytes::BytesMut::new();
        generated_types::server_config::encode_persisted_server_config(
            &config,
            &mut encoded_server_config,
        )
        .unwrap();
        let encoded_server_config = encoded_server_config.freeze();
        object_store
            .put(&old_server_config_path, encoded_server_config)
            .await
            .unwrap();

        // Start up server
        let server = make_server(Arc::clone(&application));
        server.set_id(server_id).unwrap();
        server.wait_for_init().await.unwrap();

        // Database should init
        let database = server.database(&db_name).unwrap();
        database.wait_for_init().await.unwrap();

        // Server config should be transitioned to the new location
        let config = server_config(application.object_store(), server_id).await;
        assert_config_contents(&config, &[(&db_name, format!("dbs/{}/", db_uuid))]);
    }

    #[tokio::test]
    async fn db_names_sorted() {
        let server = make_server(make_application());

@ -2232,7 +2321,7 @@ mod tests {
        let baz_iox_object_store = baz.iox_object_store().unwrap();
        let owner_info = management::v1::OwnerInfo {
            id: 2,
            location: "2/config.pb".to_string(),
            location: "nodes/2/config.pb".to_string(),
            transactions: vec![],
        };
        let mut encoded = bytes::BytesMut::new();

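The test above covers migrating the server config from the old per-server location to the current layout, and the second hunk updates a hard-coded owner location accordingly. Purely as an illustration of the path layouts involved (plain string formatting with hypothetical helpers; the real code goes through the object store path API), a sketch:

```rust
/// Old layout: the server config lived directly under the server id.
fn old_server_config_path(server_id: u32) -> String {
    format!("{}/config.pb", server_id)
}

/// Current layout: server configs live under `nodes/`, databases under `dbs/<uuid>/`.
fn new_server_config_path(server_id: u32) -> String {
    format!("nodes/{}/config.pb", server_id)
}

fn db_path(db_uuid: &str) -> String {
    format!("dbs/{}/", db_uuid)
}

fn main() {
    assert_eq!(old_server_config_path(2), "2/config.pb");
    assert_eq!(new_server_config_path(2), "nodes/2/config.pb");
    assert_eq!(db_path("0000-uuid"), "dbs/0000-uuid/");
}
```
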
@ -10,3 +10,4 @@ parking_lot = "0.11.2"
tempfile = "3.1.0"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
observability_deps = { path = "../observability_deps" }
workspace-hack = { path = "../workspace-hack"}

@ -8,5 +8,6 @@ description = "Time functionality for IOx"

chrono = "0.4"
parking_lot = "0.11"
workspace-hack = { path = "../workspace-hack"}

[dev-dependencies]

@ -11,5 +11,6 @@ chrono = "0.4"
observability_deps = { path = "../observability_deps" }
parking_lot = "0.11"
rand = "0.8"
workspace-hack = { path = "../workspace-hack"}

[dev-dependencies]

@ -16,5 +16,6 @@ structopt = { version = "0.3.25" }
thrift = { version = "0.13.0" }
tokio = { version = "1.13", features = ["macros", "time", "sync", "rt"] }
trace = { path = "../trace" }
workspace-hack = { path = "../workspace-hack"}

[dev-dependencies]

@ -19,5 +19,6 @@ parking_lot = "0.11"
pin-project = "1.0"
snafu = "0.6"
tower = "0.4"
workspace-hack = { path = "../workspace-hack"}

[dev-dependencies]

@ -17,6 +17,7 @@ pin-project = "1.0"
time = { path = "../time" }
tokio = { version = "1.13", features = ["macros", "time"] }
tokio-util = { version = "0.6.9" }
workspace-hack = { path = "../workspace-hack"}

[dev-dependencies]
# Need the multi-threaded executor for testing

@ -0,0 +1,4 @@
# Avoid putting conflict markers in the generated Cargo.toml file, since their presence breaks
# Cargo.
# Also do not check out the file as CRLF on Windows, as that's what hakari needs.
Cargo.toml merge=binary -crlf

@ -0,0 +1,76 @@
# This file is generated by `cargo hakari`.
# To regenerate, run:
#     cargo hakari generate

[package]
name = "workspace-hack"
version = "0.1.0"
description = "workspace-hack package, managed by hakari"
publish = false

# The parts of the file between the BEGIN HAKARI SECTION and END HAKARI SECTION comments
# are managed by hakari.

### BEGIN HAKARI SECTION
[dependencies]
ahash = { version = "0.7", features = ["std"] }
bytes = { version = "1", features = ["std"] }
chrono = { version = "0.4", features = ["clock", "libc", "oldtime", "serde", "std", "time", "winapi"] }
clap = { version = "2", features = ["ansi_term", "atty", "color", "strsim", "suggestions", "vec_map"] }
either = { version = "1", features = ["use_std"] }
futures = { version = "0.3", features = ["alloc", "async-await", "executor", "futures-executor", "std"] }
futures-channel = { version = "0.3", features = ["alloc", "futures-sink", "sink", "std"] }
futures-core = { version = "0.3", features = ["alloc", "std"] }
futures-io = { version = "0.3", default-features = false, features = ["std"] }
futures-sink = { version = "0.3", features = ["alloc", "std"] }
futures-task = { version = "0.3", default-features = false, features = ["alloc", "std"] }
futures-util = { version = "0.3", features = ["alloc", "async-await", "async-await-macro", "channel", "futures-channel", "futures-io", "futures-macro", "futures-sink", "io", "memchr", "proc-macro-hack", "proc-macro-nested", "sink", "slab", "std"] }
getrandom = { version = "0.2", default-features = false, features = ["js", "js-sys", "std", "wasm-bindgen"] }
hashbrown = { version = "0.11", features = ["ahash", "inline-more", "raw"] }
hyper = { version = "0.14", features = ["client", "full", "h2", "http1", "http2", "runtime", "server", "socket2", "stream", "tcp"] }
indexmap = { version = "1", default-features = false, features = ["std"] }
itoa = { version = "0.4", features = ["i128", "std"] }
libc = { version = "0.2", features = ["extra_traits", "std"] }
log = { version = "0.4", default-features = false, features = ["std"] }
memchr = { version = "2", features = ["std", "use_std"] }
num-bigint = { version = "0.4", features = ["std"] }
num-integer = { version = "0.1", default-features = false, features = ["i128", "std"] }
num-traits = { version = "0.2", features = ["i128", "libm", "std"] }
once_cell = { version = "1", features = ["alloc", "parking_lot", "race", "std"] }
rand = { version = "0.8", features = ["alloc", "getrandom", "libc", "rand_chacha", "rand_hc", "small_rng", "std", "std_rng"] }
regex = { version = "1", features = ["aho-corasick", "memchr", "perf", "perf-cache", "perf-dfa", "perf-inline", "perf-literal", "std", "unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] }
regex-automata = { version = "0.1", features = ["regex-syntax", "std"] }
regex-syntax = { version = "0.6", features = ["unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] }
reqwest = { version = "0.11", features = ["__rustls", "__tls", "blocking", "default-tls", "hyper-rustls", "hyper-tls", "json", "native-tls-crate", "rustls", "rustls-tls", "rustls-tls-webpki-roots", "serde_json", "stream", "tokio-native-tls", "tokio-rustls", "webpki-roots"] }
serde = { version = "1", features = ["derive", "rc", "serde_derive", "std"] }
serde_json = { version = "1", features = ["indexmap", "preserve_order", "std"] }
smallvec = { version = "1", default-features = false, features = ["union"] }
tokio = { version = "1", features = ["bytes", "fs", "full", "io-std", "io-util", "libc", "macros", "memchr", "mio", "net", "num_cpus", "once_cell", "parking_lot", "process", "rt", "rt-multi-thread", "signal", "signal-hook-registry", "sync", "time", "tokio-macros", "winapi"] }
tokio-stream = { version = "0.1", features = ["net", "time"] }
tokio-util = { version = "0.6", features = ["codec", "io"] }
tower = { version = "0.4", features = ["balance", "buffer", "discover", "futures-util", "indexmap", "limit", "load", "log", "make", "rand", "ready-cache", "slab", "timeout", "tokio", "tokio-stream", "tokio-util", "tracing", "util"] }
tracing = { version = "0.1", features = ["attributes", "log", "max_level_trace", "release_max_level_debug", "std", "tracing-attributes"] }
tracing-core = { version = "0.1", features = ["lazy_static", "std"] }
tracing-subscriber = { version = "0.3", features = ["alloc", "ansi", "ansi_term", "env-filter", "fmt", "lazy_static", "matchers", "regex", "registry", "sharded-slab", "smallvec", "std", "thread_local", "tracing", "tracing-log"] }
url = { version = "2", default-features = false, features = ["serde"] }
uuid = { version = "0.8", features = ["getrandom", "serde", "std", "v4"] }

[build-dependencies]
ahash = { version = "0.7", features = ["std"] }
bytes = { version = "1", features = ["std"] }
cc = { version = "1", default-features = false, features = ["jobserver", "parallel"] }
clap = { version = "2", features = ["ansi_term", "atty", "color", "strsim", "suggestions", "vec_map"] }
either = { version = "1", features = ["use_std"] }
getrandom = { version = "0.2", default-features = false, features = ["js", "js-sys", "std", "wasm-bindgen"] }
hashbrown = { version = "0.11", features = ["ahash", "inline-more", "raw"] }
indexmap = { version = "1", default-features = false, features = ["std"] }
libc = { version = "0.2", features = ["extra_traits", "std"] }
log = { version = "0.4", default-features = false, features = ["std"] }
memchr = { version = "2", features = ["std", "use_std"] }
rand = { version = "0.8", features = ["alloc", "getrandom", "libc", "rand_chacha", "rand_hc", "small_rng", "std", "std_rng"] }
regex = { version = "1", features = ["aho-corasick", "memchr", "perf", "perf-cache", "perf-dfa", "perf-inline", "perf-literal", "std", "unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] }
regex-syntax = { version = "0.6", features = ["unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] }
serde = { version = "1", features = ["derive", "rc", "serde_derive", "std"] }
syn = { version = "1", features = ["clone-impls", "derive", "extra-traits", "full", "parsing", "printing", "proc-macro", "quote", "visit", "visit-mut"] }

### END HAKARI SECTION

@ -0,0 +1,21 @@
# workspace-hack

This crate is a "workspace hack" crate managed by [`cargo hakari`][hakari].

Its purpose is to unify the features used by all crates in the workspace so that the crates share
more dependencies and are rebuilt less often. There are more details in [hakari's
documentation][hakari-docs].

[hakari]: https://crates.io/crates/cargo-hakari
[hakari-docs]: https://docs.rs/cargo-hakari/0.9.6/cargo_hakari/about/index.html

## CI failures

If the `workspace_hack_checks` CI job is failing, there are two possible reasons and solutions:

- If `cargo hakari generate --diff` fails, a crate has started or stopped using a feature of some
  dependency, and that change isn't reflected in the `workspace-hack` crate yet. To fix this, run
  `cargo hakari generate` and commit the changes.
- If `cargo hakari manage-deps --dry-run` fails, a crate in the workspace doesn't depend on the
  `workspace-hack` crate. To fix this, run `cargo hakari manage-deps` and commit the changes.

@ -0,0 +1,2 @@
// A build script is required for cargo to consider build dependencies.
fn main() {}

@ -0,0 +1 @@
// This is a dummy lib.rs.

@ -25,6 +25,7 @@ tokio = { version = "1.13", features = ["macros", "fs"] }
trace = { path = "../trace" }
trace_http = { path = "../trace_http" }
uuid = { version = "0.8", features = ["serde", "v4"] }
workspace-hack = { path = "../workspace-hack"}

[dev-dependencies]
tempfile = "3.1.0"

@ -11,7 +11,7 @@ use parking_lot::Mutex;

use data_types::sequence::Sequence;
use data_types::write_buffer::WriteBufferCreationConfig;
use dml::{DmlMeta, DmlOperation, DmlWrite};
use dml::{DmlDelete, DmlMeta, DmlOperation, DmlWrite};
use time::TimeProvider;

use crate::core::{

@ -108,14 +108,36 @@ impl MockBufferSharedState {
            .collect()
    }

    /// Push a new delete to the specified sequencer
    ///
    /// # Panics
    /// - when delete is not sequenced
    /// - when no sequencer was initialized
    /// - when specified sequencer does not exist
    /// - when sequence number in entry is not larger than the current maximum
    pub fn push_delete(&self, delete: DmlDelete) {
        self.push_operation(DmlOperation::Delete(delete))
    }

    /// Push a new entry to the specified sequencer.
    ///
    /// # Panics
    /// - when given entry is not sequenced
    /// - when write is not sequenced
    /// - when no sequencer was initialized
    /// - when specified sequencer does not exist
    /// - when sequence number in entry is not larger than the current maximum
    pub fn push_write(&self, write: DmlWrite) {
        self.push_operation(DmlOperation::Write(write))
    }

    /// Push a new operation to the specified sequencer
    ///
    /// # Panics
    /// - when operation is not sequenced
    /// - when no sequencer was initialized
    /// - when specified sequencer does not exist
    /// - when sequence number in entry is not larger than the current maximum
    pub fn push_operation(&self, write: DmlOperation) {
        let sequence = write.meta().sequence().expect("write must be sequenced");
        assert!(
            write.meta().producer_ts().is_some(),

@ -135,7 +157,7 @@ impl MockBufferSharedState {
            );
        }

        writes_vec.push(Ok(DmlOperation::Write(write)));
        writes_vec.push(Ok(write));
    }

    /// Push line protocol data with placeholder values used for write metadata

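The new `push_delete` and `push_write` methods both funnel into `push_operation`, so writes and deletes share one sequenced stream per sequencer and the same panic conditions. Here is a self-contained sketch of that shape using stand-in types rather than the real `dml` and write buffer APIs; the sequence-number check mirrors the documented panic when a sequence number is not larger than the current maximum.

```rust
/// Stand-in for `DmlOperation`: writes and deletes flow through the same enum.
#[derive(Debug)]
enum SketchOperation {
    Write(String),
    Delete(String),
}

/// Stand-in for one mock sequencer's shared state.
#[derive(Debug, Default)]
struct SketchSequencer {
    /// (sequence number, operation) pairs, in push order.
    entries: Vec<(u64, SketchOperation)>,
}

impl SketchSequencer {
    /// Push any operation, enforcing strictly increasing sequence numbers.
    fn push_operation(&mut self, sequence_number: u64, op: SketchOperation) {
        if let Some((last, _)) = self.entries.last() {
            assert!(
                sequence_number > *last,
                "sequence number must be larger than the current maximum"
            );
        }
        self.entries.push((sequence_number, op));
    }

    /// Convenience wrapper in the spirit of `push_write`.
    fn push_write(&mut self, sequence_number: u64, line_protocol: &str) {
        self.push_operation(sequence_number, SketchOperation::Write(line_protocol.into()))
    }

    /// Convenience wrapper in the spirit of `push_delete`.
    fn push_delete(&mut self, sequence_number: u64, predicate: &str) {
        self.push_operation(sequence_number, SketchOperation::Delete(predicate.into()))
    }
}

fn main() {
    let mut sequencer = SketchSequencer::default();
    sequencer.push_write(1, "cpu,host=a usage=1");
    sequencer.push_delete(2, "host=a");
    assert_eq!(sequencer.entries.len(), 2);
}
```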