Merge branch 'main' into crepererum/issue3030

pull/24376/head
kodiakhq[bot] 2021-11-23 08:08:34 +00:00 committed by GitHub
commit a6a0eda142
99 changed files with 2023 additions and 918 deletions


@ -40,6 +40,7 @@ commands:
rustup show
cargo fmt --version
cargo clippy --version
cargo install cargo-hakari && cargo hakari --version
cache_restore:
description: Restore Cargo Cache
@ -137,6 +138,19 @@ jobs:
command: tar -cvzf rustdoc.tar.gz target/doc/
- store_artifacts:
path: rustdoc.tar.gz
workspace_hack_checks:
docker:
- image: quay.io/influxdb/rust:ci
steps:
- checkout
- rust_components
- cache_restore
- run:
name: Check that the workspace hack crate contains all features in use
command: cargo hakari generate --diff || echo "If this fails, fix it by running \`cargo hakari generate\` locally and committing the changes"
- run:
name: Check that all crates in the workspace depend on the workspace hack crate
command: cargo hakari manage-deps --dry-run || echo "If this fails, fix it by running \`cargo hakari manage-deps\` locally and committing the changes"
test:
docker:
@ -414,6 +428,7 @@ workflows:
- test_perf
- build
- doc
- workspace_hack_checks
- perf_image:
filters:
branches:

.guppy/hakari.toml (new file, 40 lines)

@ -0,0 +1,40 @@
# This file contains settings for `cargo hakari`.
# See https://docs.rs/cargo-hakari/*/cargo_hakari/config for a full list of options.
hakari-package = "workspace-hack"
# Setting workspace.resolver = "2" in the root Cargo.toml is HIGHLY recommended.
# Hakari works much better with the new feature resolver.
# For more about the new feature resolver, see:
# https://blog.rust-lang.org/2021/03/25/Rust-1.51.0.html#cargos-new-feature-resolver
resolver = "2"
# Add triples corresponding to platforms commonly used by developers here.
# https://doc.rust-lang.org/rustc/platform-support.html
platforms = [
# "x86_64-unknown-linux-gnu",
# "x86_64-apple-darwin",
# "x86_64-pc-windows-msvc",
]
# Write out exact versions rather than a semver range. (Defaults to false.)
# exact-versions = true
# Don't search in these crates for dependencies, and don't have these crates depend on the
# workspace-hack crate.
#
# Includes most bench- or test-only crates except for query_tests, as that crate is built often
# and should share as many dependencies as possible.
[traversal-excludes]
workspace-members = [
"grpc-router",
"grpc-router-test-gen",
"influxdb_iox_client",
"iox_data_generator",
"mutable_batch_tests",
"server_benchmarks",
"trogging",
]
third-party = [
{ name = "tikv-jemalloc-sys" },
]

Cargo.lock (generated)

@ -144,6 +144,7 @@ dependencies = [
"num-traits",
"rand",
"snafu",
"workspace-hack",
]
[[package]]
@ -535,6 +536,7 @@ dependencies = [
"tokio",
"tonic",
"tower",
"workspace-hack",
]
[[package]]
@ -797,6 +799,7 @@ dependencies = [
"test_helpers",
"time 0.1.0",
"uuid",
"workspace-hack",
]
[[package]]
@ -804,6 +807,7 @@ name = "datafusion"
version = "0.1.0"
dependencies = [
"datafusion 6.0.0",
"workspace-hack",
]
[[package]]
@ -839,6 +843,7 @@ dependencies = [
"futures",
"tokio",
"tokio-stream",
"workspace-hack",
]
[[package]]
@ -924,6 +929,7 @@ dependencies = [
"schema",
"time 0.1.0",
"trace",
"workspace-hack",
]
[[package]]
@ -1215,6 +1221,7 @@ dependencies = [
"time 0.1.0",
"tonic",
"tonic-build",
"workspace-hack",
]
[[package]]
@ -1321,9 +1328,9 @@ checksum = "ac5956d4e63858efaec57e0d6c1c2f6a41e1487f830314a324ccd7e2223a7ca0"
[[package]]
name = "handlebars"
version = "4.1.4"
version = "4.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1874024f4a29f47d609014caec0b1c866f1c1eb0661a09c9733ecc4757f5f88"
checksum = "8ad84da8f63da982543fc85fcabaee2ad1fdd809d99d64a48887e2e942ddfe46"
dependencies = [
"log",
"pest",
@ -1434,9 +1441,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
[[package]]
name = "hyper"
version = "0.14.14"
version = "0.14.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b91bb1f221b6ea1f1e4371216b70f40748774c2fb5971b450c07773fb92d26b"
checksum = "436ec0091e4f20e655156a30a0df3770fe2900aa301e548e08446ec794b6953c"
dependencies = [
"bytes",
"futures-channel",
@ -1551,6 +1558,7 @@ dependencies = [
"test_helpers",
"tokio",
"url",
"workspace-hack",
]
[[package]]
@ -1644,6 +1652,7 @@ dependencies = [
"tracker",
"trogging",
"uuid",
"workspace-hack",
"write_buffer",
]
@ -1681,6 +1690,7 @@ dependencies = [
"smallvec",
"snafu",
"test_helpers",
"workspace-hack",
]
[[package]]
@ -1692,6 +1702,7 @@ dependencies = [
"generated_types",
"prost",
"tonic",
"workspace-hack",
]
[[package]]
@ -1706,6 +1717,7 @@ dependencies = [
"snafu",
"snap",
"test_helpers",
"workspace-hack",
]
[[package]]
@ -1737,6 +1749,7 @@ dependencies = [
"parking_lot",
"time 0.1.0",
"tokio",
"workspace-hack",
]
[[package]]
@ -1781,6 +1794,7 @@ dependencies = [
"tokio",
"tokio-stream",
"uuid",
"workspace-hack",
]
[[package]]
@ -1914,9 +1928,9 @@ dependencies = [
[[package]]
name = "libc"
version = "0.2.106"
version = "0.2.108"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a60553f9a9e039a333b4e9b20573b9e9b9c0bb3a11e201ccc48ef4283456d673"
checksum = "8521a1b57e76b1ec69af7599e75e38e7b7fad6610f037db8c79b127201b5d119"
[[package]]
name = "libloading"
@ -1959,6 +1973,7 @@ dependencies = [
"time 0.1.0",
"tokio",
"tracker",
"workspace-hack",
]
[[package]]
@ -1988,6 +2003,7 @@ dependencies = [
"parking_lot",
"regex",
"tracing-subscriber",
"workspace-hack",
]
[[package]]
@ -2078,6 +2094,7 @@ name = "metric"
version = "0.1.0"
dependencies = [
"parking_lot",
"workspace-hack",
]
[[package]]
@ -2088,6 +2105,7 @@ dependencies = [
"observability_deps",
"prometheus",
"test_helpers",
"workspace-hack",
]
[[package]]
@ -2191,6 +2209,7 @@ dependencies = [
"rand",
"schema",
"snafu",
"workspace-hack",
]
[[package]]
@ -2203,6 +2222,7 @@ dependencies = [
"mutable_batch",
"schema",
"snafu",
"workspace-hack",
]
[[package]]
@ -2217,6 +2237,7 @@ dependencies = [
"mutable_batch_lp",
"schema",
"snafu",
"workspace-hack",
]
[[package]]
@ -2250,6 +2271,7 @@ dependencies = [
"snafu",
"test_helpers",
"tokio",
"workspace-hack",
]
[[package]]
@ -2534,6 +2556,7 @@ dependencies = [
"tokio",
"tokio-util",
"walkdir",
"workspace-hack",
]
[[package]]
@ -2541,6 +2564,7 @@ name = "observability_deps"
version = "0.1.0"
dependencies = [
"tracing",
"workspace-hack",
]
[[package]]
@ -2634,6 +2658,7 @@ dependencies = [
"schema",
"snafu",
"test_helpers",
"workspace-hack",
]
[[package]]
@ -2641,6 +2666,7 @@ name = "panic_logging"
version = "0.1.0"
dependencies = [
"observability_deps",
"workspace-hack",
]
[[package]]
@ -2730,6 +2756,7 @@ dependencies = [
"tokio",
"tokio-stream",
"uuid",
"workspace-hack",
"zstd",
]
@ -2766,6 +2793,7 @@ dependencies = [
"tokio",
"tokio-stream",
"uuid",
"workspace-hack",
"zstd",
]
@ -2851,6 +2879,7 @@ dependencies = [
"snafu",
"test_helpers",
"time 0.1.0",
"workspace-hack",
]
[[package]]
@ -3017,6 +3046,7 @@ dependencies = [
"sqlparser",
"test_helpers",
"tokio",
"workspace-hack",
]
[[package]]
@ -3197,6 +3227,7 @@ dependencies = [
"tokio-stream",
"tokio-util",
"trace",
"workspace-hack",
]
[[package]]
@ -3219,6 +3250,7 @@ dependencies = [
"tempfile",
"test_helpers",
"tokio",
"workspace-hack",
]
[[package]]
@ -3388,6 +3420,7 @@ dependencies = [
"schema",
"snafu",
"test_helpers",
"workspace-hack",
]
[[package]]
@ -3528,6 +3561,7 @@ dependencies = [
"time 0.1.0",
"tokio",
"trace",
"workspace-hack",
"write_buffer",
]
@ -3710,6 +3744,7 @@ dependencies = [
"indexmap",
"itertools",
"snafu",
"workspace-hack",
]
[[package]]
@ -3801,9 +3836,9 @@ dependencies = [
[[package]]
name = "serde_json"
version = "1.0.70"
version = "1.0.71"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e277c495ac6cd1a01a58d0a0c574568b4d1ddf14f59965c6a58b8d96400b54f3"
checksum = "063bf466a64011ac24040a49009724ee60a57da1b437617ceb32e53ad61bfb19"
dependencies = [
"indexmap",
"itoa",
@ -3887,6 +3922,7 @@ dependencies = [
"trace",
"tracker",
"uuid",
"workspace-hack",
"write_buffer",
]
@ -4232,6 +4268,7 @@ dependencies = [
"parking_lot",
"tempfile",
"tracing-subscriber",
"workspace-hack",
]
[[package]]
@ -4322,6 +4359,7 @@ version = "0.1.0"
dependencies = [
"chrono",
"parking_lot",
"workspace-hack",
]
[[package]]
@ -4530,9 +4568,9 @@ dependencies = [
[[package]]
name = "tower"
version = "0.4.10"
version = "0.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c00e500fff5fa1131c866b246041a6bf96da9c965f8fe4128cb1421f23e93c00"
checksum = "5651b5f6860a99bd1adb59dbfe1db8beb433e73709d9032b413a77e2fb7c066a"
dependencies = [
"futures-core",
"futures-util",
@ -4569,6 +4607,7 @@ dependencies = [
"observability_deps",
"parking_lot",
"rand",
"workspace-hack",
]
[[package]]
@ -4584,6 +4623,7 @@ dependencies = [
"thrift",
"tokio",
"trace",
"workspace-hack",
]
[[package]]
@ -4602,6 +4642,7 @@ dependencies = [
"snafu",
"tower",
"trace",
"workspace-hack",
]
[[package]]
@ -4670,9 +4711,9 @@ dependencies = [
[[package]]
name = "tracing-subscriber"
version = "0.3.1"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80a4ddde70311d8da398062ecf6fc2c309337de6b0f77d6c27aff8d53f6fca52"
checksum = "7507ec620f809cdf07cccb5bc57b13069a88031b795efd4079b1c71b66c1613d"
dependencies = [
"ansi_term 0.12.1",
"lazy_static",
@ -4703,6 +4744,7 @@ dependencies = [
"time 0.1.0",
"tokio",
"tokio-util",
"workspace-hack",
]
[[package]]
@ -5014,6 +5056,55 @@ dependencies = [
"winapi",
]
[[package]]
name = "workspace-hack"
version = "0.1.0"
dependencies = [
"ahash",
"bytes",
"cc",
"chrono",
"clap",
"either",
"futures",
"futures-channel",
"futures-core",
"futures-io",
"futures-sink",
"futures-task",
"futures-util",
"getrandom",
"hashbrown",
"hyper",
"indexmap",
"itoa",
"libc",
"log",
"memchr",
"num-bigint 0.4.3",
"num-integer",
"num-traits",
"once_cell",
"rand",
"regex",
"regex-automata",
"regex-syntax",
"reqwest",
"serde",
"serde_json",
"smallvec",
"syn",
"tokio",
"tokio-stream",
"tokio-util",
"tower",
"tracing",
"tracing-core",
"tracing-subscriber",
"url",
"uuid",
]
[[package]]
name = "write_buffer"
version = "0.1.0"
@ -5040,6 +5131,7 @@ dependencies = [
"trace",
"trace_http",
"uuid",
"workspace-hack",
]
[[package]]


@ -50,6 +50,7 @@ members = [
"trace_http",
"tracker",
"trogging",
"workspace-hack",
"write_buffer",
]
default-members = ["influxdb_iox"]


@ -14,6 +14,7 @@ comfy-table = { version = "5.0", default-features = false }
hashbrown = "0.11"
num-traits = "0.2"
snafu = "0.6"
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]
rand = "0.8.3"


@ -11,6 +11,7 @@ prost = "0.8"
thiserror = "1.0.30"
tonic = { version = "0.5.0" }
tower = "0.4"
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]
tokio = { version = "1.13", features = ["macros", "rt-multi-thread"] }


@ -17,6 +17,7 @@ siphasher = "0.3"
snafu = "0.6"
time = { path = "../time" }
uuid = { version = "0.8", features = ["v4"] }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies] # In alphabetical order
test_helpers = { path = "../test_helpers" }


@ -97,6 +97,9 @@ pub enum ChunkLifecycleAction {
/// Chunk is in the process of being compacted
Compacting,
/// Object Store Chunk is in the process of being compacted
CompactingObjectStore,
/// Chunk is about to be dropped from memory and (if persisted) from object store
Dropping,
}
@ -112,6 +115,7 @@ impl ChunkLifecycleAction {
match self {
Self::Persisting => "Persisting to Object Storage",
Self::Compacting => "Compacting",
Self::CompactingObjectStore => "Compacting Object Store",
Self::Dropping => "Dropping",
}
}
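
As a quick orientation for the new variant, a minimal sketch of the expected display string; the accessor name (`name`) is an assumption based on context, since the method signature sits outside this hunk:

// Hypothetical check; `name()` is assumed to be the accessor wrapping the match above.
assert_eq!(
    ChunkLifecycleAction::CompactingObjectStore.name(),
    "Compacting Object Store"
);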


@ -24,6 +24,12 @@ pub enum Job {
chunks: Vec<ChunkId>,
},
/// Compact a set of object store chunks
CompactObjectStoreChunks {
partition: PartitionAddr,
chunks: Vec<ChunkId>,
},
/// Split and persist a set of chunks
PersistChunks {
partition: PartitionAddr,
@ -47,6 +53,7 @@ impl Job {
Self::Dummy { db_name, .. } => db_name.as_ref(),
Self::WriteChunk { chunk, .. } => Some(&chunk.db_name),
Self::CompactChunks { partition, .. } => Some(&partition.db_name),
Self::CompactObjectStoreChunks { partition, .. } => Some(&partition.db_name),
Self::PersistChunks { partition, .. } => Some(&partition.db_name),
Self::DropChunk { chunk, .. } => Some(&chunk.db_name),
Self::DropPartition { partition, .. } => Some(&partition.db_name),
@ -60,6 +67,7 @@ impl Job {
Self::Dummy { .. } => None,
Self::WriteChunk { chunk, .. } => Some(&chunk.partition_key),
Self::CompactChunks { partition, .. } => Some(&partition.partition_key),
Self::CompactObjectStoreChunks { partition, .. } => Some(&partition.partition_key),
Self::PersistChunks { partition, .. } => Some(&partition.partition_key),
Self::DropChunk { chunk, .. } => Some(&chunk.partition_key),
Self::DropPartition { partition, .. } => Some(&partition.partition_key),
@ -73,6 +81,7 @@ impl Job {
Self::Dummy { .. } => None,
Self::WriteChunk { chunk, .. } => Some(&chunk.table_name),
Self::CompactChunks { partition, .. } => Some(&partition.table_name),
Self::CompactObjectStoreChunks { partition, .. } => Some(&partition.table_name),
Self::PersistChunks { partition, .. } => Some(&partition.table_name),
Self::DropChunk { chunk, .. } => Some(&chunk.table_name),
Self::DropPartition { partition, .. } => Some(&partition.table_name),
@ -86,6 +95,7 @@ impl Job {
Self::Dummy { .. } => None,
Self::WriteChunk { chunk, .. } => Some(vec![chunk.chunk_id]),
Self::CompactChunks { chunks, .. } => Some(chunks.clone()),
Self::CompactObjectStoreChunks { chunks, .. } => Some(chunks.clone()),
Self::PersistChunks { chunks, .. } => Some(chunks.clone()),
Self::DropChunk { chunk, .. } => Some(vec![chunk.chunk_id]),
Self::DropPartition { .. } => None,
@ -99,6 +109,9 @@ impl Job {
Self::Dummy { .. } => "Dummy Job, for testing",
Self::WriteChunk { .. } => "Writing chunk to Object Storage",
Self::CompactChunks { .. } => "Compacting chunks to ReadBuffer",
Self::CompactObjectStoreChunks { .. } => {
"Compacting Object Store chunks to an Object Store chunk"
}
Self::PersistChunks { .. } => "Persisting chunks to object storage",
Self::DropChunk { .. } => "Drop chunk from memory and (if persisted) from object store",
Self::DropPartition { .. } => {
@ -115,6 +128,9 @@ impl std::fmt::Display for Job {
Job::Dummy { .. } => write!(f, "Job::Dummy"),
Job::WriteChunk { chunk } => write!(f, "Job::WriteChunk({}))", chunk),
Job::CompactChunks { partition, .. } => write!(f, "Job::CompactChunks({})", partition),
Job::CompactObjectStoreChunks { partition, .. } => {
write!(f, "Job::CompactObjectStoreChunks({})", partition)
}
Job::PersistChunks { partition, .. } => write!(f, "Job::PersistChunks({})", partition),
Job::DropChunk { chunk } => write!(f, "Job::DropChunk({})", chunk),
Job::DropPartition { partition } => write!(f, "Job::DropPartition({})", partition),


@ -11,7 +11,8 @@ impl NonEmptyString {
/// Create a new `NonEmptyString` from the provided `String`
///
/// Returns None if empty
pub fn new(s: String) -> Option<Self> {
pub fn new(s: impl Into<String>) -> Option<Self> {
let s = s.into();
match s.is_empty() {
true => None,
false => Some(Self(s.into_boxed_str())),
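A small usage sketch (not part of this change set) showing what the broadened `impl Into<String>` signature buys callers:

// `&str` is now accepted without an explicit `.to_string()`.
assert!(NonEmptyString::new("my_db").is_some());
// Empty input still returns `None`, as documented above.
assert!(NonEmptyString::new("").is_none());
// Owned `String`s keep working as before.
assert!(NonEmptyString::new(String::from("my_db")).is_some());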


@ -10,3 +10,4 @@ description = "Re-exports datafusion at a specific version"
# Rename to work around doctest bug
# Turn off optional datafusion features (e.g. don't get support for crypto functions or avro)
upstream = { git = "https://github.com/apache/arrow-datafusion.git", rev="79f129d048667a4552e44ef740e1b1cf9de306a1", default-features = false, package = "datafusion" }
workspace-hack = { path = "../workspace-hack"}


@ -10,3 +10,4 @@ datafusion = { path = "../datafusion" }
futures = "0.3"
tokio = { version = "1.13", features = ["macros"] }
tokio-stream = "0.1.8"
workspace-hack = { path = "../workspace-hack"}


@ -13,3 +13,4 @@ ordered-float = "2"
schema = { path = "../schema" }
time = { path = "../time" }
trace = { path = "../trace" }
workspace-hack = { path = "../workspace-hack"}


@ -15,6 +15,7 @@ regex = "1.4"
serde = { version = "1.0", features = ["derive"] }
tonic = "0.5"
time = { path = "../time" }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]
data_types = { path = "../data_types" }


@ -18,7 +18,6 @@ fn main() -> Result<()> {
///
/// Creates:
///
/// - `com.github.influxdata.idpe.storage.read.rs`
/// - `influxdata.iox.delete.v1.rs`
/// - `influxdata.iox.deployment.v1.rs`
/// - `influxdata.iox.management.v1.rs`
@ -31,7 +30,6 @@ fn main() -> Result<()> {
fn generate_grpc_types(root: &Path) -> Result<()> {
let delete_path = root.join("influxdata/iox/delete/v1");
let deployment_path = root.join("influxdata/iox/deployment/v1");
let idpe_path = root.join("com/github/influxdata/idpe/storage/read");
let management_path = root.join("influxdata/iox/management/v1");
let predicate_path = root.join("influxdata/iox/predicate/v1");
let preserved_catalog_path = root.join("influxdata/iox/preserved_catalog/v1");
@ -43,7 +41,6 @@ fn generate_grpc_types(root: &Path) -> Result<()> {
let proto_files = vec![
delete_path.join("service.proto"),
deployment_path.join("service.proto"),
idpe_path.join("source.proto"),
management_path.join("chunk.proto"),
management_path.join("database_rules.proto"),
management_path.join("jobs.proto"),
@ -67,8 +64,8 @@ fn generate_grpc_types(root: &Path) -> Result<()> {
router_path.join("shard.proto"),
storage_path.join("predicate.proto"),
storage_path.join("service.proto"),
storage_path.join("source.proto"),
storage_path.join("storage_common.proto"),
storage_path.join("storage_common_idpe.proto"),
storage_path.join("test.proto"),
write_buffer_path.join("write_buffer.proto"),
];
@ -88,6 +85,7 @@ fn generate_grpc_types(root: &Path) -> Result<()> {
".influxdata.iox.management.v1.Chunk.id",
".influxdata.iox.management.v1.ClosePartitionChunkRequest.chunk_id",
".influxdata.iox.management.v1.CompactChunks.chunks",
".influxdata.iox.management.v1.CompactObjectStoreChunks.chunks",
".influxdata.iox.management.v1.DropChunk.chunk_id",
".influxdata.iox.management.v1.PersistChunks.chunks",
".influxdata.iox.management.v1.WriteChunk.chunk_id",
@ -114,7 +112,12 @@ fn generate_grpc_types(root: &Path) -> Result<()> {
pbjson_build::Builder::new()
.register_descriptors(&descriptor_set)?
.build(&[".influxdata", ".google.longrunning", ".google.rpc"])?;
.build(&[
".influxdata.iox",
".influxdata.pbdata",
".google.longrunning",
".google.rpc",
])?;
Ok(())
}


@ -42,6 +42,9 @@ enum ChunkLifecycleAction {
/// Chunk is about to be dropped from memory and (if persisted) from object store.
CHUNK_LIFECYCLE_ACTION_DROPPING = 4;
/// Chunk is in the process of being compacted
CHUNK_LIFECYCLE_ACTION_COMPACTING_OBJECT_STORE = 5;
}


@ -39,6 +39,7 @@ message OperationMetadata {
PersistChunks persist_chunks = 11;
DropChunk drop_chunk = 12;
DropPartition drop_partition = 17;
CompactObjectStoreChunks compact_object_store_chunks = 18;
}
}
@ -91,6 +92,23 @@ message CompactChunks {
repeated bytes chunks = 5;
}
// Compact chunks into a single chunk
message CompactObjectStoreChunks {
// name of the database
string db_name = 1;
// partition key
string partition_key = 2;
// table name
string table_name = 3;
// chunk_id
// UUID is stored as 16 bytes in big-endian order.
repeated bytes chunks = 4;
}
// Split and write chunks to object store
message PersistChunks {
// name of the database


@ -9,7 +9,6 @@ package influxdata.platform.storage;
import "google/protobuf/empty.proto";
import "influxdata/platform/storage/storage_common.proto";
import "influxdata/platform/storage/storage_common_idpe.proto";
service Storage {
// ReadFilter performs a filter operation at storage


@ -1,5 +1,5 @@
syntax = "proto3";
package com.github.influxdata.idpe.storage.read;
package influxdata.platform.storage.read;
message ReadSource {
// OrgID specifies the organization identifier for this request.


@ -9,59 +9,91 @@ package influxdata.platform.storage;
import "google/protobuf/any.proto";
import "influxdata/platform/storage/predicate.proto";
import "influxdata/platform/storage/source.proto";
message OffsetsResponse {
message PartitionOffsetResponse {
int64 id = 1;
int64 offset = 2;
}
repeated PartitionOffsetResponse partitions = 1;
}
enum TagKeyMetaNames {
// option (gogoproto.goproto_enum_prefix) = false;
// TagKeyMetaNamesText means the tag keys for measurement and field will
// be returned as _measurement and _field respectively.
TagKeyMetaNamesText = 0;
// TagKeyMetaNamesBinary means the tag keys for measurement and field will
// be returned as \x00 and \xff respectively.
TagKeyMetaNamesBinary = 1;
}
message ReadFilterRequest {
google.protobuf.Any read_source = 1;
TimestampRange range = 2;
google.protobuf.Any ReadSource = 1;
TimestampRange range = 2; // [(gogoproto.nullable) = false];
Predicate predicate = 3;
// KeySort determines the ordering of series keys from the server.
KeySort key_sort = 4;
// TagKeyMetaNames determines the key format used for the measurement and field
// tags.
TagKeyMetaNames tag_key_meta_names = 5;
enum KeySort {
// option (gogoproto.goproto_enum_prefix) = false;
// KeySortUnspecified means the key order is unspecified.
KeySortUnspecified = 0;
// KeySortAscending means the key order should be lexicographically ascending.
//
// NOTE: In order to preserve sort order, canonical tag keys are not
// transformed from 0x00 _measurement and 0xff _field.
KeySortAscending = 1;
}
}
message ReadGroupRequest {
google.protobuf.Any read_source = 1;
TimestampRange range = 2;
google.protobuf.Any ReadSource = 1;
TimestampRange range = 2; // [(gogoproto.nullable) = false];
Predicate predicate = 3;
enum Group {
// option (gogoproto.goproto_enum_prefix) = false;
// GroupNone returns all series as a single group.
// The single GroupFrame.TagKeys will be the union of all tag keys.
GROUP_NONE = 0;
GroupNone = 0;
// GroupBy returns a group for each unique value of the specified GroupKeys.
GROUP_BY = 2;
GroupBy = 2;
}
// GroupKeys specifies a list of tag keys used to order the data.
// It is dependent on the Group property to determine its behavior.
repeated string group_keys = 4;
repeated string GroupKeys = 4;
Group group = 5;
Aggregate aggregate = 6;
// TODO(jlapacik): This field is only used in unit tests.
// Specifically the two tests in group_resultset_test.go.
// This field should be removed and the tests that depend
// on it refactored.
enum HintFlags {
HINT_NONE = 0x00;
HINT_NO_POINTS = 0x01;
HINT_NO_SERIES = 0x02;
// HintSchemaAllTime performs schema queries without using time ranges
HINT_SCHEMA_ALL_TIME = 0x04;
}
fixed32 hints = 7;
// Deprecated field only used in TSM storage-related tests.
reserved "Hints";
}
message Aggregate {
enum AggregateType {
NONE = 0;
SUM = 1;
COUNT = 2;
MIN = 3;
MAX = 4;
FIRST = 5;
LAST = 6;
MEAN = 7;
AggregateTypeNone = 0;
AggregateTypeSum = 1;
AggregateTypeCount = 2;
AggregateTypeMin = 3;
AggregateTypeMax = 4;
AggregateTypeFirst = 5;
AggregateTypeLast = 6;
AggregateTypeMean = 7;
}
AggregateType type = 1;
@ -77,39 +109,39 @@ message Tag {
// Response message for ReadFilter and ReadGroup
message ReadResponse {
enum FrameType {
SERIES = 0;
POINTS = 1;
FrameTypeSeries = 0;
FrameTypePoints = 1;
}
enum DataType {
FLOAT = 0;
INTEGER = 1;
UNSIGNED = 2;
BOOLEAN = 3;
STRING = 4;
DataTypeFloat = 0;
DataTypeInteger = 1;
DataTypeUnsigned = 2;
DataTypeBoolean = 3;
DataTypeString = 4;
}
message Frame {
oneof data {
GroupFrame group = 7;
SeriesFrame series = 1;
FloatPointsFrame float_points = 2;
IntegerPointsFrame integer_points = 3;
UnsignedPointsFrame unsigned_points = 4;
BooleanPointsFrame boolean_points = 5;
StringPointsFrame string_points = 6;
FloatPointsFrame FloatPoints = 2;
IntegerPointsFrame IntegerPoints = 3;
UnsignedPointsFrame UnsignedPoints = 4;
BooleanPointsFrame BooleanPoints = 5;
StringPointsFrame StringPoints = 6;
}
}
message GroupFrame {
// TagKeys
repeated bytes tag_keys = 1;
repeated bytes TagKeys = 1;
// PartitionKeyVals is the values of the partition key for this group, order matching ReadGroupRequest.GroupKeys
repeated bytes partition_key_vals = 2;
repeated bytes PartitionKeyVals = 2;
}
message SeriesFrame {
repeated Tag tags = 1;
repeated Tag tags = 1; // [(gogoproto.nullable) = false];
DataType data_type = 2;
}
@ -138,7 +170,7 @@ message ReadResponse {
repeated string values = 2;
}
repeated Frame frames = 1;
repeated Frame frames = 1; // [(gogoproto.nullable) = false];
}
message Capability {
@ -165,32 +197,47 @@ message TimestampRange {
// TagKeysRequest is the request message for Storage.TagKeys.
message TagKeysRequest {
google.protobuf.Any tags_source = 1;
TimestampRange range = 2;
google.protobuf.Any TagsSource = 1;
TimestampRange range = 2; // [(gogoproto.nullable) = false];
Predicate predicate = 3;
}
// TagValuesRequest is the request message for Storage.TagValues.
message TagValuesRequest {
google.protobuf.Any tags_source = 1 ;
google.protobuf.Any TagsSource = 1;
TimestampRange range = 2; // [(gogoproto.nullable) = false];
Predicate predicate = 3;
// string tag_key = 4;
// AAL changed from string --> bytes to handle \xff literals in Rust which are not valid UTF-8
bytes tag_key = 4;
}
message ReadSeriesCardinalityRequest {
google.protobuf.Any ReadSeriesCardinalitySource = 1;
TimestampRange range = 2; // [(gogoproto.nullable) = false];
Predicate predicate = 3;
}
// Response message for Storage.TagKeys, Storage.TagValues, Storage.MeasurementNames,
// Storage.MeasurementTagKeys and Storage.MeasurementTagValues.
message StringValuesResponse {
repeated bytes values = 1;
}
// Response message for Storage.TagValuesGroupedByMeasurementAndTagKey.
message TagValuesResponse {
string measurement = 1;
string key = 2;
repeated string values = 3;
}
// Response message for Storage.SeriesCardinality
message Int64ValuesResponse {
repeated int64 values = 1;
}
// MeasurementNamesRequest is the request message for Storage.MeasurementNames.
message MeasurementNamesRequest {
google.protobuf.Any source = 1;
TimestampRange range = 2; // [(gogoproto.nullable) = false]
TimestampRange range = 2; // [(gogoproto.nullable) = false];
Predicate predicate = 3;
}
@ -198,7 +245,7 @@ message MeasurementNamesRequest {
message MeasurementTagKeysRequest {
google.protobuf.Any source = 1;
string measurement = 2;
TimestampRange range = 3; // [(gogoproto.nullable) = false]
TimestampRange range = 3; // [(gogoproto.nullable) = false];
Predicate predicate = 4;
}
@ -222,12 +269,12 @@ message MeasurementFieldsRequest {
// MeasurementFieldsResponse is the response message for Storage.MeasurementFields.
message MeasurementFieldsResponse {
enum FieldType {
FLOAT = 0;
INTEGER = 1;
UNSIGNED = 2;
STRING = 3;
BOOLEAN = 4;
UNDEFINED = 5;
FieldTypeFloat = 0;
FieldTypeInteger = 1;
FieldTypeUnsigned = 2;
FieldTypeString = 3;
FieldTypeBoolean = 4;
FieldTypeUndefined = 5;
}
message MessageField {
@ -236,11 +283,11 @@ message MeasurementFieldsResponse {
sfixed64 timestamp = 3;
}
repeated MessageField fields = 1;// [(gogoproto.nullable) = false];
repeated MessageField fields = 1; // [(gogoproto.nullable) = false];
}
message ReadWindowAggregateRequest {
google.protobuf.Any read_source = 1;
google.protobuf.Any ReadSource = 1;
TimestampRange range = 2; // [(gogoproto.nullable) = false];
Predicate predicate = 3;
int64 WindowEvery = 4;
@ -249,6 +296,48 @@ message ReadWindowAggregateRequest {
Window window = 7;
}
message TagValuesGroupedByMeasurementAndTagKeyRequest {
google.protobuf.Any source = 1;
// MeasurementPatterns holds the patterns to match the measurements
// against (the "FROM" part of the SHOW TAG VALUES statement).
repeated LiteralOrRegex MeasurementPatterns = 2;
// TagKeyPredicate holds a predicate for the tags to find values on.
// (the "WITH KEY" part of the SHOW TAG VALUES statement.)
// It's in one of the forms:
// OR(IDENT, OR(IDENT, ...))
// EQ(IDENT)
// NEQ(IDENT)
// EQREGEX(REGEX)
// NEQREGEX(REGEX)
TagKeyPredicate TagKeyPredicate = 3;
// Condition holds any additional condition to evaluate on the results.
Predicate Condition = 4;
}
message TagKeyPredicate {
oneof value {
string Eq = 1;
string Neq = 2;
string EqRegex = 3;
string NeqRegex = 4;
StringList In = 5;
}
}
message StringList {
repeated string Vals = 1;
}
message LiteralOrRegex {
oneof value {
string literal_value = 1;
string regex_value = 2;
}
}
message Window {
Duration every = 1;
Duration offset = 2;


@ -1,25 +0,0 @@
// This file defines extensions to the InfluxDB storage gRPC common message types
// that have not yet made it into influxdb.
// It is, effectively, the delta between these two files:
// https://github.com/influxdata/influxdb/blob/master/storage/reads/datatypes/storage_common.proto
// https://github.com/influxdata/idpe/blob/master/storage/storageproto/storage_common.proto
syntax = "proto3";
package influxdata.platform.storage;
import "google/protobuf/any.proto";
import "influxdata/platform/storage/predicate.proto";
import "influxdata/platform/storage/storage_common.proto";
message ReadSeriesCardinalityRequest {
google.protobuf.Any read_series_cardinality_source = 1;
TimestampRange range = 2; // [(gogoproto.nullable) = false];
Predicate predicate = 3;
}
// Response message for Storage.SeriesCardinality
message Int64ValuesResponse {
repeated int64 values = 1;
}


@ -64,6 +64,7 @@ impl From<Option<ChunkLifecycleAction>> for management::ChunkLifecycleAction {
match lifecycle_action {
Some(ChunkLifecycleAction::Persisting) => Self::Persisting,
Some(ChunkLifecycleAction::Compacting) => Self::Compacting,
Some(ChunkLifecycleAction::CompactingObjectStore) => Self::CompactingObjectStore,
Some(ChunkLifecycleAction::Dropping) => Self::Dropping,
None => Self::Unspecified,
}
@ -153,6 +154,9 @@ impl TryFrom<management::ChunkLifecycleAction> for Option<ChunkLifecycleAction>
management::ChunkLifecycleAction::Compacting => {
Ok(Some(ChunkLifecycleAction::Compacting))
}
management::ChunkLifecycleAction::CompactingObjectStore => {
Ok(Some(ChunkLifecycleAction::CompactingObjectStore))
}
management::ChunkLifecycleAction::Dropping => Ok(Some(ChunkLifecycleAction::Dropping)),
management::ChunkLifecycleAction::Unspecified => Ok(None),
}
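
A round-trip sketch tying together the two conversions in this file (purely illustrative):

// Domain type -> protobuf enum, via the `From<Option<ChunkLifecycleAction>>` impl above.
let proto = management::ChunkLifecycleAction::from(Some(ChunkLifecycleAction::CompactingObjectStore));
// Protobuf enum -> domain type, via the `TryFrom` impl above.
assert!(matches!(
    Option::<ChunkLifecycleAction>::try_from(proto),
    Ok(Some(ChunkLifecycleAction::CompactingObjectStore))
));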


@ -27,6 +27,14 @@ impl From<Job> for management::operation_metadata::Job {
chunks: chunks.into_iter().map(|chunk_id| chunk_id.into()).collect(),
})
}
Job::CompactObjectStoreChunks { partition, chunks } => {
Self::CompactObjectStoreChunks(management::CompactObjectStoreChunks {
db_name: partition.db_name.to_string(),
partition_key: partition.partition_key.to_string(),
table_name: partition.table_name.to_string(),
chunks: chunks.into_iter().map(|chunk_id| chunk_id.into()).collect(),
})
}
Job::PersistChunks { partition, chunks } => {
Self::PersistChunks(management::PersistChunks {
db_name: partition.db_name.to_string(),


@ -9,11 +9,11 @@
pub mod influxdata {
pub mod platform {
pub mod storage {
include!(concat!(env!("OUT_DIR"), "/influxdata.platform.storage.rs"));
include!(concat!(
env!("OUT_DIR"),
"/influxdata.platform.storage.serde.rs"
"/influxdata.platform.storage.read.rs"
));
include!(concat!(env!("OUT_DIR"), "/influxdata.platform.storage.rs"));
// Can't implement `Default` because `prost::Message` implements `Default`
impl TimestampRange {
@ -127,23 +127,6 @@ pub mod influxdata {
}
}
pub mod com {
pub mod github {
pub mod influxdata {
pub mod idpe {
pub mod storage {
pub mod read {
include!(concat!(
env!("OUT_DIR"),
"/com.github.influxdata.idpe.storage.read.rs"
));
}
}
}
}
}
}
// Needed because of https://github.com/hyperium/tonic/issues/471
pub mod grpc {
pub mod health {
@ -199,7 +182,6 @@ pub fn protobuf_type_url_eq(url: &str, protobuf_type: &str) -> bool {
}
// TODO: Remove these (#2419)
pub use com::github::influxdata::idpe::storage::read::*;
pub use influxdata::platform::storage::*;
pub mod google;


@ -9,9 +9,10 @@ bytes = "1.0"
futures = { version = "0.3", default-features = false }
reqwest = { version = "0.11", features = ["stream", "json"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.70"
serde_json = "1.0.71"
snafu = "0.6.6"
url = "2.1.1"
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies] # In alphabetical order
mockito = "0.30"


@ -70,7 +70,7 @@ pprof = { version = "^0.5", default-features = false, features = ["flamegraph",
prost = "0.8"
rustyline = { version = "9.0", default-features = false }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.70"
serde_json = "1.0.71"
serde_urlencoded = "0.7.0"
snafu = "0.6.9"
structopt = "0.3.25"
@ -88,6 +88,7 @@ uuid = { version = "0.8", features = ["v4"] }
# jemalloc-sys with unprefixed_malloc_on_supported_platforms feature and heappy are mutually exclusive
tikv-jemalloc-sys = { version = "0.4.0", optional = true, features = ["unprefixed_malloc_on_supported_platforms"] }
heappy = { git = "https://github.com/mkmik/heappy", rev = "20aa466524ac9ce34a4bae29f27ec11869b50e21", features = ["enable_heap_profiler", "jemalloc_shim", "measure_free"], optional = true }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]


@ -220,6 +220,7 @@ where
read_source: _read_source,
range,
predicate,
..
} = read_filter_request;
info!(%db_name, ?range, predicate=%predicate.loggable(),"read filter");
@ -251,15 +252,10 @@ where
group_keys,
group,
aggregate,
hints,
} = read_group_request;
info!(%db_name, ?range, ?group_keys, ?group, ?aggregate,predicate=%predicate.loggable(),"read_group");
if hints != 0 {
InternalHintsFieldNotSupported { hints }.fail()?
}
let aggregate_string = format!(
"aggregate: {:?}, group: {:?}, group_keys: {:?}",
aggregate, group, group_keys
@ -1772,6 +1768,7 @@ mod tests {
read_source: source.clone(),
range: Some(make_timestamp_range(0, 10000)),
predicate: Some(make_state_ma_predicate()),
..Default::default()
};
let frames = fixture.storage_client.read_filter(request).await.unwrap();
@ -1812,6 +1809,7 @@ mod tests {
read_source: source.clone(),
range: None,
predicate: None,
..Default::default()
};
// Note we don't set the response on the test database, so we expect an error
@ -1855,7 +1853,6 @@ mod tests {
aggregate: Some(Aggregate {
r#type: aggregate::AggregateType::Sum as i32,
}),
hints: 0,
};
let frames = fixture.storage_client.read_group(request).await.unwrap();
@ -1890,34 +1887,6 @@ mod tests {
let group = generated_types::read_group_request::Group::By as i32;
// ---
// test error hit in request processing
// ---
let request = ReadGroupRequest {
read_source: source.clone(),
range: None,
predicate: None,
group_keys: vec!["tag1".into()],
group,
aggregate: Some(Aggregate {
r#type: aggregate::AggregateType::Sum as i32,
}),
hints: 42,
};
let response_string = fixture
.storage_client
.read_group(request)
.await
.unwrap_err()
.to_string();
assert_contains!(
response_string,
"Unexpected hint value on read_group request. Expected 0, got 42"
);
grpc_request_metric_has_count(&fixture, "ReadGroup", "server_error", 1);
// ---
// test error returned in database processing
// ---
@ -1930,7 +1899,6 @@ mod tests {
aggregate: Some(Aggregate {
r#type: aggregate::AggregateType::Sum as i32,
}),
hints: 0,
};
// Note we don't set the response on the test database, so we expect an error


@ -1349,7 +1349,7 @@ async fn test_get_server_status_db_error() {
// create valid owner info but malformed DB rules that will put DB in an error state
let my_db_uuid = Uuid::new_v4();
let mut path = server_fixture.dir().to_path_buf();
path.push("42");
path.push("dbs");
path.push(my_db_uuid.to_string());
std::fs::create_dir_all(path.clone()).unwrap();
let mut owner_info_path = path.clone();
@ -1360,11 +1360,13 @@ async fn test_get_server_status_db_error() {
// create the server config listing the ownership of this database
let mut path = server_fixture.dir().to_path_buf();
path.push("nodes");
path.push("42");
std::fs::create_dir_all(path.clone()).unwrap();
path.push("config.pb");
let data = ServerConfig {
databases: vec![(String::from("my_db"), format!("42/{}", my_db_uuid))]
databases: vec![(String::from("my_db"), format!("dbs/{}", my_db_uuid))]
.into_iter()
.collect(),
};


@ -63,6 +63,7 @@ async fn read_filter_endpoint(storage_client: &mut StorageClient<Connection>, sc
read_source,
range,
predicate,
..Default::default()
});
let read_response = storage_client
.read_filter(read_filter_request)
@ -316,6 +317,7 @@ pub async fn regex_operator_test() {
end: 2001, // include all data
}),
predicate: Some(make_regex_match_predicate("host", "^b.+")),
..Default::default()
};
let expected_frames = vec![
@ -391,7 +393,6 @@ async fn test_read_group_none_agg() {
aggregate: Some(Aggregate {
r#type: AggregateType::None as i32,
}),
hints: 0,
};
let expected_group_frames = vec![
@ -442,7 +443,6 @@ async fn test_read_group_none_agg_with_predicate() {
aggregate: Some(Aggregate {
r#type: AggregateType::None as i32,
}),
hints: 0,
};
let expected_group_frames = vec![
@ -488,7 +488,6 @@ async fn test_read_group_sum_agg() {
aggregate: Some(Aggregate {
r#type: AggregateType::Sum as i32,
}),
hints: 0,
};
let expected_group_frames = vec![
@ -541,7 +540,6 @@ async fn test_read_group_count_agg() {
aggregate: Some(Aggregate {
r#type: AggregateType::Count as i32,
}),
hints: 0,
};
let expected_group_frames = vec![
@ -595,7 +593,6 @@ async fn test_read_group_last_agg() {
aggregate: Some(Aggregate {
r#type: AggregateType::Last as i32,
}),
hints: 0,
};
let expected_group_frames = vec![


@ -85,6 +85,7 @@ pub async fn test_tracing_storage_api() {
read_source,
range,
predicate,
..Default::default()
});
let mut storage_client = StorageClient::new(server_fixture.grpc_channel());
let read_response = storage_client


@ -27,7 +27,7 @@ mutable_batch_pb = { path = "../mutable_batch_pb", optional = true }
prost = "0.8"
rand = "0.8.3"
serde = "1.0.128"
serde_json = { version = "1.0.70", optional = true }
serde_json = { version = "1.0.71", optional = true }
thiserror = "1.0.30"
tonic = { version = "0.5.0" }
uuid = { version = "0.8", features = ["v4"] }


@ -9,6 +9,7 @@ nom = "7"
smallvec = "1.7.0"
snafu = "0.6.2"
observability_deps = { path = "../observability_deps" }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies] # In alphabetical order
test_helpers = { path = "../test_helpers" }


@ -10,5 +10,6 @@ generated_types = { path = "../generated_types" }
prost = "0.8"
tonic = { version = "0.5.0" }
futures-util = { version = "0.3.1" }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]


@ -21,7 +21,6 @@ use std::collections::HashMap;
/// Re-export generated_types
pub mod generated_types {
pub use generated_types::com::github::influxdata::idpe::storage::read::*;
pub use generated_types::influxdata::platform::storage::*;
}
@ -97,8 +96,7 @@ impl Client {
.encode(&mut d)
.expect("encoded read source appropriately");
Any {
type_url: "type.googleapis.com/com.github.influxdata.idpe.storage.read.ReadSource"
.to_string(),
type_url: "type.googleapis.com/influxdata.platform.storage.read.ReadSource".to_string(),
value: d.freeze(),
}
}


@ -9,6 +9,7 @@ integer-encoding = "3.0.2"
snafu = "0.6.2"
snap = "1.0.0"
observability_deps = { path = "../observability_deps" }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies] # In alphabetical order
flate2 = "1.0"


@ -10,6 +10,7 @@ readme = "README.md"
parking_lot = "0.11"
time = { path = "../time" }
tokio = { version = "1.13", features = ["sync"] }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]
futures = "0.3"


@ -10,7 +10,7 @@ chrono = "0.4.13"
chrono-english = "0.1.4"
clap = "2.33.1"
futures = "0.3.5"
handlebars = "4.1.4"
handlebars = "4.1.5"
humantime = "2.1.0"
data_types = { path = "../data_types" }
generated_types = { path = "../generated_types" }
@ -19,12 +19,12 @@ influxdb_iox_client = { path = "../influxdb_iox_client" }
itertools = "0.10.0"
rand = { version = "0.8.3", features = ["small_rng"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.70"
serde_json = "1.0.71"
snafu = "0.6.8"
tokio = { version = "1.13", features = ["macros", "rt-multi-thread"] }
toml = "0.5.6"
tracing = "0.1"
tracing-subscriber = "0.3.1"
tracing-subscriber = "0.3.2"
uuid = { version = "0.8.1", default_features = false }
[dev-dependencies]


@ -14,6 +14,7 @@ snafu = "0.6"
tokio = { version = "1.13", features = ["macros", "time"] }
tokio-stream = "0.1"
uuid = { version = "0.8", features = ["serde", "v4"] }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies] # In alphabetical order
test_helpers = { path = "../test_helpers" }


@ -59,9 +59,23 @@ pub struct IoxObjectStore {
impl IoxObjectStore {
/// Get the data for the server config to determine the names and locations of the databases
/// that this server owns.
///
/// TEMPORARY: Server config used to be at the top level instead of beneath `/nodes/`. Until
/// all deployments have transitioned, check both locations before reporting that the server
/// config is not found.
pub async fn get_server_config_file(inner: &ObjectStore, server_id: ServerId) -> Result<Bytes> {
let path = paths::server_config_path(inner, server_id);
let mut stream = inner.get(&path).await?;
let mut stream = match inner.get(&path).await {
Err(object_store::Error::NotFound { .. }) => {
use object_store::path::ObjectStorePath;
let mut legacy_path = inner.new_path();
legacy_path.push_dir(server_id.to_string());
legacy_path.set_file_name(paths::SERVER_CONFIG_FILE_NAME);
inner.get(&legacy_path).await
}
other => other,
}?;
let mut bytes = BytesMut::new();
while let Some(buf) = stream.next().await {

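For orientation, the fallback above boils down to probing two object store locations for the server config; a sketch using the same path-building calls (illustrative only, and it assumes `object_store::path::ObjectStorePath` is in scope as in the fallback arm):

// Current layout, as produced by `paths::server_config_path` below:
//   nodes/<server_id>/config.pb
let mut current = inner.new_path();
current.push_dir("nodes");
current.push_dir(server_id.to_string());
current.set_file_name("config.pb");

// Legacy layout, consulted only when the current path returns `NotFound`:
//   <server_id>/config.pb
let mut legacy = inner.new_path();
legacy.push_dir(server_id.to_string());
legacy.set_file_name("config.pb");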

@ -15,13 +15,15 @@ pub mod transaction_file;
use transaction_file::TransactionFilePath;
pub(crate) const ALL_DATABASES_DIRECTORY: &str = "dbs";
const SERVER_CONFIG_FILE_NAME: &str = "config.pb";
const ALL_SERVERS_DIRECTORY: &str = "nodes";
pub(crate) const SERVER_CONFIG_FILE_NAME: &str = "config.pb";
const DATABASE_OWNER_FILE_NAME: &str = "owner.pb";
/// The path to the server file containing the list of databases this server owns.
// TODO: this is in the process of replacing all_databases_path for the floating databases design
pub(crate) fn server_config_path(object_store: &ObjectStore, server_id: ServerId) -> Path {
let mut path = object_store.new_path();
path.push_dir(ALL_SERVERS_DIRECTORY);
path.push_dir(server_id.to_string());
path.set_file_name(SERVER_CONFIG_FILE_NAME);
path


@ -15,6 +15,7 @@ parking_lot = "0.11"
time = { path = "../time" }
tokio = { version = "1.13", features = ["macros", "time"] }
tracker = { path = "../tracker" }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]
tokio = { version = "1.13", features = ["macros", "time", "rt"] }


@ -79,6 +79,12 @@ pub trait LockablePartition: Sized + std::fmt::Display {
chunks: Vec<LifecycleWriteGuard<'_, <Self::Chunk as LockableChunk>::Chunk, Self::Chunk>>,
) -> Result<TaskTracker<<Self::Chunk as LockableChunk>::Job>, Self::Error>;
/// Compact object store chunks into a single object store chunk
fn compact_object_store_chunks(
partition: LifecycleWriteGuard<'_, Self::Partition, Self>,
chunks: Vec<LifecycleWriteGuard<'_, <Self::Chunk as LockableChunk>::Chunk, Self::Chunk>>,
) -> Result<TaskTracker<<Self::Chunk as LockableChunk>::Job>, Self::Error>;
/// Returns a PersistHandle for the provided partition, and the
/// timestamp up to which to flush
///


@ -908,6 +908,13 @@ mod tests {
Ok(db.registry.lock().complete(()))
}
fn compact_object_store_chunks(
_partition: LifecycleWriteGuard<'_, TestPartition, Self>,
_chunks: Vec<LifecycleWriteGuard<'_, TestChunk, Self::Chunk>>,
) -> Result<TaskTracker<()>, Self::Error> {
unimplemented!("The test does not need compact os chunks");
}
fn prepare_persist(
partition: &mut LifecycleWriteGuard<'_, Self::Partition, Self>,
_force: bool,


@ -8,6 +8,7 @@ edition = "2021"
[dependencies] # In alphabetical order
observability_deps = { path = "../observability_deps" }
tracing-subscriber = "0.3"
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies] # In alphabetical order
once_cell = { version = "1.4.0", features = ["parking_lot"] }


@ -7,5 +7,6 @@ edition = "2021"
[dependencies] # In alphabetical order
parking_lot = "0.11"
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies] # In alphabetical order


@ -9,6 +9,7 @@ edition = "2021"
observability_deps = { path = "../observability_deps" }
metric = { path = "../metric" }
prometheus = { version = "0.13", default-features = false }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies] # In alphabetical order
test_helpers = { path = "../test_helpers" }


@ -13,6 +13,7 @@ schema = { path = "../schema" }
snafu = "0.6"
hashbrown = "0.11"
itertools = "0.10"
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]
rand = "0.8"


@ -10,6 +10,7 @@ influxdb_line_protocol = { path = "../influxdb_line_protocol" }
mutable_batch = { path = "../mutable_batch" }
schema = { path = "../schema" }
snafu = "0.6"
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]
arrow_util = { path = "../arrow_util" }


@ -12,6 +12,7 @@ hashbrown = "0.11"
mutable_batch = { path = "../mutable_batch" }
schema = { path = "../schema" }
snafu = "0.6"
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]
mutable_batch_lp = { path = "../mutable_batch_lp" }


@ -14,6 +14,7 @@ mutable_batch_lp = { path = "../mutable_batch_lp" }
observability_deps = { path = "../observability_deps" }
parking_lot = "0.11.2"
snafu = "0.6.2"
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies] # In alphabetical order
tokio = { version = "1.13", features = ["macros"] }


@ -31,6 +31,7 @@ reqwest = { version = "0.11", optional = true }
# Filesystem integration
walkdir = "2"
tempfile = "3.1.0"
workspace-hack = { path = "../workspace-hack"}
[features]
azure = ["azure_core", "azure_storage", "indexmap", "reqwest"]


@ -7,3 +7,4 @@ description = "Observability ecosystem dependencies for InfluxDB IOx, to ensure
[dependencies] # In alphabetical order
tracing = { version = "0.1", features = ["max_level_trace", "release_max_level_debug"] }
workspace-hack = { path = "../workspace-hack"}


@ -11,6 +11,7 @@ schema = { path = "../schema" }
snafu = "0.6.2"
observability_deps = { path = "../observability_deps" }
parquet = "6.0"
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies] # In alphabetical order
criterion = "0.3.3"


@ -6,3 +6,4 @@ edition = "2021"
[dependencies] # In alphabetical order
observability_deps = { path = "../observability_deps" }
workspace-hack = { path = "../workspace-hack"}


@ -33,3 +33,4 @@ tokio = { version = "1.13", features = ["macros", "rt", "rt-multi-thread", "sync
tokio-stream = "0.1"
uuid = { version = "0.8", features = ["serde", "v4"] }
zstd = "0.9"
workspace-hack = { path = "../workspace-hack"}


@ -155,7 +155,7 @@ impl CatalogState for TracerCatalogState {
mod tests {
use super::*;
use crate::test_helpers::{make_config, new_empty};
use parquet_file::test_utils::{chunk_addr, make_metadata, TestSize};
use parquet_file::test_utils::generator::ChunkGenerator;
use std::{collections::HashSet, sync::Arc};
use tokio::sync::RwLock;
@ -176,6 +176,7 @@ mod tests {
async fn test_cleanup_rules() {
let config = make_config().await;
let iox_object_store = &config.iox_object_store;
let mut generator = ChunkGenerator::new_with_store(Arc::clone(iox_object_store));
let catalog = new_empty(config.clone()).await;
@ -186,36 +187,20 @@ mod tests {
let mut transaction = catalog.open_transaction().await;
// an ordinary tracked parquet file => keep
let (path, metadata) =
make_metadata(iox_object_store, "foo", chunk_addr(1), TestSize::Full).await;
let metadata = Arc::new(metadata);
let info = CatalogParquetInfo {
path,
file_size_bytes: 33,
metadata,
};
transaction.add_parquet(&info);
paths_keep.push(info.path);
let (chunk, _) = generator.generate().await;
transaction.add_parquet(&CatalogParquetInfo::from_chunk(&chunk));
paths_keep.push(chunk.path().clone());
// another ordinary tracked parquet file that was added and removed => keep (for time
// travel)
let (path, metadata) =
make_metadata(iox_object_store, "foo", chunk_addr(2), TestSize::Full).await;
let metadata = Arc::new(metadata);
let info = CatalogParquetInfo {
path,
file_size_bytes: 33,
metadata,
};
transaction.add_parquet(&info);
transaction.remove_parquet(&info.path);
paths_keep.push(info.path);
let (chunk, _) = generator.generate().await;
transaction.add_parquet(&CatalogParquetInfo::from_chunk(&chunk));
transaction.remove_parquet(chunk.path());
paths_keep.push(chunk.path().clone());
// an untracked parquet file => delete
let (path, _md) =
make_metadata(iox_object_store, "foo", chunk_addr(3), TestSize::Full).await;
paths_delete.push(path);
let (chunk, _) = generator.generate().await;
paths_delete.push(chunk.path().clone());
transaction.commit().await.unwrap();
}
@ -224,6 +209,7 @@ mod tests {
let files = get_unreferenced_parquet_files(&catalog, 1_000)
.await
.unwrap();
delete_files(&catalog, &files).await.unwrap();
// deleting a second time should just work
@ -243,39 +229,33 @@ mod tests {
async fn test_cleanup_with_parallel_transaction() {
let config = make_config().await;
let iox_object_store = &config.iox_object_store;
let mut generator = ChunkGenerator::new_with_store(Arc::clone(iox_object_store));
let lock: RwLock<()> = Default::default();
let catalog = new_empty(config.clone()).await;
// try multiple times to provoke a conflict
for i in 0..100 {
for i in 1..100 {
// Every so often try to create a file with the same ChunkAddr beforehand. This should
// not trick the cleanup logic into removing the actual file because file paths contain a
// UUIDv4 part.
if i % 2 == 0 {
make_metadata(iox_object_store, "foo", chunk_addr(i), TestSize::Full).await;
generator.generate_id(i).await;
}
let (path, _) = tokio::join!(
let (chunk, _) = tokio::join!(
async {
let guard = lock.read().await;
let (path, md) =
make_metadata(iox_object_store, "foo", chunk_addr(i), TestSize::Full).await;
let metadata = Arc::new(md);
let info = CatalogParquetInfo {
path,
file_size_bytes: 33,
metadata,
};
let (chunk, _) = generator.generate_id(i).await;
let mut transaction = catalog.open_transaction().await;
transaction.add_parquet(&info);
transaction.add_parquet(&CatalogParquetInfo::from_chunk(&chunk));
transaction.commit().await.unwrap();
drop(guard);
info.path
chunk
},
async {
let guard = lock.write().await;
@ -289,7 +269,7 @@ mod tests {
);
let all_files = list_all_files(iox_object_store).await;
assert!(dbg!(all_files).contains(dbg!(&path)));
assert!(dbg!(all_files).contains(dbg!(chunk.path())));
}
}
@ -297,20 +277,15 @@ mod tests {
async fn test_cleanup_max_files() {
let config = make_config().await;
let iox_object_store = &config.iox_object_store;
let mut generator = ChunkGenerator::new_with_store(Arc::clone(iox_object_store));
let catalog = new_empty(config.clone()).await;
// create some files
let mut to_remove = HashSet::default();
for chunk_id in 0..3 {
let (path, _md) = make_metadata(
iox_object_store,
"foo",
chunk_addr(chunk_id),
TestSize::Full,
)
.await;
to_remove.insert(path);
for _ in 0..3 {
let (chunk, _) = generator.generate().await;
to_remove.insert(chunk.path().clone());
}
// run clean-up


@ -1064,7 +1064,10 @@ mod tests {
use std::vec;
use bytes::Bytes;
use parquet_file::test_utils::{chunk_addr, make_iox_object_store, make_metadata, TestSize};
use data_types::chunk_metadata::ChunkAddr;
use parquet_file::chunk::ParquetChunk;
use parquet_file::test_utils::generator::ChunkGenerator;
use parquet_file::test_utils::make_iox_object_store;
use super::*;
use crate::test_helpers::{
@ -1642,6 +1645,7 @@ mod tests {
async fn test_checkpoint() {
let config = make_config().await;
let mut trace = assert_single_catalog_inmem_works(config.clone()).await;
let mut generator = ChunkGenerator::new_with_store(Arc::clone(&config.iox_object_store));
// re-open catalog
let (catalog, mut state) = load_ok(config.clone()).await.unwrap();
@ -1659,21 +1663,10 @@ mod tests {
// create another transaction on-top that adds a file (this transaction will be required to load the full state)
{
let addr = chunk_addr(1337);
let (path, metadata) = make_metadata(
&config.iox_object_store,
"foo",
addr.clone(),
TestSize::Full,
)
.await;
let (chunk, _) = generator.generate_id(1337).await;
let mut transaction = catalog.open_transaction().await;
let info = CatalogParquetInfo {
path,
file_size_bytes: 33,
metadata: Arc::new(metadata),
};
let info = CatalogParquetInfo::from_chunk(&chunk);
state.insert(info.clone()).unwrap();
transaction.add_parquet(&info);
let ckpt_handle = transaction.commit().await.unwrap();
@ -1713,6 +1706,7 @@ mod tests {
async fn test_delete_predicates() {
let config = make_config().await;
let iox_object_store = &config.iox_object_store;
let mut generator = ChunkGenerator::new_with_store(Arc::clone(iox_object_store));
let catalog = new_empty(config.clone()).await;
let mut state = TestCatalogState::default();
@ -1722,16 +1716,11 @@ mod tests {
// create 3 chunks
let mut chunk_addrs = vec![];
for id in 0..3 {
let chunk_addr = chunk_addr(id);
let (path, metadata) =
make_metadata(iox_object_store, "foo", chunk_addr.clone(), TestSize::Full)
.await;
let info = CatalogParquetInfo {
path,
file_size_bytes: 33,
metadata: Arc::new(metadata),
};
for _ in 0..3 {
let (chunk, metadata) = generator.generate().await;
let chunk_addr = ChunkAddr::new(generator.partition(), metadata.chunk_id);
let info = CatalogParquetInfo::from_chunk(&chunk);
state.insert(info.clone()).unwrap();
t.add_parquet(&info);
@ -1819,6 +1808,29 @@ mod tests {
}
}
/// Assert that the set of parquet files tracked by a catalog is identical to the given list of chunks.
fn assert_catalog_chunks(state: &TestCatalogState, expected: &[ParquetChunk]) {
let actual = get_catalog_parquet_files(state);
let mut expected: Vec<_> = expected.iter().collect();
expected.sort_by(|a, b| a.path().cmp(b.path()));
for ((actual_path, actual_md), chunk) in actual.iter().zip(expected.iter()) {
assert_eq!(actual_path, chunk.path());
let actual_md = actual_md.decode().unwrap();
let actual_schema = actual_md.read_schema().unwrap();
let expected_schema = chunk.schema();
assert_eq!(actual_schema, expected_schema);
// NOTE: the actual table name is not important here as long as it is the same for both calls, since it is
// only used to generate our statistics struct (not to read / dispatch anything).
let actual_stats = actual_md.read_statistics(&actual_schema).unwrap();
let expected_stats = &chunk.table_summary().columns;
assert_eq!(&actual_stats, expected_stats);
}
}
async fn checked_delete(iox_object_store: &IoxObjectStore, path: &TransactionFilePath) {
// issue full GET operation to check if object is present
iox_object_store
@ -1872,6 +1884,7 @@ mod tests {
async fn assert_single_catalog_inmem_works(config: PreservedCatalogConfig) -> TestTrace {
let iox_object_store = &config.iox_object_store;
let mut generator = ChunkGenerator::new_with_store(Arc::clone(iox_object_store));
let catalog = new_empty(config.clone()).await;
let mut state = TestCatalogState::default();
@ -1889,102 +1902,56 @@ mod tests {
{
let mut t = catalog.open_transaction().await;
let (path, metadata) =
make_metadata(iox_object_store, "foo", chunk_addr(0), TestSize::Full).await;
expected.push((path.clone(), metadata.clone()));
let info = CatalogParquetInfo {
path,
file_size_bytes: 33,
metadata: Arc::new(metadata),
};
state.insert(info.clone()).unwrap();
t.add_parquet(&info);
let (path, metadata) =
make_metadata(iox_object_store, "bar", chunk_addr(1), TestSize::Full).await;
expected.push((path.clone(), metadata.clone()));
let info = CatalogParquetInfo {
path,
file_size_bytes: 33,
metadata: Arc::new(metadata),
};
state.insert(info.clone()).unwrap();
t.add_parquet(&info);
let (path, metadata) =
make_metadata(iox_object_store, "bar", chunk_addr(2), TestSize::Full).await;
expected.push((path.clone(), metadata.clone()));
let info = CatalogParquetInfo {
path,
file_size_bytes: 33,
metadata: Arc::new(metadata),
};
state.insert(info.clone()).unwrap();
t.add_parquet(&info);
let (path, metadata) =
make_metadata(iox_object_store, "foo", chunk_addr(3), TestSize::Full).await;
expected.push((path.clone(), metadata.clone()));
let info = CatalogParquetInfo {
path,
file_size_bytes: 33,
metadata: Arc::new(metadata),
};
state.insert(info.clone()).unwrap();
t.add_parquet(&info);
for _ in 0..4 {
let (chunk, _) = generator.generate().await;
let info = CatalogParquetInfo::from_chunk(&chunk);
expected.push(chunk);
state.insert(info.clone()).unwrap();
t.add_parquet(&info);
}
t.commit().await.unwrap();
}
assert_eq!(catalog.revision_counter(), 1);
assert_catalog_parquet_files(&state, &expected);
assert_catalog_chunks(&state, &expected);
trace.record(&catalog, &state, false);
// modify catalog with examples
{
let (path, metadata) =
make_metadata(iox_object_store, "foo", chunk_addr(4), TestSize::Full).await;
expected.push((path.clone(), metadata.clone()));
let (chunk, _) = generator.generate().await;
let info = CatalogParquetInfo::from_chunk(&chunk);
expected.push(chunk);
let mut t = catalog.open_transaction().await;
// "real" modifications
let info = CatalogParquetInfo {
path,
file_size_bytes: 33,
metadata: Arc::new(metadata),
};
state.insert(info.clone()).unwrap();
t.add_parquet(&info);
let (path, _) = expected.remove(0);
state.remove(&path).unwrap();
t.remove_parquet(&path);
let chunk = expected.remove(0);
state.remove(chunk.path()).unwrap();
t.remove_parquet(chunk.path());
t.commit().await.unwrap();
}
assert_eq!(catalog.revision_counter(), 2);
assert_catalog_parquet_files(&state, &expected);
assert_catalog_chunks(&state, &expected);
trace.record(&catalog, &state, false);
// uncommitted modifications have no effect
{
let mut t = catalog.open_transaction().await;
let (path, metadata) =
make_metadata(iox_object_store, "foo", chunk_addr(1), TestSize::Full).await;
let info = CatalogParquetInfo {
path,
file_size_bytes: 33,
metadata: Arc::new(metadata),
};
let (chunk, _) = generator.generate().await;
let info = CatalogParquetInfo::from_chunk(&chunk);
t.add_parquet(&info);
t.remove_parquet(&expected[0].0);
t.remove_parquet(expected[0].path());
// NO commit here!
}
assert_eq!(catalog.revision_counter(), 2);
assert_catalog_parquet_files(&state, &expected);
assert_catalog_chunks(&state, &expected);
trace.record(&catalog, &state, true);
trace

View File

@ -222,7 +222,7 @@ impl Debug for Metadata {
mod tests {
use super::*;
use crate::{core::PreservedCatalog, interface::CatalogParquetInfo, test_helpers::make_config};
use parquet_file::test_utils::{chunk_addr, make_metadata, TestSize};
use parquet_file::test_utils::generator::{ChunkGenerator, GeneratorConfig};
use time::Time;
use uuid::Uuid;
@ -235,21 +235,15 @@ mod tests {
.with_time_provider(time_provider);
let iox_object_store = &config.iox_object_store;
let mut generator = ChunkGenerator::new_with_store(Arc::clone(iox_object_store));
generator.set_config(GeneratorConfig::Simple);
// build catalog with some data
let catalog = PreservedCatalog::new_empty(config.clone()).await.unwrap();
{
let (chunk, _) = generator.generate().await;
let mut transaction = catalog.open_transaction().await;
let (path, metadata) =
make_metadata(iox_object_store, "foo", chunk_addr(0), TestSize::Minimal).await;
let info = CatalogParquetInfo {
path,
file_size_bytes: 33,
metadata: Arc::new(metadata),
};
transaction.add_parquet(&info);
transaction.add_parquet(&CatalogParquetInfo::from_chunk(&chunk));
transaction.commit().await.unwrap();
}
@ -304,11 +298,11 @@ File {
"table1",
"part1",
],
file_name: "00000000-0000-0000-0000-000000000000.parquet",
file_name: "00000000-0000-0000-0000-000000000001.parquet",
},
),
file_size_bytes: 33,
metadata: b"metadata omitted (937 bytes)",
file_size_bytes: 3052,
metadata: b"metadata omitted (935 bytes)",
},
),
),
@ -352,21 +346,15 @@ File {
.with_fixed_uuid(Uuid::nil())
.with_time_provider(time_provider);
let iox_object_store = &config.iox_object_store;
let mut generator = ChunkGenerator::new_with_store(Arc::clone(iox_object_store));
generator.set_config(GeneratorConfig::Simple);
// build catalog with some data
let catalog = PreservedCatalog::new_empty(config.clone()).await.unwrap();
{
let (chunk, _) = generator.generate().await;
let mut transaction = catalog.open_transaction().await;
let (path, metadata) =
make_metadata(iox_object_store, "foo", chunk_addr(0), TestSize::Minimal).await;
let info = CatalogParquetInfo {
path,
file_size_bytes: 33,
metadata: Arc::new(metadata),
};
transaction.add_parquet(&info);
transaction.add_parquet(&CatalogParquetInfo::from_chunk(&chunk));
transaction.commit().await.unwrap();
}
@ -426,11 +414,11 @@ File {
"table1",
"part1",
],
file_name: "00000000-0000-0000-0000-000000000000.parquet",
file_name: "00000000-0000-0000-0000-000000000001.parquet",
},
),
file_size_bytes: 33,
metadata: b"metadata omitted (937 bytes)",
file_size_bytes: 3052,
metadata: b"metadata omitted (935 bytes)",
},
),
),
@ -460,7 +448,7 @@ File {
table_name: "table1",
partition_key: "part1",
chunk_id: ChunkId(
0,
1,
),
partition_checkpoint: PartitionCheckpoint {
table_name: "table1",
@ -500,7 +488,7 @@ File {
},
},
chunk_order: ChunkOrder(
5,
1,
),
},
),

View File

@ -7,6 +7,7 @@ use std::{
use data_types::chunk_metadata::{ChunkAddr, ChunkId};
use data_types::delete_predicate::DeletePredicate;
use iox_object_store::{IoxObjectStore, ParquetFilePath};
use parquet_file::chunk::ParquetChunk;
use snafu::Snafu;
use parquet_file::metadata::IoxParquetMetaData;
@ -24,6 +25,17 @@ pub struct CatalogParquetInfo {
pub metadata: Arc<IoxParquetMetaData>,
}
impl CatalogParquetInfo {
/// Creates a [`CatalogParquetInfo`] from a [`ParquetChunk`]
pub fn from_chunk(chunk: &ParquetChunk) -> Self {
Self {
path: chunk.path().clone(),
file_size_bytes: chunk.file_size_bytes(),
metadata: chunk.parquet_metadata(),
}
}
}
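A minimal usage sketch (not part of this diff) of the new constructor: the path, file size, and metadata come straight from a generated chunk instead of being hand-written with the placeholder `file_size_bytes: 33` that the old tests used. The module and test names are hypothetical; the `ChunkGenerator` import path is the one used elsewhere in this diff.
#[cfg(test)]
mod from_chunk_sketch {
    use super::CatalogParquetInfo;
    use parquet_file::test_utils::generator::ChunkGenerator;
    #[tokio::test]
    async fn catalog_parquet_info_from_chunk() {
        // Generate a persisted chunk backed by an in-memory object store.
        let mut generator = ChunkGenerator::new().await;
        let (chunk, _metadata) = generator.generate().await;
        // All three fields are derived from the chunk itself.
        let info = CatalogParquetInfo::from_chunk(&chunk);
        assert_eq!(&info.path, chunk.path());
        assert_eq!(info.file_size_bytes, chunk.file_size_bytes());
    }
}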
/// Same as [ChunkAddr] but w/o the database part.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ChunkAddrWithoutDatabase {

View File

@ -10,11 +10,14 @@ use crate::{
},
};
use data_types::delete_predicate::{DeleteExpr, DeletePredicate, Op, Scalar};
use data_types::{chunk_metadata::ChunkId, timestamp::TimestampRange};
use data_types::{
chunk_metadata::{ChunkAddr, ChunkId},
timestamp::TimestampRange,
};
use iox_object_store::{IoxObjectStore, ParquetFilePath, TransactionFilePath};
use parquet_file::{
metadata::IoxParquetMetaData,
test_utils::{chunk_addr, make_iox_object_store, make_metadata, TestSize},
chunk::ParquetChunk,
test_utils::{generator::ChunkGenerator, make_iox_object_store},
};
use snafu::ResultExt;
use std::{
@ -259,158 +262,107 @@ where
F: Fn(&S) -> CheckpointData + Send,
{
let config = make_config().await;
let iox_object_store = &config.iox_object_store;
let mut generator = ChunkGenerator::new_with_store(Arc::clone(iox_object_store));
// The expected state of the catalog
let mut expected_files: HashMap<ChunkId, (ParquetFilePath, Arc<IoxParquetMetaData>)> =
HashMap::new();
let mut expected_chunks: HashMap<u32, ParquetChunk> = HashMap::new();
let mut expected_predicates: HashMap<Arc<DeletePredicate>, HashSet<ChunkAddrWithoutDatabase>> =
HashMap::new();
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);
// add files
{
for chunk_id in 0..5 {
let (path, metadata) = make_metadata(
&config.iox_object_store,
"ok",
chunk_addr(chunk_id),
TestSize::Full,
)
.await;
for chunk_id in 1..5 {
let (chunk, _) = generator.generate_id(chunk_id).await;
state
.add(
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
path: path.clone(),
file_size_bytes: 33,
metadata: Arc::new(metadata.clone()),
},
Arc::clone(iox_object_store),
CatalogParquetInfo::from_chunk(&chunk),
)
.unwrap();
expected_files.insert(ChunkId::new_test(chunk_id), (path, Arc::new(metadata)));
expected_chunks.insert(chunk_id, chunk);
}
}
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);
// remove files
{
let (path, _) = expected_files.remove(&ChunkId::new_test(1)).unwrap();
state.remove(&path).unwrap();
let chunk = expected_chunks.remove(&1).unwrap();
state.remove(chunk.path()).unwrap();
}
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);
// add and remove in the same transaction
{
let (path, metadata) = make_metadata(
&config.iox_object_store,
"ok",
chunk_addr(5),
TestSize::Full,
)
.await;
let (chunk, _) = generator.generate_id(5).await;
state
.add(
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
path: path.clone(),
file_size_bytes: 33,
metadata: Arc::new(metadata),
},
Arc::clone(iox_object_store),
CatalogParquetInfo::from_chunk(&chunk),
)
.unwrap();
state.remove(&path).unwrap();
state.remove(chunk.path()).unwrap();
}
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);
// remove and add in the same transaction
{
let (path, metadata) = expected_files.get(&ChunkId::new_test(3)).unwrap();
state.remove(path).unwrap();
let chunk = expected_chunks.get(&3).unwrap();
state.remove(chunk.path()).unwrap();
state
.add(
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
path: path.clone(),
file_size_bytes: 33,
metadata: Arc::clone(metadata),
},
Arc::clone(iox_object_store),
CatalogParquetInfo::from_chunk(chunk),
)
.unwrap();
}
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);
// add, remove, add in the same transaction
{
let (path, metadata) = make_metadata(
&config.iox_object_store,
"ok",
chunk_addr(6),
TestSize::Full,
)
.await;
let (chunk, _) = generator.generate_id(6).await;
state
.add(
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
path: path.clone(),
file_size_bytes: 33,
metadata: Arc::new(metadata.clone()),
},
Arc::clone(iox_object_store),
CatalogParquetInfo::from_chunk(&chunk),
)
.unwrap();
state.remove(&path).unwrap();
state.remove(chunk.path()).unwrap();
state
.add(
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
path: path.clone(),
file_size_bytes: 33,
metadata: Arc::new(metadata.clone()),
},
Arc::clone(iox_object_store),
CatalogParquetInfo::from_chunk(&chunk),
)
.unwrap();
expected_files.insert(ChunkId::new_test(6), (path, Arc::new(metadata)));
expected_chunks.insert(6, chunk);
}
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);
// remove, add, remove in same transaction
{
let (path, metadata) = expected_files.remove(&ChunkId::new_test(4)).unwrap();
state.remove(&path).unwrap();
let chunk = expected_chunks.remove(&4).unwrap();
state.remove(chunk.path()).unwrap();
state
.add(
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
path: path.clone(),
file_size_bytes: 33,
metadata: Arc::clone(&metadata),
},
Arc::clone(iox_object_store),
CatalogParquetInfo::from_chunk(&chunk),
)
.unwrap();
state.remove(&path).unwrap();
state.remove(chunk.path()).unwrap();
}
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);
// error handling, no real opt
{
// TODO: Error handling should disambiguate between chunk collision and filename collision
// chunk with same ID already exists (should also not change the metadata)
let (path, metadata) = make_metadata(
&config.iox_object_store,
"fail",
chunk_addr(0),
TestSize::Full,
)
.await;
let (chunk, _) = generator.generate_id(2).await;
let err = state
.add(
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
path,
file_size_bytes: 33,
metadata: Arc::new(metadata),
},
Arc::clone(iox_object_store),
CatalogParquetInfo::from_chunk(&chunk),
)
.unwrap_err();
assert!(matches!(
@ -418,21 +370,16 @@ where
CatalogStateAddError::ParquetFileAlreadyExists { .. }
));
}
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);
// error handling, still something works
{
// already exists (should also not change the metadata)
let (_, metadata) = expected_files.get(&ChunkId::new_test(0)).unwrap();
let (chunk, _) = generator.generate_id(2).await;
let err = state
.add(
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
// Intentionally "incorrect" path
path: ParquetFilePath::new(&chunk_addr(10)),
file_size_bytes: 33,
metadata: Arc::clone(metadata),
},
Arc::clone(iox_object_store),
CatalogParquetInfo::from_chunk(&chunk),
)
.unwrap_err();
assert!(matches!(
@ -441,97 +388,57 @@ where
));
// this transaction will still work
let (path, metadata) = make_metadata(
&config.iox_object_store,
"ok",
chunk_addr(7),
TestSize::Full,
)
.await;
let metadata = Arc::new(metadata);
let (chunk, _) = generator.generate_id(7).await;
let info = CatalogParquetInfo::from_chunk(&chunk);
state
.add(
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
path: path.clone(),
file_size_bytes: 33,
metadata: Arc::clone(&metadata),
},
)
.add(Arc::clone(iox_object_store), info.clone())
.unwrap();
expected_files.insert(ChunkId::new_test(7), (path.clone(), Arc::clone(&metadata)));
expected_chunks.insert(7, chunk);
// recently added
let err = state
.add(
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
path,
file_size_bytes: 33,
metadata: Arc::clone(&metadata),
},
)
.unwrap_err();
let err = state.add(Arc::clone(iox_object_store), info).unwrap_err();
assert!(matches!(
err,
CatalogStateAddError::ParquetFileAlreadyExists { .. }
));
// this still works
let (path, _) = expected_files.remove(&ChunkId::new_test(7)).unwrap();
state.remove(&path).unwrap();
let chunk = expected_chunks.remove(&7).unwrap();
state.remove(chunk.path()).unwrap();
// recently removed
let err = state.remove(&path).unwrap_err();
let err = state.remove(chunk.path()).unwrap_err();
assert!(matches!(
err,
CatalogStateRemoveError::ParquetFileDoesNotExist { .. }
));
}
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);
// add predicates
{
// create two chunks that we can use for delete predicate
let chunk_addr_1 = chunk_addr(8);
let (path, metadata) = make_metadata(
&config.iox_object_store,
"ok",
chunk_addr_1.clone(),
TestSize::Full,
)
.await;
state
.add(
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
path: path.clone(),
file_size_bytes: 33,
metadata: Arc::new(metadata.clone()),
},
)
.unwrap();
expected_files.insert(chunk_addr_1.chunk_id, (path, Arc::new(metadata)));
let (chunk, metadata) = generator.generate_id(8).await;
let chunk_addr_1 = ChunkAddr::new(generator.partition(), metadata.chunk_id);
let chunk_addr_2 = chunk_addr(9);
let (path, metadata) = make_metadata(
&config.iox_object_store,
"ok",
chunk_addr_2.clone(),
TestSize::Full,
)
.await;
state
.add(
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
path: path.clone(),
file_size_bytes: 33,
metadata: Arc::new(metadata.clone()),
},
Arc::clone(iox_object_store),
CatalogParquetInfo::from_chunk(&chunk),
)
.unwrap();
expected_files.insert(chunk_addr_2.chunk_id, (path, Arc::new(metadata)));
expected_chunks.insert(8, chunk);
let (chunk, metadata) = generator.generate_id(9).await;
let chunk_addr_2 = ChunkAddr::new(generator.partition(), metadata.chunk_id);
state
.add(
Arc::clone(iox_object_store),
CatalogParquetInfo::from_chunk(&chunk),
)
.unwrap();
expected_chunks.insert(9, chunk);
// first predicate used only a single chunk
let predicate_1 = create_delete_predicate(1);
@ -546,32 +453,21 @@ where
expected_predicates.insert(predicate_2, chunks_2.into_iter().collect());
// chunks created afterwards are unaffected
let chunk_addr_3 = chunk_addr(10);
let (path, metadata) = make_metadata(
&config.iox_object_store,
"ok",
chunk_addr_3.clone(),
TestSize::Full,
)
.await;
let (chunk, _) = generator.generate_id(10).await;
state
.add(
Arc::clone(&config.iox_object_store),
CatalogParquetInfo {
path: path.clone(),
file_size_bytes: 33,
metadata: Arc::new(metadata.clone()),
},
Arc::clone(iox_object_store),
CatalogParquetInfo::from_chunk(&chunk),
)
.unwrap();
expected_files.insert(chunk_addr_3.chunk_id, (path, Arc::new(metadata)));
expected_chunks.insert(10, chunk);
}
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);
// removing a chunk will also remove its predicates
{
let (path, _) = expected_files.remove(&ChunkId::new_test(8)).unwrap();
state.remove(&path).unwrap();
let chunk = expected_chunks.remove(&8).unwrap();
state.remove(chunk.path()).unwrap();
expected_predicates = expected_predicates
.into_iter()
.filter_map(|(predicate, chunks)| {
@ -583,7 +479,7 @@ where
})
.collect();
}
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);
// Registering predicates for unknown chunks is just ignored because chunks might have been in the "persisting"
// intermediate state while the predicate was reported.
@ -596,30 +492,30 @@ where
}];
state.delete_predicate(Arc::clone(&predicate), chunks);
}
assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);
}
/// Assert that the tracked files and their linked metadata match the expected chunks.
fn assert_checkpoint<S, F>(
state: &S,
f: &F,
expected_files: &HashMap<ChunkId, (ParquetFilePath, Arc<IoxParquetMetaData>)>,
expected_chunks: &HashMap<u32, ParquetChunk>,
expected_predicates: &HashMap<Arc<DeletePredicate>, HashSet<ChunkAddrWithoutDatabase>>,
) where
F: Fn(&S) -> CheckpointData,
{
let data = f(state);
let data: CheckpointData = f(state);
let actual_files = data.files;
let sorted_keys_actual = get_sorted_keys(actual_files.keys());
let sorted_keys_expected = get_sorted_keys(expected_files.values().map(|(path, _)| path));
let sorted_keys_expected = get_sorted_keys(expected_chunks.values().map(|chunk| chunk.path()));
assert_eq!(sorted_keys_actual, sorted_keys_expected);
for (path, md_expected) in expected_files.values() {
let md_actual = &actual_files[path].metadata;
for chunk in expected_chunks.values() {
let md_actual = &actual_files[chunk.path()].metadata;
let md_actual = md_actual.decode().unwrap();
let md_expected = md_expected.decode().unwrap();
let md_expected = chunk.parquet_metadata().decode().unwrap();
let iox_md_actual = md_actual.read_iox_metadata().unwrap();
let iox_md_expected = md_expected.read_iox_metadata().unwrap();

View File

@ -33,6 +33,7 @@ tokio = { version = "1.13", features = ["macros", "rt", "rt-multi-thread", "sync
tokio-stream = "0.1"
uuid = { version = "0.8", features = ["serde", "v4"] }
zstd = "0.9"
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]
arrow_util = { path = "../arrow_util" }

View File

@ -870,26 +870,15 @@ mod tests {
use schema::TIME_COLUMN_NAME;
use crate::test_utils::{
chunk_addr, create_partition_and_database_checkpoint, load_parquet_from_store, make_chunk,
make_chunk_no_row_group, make_iox_object_store, TestSize,
};
use crate::test_utils::create_partition_and_database_checkpoint;
use crate::test_utils::generator::{ChunkGenerator, GeneratorConfig};
#[tokio::test]
async fn test_restore_from_file() {
// setup: preserve chunk to object store
let iox_object_store = make_iox_object_store().await;
let chunk = make_chunk(
Arc::clone(&iox_object_store),
"foo",
chunk_addr(1),
TestSize::Full,
)
.await;
let parquet_data = load_parquet_from_store(&chunk, iox_object_store)
.await
.unwrap();
let parquet_metadata = IoxParquetMetaData::from_file_bytes(parquet_data).unwrap();
let mut generator = ChunkGenerator::new().await;
let (chunk, _) = generator.generate().await;
let parquet_metadata = chunk.parquet_metadata();
let decoded = parquet_metadata.decode().unwrap();
// step 1: read back schema
@ -911,18 +900,9 @@ mod tests {
#[tokio::test]
async fn test_restore_from_thrift() {
// setup: write chunk to object store and only keep thrift-encoded metadata
let iox_object_store = make_iox_object_store().await;
let chunk = make_chunk(
Arc::clone(&iox_object_store),
"foo",
chunk_addr(1),
TestSize::Full,
)
.await;
let parquet_data = load_parquet_from_store(&chunk, iox_object_store)
.await
.unwrap();
let parquet_metadata = IoxParquetMetaData::from_file_bytes(parquet_data).unwrap();
let mut generator = ChunkGenerator::new().await;
let (chunk, _) = generator.generate().await;
let parquet_metadata = chunk.parquet_metadata();
let data = parquet_metadata.thrift_bytes().to_vec();
let parquet_metadata = IoxParquetMetaData::from_thrift_bytes(data);
let decoded = parquet_metadata.decode().unwrap();
@ -941,18 +921,10 @@ mod tests {
#[tokio::test]
async fn test_restore_from_file_no_row_group() {
// setup: preserve chunk to object store
let iox_object_store = make_iox_object_store().await;
let chunk = make_chunk_no_row_group(
Arc::clone(&iox_object_store),
"foo",
chunk_addr(1),
TestSize::Full,
)
.await;
let parquet_data = load_parquet_from_store(&chunk, iox_object_store)
.await
.unwrap();
let parquet_metadata = IoxParquetMetaData::from_file_bytes(parquet_data).unwrap();
let mut generator = ChunkGenerator::new().await;
generator.set_config(GeneratorConfig::NoData);
let (chunk, _) = generator.generate().await;
let parquet_metadata = chunk.parquet_metadata();
let decoded = parquet_metadata.decode().unwrap();
// step 1: read back schema
@ -971,18 +943,11 @@ mod tests {
#[tokio::test]
async fn test_restore_from_thrift_no_row_group() {
// setup: write chunk to object store and only keep thrift-encoded metadata
let iox_object_store = make_iox_object_store().await;
let chunk = make_chunk_no_row_group(
Arc::clone(&iox_object_store),
"foo",
chunk_addr(1),
TestSize::Full,
)
.await;
let parquet_data = load_parquet_from_store(&chunk, iox_object_store)
.await
.unwrap();
let parquet_metadata = IoxParquetMetaData::from_file_bytes(parquet_data).unwrap();
let mut generator = ChunkGenerator::new().await;
generator.set_config(GeneratorConfig::NoData);
let (chunk, _) = generator.generate().await;
let parquet_metadata = chunk.parquet_metadata();
let data = parquet_metadata.thrift_bytes().to_vec();
let parquet_metadata = IoxParquetMetaData::from_thrift_bytes(data);
let decoded = parquet_metadata.decode().unwrap();
@ -1002,18 +967,9 @@ mod tests {
#[tokio::test]
async fn test_make_chunk() {
let iox_object_store = make_iox_object_store().await;
let chunk = make_chunk(
Arc::clone(&iox_object_store),
"foo",
chunk_addr(1),
TestSize::Full,
)
.await;
let parquet_data = load_parquet_from_store(&chunk, iox_object_store)
.await
.unwrap();
let parquet_metadata = IoxParquetMetaData::from_file_bytes(parquet_data).unwrap();
let mut generator = ChunkGenerator::new().await;
let (chunk, _) = generator.generate().await;
let parquet_metadata = chunk.parquet_metadata();
let decoded = parquet_metadata.decode().unwrap();
assert!(decoded.md.num_row_groups() > 1);
@ -1040,18 +996,10 @@ mod tests {
#[tokio::test]
async fn test_make_chunk_no_row_group() {
let iox_object_store = make_iox_object_store().await;
let chunk = make_chunk_no_row_group(
Arc::clone(&iox_object_store),
"foo",
chunk_addr(1),
TestSize::Full,
)
.await;
let parquet_data = load_parquet_from_store(&chunk, iox_object_store)
.await
.unwrap();
let parquet_metadata = IoxParquetMetaData::from_file_bytes(parquet_data).unwrap();
let mut generator = ChunkGenerator::new().await;
generator.set_config(GeneratorConfig::NoData);
let (chunk, _) = generator.generate().await;
let parquet_metadata = chunk.parquet_metadata();
let decoded = parquet_metadata.decode().unwrap();
assert_eq!(decoded.md.num_row_groups(), 0);
@ -1113,18 +1061,9 @@ mod tests {
#[tokio::test]
async fn test_parquet_metadata_size() {
// setup: preserve chunk to object store
let iox_object_store = make_iox_object_store().await;
let chunk = make_chunk(
Arc::clone(&iox_object_store),
"foo",
chunk_addr(1),
TestSize::Full,
)
.await;
let parquet_data = load_parquet_from_store(&chunk, iox_object_store)
.await
.unwrap();
let parquet_metadata = IoxParquetMetaData::from_file_bytes(parquet_data).unwrap();
assert_eq!(parquet_metadata.size(), 3730);
let mut generator = ChunkGenerator::new().await;
let (chunk, _) = generator.generate().await;
let parquet_metadata = chunk.parquet_metadata();
assert_eq!(parquet_metadata.size(), 3729);
}
}

View File

@ -429,17 +429,14 @@ impl TryClone for MemWriter {
#[cfg(test)]
mod tests {
use super::*;
use crate::test_utils::generator::ChunkGenerator;
use crate::test_utils::{
chunk_addr, create_partition_and_database_checkpoint, load_parquet_from_store,
make_chunk_given_record_batch, make_iox_object_store, make_record_batch,
read_data_from_parquet_data, TestSize,
create_partition_and_database_checkpoint, load_parquet_from_store, make_iox_object_store,
make_record_batch, read_data_from_parquet_data, TestSize,
};
use arrow::array::{ArrayRef, StringArray};
use arrow_util::assert_batches_eq;
use data_types::{
chunk_metadata::{ChunkId, ChunkOrder},
partition_metadata::TableSummary,
};
use data_types::chunk_metadata::{ChunkId, ChunkOrder};
use datafusion::physical_plan::common::SizedRecordBatchStream;
use datafusion_util::MemoryStream;
use parquet::schema::types::ColumnPath;
@ -584,37 +581,17 @@ mod tests {
#[tokio::test]
async fn test_write_read() {
////////////////////
// Create test data which is also the expected data
let addr = chunk_addr(1);
let table = Arc::clone(&addr.table_name);
let (record_batches, schema, column_summaries, num_rows) =
make_record_batch("foo", TestSize::Full);
let mut table_summary = TableSummary::new(table.to_string());
table_summary.columns = column_summaries.clone();
let record_batch = record_batches[0].clone(); // Get the first one to compare key-value meta data that would be the same for all batches
let key_value_metadata = record_batch.schema().metadata().clone();
////////////////////
// Make an OS in memory
let store = make_iox_object_store().await;
////////////////////
// Store the data as a chunk and write it to in the object store
// This test Storage::write_to_object_store
let chunk = make_chunk_given_record_batch(
Arc::clone(&store),
record_batches.clone(),
schema.clone(),
addr,
column_summaries.clone(),
)
.await;
// This tests Storage::write_to_object_store
let mut generator = ChunkGenerator::new().await;
let (chunk, _) = generator.generate().await;
let key_value_metadata = chunk.schema().as_arrow().metadata().clone();
////////////////////
// Now let read it back
//
let parquet_data = load_parquet_from_store(&chunk, Arc::clone(&store))
let parquet_data = load_parquet_from_store(&chunk, Arc::clone(generator.store()))
.await
.unwrap();
let parquet_metadata = IoxParquetMetaData::from_file_bytes(parquet_data.clone()).unwrap();
@ -622,7 +599,7 @@ mod tests {
//
// 1. Check metadata at file level: Everything is correct
let schema_actual = decoded.read_schema().unwrap();
assert_eq!(Arc::new(schema.clone()), schema_actual);
assert_eq!(chunk.schema(), schema_actual);
assert_eq!(
key_value_metadata.clone(),
schema_actual.as_arrow().metadata().clone()
@ -630,22 +607,19 @@ mod tests {
// 2. Check statistics
let table_summary_actual = decoded.read_statistics(&schema_actual).unwrap();
assert_eq!(table_summary_actual, table_summary.columns);
assert_eq!(table_summary_actual, chunk.table_summary().columns);
// 3. Check data
// Note that read_data_from_parquet_data fixes the row-group/batch-level metadata bug in arrow
let actual_record_batches =
read_data_from_parquet_data(Arc::clone(&schema.as_arrow()), parquet_data);
read_data_from_parquet_data(chunk.schema().as_arrow(), parquet_data);
let mut actual_num_rows = 0;
for batch in actual_record_batches.clone() {
actual_num_rows += batch.num_rows();
// Check if record batch has meta data
let batch_key_value_metadata = batch.schema().metadata().clone();
assert_eq!(
schema.as_arrow().metadata().clone(),
batch_key_value_metadata
);
assert_eq!(key_value_metadata, batch_key_value_metadata);
}
// Now verify return results. This assert_batches_eq still works correctly without the metadata
@ -660,8 +634,7 @@ mod tests {
"| foo | | | | foo | | | | 4 | 9223372036854775807 | | | 4 | 18446744073709551615 | | | 40.1 | 1 | -0 | NaN | NaN | | | false | | | 1970-01-01T00:00:00.000004Z |",
"+----------------+---------------+-------------------+------------------+-------------------------+------------------------+----------------------------+---------------------------+----------------------+----------------------+-------------------------+------------------------+----------------------+----------------------+-------------------------+------------------------+----------------------+-------------------+--------------------+------------------------+-----------------------+-------------------------+------------------------+-----------------------+--------------------------+-------------------------+-----------------------------+",
];
assert_eq!(num_rows, actual_num_rows);
assert_batches_eq!(expected.clone(), &record_batches);
assert_eq!(chunk.rows(), actual_num_rows);
assert_batches_eq!(expected, &actual_record_batches);
}
}

View File

@ -1,6 +1,5 @@
use crate::{
chunk::{self, ChunkMetrics, ParquetChunk},
metadata::{IoxMetadata, IoxParquetMetaData},
chunk::{self, ParquetChunk},
storage::Storage,
};
use arrow::{
@ -12,12 +11,9 @@ use arrow::{
record_batch::RecordBatch,
};
use data_types::{
chunk_metadata::{ChunkAddr, ChunkId, ChunkOrder},
partition_metadata::{ColumnSummary, InfluxDbType, StatValues, Statistics, TableSummary},
partition_metadata::{ColumnSummary, InfluxDbType, StatValues, Statistics},
server_id::ServerId,
};
use datafusion::physical_plan::SendableRecordBatchStream;
use datafusion_util::MemoryStream;
use futures::TryStreamExt;
use iox_object_store::{IoxObjectStore, ParquetFilePath};
use object_store::ObjectStore;
@ -36,6 +32,8 @@ use std::{collections::BTreeMap, num::NonZeroU32, sync::Arc};
use time::Time;
use uuid::Uuid;
pub mod generator;
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("Error getting data from object store: {}", source))]
@ -98,109 +96,6 @@ pub async fn load_parquet_from_store_for_path(
Ok(parquet_data)
}
/// The db name to use for testing
pub fn db_name() -> &'static str {
"db1"
}
/// Creates a test chunk address for a given chunk id
pub fn chunk_addr(id: u128) -> ChunkAddr {
ChunkAddr {
db_name: Arc::from(db_name()),
table_name: Arc::from("table1"),
partition_key: Arc::from("part1"),
chunk_id: ChunkId::new_test(id),
}
}
/// Same as [`make_chunk`] but parquet file does not contain any row group.
pub async fn make_chunk(
iox_object_store: Arc<IoxObjectStore>,
column_prefix: &str,
addr: ChunkAddr,
test_size: TestSize,
) -> ParquetChunk {
let (record_batches, schema, column_summaries, _num_rows) =
make_record_batch(column_prefix, test_size);
make_chunk_given_record_batch(
iox_object_store,
record_batches,
schema,
addr,
column_summaries,
)
.await
}
/// Same as [`make_chunk`] but parquet file does not contain any row group.
pub async fn make_chunk_no_row_group(
store: Arc<IoxObjectStore>,
column_prefix: &str,
addr: ChunkAddr,
test_size: TestSize,
) -> ParquetChunk {
let (_, schema, column_summaries, _num_rows) = make_record_batch(column_prefix, test_size);
make_chunk_given_record_batch(store, vec![], schema, addr, column_summaries).await
}
/// Create a test chunk by writing data to object store.
///
/// TODO: This code creates a chunk that isn't hooked up with metrics
pub async fn make_chunk_given_record_batch(
iox_object_store: Arc<IoxObjectStore>,
record_batches: Vec<RecordBatch>,
schema: Schema,
addr: ChunkAddr,
column_summaries: Vec<ColumnSummary>,
) -> ParquetChunk {
let storage = Storage::new(Arc::clone(&iox_object_store));
let table_summary = TableSummary {
name: addr.table_name.to_string(),
columns: column_summaries,
};
let stream: SendableRecordBatchStream = if record_batches.is_empty() {
Box::pin(MemoryStream::new_with_schema(
record_batches,
Arc::clone(schema.inner()),
))
} else {
Box::pin(MemoryStream::new(record_batches))
};
let (partition_checkpoint, database_checkpoint) = create_partition_and_database_checkpoint(
Arc::clone(&addr.table_name),
Arc::clone(&addr.partition_key),
);
let metadata = IoxMetadata {
creation_timestamp: Time::from_timestamp(10, 20),
table_name: Arc::clone(&addr.table_name),
partition_key: Arc::clone(&addr.partition_key),
chunk_id: addr.chunk_id,
partition_checkpoint,
database_checkpoint,
time_of_first_write: Time::from_timestamp(30, 40),
time_of_last_write: Time::from_timestamp(50, 60),
chunk_order: ChunkOrder::new(5).unwrap(),
};
let (path, file_size_bytes, parquet_metadata) = storage
.write_to_object_store(addr.clone(), stream, metadata)
.await
.unwrap();
let rows = parquet_metadata.decode().unwrap().row_count();
ParquetChunk::new_from_parts(
addr.partition_key,
Arc::new(table_summary),
Arc::new(schema),
&path,
Arc::clone(&iox_object_store),
file_size_bytes,
Arc::new(parquet_metadata),
rows,
ChunkMetrics::new_unregistered(),
)
}
fn create_column_tag(
name: &str,
data: Vec<Vec<Option<&str>>>,
@ -893,25 +788,6 @@ pub fn read_data_from_parquet_data(schema: SchemaRef, parquet_data: Vec<u8>) ->
record_batches
}
/// Create test metadata by creating a parquet file and reading it back into memory.
///
/// See [`make_chunk`] for details.
pub async fn make_metadata(
iox_object_store: &Arc<IoxObjectStore>,
column_prefix: &str,
addr: ChunkAddr,
test_size: TestSize,
) -> (ParquetFilePath, IoxParquetMetaData) {
let chunk = make_chunk(Arc::clone(iox_object_store), column_prefix, addr, test_size).await;
let parquet_data = load_parquet_from_store(&chunk, Arc::clone(iox_object_store))
.await
.unwrap();
(
chunk.path().clone(),
IoxParquetMetaData::from_file_bytes(parquet_data).unwrap(),
)
}
/// Create [`PartitionCheckpoint`] and [`DatabaseCheckpoint`] for testing.
pub fn create_partition_and_database_checkpoint(
table_name: Arc<str>,

View File

@ -0,0 +1,139 @@
use crate::chunk::{ChunkMetrics, ParquetChunk};
use crate::metadata::IoxMetadata;
use crate::storage::Storage;
use crate::test_utils::{
create_partition_and_database_checkpoint, make_iox_object_store, make_record_batch, TestSize,
};
use data_types::chunk_metadata::{ChunkAddr, ChunkId, ChunkOrder};
use data_types::partition_metadata::{PartitionAddr, TableSummary};
use datafusion_util::MemoryStream;
use iox_object_store::IoxObjectStore;
use std::sync::Arc;
use time::Time;
/// Controls how much data (columns and row groups) to generate for chunks
#[derive(Debug, Copy, Clone)]
pub enum GeneratorConfig {
/// Generates schema but skips generating data
NoData,
/// Generates 3 row groups with a limited selection of columns
Simple,
/// Generates 3 row groups with a wide variety of different columns
Full,
}
/// A generator of persisted chunks for use in tests
#[derive(Debug)]
pub struct ChunkGenerator {
iox_object_store: Arc<IoxObjectStore>,
storage: Storage,
column_prefix: String,
config: GeneratorConfig,
partition: PartitionAddr,
next_chunk: u32,
}
impl ChunkGenerator {
pub async fn new() -> Self {
Self::new_with_store(make_iox_object_store().await)
}
pub fn new_with_store(iox_object_store: Arc<IoxObjectStore>) -> Self {
let storage = Storage::new(Arc::clone(&iox_object_store));
Self {
iox_object_store,
storage,
column_prefix: "foo".to_string(),
config: GeneratorConfig::Full,
partition: PartitionAddr {
db_name: Arc::from("db1"),
table_name: Arc::from("table1"),
partition_key: Arc::from("part1"),
},
next_chunk: 1,
}
}
pub fn store(&self) -> &Arc<IoxObjectStore> {
&self.iox_object_store
}
pub fn set_config(&mut self, config: GeneratorConfig) {
self.config = config;
}
pub fn partition(&self) -> &PartitionAddr {
&self.partition
}
pub async fn generate(&mut self) -> (ParquetChunk, IoxMetadata) {
let id = self.next_chunk;
self.next_chunk += 1;
self.generate_id(id).await
}
pub async fn generate_id(&mut self, id: u32) -> (ParquetChunk, IoxMetadata) {
let (partition_checkpoint, database_checkpoint) = create_partition_and_database_checkpoint(
Arc::clone(&self.partition.table_name),
Arc::clone(&self.partition.partition_key),
);
let chunk_id = ChunkId::new_test(id as _);
let chunk_order = ChunkOrder::new(id).unwrap();
let chunk_addr = ChunkAddr::new(&self.partition, chunk_id);
let metadata = IoxMetadata {
creation_timestamp: Time::from_timestamp(10, 20),
table_name: Arc::clone(&self.partition.table_name),
partition_key: Arc::clone(&self.partition.partition_key),
chunk_id,
chunk_order,
partition_checkpoint,
database_checkpoint,
time_of_first_write: Time::from_timestamp(30, 40),
time_of_last_write: Time::from_timestamp(50, 60),
};
let (record_batches, schema, column_summaries, rows) = match self.config {
GeneratorConfig::NoData => {
// Generating an entire row group just for its metadata seems wasteful
let (_, schema, column_summaries, _) =
make_record_batch(&self.column_prefix, TestSize::Minimal);
// Note: column summaries here are inconsistent with the actual data?
(vec![], schema, column_summaries, 0)
}
GeneratorConfig::Simple => make_record_batch(&self.column_prefix, TestSize::Minimal),
GeneratorConfig::Full => make_record_batch(&self.column_prefix, TestSize::Full),
};
let table_summary = TableSummary {
name: self.partition.table_name.to_string(),
columns: column_summaries,
};
let stream = Box::pin(MemoryStream::new_with_schema(
record_batches,
Arc::clone(schema.inner()),
));
let (path, file_size_bytes, parquet_metadata) = self
.storage
.write_to_object_store(chunk_addr, stream, metadata.clone())
.await
.unwrap();
let chunk = ParquetChunk::new_from_parts(
Arc::clone(&self.partition.partition_key),
Arc::new(table_summary),
Arc::new(schema),
&path,
Arc::clone(&self.iox_object_store),
file_size_bytes,
Arc::new(parquet_metadata),
rows,
ChunkMetrics::new_unregistered(),
);
(chunk, metadata)
}
}
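A hedged usage sketch (not part of this diff) for the generator above: `generate` hands out chunk ids sequentially starting at 1, and `GeneratorConfig::NoData` writes a parquet file with schema and metadata but no row groups. The module and test names are hypothetical; everything else uses APIs shown in this file or in the metadata tests elsewhere in this diff.
#[cfg(test)]
mod generator_sketch {
    use super::*;
    use data_types::chunk_metadata::ChunkId;
    #[tokio::test]
    async fn generate_no_data_chunk() {
        let mut generator = ChunkGenerator::new().await;
        generator.set_config(GeneratorConfig::NoData);
        let (chunk, metadata) = generator.generate().await;
        // Ids are assigned sequentially starting at 1.
        assert_eq!(metadata.chunk_id, ChunkId::new_test(1));
        // With `NoData` the persisted parquet file contains no row groups.
        let decoded = chunk.parquet_metadata().decode().unwrap();
        assert_eq!(decoded.md.num_row_groups(), 0);
    }
}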

View File

@ -9,6 +9,7 @@ internal_types = { path = "../internal_types" }
observability_deps = { path = "../observability_deps" }
snafu = "0.6.2"
time = { path = "../time" }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]
test_helpers = { path = "../test_helpers" }

View File

@ -13,9 +13,10 @@ schema = { path = "../schema" }
observability_deps = { path = "../observability_deps" }
ordered-float = "2"
regex = "1"
serde_json = "1.0.70"
serde_json = "1.0.71"
snafu = "0.6.9"
sqlparser = "0.12.0"
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]
test_helpers = { path = "../test_helpers" }

View File

@ -35,6 +35,7 @@ tokio-stream = "0.1.8"
tokio-util = { version = "0.6.9" }
trace = { path = "../trace" }
predicate = { path = "../predicate" }
workspace-hack = { path = "../workspace-hack"}
# use libc on unix like platforms to set worker priority in DedicatedExecutor

View File

@ -18,7 +18,7 @@ use datafusion::{
ExecutionPlan,
},
};
use observability_deps::tracing::{debug, info, trace};
use observability_deps::tracing::{debug, trace};
use predicate::predicate::{Predicate, PredicateBuilder};
use schema::{merge::SchemaMerger, sort::SortKey, Schema};
@ -236,7 +236,7 @@ impl<C: QueryChunk + 'static> TableProvider for ChunkTableProvider<C> {
filters: &[Expr],
_limit: Option<usize>,
) -> std::result::Result<Arc<dyn ExecutionPlan>, DataFusionError> {
info!(" = Inside ChunkTableProvider Scan");
trace!(" = Inside ChunkTableProvider Scan");
// Note that `filters` don't actually need to be evaluated in
// the scan for the plans to be correct, they are an extra

View File

@ -15,6 +15,7 @@ once_cell = { version = "1.4.0", features = ["parking_lot"] }
predicate = { path = "../predicate" }
query = { path = "../query" }
server = { path = "../server" }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]
arrow = { version = "6.0", features = ["prettyprint"] }

View File

@ -25,6 +25,7 @@ parking_lot = "0.11"
permutation = "0.2.5"
snafu = "0.6"
schema = { path = "../schema" }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies] # In alphabetical order
criterion = "0.3.3"

View File

@ -5,7 +5,7 @@ edition = "2021"
[dependencies]
async-trait = "0.1"
cache_loader_async = "0.1.2"
cache_loader_async = { version = "0.1.2", features = ["ttl-cache"] }
data_types = { path = "../data_types" }
dml = { path = "../dml" }
hashbrown = "0.11"
@ -19,6 +19,7 @@ parking_lot = "0.11.2"
snafu = "0.6"
time = { path = "../time" }
write_buffer = { path = "../write_buffer" }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]
mutable_batch_lp = { path = "../mutable_batch_lp" }

View File

@ -11,3 +11,4 @@ hashbrown = "0.11"
indexmap = "1.7"
itertools = "0.10.1"
snafu = "0.6"
workspace-hack = { path = "../workspace-hack"}

View File

@ -56,6 +56,7 @@ tokio-util = { version = "0.6.9" }
tracker = { path = "../tracker" }
uuid = { version = "0.8", features = ["serde", "v4"] }
write_buffer = { path = "../write_buffer" }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies] # In alphabetical order
test_helpers = { path = "../test_helpers" }

View File

@ -53,7 +53,7 @@ use write_buffer::core::{WriteBufferReading, WriteBufferWriting};
pub(crate) use crate::db::chunk::DbChunk;
pub(crate) use crate::db::lifecycle::ArcDb;
use crate::db::write::{WriteFilter, WriteFilterNone};
use crate::db::write::{DeleteFilter, DeleteFilterNone, WriteFilter, WriteFilterNone};
use crate::{
db::{
access::QueryCatalogAccess,
@ -522,25 +522,43 @@ impl Db {
/// Store a delete
pub fn store_delete(&self, delete: &DmlDelete) -> Result<()> {
self.store_filtered_delete(delete, DeleteFilterNone::default())
}
/// Store a delete with the provided [`DeleteFilter`]
pub fn store_filtered_delete(
&self,
delete: &DmlDelete,
filter: impl DeleteFilter,
) -> Result<()> {
let predicate = Arc::new(delete.predicate().clone());
match delete.table_name() {
None => {
// Note: This assumes tables cannot be removed from the catalog and therefore
// this lock gap is not problematic
for table_name in self.catalog.table_names() {
self.delete(&table_name, Arc::clone(&predicate))
self.delete_filtered(&table_name, Arc::clone(&predicate), filter)
.expect("table exists")
}
Ok(())
}
Some(table_name) => self.delete(table_name, predicate),
Some(table_name) => self.delete_filtered(table_name, predicate, filter),
}
}
/// Delete data from a table on a specified predicate
///
/// Returns an error if the table cannot be found in the catalog
pub fn delete(&self, table_name: &str, delete_predicate: Arc<DeletePredicate>) -> Result<()> {
self.delete_filtered(table_name, delete_predicate, DeleteFilterNone::default())
}
fn delete_filtered(
&self,
table_name: &str,
delete_predicate: Arc<DeletePredicate>,
filter: impl DeleteFilter,
) -> Result<()> {
// collect delete predicates on preserved partitions for a catalog transaction
let mut affected_persisted_chunks = vec![];
@ -558,6 +576,10 @@ impl Db {
for chunk in chunks {
// save the delete predicate in the chunk
let mut chunk = chunk.write();
if !filter.filter_chunk(&chunk) {
continue;
}
chunk.add_delete_predicate(Arc::clone(&delete_predicate));
// We should only report persisted chunks or chunks that are currently being persisted, because the
@ -652,6 +674,40 @@ impl Db {
fut.await.context(TaskCancelled)?.context(LifecycleError)
}
/// Compact all provided persisted chunks
pub async fn compact_object_store_chunks(
self: &Arc<Self>,
table_name: &str,
partition_key: &str,
chunk_ids: Vec<ChunkId>,
) -> Result<Option<Arc<DbChunk>>> {
if chunk_ids.is_empty() {
return Ok(None);
}
// Use explicit scope to ensure the async generator doesn't
// assume the locks have to possibly live across the `await`
let fut = {
let partition = self.partition(table_name, partition_key)?;
let partition = LockableCatalogPartition::new(Arc::clone(self), partition);
let partition = partition.read();
// todo: set these chunks
let chunks = vec![];
// Lock partition for write
let partition = partition.upgrade();
// invoke compact
let (_, fut) =
lifecycle::compact_object_store::compact_object_store_chunks(partition, chunks)
.context(LifecycleError)?;
fut
};
fut.await.context(TaskCancelled)?.context(LifecycleError)
}
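A hedged sketch (not part of this diff) of how a caller might drive this new method once chunk selection is wired up; the chunk list above is still hard-coded to empty, so the returned future resolves to `Ok(None)` until the remaining todos land. The function, table, and partition names below are illustrative only, and the crate-local `Result` alias is assumed.
// Hypothetical caller; `db`, "table1"/"part1" and `ids` are illustrative.
async fn compact_persisted_chunks_sketch(db: &Arc<Db>, ids: Vec<ChunkId>) -> Result<()> {
    let maybe_chunk = db
        .compact_object_store_chunks("table1", "part1", ids)
        .await?;
    // Until the remaining todos are implemented, no compacted chunk is returned.
    assert!(maybe_chunk.is_none());
    Ok(())
}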
/// Persist given partition.
///
/// If `force` is `true` will persist all unpersisted data regardless of arrival time

View File

@ -175,6 +175,10 @@ impl ChunkStage {
pub fn is_open(&self) -> bool {
matches!(self, ChunkStage::Open { .. })
}
pub fn is_persisted(&self) -> bool {
matches!(self, ChunkStage::Persisted { .. })
}
}
/// The catalog representation of a Chunk in IOx. Note that a chunk
@ -398,6 +402,10 @@ impl CatalogChunk {
&self.stage
}
pub fn is_persisted(&self) -> bool {
self.stage.is_persisted()
}
/// Returns the AccessRecorder used to record access to this chunk's data by queries
pub fn access_recorder(&self) -> &AccessRecorder {
&self.access_recorder
@ -724,6 +732,27 @@ impl CatalogChunk {
}
}
/// Set the persisted chunk to be compacting
pub fn set_compacting_object_store(&mut self, registration: &TaskRegistration) -> Result<()> {
match &self.stage {
ChunkStage::Open { .. } | ChunkStage::Frozen { .. } => {
unexpected_state!(
self,
"setting compacting object store",
"Persisted",
&self.stage
)
}
ChunkStage::Persisted { .. } => {
self.set_lifecycle_action(
ChunkLifecycleAction::CompactingObjectStore,
registration,
)?;
Ok(())
}
}
}
/// Start lifecycle action that should move the chunk into the _persisted_ stage.
pub fn set_writing_to_object_store(&mut self, registration: &TaskRegistration) -> Result<()> {
// This ensures the closing logic is consistent but doesn't break code that
@ -888,12 +917,7 @@ mod tests {
use data_types::{delete_predicate::DeleteExpr, timestamp::TimestampRange};
use mutable_buffer::test_helpers::write_lp_to_new_chunk;
use parquet_file::{
chunk::ParquetChunk,
test_utils::{
make_chunk as make_parquet_chunk_with_store, make_iox_object_store, TestSize,
},
};
use parquet_file::test_utils::generator::{ChunkGenerator, GeneratorConfig};
#[test]
fn test_new_open() {
@ -917,7 +941,7 @@ mod tests {
let mut chunk = make_persisted_chunk().await;
assert_eq!(
chunk.freeze().unwrap_err().to_string(),
"Internal Error: unexpected chunk state for Chunk('db':'table1':'part1':00000000-0000-0000-0000-000000000000) \
"Internal Error: unexpected chunk state for Chunk('db1':'table1':'part1':00000000-0000-0000-0000-000000000001) \
during setting closed. Expected Open or Frozen, got Persisted"
);
}
@ -1103,11 +1127,6 @@ mod tests {
write_lp_to_new_chunk(&format!("{} bar=1 10", table_name))
}
async fn make_parquet_chunk(addr: ChunkAddr) -> ParquetChunk {
let iox_object_store = make_iox_object_store().await;
make_parquet_chunk_with_store(iox_object_store, "foo", addr, TestSize::Full).await
}
fn chunk_addr() -> ChunkAddr {
ChunkAddr {
db_name: Arc::from("db"),
@ -1131,11 +1150,12 @@ mod tests {
}
async fn make_persisted_chunk() -> CatalogChunk {
let addr = chunk_addr();
let now = Time::from_timestamp_nanos(43564);
let mut generator = ChunkGenerator::new().await;
generator.set_config(GeneratorConfig::NoData);
let (parquet_chunk, metadata) = generator.generate().await;
let addr = ChunkAddr::new(generator.partition(), metadata.chunk_id);
// assemble ParquetChunk
let parquet_chunk = make_parquet_chunk(addr.clone()).await;
let now = Time::from_timestamp_nanos(43564);
CatalogChunk::new_object_store_only(
addr,

View File

@ -14,7 +14,11 @@ use persistence_windows::{
};
use schema::Schema;
use snafu::{OptionExt, Snafu};
use std::{collections::BTreeMap, fmt::Display, sync::Arc};
use std::{
collections::{BTreeMap, BTreeSet},
fmt::Display,
sync::Arc,
};
use time::{Time, TimeProvider};
use tracker::RwLock;
@ -368,6 +372,35 @@ impl Partition {
self.chunks.iter()
}
/// Return true if no other persisted chunks fall in the middle of
/// the provided chunk orders
// NGA todo: test_compact_os_non_contiguous_chunks in compact_object_store.rs
// covers this; more unit tests will be added right here once PR #3167
// (ChunkGenerator) is merged
pub fn contiguous_object_store_chunks(&self, chunk_orders: &BTreeSet<ChunkOrder>) -> bool {
// Last order in the chunk_orders for comparison
let last_order_element = chunk_orders.iter().rev().next();
let last_order = match last_order_element {
Some(last_order) => last_order,
None => {
return true;
} // provided chunk_orders is empty
};
let chunks = self.chunks();
for chunk in chunks {
let chunk = chunk.read();
if chunk.is_persisted() {
let order = chunk.order();
// this chunk does not belong to chunk_orders but in the middle of them
if !chunk_orders.contains(&order) && order < *last_order {
return false;
}
}
}
true
}
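A standalone sketch of the contiguity rule implemented above, using plain `u32` orders in place of catalog chunks. `is_contiguous` is a hypothetical helper, not part of the codebase; it only illustrates the check that every persisted order below the largest candidate order must itself be a candidate.
use std::collections::BTreeSet;
fn is_contiguous(persisted_orders: &[u32], candidates: &BTreeSet<u32>) -> bool {
    // Largest candidate order; an empty candidate set is trivially contiguous.
    let last = match candidates.iter().next_back() {
        Some(last) => *last,
        None => return true,
    };
    // A persisted order below `last` that is not a candidate sits "in the middle".
    persisted_orders
        .iter()
        .all(|order| candidates.contains(order) || *order >= last)
}
fn main() {
    assert!(is_contiguous(&[1, 2, 3], &BTreeSet::from([1, 2]))); // 3 lies above the range
    assert!(!is_contiguous(&[1, 2, 3], &BTreeSet::from([1, 3]))); // 2 sits in the middle
}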
/// Return a PartitionSummary for this partition. If the partition
/// has no chunks, returns None.
pub fn summary(&self) -> Option<PartitionSummary> {

View File

@ -33,6 +33,7 @@ pub(crate) use persist::persist_chunks;
pub(crate) use unload::unload_read_buffer_chunk;
mod compact;
pub(crate) mod compact_object_store;
mod drop;
mod error;
mod persist;
@ -201,6 +202,17 @@ impl LockablePartition for LockableCatalogPartition {
Ok(tracker)
}
fn compact_object_store_chunks(
partition: LifecycleWriteGuard<'_, Partition, Self>,
chunks: Vec<LifecycleWriteGuard<'_, CatalogChunk, Self::Chunk>>,
) -> Result<TaskTracker<Job>, Self::Error> {
info!(table=%partition.table_name(), partition=%partition.partition_key(), "compacting object store chunks");
let (tracker, fut) = compact_object_store::compact_object_store_chunks(partition, chunks)?;
let _ =
tokio::spawn(async move { fut.await.log_if_error("compacting object store chunks") });
Ok(tracker)
}
fn prepare_persist(
partition: &mut LifecycleWriteGuard<'_, Self::Partition, Self>,
force: bool,

View File

@ -0,0 +1,483 @@
//! This module compacts object store chunks (aka persisted chunks)
use super::{
error::{
ChunksNotContiguous, ChunksNotInPartition, EmptyChunks, ParquetChunkError,
WritingToObjectStore,
},
LockableCatalogChunk, LockableCatalogPartition, Result,
};
use crate::{
db::{
catalog::{chunk::CatalogChunk, partition::Partition},
lifecycle::merge_schemas,
DbChunk,
},
Db,
};
use data_types::{
chunk_metadata::{ChunkAddr, ChunkId, ChunkOrder},
delete_predicate::DeletePredicate,
job::Job,
partition_metadata::PartitionAddr,
};
use datafusion::physical_plan::SendableRecordBatchStream;
use futures::Future;
use lifecycle::LifecycleWriteGuard;
use observability_deps::tracing::info;
use parquet_file::{
chunk::{ChunkMetrics as ParquetChunkMetrics, ParquetChunk},
metadata::IoxMetadata,
storage::Storage,
};
use persistence_windows::checkpoint::{DatabaseCheckpoint, PartitionCheckpoint};
use query::{compute_sort_key, exec::ExecutorType, frontend::reorg::ReorgPlanner, QueryChunkMeta};
use schema::Schema;
use snafu::ResultExt;
use std::{
collections::{BTreeSet, HashSet},
sync::Arc,
};
use time::Time;
use tracker::{TaskRegistration, TaskTracker, TrackedFuture, TrackedFutureExt};
/// Compact the provided object store chunks into a single object store chunk,
/// returning the newly created chunk
///
/// The function will error if
/// . no chunks are provided
/// . the provided chunk(s) do not belong to the provided partition
/// . not all provided chunks are persisted
/// . the provided chunks are not contiguous
///
/// Implementation steps
/// . verify the eligibility of the input OS chunks and mark them as ready to compact
/// . compact the chunks
/// . persist the compacted output into an OS chunk
/// . drop the old chunks and make the new chunk available in one transaction
pub(crate) fn compact_object_store_chunks(
partition: LifecycleWriteGuard<'_, Partition, LockableCatalogPartition>,
chunks: Vec<LifecycleWriteGuard<'_, CatalogChunk, LockableCatalogChunk>>,
) -> Result<(
TaskTracker<Job>,
TrackedFuture<impl Future<Output = Result<Option<Arc<DbChunk>>>> + Send>,
)> {
// Track compaction duration
let now = std::time::Instant::now();
// Register the compacting job
let db = Arc::clone(&partition.data().db);
let partition_addr = partition.addr().clone();
let chunk_ids: Vec<_> = chunks.iter().map(|x| x.id()).collect();
info!(%partition_addr, ?chunk_ids, "compacting object store chunks");
let (tracker, registration) = db.jobs.register(Job::CompactObjectStoreChunks {
partition: partition.addr().clone(),
chunks: chunk_ids.clone(),
});
// Step 1: Verify input while marking and snapshotting the chunks for compacting
let compacting_os_chunks = mark_chunks_to_compact(partition, chunks, &registration)?;
let _delete_predicates_before = compacting_os_chunks.delete_predicates;
let fut = async move {
// track future runtime
let fut_now = std::time::Instant::now();
// Step 2: Compact the os chunks into a stream
let compacted_stream = compact_chunks(&db, &compacting_os_chunks.os_chunks).await?;
let compacted_rows;
let _schema = compacted_stream.schema;
let sort_key = compacted_stream.sort_key;
// Step 3: Start to persist files and update the preserved catalog accordingly
// This process needs to hold the cleanup lock so the persisted file is not deleted right after
// it is created and before it is recorded in the preserved catalog
{
// fetch shared (= read) guard preventing the cleanup job from deleting our files
let _guard = db.cleanup_lock.read().await;
// Step 3.1: Write the chunk as a parquet file into the object store
let iox_metadata = IoxMetadata {
creation_timestamp: db.time_provider.now(),
table_name: Arc::clone(&partition_addr.table_name),
partition_key: Arc::clone(&partition_addr.partition_key),
chunk_id: ChunkId::new(),
partition_checkpoint: compacting_os_chunks.partition_checkpoint.clone(),
database_checkpoint: compacting_os_chunks.database_checkpoint.clone(),
time_of_first_write: compacting_os_chunks.time_of_first_write,
time_of_last_write: compacting_os_chunks.time_of_last_write,
chunk_order: compacting_os_chunks.min_order,
};
let compacted_and_persisted_chunk = persist_stream_to_chunk(
&db,
&partition_addr,
compacted_stream.stream,
iox_metadata,
)
.await?;
compacted_rows = compacted_and_persisted_chunk.rows();
// Step 3.2: Update the preserved catalogs to use the newly created os_chunk
// Todo: This will be done in a sub-function that creates a single transaction that:
// . drops all os_chunks from the preserved catalog
// . adds the newly created os_chunk to the preserved catalog
// Extra: delete_predicates_after must be included here or below (details to be figured out)
} // End of cleanup locking
// Step 4: Update the in-memory catalogs to use the newly created os_chunk
// . Drop all os_chunks from the in-memory catalog
// . add the newly created os_chunk to the in-memory catalog
// This step can be done outside a transaction because the in-memory catalog
// was designed to be fault tolerant
// - Extra note: If there is a risk that the parquet files of os_chunks are
// permanently deleted from the Object Store between step 3 and step 4,
// we might need to put steps 3 and 4 in the same transaction
// Log the summary
let elapsed = now.elapsed();
// input rows per second
let throughput =
(compacting_os_chunks.input_rows as u128 * 1_000_000_000) / elapsed.as_nanos();
info!(input_chunks=chunk_ids.len(),
%compacting_os_chunks.input_rows, %compacted_rows,
%sort_key,
compaction_took = ?elapsed,
fut_execution_duration= ?fut_now.elapsed(),
rows_per_sec=?throughput,
"object store chunk(s) compacted");
Ok(None) // todo: will be a real chunk when all todos done
};
Ok((tracker, fut.track(registration)))
}
/// Verify that the provided chunks are eligible for compaction, then mark and snapshot them
/// to get them ready for compacting
///
/// Returns an error if
/// . the provided chunks do not belong to the provided partition
/// . not all provided chunks are persisted
/// . the provided chunks are not contiguous
///
/// Returns:
/// . min(time_of_first_write) of the provided chunks
/// . max(time_of_last_write) of the provided chunks
/// . total rows of the provided chunks to be compacted
/// . all delete predicates of the provided chunks
/// . snapshots of the provided chunks
/// . min(order) of the provided chunks
/// . max(database_checkpoint) of the provided chunks
/// . max(partition_checkpoint) of the provided chunks
fn mark_chunks_to_compact(
partition: LifecycleWriteGuard<'_, Partition, LockableCatalogPartition>,
chunks: Vec<LifecycleWriteGuard<'_, CatalogChunk, LockableCatalogChunk>>,
registration: &TaskRegistration,
) -> Result<CompactingOsChunks> {
// no chunks provided
if chunks.is_empty() {
return EmptyChunks {}.fail();
}
let db = Arc::clone(&partition.data().db);
let partition_addr = partition.addr().clone();
// Mark and snapshot chunks, then drop locks
let mut time_of_first_write = Time::MAX;
let mut time_of_last_write = Time::MIN;
let mut chunk_orders = BTreeSet::new();
let mut input_rows = 0;
let mut delete_predicates: HashSet<Arc<DeletePredicate>> = HashSet::new();
let mut min_order = ChunkOrder::MAX;
// initialize checkpoints
let database_checkpoint = DatabaseCheckpoint::new(Default::default());
let partition_checkpoint = PartitionCheckpoint::new(
Arc::clone(&partition_addr.table_name),
Arc::clone(&partition_addr.partition_key),
Default::default(),
Time::from_timestamp_nanos(0),
);
let os_chunks = chunks
.into_iter()
.map(|mut chunk| {
// Sanity-check
assert!(Arc::ptr_eq(&db, &chunk.data().db));
assert_eq!(
chunk.table_name().as_ref(),
partition_addr.table_name.as_ref()
);
// provided chunks not in the provided partition
if chunk.key() != partition_addr.partition_key.as_ref() {
return ChunksNotInPartition {}.fail();
}
input_rows += chunk.table_summary().total_count();
let candidate_first = chunk.time_of_first_write();
time_of_first_write = std::cmp::min(time_of_first_write, candidate_first);
let candidate_last = chunk.time_of_last_write();
time_of_last_write = std::cmp::max(time_of_last_write, candidate_last);
delete_predicates.extend(chunk.delete_predicates().iter().cloned());
min_order = min_order.min(chunk.order());
chunk_orders.insert(chunk.order());
// TODO: get the chunk's database_checkpoint and partition_checkpoint and keep the max
// Set the chunk to the compacting-object-store lifecycle action.
// This function will also error out if the chunk is not yet persisted
chunk.set_compacting_object_store(registration)?;
Ok(DbChunk::snapshot(&*chunk))
})
.collect::<Result<Vec<_>>>()?;
// Verify that all the provided chunks are contiguous
if !partition.contiguous_object_store_chunks(&chunk_orders) {
return ChunksNotContiguous {}.fail();
}
// drop partition lock
std::mem::drop(partition);
Ok(CompactingOsChunks {
time_of_first_write,
time_of_last_write,
input_rows,
delete_predicates,
os_chunks,
min_order,
database_checkpoint,
partition_checkpoint,
})
}
/// Data about the object store chunks to be compacted, as returned by `mark_chunks_to_compact`
#[derive(Debug, Clone)]
struct CompactingOsChunks {
time_of_first_write: Time,
time_of_last_write: Time,
input_rows: u64,
delete_predicates: HashSet<Arc<DeletePredicate>>,
os_chunks: Vec<Arc<DbChunk>>,
min_order: ChunkOrder,
database_checkpoint: DatabaseCheckpoint,
partition_checkpoint: PartitionCheckpoint,
}
/// Create a query plan to compact the given DbChunks and return its output stream
/// Returns:
/// . a stream of output record batches from the scanned chunks (`SendableRecordBatchStream`);
///   deleted and duplicated data is eliminated during the scan
/// . the output schema of the compact plan
/// . the sort key of the output data
async fn compact_chunks(db: &Db, query_chunks: &[Arc<DbChunk>]) -> Result<CompactedStream> {
// Create an execution context to run (and track) the compaction plan
let ctx = db.exec.new_context(ExecutorType::Reorg);
// Compute the sort key for the compacted output
let sort_key = compute_sort_key(query_chunks.iter().map(|x| x.summary()));
let sort_key_str = format!("\"{}\"", sort_key); // for logging
// Merge schema of the compacting chunks
let merged_schema = merge_schemas(query_chunks);
// Build compact query plan
let (plan_schema, plan) = ReorgPlanner::new().compact_plan(
Arc::clone(&merged_schema),
query_chunks.iter().map(Arc::clone),
sort_key,
)?;
let physical_plan = ctx.prepare_plan(&plan).await?;
// run the plan
let stream = ctx.execute_stream(physical_plan).await?;
Ok(CompactedStream {
stream,
schema: plan_schema,
sort_key: sort_key_str,
})
}
/// Struct holding output of a compacted stream
struct CompactedStream {
stream: SendableRecordBatchStream,
schema: Arc<Schema>,
sort_key: String,
}
/// Persist a provided stream to a new OS chunk
async fn persist_stream_to_chunk<'a>(
db: &'a Db,
partition_addr: &'a PartitionAddr,
stream: SendableRecordBatchStream,
iox_metadata: IoxMetadata,
) -> Result<Arc<ParquetChunk>> {
// Create a storage to save data of this chunk
let storage = Storage::new(Arc::clone(&db.iox_object_store));
// Write the chunk stream data into a parquet file in the storage
let chunk_addr = ChunkAddr::new(partition_addr, iox_metadata.chunk_id);
let (path, file_size_bytes, parquet_metadata) = storage
.write_to_object_store(chunk_addr, stream, iox_metadata)
.await
.context(WritingToObjectStore)?;
// Create parquet chunk for the parquet file
let parquet_metadata = Arc::new(parquet_metadata);
let metrics = ParquetChunkMetrics::new(db.metric_registry.as_ref());
let parquet_chunk = Arc::new(
ParquetChunk::new(
&path,
Arc::clone(&db.iox_object_store),
file_size_bytes,
Arc::clone(&parquet_metadata),
Arc::clone(&partition_addr.table_name),
Arc::clone(&partition_addr.partition_key),
metrics,
)
.context(ParquetChunkError)?,
);
Ok(parquet_chunk)
}
////////////////////////////////////////////////////////////
#[cfg(test)]
mod tests {
use super::*;
use crate::{db::test_helpers::write_lp, utils::make_db};
use lifecycle::{LockableChunk, LockablePartition};
use query::QueryChunk;
#[tokio::test]
async fn test_compact_os_no_chunks() {
test_helpers::maybe_start_logging();
let db = make_db().await.db;
let partition_key = "1970-01-01T00";
write_lp(&db, "cpu,tag1=cupcakes bar=1 10").await;
let db_partition = db.partition("cpu", partition_key).unwrap();
let partition = LockableCatalogPartition::new(Arc::clone(&db), Arc::clone(&db_partition));
let partition = partition.write();
let (_, registration) = db.jobs.register(Job::CompactObjectStoreChunks {
partition: partition.addr().clone(),
chunks: vec![],
});
let compact_no_chunks = mark_chunks_to_compact(partition, vec![], &registration);
let err = compact_no_chunks.unwrap_err();
assert!(
err.to_string()
.contains("No object store chunks provided for compacting"),
"No object store chunks provided for compacting"
);
}
#[tokio::test]
async fn test_compact_os_non_os_chunks() {
test_helpers::maybe_start_logging();
let db = make_db().await.db;
let partition_key = "1970-01-01T00";
write_lp(&db, "cpu,tag1=cupcakes bar=1 10").await;
let db_partition = db.partition("cpu", partition_key).unwrap();
// try to compact a chunk that has not been persisted
let partition = LockableCatalogPartition::new(Arc::clone(&db), Arc::clone(&db_partition));
let partition = partition.read();
let chunks = LockablePartition::chunks(&partition);
assert_eq!(chunks.len(), 1);
let partition = partition.upgrade();
let chunk = chunks[0].write();
let (_, registration) = db.jobs.register(Job::CompactObjectStoreChunks {
partition: partition.addr().clone(),
chunks: vec![chunk.id()],
});
let compact_non_persisted_chunks =
mark_chunks_to_compact(partition, vec![chunk], &registration);
let err = compact_non_persisted_chunks.unwrap_err();
assert!(
err.to_string().contains("Expected Persisted, got Open"),
"Expected Persisted, got Open"
);
}
#[tokio::test]
async fn test_compact_os_non_contiguous_chunks() {
test_helpers::maybe_start_logging();
let db = make_db().await.db;
let partition_key = "1970-01-01T00";
write_lp(&db, "cpu,tag1=cupcakes bar=1 10").await;
let db_partition = db.partition("cpu", partition_key).unwrap();
// persist chunk 1
db.persist_partition("cpu", partition_key, true)
.await
.unwrap()
.unwrap()
.id();
//
// persist chunk 2
write_lp(db.as_ref(), "cpu,tag1=chunk2,tag2=a bar=2 10").await;
db.persist_partition("cpu", partition_key, true)
.await
.unwrap()
.unwrap()
.id();
//
// persist chunk 3
write_lp(db.as_ref(), "cpu,tag1=chunk3,tag2=a bar=2 30").await;
db.persist_partition("cpu", partition_key, true)
.await
.unwrap()
.unwrap()
.id();
//
// Add a MUB
write_lp(db.as_ref(), "cpu,tag1=chunk4,tag2=a bar=2 40").await;
// try to compact 2 non-contiguous chunks: chunk 1 and chunk 3
let partition = LockableCatalogPartition::new(Arc::clone(&db), Arc::clone(&db_partition));
let partition = partition.read();
let chunks = LockablePartition::chunks(&partition);
assert_eq!(chunks.len(), 4);
let partition = partition.upgrade();
let chunk1 = chunks[0].write();
let chunk3 = chunks[2].write();
let (_, registration) = db.jobs.register(Job::CompactObjectStoreChunks {
partition: partition.addr().clone(),
chunks: vec![chunk1.id(), chunk3.id()],
});
let compact_non_contiguous_persisted_chunks =
mark_chunks_to_compact(partition, vec![chunk1, chunk3], &registration);
let err = compact_non_contiguous_persisted_chunks.unwrap_err();
assert!(
err.to_string()
.contains("Cannot compact the provided persisted chunks. They are not contiguous"),
"Cannot compact the provided persisted chunks. They are not contiguous"
);
}
// TODO: add tests
// . compact 2 contiguous OS chunks (a hedged sketch of this case is included below)
// . compact 3 chunks with duplicated data
// . compact with deletes before compacting
// . compact with deletes happening during compaction
// . verify checkpoints
// . replay
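// A minimal sketch (not part of the original change) of the first TODO above: marking two
// contiguous, persisted object store chunks for compaction. It reuses only the helpers
// exercised by the tests above; the assumption is that two consecutive persists produce two
// contiguous object store chunks, and the assertion on `os_chunks` is likewise an assumption.
#[tokio::test]
async fn test_compact_os_two_contiguous_chunks_sketch() {
    test_helpers::maybe_start_logging();

    let db = make_db().await.db;
    let partition_key = "1970-01-01T00";
    write_lp(&db, "cpu,tag1=chunk1 bar=1 10").await;
    let db_partition = db.partition("cpu", partition_key).unwrap();

    // persist chunk 1
    db.persist_partition("cpu", partition_key, true)
        .await
        .unwrap()
        .unwrap()
        .id();

    // persist chunk 2
    write_lp(db.as_ref(), "cpu,tag1=chunk2,tag2=a bar=2 20").await;
    db.persist_partition("cpu", partition_key, true)
        .await
        .unwrap()
        .unwrap()
        .id();

    // mark the two contiguous persisted chunks for compaction
    let partition = LockableCatalogPartition::new(Arc::clone(&db), Arc::clone(&db_partition));
    let partition = partition.read();
    let chunks = LockablePartition::chunks(&partition);
    assert_eq!(chunks.len(), 2);
    let partition = partition.upgrade();
    let chunk1 = chunks[0].write();
    let chunk2 = chunks[1].write();
    let (_, registration) = db.jobs.register(Job::CompactObjectStoreChunks {
        partition: partition.addr().clone(),
        chunks: vec![chunk1.id(), chunk2.id()],
    });

    let marked = mark_chunks_to_compact(partition, vec![chunk1, chunk2], &registration)
        .expect("contiguous persisted chunks should be accepted");
    assert_eq!(marked.os_chunks.len(), 2);
}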
}

View File

@ -39,6 +39,11 @@ pub enum Error {
chunk_id: u32,
},
#[snafu(display("Error reading from object store: {}", source))]
ReadingObjectStore {
source: parquet_file::storage::Error,
},
#[snafu(display("Error writing to object store: {}", source))]
WritingToObjectStore {
source: parquet_file::storage::Error,
@ -57,6 +62,17 @@ pub enum Error {
#[snafu(display("Cannot drop unpersisted chunk: {}", addr))]
CannotDropUnpersistedChunk { addr: ChunkAddr },
#[snafu(display("No object store chunks provided for compacting"))]
EmptyChunks {},
#[snafu(display(
"Cannot compact chunks because at least one does not belong to the given partition"
))]
ChunksNotInPartition {},
#[snafu(display("Cannot compact the provided persisted chunks. They are not contiguous"))]
ChunksNotContiguous {},
}
pub type Result<T, E = Error> = std::result::Result<T, E>;

View File

@ -44,6 +44,8 @@ use super::{
/// Returns a future registered with the tracker registry, and the corresponding tracker
///
/// The caller can either spawn this future to tokio, or block directly on it
///
/// NB: This function is tightly coupled with the semantics of persist_chunks
pub(super) fn write_chunk_to_object_store(
partition: LifecycleWriteGuard<'_, Partition, LockableCatalogPartition>,
mut chunk: LifecycleWriteGuard<'_, CatalogChunk, LockableCatalogChunk>,
@ -155,6 +157,13 @@ pub(super) fn write_chunk_to_object_store(
.context(ParquetChunkError)?,
);
// Collect any pending delete predicate from any partitions and include them in
// the transaction. This MUST be done after the DatabaseCheckpoint is computed
//
// This ensures that any deletes encountered during or prior to the replay window
// must have been made durable within the catalog for any persisted chunks
let delete_handle = db.delete_predicates_mailbox.consume().await;
// IMPORTANT: Start transaction AFTER writing the actual parquet file so we do not hold
// the transaction lock (that is part of the PreservedCatalog) for too long.
// By using the cleanup lock (see above) it is ensured that the file that we
@ -169,7 +178,7 @@ pub(super) fn write_chunk_to_object_store(
};
transaction.add_parquet(&info);
// add delete predicates
// add delete predicates for this chunk
//
// Delete predicates are handled in the following way
// 1. Predicates added before this chunk was created (aka before the DataFusion split plan was running):
@ -182,9 +191,16 @@ pub(super) fn write_chunk_to_object_store(
transaction.delete_predicate(&predicate, &[addr.clone().into()]);
}
for (predicate, chunks) in delete_handle.outbox() {
transaction.delete_predicate(predicate, chunks);
}
// preserved commit
let ckpt_handle = transaction.commit().await.context(CommitError)?;
// The deletes were persisted as part of the commit; flush them from the mailbox
delete_handle.flush();
// in-mem commit
{
let mut guard = chunk.write();
@ -195,6 +211,7 @@ pub(super) fn write_chunk_to_object_store(
let create_checkpoint =
ckpt_handle.revision_counter() % catalog_transactions_until_checkpoint == 0;
if create_checkpoint {
// Commit is already done, so we can just scan the catalog for the state.
//

View File

@ -18,7 +18,8 @@ use snafu::{ResultExt, Snafu};
use time::Time;
use write_buffer::core::WriteBufferReading;
use crate::db::write::WriteFilter;
use crate::db::catalog::chunk::{CatalogChunk, ChunkStage};
use crate::db::write::{DeleteFilter, WriteFilter};
use crate::Db;
#[allow(clippy::enum_variant_names)]
@ -243,8 +244,7 @@ pub async fn perform_replay(
for n_try in 1..=n_tries {
let result = match &dml_operation {
DmlOperation::Write(write) => db.store_filtered_write(write, filter),
// TODO: Only apply delete to unpersisted chunks (#3125)
DmlOperation::Delete(delete) => db.store_delete(delete),
DmlOperation::Delete(delete) => db.store_filtered_delete(delete, filter),
};
match result {
@ -370,6 +370,19 @@ impl<'a> WriteFilter for ReplayFilter<'a> {
}
}
impl<'a> DeleteFilter for ReplayFilter<'a> {
fn filter_chunk(&self, chunk: &CatalogChunk) -> bool {
// The persist lifecycle action MUST persist any outstanding delete predicates
//
// As such deletes should only be applied to unpersisted chunks - i.e.
// those containing data from the in-progress replay operation
//
// This avoids a situation where a delete could be applied to a chunk containing
// data from writes sequenced after the delete being replayed
!matches!(chunk.stage(), ChunkStage::Persisted { .. })
}
}
/// Describes where a given sequence number, and the entire data batch associated with it, falls
/// relative to the range of persisted and partially persisted sequence numbers (extracted from
/// the partition checkpoint).
#[derive(Debug, PartialEq)]
@ -431,9 +444,13 @@ mod tests {
use arrow_util::assert_batches_eq;
use data_types::{
database_rules::{PartitionTemplate, TemplatePart},
delete_predicate::DeletePredicate,
non_empty::NonEmptyString,
sequence::Sequence,
server_id::ServerId,
timestamp::TimestampRange,
};
use dml::{DmlDelete, DmlMeta};
use object_store::ObjectStore;
use persistence_windows::{
checkpoint::{PartitionCheckpoint, PersistCheckpointBuilder, ReplayPlanner},
@ -459,6 +476,14 @@ mod tests {
lp: &'static str,
}
#[derive(Debug)]
struct TestDelete {
sequencer_id: u32,
sequence_number: u64,
table_name: Option<&'static str>,
predicate: DeletePredicate,
}
/// Different checks for replay tests
#[derive(Debug)]
enum Check {
@ -514,6 +539,9 @@ mod tests {
///
/// Persistence and write buffer reads are enabled in preparation to this step.
Await(Vec<Check>),
/// Performs the given delete operations
Delete(Vec<TestDelete>),
}
#[derive(Debug)]
@ -724,6 +752,21 @@ mod tests {
tokio::time::sleep(Duration::from_millis(100)).await;
}
}
Step::Delete(deletes) => {
for delete in deletes {
let delete = DmlDelete::new(
delete.predicate,
delete.table_name.and_then(NonEmptyString::new),
DmlMeta::sequenced(
Sequence::new(delete.sequencer_id, delete.sequence_number),
time::Time::from_timestamp_nanos(0),
None,
0,
),
);
write_buffer_state.push_delete(delete)
}
}
}
}
}
@ -2568,6 +2611,126 @@ mod tests {
.await;
}
#[tokio::test]
async fn replay_delete() {
ReplayTest {
steps: vec![
Step::Ingest(vec![TestSequencedEntry {
sequencer_id: 0,
sequence_number: 0,
lp: "table_1,tag_partition_by=a bar=10 10",
}]),
Step::Await(vec![Check::Query(
"select * from table_1 order by bar",
vec![
"+-----+------------------+--------------------------------+",
"| bar | tag_partition_by | time |",
"+-----+------------------+--------------------------------+",
"| 10 | a | 1970-01-01T00:00:00.000000010Z |",
"+-----+------------------+--------------------------------+",
],
)]),
Step::MakeWritesPersistable,
Step::Persist(vec![("table_1", "tag_partition_by_a")]),
Step::Delete(vec![TestDelete {
sequencer_id: 0,
sequence_number: 1,
table_name: None,
predicate: DeletePredicate {
range: TimestampRange { start: 0, end: 20 },
exprs: vec![],
},
}]),
Step::Ingest(vec![TestSequencedEntry {
sequencer_id: 0,
sequence_number: 2,
lp: "table_1,tag_partition_by=b bar=15 15",
}]),
Step::Await(vec![Check::Query(
"select * from table_1 order by bar",
vec![
"+-----+------------------+--------------------------------+",
"| bar | tag_partition_by | time |",
"+-----+------------------+--------------------------------+",
"| 15 | b | 1970-01-01T00:00:00.000000015Z |",
"+-----+------------------+--------------------------------+",
],
)]),
Step::MakeWritesPersistable,
Step::Persist(vec![("table_1", "tag_partition_by_b")]),
Step::Restart,
Step::Replay,
Step::Assert(vec![Check::Query(
"select * from table_1 order by bar",
vec![
"+-----+------------------+--------------------------------+",
"| bar | tag_partition_by | time |",
"+-----+------------------+--------------------------------+",
"| 15 | b | 1970-01-01T00:00:00.000000015Z |",
"+-----+------------------+--------------------------------+",
],
)]),
],
..Default::default()
}
.run()
.await;
}
#[tokio::test]
async fn replay_delete_persisted_chunks() {
ReplayTest {
steps: vec![
Step::Ingest(vec![TestSequencedEntry {
sequencer_id: 0,
sequence_number: 0,
lp: "table_1,tag_partition_by=a bar=10 10",
}]),
Step::Delete(vec![TestDelete {
sequencer_id: 0,
sequence_number: 1,
table_name: None,
predicate: DeletePredicate {
range: TimestampRange { start: 0, end: 11 },
exprs: vec![],
},
}]),
Step::Ingest(vec![TestSequencedEntry {
sequencer_id: 0,
sequence_number: 2,
lp: "table_1,tag_partition_by=b bar=20 10",
}]),
Step::Await(vec![Check::Query(
"select * from table_1 order by bar",
vec![
"+-----+------------------+--------------------------------+",
"| bar | tag_partition_by | time |",
"+-----+------------------+--------------------------------+",
"| 20 | b | 1970-01-01T00:00:00.000000010Z |",
"+-----+------------------+--------------------------------+",
],
)]),
Step::MakeWritesPersistable,
Step::Persist(vec![("table_1", "tag_partition_by_b")]),
Step::Restart,
Step::Replay,
Step::Assert(vec![Check::Query(
"select * from table_1 order by bar",
vec![
"+-----+------------------+--------------------------------+",
"| bar | tag_partition_by | time |",
"+-----+------------------+--------------------------------+",
"| 20 | b | 1970-01-01T00:00:00.000000010Z |",
"+-----+------------------+--------------------------------+",
],
)]),
],
..Default::default()
}
.run()
.await;
}
#[tokio::test]
async fn replay_fail_sequencers_change() {
// create write buffer w/ sequencer 0 and 1

View File

@ -1,6 +1,6 @@
use crate::db::{catalog::Catalog, system_tables::IoxSystemTable};
use arrow::{
array::{ArrayRef, StringBuilder, UInt64Builder},
array::{ArrayRef, StringArray, StringBuilder, UInt64Array},
datatypes::{DataType, Field, Schema, SchemaRef},
error::Result,
record_batch::RecordBatch,
@ -8,7 +8,7 @@ use arrow::{
use data_types::{
chunk_metadata::DetailedChunkSummary,
error::ErrorLogger,
partition_metadata::{PartitionSummary, TableSummary},
partition_metadata::{ColumnSummary, PartitionSummary, TableSummary},
};
use std::{collections::HashMap, sync::Arc};
@ -91,7 +91,7 @@ fn from_partition_summaries(
)
}
/// Implementation of system.column_chunks table
/// Implementation of `system.chunk_columns` table
#[derive(Debug)]
pub(super) struct ChunkColumnsTable {
schema: SchemaRef,
@ -137,79 +137,118 @@ fn assemble_chunk_columns(
schema: SchemaRef,
chunk_summaries: Vec<(Arc<TableSummary>, DetailedChunkSummary)>,
) -> Result<RecordBatch> {
/// Builds an index from column_name -> size
fn make_column_index(summary: &DetailedChunkSummary) -> HashMap<&str, u64> {
summary
.columns
.iter()
.map(|column_summary| {
(
column_summary.name.as_ref(),
column_summary.memory_bytes as u64,
)
})
.collect()
// Create an iterator over each column in each table in each chunk
// so we can build `chunk_columns` column by column
struct EachColumn<'a> {
chunk_summary: &'a DetailedChunkSummary,
column_summary: &'a ColumnSummary,
}
// Assume each chunk has roughly 5 columns
let row_estimate = chunk_summaries.len() * 5;
let rows = chunk_summaries
.iter()
.map(|(table_summary, chunk_summary)| {
table_summary
.columns
.iter()
.map(move |column_summary| EachColumn {
chunk_summary,
column_summary,
})
})
.flatten()
.collect::<Vec<_>>();
let mut partition_key = StringBuilder::new(row_estimate);
let mut chunk_id = StringBuilder::new(row_estimate);
let mut table_name = StringBuilder::new(row_estimate);
let mut column_name = StringBuilder::new(row_estimate);
let mut storage = StringBuilder::new(row_estimate);
let mut row_count = UInt64Builder::new(row_estimate);
let mut null_count = UInt64Builder::new(row_estimate);
let mut min_values = StringBuilder::new(row_estimate);
let mut max_values = StringBuilder::new(row_estimate);
let mut memory_bytes = UInt64Builder::new(row_estimate);
let partition_key = rows
.iter()
.map(|each| each.chunk_summary.inner.partition_key.as_ref())
.map(Some)
.collect::<StringArray>();
// Note that no rows are produced for partitions with no chunks or for
// tables with no partitions: there are other system tables for listing
// tables and columns
for (table_summary, chunk_summary) in chunk_summaries {
let mut column_index = make_column_index(&chunk_summary);
let storage_value = chunk_summary.inner.storage.as_str();
let chunk_id = rows
.iter()
.map(|each| each.chunk_summary.inner.id.get().to_string())
.map(Some)
.collect::<StringArray>();
for column in &table_summary.columns {
partition_key.append_value(chunk_summary.inner.partition_key.as_ref())?;
chunk_id.append_value(chunk_summary.inner.id.get().to_string())?;
table_name.append_value(&chunk_summary.inner.table_name)?;
column_name.append_value(&column.name)?;
storage.append_value(storage_value)?;
row_count.append_value(column.total_count())?;
null_count.append_value(column.null_count())?;
if let Some(v) = column.stats.min_as_str() {
min_values.append_value(v)?;
} else {
min_values.append(false)?;
}
if let Some(v) = column.stats.max_as_str() {
max_values.append_value(v)?;
} else {
max_values.append(false)?;
}
let table_name = rows
.iter()
.map(|each| each.chunk_summary.inner.table_name.as_ref())
.map(Some)
.collect::<StringArray>();
let size = column_index.remove(column.name.as_str());
let column_name = rows
.iter()
.map(|each| each.column_summary.name.as_str())
.map(Some)
.collect::<StringArray>();
memory_bytes.append_option(size)?;
}
}
let storage = rows
.iter()
.map(|each| each.chunk_summary.inner.storage.as_str())
.map(Some)
.collect::<StringArray>();
let row_count = rows
.iter()
.map(|each| each.column_summary.total_count())
.map(Some)
.collect::<UInt64Array>();
let null_count = rows
.iter()
.map(|each| each.column_summary.null_count())
.map(Some)
.collect::<UInt64Array>();
let min_values = rows
.iter()
.map(|each| each.column_summary.stats.min_as_str())
.collect::<StringArray>();
let max_values = rows
.iter()
.map(|each| each.column_summary.stats.max_as_str())
.collect::<StringArray>();
// handle memory bytes specially to avoid having to search for
// each column in ColumnSummary
let memory_bytes = chunk_summaries
.iter()
.map(|(table_summary, chunk_summary)| {
// Don't assume the column order in the DetailedChunkSummary columns is
// consistent with the order in ColumnSummary
let mut column_sizes = chunk_summary
.columns
.iter()
.map(|column_summary| {
(
column_summary.name.as_ref(),
column_summary.memory_bytes as u64,
)
})
.collect::<HashMap<_, _>>();
table_summary
.columns
.iter()
.map(move |column_summary| column_sizes.remove(column_summary.name.as_str()))
})
.flatten()
.collect::<UInt64Array>();
RecordBatch::try_new(
schema,
vec![
Arc::new(partition_key.finish()) as ArrayRef,
Arc::new(chunk_id.finish()),
Arc::new(table_name.finish()),
Arc::new(column_name.finish()),
Arc::new(storage.finish()),
Arc::new(row_count.finish()),
Arc::new(null_count.finish()),
Arc::new(min_values.finish()),
Arc::new(max_values.finish()),
Arc::new(memory_bytes.finish()),
Arc::new(partition_key) as ArrayRef,
Arc::new(chunk_id),
Arc::new(table_name),
Arc::new(column_name),
Arc::new(storage),
Arc::new(row_count),
Arc::new(null_count),
Arc::new(min_values),
Arc::new(max_values),
Arc::new(memory_bytes),
],
)
}

View File

@ -1,3 +1,4 @@
use crate::db::catalog::chunk::CatalogChunk;
use mutable_batch::PartitionWrite;
/// A [`WriteFilter`] provides the ability to mask rows from a [`PartitionWrite`]
@ -27,3 +28,21 @@ impl WriteFilter for WriteFilterNone {
Some(write)
}
}
/// A [`DeleteFilter`] provides the ability to exclude chunks from having a delete applied
///
/// This is important for replay where it needs to prevent deletes from being applied to chunks
/// containing writes sequenced after the delete
pub trait DeleteFilter: Copy {
/// Returns true if the delete should be applied to this chunk
fn filter_chunk(&self, chunk: &CatalogChunk) -> bool;
}
#[derive(Debug, Default, Copy, Clone)]
pub struct DeleteFilterNone {}
impl DeleteFilter for DeleteFilterNone {
fn filter_chunk(&self, _chunk: &CatalogChunk) -> bool {
true
}
}

View File

@ -127,29 +127,11 @@ impl JobRegistryMetrics {
fn duration_histogram_options() -> metric::DurationHistogramOptions {
metric::DurationHistogramOptions::new(vec![
Duration::from_millis(5),
Duration::from_millis(10),
Duration::from_millis(25),
Duration::from_millis(50),
Duration::from_millis(100),
Duration::from_millis(250),
Duration::from_millis(500),
Duration::from_millis(1000),
Duration::from_millis(2500),
Duration::from_millis(5000),
Duration::from_millis(10000),
Duration::from_millis(1_000),
Duration::from_millis(2_500),
Duration::from_millis(5_000),
Duration::from_millis(10_000),
Duration::from_millis(25_000),
Duration::from_millis(50_000),
Duration::from_millis(100_000),
Duration::from_millis(250_000),
Duration::from_millis(500_000),
Duration::from_millis(1_000_000),
Duration::from_millis(2_500_000),
Duration::from_millis(5_000_000),
Duration::from_secs(1),
Duration::from_secs(10),
Duration::from_secs(100),
metric::DURATION_MAX,
])
}
@ -213,9 +195,6 @@ impl JobRegistryMetrics {
if let Some(db_name) = metadata.db_name() {
attributes.insert("db_name", db_name.to_string());
}
if let Some(table) = metadata.table_name() {
attributes.insert("table", table.to_string());
}
attributes
}

View File

@ -1875,6 +1875,95 @@ mod tests {
new_loc_db.wait_for_init().await.unwrap();
}
#[tokio::test]
async fn old_server_config_object_store_path() {
let application = make_application();
let server_id = ServerId::try_from(1).unwrap();
let object_store = application.object_store();
// Server config used to be stored under /[server id]/config.pb. Construct a config in that
// old location that points to a database
let mut old_server_config_path = object_store.new_path();
old_server_config_path.push_dir(&server_id.to_string());
old_server_config_path.set_file_name("config.pb");
// Create database rules and database owner info for a database in object storage
let db_uuid = Uuid::new_v4();
let db_name = DatabaseName::new("mydb").unwrap();
let db_rules = DatabaseRules::new(db_name.clone());
let mut db_path = object_store.new_path();
db_path.push_dir("dbs");
db_path.push_dir(db_uuid.to_string());
let mut db_rules_path = db_path.clone();
db_rules_path.set_file_name("rules.pb");
let persisted_database_rules = management::v1::PersistedDatabaseRules {
uuid: db_uuid.as_bytes().to_vec(),
rules: Some(db_rules.into()),
};
let mut encoded_rules = bytes::BytesMut::new();
generated_types::database_rules::encode_persisted_database_rules(
&persisted_database_rules,
&mut encoded_rules,
)
.unwrap();
let encoded_rules = encoded_rules.freeze();
object_store
.put(&db_rules_path, encoded_rules)
.await
.unwrap();
let mut db_owner_info_path = db_path.clone();
db_owner_info_path.set_file_name("owner.pb");
let owner_info = management::v1::OwnerInfo {
id: server_id.get_u32(),
location: old_server_config_path.to_string(),
transactions: vec![],
};
let mut encoded_owner_info = bytes::BytesMut::new();
generated_types::server_config::encode_database_owner_info(
&owner_info,
&mut encoded_owner_info,
)
.unwrap();
let encoded_owner_info = encoded_owner_info.freeze();
object_store
.put(&db_owner_info_path, encoded_owner_info)
.await
.unwrap();
let config = management::v1::ServerConfig {
databases: [(db_name.to_string(), db_path.to_raw())]
.into_iter()
.collect(),
};
let mut encoded_server_config = bytes::BytesMut::new();
generated_types::server_config::encode_persisted_server_config(
&config,
&mut encoded_server_config,
)
.unwrap();
let encoded_server_config = encoded_server_config.freeze();
object_store
.put(&old_server_config_path, encoded_server_config)
.await
.unwrap();
// Start up server
let server = make_server(Arc::clone(&application));
server.set_id(server_id).unwrap();
server.wait_for_init().await.unwrap();
// Database should init
let database = server.database(&db_name).unwrap();
database.wait_for_init().await.unwrap();
// Server config should be transitioned to the new location
let config = server_config(application.object_store(), server_id).await;
assert_config_contents(&config, &[(&db_name, format!("dbs/{}/", db_uuid))]);
}
#[tokio::test]
async fn db_names_sorted() {
let server = make_server(make_application());
@ -2232,7 +2321,7 @@ mod tests {
let baz_iox_object_store = baz.iox_object_store().unwrap();
let owner_info = management::v1::OwnerInfo {
id: 2,
location: "2/config.pb".to_string(),
location: "nodes/2/config.pb".to_string(),
transactions: vec![],
};
let mut encoded = bytes::BytesMut::new();

View File

@ -10,3 +10,4 @@ parking_lot = "0.11.2"
tempfile = "3.1.0"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
observability_deps = { path = "../observability_deps" }
workspace-hack = { path = "../workspace-hack"}

View File

@ -8,5 +8,6 @@ description = "Time functionality for IOx"
chrono = "0.4"
parking_lot = "0.11"
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]

View File

@ -11,5 +11,6 @@ chrono = "0.4"
observability_deps = { path = "../observability_deps" }
parking_lot = "0.11"
rand = "0.8"
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]

View File

@ -16,5 +16,6 @@ structopt = { version = "0.3.25" }
thrift = { version = "0.13.0" }
tokio = { version = "1.13", features = ["macros", "time", "sync", "rt"] }
trace = { path = "../trace" }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]

View File

@ -19,5 +19,6 @@ parking_lot = "0.11"
pin-project = "1.0"
snafu = "0.6"
tower = "0.4"
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]

View File

@ -17,6 +17,7 @@ pin-project = "1.0"
time = { path = "../time" }
tokio = { version = "1.13", features = ["macros", "time"] }
tokio-util = { version = "0.6.9" }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]
# Need the multi-threaded executor for testing

4 workspace-hack/.gitattributes vendored Normal file
View File

@ -0,0 +1,4 @@
# Avoid putting conflict markers in the generated Cargo.toml file, since their presence breaks
# Cargo.
# Also do not check out the file as CRLF on Windows, as that's what hakari needs.
Cargo.toml merge=binary -crlf

76 workspace-hack/Cargo.toml Normal file
View File

@ -0,0 +1,76 @@
# This file is generated by `cargo hakari`.
# To regenerate, run:
# cargo hakari generate
[package]
name = "workspace-hack"
version = "0.1.0"
description = "workspace-hack package, managed by hakari"
publish = false
# The parts of the file between the BEGIN HAKARI SECTION and END HAKARI SECTION comments
# are managed by hakari.
### BEGIN HAKARI SECTION
[dependencies]
ahash = { version = "0.7", features = ["std"] }
bytes = { version = "1", features = ["std"] }
chrono = { version = "0.4", features = ["clock", "libc", "oldtime", "serde", "std", "time", "winapi"] }
clap = { version = "2", features = ["ansi_term", "atty", "color", "strsim", "suggestions", "vec_map"] }
either = { version = "1", features = ["use_std"] }
futures = { version = "0.3", features = ["alloc", "async-await", "executor", "futures-executor", "std"] }
futures-channel = { version = "0.3", features = ["alloc", "futures-sink", "sink", "std"] }
futures-core = { version = "0.3", features = ["alloc", "std"] }
futures-io = { version = "0.3", default-features = false, features = ["std"] }
futures-sink = { version = "0.3", features = ["alloc", "std"] }
futures-task = { version = "0.3", default-features = false, features = ["alloc", "std"] }
futures-util = { version = "0.3", features = ["alloc", "async-await", "async-await-macro", "channel", "futures-channel", "futures-io", "futures-macro", "futures-sink", "io", "memchr", "proc-macro-hack", "proc-macro-nested", "sink", "slab", "std"] }
getrandom = { version = "0.2", default-features = false, features = ["js", "js-sys", "std", "wasm-bindgen"] }
hashbrown = { version = "0.11", features = ["ahash", "inline-more", "raw"] }
hyper = { version = "0.14", features = ["client", "full", "h2", "http1", "http2", "runtime", "server", "socket2", "stream", "tcp"] }
indexmap = { version = "1", default-features = false, features = ["std"] }
itoa = { version = "0.4", features = ["i128", "std"] }
libc = { version = "0.2", features = ["extra_traits", "std"] }
log = { version = "0.4", default-features = false, features = ["std"] }
memchr = { version = "2", features = ["std", "use_std"] }
num-bigint = { version = "0.4", features = ["std"] }
num-integer = { version = "0.1", default-features = false, features = ["i128", "std"] }
num-traits = { version = "0.2", features = ["i128", "libm", "std"] }
once_cell = { version = "1", features = ["alloc", "parking_lot", "race", "std"] }
rand = { version = "0.8", features = ["alloc", "getrandom", "libc", "rand_chacha", "rand_hc", "small_rng", "std", "std_rng"] }
regex = { version = "1", features = ["aho-corasick", "memchr", "perf", "perf-cache", "perf-dfa", "perf-inline", "perf-literal", "std", "unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] }
regex-automata = { version = "0.1", features = ["regex-syntax", "std"] }
regex-syntax = { version = "0.6", features = ["unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] }
reqwest = { version = "0.11", features = ["__rustls", "__tls", "blocking", "default-tls", "hyper-rustls", "hyper-tls", "json", "native-tls-crate", "rustls", "rustls-tls", "rustls-tls-webpki-roots", "serde_json", "stream", "tokio-native-tls", "tokio-rustls", "webpki-roots"] }
serde = { version = "1", features = ["derive", "rc", "serde_derive", "std"] }
serde_json = { version = "1", features = ["indexmap", "preserve_order", "std"] }
smallvec = { version = "1", default-features = false, features = ["union"] }
tokio = { version = "1", features = ["bytes", "fs", "full", "io-std", "io-util", "libc", "macros", "memchr", "mio", "net", "num_cpus", "once_cell", "parking_lot", "process", "rt", "rt-multi-thread", "signal", "signal-hook-registry", "sync", "time", "tokio-macros", "winapi"] }
tokio-stream = { version = "0.1", features = ["net", "time"] }
tokio-util = { version = "0.6", features = ["codec", "io"] }
tower = { version = "0.4", features = ["balance", "buffer", "discover", "futures-util", "indexmap", "limit", "load", "log", "make", "rand", "ready-cache", "slab", "timeout", "tokio", "tokio-stream", "tokio-util", "tracing", "util"] }
tracing = { version = "0.1", features = ["attributes", "log", "max_level_trace", "release_max_level_debug", "std", "tracing-attributes"] }
tracing-core = { version = "0.1", features = ["lazy_static", "std"] }
tracing-subscriber = { version = "0.3", features = ["alloc", "ansi", "ansi_term", "env-filter", "fmt", "lazy_static", "matchers", "regex", "registry", "sharded-slab", "smallvec", "std", "thread_local", "tracing", "tracing-log"] }
url = { version = "2", default-features = false, features = ["serde"] }
uuid = { version = "0.8", features = ["getrandom", "serde", "std", "v4"] }
[build-dependencies]
ahash = { version = "0.7", features = ["std"] }
bytes = { version = "1", features = ["std"] }
cc = { version = "1", default-features = false, features = ["jobserver", "parallel"] }
clap = { version = "2", features = ["ansi_term", "atty", "color", "strsim", "suggestions", "vec_map"] }
either = { version = "1", features = ["use_std"] }
getrandom = { version = "0.2", default-features = false, features = ["js", "js-sys", "std", "wasm-bindgen"] }
hashbrown = { version = "0.11", features = ["ahash", "inline-more", "raw"] }
indexmap = { version = "1", default-features = false, features = ["std"] }
libc = { version = "0.2", features = ["extra_traits", "std"] }
log = { version = "0.4", default-features = false, features = ["std"] }
memchr = { version = "2", features = ["std", "use_std"] }
rand = { version = "0.8", features = ["alloc", "getrandom", "libc", "rand_chacha", "rand_hc", "small_rng", "std", "std_rng"] }
regex = { version = "1", features = ["aho-corasick", "memchr", "perf", "perf-cache", "perf-dfa", "perf-inline", "perf-literal", "std", "unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] }
regex-syntax = { version = "0.6", features = ["unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] }
serde = { version = "1", features = ["derive", "rc", "serde_derive", "std"] }
syn = { version = "1", features = ["clone-impls", "derive", "extra-traits", "full", "parsing", "printing", "proc-macro", "quote", "visit", "visit-mut"] }
### END HAKARI SECTION

21 workspace-hack/README.md Normal file
View File

@ -0,0 +1,21 @@
# workspace-hack
This crate is a "workspace hack" crate managed by [`cargo hakari`][hakari].
Its purpose is to unify the features used by all crates in the workspace so that the crates share
more dependencies and fewer crates need to be rebuilt. There are more details in [hakari's
documentation][hakari-docs].
[hakari]: https://crates.io/crates/cargo-hakari
[hakari-docs]: https://docs.rs/cargo-hakari/0.9.6/cargo_hakari/about/index.html
## CI failures
If the `workspace_hack_checks` CI job is failing, there are two possible reasons and solutions:
- If `cargo hakari generate --diff` fails, that means a crate has started or stopped using a
feature of some crate and that feature isn't up-to-date in the `workspace-hack` crate. To fix
this, run `cargo hakari generate` and commit the changes.
- If `cargo hakari manage-deps --dry-run` fails, that means a crate in the workspace isn't
depending on the `workspace-hack` crate. To fix this, run `cargo hakari manage-deps` and commit
the changes.

2 workspace-hack/build.rs Normal file
View File

@ -0,0 +1,2 @@
// A build script is required for cargo to consider build dependencies.
fn main() {}

View File

@ -0,0 +1 @@
// This is a dummy lib.rs.

View File

@ -25,6 +25,7 @@ tokio = { version = "1.13", features = ["macros", "fs"] }
trace = { path = "../trace" }
trace_http = { path = "../trace_http" }
uuid = { version = "0.8", features = ["serde", "v4"] }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies]
tempfile = "3.1.0"

View File

@ -11,7 +11,7 @@ use parking_lot::Mutex;
use data_types::sequence::Sequence;
use data_types::write_buffer::WriteBufferCreationConfig;
use dml::{DmlMeta, DmlOperation, DmlWrite};
use dml::{DmlDelete, DmlMeta, DmlOperation, DmlWrite};
use time::TimeProvider;
use crate::core::{
@ -108,14 +108,36 @@ impl MockBufferSharedState {
.collect()
}
/// Push a new delete to the specified sequencer
///
/// # Panics
/// - when delete is not sequenced
/// - when no sequencer was initialized
/// - when specified sequencer does not exist
/// - when the sequence number in the entry is not larger than the current maximum
pub fn push_delete(&self, delete: DmlDelete) {
self.push_operation(DmlOperation::Delete(delete))
}
/// Push a new entry to the specified sequencer.
///
/// # Panics
/// - when given entry is not sequenced
/// - when write is not sequenced
/// - when no sequencer was initialized
/// - when specified sequencer does not exist
/// - when the sequence number in the entry is not larger than the current maximum
pub fn push_write(&self, write: DmlWrite) {
self.push_operation(DmlOperation::Write(write))
}
/// Push a new operation to the specified sequencer
///
/// # Panics
/// - when operation is not sequenced
/// - when no sequencer was initialized
/// - when specified sequencer does not exist
/// - when the sequence number in the entry is not larger than the current maximum
pub fn push_operation(&self, write: DmlOperation) {
let sequence = write.meta().sequence().expect("write must be sequenced");
assert!(
write.meta().producer_ts().is_some(),
@ -135,7 +157,7 @@ impl MockBufferSharedState {
);
}
writes_vec.push(Ok(DmlOperation::Write(write)));
writes_vec.push(Ok(write));
}
/// Push line protocol data with placeholder values used for write metadata