Merge branch 'main' into ntran/compact_os_framework
commit b5d6e201e6
@@ -1321,9 +1321,9 @@ checksum = "ac5956d4e63858efaec57e0d6c1c2f6a41e1487f830314a324ccd7e2223a7ca0"

 [[package]]
 name = "handlebars"
-version = "4.1.4"
+version = "4.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e1874024f4a29f47d609014caec0b1c866f1c1eb0661a09c9733ecc4757f5f88"
+checksum = "8ad84da8f63da982543fc85fcabaee2ad1fdd809d99d64a48887e2e942ddfe46"
 dependencies = [
  "log",
  "pest",

@@ -1434,9 +1434,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"

 [[package]]
 name = "hyper"
-version = "0.14.14"
+version = "0.14.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2b91bb1f221b6ea1f1e4371216b70f40748774c2fb5971b450c07773fb92d26b"
+checksum = "436ec0091e4f20e655156a30a0df3770fe2900aa301e548e08446ec794b6953c"
 dependencies = [
  "bytes",
  "futures-channel",

@@ -1914,9 +1914,9 @@ dependencies = [

 [[package]]
 name = "libc"
-version = "0.2.106"
+version = "0.2.108"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a60553f9a9e039a333b4e9b20573b9e9b9c0bb3a11e201ccc48ef4283456d673"
+checksum = "8521a1b57e76b1ec69af7599e75e38e7b7fad6610f037db8c79b127201b5d119"

 [[package]]
 name = "libloading"

@@ -3801,9 +3801,9 @@ dependencies = [

 [[package]]
 name = "serde_json"
-version = "1.0.70"
+version = "1.0.71"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e277c495ac6cd1a01a58d0a0c574568b4d1ddf14f59965c6a58b8d96400b54f3"
+checksum = "063bf466a64011ac24040a49009724ee60a57da1b437617ceb32e53ad61bfb19"
 dependencies = [
  "indexmap",
  "itoa",

@@ -4530,9 +4530,9 @@ dependencies = [

 [[package]]
 name = "tower"
-version = "0.4.10"
+version = "0.4.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c00e500fff5fa1131c866b246041a6bf96da9c965f8fe4128cb1421f23e93c00"
+checksum = "5651b5f6860a99bd1adb59dbfe1db8beb433e73709d9032b413a77e2fb7c066a"
 dependencies = [
  "futures-core",
  "futures-util",

@@ -4670,9 +4670,9 @@ dependencies = [

 [[package]]
 name = "tracing-subscriber"
-version = "0.3.1"
+version = "0.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "80a4ddde70311d8da398062ecf6fc2c309337de6b0f77d6c27aff8d53f6fca52"
+checksum = "7507ec620f809cdf07cccb5bc57b13069a88031b795efd4079b1c71b66c1613d"
 dependencies = [
  "ansi_term 0.12.1",
  "lazy_static",

@@ -9,7 +9,7 @@ bytes = "1.0"
 futures = { version = "0.3", default-features = false }
 reqwest = { version = "0.11", features = ["stream", "json"] }
 serde = { version = "1.0", features = ["derive"] }
-serde_json = "1.0.70"
+serde_json = "1.0.71"
 snafu = "0.6.6"
 url = "2.1.1"

@@ -70,7 +70,7 @@ pprof = { version = "^0.5", default-features = false, features = ["flamegraph",
 prost = "0.8"
 rustyline = { version = "9.0", default-features = false }
 serde = { version = "1.0", features = ["derive"] }
-serde_json = "1.0.70"
+serde_json = "1.0.71"
 serde_urlencoded = "0.7.0"
 snafu = "0.6.9"
 structopt = "0.3.25"

@@ -27,7 +27,7 @@ mutable_batch_pb = { path = "../mutable_batch_pb", optional = true }
 prost = "0.8"
 rand = "0.8.3"
 serde = "1.0.128"
-serde_json = { version = "1.0.70", optional = true }
+serde_json = { version = "1.0.71", optional = true }
 thiserror = "1.0.30"
 tonic = { version = "0.5.0" }
 uuid = { version = "0.8", features = ["v4"] }

@@ -10,7 +10,7 @@ chrono = "0.4.13"
 chrono-english = "0.1.4"
 clap = "2.33.1"
 futures = "0.3.5"
-handlebars = "4.1.4"
+handlebars = "4.1.5"
 humantime = "2.1.0"
 data_types = { path = "../data_types" }
 generated_types = { path = "../generated_types" }
@@ -19,12 +19,12 @@ influxdb_iox_client = { path = "../influxdb_iox_client" }
 itertools = "0.10.0"
 rand = { version = "0.8.3", features = ["small_rng"] }
 serde = { version = "1.0", features = ["derive"] }
-serde_json = "1.0.70"
+serde_json = "1.0.71"
 snafu = "0.6.8"
 tokio = { version = "1.13", features = ["macros", "rt-multi-thread"] }
 toml = "0.5.6"
 tracing = "0.1"
-tracing-subscriber = "0.3.1"
+tracing-subscriber = "0.3.2"
 uuid = { version = "0.8.1", default_features = false }

 [dev-dependencies]

@@ -155,7 +155,7 @@ impl CatalogState for TracerCatalogState {
 mod tests {
     use super::*;
     use crate::test_helpers::{make_config, new_empty};
-    use parquet_file::test_utils::{chunk_addr, make_metadata, TestSize};
+    use parquet_file::test_utils::generator::ChunkGenerator;
     use std::{collections::HashSet, sync::Arc};
     use tokio::sync::RwLock;

@@ -176,6 +176,7 @@ mod tests {
     async fn test_cleanup_rules() {
         let config = make_config().await;
         let iox_object_store = &config.iox_object_store;
+        let mut generator = ChunkGenerator::new_with_store(Arc::clone(iox_object_store));

         let catalog = new_empty(config.clone()).await;

@@ -186,36 +187,20 @@ mod tests {
         let mut transaction = catalog.open_transaction().await;

         // an ordinary tracked parquet file => keep
-        let (path, metadata) =
-            make_metadata(iox_object_store, "foo", chunk_addr(1), TestSize::Full).await;
-        let metadata = Arc::new(metadata);
-        let info = CatalogParquetInfo {
-            path,
-            file_size_bytes: 33,
-            metadata,
-        };
-
-        transaction.add_parquet(&info);
-        paths_keep.push(info.path);
+        let (chunk, _) = generator.generate().await;
+        transaction.add_parquet(&CatalogParquetInfo::from_chunk(&chunk));
+        paths_keep.push(chunk.path().clone());

         // another ordinary tracked parquet file that was added and removed => keep (for time
         // travel)
-        let (path, metadata) =
-            make_metadata(iox_object_store, "foo", chunk_addr(2), TestSize::Full).await;
-        let metadata = Arc::new(metadata);
-        let info = CatalogParquetInfo {
-            path,
-            file_size_bytes: 33,
-            metadata,
-        };
-        transaction.add_parquet(&info);
-        transaction.remove_parquet(&info.path);
-        paths_keep.push(info.path);
+        let (chunk, _) = generator.generate().await;
+        transaction.add_parquet(&CatalogParquetInfo::from_chunk(&chunk));
+        transaction.remove_parquet(chunk.path());
+        paths_keep.push(chunk.path().clone());

         // an untracked parquet file => delete
-        let (path, _md) =
-            make_metadata(iox_object_store, "foo", chunk_addr(3), TestSize::Full).await;
-        paths_delete.push(path);
+        let (chunk, _) = generator.generate().await;
+        paths_delete.push(chunk.path().clone());

         transaction.commit().await.unwrap();
     }

@@ -224,6 +209,7 @@ mod tests {
         let files = get_unreferenced_parquet_files(&catalog, 1_000)
             .await
             .unwrap();

         delete_files(&catalog, &files).await.unwrap();

         // deleting a second time should just work

@@ -243,39 +229,33 @@ mod tests {
     async fn test_cleanup_with_parallel_transaction() {
         let config = make_config().await;
         let iox_object_store = &config.iox_object_store;
+        let mut generator = ChunkGenerator::new_with_store(Arc::clone(iox_object_store));
         let lock: RwLock<()> = Default::default();

         let catalog = new_empty(config.clone()).await;

         // try multiple times to provoke a conflict
-        for i in 0..100 {
+        for i in 1..100 {
             // Every so often try to create a file with the same ChunkAddr beforehand. This should
             // not trick the cleanup logic to remove the actual file because file paths contains a
             // UUIDv4 part.
             if i % 2 == 0 {
-                make_metadata(iox_object_store, "foo", chunk_addr(i), TestSize::Full).await;
+                generator.generate_id(i).await;
             }

-            let (path, _) = tokio::join!(
+            let (chunk, _) = tokio::join!(
                 async {
                     let guard = lock.read().await;
-                    let (path, md) =
-                        make_metadata(iox_object_store, "foo", chunk_addr(i), TestSize::Full).await;
-
-                    let metadata = Arc::new(md);
-                    let info = CatalogParquetInfo {
-                        path,
-                        file_size_bytes: 33,
-                        metadata,
-                    };
+                    let (chunk, _) = generator.generate_id(i).await;

                     let mut transaction = catalog.open_transaction().await;
-                    transaction.add_parquet(&info);
+                    transaction.add_parquet(&CatalogParquetInfo::from_chunk(&chunk));
                     transaction.commit().await.unwrap();

                     drop(guard);

-                    info.path
+                    chunk
                 },
                 async {
                     let guard = lock.write().await;

@@ -289,7 +269,7 @@ mod tests {
             );

             let all_files = list_all_files(iox_object_store).await;
-            assert!(dbg!(all_files).contains(dbg!(&path)));
+            assert!(dbg!(all_files).contains(dbg!(chunk.path())));
         }
     }

@@ -297,20 +277,15 @@ mod tests {
     async fn test_cleanup_max_files() {
         let config = make_config().await;
         let iox_object_store = &config.iox_object_store;
+        let mut generator = ChunkGenerator::new_with_store(Arc::clone(iox_object_store));

         let catalog = new_empty(config.clone()).await;

         // create some files
         let mut to_remove = HashSet::default();
-        for chunk_id in 0..3 {
-            let (path, _md) = make_metadata(
-                iox_object_store,
-                "foo",
-                chunk_addr(chunk_id),
-                TestSize::Full,
-            )
-            .await;
-            to_remove.insert(path);
+        for _ in 0..3 {
+            let (chunk, _) = generator.generate().await;
+            to_remove.insert(chunk.path().clone());
         }

         // run clean-up

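The cleanup tests above all migrate to the same pattern. A minimal sketch, using only APIs visible in this diff (ChunkGenerator::new_with_store, generate/generate_id, CatalogParquetInfo::from_chunk); the surrounding transaction and `paths_keep` bookkeeping are assumed from the test context:

    // The generator owns the object store handle and hands back fully written
    // chunks, replacing per-file make_metadata + a hand-built CatalogParquetInfo.
    let mut generator = ChunkGenerator::new_with_store(Arc::clone(iox_object_store));
    let (chunk, _metadata) = generator.generate().await; // next sequential chunk id
    transaction.add_parquet(&CatalogParquetInfo::from_chunk(&chunk));
    paths_keep.push(chunk.path().clone());
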
@@ -1064,7 +1064,10 @@ mod tests {
     use std::vec;

     use bytes::Bytes;
-    use parquet_file::test_utils::{chunk_addr, make_iox_object_store, make_metadata, TestSize};
+    use data_types::chunk_metadata::ChunkAddr;
+    use parquet_file::chunk::ParquetChunk;
+    use parquet_file::test_utils::generator::ChunkGenerator;
+    use parquet_file::test_utils::make_iox_object_store;

     use super::*;
     use crate::test_helpers::{

@@ -1642,6 +1645,7 @@ mod tests {
     async fn test_checkpoint() {
         let config = make_config().await;
         let mut trace = assert_single_catalog_inmem_works(config.clone()).await;
+        let mut generator = ChunkGenerator::new_with_store(Arc::clone(&config.iox_object_store));

         // re-open catalog
         let (catalog, mut state) = load_ok(config.clone()).await.unwrap();

@@ -1659,21 +1663,10 @@ mod tests {

         // create another transaction on-top that adds a file (this transaction will be required to load the full state)
         {
-            let addr = chunk_addr(1337);
-            let (path, metadata) = make_metadata(
-                &config.iox_object_store,
-                "foo",
-                addr.clone(),
-                TestSize::Full,
-            )
-            .await;
+            let (chunk, _) = generator.generate_id(1337).await;

             let mut transaction = catalog.open_transaction().await;
-            let info = CatalogParquetInfo {
-                path,
-                file_size_bytes: 33,
-                metadata: Arc::new(metadata),
-            };
+            let info = CatalogParquetInfo::from_chunk(&chunk);
             state.insert(info.clone()).unwrap();
             transaction.add_parquet(&info);
             let ckpt_handle = transaction.commit().await.unwrap();

@@ -1713,6 +1706,7 @@ mod tests {
     async fn test_delete_predicates() {
         let config = make_config().await;
         let iox_object_store = &config.iox_object_store;
+        let mut generator = ChunkGenerator::new_with_store(Arc::clone(iox_object_store));

         let catalog = new_empty(config.clone()).await;
         let mut state = TestCatalogState::default();

@@ -1722,16 +1716,11 @@ mod tests {

         // create 3 chunks
         let mut chunk_addrs = vec![];
-        for id in 0..3 {
-            let chunk_addr = chunk_addr(id);
-            let (path, metadata) =
-                make_metadata(iox_object_store, "foo", chunk_addr.clone(), TestSize::Full)
-                    .await;
-            let info = CatalogParquetInfo {
-                path,
-                file_size_bytes: 33,
-                metadata: Arc::new(metadata),
-            };
+        for _ in 0..3 {
+            let (chunk, metadata) = generator.generate().await;
+            let chunk_addr = ChunkAddr::new(generator.partition(), metadata.chunk_id);
+
+            let info = CatalogParquetInfo::from_chunk(&chunk);
             state.insert(info.clone()).unwrap();
             t.add_parquet(&info);

@@ -1819,6 +1808,29 @@ mod tests {
         }
     }

+    /// Assert that set of parquet files tracked by a catalog are identical to the given sorted list.
+    fn assert_catalog_chunks(state: &TestCatalogState, expected: &[ParquetChunk]) {
+        let actual = get_catalog_parquet_files(state);
+        let mut expected: Vec<_> = expected.iter().collect();
+        expected.sort_by(|a, b| a.path().cmp(b.path()));
+
+        for ((actual_path, actual_md), chunk) in actual.iter().zip(expected.iter()) {
+            assert_eq!(actual_path, chunk.path());
+
+            let actual_md = actual_md.decode().unwrap();
+
+            let actual_schema = actual_md.read_schema().unwrap();
+            let expected_schema = chunk.schema();
+            assert_eq!(actual_schema, expected_schema);
+
+            // NOTE: the actual table name is not important here as long as it is the same for both calls, since it is
+            // only used to generate out statistics struct (not to read / dispatch anything).
+            let actual_stats = actual_md.read_statistics(&actual_schema).unwrap();
+            let expected_stats = &chunk.table_summary().columns;
+            assert_eq!(&actual_stats, expected_stats);
+        }
+    }
+
     async fn checked_delete(iox_object_store: &IoxObjectStore, path: &TransactionFilePath) {
         // issue full GET operation to check if object is preset
         iox_object_store

@@ -1872,6 +1884,7 @@ mod tests {

     async fn assert_single_catalog_inmem_works(config: PreservedCatalogConfig) -> TestTrace {
         let iox_object_store = &config.iox_object_store;
+        let mut generator = ChunkGenerator::new_with_store(Arc::clone(iox_object_store));
         let catalog = new_empty(config.clone()).await;
         let mut state = TestCatalogState::default();

@@ -1889,102 +1902,56 @@ mod tests {
     {
         let mut t = catalog.open_transaction().await;

-        let (path, metadata) =
-            make_metadata(iox_object_store, "foo", chunk_addr(0), TestSize::Full).await;
-        expected.push((path.clone(), metadata.clone()));
-        let info = CatalogParquetInfo {
-            path,
-            file_size_bytes: 33,
-            metadata: Arc::new(metadata),
-        };
-        state.insert(info.clone()).unwrap();
-        t.add_parquet(&info);
-
-        let (path, metadata) =
-            make_metadata(iox_object_store, "bar", chunk_addr(1), TestSize::Full).await;
-        expected.push((path.clone(), metadata.clone()));
-        let info = CatalogParquetInfo {
-            path,
-            file_size_bytes: 33,
-            metadata: Arc::new(metadata),
-        };
-        state.insert(info.clone()).unwrap();
-        t.add_parquet(&info);
-
-        let (path, metadata) =
-            make_metadata(iox_object_store, "bar", chunk_addr(2), TestSize::Full).await;
-        expected.push((path.clone(), metadata.clone()));
-        let info = CatalogParquetInfo {
-            path,
-            file_size_bytes: 33,
-            metadata: Arc::new(metadata),
-        };
-        state.insert(info.clone()).unwrap();
-        t.add_parquet(&info);
-
-        let (path, metadata) =
-            make_metadata(iox_object_store, "foo", chunk_addr(3), TestSize::Full).await;
-        expected.push((path.clone(), metadata.clone()));
-        let info = CatalogParquetInfo {
-            path,
-            file_size_bytes: 33,
-            metadata: Arc::new(metadata),
-        };
-        state.insert(info.clone()).unwrap();
-        t.add_parquet(&info);
+        for _ in 0..4 {
+            let (chunk, _) = generator.generate().await;
+            let info = CatalogParquetInfo::from_chunk(&chunk);
+            expected.push(chunk);
+            state.insert(info.clone()).unwrap();
+            t.add_parquet(&info);
+        }

         t.commit().await.unwrap();
     }
     assert_eq!(catalog.revision_counter(), 1);
-    assert_catalog_parquet_files(&state, &expected);
+    assert_catalog_chunks(&state, &expected);
     trace.record(&catalog, &state, false);

     // modify catalog with examples
     {
-        let (path, metadata) =
-            make_metadata(iox_object_store, "foo", chunk_addr(4), TestSize::Full).await;
-        expected.push((path.clone(), metadata.clone()));
+        let (chunk, _) = generator.generate().await;
+        let info = CatalogParquetInfo::from_chunk(&chunk);
+        expected.push(chunk);

         let mut t = catalog.open_transaction().await;

         // "real" modifications
-        let info = CatalogParquetInfo {
-            path,
-            file_size_bytes: 33,
-            metadata: Arc::new(metadata),
-        };
         state.insert(info.clone()).unwrap();
         t.add_parquet(&info);

-        let (path, _) = expected.remove(0);
-        state.remove(&path).unwrap();
-        t.remove_parquet(&path);
+        let chunk = expected.remove(0);
+        state.remove(chunk.path()).unwrap();
+        t.remove_parquet(chunk.path());

         t.commit().await.unwrap();
     }
     assert_eq!(catalog.revision_counter(), 2);
-    assert_catalog_parquet_files(&state, &expected);
+    assert_catalog_chunks(&state, &expected);
     trace.record(&catalog, &state, false);

     // uncommitted modifications have no effect
     {
         let mut t = catalog.open_transaction().await;

-        let (path, metadata) =
-            make_metadata(iox_object_store, "foo", chunk_addr(1), TestSize::Full).await;
-        let info = CatalogParquetInfo {
-            path,
-            file_size_bytes: 33,
-            metadata: Arc::new(metadata),
-        };
+        let (chunk, _) = generator.generate().await;
+        let info = CatalogParquetInfo::from_chunk(&chunk);

         t.add_parquet(&info);
-        t.remove_parquet(&expected[0].0);
+        t.remove_parquet(expected[0].path());

         // NO commit here!
     }
     assert_eq!(catalog.revision_counter(), 2);
-    assert_catalog_parquet_files(&state, &expected);
+    assert_catalog_chunks(&state, &expected);
     trace.record(&catalog, &state, true);

     trace

@@ -222,7 +222,7 @@ impl Debug for Metadata {
 mod tests {
     use super::*;
     use crate::{core::PreservedCatalog, interface::CatalogParquetInfo, test_helpers::make_config};
-    use parquet_file::test_utils::{chunk_addr, make_metadata, TestSize};
+    use parquet_file::test_utils::generator::{ChunkGenerator, GeneratorConfig};
     use time::Time;
     use uuid::Uuid;

@@ -235,21 +235,15 @@ mod tests {
             .with_time_provider(time_provider);

         let iox_object_store = &config.iox_object_store;
+        let mut generator = ChunkGenerator::new_with_store(Arc::clone(iox_object_store));
+        generator.set_config(GeneratorConfig::Simple);

         // build catalog with some data
         let catalog = PreservedCatalog::new_empty(config.clone()).await.unwrap();
         {
+            let (chunk, _) = generator.generate().await;
             let mut transaction = catalog.open_transaction().await;

-            let (path, metadata) =
-                make_metadata(iox_object_store, "foo", chunk_addr(0), TestSize::Minimal).await;
-            let info = CatalogParquetInfo {
-                path,
-                file_size_bytes: 33,
-                metadata: Arc::new(metadata),
-            };
-            transaction.add_parquet(&info);
-
+            transaction.add_parquet(&CatalogParquetInfo::from_chunk(&chunk));
             transaction.commit().await.unwrap();
         }

@@ -304,11 +298,11 @@ File {
                         "table1",
                         "part1",
                     ],
-                    file_name: "00000000-0000-0000-0000-000000000000.parquet",
+                    file_name: "00000000-0000-0000-0000-000000000001.parquet",
                 },
             ),
-            file_size_bytes: 33,
-            metadata: b"metadata omitted (937 bytes)",
+            file_size_bytes: 3052,
+            metadata: b"metadata omitted (935 bytes)",
         },
     ),
 ),

@@ -352,21 +346,15 @@ File {
             .with_fixed_uuid(Uuid::nil())
             .with_time_provider(time_provider);
         let iox_object_store = &config.iox_object_store;
+        let mut generator = ChunkGenerator::new_with_store(Arc::clone(iox_object_store));
+        generator.set_config(GeneratorConfig::Simple);

         // build catalog with some data
         let catalog = PreservedCatalog::new_empty(config.clone()).await.unwrap();
         {
+            let (chunk, _) = generator.generate().await;
             let mut transaction = catalog.open_transaction().await;

-            let (path, metadata) =
-                make_metadata(iox_object_store, "foo", chunk_addr(0), TestSize::Minimal).await;
-            let info = CatalogParquetInfo {
-                path,
-                file_size_bytes: 33,
-                metadata: Arc::new(metadata),
-            };
-            transaction.add_parquet(&info);
-
+            transaction.add_parquet(&CatalogParquetInfo::from_chunk(&chunk));
             transaction.commit().await.unwrap();
         }

@@ -426,11 +414,11 @@ File {
                         "table1",
                         "part1",
                     ],
-                    file_name: "00000000-0000-0000-0000-000000000000.parquet",
+                    file_name: "00000000-0000-0000-0000-000000000001.parquet",
                 },
             ),
-            file_size_bytes: 33,
-            metadata: b"metadata omitted (937 bytes)",
+            file_size_bytes: 3052,
+            metadata: b"metadata omitted (935 bytes)",
         },
     ),
 ),

@@ -460,7 +448,7 @@ File {
         table_name: "table1",
         partition_key: "part1",
         chunk_id: ChunkId(
-            0,
+            1,
         ),
         partition_checkpoint: PartitionCheckpoint {
             table_name: "table1",

@@ -500,7 +488,7 @@ File {
             },
         },
         chunk_order: ChunkOrder(
-            5,
+            1,
         ),
     },
 ),

@@ -7,6 +7,7 @@ use std::{
 use data_types::chunk_metadata::{ChunkAddr, ChunkId};
 use data_types::delete_predicate::DeletePredicate;
 use iox_object_store::{IoxObjectStore, ParquetFilePath};
+use parquet_file::chunk::ParquetChunk;
 use snafu::Snafu;

 use parquet_file::metadata::IoxParquetMetaData;

@@ -24,6 +25,17 @@ pub struct CatalogParquetInfo {
     pub metadata: Arc<IoxParquetMetaData>,
 }

+impl CatalogParquetInfo {
+    /// Creates a [`CatalogParquetInfo`] from a [`ParquetChunk`]
+    pub fn from_chunk(chunk: &ParquetChunk) -> Self {
+        Self {
+            path: chunk.path().clone(),
+            file_size_bytes: chunk.file_size_bytes(),
+            metadata: chunk.parquet_metadata(),
+        }
+    }
+}
+
 /// Same as [ChunkAddr] but w/o the database part.
 #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub struct ChunkAddrWithoutDatabase {

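The new constructor collapses the hand-assembled struct literal (with its placeholder file_size_bytes: 33) that the tests above used to build at every call site. A before/after sketch of one call site; the chunk accessors are exactly those referenced by from_chunk itself:

    // before: placeholder size, manually threaded metadata
    let info = CatalogParquetInfo {
        path: chunk.path().clone(),
        file_size_bytes: 33,
        metadata: chunk.parquet_metadata(),
    };
    // after: all three fields derived from the chunk that was actually written
    let info = CatalogParquetInfo::from_chunk(&chunk);
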
@@ -10,11 +10,14 @@ use crate::{
     },
 };
 use data_types::delete_predicate::{DeleteExpr, DeletePredicate, Op, Scalar};
-use data_types::{chunk_metadata::ChunkId, timestamp::TimestampRange};
+use data_types::{
+    chunk_metadata::{ChunkAddr, ChunkId},
+    timestamp::TimestampRange,
+};
 use iox_object_store::{IoxObjectStore, ParquetFilePath, TransactionFilePath};
 use parquet_file::{
     metadata::IoxParquetMetaData,
-    test_utils::{chunk_addr, make_iox_object_store, make_metadata, TestSize},
+    chunk::ParquetChunk,
+    test_utils::{generator::ChunkGenerator, make_iox_object_store},
 };
 use snafu::ResultExt;
 use std::{

@@ -259,158 +262,107 @@ where
     F: Fn(&S) -> CheckpointData + Send,
 {
     let config = make_config().await;
+    let iox_object_store = &config.iox_object_store;
+    let mut generator = ChunkGenerator::new_with_store(Arc::clone(iox_object_store));

     // The expected state of the catalog
-    let mut expected_files: HashMap<ChunkId, (ParquetFilePath, Arc<IoxParquetMetaData>)> =
-        HashMap::new();
+    let mut expected_chunks: HashMap<u32, ParquetChunk> = HashMap::new();
     let mut expected_predicates: HashMap<Arc<DeletePredicate>, HashSet<ChunkAddrWithoutDatabase>> =
         HashMap::new();
-    assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
+    assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);

     // add files
     {
-        for chunk_id in 0..5 {
-            let (path, metadata) = make_metadata(
-                &config.iox_object_store,
-                "ok",
-                chunk_addr(chunk_id),
-                TestSize::Full,
-            )
-            .await;
+        for chunk_id in 1..5 {
+            let (chunk, _) = generator.generate_id(chunk_id).await;
             state
                 .add(
-                    Arc::clone(&config.iox_object_store),
-                    CatalogParquetInfo {
-                        path: path.clone(),
-                        file_size_bytes: 33,
-                        metadata: Arc::new(metadata.clone()),
-                    },
+                    Arc::clone(iox_object_store),
+                    CatalogParquetInfo::from_chunk(&chunk),
                 )
                 .unwrap();
-            expected_files.insert(ChunkId::new_test(chunk_id), (path, Arc::new(metadata)));
+            expected_chunks.insert(chunk_id, chunk);
         }
     }
-    assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
+    assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);

     // remove files
     {
-        let (path, _) = expected_files.remove(&ChunkId::new_test(1)).unwrap();
-        state.remove(&path).unwrap();
+        let chunk = expected_chunks.remove(&1).unwrap();
+        state.remove(chunk.path()).unwrap();
     }
-    assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
+    assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);

     // add and remove in the same transaction
     {
-        let (path, metadata) = make_metadata(
-            &config.iox_object_store,
-            "ok",
-            chunk_addr(5),
-            TestSize::Full,
-        )
-        .await;
+        let (chunk, _) = generator.generate_id(5).await;
         state
             .add(
-                Arc::clone(&config.iox_object_store),
-                CatalogParquetInfo {
-                    path: path.clone(),
-                    file_size_bytes: 33,
-                    metadata: Arc::new(metadata),
-                },
+                Arc::clone(iox_object_store),
+                CatalogParquetInfo::from_chunk(&chunk),
             )
             .unwrap();
-        state.remove(&path).unwrap();
+        state.remove(chunk.path()).unwrap();
     }
-    assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
+    assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);

     // remove and add in the same transaction
     {
-        let (path, metadata) = expected_files.get(&ChunkId::new_test(3)).unwrap();
-        state.remove(path).unwrap();
+        let chunk = expected_chunks.get(&3).unwrap();
+        state.remove(chunk.path()).unwrap();
         state
             .add(
-                Arc::clone(&config.iox_object_store),
-                CatalogParquetInfo {
-                    path: path.clone(),
-                    file_size_bytes: 33,
-                    metadata: Arc::clone(metadata),
-                },
+                Arc::clone(iox_object_store),
+                CatalogParquetInfo::from_chunk(chunk),
            )
            .unwrap();
     }
-    assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
+    assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);

     // add, remove, add in the same transaction
     {
-        let (path, metadata) = make_metadata(
-            &config.iox_object_store,
-            "ok",
-            chunk_addr(6),
-            TestSize::Full,
-        )
-        .await;
+        let (chunk, _) = generator.generate_id(6).await;
         state
             .add(
-                Arc::clone(&config.iox_object_store),
-                CatalogParquetInfo {
-                    path: path.clone(),
-                    file_size_bytes: 33,
-                    metadata: Arc::new(metadata.clone()),
-                },
+                Arc::clone(iox_object_store),
+                CatalogParquetInfo::from_chunk(&chunk),
            )
            .unwrap();
-        state.remove(&path).unwrap();
+        state.remove(chunk.path()).unwrap();
         state
             .add(
-                Arc::clone(&config.iox_object_store),
-                CatalogParquetInfo {
-                    path: path.clone(),
-                    file_size_bytes: 33,
-                    metadata: Arc::new(metadata.clone()),
-                },
+                Arc::clone(iox_object_store),
+                CatalogParquetInfo::from_chunk(&chunk),
            )
            .unwrap();
-        expected_files.insert(ChunkId::new_test(6), (path, Arc::new(metadata)));
+        expected_chunks.insert(6, chunk);
     }
-    assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
+    assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);

     // remove, add, remove in same transaction
     {
-        let (path, metadata) = expected_files.remove(&ChunkId::new_test(4)).unwrap();
-        state.remove(&path).unwrap();
+        let chunk = expected_chunks.remove(&4).unwrap();
+        state.remove(chunk.path()).unwrap();
         state
             .add(
-                Arc::clone(&config.iox_object_store),
-                CatalogParquetInfo {
-                    path: path.clone(),
-                    file_size_bytes: 33,
-                    metadata: Arc::clone(&metadata),
-                },
+                Arc::clone(iox_object_store),
+                CatalogParquetInfo::from_chunk(&chunk),
            )
            .unwrap();
-        state.remove(&path).unwrap();
+        state.remove(chunk.path()).unwrap();
     }
-    assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
+    assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);

     // error handling, no real opt
     {
         // TODO: Error handling should disambiguate between chunk collision and filename collision

         // chunk with same ID already exists (should also not change the metadata)
-        let (path, metadata) = make_metadata(
-            &config.iox_object_store,
-            "fail",
-            chunk_addr(0),
-            TestSize::Full,
-        )
-        .await;
+        let (chunk, _) = generator.generate_id(2).await;
         let err = state
             .add(
-                Arc::clone(&config.iox_object_store),
-                CatalogParquetInfo {
-                    path,
-                    file_size_bytes: 33,
-                    metadata: Arc::new(metadata),
-                },
+                Arc::clone(iox_object_store),
+                CatalogParquetInfo::from_chunk(&chunk),
            )
            .unwrap_err();
        assert!(matches!(

@@ -418,21 +370,16 @@ where
             CatalogStateAddError::ParquetFileAlreadyExists { .. }
         ));
     }
-    assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
+    assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);

     // error handling, still something works
     {
         // already exists (should also not change the metadata)
-        let (_, metadata) = expected_files.get(&ChunkId::new_test(0)).unwrap();
+        let (chunk, _) = generator.generate_id(2).await;
         let err = state
             .add(
-                Arc::clone(&config.iox_object_store),
-                CatalogParquetInfo {
-                    // Intentionally "incorrect" path
-                    path: ParquetFilePath::new(&chunk_addr(10)),
-                    file_size_bytes: 33,
-                    metadata: Arc::clone(metadata),
-                },
+                Arc::clone(iox_object_store),
+                CatalogParquetInfo::from_chunk(&chunk),
            )
            .unwrap_err();
        assert!(matches!(

@@ -441,97 +388,57 @@ where
         ));

         // this transaction will still work
-        let (path, metadata) = make_metadata(
-            &config.iox_object_store,
-            "ok",
-            chunk_addr(7),
-            TestSize::Full,
-        )
-        .await;
-        let metadata = Arc::new(metadata);
+        let (chunk, _) = generator.generate_id(7).await;
+        let info = CatalogParquetInfo::from_chunk(&chunk);
         state
-            .add(
-                Arc::clone(&config.iox_object_store),
-                CatalogParquetInfo {
-                    path: path.clone(),
-                    file_size_bytes: 33,
-                    metadata: Arc::clone(&metadata),
-                },
-            )
+            .add(Arc::clone(iox_object_store), info.clone())
             .unwrap();
-        expected_files.insert(ChunkId::new_test(7), (path.clone(), Arc::clone(&metadata)));
+        expected_chunks.insert(7, chunk);

         // recently added
-        let err = state
-            .add(
-                Arc::clone(&config.iox_object_store),
-                CatalogParquetInfo {
-                    path,
-                    file_size_bytes: 33,
-                    metadata: Arc::clone(&metadata),
-                },
-            )
-            .unwrap_err();
+        let err = state.add(Arc::clone(iox_object_store), info).unwrap_err();
         assert!(matches!(
             err,
             CatalogStateAddError::ParquetFileAlreadyExists { .. }
         ));

         // this still works
-        let (path, _) = expected_files.remove(&ChunkId::new_test(7)).unwrap();
-        state.remove(&path).unwrap();
+        let chunk = expected_chunks.remove(&7).unwrap();
+        state.remove(chunk.path()).unwrap();

         // recently removed
-        let err = state.remove(&path).unwrap_err();
+        let err = state.remove(chunk.path()).unwrap_err();
         assert!(matches!(
             err,
             CatalogStateRemoveError::ParquetFileDoesNotExist { .. }
         ));
     }
-    assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
+    assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);

     // add predicates
     {
         // create two chunks that we can use for delete predicate
-        let chunk_addr_1 = chunk_addr(8);
-        let (path, metadata) = make_metadata(
-            &config.iox_object_store,
-            "ok",
-            chunk_addr_1.clone(),
-            TestSize::Full,
-        )
-        .await;
-        state
-            .add(
-                Arc::clone(&config.iox_object_store),
-                CatalogParquetInfo {
-                    path: path.clone(),
-                    file_size_bytes: 33,
-                    metadata: Arc::new(metadata.clone()),
-                },
-            )
-            .unwrap();
-        expected_files.insert(chunk_addr_1.chunk_id, (path, Arc::new(metadata)));
+        let (chunk, metadata) = generator.generate_id(8).await;
+        let chunk_addr_1 = ChunkAddr::new(generator.partition(), metadata.chunk_id);

-        let chunk_addr_2 = chunk_addr(9);
-        let (path, metadata) = make_metadata(
-            &config.iox_object_store,
-            "ok",
-            chunk_addr_2.clone(),
-            TestSize::Full,
-        )
-        .await;
         state
             .add(
-                Arc::clone(&config.iox_object_store),
-                CatalogParquetInfo {
-                    path: path.clone(),
-                    file_size_bytes: 33,
-                    metadata: Arc::new(metadata.clone()),
-                },
+                Arc::clone(iox_object_store),
+                CatalogParquetInfo::from_chunk(&chunk),
             )
             .unwrap();
-        expected_files.insert(chunk_addr_2.chunk_id, (path, Arc::new(metadata)));
+        expected_chunks.insert(8, chunk);
+
+        let (chunk, metadata) = generator.generate_id(9).await;
+        let chunk_addr_2 = ChunkAddr::new(generator.partition(), metadata.chunk_id);
+
+        state
+            .add(
+                Arc::clone(iox_object_store),
+                CatalogParquetInfo::from_chunk(&chunk),
+            )
+            .unwrap();
+        expected_chunks.insert(9, chunk);

         // first predicate used only a single chunk
         let predicate_1 = create_delete_predicate(1);

@@ -546,32 +453,21 @@ where
         expected_predicates.insert(predicate_2, chunks_2.into_iter().collect());

         // chunks created afterwards are unaffected
-        let chunk_addr_3 = chunk_addr(10);
-        let (path, metadata) = make_metadata(
-            &config.iox_object_store,
-            "ok",
-            chunk_addr_3.clone(),
-            TestSize::Full,
-        )
-        .await;
+        let (chunk, _) = generator.generate_id(10).await;
         state
             .add(
-                Arc::clone(&config.iox_object_store),
-                CatalogParquetInfo {
-                    path: path.clone(),
-                    file_size_bytes: 33,
-                    metadata: Arc::new(metadata.clone()),
-                },
+                Arc::clone(iox_object_store),
+                CatalogParquetInfo::from_chunk(&chunk),
            )
            .unwrap();
-        expected_files.insert(chunk_addr_3.chunk_id, (path, Arc::new(metadata)));
+        expected_chunks.insert(10, chunk);
     }
-    assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
+    assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);

     // removing a chunk will also remove its predicates
     {
-        let (path, _) = expected_files.remove(&ChunkId::new_test(8)).unwrap();
-        state.remove(&path).unwrap();
+        let chunk = expected_chunks.remove(&8).unwrap();
+        state.remove(chunk.path()).unwrap();
         expected_predicates = expected_predicates
             .into_iter()
             .filter_map(|(predicate, chunks)| {

@@ -583,7 +479,7 @@ where
             })
             .collect();
     }
-    assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
+    assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);

     // Registering predicates for unknown chunks is just ignored because chunks might been in "persisting" intermediate
     // state while the predicate was reported.

@@ -596,30 +492,30 @@ where
         }];
         state.delete_predicate(Arc::clone(&predicate), chunks);
     }
-    assert_checkpoint(&state, &f, &expected_files, &expected_predicates);
+    assert_checkpoint(&state, &f, &expected_chunks, &expected_predicates);
 }

 /// Assert that tracked files and their linked metadata are equal.
 fn assert_checkpoint<S, F>(
     state: &S,
     f: &F,
-    expected_files: &HashMap<ChunkId, (ParquetFilePath, Arc<IoxParquetMetaData>)>,
+    expected_chunks: &HashMap<u32, ParquetChunk>,
     expected_predicates: &HashMap<Arc<DeletePredicate>, HashSet<ChunkAddrWithoutDatabase>>,
 ) where
     F: Fn(&S) -> CheckpointData,
 {
-    let data = f(state);
+    let data: CheckpointData = f(state);
     let actual_files = data.files;

     let sorted_keys_actual = get_sorted_keys(actual_files.keys());
-    let sorted_keys_expected = get_sorted_keys(expected_files.values().map(|(path, _)| path));
+    let sorted_keys_expected = get_sorted_keys(expected_chunks.values().map(|chunk| chunk.path()));
     assert_eq!(sorted_keys_actual, sorted_keys_expected);

-    for (path, md_expected) in expected_files.values() {
-        let md_actual = &actual_files[path].metadata;
+    for chunk in expected_chunks.values() {
+        let md_actual = &actual_files[chunk.path()].metadata;

         let md_actual = md_actual.decode().unwrap();
-        let md_expected = md_expected.decode().unwrap();
+        let md_expected = chunk.parquet_metadata().decode().unwrap();

         let iox_md_actual = md_actual.read_iox_metadata().unwrap();
         let iox_md_expected = md_expected.read_iox_metadata().unwrap();

@@ -1,6 +1,5 @@
 use crate::{
-    chunk::{self, ChunkMetrics, ParquetChunk},
-    metadata::{IoxMetadata, IoxParquetMetaData},
+    chunk::{self, ParquetChunk},
     storage::Storage,
 };
 use arrow::{

@@ -12,12 +11,9 @@ use arrow::{
     record_batch::RecordBatch,
 };
 use data_types::{
-    chunk_metadata::{ChunkAddr, ChunkId, ChunkOrder},
-    partition_metadata::{ColumnSummary, InfluxDbType, StatValues, Statistics, TableSummary},
+    partition_metadata::{ColumnSummary, InfluxDbType, StatValues, Statistics},
     server_id::ServerId,
 };
-use datafusion::physical_plan::SendableRecordBatchStream;
-use datafusion_util::MemoryStream;
 use futures::TryStreamExt;
 use iox_object_store::{IoxObjectStore, ParquetFilePath};
 use object_store::ObjectStore;

@@ -100,102 +96,6 @@ pub async fn load_parquet_from_store_for_path(
     Ok(parquet_data)
 }

-/// The db name to use for testing
-pub fn db_name() -> &'static str {
-    "db1"
-}
-
-/// Creates a test chunk address for a given chunk id
-pub fn chunk_addr(id: u128) -> ChunkAddr {
-    ChunkAddr {
-        db_name: Arc::from(db_name()),
-        table_name: Arc::from("table1"),
-        partition_key: Arc::from("part1"),
-        chunk_id: ChunkId::new_test(id),
-    }
-}
-
-/// Same as [`make_chunk`] but parquet file does not contain any row group.
-///
-/// TODO(raphael): Replace with ChunkGenerator
-pub async fn make_chunk(
-    iox_object_store: Arc<IoxObjectStore>,
-    column_prefix: &str,
-    addr: ChunkAddr,
-    test_size: TestSize,
-) -> ParquetChunk {
-    let (record_batches, schema, column_summaries, _num_rows) =
-        make_record_batch(column_prefix, test_size);
-    make_chunk_given_record_batch(
-        iox_object_store,
-        record_batches,
-        schema,
-        addr,
-        column_summaries,
-    )
-    .await
-}
-
-/// Create a test chunk by writing data to object store.
-///
-/// TODO: This code creates a chunk that isn't hooked up with metrics
-///
-/// TODO(raphael): Replace with ChunkGenerator
-async fn make_chunk_given_record_batch(
-    iox_object_store: Arc<IoxObjectStore>,
-    record_batches: Vec<RecordBatch>,
-    schema: Schema,
-    addr: ChunkAddr,
-    column_summaries: Vec<ColumnSummary>,
-) -> ParquetChunk {
-    let storage = Storage::new(Arc::clone(&iox_object_store));
-
-    let table_summary = TableSummary {
-        name: addr.table_name.to_string(),
-        columns: column_summaries,
-    };
-    let stream: SendableRecordBatchStream = if record_batches.is_empty() {
-        Box::pin(MemoryStream::new_with_schema(
-            record_batches,
-            Arc::clone(schema.inner()),
-        ))
-    } else {
-        Box::pin(MemoryStream::new(record_batches))
-    };
-    let (partition_checkpoint, database_checkpoint) = create_partition_and_database_checkpoint(
-        Arc::clone(&addr.table_name),
-        Arc::clone(&addr.partition_key),
-    );
-    let metadata = IoxMetadata {
-        creation_timestamp: Time::from_timestamp(10, 20),
-        table_name: Arc::clone(&addr.table_name),
-        partition_key: Arc::clone(&addr.partition_key),
-        chunk_id: addr.chunk_id,
-        partition_checkpoint,
-        database_checkpoint,
-        time_of_first_write: Time::from_timestamp(30, 40),
-        time_of_last_write: Time::from_timestamp(50, 60),
-        chunk_order: ChunkOrder::new(5).unwrap(),
-    };
-    let (path, file_size_bytes, parquet_metadata) = storage
-        .write_to_object_store(addr.clone(), stream, metadata)
-        .await
-        .unwrap();
-    let rows = parquet_metadata.decode().unwrap().row_count();
-
-    ParquetChunk::new_from_parts(
-        addr.partition_key,
-        Arc::new(table_summary),
-        Arc::new(schema),
-        &path,
-        Arc::clone(&iox_object_store),
-        file_size_bytes,
-        Arc::new(parquet_metadata),
-        rows,
-        ChunkMetrics::new_unregistered(),
-    )
-}
-
 fn create_column_tag(
     name: &str,
     data: Vec<Vec<Option<&str>>>,

@@ -888,25 +788,6 @@ pub fn read_data_from_parquet_data(schema: SchemaRef, parquet_data: Vec<u8>) ->
     record_batches
 }

-/// Create test metadata by creating a parquet file and reading it back into memory.
-///
-/// TODO(raphael): Replace with ChunkGenerator
-pub async fn make_metadata(
-    iox_object_store: &Arc<IoxObjectStore>,
-    column_prefix: &str,
-    addr: ChunkAddr,
-    test_size: TestSize,
-) -> (ParquetFilePath, IoxParquetMetaData) {
-    let chunk = make_chunk(Arc::clone(iox_object_store), column_prefix, addr, test_size).await;
-    let parquet_data = load_parquet_from_store(&chunk, Arc::clone(iox_object_store))
-        .await
-        .unwrap();
-    (
-        chunk.path().clone(),
-        IoxParquetMetaData::from_file_bytes(parquet_data).unwrap(),
-    )
-}
-
 /// Create [`PartitionCheckpoint`] and [`DatabaseCheckpoint`] for testing.
 pub fn create_partition_and_database_checkpoint(
     table_name: Arc<str>,

@@ -62,19 +62,24 @@ impl ChunkGenerator {
         self.config = config;
     }

-    fn next_chunk(&mut self) -> (ChunkId, ChunkOrder) {
-        let t = self.next_chunk;
-        self.next_chunk += 1;
-        (ChunkId::new_test(t as _), ChunkOrder::new(t).unwrap())
+    pub fn partition(&self) -> &PartitionAddr {
+        &self.partition
     }

     pub async fn generate(&mut self) -> (ParquetChunk, IoxMetadata) {
+        let id = self.next_chunk;
+        self.next_chunk += 1;
+        self.generate_id(id).await
+    }
+
+    pub async fn generate_id(&mut self, id: u32) -> (ParquetChunk, IoxMetadata) {
         let (partition_checkpoint, database_checkpoint) = create_partition_and_database_checkpoint(
             Arc::clone(&self.partition.table_name),
             Arc::clone(&self.partition.partition_key),
         );

-        let (chunk_id, chunk_order) = self.next_chunk();
+        let chunk_id = ChunkId::new_test(id as _);
+        let chunk_order = ChunkOrder::new(id).unwrap();
         let chunk_addr = ChunkAddr::new(&self.partition, chunk_id);

         let metadata = IoxMetadata {

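generate() now simply delegates to generate_id() with an auto-incremented id, so a test can either take the next sequential chunk or pin a specific id (e.g. to provoke collisions, as test_cleanup_with_parallel_transaction does). A usage sketch under those assumptions, using only methods shown in this diff:

    let mut generator = ChunkGenerator::new_with_store(Arc::clone(&iox_object_store));
    let (chunk_a, _) = generator.generate().await; // next sequential id
    let (chunk_b, metadata) = generator.generate_id(1337).await; // explicit, reproducible id
    // partition() exposes the PartitionAddr so callers can rebuild a ChunkAddr:
    let addr = ChunkAddr::new(generator.partition(), metadata.chunk_id);
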
@@ -13,7 +13,7 @@ schema = { path = "../schema" }
 observability_deps = { path = "../observability_deps" }
 ordered-float = "2"
 regex = "1"
-serde_json = "1.0.70"
+serde_json = "1.0.71"
 snafu = "0.6.9"
 sqlparser = "0.12.0"

@@ -53,7 +53,7 @@ use write_buffer::core::{WriteBufferReading, WriteBufferWriting};

 pub(crate) use crate::db::chunk::DbChunk;
 pub(crate) use crate::db::lifecycle::ArcDb;
-use crate::db::write::{WriteFilter, WriteFilterNone};
+use crate::db::write::{DeleteFilter, DeleteFilterNone, WriteFilter, WriteFilterNone};
 use crate::{
     db::{
         access::QueryCatalogAccess,

@@ -522,25 +522,43 @@ impl Db {

     /// Store a delete
     pub fn store_delete(&self, delete: &DmlDelete) -> Result<()> {
+        self.store_filtered_delete(delete, DeleteFilterNone::default())
+    }
+
+    /// Store a delete with the provided [`DeleteFilter`]
+    pub fn store_filtered_delete(
+        &self,
+        delete: &DmlDelete,
+        filter: impl DeleteFilter,
+    ) -> Result<()> {
         let predicate = Arc::new(delete.predicate().clone());
         match delete.table_name() {
             None => {
                 // Note: This assumes tables cannot be removed from the catalog and therefore
                 // this lock gap is not problematic
                 for table_name in self.catalog.table_names() {
-                    self.delete(&table_name, Arc::clone(&predicate))
+                    self.delete_filtered(&table_name, Arc::clone(&predicate), filter)
                         .expect("table exists")
                 }
                 Ok(())
             }
-            Some(table_name) => self.delete(table_name, predicate),
+            Some(table_name) => self.delete_filtered(table_name, predicate, filter),
         }
     }

     /// Delete data from a table on a specified predicate
+    ///
+    /// Returns an error if the table cannot be found in the catalog
     pub fn delete(&self, table_name: &str, delete_predicate: Arc<DeletePredicate>) -> Result<()> {
+        self.delete_filtered(table_name, delete_predicate, DeleteFilterNone::default())
+    }
+
+    fn delete_filtered(
+        &self,
+        table_name: &str,
+        delete_predicate: Arc<DeletePredicate>,
+        filter: impl DeleteFilter,
+    ) -> Result<()> {
         // collect delete predicates on preserved partitions for a catalog transaction
         let mut affected_persisted_chunks = vec![];

@@ -558,6 +576,10 @@ impl Db {
             for chunk in chunks {
                 // save the delete predicate in the chunk
                 let mut chunk = chunk.write();
+                if !filter.filter_chunk(&chunk) {
+                    continue;
+                }
+
                 chunk.add_delete_predicate(Arc::clone(&delete_predicate));

                 // We should only report persisted chunks or chunks that are currently being persisted, because the

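With this change the public entry points keep their old behaviour by delegating with DeleteFilterNone, while replay can pass a real filter. A sketch of the equivalence (construction of the DmlDelete elided):

    // these two calls are equivalent:
    db.store_delete(&delete)?;
    db.store_filtered_delete(&delete, DeleteFilterNone::default())?;
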
@@ -917,12 +917,7 @@ mod tests {
     use data_types::{delete_predicate::DeleteExpr, timestamp::TimestampRange};

     use mutable_buffer::test_helpers::write_lp_to_new_chunk;
-    use parquet_file::{
-        chunk::ParquetChunk,
-        test_utils::{
-            make_chunk as make_parquet_chunk_with_store, make_iox_object_store, TestSize,
-        },
-    };
+    use parquet_file::test_utils::generator::{ChunkGenerator, GeneratorConfig};

     #[test]
     fn test_new_open() {

@@ -946,7 +941,7 @@ mod tests {
         let mut chunk = make_persisted_chunk().await;
         assert_eq!(
             chunk.freeze().unwrap_err().to_string(),
-            "Internal Error: unexpected chunk state for Chunk('db':'table1':'part1':00000000-0000-0000-0000-000000000000) \
+            "Internal Error: unexpected chunk state for Chunk('db1':'table1':'part1':00000000-0000-0000-0000-000000000001) \
             during setting closed. Expected Open or Frozen, got Persisted"
         );
     }

@@ -1132,11 +1127,6 @@ mod tests {
         write_lp_to_new_chunk(&format!("{} bar=1 10", table_name))
     }

-    async fn make_parquet_chunk(addr: ChunkAddr) -> ParquetChunk {
-        let iox_object_store = make_iox_object_store().await;
-        make_parquet_chunk_with_store(iox_object_store, "foo", addr, TestSize::Full).await
-    }
-
     fn chunk_addr() -> ChunkAddr {
         ChunkAddr {
             db_name: Arc::from("db"),

@@ -1160,11 +1150,12 @@ mod tests {
     }

     async fn make_persisted_chunk() -> CatalogChunk {
-        let addr = chunk_addr();
-        let now = Time::from_timestamp_nanos(43564);
+        let mut generator = ChunkGenerator::new().await;
+        generator.set_config(GeneratorConfig::NoData);
+        let (parquet_chunk, metadata) = generator.generate().await;
+        let addr = ChunkAddr::new(generator.partition(), metadata.chunk_id);

-        // assemble ParquetChunk
-        let parquet_chunk = make_parquet_chunk(addr.clone()).await;
+        let now = Time::from_timestamp_nanos(43564);

         CatalogChunk::new_object_store_only(
             addr,

@@ -18,7 +18,8 @@ use snafu::{ResultExt, Snafu};
 use time::Time;
 use write_buffer::core::WriteBufferReading;

-use crate::db::write::WriteFilter;
+use crate::db::catalog::chunk::{CatalogChunk, ChunkStage};
+use crate::db::write::{DeleteFilter, WriteFilter};
 use crate::Db;

 #[allow(clippy::enum_variant_names)]

@@ -243,8 +244,7 @@ pub async fn perform_replay(
     for n_try in 1..=n_tries {
         let result = match &dml_operation {
             DmlOperation::Write(write) => db.store_filtered_write(write, filter),
-            // TODO: Only apply delete to unpersisted chunks (#3125)
-            DmlOperation::Delete(delete) => db.store_delete(delete),
+            DmlOperation::Delete(delete) => db.store_filtered_delete(delete, filter),
         };

         match result {

@@ -370,6 +370,19 @@ impl<'a> WriteFilter for ReplayFilter<'a> {
     }
 }

+impl<'a> DeleteFilter for ReplayFilter<'a> {
+    fn filter_chunk(&self, chunk: &CatalogChunk) -> bool {
+        // The persist lifecycle action MUST persist any outstanding delete predicates
+        //
+        // As such deletes should only be applied to unpersisted chunks - i.e.
+        // those containing data from the in-progress replay operation
+        //
+        // This avoids a situation where a delete could be applied to a chunk containing
+        // data from writes sequenced after the delete being replayed
+        !matches!(chunk.stage(), ChunkStage::Persisted { .. })
+    }
+}
+
 /// Where is a given sequence number and the entire data batch associated with it compared to the range of persisted and
 /// partially persisted sequence numbers (extracted from partition checkpoint).
 #[derive(Debug, PartialEq)]

@@ -2664,6 +2677,60 @@ mod tests {
         .await;
     }

+    #[tokio::test]
+    async fn replay_delete_persisted_chunks() {
+        ReplayTest {
+            steps: vec![
+                Step::Ingest(vec![TestSequencedEntry {
+                    sequencer_id: 0,
+                    sequence_number: 0,
+                    lp: "table_1,tag_partition_by=a bar=10 10",
+                }]),
+                Step::Delete(vec![TestDelete {
+                    sequencer_id: 0,
+                    sequence_number: 1,
+                    table_name: None,
+                    predicate: DeletePredicate {
+                        range: TimestampRange { start: 0, end: 11 },
+                        exprs: vec![],
+                    },
+                }]),
+                Step::Ingest(vec![TestSequencedEntry {
+                    sequencer_id: 0,
+                    sequence_number: 2,
+                    lp: "table_1,tag_partition_by=b bar=20 10",
+                }]),
+                Step::Await(vec![Check::Query(
+                    "select * from table_1 order by bar",
+                    vec![
+                        "+-----+------------------+--------------------------------+",
+                        "| bar | tag_partition_by | time                           |",
+                        "+-----+------------------+--------------------------------+",
+                        "| 20  | b                | 1970-01-01T00:00:00.000000010Z |",
+                        "+-----+------------------+--------------------------------+",
+                    ],
+                )]),
+                Step::MakeWritesPersistable,
+                Step::Persist(vec![("table_1", "tag_partition_by_b")]),
+                Step::Restart,
+                Step::Replay,
+                Step::Assert(vec![Check::Query(
+                    "select * from table_1 order by bar",
+                    vec![
+                        "+-----+------------------+--------------------------------+",
+                        "| bar | tag_partition_by | time                           |",
+                        "+-----+------------------+--------------------------------+",
+                        "| 20  | b                | 1970-01-01T00:00:00.000000010Z |",
+                        "+-----+------------------+--------------------------------+",
+                    ],
+                )]),
+            ],
+            ..Default::default()
+        }
+        .run()
+        .await;
+    }
+
     #[tokio::test]
     async fn replay_fail_sequencers_change() {
         // create write buffer w/ sequencer 0 and 1

@@ -1,3 +1,4 @@
+use crate::db::catalog::chunk::CatalogChunk;
 use mutable_batch::PartitionWrite;

 /// A [`WriteFilter`] provides the ability to mask rows from a [`PartitionWrite`]

@@ -27,3 +28,21 @@ impl WriteFilter for WriteFilterNone {
         Some(write)
     }
 }
+
+/// A [`DeleteFilter`] provides the ability to exclude chunks from having a delete applied
+///
+/// This is important for replay where it needs to prevent deletes from being applied to chunks
+/// containing writes sequenced after the delete
+pub trait DeleteFilter: Copy {
+    /// Returns true if the delete should be applied to this chunk
+    fn filter_chunk(&self, chunk: &CatalogChunk) -> bool;
+}
+
+#[derive(Debug, Default, Copy, Clone)]
+pub struct DeleteFilterNone {}
+
+impl DeleteFilter for DeleteFilterNone {
+    fn filter_chunk(&self, _chunk: &CatalogChunk) -> bool {
+        true
+    }
+}
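
Because DeleteFilter is Copy, a filter can be passed by value into store_filtered_delete and reused across the per-table loop. A hypothetical custom filter (not part of this diff) mirroring the replay rule of skipping persisted chunks:

    use crate::db::catalog::chunk::{CatalogChunk, ChunkStage};

    #[derive(Debug, Copy, Clone)]
    struct SkipPersisted;

    impl DeleteFilter for SkipPersisted {
        // Apply the delete only to chunks that have not yet been persisted.
        fn filter_chunk(&self, chunk: &CatalogChunk) -> bool {
            !matches!(chunk.stage(), ChunkStage::Persisted { .. })
        }
    }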