//! IOx Compactor Layout tests
//!
//! These tests do almost everything the compactor would do in a
//! production system *except* for reading/writing parquet data.
//!
//! The input to each test is the parquet file layout of a partition.
//!
//! The output is a representation of the steps the compactor chose to
//! take and the final layout of the parquet files in the partition.
//!
//! # Interpreting test lines
//!
//! These tests use `insta` to compare an inlined string representation of
//! what the compactor did.
//!
//! Each line in the representation represents either some metadata or
//! a parquet file, with a visual depiction of its `min_time` and
//! `max_time` (the minimum timestamp and maximum timestamp for data
//! in the file).
//!
//! For example:
//!
//! ```text
//! - L0.3[300,350] 5kb                                          |-L0.3-|
//! ```
//!
//! Represents the following [`ParquetFile`]:
//!
//! ```text
//! ParquetFile {
//!     id: 3,
//!     compaction_level: L0,
//!     min_time: 300,
//!     max_time: 350,
//!     file_size_bytes: 5*1024,
//! }
//! ```
//!
//! The `|-L0.3-|` shows the relative location of `min_time` (`|-`)
//! and `max_time` (`-|`) on a time line to help visualize the output.
//!
//! A file with `?` represents a `ParquetFileParams` (aka a file that
//! will be added to the catalog but is not yet, and thus has no id
//! assigned). So the following represents the same file as above, but
//! without an entry in the catalog:
//!
//! ```text
//! - L0.?[300,350] 5kb                                          |-L0.?-|
//! ```
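//!
//! # Example test shape
//!
//! As a rough sketch of how these helpers are typically combined, a
//! layout test builds a `TestSetup`, arranges some parquet files, runs
//! `run_layout_scenario`, and compares the returned lines against an
//! inlined `insta` snapshot. The test name below is illustrative and
//! the snapshot contents are elided:
//!
//! ```text
//! #[tokio::test]
//! async fn all_overlapping_l0() {
//!     let setup = layout_setup_builder().await.build().await;
//!     let setup = all_overlapping_l0_files(setup).await;
//!
//!     insta::assert_yaml_snapshot!(
//!         run_layout_scenario(&setup).await,
//!         @r###"
//!         ...  // inlined snapshot elided
//!         "###
//!     );
//! }
//! ```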

mod accumulated_size;
mod backfill;
mod common_use_cases;
mod core;
mod created_at;
mod knobs;
mod large_files;
mod large_overlaps;
mod many_files;
mod single_timestamp;
mod stuck;

use std::{sync::atomic::Ordering, time::Duration};

use compactor_test_utils::{display_size, format_files, TestSetup, TestSetupBuilder};
use data_types::{CompactionLevel, ParquetFile};
use iox_tests::TestParquetFileBuilder;
use iox_time::Time;

pub(crate) const ONE_MB: u64 = 1024 * 1024;

/// Creates a `TestParquetFileBuilder` configured for layout tests
pub(crate) fn parquet_builder() -> TestParquetFileBuilder {
    TestParquetFileBuilder::default()
        .with_compaction_level(CompactionLevel::Initial)
        // need some line protocol (LP) to generate the schema
        .with_line_protocol("table,tag1=A,tag2=B,tag3=C field_int=1i 100")
}

/// Creates the default TestSetupBuilder for layout tests
///
/// NOTE: The builder is configured with parameters that are intended
/// to be as close as possible to what is configured on production
/// systems so that we can predict and reason about what the compactor
/// will do in production.
pub(crate) async fn layout_setup_builder() -> TestSetupBuilder<false> {
    TestSetup::builder()
        .await
        .with_percentage_max_file_size(20)
        .with_split_percentage(80)
        .with_max_num_files_per_plan(200)
        .with_min_num_l1_files_to_compact(10)
        .with_max_desired_file_size_bytes(100 * ONE_MB)
        .simulate_without_object_store()
}

/// Creates a scenario with ten 9 * 1MB overlapping L0 files
pub(crate) async fn all_overlapping_l0_files(setup: TestSetup) -> TestSetup {
    for i in 0..10 {
        setup
            .partition
            .create_parquet_file(
                parquet_builder()
                    .with_min_time(100)
                    .with_max_time(200000)
                    .with_file_size_bytes(9 * ONE_MB)
                    .with_max_l0_created_at(Time::from_timestamp_nanos(i + 1)),
            )
            .await;
    }
    setup
}

/// Runs the scenario and returns string-based output for comparison
pub(crate) async fn run_layout_scenario(setup: &TestSetup) -> Vec<String> {
    // verify the files are ok to begin with
    setup.verify_invariants().await;

    setup.catalog.time_provider.inc(Duration::from_nanos(200));

    let input_files = setup.list_by_table_not_to_delete().await;
    let mut output = format_files("**** Input Files ", &sort_files(input_files));

    // check if input files trip warnings (these situations may be deliberate)
    output.extend(setup.generate_warnings().await);

    // run the actual compaction
    let compact_result = setup.run_compact().await;

    // record what the compactor actually did
    if !setup.suppress_run_output {
        output.extend(compact_result.run_log);
    }

    // Record any skipped compactions (this output comes after what the compactor actually did)
    output.extend(get_skipped_compactions(setup).await);

    // record the final state of the catalog
    let output_files = setup.list_by_table_not_to_delete().await;

    let bytes_written = setup.bytes_written.load(Ordering::Relaxed) as i64;

    output.extend(format_files(
        format!(
            "**** Final Output Files ({} written)",
            display_size(bytes_written)
        ),
        &sort_files(output_files),
    ));

    // verify that the output of the compactor was valid as well
    setup.verify_invariants().await;

    // check if output files trip warnings (warnings here deserve scrutiny, but may be justifiable)
    output.extend(setup.generate_warnings().await);

    output
}

fn sort_files(mut files: Vec<ParquetFile>) -> Vec<ParquetFile> {
    // sort by ascending parquet file id for more consistent display
    files.sort_by(|f1, f2| f1.id.cmp(&f2.id));
    files
}

async fn get_skipped_compactions(setup: &TestSetup) -> Vec<String> {
    let skipped = setup
        .catalog
        .catalog
        .repositories()
        .await
        .partitions()
        .list_skipped_compactions()
        .await
        .unwrap();

    skipped
        .iter()
        .map(|skipped| {
            format!(
                "SKIPPED COMPACTION for {:?}: {}",
                skipped.partition_id, skipped.reason
            )
        })
        .collect()
}