feat: handle large-size overlapped files (#7079)

* feat: split start-level files that overlap with many files

* test: split files and their split times

* test: split test for L1 and L2 files

* feat: full implementation that supports large-size overlapped files

* chore: modify comments to reflect the changes

* fix: typo

* chore: update test output

* docs: clearer comments

* chore: remove empty test files. Will add them in a separate PR

* chore: Apply suggestions from code review

Co-authored-by: Andrew Lamb <alamb@influxdata.com>

* chore: address review comments

* chore: Apply suggestions from code review

Co-authored-by: Andrew Lamb <alamb@influxdata.com>

* refactor: add a knob to turn large-size overlaps on and off

* fix: typo

* chore: update test output after merging main

* fix: split_times should not include the max_time of the file

* fix: fix an overlap bug while limiting number of files to compact

* test: unit tests for different overlap cases of limit files to compact

* chore: increase time range of the tests to let the split files work correctly

* fix: skip compacting files of tiny ranges

* test: add tests for time range 1

* chore: address review comments

* chore: remove enable_large_size_overlap_files knob

* fix: fix a bug that sorts L1 files by their min_time instead of max_l0_created_at

* refactor: use the same order_files function after merging main into branch

* chore: typos and clearer comments

* chore: remove obsolete comments

* chore: add asserts per review suggestion

---------

Co-authored-by: Andrew Lamb <alamb@influxdata.com>
Nga Tran 2023-03-07 13:51:59 -05:00 committed by GitHub
parent 3f3a47eae9
commit 9e9e689a30
31 changed files with 6623 additions and 896 deletions


@ -247,6 +247,7 @@ where
.get(file_level)
.expect("all compaction levels covered")
.record(create.len() as u64);
self.job_bytes
.create
.get(file_level)


@ -27,15 +27,18 @@ impl DivideInitial for MultipleBranchesDivideInitial {
RoundInfo::ManySmallFiles {
start_level,
max_num_files_to_group,
max_total_file_size_to_group,
} => {
// To split the start_level files correctly so they can be compacted in the right order,
// the files must be sorted on max_l0_created_at if start_level is 0 or min_time otherwise.
// the files must be sorted on `max_l0_created_at` if start_level is 0 or `min_time` otherwise.
//
// Since L0s can overlap, they can contain duplicate data, which can only be resolved by
// using `created_at` time, so the `created_at` time must be used to sort so that it is
// using `max_l0_created_at` time, so the `max_l0_created_at` time must be used to sort so that it is
// preserved. Since L1s & L2s cannot overlap within their own level, they cannot contain
// duplicate data within their own level, so they do not need to preserve their `created_at`
// time, so they do not need to be sorted based on `created_at`. However, sorting by
// `min_time` makes it easier to avoid introducing overlaps within their levels.
// duplicate data within their own level, so they do not need to preserve their `max_l0_created_at`
// time, so they do not need to be sorted based on `max_l0_created_at`. However, sorting by
// `min_time` is needed to avoid introducing overlaps within their levels.
//
// See tests many_l0_files_different_created_order and many_l1_files_different_created_order for examples
let start_level_files = files
.into_iter()
@ -43,11 +46,33 @@ impl DivideInitial for MultipleBranchesDivideInitial {
.collect::<Vec<_>>();
let start_level_files = order_files(start_level_files, start_level);
// Split files into many small groups, each has at most max_num_files_to_group files
let branches = start_level_files
.chunks(*max_num_files_to_group)
.map(|c| c.to_vec())
.collect::<Vec<Vec<_>>>();
// Split L0s into many small groups, each with at most max_num_files_to_group files and a total size not exceeding max_total_file_size_to_group
// Collect files until either limit is reached
let mut branches = vec![];
let mut current_branch = vec![];
let mut current_branch_size = 0;
for f in start_level_files {
if current_branch.len() == *max_num_files_to_group
|| current_branch_size + f.file_size_bytes as usize
> *max_total_file_size_to_group
{
// If current_branch is empty, a single file is already over the size limit and cannot be grouped
if current_branch.is_empty() {
panic!("Size of a file {} is larger than the max size limit to compact. Please adjust the settings. See ticket https://github.com/influxdata/idpe/issues/17209", f.file_size_bytes);
}
}
branches.push(current_branch);
current_branch = vec![];
current_branch_size = 0;
}
current_branch_size += f.file_size_bytes as usize;
current_branch.push(f);
}
// push the last branch
if !current_branch.is_empty() {
branches.push(current_branch);
}
branches
}
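
The grouping rule above is easier to follow in isolation. Below is a minimal, self-contained sketch of the same greedy loop; `FileStub` and `group_files` are hypothetical names, and the sizes mirror the `test_divide_size_limit` case further down.

```rust
// Standalone sketch of the branch-grouping rule, assuming the input is already
// ordered (by max_l0_created_at for L0, by min_time otherwise).
#[derive(Debug, Clone)]
struct FileStub {
    file_size_bytes: usize,
}

fn group_files(
    files: Vec<FileStub>,
    max_num_files: usize,
    max_total_size: usize,
) -> Vec<Vec<FileStub>> {
    let mut branches = vec![];
    let mut current = vec![];
    let mut current_size = 0;
    for f in files {
        // Close the current branch if adding `f` would break either limit.
        if current.len() == max_num_files || current_size + f.file_size_bytes > max_total_size {
            // An empty branch here means one single file exceeds the size limit.
            assert!(!current.is_empty(), "single file exceeds max_total_size");
            branches.push(std::mem::take(&mut current));
            current_size = 0;
        }
        current_size += f.file_size_bytes;
        current.push(f);
    }
    if !current.is_empty() {
        branches.push(current); // the last, partially filled branch
    }
    branches
}

fn main() {
    // Sizes 90, 20, 30 with limits (10 files, 100 bytes) split into [90] and
    // [20, 30], matching test_divide_size_limit below.
    let files = [90, 20, 30].map(|s| FileStub { file_size_bytes: s }).to_vec();
    assert_eq!(group_files(files, 10, 100).len(), 2);
}
```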
@ -56,14 +81,14 @@ impl DivideInitial for MultipleBranchesDivideInitial {
}
}
// Return a sorted version of the given files.
// The order is used to split the files and form the right groups of files to compact
/// Return a sorted version of the given files.
/// The order is used to split the files and form the right groups of files to compact
// and deduplicate correctly to fewer and larger but same-level files
//
// All given files are in the same given start_level.
// They will be sorted on their `max_l0_created_at` if the start_level is 0,
// otherwise on their `min_time`
fn order_files(files: Vec<ParquetFile>, start_level: &CompactionLevel) -> Vec<ParquetFile> {
///
/// All given files are in the same given start_level.
/// They will be sorted on their `max_l0_created_at` if the start_level is 0,
/// otherwise on their `min_time`
pub fn order_files(files: Vec<ParquetFile>, start_level: &CompactionLevel) -> Vec<ParquetFile> {
let mut files = files;
if *start_level == CompactionLevel::Initial {
files.sort_by(|a, b| a.max_l0_created_at.cmp(&b.max_l0_created_at));
@ -89,10 +114,11 @@ mod tests {
}
#[test]
fn test_divide() {
fn test_divide_num_file() {
let round_info = RoundInfo::ManySmallFiles {
start_level: CompactionLevel::Initial,
max_num_files_to_group: 2,
max_total_file_size_to_group: 100,
};
let divide = MultipleBranchesDivideInitial::new();
@ -122,4 +148,69 @@ mod tests {
assert_eq!(branches[0], vec![f1, f2]);
assert_eq!(branches[1], vec![f3]);
}
#[test]
#[should_panic(
expected = "Size of a file 50 is larger than the max size limit to compact. Please adjust the settings"
)]
fn test_divide_size_limit_too_small() {
let round_info = RoundInfo::ManySmallFiles {
start_level: CompactionLevel::Initial,
max_num_files_to_group: 10,
max_total_file_size_to_group: 10,
};
let divide = MultipleBranchesDivideInitial::new();
let f1 = ParquetFileBuilder::new(1)
.with_compaction_level(CompactionLevel::Initial)
.with_max_l0_created_at(1)
.with_file_size_bytes(50)
.build();
let f2 = ParquetFileBuilder::new(2)
.with_compaction_level(CompactionLevel::Initial)
.with_max_l0_created_at(5)
.with_file_size_bytes(5)
.build();
// files in random order of max_l0_created_at
let files = vec![f2, f1];
// panic
let _branches = divide.divide(files, &round_info);
}
#[test]
fn test_divide_size_limit() {
let round_info = RoundInfo::ManySmallFiles {
start_level: CompactionLevel::Initial,
max_num_files_to_group: 10,
max_total_file_size_to_group: 100,
};
let divide = MultipleBranchesDivideInitial::new();
let f1 = ParquetFileBuilder::new(1)
.with_compaction_level(CompactionLevel::Initial)
.with_max_l0_created_at(1)
.with_file_size_bytes(90)
.build();
let f2 = ParquetFileBuilder::new(2)
.with_compaction_level(CompactionLevel::Initial)
.with_max_l0_created_at(5)
.with_file_size_bytes(20)
.build();
let f3 = ParquetFileBuilder::new(3)
.with_compaction_level(CompactionLevel::Initial)
.with_max_l0_created_at(10)
.with_file_size_bytes(30)
.build();
// files in random order of max_l0_created_at
let files = vec![f2.clone(), f3.clone(), f1.clone()];
let branches = divide.divide(files, &round_info);
// output branches must be in order of their max_l0_created_at
assert_eq!(branches.len(), 2);
assert_eq!(branches[0], vec![f1]);
assert_eq!(branches[1], vec![f2, f3]);
}
}


@ -49,7 +49,8 @@ where
partition_id = partition_info.partition_id.get(),
target_level = %classification.target_level,
round_info = %round_info,
files_to_compacts = classification.files_to_compact.len(),
files_to_compact = classification.files_to_compact_len(),
files_to_split = classification.files_to_split_len(),
files_to_upgrade = classification.files_to_upgrade.len(),
files_to_keep = classification.files_to_keep.len(),
"file classification"


@ -3,8 +3,10 @@ use std::fmt::Display;
use data_types::{CompactionLevel, ParquetFile};
use crate::{
components::files_split::FilesSplit, file_classification::FileClassification,
partition_info::PartitionInfo, RoundInfo,
components::{files_split::FilesSplit, split_or_compact::SplitOrCompact},
file_classification::{FileClassification, FilesToCompactOrSplit},
partition_info::PartitionInfo,
RoundInfo,
};
use super::FileClassifier;
@ -33,50 +35,70 @@ use super::FileClassifier;
/// [non overlap split (FO)] | :
/// | | | :
/// | | | :
/// | +------------+-->(files keep) :
/// | :
/// | :
/// | +------------+------+ :
/// | | :
/// | | :
/// | +................................+
/// | :
/// V V
/// [upgrade split (FU)]
/// | |
/// | |
/// V V
/// (file compact) (file upgrade)
/// | : | :
/// V V | :
/// [upgrade split (FU)] | :
/// | | | :
/// | | | :
/// | V | :
/// | (files upgrade) | :
/// | | :
/// | +................................+
/// | | |
/// V V |
/// [split or compact (FSC)] |
/// | | |
/// | +-------------------+
/// | |
/// V V
/// (files compact or split) (files keep)
/// ```
#[derive(Debug)]
pub struct SplitBasedFileClassifier<FT, FO, FU>
pub struct SplitBasedFileClassifier<FT, FO, FU, FSC>
where
FT: FilesSplit,
FO: FilesSplit,
FU: FilesSplit,
FSC: SplitOrCompact,
{
target_level_split: FT,
non_overlap_split: FO,
upgrade_split: FU,
split_or_compact: FSC,
}
impl<FT, FO, FU> SplitBasedFileClassifier<FT, FO, FU>
impl<FT, FO, FU, FSC> SplitBasedFileClassifier<FT, FO, FU, FSC>
where
FT: FilesSplit,
FO: FilesSplit,
FU: FilesSplit,
FSC: SplitOrCompact,
{
pub fn new(target_level_split: FT, non_overlap_split: FO, upgrade_split: FU) -> Self {
pub fn new(
target_level_split: FT,
non_overlap_split: FO,
upgrade_split: FU,
split_or_compact: FSC,
) -> Self {
Self {
target_level_split,
non_overlap_split,
upgrade_split,
split_or_compact,
}
}
}
impl<FT, FO, FU> Display for SplitBasedFileClassifier<FT, FO, FU>
impl<FT, FO, FU, FSC> Display for SplitBasedFileClassifier<FT, FO, FU, FSC>
where
FT: FilesSplit,
FO: FilesSplit,
FU: FilesSplit,
FSC: SplitOrCompact,
{
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
@ -87,15 +109,16 @@ where
}
}
impl<FT, FO, FU> FileClassifier for SplitBasedFileClassifier<FT, FO, FU>
impl<FT, FO, FU, FSC> FileClassifier for SplitBasedFileClassifier<FT, FO, FU, FSC>
where
FT: FilesSplit,
FO: FilesSplit,
FU: FilesSplit,
FSC: SplitOrCompact,
{
fn classify(
&self,
_partition_info: &PartitionInfo,
partition_info: &PartitionInfo,
round_info: &RoundInfo,
files: Vec<ParquetFile>,
) -> FileClassification {
@ -123,13 +146,23 @@ where
self.non_overlap_split.apply(files_to_compact, target_level);
files_to_keep.extend(non_overlapping_files);
// To have efficient compaction performance, we only need to uprade (catalog update only) eligible files
// To have efficient compaction performance, we only need to upgrade (catalog update only) eligible files
let (files_to_compact, files_to_upgrade) =
self.upgrade_split.apply(files_to_compact, target_level);
// See if we need to split start-level files because we are over the compaction size limit
let (files_to_compact_or_split, other_files) =
self.split_or_compact
.apply(partition_info, files_to_compact, target_level);
files_to_keep.extend(other_files);
// Target level of split files is the level of the input files, all of which are in the same level,
// while target level of compacted files is `target_level`, the highest level of the input files
let target_level = files_to_compact_or_split.target_level(target_level);
FileClassification {
target_level,
files_to_compact,
files_to_compact_or_split,
files_to_upgrade,
files_to_keep,
}
@ -154,7 +187,7 @@ fn file_classification_for_many_files(
FileClassification {
target_level,
files_to_compact,
files_to_compact_or_split: FilesToCompactOrSplit::FilesToCompact(files_to_compact),
files_to_upgrade: vec![],
files_to_keep: vec![],
}
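
The `FilesToCompactOrSplit` and `FileToSplit` types used throughout this diff are defined in `file_classification.rs`, which is not part of this excerpt. Based on the constructors and methods that do appear here, a plausible shape is the sketch below; the exact fields and method bodies are assumptions, not the committed code.

```rust
use data_types::{CompactionLevel, ParquetFile};

/// One file plus the times at which it should be split (see identify_files_to_split).
#[derive(Debug, Clone, PartialEq)]
pub struct FileToSplit {
    pub file: ParquetFile,
    pub split_times: Vec<i64>,
}

/// Either a set of files to compact together, or a set of files to split first.
#[derive(Debug, Clone, PartialEq)]
pub enum FilesToCompactOrSplit {
    FilesToCompact(Vec<ParquetFile>),
    FilesToSplit(Vec<FileToSplit>),
}

impl FilesToCompactOrSplit {
    pub fn files_to_compact_len(&self) -> usize {
        match self {
            Self::FilesToCompact(files) => files.len(),
            Self::FilesToSplit(_) => 0,
        }
    }

    pub fn files_to_split_len(&self) -> usize {
        match self {
            Self::FilesToCompact(_) => 0,
            Self::FilesToSplit(files) => files.len(),
        }
    }

    pub fn is_empty(&self) -> bool {
        self.files_to_compact_len() + self.files_to_split_len() == 0
    }

    /// Split outputs stay in the input files' own level; compacted outputs
    /// move to `target_level` (see the comment in `classify` above).
    pub fn target_level(&self, target_level: CompactionLevel) -> CompactionLevel {
        match self {
            Self::FilesToCompact(_) => target_level,
            Self::FilesToSplit(files) => files
                .first()
                .map(|f| f.file.compaction_level)
                .unwrap_or(target_level),
        }
    }
}
```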


@ -30,7 +30,6 @@ use super::{
divide_initial::multiple_branches::MultipleBranchesDivideInitial,
file_classifier::{
logging::LoggingFileClassifierWrapper, split_based::SplitBasedFileClassifier,
FileClassifier,
},
file_filter::level_range::LevelRangeFileFilter,
files_split::{
@ -57,8 +56,8 @@ use super::{
greater_size_matching_files::GreaterSizeMatchingFilesPartitionFilter,
has_files::HasFilesPartitionFilter, has_matching_file::HasMatchingFilePartitionFilter,
logging::LoggingPartitionFilterWrapper, max_num_columns::MaxNumColumnsPartitionFilter,
max_parquet_bytes::MaxParquetBytesPartitionFilter, metrics::MetricsPartitionFilterWrapper,
never_skipped::NeverSkippedPartitionFilter, or::OrPartitionFilter, PartitionFilter,
metrics::MetricsPartitionFilterWrapper, never_skipped::NeverSkippedPartitionFilter,
or::OrPartitionFilter, unable_to_compact::UnableToCompactPartitionFilter, PartitionFilter,
},
partition_info_source::sub_sources::SubSourcePartitionInfoSource,
partition_source::{
@ -80,6 +79,7 @@ use super::{
round_split::many_files::ManyFilesRoundSplit,
scratchpad::{noop::NoopScratchpadGen, prod::ProdScratchpadGen, ScratchpadGen},
skipped_compactions_source::catalog::CatalogSkippedCompactionsSource,
split_or_compact::{logging::LoggingSplitOrCompactWrapper, split_compact::SplitCompact},
Components,
};
@ -135,7 +135,7 @@ pub fn hardcoded_components(config: &Config) -> Arc<Components> {
partition_filters.append(&mut make_partition_filters(config));
let partition_resource_limit_filters: Vec<Arc<dyn PartitionFilter>> = vec![Arc::new(
MaxParquetBytesPartitionFilter::new(config.max_input_parquet_bytes_per_partition),
UnableToCompactPartitionFilter::new(config.max_input_parquet_bytes_per_partition),
)];
let partition_done_sink: Arc<dyn PartitionDoneSink> = if config.shadow_mode {
@ -277,7 +277,10 @@ pub fn hardcoded_components(config: &Config) -> Arc<Components> {
Arc::clone(&config.catalog),
)),
round_info_source: Arc::new(LoggingRoundInfoWrapper::new(Arc::new(
LevelBasedRoundInfo::new(config.max_num_files_per_plan),
LevelBasedRoundInfo::new(
config.max_num_files_per_plan,
config.max_input_parquet_bytes_per_partition,
),
))),
partition_filter: Arc::new(LoggingPartitionFilterWrapper::new(
MetricsPartitionFilterWrapper::new(
@ -308,8 +311,15 @@ pub fn hardcoded_components(config: &Config) -> Arc<Components> {
round_split: Arc::new(ManyFilesRoundSplit::new()),
divide_initial: Arc::new(MultipleBranchesDivideInitial::new()),
scratchpad_gen,
file_classifier: Arc::new(LoggingFileClassifierWrapper::new(make_file_classifier(
config,
file_classifier: Arc::new(LoggingFileClassifierWrapper::new(Arc::new(
SplitBasedFileClassifier::new(
TargetLevelSplit::new(),
NonOverlapSplit::new(),
UpgradeSplit::new(config.max_desired_file_size_bytes),
LoggingSplitOrCompactWrapper::new(SplitCompact::new(
config.max_input_parquet_bytes_per_partition,
)),
),
))),
partition_resource_limit_filter: Arc::new(LoggingPartitionFilterWrapper::new(
MetricsPartitionFilterWrapper::new(
@ -345,11 +355,3 @@ fn make_partition_filters(config: &Config) -> Vec<Arc<dyn PartitionFilter>> {
)),
]))]
}
fn make_file_classifier(config: &Config) -> Arc<dyn FileClassifier> {
Arc::new(SplitBasedFileClassifier::new(
TargetLevelSplit::new(),
NonOverlapSplit::new(),
UpgradeSplit::new(config.max_desired_file_size_bytes),
))
}


@ -37,7 +37,7 @@ impl<T> IRPlanner for LoggingIRPlannerWrapper<T>
where
T: IRPlanner,
{
fn plan(
fn compact_plan(
&self,
files: Vec<ParquetFile>,
partition: Arc<PartitionInfo>,
@ -46,7 +46,7 @@ where
let partition_id = partition.partition_id;
let n_input_files = files.len();
let input_file_size_bytes = files.iter().map(|f| f.file_size_bytes).sum::<i64>();
let plan = self.inner.plan(files, partition, compaction_level);
let plan = self.inner.compact_plan(files, partition, compaction_level);
info!(
partition_id = partition_id.get(),
@ -55,7 +55,34 @@ where
n_output_files = plan.n_output_files(),
compaction_level = compaction_level as i16,
%plan,
"created IR plan",
"created IR compact plan",
);
plan
}
fn split_plan(
&self,
file: ParquetFile,
split_times: Vec<i64>,
partition: Arc<PartitionInfo>,
compaction_level: CompactionLevel,
) -> PlanIR {
let partition_id = partition.partition_id;
let n_input_files = 1;
let input_file_size_bytes = file.file_size_bytes;
let plan = self
.inner
.split_plan(file, split_times, partition, compaction_level);
info!(
partition_id = partition_id.get(),
n_input_files,
input_file_size_bytes,
n_output_files = plan.n_output_files(),
compaction_level = compaction_level as i16,
%plan,
"created IR split plan",
);
plan


@ -12,10 +12,20 @@ use crate::{partition_info::PartitionInfo, plan_ir::PlanIR};
/// Creates [`PlanIR`] that describes what files should be compacted and updated
pub trait IRPlanner: Debug + Display + Send + Sync {
fn plan(
/// Build a plan to compact given files
fn compact_plan(
&self,
files: Vec<ParquetFile>,
partition: Arc<PartitionInfo>,
compaction_level: CompactionLevel,
) -> PlanIR;
/// Build a plan to split a given file at the given split times
fn split_plan(
&self,
file: ParquetFile,
split_times: Vec<i64>,
partition: Arc<PartitionInfo>,
compaction_level: CompactionLevel,
) -> PlanIR;
}
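
To connect the two entry points: a caller holding a `FilesToCompactOrSplit` would dispatch roughly as follows. This is a hypothetical fragment, not code from this commit; the `build_plans` name and the exact call site are assumptions, and the crate's `PlanIR` and `PartitionInfo` types are taken as given.

```rust
use std::sync::Arc;

// Hypothetical dispatch from a classification result to IR plans.
fn build_plans(
    planner: &dyn IRPlanner,
    classification: FilesToCompactOrSplit,
    partition: Arc<PartitionInfo>,
    target_level: CompactionLevel,
) -> Vec<PlanIR> {
    match classification {
        // One plan merges all files; the planner may still split the *output*
        // by size (a PlanIR::Split with computed split_times).
        FilesToCompactOrSplit::FilesToCompact(files) => {
            vec![planner.compact_plan(files, partition, target_level)]
        }
        // One split plan per oversized file, at the precomputed split times.
        FilesToCompactOrSplit::FilesToSplit(files) => files
            .into_iter()
            .map(|f| {
                planner.split_plan(f.file, f.split_times, Arc::clone(&partition), target_level)
            })
            .collect(),
    }
}
```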


@ -113,7 +113,9 @@ impl Display for V1IRPlanner {
}
impl IRPlanner for V1IRPlanner {
fn plan(
/// Build a plan to compact many files into a single file. Since we limit the size of output files,
/// if the compacted result is larger than that limit, we will split the output into multiple files
fn compact_plan(
&self,
files: Vec<ParquetFile>,
_partition: Arc<PartitionInfo>,
@ -145,7 +147,11 @@ impl IRPlanner for V1IRPlanner {
let files = files
.into_iter()
.map(|file| {
let order = order(file.compaction_level, compaction_level, file.created_at);
let order = order(
file.compaction_level,
compaction_level,
file.max_l0_created_at,
);
FileIR { file, order }
})
.collect::<Vec<_>>();
@ -175,26 +181,49 @@ impl IRPlanner for V1IRPlanner {
// everything into one file
PlanIR::Compact { files }
} else {
// split compact query plan
// split compact query plan to split the result into multiple files
PlanIR::Split { files, split_times }
}
}
}
/// Build a plan to split a file into multiple files based on the given split times
fn split_plan(
&self,
file: ParquetFile,
split_times: Vec<i64>,
_partition: Arc<PartitionInfo>,
compaction_level: CompactionLevel,
) -> PlanIR {
let order = order(
file.compaction_level,
compaction_level,
file.max_l0_created_at,
);
let file = FileIR { file, order };
PlanIR::Split {
files: vec![file],
split_times,
}
}
}
// Order of the chunks so they can be deduplicated correctly
fn order(
compaction_level: CompactionLevel,
target_level: CompactionLevel,
created_at: Timestamp,
max_l0_created_at: Timestamp,
) -> ChunkOrder {
// TODO: If we change the design specified in driver.rs's compact functions, we will need to refine this
// Currently, we only compact files of level_n with level_n+1 and produce level_n+1 files,
// under the strict design that:
// . Level-0 files can overlap with any files.
// . Level-N files (N > 0) cannot overlap with any files in the same level.
// . For Level-0 files, we always pick the smaller `created_at` files to compact (with
// each other and overlapped L1 files) first.
// . For Level-0 files, we always pick the smaller `max_l0_created_at` files to compact (with
// each other and overlapped L1 files) first. `max_l0_created_at` is the max created time of all L0 files
// that were compacted into this given file. This value is used to order chunks for deduplication.
// . Level-N+1 files are results of compacting Level-N and/or Level-N+1 files, their `created_at`
// can be after the `created_at` of other Level-N files but they may include data loaded before
// the other Level-N files. Hence we should never use `created_at` of Level-N+1 files to order
@ -210,7 +239,7 @@ fn order(
(CompactionLevel::Initial, CompactionLevel::Initial)
| (CompactionLevel::Initial, CompactionLevel::FileNonOverlapped)
| (CompactionLevel::FileNonOverlapped, CompactionLevel::Final) => {
ChunkOrder::new(created_at.get())
ChunkOrder::new(max_l0_created_at.get())
}
(CompactionLevel::FileNonOverlapped, CompactionLevel::FileNonOverlapped)
| (CompactionLevel::Final, CompactionLevel::Final) => ChunkOrder::new(0),
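
Restated in isolation, the rule above assigns a meaningful order only where duplication is possible; same-level L1 or L2 inputs cannot overlap, so any constant order works. A standalone sketch, with plain `i64` standing in for `ChunkOrder` (an assumption for illustration):

```rust
#[derive(Debug, Clone, Copy, PartialEq)]
enum Level {
    L0,
    L1,
    L2,
}

// Files that may contain duplicates are ordered by max_l0_created_at;
// non-overlapping same-level files all get the same constant order.
fn chunk_order(file_level: Level, target_level: Level, max_l0_created_at: i64) -> i64 {
    match (file_level, target_level) {
        // L0->L0, L0->L1 and L1->L2 compactions: duplicates are possible.
        (Level::L0, Level::L0) | (Level::L0, Level::L1) | (Level::L1, Level::L2) => {
            max_l0_created_at
        }
        // L1 with L1, L2 with L2: no overlap within a level, order is irrelevant.
        (Level::L1, Level::L1) | (Level::L2, Level::L2) => 0,
        // Other combinations are never produced by the compactor (assumed here).
        _ => unreachable!("invalid (file level, target level) pair"),
    }
}

fn main() {
    assert_eq!(chunk_order(Level::L0, Level::L1, 42), 42);
    assert_eq!(chunk_order(Level::L1, Level::L1, 42), 0);
}
```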


@ -35,6 +35,7 @@ pub mod round_info_source;
pub mod round_split;
pub mod scratchpad;
pub mod skipped_compactions_source;
pub mod split_or_compact;
pub mod tables_source;
/// Pluggable system to determine compactor behavior. Please see


@ -1,96 +0,0 @@
use std::fmt::Display;
use async_trait::async_trait;
use data_types::ParquetFile;
use crate::{
error::{DynError, ErrorKind, SimpleError},
PartitionInfo,
};
use super::PartitionFilter;
#[derive(Debug)]
pub struct MaxParquetBytesPartitionFilter {
max_parquet_bytes: usize,
}
impl MaxParquetBytesPartitionFilter {
pub fn new(max_parquet_bytes: usize) -> Self {
Self { max_parquet_bytes }
}
}
impl Display for MaxParquetBytesPartitionFilter {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "max_parquet_bytes")
}
}
#[async_trait]
impl PartitionFilter for MaxParquetBytesPartitionFilter {
async fn apply(
&self,
partition_info: &PartitionInfo,
files: &[ParquetFile],
) -> Result<bool, DynError> {
let sum = files
.iter()
.map(|f| usize::try_from(f.file_size_bytes).unwrap_or(0))
.sum::<usize>();
if sum <= self.max_parquet_bytes {
Ok(true)
} else {
Err(SimpleError::new(
ErrorKind::OutOfMemory,
format!(
"partition {} has {} parquet file bytes, limit is {}",
partition_info.partition_id, sum, self.max_parquet_bytes
),
)
.into())
}
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use crate::{error::ErrorKindExt, test_utils::PartitionInfoBuilder};
use iox_tests::ParquetFileBuilder;
use super::*;
#[test]
fn test_display() {
assert_eq!(
MaxParquetBytesPartitionFilter::new(10).to_string(),
"max_parquet_bytes"
);
}
#[tokio::test]
async fn test_apply() {
let filter = MaxParquetBytesPartitionFilter::new(10);
let f1 = ParquetFileBuilder::new(1).with_file_size_bytes(7).build();
let f2 = ParquetFileBuilder::new(2).with_file_size_bytes(4).build();
let f3 = ParquetFileBuilder::new(3).with_file_size_bytes(3).build();
let p_info = Arc::new(PartitionInfoBuilder::new().with_partition_id(1).build());
assert!(filter.apply(&p_info, &[]).await.unwrap());
assert!(filter.apply(&p_info, &[f1.clone()]).await.unwrap());
assert!(filter
.apply(&p_info, &[f1.clone(), f3.clone()])
.await
.unwrap());
let err = filter.apply(&p_info, &[f1, f2]).await.unwrap_err();
assert_eq!(err.classify(), ErrorKind::OutOfMemory);
assert_eq!(
err.to_string(),
"partition 1 has 11 parquet file bytes, limit is 10"
);
}
}


@ -12,10 +12,10 @@ pub mod has_files;
pub mod has_matching_file;
pub mod logging;
pub mod max_num_columns;
pub mod max_parquet_bytes;
pub mod metrics;
pub mod never_skipped;
pub mod or;
pub mod unable_to_compact;
/// Filters partition based on ID and parquet files.
///


@ -0,0 +1,92 @@
use std::fmt::Display;
use async_trait::async_trait;
use data_types::ParquetFile;
use crate::{
error::{DynError, ErrorKind, SimpleError},
PartitionInfo,
};
use super::PartitionFilter;
#[derive(Debug)]
pub struct UnableToCompactPartitionFilter {
max_parquet_bytes: usize,
}
impl UnableToCompactPartitionFilter {
pub fn new(max_parquet_bytes: usize) -> Self {
Self { max_parquet_bytes }
}
}
impl Display for UnableToCompactPartitionFilter {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "unable_to_compact")
}
}
#[async_trait]
impl PartitionFilter for UnableToCompactPartitionFilter {
async fn apply(
&self,
partition_info: &PartitionInfo,
files: &[ParquetFile],
) -> Result<bool, DynError> {
if !files.is_empty() {
// There are files to compact or split
Ok(true)
} else {
// No files means the split_compact cannot find any reasonable set of files to compact or split
// TODO: after https://github.com/influxdata/idpe/issues/17208 that renames the size limit and
// https://github.com/influxdata/idpe/issues/17209 for modifying the knobs, this message should be modified accordingly
Err(SimpleError::new(
ErrorKind::OutOfMemory,
format!(
"partition {} has overlapped files that exceed max compact size limit {}. The may happen if a large amount of data has the same timestamp",
partition_info.partition_id, self.max_parquet_bytes
),
)
.into())
}
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use crate::{error::ErrorKindExt, test_utils::PartitionInfoBuilder};
use iox_tests::ParquetFileBuilder;
use super::*;
#[test]
fn test_display() {
assert_eq!(
UnableToCompactPartitionFilter::new(10).to_string(),
"unable_to_compact"
);
}
#[tokio::test]
async fn test_apply_empty() {
let filter = UnableToCompactPartitionFilter::new(10);
let p_info = Arc::new(PartitionInfoBuilder::new().with_partition_id(1).build());
let err = filter.apply(&p_info, &[]).await.unwrap_err();
assert_eq!(err.classify(), ErrorKind::OutOfMemory);
assert_eq!(
err.to_string(),
"partition 1 has overlapped files that exceed max compact size limit 10. The may happen if a large amount of data has the same timestamp"
);
}
#[tokio::test]
async fn test_apply_not_empty() {
let filter = UnableToCompactPartitionFilter::new(10);
let p_info = Arc::new(PartitionInfoBuilder::new().with_partition_id(1).build());
let f1 = ParquetFileBuilder::new(1).with_file_size_bytes(7).build();
assert!(filter.apply(&p_info, &[f1]).await.unwrap());
}
}


@ -55,6 +55,7 @@ impl RoundInfoSource for LoggingRoundInfoWrapper {
#[derive(Debug)]
pub struct LevelBasedRoundInfo {
pub max_num_files_per_plan: usize,
pub max_total_file_size_per_plan: usize,
}
impl Display for LevelBasedRoundInfo {
@ -63,9 +64,10 @@ impl Display for LevelBasedRoundInfo {
}
}
impl LevelBasedRoundInfo {
pub fn new(max_num_files_per_plan: usize) -> Self {
pub fn new(max_num_files_per_plan: usize, max_total_file_size_per_plan: usize) -> Self {
Self {
max_num_files_per_plan,
max_total_file_size_per_plan,
}
}
@ -115,6 +117,7 @@ impl RoundInfoSource for LevelBasedRoundInfo {
return Ok(Arc::new(RoundInfo::ManySmallFiles {
start_level,
max_num_files_to_group: self.max_num_files_per_plan,
max_total_file_size_to_group: self.max_total_file_size_per_plan,
}));
}
@ -221,6 +224,7 @@ mod tests {
// max 2 files per plan
let round_info = LevelBasedRoundInfo {
max_num_files_per_plan: 2,
max_total_file_size_per_plan: 1000,
};
// f1 and f2 are not over limit


@ -61,6 +61,7 @@ mod tests {
let round_info = RoundInfo::ManySmallFiles {
start_level: CompactionLevel::Initial,
max_num_files_to_group: 2,
max_total_file_size_to_group: 100,
};
let split = ManyFilesRoundSplit::new();


@ -0,0 +1,634 @@
use std::collections::VecDeque;
use data_types::{CompactionLevel, ParquetFile, Timestamp};
use crate::components::{
divide_initial::multiple_branches::order_files,
files_split::{target_level_split::TargetLevelSplit, FilesSplit},
};
/// Return (`[files_to_compact]`, `[files_to_keep]`) of given files
/// such that `files_to_compact` are files to compact that are under the max_compact_size limit
/// and `files_to_keep` are the rest of the files that will be considered for compaction in the next round
///
/// To deduplicate data correctly, we need to select start-level files in their max_l0_created_at order
/// and they must be compacted with overlapped files in target level. See example below for the
/// correlation between created order and overlapped time ranges of files
///
/// Example:
///
/// Input Files: three L0 and three L1 files. The ID after the dot is the order the files are created
/// |---L0.1---| |---L0.3---| |---L0.2---| Note that L0.2 is created BEFORE L0.3 but has LATER time range
/// |---L1.1---| |---L1.2---| |---L1.3---|
///
/// Output of files_to_compact: only 3 possible choices:
/// 1. Smallest compacting set: L0.1 + L1.1
/// 2. Medium size compacting set: L0.1 + L1.1 + L0.2 + L1.2 + L1.3
/// Note that L1.2 overlaps with the time range of L0.1 + L0.2 and must be included here
/// 3. Largest compacting set: All input files
///
pub fn limit_files_to_compact(
max_compact_size: usize,
files: Vec<data_types::ParquetFile>,
target_level: CompactionLevel,
) -> (Vec<ParquetFile>, Vec<ParquetFile>) {
// panic if not all files are either in target level or start level
let start_level = target_level.prev();
assert!(files
.iter()
.all(|f| f.compaction_level == target_level || f.compaction_level == start_level));
// Get start-level and target-level files
let len = files.len();
let split = TargetLevelSplit::new();
let (start_level_files, mut target_level_files) = split.apply(files, start_level);
// Order start-level files so they can be grouped and compacted correctly
let start_level_files = order_files(start_level_files, &start_level);
let mut start_level_files = start_level_files.iter().collect::<VecDeque<_>>();
// Go over start-level files and find overlapped files in target level
let mut start_level_files_to_compact: Vec<ParquetFile> = Vec::new();
let mut target_level_files_to_compact = Vec::new();
let mut files_to_keep = Vec::new();
let mut total_size = 0;
while let Some(file) = start_level_files.pop_front() {
// A start-level file, if compacted, must be compacted with all of its overlapped target-level files.
// Thus compute the size needed before deciding to compact this file and its overlaps or not
// Time range of start_level_files_to_compact plus this file
let (min_time, max_time) = time_range(file, &start_level_files_to_compact);
// Get all target-level files that overlap with the time range and are not yet in target_level_files_to_compact
let overlapped_files: Vec<&ParquetFile> = target_level_files
.iter()
.filter(|f| f.overlaps_time_range(min_time, max_time))
.filter(|f| !target_level_files_to_compact.iter().any(|x| x == *f))
.collect();
// Size of the file and its overlapped files
let size = file.file_size_bytes
+ overlapped_files
.iter()
.map(|f| f.file_size_bytes)
.sum::<i64>();
// If total size is under limit, add this file and its overlapped files to files_to_compact
if total_size + size <= max_compact_size as i64 {
start_level_files_to_compact.push(file.clone());
target_level_files_to_compact
.extend(overlapped_files.into_iter().cloned().collect::<Vec<_>>());
total_size += size;
} else {
// Over limit, stop here
files_to_keep.push(file.clone());
break;
}
}
// Remove all files in target_level_files_to_compact from target_level_files
target_level_files.retain(|f| !target_level_files_to_compact.iter().any(|x| x == f));
// All files left in start_level_files and target_level_files are kept for next round
target_level_files.extend(start_level_files.into_iter().cloned().collect::<Vec<_>>());
files_to_keep.extend(target_level_files);
// All files in start_level_files_to_compact and target_level_files_to_compact will be compacted
let files_to_compact = start_level_files_to_compact
.into_iter()
.chain(target_level_files_to_compact.into_iter())
.collect::<Vec<_>>();
assert_eq!(files_to_compact.len() + files_to_keep.len(), len);
(files_to_compact, files_to_keep)
}
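
As a worked example of this loop, take the six 100-byte files from `test_compact_files_limit_3` below with a budget of `MAX_SIZE * 3 = 300` bytes:

```rust
// Worked example (files and sizes from test_compact_files_limit_3, budget = 300):
//
//   pop L0.1[150,250] -> overlaps L1.11[100,200]           -> size 200, total 200 <= 300: take
//   pop L0.2[550,650] -> range widens to [150,650], so it
//                        now overlaps L1.12 and L1.13 too  -> size 300, total 500 >  300: stop
//
// Result: files_to_compact = [L0.1, L1.11]; the other four files are kept.
// Note the all-or-nothing rule: a start-level file is taken together with all of
// its not-yet-selected target-level overlaps, or not at all.
```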
/// Return time range of the given file and the list of given files
fn time_range(file: &ParquetFile, files: &[ParquetFile]) -> (Timestamp, Timestamp) {
let mut min_time = file.min_time;
let mut max_time = file.max_time;
files.iter().for_each(|f| {
min_time = min_time.min(f.min_time);
max_time = max_time.max(f.max_time);
});
(min_time, max_time)
}
#[cfg(test)]
mod tests {
use compactor2_test_utils::{
create_l1_files, create_overlapped_files, create_overlapped_l0_l1_files_2,
create_overlapped_start_target_files, format_files, format_files_split,
};
use data_types::CompactionLevel;
use crate::components::split_or_compact::files_to_compact::limit_files_to_compact;
const MAX_SIZE: usize = 100;
#[test]
fn test_compact_empty() {
let files = vec![];
let (files_to_compact, files_to_keep) =
limit_files_to_compact(MAX_SIZE, files, CompactionLevel::Initial);
assert!(files_to_compact.is_empty());
assert!(files_to_keep.is_empty());
}
#[test]
#[should_panic]
fn test_compact_wrong_target_level() {
// all L1 files
let files = create_l1_files(1);
// Target is L0 while all files are in L1 --> panic
let (_files_to_compact, _files_to_keep) =
limit_files_to_compact(MAX_SIZE, files, CompactionLevel::Initial);
}
#[test]
#[should_panic]
fn test_compact_files_three_level_files() {
// Three level files
let files = create_overlapped_files();
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0 "
- "L0.2[650,750] 1b |-L0.2-| "
- "L0.1[450,620] 1b |----L0.1-----| "
- "L0.3[800,900] 100b |-L0.3-| "
- "L1 "
- "L1.13[600,700] 100b |L1.13-| "
- "L1.12[400,500] 1b |L1.12-| "
- "L1.11[250,350] 1b |L1.11-| "
- "L2 "
- "L2.21[0,100] 1b |L2.21-| "
- "L2.22[200,300] 1b |L2.22-| "
"###
);
// panic because it only handles at most 2 levels next to each other
let (_files_to_compact, _files_to_keep) =
limit_files_to_compact(MAX_SIZE, files, CompactionLevel::FileNonOverlapped);
}
#[test]
fn test_compact_files_no_limit() {
let files = create_overlapped_l0_l1_files_2(MAX_SIZE as i64);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 100b "
- "L0.2[650,750] |-----L0.2-----| "
- "L0.1[450,620] |----------L0.1-----------| "
- "L0.3[800,900] |-----L0.3-----|"
- "L1, all files 100b "
- "L1.13[600,700] |----L1.13-----| "
- "L1.12[400,500] |----L1.12-----| "
"###
);
// size limit > total size --> files to compact = all L0s and overlapped L1s
let (files_to_compact, files_to_keep) =
limit_files_to_compact(MAX_SIZE * 5 + 1, files, CompactionLevel::FileNonOverlapped);
assert_eq!(files_to_compact.len(), 5);
assert_eq!(files_to_keep.len(), 0);
// See layout of 2 set of files
insta::assert_yaml_snapshot!(
format_files_split("files to compact:", &files_to_compact, "files to keep:", &files_to_keep),
@r###"
---
- "files to compact:"
- "L0, all files 100b "
- "L0.1[450,620] |----------L0.1-----------| "
- "L0.2[650,750] |-----L0.2-----| "
- "L0.3[800,900] |-----L0.3-----|"
- "L1, all files 100b "
- "L1.13[600,700] |----L1.13-----| "
- "L1.12[400,500] |----L1.12-----| "
- "files to keep:"
"###
);
}
#[test]
fn test_compact_files_limit_too_small() {
let files = create_overlapped_l0_l1_files_2(MAX_SIZE as i64);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 100b "
- "L0.2[650,750] |-----L0.2-----| "
- "L0.1[450,620] |----------L0.1-----------| "
- "L0.3[800,900] |-----L0.3-----|"
- "L1, all files 100b "
- "L1.13[600,700] |----L1.13-----| "
- "L1.12[400,500] |----L1.12-----| "
"###
);
// size limit too small to compact anything
let (files_to_compact, files_to_keep) =
limit_files_to_compact(MAX_SIZE, files, CompactionLevel::FileNonOverlapped);
assert_eq!(files_to_compact.len(), 0);
assert_eq!(files_to_keep.len(), 5);
// See layout of 2 set of files
insta::assert_yaml_snapshot!(
format_files_split("files to compact:", &files_to_compact, "files to keep:", &files_to_keep),
@r###"
---
- "files to compact:"
- "files to keep:"
- "L0, all files 100b "
- "L0.1[450,620] |----------L0.1-----------| "
- "L0.2[650,750] |-----L0.2-----| "
- "L0.3[800,900] |-----L0.3-----|"
- "L1, all files 100b "
- "L1.13[600,700] |----L1.13-----| "
- "L1.12[400,500] |----L1.12-----| "
"###
);
}
#[test]
fn test_compact_files_limit() {
let files = create_overlapped_l0_l1_files_2(MAX_SIZE as i64);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 100b "
- "L0.2[650,750] |-----L0.2-----| "
- "L0.1[450,620] |----------L0.1-----------| "
- "L0.3[800,900] |-----L0.3-----|"
- "L1, all files 100b "
- "L1.13[600,700] |----L1.13-----| "
- "L1.12[400,500] |----L1.12-----| "
"###
);
// size limit < total size --> only enough to compact L0.1 with L1.12 and L1.13
let (files_to_compact, files_to_keep) =
limit_files_to_compact(MAX_SIZE * 3, files, CompactionLevel::FileNonOverlapped);
assert_eq!(files_to_compact.len(), 3);
assert_eq!(files_to_keep.len(), 2);
// See layout of 2 set of files
insta::assert_yaml_snapshot!(
format_files_split("files to compact:", &files_to_compact, "files to keep:", &files_to_keep),
@r###"
---
- "files to compact:"
- "L0, all files 100b "
- "L0.1[450,620] |-------------------L0.1--------------------| "
- "L1, all files 100b "
- "L1.13[600,700] |---------L1.13----------| "
- "L1.12[400,500] |---------L1.12----------| "
- "files to keep:"
- "L0, all files 100b "
- "L0.2[650,750] |-------------L0.2-------------| "
- "L0.3[800,900] |-------------L0.3-------------|"
"###
);
}
#[test]
fn test_compact_files_limit_2() {
let files = create_overlapped_l0_l1_files_2(MAX_SIZE as i64);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 100b "
- "L0.2[650,750] |-----L0.2-----| "
- "L0.1[450,620] |----------L0.1-----------| "
- "L0.3[800,900] |-----L0.3-----|"
- "L1, all files 100b "
- "L1.13[600,700] |----L1.13-----| "
- "L1.12[400,500] |----L1.12-----| "
"###
);
// size limit < total size --> only enough to compact L0.1, L0.2 with L1.12 and L1.13
let (files_to_compact, files_to_keep) =
limit_files_to_compact(MAX_SIZE * 4, files, CompactionLevel::FileNonOverlapped);
assert_eq!(files_to_compact.len(), 4);
assert_eq!(files_to_keep.len(), 1);
// See layout of 2 set of files
insta::assert_yaml_snapshot!(
format_files_split("files to compact:", &files_to_compact, "files to keep:", &files_to_keep),
@r###"
---
- "files to compact:"
- "L0, all files 100b "
- "L0.1[450,620] |----------------L0.1----------------| "
- "L0.2[650,750] |--------L0.2--------| "
- "L1, all files 100b "
- "L1.13[600,700] |-------L1.13--------| "
- "L1.12[400,500] |-------L1.12--------| "
- "files to keep:"
- "L0, all files 100b "
- "L0.3[800,900] |-------------------------------------L0.3-------------------------------------|"
"###
);
}
#[test]
fn test_compact_files_limit_3() {
let files = create_overlapped_start_target_files(MAX_SIZE as i64, CompactionLevel::Initial);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 100b "
- "L0.2[550,650] |----L0.2----| "
- "L0.1[150,250] |----L0.1----| "
- "L0.3[350,450] |----L0.3----| "
- "L1, all files 100b "
- "L1.12[300,400] |---L1.12----| "
- "L1.13[500,600] |---L1.13----| "
- "L1.11[100,200] |---L1.11----| "
"###
);
// There are only 3 choices for compacting:
// 1. Smallest set: L0.1 with L1.11
// 2. Medium size set: L0.1, L0.2 with L1.11, L1.12, L1.13
// 3. All files: L0.1, L0.2, L0.3 with L1.11, L1.12, L1.13
// --------------------
// size limit = MAX_SIZE * 3 to force the first choice, L0.1 with L1.11
let (files_to_compact, files_to_keep) = limit_files_to_compact(
MAX_SIZE * 3,
files.clone(),
CompactionLevel::FileNonOverlapped,
);
assert_eq!(files_to_compact.len(), 2);
assert_eq!(files_to_keep.len(), 4);
// See layout of 2 set of files
insta::assert_yaml_snapshot!(
format_files_split("files to compact:", &files_to_compact, "files to keep:", &files_to_keep),
@r###"
---
- "files to compact:"
- "L0, all files 100b "
- "L0.1[150,250] |-----------------------L0.1------------------------| "
- "L1, all files 100b "
- "L1.11[100,200] |-----------------------L1.11-----------------------| "
- "files to keep:"
- "L0, all files 100b "
- "L0.2[550,650] |--------L0.2--------| "
- "L0.3[350,450] |--------L0.3--------| "
- "L1, all files 100b "
- "L1.12[300,400] |-------L1.12--------| "
- "L1.13[500,600] |-------L1.13--------| "
"###
);
// --------------------
// size limit = MAX_SIZE * 4 to force the first choice, L0.1 with L1.11, because it is still not enough for the second choice
let (files_to_compact, files_to_keep) = limit_files_to_compact(
MAX_SIZE * 4,
files.clone(),
CompactionLevel::FileNonOverlapped,
);
assert_eq!(files_to_compact.len(), 2);
assert_eq!(files_to_keep.len(), 4);
// See layout of 2 set of files
insta::assert_yaml_snapshot!(
format_files_split("files to compact:", &files_to_compact, "files to keep:", &files_to_keep),
@r###"
---
- "files to compact:"
- "L0, all files 100b "
- "L0.1[150,250] |-----------------------L0.1------------------------| "
- "L1, all files 100b "
- "L1.11[100,200] |-----------------------L1.11-----------------------| "
- "files to keep:"
- "L0, all files 100b "
- "L0.2[550,650] |--------L0.2--------| "
- "L0.3[350,450] |--------L0.3--------| "
- "L1, all files 100b "
- "L1.12[300,400] |-------L1.12--------| "
- "L1.13[500,600] |-------L1.13--------| "
"###
);
// --------------------
// size limit = MAX_SIZE * 5 to force the second choice, L0.1, L0.2 with L1.11, L1.12, L1.13
let (files_to_compact, files_to_keep) = limit_files_to_compact(
MAX_SIZE * 5,
files.clone(),
CompactionLevel::FileNonOverlapped,
);
assert_eq!(files_to_compact.len(), 5);
assert_eq!(files_to_keep.len(), 1);
// See layout of 2 set of files
insta::assert_yaml_snapshot!(
format_files_split("files to compact:", &files_to_compact, "files to keep:", &files_to_keep),
@r###"
---
- "files to compact:"
- "L0, all files 100b "
- "L0.1[150,250] |----L0.1----| "
- "L0.2[550,650] |----L0.2----| "
- "L1, all files 100b "
- "L1.11[100,200] |---L1.11----| "
- "L1.12[300,400] |---L1.12----| "
- "L1.13[500,600] |---L1.13----| "
- "files to keep:"
- "L0, all files 100b "
- "L0.3[350,450] |-------------------------------------L0.3-------------------------------------|"
"###
);
// --------------------
// size limit >= total size to force the third choice compacting everything: L0.1, L0.2, L0.3 with L1.11, L1.12, L1.13
let (files_to_compact, files_to_keep) =
limit_files_to_compact(MAX_SIZE * 6, files, CompactionLevel::FileNonOverlapped);
assert_eq!(files_to_compact.len(), 6);
assert_eq!(files_to_keep.len(), 0);
// See layout of 2 set of files
insta::assert_yaml_snapshot!(
format_files_split("files to compact:", &files_to_compact, "files to keep:", &files_to_keep),
@r###"
---
- "files to compact:"
- "L0, all files 100b "
- "L0.1[150,250] |----L0.1----| "
- "L0.2[550,650] |----L0.2----| "
- "L0.3[350,450] |----L0.3----| "
- "L1, all files 100b "
- "L1.11[100,200] |---L1.11----| "
- "L1.12[300,400] |---L1.12----| "
- "L1.13[500,600] |---L1.13----| "
- "files to keep:"
"###
);
}
#[test]
fn test_compact_files_limit_start_level_1() {
let files = create_overlapped_start_target_files(
MAX_SIZE as i64,
CompactionLevel::FileNonOverlapped,
);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L1, all files 100b "
- "L1.2[550,650] |----L1.2----| "
- "L1.1[150,250] |----L1.1----| "
- "L1.3[350,450] |----L1.3----| "
- "L2, all files 100b "
- "L2.12[300,400] |---L2.12----| "
- "L2.13[500,600] |---L2.13----| "
- "L2.11[100,200] |---L2.11----| "
"###
);
// There are only 3 choices for compacting:
// 1. Smallest set: L1.1 with L2.11
// 2. Medium size set: L1.1, L1.3 with L2.11, L2.12
// 3. All files: L1.1, L1.2, L1.3 with L2.11, L2.12, L2.13
// --------------------
// size limit = MAX_SIZE * 3 to force the first choice, L1.1 with L2.11
let (files_to_compact, files_to_keep) =
limit_files_to_compact(MAX_SIZE * 3, files.clone(), CompactionLevel::Final);
assert_eq!(files_to_compact.len(), 2);
assert_eq!(files_to_keep.len(), 4);
// See layout of 2 set of files
insta::assert_yaml_snapshot!(
format_files_split("files to compact:", &files_to_compact, "files to keep:", &files_to_keep),
@r###"
---
- "files to compact:"
- "L1, all files 100b "
- "L1.1[150,250] |-----------------------L1.1------------------------| "
- "L2, all files 100b "
- "L2.11[100,200] |-----------------------L2.11-----------------------| "
- "files to keep:"
- "L1, all files 100b "
- "L1.3[350,450] |--------L1.3--------| "
- "L1.2[550,650] |--------L1.2--------| "
- "L2, all files 100b "
- "L2.12[300,400] |-------L2.12--------| "
- "L2.13[500,600] |-------L2.13--------| "
"###
);
// --------------------
// size limit = MAX_SIZE * 3 to force the first choice, L1.1 with L2.11, because it is still not enough for the second choice
let (files_to_compact, files_to_keep) =
limit_files_to_compact(MAX_SIZE * 3, files.clone(), CompactionLevel::Final);
assert_eq!(files_to_compact.len(), 2);
assert_eq!(files_to_keep.len(), 4);
// See layout of 2 set of files
insta::assert_yaml_snapshot!(
format_files_split("files to compact:", &files_to_compact, "files to keep:", &files_to_keep),
@r###"
---
- "files to compact:"
- "L1, all files 100b "
- "L1.1[150,250] |-----------------------L1.1------------------------| "
- "L2, all files 100b "
- "L2.11[100,200] |-----------------------L2.11-----------------------| "
- "files to keep:"
- "L1, all files 100b "
- "L1.3[350,450] |--------L1.3--------| "
- "L1.2[550,650] |--------L1.2--------| "
- "L2, all files 100b "
- "L2.12[300,400] |-------L2.12--------| "
- "L2.13[500,600] |-------L2.13--------| "
"###
);
// --------------------
// size limit = MAX_SIZE * 5 to force the second choice, L1.1, L1.3 with L2.11, L2.12
let (files_to_compact, files_to_keep) =
limit_files_to_compact(MAX_SIZE * 5, files.clone(), CompactionLevel::Final);
assert_eq!(files_to_compact.len(), 4);
assert_eq!(files_to_keep.len(), 2);
// See layout of 2 set of files
insta::assert_yaml_snapshot!(
format_files_split("files to compact:", &files_to_compact, "files to keep:", &files_to_keep),
@r###"
---
- "files to compact:"
- "L1, all files 100b "
- "L1.1[150,250] |--------L1.1--------| "
- "L1.3[350,450] |--------L1.3--------| "
- "L2, all files 100b "
- "L2.11[100,200] |-------L2.11--------| "
- "L2.12[300,400] |-------L2.12--------| "
- "files to keep:"
- "L1, all files 100b "
- "L1.2[550,650] |-----------------------L1.2------------------------| "
- "L2, all files 100b "
- "L2.13[500,600] |-----------------------L2.13-----------------------| "
"###
);
// --------------------
// size limit >= total size to force the third choice compacting everything: L1.1, L1.2, L1.3 with L2.11, L2.12, L2.13
let (files_to_compact, files_to_keep) =
limit_files_to_compact(MAX_SIZE * 6, files, CompactionLevel::Final);
assert_eq!(files_to_compact.len(), 6);
assert_eq!(files_to_keep.len(), 0);
// See layout of 2 set of files
insta::assert_yaml_snapshot!(
format_files_split("files to compact:", &files_to_compact, "files to keep:", &files_to_keep),
@r###"
---
- "files to compact:"
- "L1, all files 100b "
- "L1.1[150,250] |----L1.1----| "
- "L1.3[350,450] |----L1.3----| "
- "L1.2[550,650] |----L1.2----| "
- "L2, all files 100b "
- "L2.11[100,200] |---L2.11----| "
- "L2.12[300,400] |---L2.12----| "
- "L2.13[500,600] |---L2.13----| "
- "files to keep:"
"###
);
}
}


@ -0,0 +1,234 @@
use std::collections::VecDeque;
use data_types::{CompactionLevel, ParquetFile};
use itertools::Itertools;
use observability_deps::tracing::debug;
use crate::{
components::files_split::{target_level_split::TargetLevelSplit, FilesSplit},
file_classification::FileToSplit,
};
/// Return (`[files_to_split]`, `[files_not_to_split]`) of given files
/// such that `files_to_split` are files in start-level that overlap with more than one file in target_level.
///
/// Only split files in start-level if their total size is greater than max_compact_size
///
/// Example:
/// . Input:
/// |---L0.1---| |--L0.2--|
/// |--L1.1--| |--L1.2--| |--L1.3--|
///
/// L0.1 overlaps with 2 level-1 files (L1.2, L1.3) and should be split into 2 files, one overlapping
/// L1.2 and one overlapping L1.3
///
/// . Output:
/// . files_to_split = [L0.1]
/// . files_not_to_split = [L1.1, L1.2, L1.3, L0.2] which is the rest of the files
///
/// Since a start-level file needs to be compacted with all of its overlapped target-level files to retain the invariant that
/// all files in the target level are non-overlapping, splitting start-level files reduces the number of overlapped files
/// at the target level and avoids compacting too many files in the next compaction cycle.
/// To achieve this goal, a start-level file should be split so that each piece overlaps with at most one target-level file. This reduces
/// the minimum compacting set to two files: a start-level file and an overlapped target-level file.
pub fn identify_files_to_split(
files: Vec<data_types::ParquetFile>,
target_level: CompactionLevel,
) -> (Vec<FileToSplit>, Vec<ParquetFile>) {
// panic if not all files are either in target level or start level
let start_level = target_level.prev();
assert!(files
.iter()
.all(|f| f.compaction_level == target_level || f.compaction_level == start_level));
// Get start-level and target-level files
let len = files.len();
let split = TargetLevelSplit::new();
let (start_level_files, mut target_level_files) = split.apply(files, start_level);
// sort start_level files by their max_l0_created_at and convert to a VecDeque for pop_front
let mut start_level_files: VecDeque<ParquetFile> = start_level_files
.into_iter()
.sorted_by_key(|f| f.max_l0_created_at)
.collect();
// sort target level files by their min_time
target_level_files.sort_by_key(|f| f.min_time);
// Get files in start level that overlap with any file in target level
let mut files_to_split = Vec::new();
let mut files_not_to_split = Vec::new();
while let Some(file) = start_level_files.pop_front() {
// Get target_level files that overlaps with this file
let overlapped_target_level_files: Vec<&ParquetFile> = target_level_files
.iter()
.filter(|f| file.overlaps(f))
.collect();
// Do not split a file that overlaps with only one file in target level,
// or that has a single timestamp (splitting it would produce the same file and hence an infinite loop),
// or that has time range = 1 (splitting it would panic because split_time would equal min_time/max_time, which is disallowed)
if overlapped_target_level_files.len() < 2
|| file.min_time == file.max_time
|| file.min_time == file.max_time - 1
{
files_not_to_split.push(file);
} else {
debug!(?file.min_time, ?file.max_time, ?file.compaction_level, "time range of file to split");
overlapped_target_level_files
.iter()
.for_each(|f| debug!(?f.min_time, ?f.max_time, ?f.compaction_level, "time range of overlap file"));
// this file will be split; use the max_time of each overlapped target-level file as a split time
let split_times: Vec<i64> = overlapped_target_level_files
.iter()
.filter(|f| f.max_time < file.max_time)
.map(|f| f.max_time.get())
.dedup()
.collect();
debug!(?split_times);
files_to_split.push(FileToSplit { file, split_times });
}
}
// keep the rest of the files for next round
start_level_files.extend(target_level_files);
files_not_to_split.extend(start_level_files);
assert_eq!(files_to_split.len() + files_not_to_split.len(), len);
(files_to_split, files_not_to_split)
}
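
The split-time computation above reduces to: take the `max_time` of every overlapped target-level file that ends before the start-level file does. A standalone sketch over plain `(min, max)` ranges (the helper name is hypothetical; `Itertools::dedup` is the same adapter used above):

```rust
use itertools::Itertools;

/// Sketch: given a start-level file's time range and the sorted target-level
/// ranges it overlaps, compute the times at which to split it.
fn split_times(file: (i64, i64), overlapped: &[(i64, i64)]) -> Vec<i64> {
    let (_file_min, file_max) = file;
    overlapped
        .iter()
        .copied()
        // Never split at (or past) the file's own max_time.
        .filter(|&(_, max)| max < file_max)
        .map(|(_, max)| max)
        .dedup()
        .collect()
}

fn main() {
    // L0.1[450,620] overlaps L1.12[400,500] and L1.13[600,700]; only L1.12 ends
    // before 620, so the single split time is 500, yielding [450,500] and
    // [501,620] once the split plan is executed (see test_split_files_split).
    assert_eq!(split_times((450, 620), &[(400, 500), (600, 700)]), vec![500]);
}
```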
#[cfg(test)]
mod tests {
use compactor2_test_utils::{
create_l1_files, create_overlapped_files, create_overlapped_l0_l1_files_2, format_files,
format_files_split,
};
use data_types::CompactionLevel;
#[test]
fn test_split_empty() {
let files = vec![];
let (files_to_split, files_not_to_split) =
super::identify_files_to_split(files, CompactionLevel::Initial);
assert!(files_to_split.is_empty());
assert!(files_not_to_split.is_empty());
}
#[test]
#[should_panic]
fn test_split_files_wrong_target_level() {
// all L1 files
let files = create_l1_files(1);
// Target is L0 while all files are in L1 --> panic
let (_files_to_split, _files_not_to_split) =
super::identify_files_to_split(files, CompactionLevel::Initial);
}
#[test]
#[should_panic]
fn test_split_files_three_level_files() {
// Three level files
let files = create_overlapped_files();
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0 "
- "L0.2[650,750] 1b |-L0.2-| "
- "L0.1[450,620] 1b |----L0.1-----| "
- "L0.3[800,900] 100b |-L0.3-| "
- "L1 "
- "L1.13[600,700] 100b |L1.13-| "
- "L1.12[400,500] 1b |L1.12-| "
- "L1.11[250,350] 1b |L1.11-| "
- "L2 "
- "L2.21[0,100] 1b |L2.21-| "
- "L2.22[200,300] 1b |L2.22-| "
"###
);
// panic because it only handles at most 2 levels next to each other
let (_files_to_split, _files_not_to_split) =
super::identify_files_to_split(files, CompactionLevel::FileNonOverlapped);
}
#[test]
fn test_split_files_no_split() {
let files = create_l1_files(1);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L1, all files 1b "
- "L1.13[600,700] |-----L1.13-----| "
- "L1.12[400,500] |-----L1.12-----| "
- "L1.11[250,350] |-----L1.11-----| "
"###
);
let (files_to_split, files_not_to_split) =
super::identify_files_to_split(files, CompactionLevel::FileNonOverlapped);
assert!(files_to_split.is_empty());
assert_eq!(files_not_to_split.len(), 3);
}
#[test]
fn test_split_files_split() {
let files = create_overlapped_l0_l1_files_2(1);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 1b "
- "L0.2[650,750] |-----L0.2-----| "
- "L0.1[450,620] |----------L0.1-----------| "
- "L0.3[800,900] |-----L0.3-----|"
- "L1, all files 1b "
- "L1.13[600,700] |----L1.13-----| "
- "L1.12[400,500] |----L1.12-----| "
"###
);
let (files_to_split, files_not_to_split) =
super::identify_files_to_split(files, CompactionLevel::FileNonOverlapped);
// L0.1 that overlaps with 2 level-1 files will be split
assert_eq!(files_to_split.len(), 1);
// L0.1 [450, 620] will be split at 500 (max of its overlapped L1.12)
// The split_times [500] means after we execute the split (in later steps), L0.1 will
// be split into 2 files with time ranges: [450, 500] and [501, 620]. This means the first file will
// overlap with L1.12 and the second file will overlap with L1.13
assert_eq!(files_to_split[0].file.id.get(), 1);
assert_eq!(files_to_split[0].split_times, vec![500]);
// The rest is in not-split
assert_eq!(files_not_to_split.len(), 4);
// See layout of 2 set of files
insta::assert_yaml_snapshot!(
format_files_split("files to split:", &files_to_split.iter().map(|f| f.file.clone()).collect::<Vec<_>>(), "files not to split:", &files_not_to_split),
@r###"
---
- "files to split:"
- "L0, all files 1b "
- "L0.1[450,620] |-------------------------------------L0.1-------------------------------------|"
- "files not to split:"
- "L0, all files 1b "
- "L0.2[650,750] |-----L0.2-----| "
- "L0.3[800,900] |-----L0.3-----|"
- "L1, all files 1b "
- "L1.12[400,500] |----L1.12-----| "
- "L1.13[600,700] |----L1.13-----| "
"###
);
}
}


@ -0,0 +1,60 @@
use std::fmt::Display;
use data_types::{CompactionLevel, ParquetFile};
use observability_deps::tracing::info;
use crate::{file_classification::FilesToCompactOrSplit, partition_info::PartitionInfo};
use super::SplitOrCompact;
#[derive(Debug)]
pub struct LoggingSplitOrCompactWrapper<T>
where
T: SplitOrCompact,
{
inner: T,
}
impl<T> LoggingSplitOrCompactWrapper<T>
where
T: SplitOrCompact,
{
pub fn new(inner: T) -> Self {
Self { inner }
}
}
impl<T> Display for LoggingSplitOrCompactWrapper<T>
where
T: SplitOrCompact,
{
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "display({})", self.inner)
}
}
impl<T> SplitOrCompact for LoggingSplitOrCompactWrapper<T>
where
T: SplitOrCompact,
{
fn apply(
&self,
partition_info: &PartitionInfo,
files: Vec<ParquetFile>,
target_level: CompactionLevel,
) -> (FilesToCompactOrSplit, Vec<ParquetFile>) {
let (files_to_split_or_compact, files_to_keep) =
self.inner.apply(partition_info, files, target_level);
info!(
partition_id = partition_info.partition_id.get(),
target_level = %target_level,
files_to_compact = files_to_split_or_compact.files_to_compact_len(),
files_to_split = files_to_split_or_compact.files_to_split_len(),
files_to_keep = files_to_keep.len(),
"split or compact"
);
(files_to_split_or_compact, files_to_keep)
}
}


@ -0,0 +1,22 @@
use std::fmt::{Debug, Display};
use data_types::{CompactionLevel, ParquetFile};
use crate::{file_classification::FilesToCompactOrSplit, PartitionInfo};
pub mod files_to_compact;
pub mod files_to_split;
pub mod logging;
pub mod split_compact;
pub trait SplitOrCompact: Debug + Display + Send + Sync {
/// Return (`[files_to_split_or_compact]`, `[files_to_keep]`) of given files
/// `files_to_keep` are files that are not part of this round's compaction but are
/// kept to be compacted in the next round
fn apply(
&self,
partition_info: &PartitionInfo,
files: Vec<ParquetFile>,
target_level: CompactionLevel,
) -> (FilesToCompactOrSplit, Vec<ParquetFile>);
}


@ -0,0 +1,258 @@
use std::fmt::Display;
use data_types::{CompactionLevel, ParquetFile};
use crate::{file_classification::FilesToCompactOrSplit, partition_info::PartitionInfo};
use super::{
files_to_compact::limit_files_to_compact, files_to_split::identify_files_to_split,
SplitOrCompact,
};
#[derive(Debug)]
pub struct SplitCompact {
max_compact_size: usize,
}
impl SplitCompact {
pub fn new(max_compact_size: usize) -> Self {
Self { max_compact_size }
}
}
impl Display for SplitCompact {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "split_or_compact({})", self.max_compact_size)
}
}
impl SplitOrCompact for SplitCompact {
/// Return (`[files_to_split_or_compact]`, `[files_to_keep]`) of the given files
///
/// Verify whether the given files are over the max_compact_size limit.
/// If so, find start-level files that can be split to reduce the number of overlapped files that must be compacted in one run.
/// If no split is needed, pick files to compact that stay under the max_compact_size limit
fn apply(
&self,
_partition_info: &PartitionInfo,
files: Vec<ParquetFile>,
target_level: CompactionLevel,
) -> (FilesToCompactOrSplit, Vec<ParquetFile>) {
// Compact all files in one run if their total size is within max_compact_size
let total_size: i64 = files.iter().map(|f| f.file_size_bytes).sum();
if total_size as usize <= self.max_compact_size {
return (FilesToCompactOrSplit::FilesToCompact(files), vec![]);
}
// See if split is needed
let (files_to_split, files_not_to_split) = identify_files_to_split(files, target_level);
if !files_to_split.is_empty() {
// These files must be split before further compaction
(
FilesToCompactOrSplit::FilesToSplit(files_to_split),
files_not_to_split,
)
} else {
// No split is needed; limit the number of files to compact to stay under the total size limit
let (files_to_compact, files_to_keep) =
limit_files_to_compact(self.max_compact_size, files_not_to_split, target_level);
(
FilesToCompactOrSplit::FilesToCompact(files_to_compact),
files_to_keep,
)
}
}
}
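As a standalone illustration of the three-way outcome above (a toy function, not the crate's types; `need_split` stands in for `identify_files_to_split` finding candidates):

fn decide(total_size: usize, max_compact_size: usize, need_split: bool) -> &'static str {
    if total_size <= max_compact_size {
        // everything fits into a single compaction run
        "compact all files"
    } else if need_split {
        // over the budget and some start-level file straddles multiple
        // target-level files: split those files first
        "split files first"
    } else {
        // over the budget with nothing to split: compact a subset under the
        // limit and keep the rest for the next round
        "compact a subset, keep the rest"
    }
}

fn main() {
    assert_eq!(decide(500, 600, false), "compact all files");
    assert_eq!(decide(700, 600, true), "split files first");
    assert_eq!(decide(700, 600, false), "compact a subset, keep the rest");
}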
#[cfg(test)]
mod tests {
use std::sync::Arc;
use compactor2_test_utils::{
create_overlapped_l0_l1_files_2, create_overlapped_l1_l2_files_2, format_files,
format_files_split,
};
use data_types::CompactionLevel;
use crate::{
components::split_or_compact::{split_compact::SplitCompact, SplitOrCompact},
test_utils::PartitionInfoBuilder,
};
const MAX_SIZE: usize = 100;
#[test]
fn test_empty() {
let files = vec![];
let p_info = Arc::new(PartitionInfoBuilder::new().build());
let split_compact = SplitCompact::new(MAX_SIZE);
let (files_to_compact_or_split, files_to_keep) =
split_compact.apply(&p_info, files, CompactionLevel::Initial);
assert!(files_to_compact_or_split.is_empty());
assert!(files_to_keep.is_empty());
}
#[test]
fn test_compact_too_small_to_compact() {
let files = create_overlapped_l1_l2_files_2(MAX_SIZE as i64);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L1, all files 100b "
- "L1.13[600,700] |----L1.13-----|"
- "L1.12[400,500] |----L1.12-----| "
- "L1.11[250,350] |----L1.11-----| "
- "L2, all files 100b "
- "L2.22[200,300] |----L2.22-----| "
"###
);
let p_info = Arc::new(PartitionInfoBuilder::new().build());
let split_compact = SplitCompact::new(MAX_SIZE);
let (files_to_compact_or_split, files_to_keep) =
split_compact.apply(&p_info, files, CompactionLevel::Final);
// nothing to compact or split
assert!(files_to_compact_or_split.is_empty());
assert_eq!(files_to_keep.len(), 4);
}
#[test]
fn test_compact_files_no_limit() {
let files = create_overlapped_l0_l1_files_2(MAX_SIZE as i64);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 100b "
- "L0.2[650,750] |-----L0.2-----| "
- "L0.1[450,620] |----------L0.1-----------| "
- "L0.3[800,900] |-----L0.3-----|"
- "L1, all files 100b "
- "L1.13[600,700] |----L1.13-----| "
- "L1.12[400,500] |----L1.12-----| "
"###
);
// size limit > total size --> compact all
let p_info = Arc::new(PartitionInfoBuilder::new().build());
let split_compact = SplitCompact::new(MAX_SIZE * 6 + 1);
let (files_to_compact_or_split, files_to_keep) =
split_compact.apply(&p_info, files, CompactionLevel::FileNonOverlapped);
assert_eq!(files_to_compact_or_split.files_to_compact_len(), 5);
assert!(files_to_keep.is_empty());
// See the layout of the 2 sets of files
insta::assert_yaml_snapshot!(
format_files_split("files to compact", &files_to_compact_or_split.files_to_compact() , "files to keep:", &files_to_keep),
@r###"
---
- files to compact
- "L0, all files 100b "
- "L0.2[650,750] |-----L0.2-----| "
- "L0.1[450,620] |----------L0.1-----------| "
- "L0.3[800,900] |-----L0.3-----|"
- "L1, all files 100b "
- "L1.13[600,700] |----L1.13-----| "
- "L1.12[400,500] |----L1.12-----| "
- "files to keep:"
"###
);
}
#[test]
fn test_split_files() {
let files = create_overlapped_l0_l1_files_2(MAX_SIZE as i64);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L0, all files 100b "
- "L0.2[650,750] |-----L0.2-----| "
- "L0.1[450,620] |----------L0.1-----------| "
- "L0.3[800,900] |-----L0.3-----|"
- "L1, all files 100b "
- "L1.13[600,700] |----L1.13-----| "
- "L1.12[400,500] |----L1.12-----| "
"###
);
// hit size limit -> split start_level files that overlap with more than 1 target_level file
let p_info = Arc::new(PartitionInfoBuilder::new().build());
let split_compact = SplitCompact::new(MAX_SIZE);
let (files_to_compact_or_split, files_to_keep) =
split_compact.apply(&p_info, files, CompactionLevel::FileNonOverlapped);
assert_eq!(files_to_compact_or_split.files_to_split_len(), 1);
assert_eq!(files_to_keep.len(), 4);
// See the layout of the 2 sets of files
insta::assert_yaml_snapshot!(
format_files_split("files to compact or split:", &files_to_compact_or_split.files_to_split(), "files to keep:", &files_to_keep),
@r###"
---
- "files to compact or split:"
- "L0, all files 100b "
- "L0.1[450,620] |-------------------------------------L0.1-------------------------------------|"
- "files to keep:"
- "L0, all files 100b "
- "L0.2[650,750] |-----L0.2-----| "
- "L0.3[800,900] |-----L0.3-----|"
- "L1, all files 100b "
- "L1.12[400,500] |----L1.12-----| "
- "L1.13[600,700] |----L1.13-----| "
"###
);
}
#[test]
fn test_compact_files() {
let files = create_overlapped_l1_l2_files_2(MAX_SIZE as i64);
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L1, all files 100b "
- "L1.13[600,700] |----L1.13-----|"
- "L1.12[400,500] |----L1.12-----| "
- "L1.11[250,350] |----L1.11-----| "
- "L2, all files 100b "
- "L2.22[200,300] |----L2.22-----| "
"###
);
// hit size limit and nothing to split --> limit the number of files to compact
let p_info = Arc::new(PartitionInfoBuilder::new().build());
let split_compact = SplitCompact::new(MAX_SIZE * 3);
let (files_to_compact_or_split, files_to_keep) =
split_compact.apply(&p_info, files, CompactionLevel::Final);
assert_eq!(files_to_compact_or_split.files_to_compact_len(), 3);
assert_eq!(files_to_keep.len(), 1);
// See the layout of the 2 sets of files
insta::assert_yaml_snapshot!(
format_files_split("files to compact or split:", &files_to_compact_or_split.files_to_compact() , "files to keep:", &files_to_keep),
@r###"
---
- "files to compact or split:"
- "L1, all files 100b "
- "L1.11[250,350] |---------L1.11----------| "
- "L1.12[400,500] |---------L1.12----------| "
- "L2, all files 100b "
- "L2.22[200,300] |---------L2.22----------| "
- "files to keep:"
- "L1, all files 100b "
- "L1.13[600,700] |------------------------------------L1.13-------------------------------------|"
"###
);
}
}

View File

@ -9,7 +9,9 @@ use tracker::InstrumentedAsyncSemaphore;
use crate::{
components::{scratchpad::Scratchpad, Components},
error::DynError,
file_classification::{FileToSplit, FilesToCompactOrSplit},
partition_info::PartitionInfo,
PlanIR,
};
/// Tries to compact all eligible partitions, up to
@ -116,18 +118,21 @@ async fn compact_partition(
///
/// The high level flow is:
///
/// . Mutiple rounds, each round process mutltiple branches. Each branch inlcudes at most 200 files
/// . Each branch will compact files lowest level (aka start-level) into its next level (aka target-level):
/// - Many L0s into fewer and larger L0s. Start-level = target-level = 0
/// - Many L1s into fewer and larger L1s. Start-level = target-level = 1
/// - (L0s & L1s) to L1s if there are L0s. Start-level = 0, target-level = 1
/// - (L1s & L2s) to L2s if no L0s. Start-level = 1, target-level = 2
/// . Multiple rounds, each round processes multiple branches. Each branch includes at most 200 files
/// . Each branch will compact files of the lowest level (aka start-level) into its next level (aka target-level), either:
/// - Compact many L0s into fewer and larger L0s. Start-level = target-level = 0
/// - Compact many L1s into fewer and larger L1s. Start-level = target-level = 1
/// - Compact (L0s & L1s) to L1s if there are L0s. Start-level = 0, target-level = 1
/// - Compact (L1s & L2s) to L2s if no L0s. Start-level = 1, target-level = 2
/// - Split L0s, each of which overlaps with more than 1 L1, into many L0s that each overlap with at most one L1 file
/// - Split L1s, each of which overlaps with more than 1 L2, into many L1s that each overlap with at most one L2 file
/// . Each branch finds non-overlapping files and upgrades files to avoid unnecessary recompacting.
/// The branch actually splits the files into:
/// 1. files_to_keep: do not compact these files because they are already higher than the target level
/// 2. files_to_upgrade: upgrade these initial-level files to the target level because they do not overlap with
/// any target-level or initial-level files and are large enough (> desired max size)
/// 3. files_to_compact: the rest of the files that must be compacted
/// 3. files_to_compact_or_split: these are either files to compact or files to split, and will be compacted or split accordingly
///
/// Example: 4 files: two L0s, two L1s and one L2
/// Input:
@ -202,24 +207,20 @@ async fn try_compact_partition(
.file_classifier
.classify(&partition_info, &round_info, branch);
// Cannot run this plan; skip this partition because the number or total size of input files is over the limit.
// The partition_resource_limit_filter will throw an error if one of the limits is hit, which leads
// to the partition being added to the `skipped_compactions` catalog table so we do not bother
// compacting it again.
// TODO: After https://github.com/influxdata/idpe/issues/17090 is implemented (aka V3), we will
// split files into smaller branches and also compact L0s into fewer L0s to deal with all kinds
// of conditions even with limited resources. Then we will remove this resource limit check.
if !components
.partition_resource_limit_filter
.apply(&partition_info, &file_classification.files_to_compact)
.apply(
&partition_info,
&file_classification.files_to_compact_or_split.files(),
)
.await?
{
return Ok(());
}
// Compact
let created_file_params = run_compaction_plan(
&file_classification.files_to_compact,
let created_file_params = run_plans(
&file_classification.files_to_compact_or_split,
&partition_info,
&components,
file_classification.target_level,
@ -237,10 +238,11 @@ async fn try_compact_partition(
// Update the catalog to reflect the newly created files, soft delete the compacted files and
// update the upgraded files
let files_to_delete = file_classification.files_to_compact_or_split.files();
let (created_files, upgraded_files) = update_catalog(
Arc::clone(&components),
partition_id,
file_classification.files_to_compact,
files_to_delete,
file_classification.files_to_upgrade,
created_file_params,
file_classification.target_level,
@ -257,6 +259,41 @@ async fn try_compact_partition(
}
}
/// Compact or split the given files
async fn run_plans(
files: &FilesToCompactOrSplit,
partition_info: &Arc<PartitionInfo>,
components: &Arc<Components>,
target_level: CompactionLevel,
job_semaphore: Arc<InstrumentedAsyncSemaphore>,
scratchpad_ctx: &mut dyn Scratchpad,
) -> Result<Vec<ParquetFileParams>, DynError> {
match files {
FilesToCompactOrSplit::FilesToCompact(files) => {
run_compaction_plan(
files,
partition_info,
components,
target_level,
job_semaphore,
scratchpad_ctx,
)
.await
}
FilesToCompactOrSplit::FilesToSplit(files) => {
run_split_plans(
files,
partition_info,
components,
target_level,
job_semaphore,
scratchpad_ctx,
)
.await
}
}
}
/// Compact `files` into a new parquet file of the given target_level
async fn run_compaction_plan(
files: &[ParquetFile],
@ -285,8 +322,89 @@ async fn run_compaction_plan(
let plan_ir =
components
.ir_planner
.plan(branch_inpad, Arc::clone(partition_info), target_level);
.compact_plan(branch_inpad, Arc::clone(partition_info), target_level);
execute_plan(
plan_ir,
partition_info,
components,
target_level,
job_semaphore,
)
.await
}
/// Split each of the given files into multiple files
async fn run_split_plans(
files_to_split: &[FileToSplit],
partition_info: &Arc<PartitionInfo>,
components: &Arc<Components>,
target_level: CompactionLevel,
job_semaphore: Arc<InstrumentedAsyncSemaphore>,
scratchpad_ctx: &mut dyn Scratchpad,
) -> Result<Vec<ParquetFileParams>, DynError> {
if files_to_split.is_empty() {
return Ok(vec![]);
}
let mut created_file_params = vec![];
for file_to_split in files_to_split {
let x = run_split_plan(
file_to_split,
partition_info,
components,
target_level,
Arc::clone(&job_semaphore),
scratchpad_ctx,
)
.await?;
created_file_params.extend(x);
}
Ok(created_file_params)
}
// Split a given file into multiple files
async fn run_split_plan(
file_to_split: &FileToSplit,
partition_info: &Arc<PartitionInfo>,
components: &Arc<Components>,
target_level: CompactionLevel,
job_semaphore: Arc<InstrumentedAsyncSemaphore>,
scratchpad_ctx: &mut dyn Scratchpad,
) -> Result<Vec<ParquetFileParams>, DynError> {
// stage files
let input_path = (&file_to_split.file).into();
let input_uuids_inpad = scratchpad_ctx.load_to_scratchpad(&[input_path]).await;
let file_inpad = ParquetFile {
object_store_id: input_uuids_inpad[0],
..file_to_split.file.clone()
};
let plan_ir = components.ir_planner.split_plan(
file_inpad,
file_to_split.split_times.clone(),
Arc::clone(partition_info),
target_level,
);
execute_plan(
plan_ir,
partition_info,
components,
target_level,
job_semaphore,
)
.await
}
async fn execute_plan(
plan_ir: PlanIR,
partition_info: &Arc<PartitionInfo>,
components: &Arc<Components>,
target_level: CompactionLevel,
job_semaphore: Arc<InstrumentedAsyncSemaphore>,
) -> Result<Vec<ParquetFileParams>, DynError> {
let create = {
// draw semaphore BEFORE creating the DataFusion plan and drop it directly AFTER finishing the
// DataFusion computation (but BEFORE doing any additional external IO).

View File

@ -12,9 +12,8 @@ pub struct FileClassification {
/// The target level of file resulting from compaction
pub target_level: CompactionLevel,
/// Files which should be compacted into a new single parquet
/// file, often the small and/or overlapped files
pub files_to_compact: Vec<ParquetFile>,
/// Decision on what files should be compacted or split. See [`FilesToCompactOrSplit`] for more details.
pub files_to_compact_or_split: FilesToCompactOrSplit,
/// Non-overlapped files that should be upgraded to the target
/// level without rewriting (for example they are of sufficient
@ -25,3 +24,99 @@ pub struct FileClassification {
/// non-overlapped or higher-target-level files
pub files_to_keep: Vec<ParquetFile>,
}
impl FileClassification {
pub fn files_to_compact_len(&self) -> usize {
match &self.files_to_compact_or_split {
FilesToCompactOrSplit::FilesToCompact(files) => files.len(),
FilesToCompactOrSplit::FilesToSplit(_) => 0,
}
}
pub fn files_to_split_len(&self) -> usize {
match &self.files_to_compact_or_split {
FilesToCompactOrSplit::FilesToCompact(_files) => 0,
FilesToCompactOrSplit::FilesToSplit(files) => files.len(),
}
}
}
/// Files to compact or to split
#[derive(Debug, PartialEq, Eq)]
pub enum FilesToCompactOrSplit {
/// These files should be compacted together, ideally forming a single output file.
/// Due to constraints such as the maximum desired output file size and the "leading edge" optimization,
/// `FilesToCompact` may actually produce multiple output files.
FilesToCompact(Vec<ParquetFile>),
/// The input files should be split into multiple output files, at the specified times
FilesToSplit(Vec<FileToSplit>),
}
impl FilesToCompactOrSplit {
// Return true if the list is empty
pub fn is_empty(&self) -> bool {
match self {
Self::FilesToCompact(files) => files.is_empty(),
Self::FilesToSplit(files) => files.is_empty(),
}
}
/// Return the number of files to compact
pub fn files_to_compact_len(&self) -> usize {
match self {
Self::FilesToCompact(files) => files.len(),
Self::FilesToSplit(_) => 0,
}
}
/// Return the number of files to split
pub fn files_to_split_len(&self) -> usize {
match self {
Self::FilesToCompact(_) => 0,
Self::FilesToSplit(files) => files.len(),
}
}
/// Return files to compact
pub fn files_to_compact(&self) -> Vec<ParquetFile> {
match self {
Self::FilesToCompact(files) => files.clone(),
Self::FilesToSplit(_) => vec![],
}
}
/// Return files to split
pub fn files_to_split(&self) -> Vec<ParquetFile> {
match self {
Self::FilesToCompact(_) => vec![],
Self::FilesToSplit(files) => {
let files: Vec<ParquetFile> = files.iter().map(|f| f.file.clone()).collect();
files
}
}
}
/// Return files of either type
pub fn files(&self) -> Vec<ParquetFile> {
match self {
Self::FilesToCompact(files) => files.clone(),
Self::FilesToSplit(files) => files.iter().map(|f| f.file.clone()).collect(),
}
}
// Returns the compaction level of the split files if there are any,
// or the given target level otherwise
pub fn target_level(&self, target_level: CompactionLevel) -> CompactionLevel {
match self {
Self::FilesToCompact(_) => target_level,
Self::FilesToSplit(files) => files[0].file.compaction_level,
}
}
}
/// A file to split and its split times
#[derive(Debug, PartialEq, Eq)]
pub struct FileToSplit {
pub file: ParquetFile,
pub split_times: Vec<i64>,
}
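As the split_or_compact tests earlier in this diff illustrate (L0.1[450,620] with split_times [500] becomes [450,500] and [501,620]), each split time closes one output range and the next range starts one time unit later. A standalone sketch of that arithmetic, with simplified types rather than the crate's:

fn split_ranges(min_time: i64, max_time: i64, split_times: &[i64]) -> Vec<(i64, i64)> {
    let mut ranges = Vec::with_capacity(split_times.len() + 1);
    let mut start = min_time;
    for &t in split_times {
        // each output file covers [start, t]; the next one starts right after t
        ranges.push((start, t));
        start = t + 1;
    }
    // the last output file runs through the original max_time
    ranges.push((start, max_time));
    ranges
}

fn main() {
    assert_eq!(split_ranges(450, 620, &[500]), vec![(450, 500), (501, 620)]);
}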

View File

@ -19,6 +19,8 @@ pub enum RoundInfo {
start_level: CompactionLevel,
/// max number of files to group in each plan
max_num_files_to_group: usize,
/// max total size limit of files to group in each plan
max_total_file_size_to_group: usize,
},
}
@ -29,7 +31,8 @@ impl Display for RoundInfo {
Self::ManySmallFiles {
start_level,
max_num_files_to_group,
} => write!(f, "ManySmallFiles: {start_level}, {max_num_files_to_group}",),
max_total_file_size_to_group,
} => write!(f, "ManySmallFiles: {start_level}, {max_num_files_to_group}, {max_total_file_size_to_group}",),
}
}
}

View File

@ -75,57 +75,6 @@ async fn test_num_files_over_limit() {
);
}
#[tokio::test]
async fn test_total_file_size_over_limit() {
test_helpers::maybe_start_logging();
// Create a test setup with 6 files
let setup = TestSetup::builder()
.await
.with_files()
.await
// Set max size < the input file size --> it won't get compacted
.with_max_input_parquet_bytes_per_partition_relative_to_total_size(-1)
.build()
.await;
// verify 6 files
let files = setup.list_by_table_not_to_delete().await;
assert_eq!(files.len(), 6);
// verify ID and compaction level of the files
assert_levels(
&files,
vec![
(1, CompactionLevel::FileNonOverlapped),
(2, CompactionLevel::Initial),
(3, CompactionLevel::Initial),
(4, CompactionLevel::FileNonOverlapped),
(5, CompactionLevel::Initial),
(6, CompactionLevel::Initial),
],
);
setup.run_compact().await;
// read files and verify they are not compacted
let files = setup.list_by_table_not_to_delete().await;
assert_eq!(files.len(), 6);
// verify ID and compaction level of the files
assert_levels(
&files,
vec![
(1, CompactionLevel::FileNonOverlapped),
(2, CompactionLevel::Initial),
(3, CompactionLevel::Initial),
(4, CompactionLevel::FileNonOverlapped),
(5, CompactionLevel::Initial),
(6, CompactionLevel::Initial),
],
);
}
#[tokio::test]
async fn test_compact_target_level() {
test_helpers::maybe_start_logging();
@ -249,7 +198,7 @@ async fn test_compact_large_overlapes() {
.await
// the test setup does not exceed number of files limit
.with_max_num_files_per_plan(10)
// the test setup exceed max compact size limit
// the test setup has a total file size that exceeds the max compact size limit
.with_max_input_parquet_bytes_per_partition_relative_to_total_size(-1)
.with_min_num_l1_files_to_compact(2)
.with_max_desired_file_size_bytes(100 * 1024 * 1024)
@ -264,51 +213,82 @@ async fn test_compact_large_overlapes() {
---
- initial
- "L1 "
- "L1.4[0,68000] 2.66kb|-----------------L1.4-----------------| "
- "L1.4[6000,68000] 2.66kb|----------------L1.4----------------| "
- "L1.5[136000,136000] 2.17kb |L1.5|"
- "L2 "
- "L2.1[8000,12000] 1.8kb |L2.1| "
- "L2.2[20000,30000] 2.61kb |L2.2| "
- "L2.3[35000,36000] 2.17kb |L2.3| "
- "L2.1[8000,12000] 1.8kb |L2.1| "
- "L2.2[20000,30000] 2.61kb |L2.2| "
- "L2.3[36000,36000] 2.17kb |L2.3| "
"###
);
// compact
setup.run_compact().await;
// Due to the size limit, the compaction skips this partition and 5 files are still in the system
// After PR https://github.com/influxdata/influxdb_iox/pull/7079 is in, this test will fail here
// and the right result should be similar to the commented-out output below
let files = setup.list_by_table_not_to_delete().await;
assert_eq!(files.len(), 5);
let mut files = setup.list_by_table_not_to_delete().await;
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L2 "
- "L2.3[36000,36000] 2.17kb |L2.3| "
- "L2.9[6000,30000] 2.68kb|----L2.9----| "
- "L2.10[68000,136000] 2.62kb |-----------------L2.10-----------------| "
"###
);
// todo: use insta::assert_yaml_snapshot!( to verify the output layout
assert_eq!(files.len(), 3);
// // verify the content of files
// // Compacted smaller file with the later data
// let mut files = setup.list_by_table_not_to_delete().await;
// let file1 = files.pop().unwrap();
// let batches = setup.read_parquet_file(file1).await;
// assert_batches_sorted_eq!(
// &[
// "+-----------+------+------+------+-----------------------------+",
// "| field_int | tag1 | tag2 | tag3 | time |",
// "+-----------+------+------+------+-----------------------------+",
// "| 10 | VT | | | 1970-01-01T00:00:00.000006Z |",
// "| 10 | VT | | | 1970-01-01T00:00:00.000010Z |",
// "| 10 | VT | | | 1970-01-01T00:00:00.000068Z |",
// "| 1500 | WA | | | 1970-01-01T00:00:00.000008Z |",
// "| 1601 | | PA | 15 | 1970-01-01T00:00:00.000028Z |",
// "| 1601 | | PA | 15 | 1970-01-01T00:00:00.000030Z |",
// "| 21 | | OH | 21 | 1970-01-01T00:00:00.000036Z |",
// "| 210 | | OH | 21 | 1970-01-01T00:00:00.000136Z |",
// "| 270 | UT | | | 1970-01-01T00:00:00.000025Z |",
// "| 70 | UT | | | 1970-01-01T00:00:00.000020Z |",
// "| 99 | OR | | | 1970-01-01T00:00:00.000012Z |",
// "+-----------+------+------+------+-----------------------------+",
// ],
// &batches
// );
// order files by their min_time
files.sort_by_key(|f| f.min_time);
let file = files[0].clone();
let batches = setup.read_parquet_file(file).await;
assert_batches_sorted_eq!(
&[
"+-----------+------+------+------+-----------------------------+",
"| field_int | tag1 | tag2 | tag3 | time |",
"+-----------+------+------+------+-----------------------------+",
"| 10 | VT | | | 1970-01-01T00:00:00.000006Z |",
"| 10 | VT | | | 1970-01-01T00:00:00.000010Z |",
"| 1500 | WA | | | 1970-01-01T00:00:00.000008Z |",
"| 1601 | | PA | 15 | 1970-01-01T00:00:00.000028Z |",
"| 1601 | | PA | 15 | 1970-01-01T00:00:00.000030Z |",
"| 270 | UT | | | 1970-01-01T00:00:00.000025Z |",
"| 70 | UT | | | 1970-01-01T00:00:00.000020Z |",
"| 99 | OR | | | 1970-01-01T00:00:00.000012Z |",
"+-----------+------+------+------+-----------------------------+",
],
&batches
);
let file = files[1].clone();
let batches = setup.read_parquet_file(file).await;
assert_batches_sorted_eq!(
&[
"+-----------+------+------+-----------------------------+",
"| field_int | tag2 | tag3 | time |",
"+-----------+------+------+-----------------------------+",
"| 21 | OH | 21 | 1970-01-01T00:00:00.000036Z |",
"+-----------+------+------+-----------------------------+",
],
&batches
);
let file = files[2].clone();
let batches = setup.read_parquet_file(file).await;
assert_batches_sorted_eq!(
&[
"+-----------+------+------+------+-----------------------------+",
"| field_int | tag1 | tag2 | tag3 | time |",
"+-----------+------+------+------+-----------------------------+",
"| 10 | VT | | | 1970-01-01T00:00:00.000068Z |",
"| 210 | | OH | 21 | 1970-01-01T00:00:00.000136Z |",
"+-----------+------+------+------+-----------------------------+",
],
&batches
);
}
#[tokio::test]
@ -341,51 +321,83 @@ async fn test_compact_large_overlape_2() {
---
- initial
- "L1 "
- "L1.4[8000,25000] 1.8kb|--L1.4--| "
- "L1.5[28000,136000] 2.64kb |------------------------------L1.5-------------------------------| "
- "L1.4[6000,25000] 1.8kb|--L1.4---| "
- "L1.5[28000,136000] 2.64kb |------------------------------L1.5------------------------------| "
- "L2 "
- "L2.1[8000,12000] 1.8kb|L2.1| "
- "L2.2[20000,30000] 2.61kb |L2.2| "
- "L2.3[35000,36000] 2.17kb |L2.3| "
- "L2.1[8000,12000] 1.8kb |L2.1| "
- "L2.2[20000,30000] 2.61kb |L2.2| "
- "L2.3[36000,36000] 2.17kb |L2.3| "
"###
);
// compact
setup.run_compact().await;
// Due to the size limit, the compaction skips this partition and 5 files are still in the system
// After PR https://github.com/influxdata/influxdb_iox/pull/7079 is in, this test will fail here
// and the right result should be similar to the commented-out output below
let files = setup.list_by_table_not_to_delete().await;
assert_eq!(files.len(), 5);
let mut files = setup.list_by_table_not_to_delete().await;
insta::assert_yaml_snapshot!(
format_files("initial", &files),
@r###"
---
- initial
- "L1 "
- "L1.9[68000,136000] 2.62kb |-----------------L1.9------------------| "
- "L2 "
- "L2.3[36000,36000] 2.17kb |L2.3| "
- "L2.10[6000,30000] 2.68kb|---L2.10----| "
"###
);
// todo: use insta::assert_yaml_snapshot!( to verify the output layout
assert_eq!(files.len(), 3);
// // verify the content of files
// // Compacted smaller file with the later data
// let mut files = setup.list_by_table_not_to_delete().await;
// let file1 = files.pop().unwrap();
// let batches = setup.read_parquet_file(file1).await;
// assert_batches_sorted_eq!(
// &[
// "+-----------+------+------+------+-----------------------------+",
// "| field_int | tag1 | tag2 | tag3 | time |",
// "+-----------+------+------+------+-----------------------------+",
// "| 10 | VT | | | 1970-01-01T00:00:00.000006Z |",
// "| 10 | VT | | | 1970-01-01T00:00:00.000010Z |",
// "| 10 | VT | | | 1970-01-01T00:00:00.000068Z |",
// "| 1500 | WA | | | 1970-01-01T00:00:00.000008Z |",
// "| 1601 | | PA | 15 | 1970-01-01T00:00:00.000028Z |",
// "| 1601 | | PA | 15 | 1970-01-01T00:00:00.000030Z |",
// "| 21 | | OH | 21 | 1970-01-01T00:00:00.000036Z |",
// "| 210 | | OH | 21 | 1970-01-01T00:00:00.000136Z |",
// "| 270 | UT | | | 1970-01-01T00:00:00.000025Z |",
// "| 70 | UT | | | 1970-01-01T00:00:00.000020Z |",
// "| 99 | OR | | | 1970-01-01T00:00:00.000012Z |",
// "+-----------+------+------+------+-----------------------------+",
// ],
// &batches
// );
// order files by their min_time
files.sort_by_key(|f| f.min_time);
let file = files[0].clone();
let batches = setup.read_parquet_file(file).await;
assert_batches_sorted_eq!(
&[
"+-----------+------+------+------+-----------------------------+",
"| field_int | tag1 | tag2 | tag3 | time |",
"+-----------+------+------+------+-----------------------------+",
"| 10 | VT | | | 1970-01-01T00:00:00.000006Z |",
"| 10 | VT | | | 1970-01-01T00:00:00.000010Z |",
"| 1500 | WA | | | 1970-01-01T00:00:00.000008Z |",
"| 1601 | | PA | 15 | 1970-01-01T00:00:00.000028Z |",
"| 1601 | | PA | 15 | 1970-01-01T00:00:00.000030Z |",
"| 270 | UT | | | 1970-01-01T00:00:00.000025Z |",
"| 70 | UT | | | 1970-01-01T00:00:00.000020Z |",
"| 99 | OR | | | 1970-01-01T00:00:00.000012Z |",
"+-----------+------+------+------+-----------------------------+",
],
&batches
);
let file = files[1].clone();
let batches = setup.read_parquet_file(file).await;
assert_batches_sorted_eq!(
&[
"+-----------+------+------+-----------------------------+",
"| field_int | tag2 | tag3 | time |",
"+-----------+------+------+-----------------------------+",
"| 21 | OH | 21 | 1970-01-01T00:00:00.000036Z |",
"+-----------+------+------+-----------------------------+",
],
&batches
);
let file = files[2].clone();
let batches = setup.read_parquet_file(file).await;
assert_batches_sorted_eq!(
&[
"+-----------+------+------+------+-----------------------------+",
"| field_int | tag1 | tag2 | tag3 | time |",
"+-----------+------+------+------+-----------------------------+",
"| 10 | VT | | | 1970-01-01T00:00:00.000068Z |",
"| 210 | | OH | 21 | 1970-01-01T00:00:00.000136Z |",
"+-----------+------+------+------+-----------------------------+",
],
&batches
);
}
#[tokio::test]

View File

@ -2,7 +2,10 @@
//!
//! See [crate::layout] module for detailed documentation
use std::sync::Arc;
use data_types::CompactionLevel;
use iox_time::{MockProvider, Time, TimeProvider};
use crate::layouts::{
all_overlapping_l0_files, layout_setup_builder, parquet_builder, run_layout_scenario, ONE_MB,
@ -72,39 +75,39 @@ async fn all_overlapping_l0() {
---
- "**** Input Files "
- "L0, all files 9mb "
- "L0.1[100,200] |-------------------------------------L0.1-------------------------------------|"
- "L0.2[100,200] |-------------------------------------L0.2-------------------------------------|"
- "L0.3[100,200] |-------------------------------------L0.3-------------------------------------|"
- "L0.4[100,200] |-------------------------------------L0.4-------------------------------------|"
- "L0.5[100,200] |-------------------------------------L0.5-------------------------------------|"
- "L0.6[100,200] |-------------------------------------L0.6-------------------------------------|"
- "L0.7[100,200] |-------------------------------------L0.7-------------------------------------|"
- "L0.8[100,200] |-------------------------------------L0.8-------------------------------------|"
- "L0.9[100,200] |-------------------------------------L0.9-------------------------------------|"
- "L0.10[100,200] |------------------------------------L0.10-------------------------------------|"
- "**** Simulation run 0, type=split(split_times=[180]). 10 Input Files, 90mb total:"
- "L0.1[100,200000] |-------------------------------------L0.1-------------------------------------|"
- "L0.2[100,200000] |-------------------------------------L0.2-------------------------------------|"
- "L0.3[100,200000] |-------------------------------------L0.3-------------------------------------|"
- "L0.4[100,200000] |-------------------------------------L0.4-------------------------------------|"
- "L0.5[100,200000] |-------------------------------------L0.5-------------------------------------|"
- "L0.6[100,200000] |-------------------------------------L0.6-------------------------------------|"
- "L0.7[100,200000] |-------------------------------------L0.7-------------------------------------|"
- "L0.8[100,200000] |-------------------------------------L0.8-------------------------------------|"
- "L0.9[100,200000] |-------------------------------------L0.9-------------------------------------|"
- "L0.10[100,200000] |------------------------------------L0.10-------------------------------------|"
- "**** Simulation run 0, type=split(split_times=[160020]). 10 Input Files, 90mb total:"
- "L0, all files 9mb "
- "L0.10[100,200] |------------------------------------L0.10-------------------------------------|"
- "L0.9[100,200] |-------------------------------------L0.9-------------------------------------|"
- "L0.8[100,200] |-------------------------------------L0.8-------------------------------------|"
- "L0.7[100,200] |-------------------------------------L0.7-------------------------------------|"
- "L0.6[100,200] |-------------------------------------L0.6-------------------------------------|"
- "L0.5[100,200] |-------------------------------------L0.5-------------------------------------|"
- "L0.4[100,200] |-------------------------------------L0.4-------------------------------------|"
- "L0.3[100,200] |-------------------------------------L0.3-------------------------------------|"
- "L0.2[100,200] |-------------------------------------L0.2-------------------------------------|"
- "L0.1[100,200] |-------------------------------------L0.1-------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 89.1mb total:"
- "L0.10[100,200000] |------------------------------------L0.10-------------------------------------|"
- "L0.9[100,200000] |-------------------------------------L0.9-------------------------------------|"
- "L0.8[100,200000] |-------------------------------------L0.8-------------------------------------|"
- "L0.7[100,200000] |-------------------------------------L0.7-------------------------------------|"
- "L0.6[100,200000] |-------------------------------------L0.6-------------------------------------|"
- "L0.5[100,200000] |-------------------------------------L0.5-------------------------------------|"
- "L0.4[100,200000] |-------------------------------------L0.4-------------------------------------|"
- "L0.3[100,200000] |-------------------------------------L0.3-------------------------------------|"
- "L0.2[100,200000] |-------------------------------------L0.2-------------------------------------|"
- "L0.1[100,200000] |-------------------------------------L0.1-------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 90mb total:"
- "L1 "
- "L1.?[100,180] 72mb |-----------------------------L1.?-----------------------------| "
- "L1.?[181,200] 17.1mb |----L1.?-----| "
- "L1.?[100,160020] 72mb|-----------------------------L1.?-----------------------------| "
- "L1.?[160021,200000] 18mb |----L1.?-----| "
- "Committing partition 1:"
- " Soft Deleting 10 files: L0.1, L0.2, L0.3, L0.4, L0.5, L0.6, L0.7, L0.8, L0.9, L0.10"
- " Creating 2 files at level CompactionLevel::L1"
- "**** Final Output Files "
- "L1 "
- "L1.11[100,180] 72mb |----------------------------L1.11-----------------------------| "
- "L1.12[181,200] 17.1mb |----L1.12----| "
- "L1.11[100,160020] 72mb|----------------------------L1.11-----------------------------| "
- "L1.12[160021,200000] 18mb |----L1.12----| "
"###
);
}
@ -408,6 +411,8 @@ async fn l1_too_much_with_non_overlapping_l0() {
let setup = layout_setup_builder().await.build().await;
let time_provider = Arc::new(MockProvider::new(Time::from_timestamp(0, 0).unwrap()));
// If we wait until we have 10 L1 files, each of which is not large
// enough to upgrade, the total size will be > 256MB and we will
// skip the partition
@ -415,6 +420,7 @@ async fn l1_too_much_with_non_overlapping_l0() {
// L1: 90MB, 80MB, 70MB, ..., 70MB
// L0: ..
let mut num_l1_files = 0;
for (i, sz) in [90, 80, 70, 70, 70, 70, 70, 70, 70, 70].iter().enumerate() {
let i = i as i64;
setup
@ -424,19 +430,24 @@ async fn l1_too_much_with_non_overlapping_l0() {
.with_min_time(50 + i * 50)
.with_max_time(99 + i * 50)
.with_compaction_level(CompactionLevel::FileNonOverlapped)
.with_file_size_bytes(sz * ONE_MB),
.with_file_size_bytes(sz * ONE_MB)
.with_max_l0_created_at(time_provider.minutes_into_future(i as u64)),
)
.await;
num_l1_files += 1;
}
// note these overlap with each other, but not with the L1 files
for _ in 0..3 {
for i in 0..3 {
setup
.partition
.create_parquet_file(
parquet_builder()
.with_min_time(600)
.with_max_time(650)
.with_file_size_bytes(5 * ONE_MB),
.with_max_time(649)
.with_file_size_bytes(5 * ONE_MB)
.with_max_l0_created_at(
time_provider.minutes_into_future(num_l1_files + i + 1),
),
)
.await;
}
@ -447,9 +458,9 @@ async fn l1_too_much_with_non_overlapping_l0() {
---
- "**** Input Files "
- "L0 "
- "L0.11[600,650] 5mb |L0.11|"
- "L0.12[600,650] 5mb |L0.12|"
- "L0.13[600,650] 5mb |L0.13|"
- "L0.11[600,649] 5mb |L0.11|"
- "L0.12[600,649] 5mb |L0.12|"
- "L0.13[600,649] 5mb |L0.13|"
- "L1 "
- "L1.1[50,99] 90mb |L1.1| "
- "L1.2[100,149] 80mb |L1.2| "
@ -463,29 +474,68 @@ async fn l1_too_much_with_non_overlapping_l0() {
- "L1.10[500,549] 70mb |L1.10| "
- "**** Simulation run 0, type=compact. 3 Input Files, 15mb total:"
- "L0, all files 5mb "
- "L0.13[600,650] |------------------------------------L0.13-------------------------------------|"
- "L0.12[600,650] |------------------------------------L0.12-------------------------------------|"
- "L0.11[600,650] |------------------------------------L0.11-------------------------------------|"
- "L0.13[600,649] |------------------------------------L0.13-------------------------------------|"
- "L0.12[600,649] |------------------------------------L0.12-------------------------------------|"
- "L0.11[600,649] |------------------------------------L0.11-------------------------------------|"
- "**** 1 Output Files (parquet_file_id not yet assigned), 15mb total:"
- "L1, all files 15mb "
- "L1.?[600,650] |-------------------------------------L1.?-------------------------------------|"
- "L1.?[600,649] |-------------------------------------L1.?-------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 3 files: L0.11, L0.12, L0.13"
- " Creating 1 files at level CompactionLevel::L1"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has 781189120 parquet file bytes, limit is 268435456"
- "**** Simulation run 1, type=split(split_times=[113, 176]). 3 Input Files, 240mb total:"
- "L1 "
- "L1.1[50,99] 90mb |----------L1.1----------| "
- "L1.2[100,149] 80mb |----------L1.2----------| "
- "L1.3[150,199] 70mb |----------L1.3----------| "
- "**** 3 Output Files (parquet_file_id not yet assigned), 236.78mb total:"
- "L2 "
- "L2.?[50,113] 101.48mb|-------------L2.?--------------| "
- "L2.?[114,176] 99.87mb |-------------L2.?--------------| "
- "L2.?[177,199] 35.44mb |--L2.?---| "
- "Committing partition 1:"
- " Soft Deleting 3 files: L1.1, L1.2, L1.3"
- " Creating 3 files at level CompactionLevel::L2"
- "**** Simulation run 2, type=split(split_times=[271, 342]). 3 Input Files, 210mb total:"
- "L1, all files 70mb "
- "L1.4[200,249] |----------L1.4----------| "
- "L1.5[250,299] |----------L1.5----------| "
- "L1.6[300,349] |----------L1.6----------| "
- "**** 3 Output Files (parquet_file_id not yet assigned), 207.18mb total:"
- "L2 "
- "L2.?[200,271] 100.07mb|----------------L2.?----------------| "
- "L2.?[272,342] 98.66mb |---------------L2.?----------------| "
- "L2.?[343,349] 8.46mb |L2.?|"
- "Committing partition 1:"
- " Soft Deleting 3 files: L1.4, L1.5, L1.6"
- " Creating 3 files at level CompactionLevel::L2"
- "**** Simulation run 3, type=split(split_times=[421, 492]). 3 Input Files, 210mb total:"
- "L1, all files 70mb "
- "L1.7[350,399] |----------L1.7----------| "
- "L1.8[400,449] |----------L1.8----------| "
- "L1.9[450,499] |----------L1.9----------| "
- "**** 3 Output Files (parquet_file_id not yet assigned), 207.18mb total:"
- "L2 "
- "L2.?[350,421] 100.07mb|----------------L2.?----------------| "
- "L2.?[422,492] 98.66mb |---------------L2.?----------------| "
- "L2.?[493,499] 8.46mb |L2.?|"
- "Committing partition 1:"
- " Soft Deleting 3 files: L1.7, L1.8, L1.9"
- " Creating 3 files at level CompactionLevel::L2"
- "**** Final Output Files "
- "L1 "
- "L1.1[50,99] 90mb |L1.1| "
- "L1.2[100,149] 80mb |L1.2| "
- "L1.3[150,199] 70mb |L1.3| "
- "L1.4[200,249] 70mb |L1.4| "
- "L1.5[250,299] 70mb |L1.5| "
- "L1.6[300,349] 70mb |L1.6| "
- "L1.7[350,399] 70mb |L1.7| "
- "L1.8[400,449] 70mb |L1.8| "
- "L1.9[450,499] 70mb |L1.9| "
- "L1.10[500,549] 70mb |L1.10| "
- "L1.14[600,650] 15mb |L1.14|"
- "L1.14[600,649] 15mb |L1.14|"
- "L2 "
- "L2.15[50,113] 101.48mb|L2.15-| "
- "L2.16[114,176] 99.87mb |L2.16-| "
- "L2.17[177,199] 35.44mb |L2.17| "
- "L2.18[200,271] 100.07mb |-L2.18-| "
- "L2.19[272,342] 98.66mb |-L2.19-| "
- "L2.20[343,349] 8.46mb |L2.20| "
- "L2.21[350,421] 100.07mb |-L2.21-| "
- "L2.22[422,492] 98.66mb |-L2.22-| "
- "L2.23[493,499] 8.46mb |L2.23| "
"###
);
}

File diff suppressed because it is too large

View File

@ -14,7 +14,7 @@ async fn one_l1_overlaps_with_many_l2s() {
// Simulate a production scenario in which there are two L1 files but one overlaps with three L2 files
// and their total size > limit 256MB
// |----------L2.1----------||----------L2.2----------||-----L2.3----|
// |----------------------------------------L1.1---------------------------||--L1.2--|
// |----------------------------------------L1.4---------------------------||--L1.5--|
test_helpers::maybe_start_logging();
@ -72,15 +72,41 @@ async fn one_l1_overlaps_with_many_l2s() {
- "L2.1[51,100] 100mb |L2.1-| "
- "L2.2[101,150] 100mb |L2.2-| "
- "L2.3[151,200] 70mb |L2.3-| "
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has 419430400 parquet file bytes, limit is 268435456"
- "**** Simulation run 0, type=split(split_times=[100, 150, 200]). 1 Input Files, 100mb total:"
- "L1, all files 100mb "
- "L1.4[1,250] |-------------------------------------L1.4-------------------------------------|"
- "**** 4 Output Files (parquet_file_id not yet assigned), 98.8mb total:"
- "L1 "
- "L1.?[1,100] 39.76mb |------------L1.?-------------| "
- "L1.?[101,150] 19.68mb |----L1.?-----| "
- "L1.?[151,200] 19.68mb |----L1.?-----| "
- "L1.?[201,250] 19.68mb |----L1.?-----| "
- "Committing partition 1:"
- " Soft Deleting 1 files: L1.4"
- " Creating 4 files at level CompactionLevel::L1"
- "**** Simulation run 1, type=split(split_times=[72]). 2 Input Files, 139.76mb total:"
- "L1 "
- "L1.6[1,100] 39.76mb |-------------------------------------L1.6-------------------------------------|"
- "L2 "
- "L2.1[51,100] 100mb |----------------L2.1-----------------| "
- "**** 2 Output Files (parquet_file_id not yet assigned), 138.35mb total:"
- "L2 "
- "L2.?[1,72] 100.23mb |-------------------------L2.?--------------------------| "
- "L2.?[73,100] 38.12mb |-------L2.?--------| "
- "Committing partition 1:"
- " Soft Deleting 2 files: L2.1, L1.6"
- " Creating 2 files at level CompactionLevel::L2"
- "**** Final Output Files "
- "L1 "
- "L1.4[1,250] 100mb |----------------L1.4-----------------| "
- "L1.5[251,500] 30mb |----------------L1.5-----------------| "
- "L1.7[101,150] 19.68mb |L1.7-| "
- "L1.8[151,200] 19.68mb |L1.8-| "
- "L1.9[201,250] 19.68mb |L1.9-| "
- "L2 "
- "L2.1[51,100] 100mb |L2.1-| "
- "L2.2[101,150] 100mb |L2.2-| "
- "L2.3[151,200] 70mb |L2.3-| "
- "L2.10[1,72] 100.23mb|--L2.10--| "
- "L2.11[73,100] 38.12mb |L2.11| "
"###
);
}
@ -170,21 +196,45 @@ async fn many_l1_overlaps_with_many_l2s() {
- "L2.1[51,100] 100mb |--------L2.1---------| "
- "L2.2[101,150] 100mb |--------L2.2---------| "
- "L2.3[151,200] 70mb |--------L2.3---------| "
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has 392167424 parquet file bytes, limit is 268435456"
- "**** Simulation run 0, type=split(split_times=[100]). 1 Input Files, 13mb total:"
- "L1, all files 13mb "
- "L1.6[91,105] |-------------------------------------L1.6-------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 12.07mb total:"
- "L1 "
- "L1.?[91,100] 8.36mb |----------------------L1.?-----------------------| "
- "L1.?[101,105] 3.71mb |--------L1.?--------| "
- "Committing partition 1:"
- " Soft Deleting 1 files: L1.6"
- " Creating 2 files at level CompactionLevel::L1"
- "**** Simulation run 1, type=split(split_times=[91, 131]). 7 Input Files, 251.07mb total:"
- "L1 "
- "L1.4[61,75] 13mb |--L1.4---| "
- "L1.5[76,90] 13mb |--L1.5---| "
- "L1.12[91,100] 8.36mb |L1.12| "
- "L1.13[101,105] 3.71mb |L1.13| "
- "L1.7[106,120] 13mb |--L1.7---| "
- "L2 "
- "L2.1[51,100] 100mb |----------------L2.1-----------------| "
- "L2.2[101,150] 100mb |----------------L2.2-----------------| "
- "**** 3 Output Files (parquet_file_id not yet assigned), 246mb total:"
- "L2 "
- "L2.?[51,91] 101.44mb|-------------L2.?-------------| "
- "L2.?[92,131] 98.91mb |------------L2.?-------------| "
- "L2.?[132,150] 45.65mb |----L2.?----| "
- "Committing partition 1:"
- " Soft Deleting 7 files: L2.1, L2.2, L1.4, L1.5, L1.7, L1.12, L1.13"
- " Creating 3 files at level CompactionLevel::L2"
- "**** Final Output Files "
- "L1 "
- "L1.4[61,75] 13mb |L1.4| "
- "L1.5[76,90] 13mb |L1.5| "
- "L1.6[91,105] 13mb |L1.6| "
- "L1.7[106,120] 13mb |L1.7| "
- "L1.8[121,135] 13mb |L1.8| "
- "L1.9[136,150] 13mb |L1.9| "
- "L1.10[151,165] 13mb |L1.10| "
- "L1.11[201,215] 13mb |L1.11|"
- "L2 "
- "L2.1[51,100] 100mb |--------L2.1---------| "
- "L2.2[101,150] 100mb |--------L2.2---------| "
- "L2.3[151,200] 70mb |--------L2.3---------| "
- "L2.14[51,91] 101.44mb|------L2.14------| "
- "L2.15[92,131] 98.91mb |------L2.15------| "
- "L2.16[132,150] 45.65mb |L2.16-| "
"###
);
}
@ -511,297 +561,385 @@ async fn many_good_size_l0_files() {
- "L0.286[285,286] |L0.286|"
- "L0.287[286,287] |L0.287|"
- "L0.288[287,288] |L0.288|"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has 419430400 parquet file bytes, limit is 268435456"
- "**** Final Output Files "
- "**** Simulation run 0, type=split(split_times=[50, 100]). 128 Input Files, 256mb total:"
- "L0, all files 2mb "
- "L0.1[0,1] |L0.1| "
- "L0.2[1,2] |L0.2| "
- "L0.3[2,3] |L0.3| "
- "L0.4[3,4] |L0.4| "
- "L0.5[4,5] |L0.5| "
- "L0.6[5,6] |L0.6| "
- "L0.7[6,7] |L0.7| "
- "L0.8[7,8] |L0.8| "
- "L0.9[8,9] |L0.9| "
- "L0.10[9,10] |L0.10| "
- "L0.11[10,11] |L0.11| "
- "L0.12[11,12] |L0.12| "
- "L0.13[12,13] |L0.13| "
- "L0.14[13,14] |L0.14| "
- "L0.15[14,15] |L0.15| "
- "L0.16[15,16] |L0.16| "
- "L0.17[16,17] |L0.17| "
- "L0.18[17,18] |L0.18| "
- "L0.19[18,19] |L0.19| "
- "L0.20[19,20] |L0.20| "
- "L0.21[20,21] |L0.21| "
- "L0.22[21,22] |L0.22| "
- "L0.23[22,23] |L0.23| "
- "L0.24[23,24] |L0.24| "
- "L0.25[24,25] |L0.25| "
- "L0.26[25,26] |L0.26| "
- "L0.27[26,27] |L0.27| "
- "L0.28[27,28] |L0.28| "
- "L0.29[28,29] |L0.29| "
- "L0.30[29,30] |L0.30| "
- "L0.31[30,31] |L0.31| "
- "L0.32[31,32] |L0.32| "
- "L0.33[32,33] |L0.33| "
- "L0.34[33,34] |L0.34| "
- "L0.35[34,35] |L0.35| "
- "L0.36[35,36] |L0.36| "
- "L0.37[36,37] |L0.37| "
- "L0.38[37,38] |L0.38| "
- "L0.39[38,39] |L0.39| "
- "L0.40[39,40] |L0.40| "
- "L0.41[40,41] |L0.41| "
- "L0.42[41,42] |L0.42| "
- "L0.43[42,43] |L0.43| "
- "L0.44[43,44] |L0.44| "
- "L0.45[44,45] |L0.45| "
- "L0.46[45,46] |L0.46| "
- "L0.47[46,47] |L0.47| "
- "L0.48[47,48] |L0.48| "
- "L0.49[48,49] |L0.49| "
- "L0.50[49,50] |L0.50| "
- "L0.51[50,51] |L0.51| "
- "L0.52[51,52] |L0.52| "
- "L0.53[52,53] |L0.53| "
- "L0.54[53,54] |L0.54| "
- "L0.55[54,55] |L0.55| "
- "L0.56[55,56] |L0.56| "
- "L0.57[56,57] |L0.57| "
- "L0.58[57,58] |L0.58| "
- "L0.59[58,59] |L0.59| "
- "L0.60[59,60] |L0.60| "
- "L0.61[60,61] |L0.61| "
- "L0.62[61,62] |L0.62| "
- "L0.63[62,63] |L0.63| "
- "L0.64[63,64] |L0.64| "
- "L0.65[64,65] |L0.65| "
- "L0.66[65,66] |L0.66| "
- "L0.67[66,67] |L0.67| "
- "L0.68[67,68] |L0.68| "
- "L0.69[68,69] |L0.69| "
- "L0.70[69,70] |L0.70| "
- "L0.71[70,71] |L0.71| "
- "L0.72[71,72] |L0.72| "
- "L0.73[72,73] |L0.73| "
- "L0.74[73,74] |L0.74| "
- "L0.75[74,75] |L0.75| "
- "L0.76[75,76] |L0.76| "
- "L0.77[76,77] |L0.77| "
- "L0.78[77,78] |L0.78| "
- "L0.79[78,79] |L0.79| "
- "L0.80[79,80] |L0.80| "
- "L0.81[80,81] |L0.81| "
- "L0.82[81,82] |L0.82| "
- "L0.83[82,83] |L0.83| "
- "L0.84[83,84] |L0.84| "
- "L0.85[84,85] |L0.85| "
- "L0.86[85,86] |L0.86| "
- "L0.87[86,87] |L0.87| "
- "L0.88[87,88] |L0.88| "
- "L0.89[88,89] |L0.89| "
- "L0.90[89,90] |L0.90| "
- "L0.91[90,91] |L0.91| "
- "L0.92[91,92] |L0.92| "
- "L0.93[92,93] |L0.93| "
- "L0.94[93,94] |L0.94| "
- "L0.95[94,95] |L0.95| "
- "L0.96[95,96] |L0.96| "
- "L0.97[96,97] |L0.97| "
- "L0.98[97,98] |L0.98| "
- "L0.99[98,99] |L0.99| "
- "L0.100[99,100] |L0.100| "
- "L0.101[100,101] |L0.101| "
- "L0.102[101,102] |L0.102| "
- "L0.103[102,103] |L0.103| "
- "L0.104[103,104] |L0.104| "
- "L0.105[104,105] |L0.105| "
- "L0.106[105,106] |L0.106| "
- "L0.107[106,107] |L0.107| "
- "L0.108[107,108] |L0.108| "
- "L0.109[108,109] |L0.109| "
- "L0.110[109,110] |L0.110| "
- "L0.111[110,111] |L0.111| "
- "L0.112[111,112] |L0.112| "
- "L0.113[112,113] |L0.113| "
- "L0.114[113,114] |L0.114| "
- "L0.115[114,115] |L0.115| "
- "L0.116[115,116] |L0.116| "
- "L0.117[116,117] |L0.117| "
- "L0.118[117,118] |L0.118| "
- "L0.119[118,119] |L0.119| "
- "L0.120[119,120] |L0.120| "
- "L0.121[120,121] |L0.121| "
- "L0.122[121,122] |L0.122| "
- "L0.123[122,123] |L0.123| "
- "L0.124[123,124] |L0.124| "
- "L0.125[124,125] |L0.125| "
- "L0.126[125,126] |L0.126| "
- "L0.127[126,127] |L0.127| "
- "L0.128[127,128] |L0.128| "
- "L0.129[128,129] |L0.129| "
- "L0.130[129,130] |L0.130| "
- "L0.131[130,131] |L0.131| "
- "L0.132[131,132] |L0.132| "
- "L0.133[132,133] |L0.133| "
- "L0.134[133,134] |L0.134| "
- "L0.135[134,135] |L0.135| "
- "L0.136[135,136] |L0.136| "
- "L0.137[136,137] |L0.137| "
- "L0.138[137,138] |L0.138| "
- "L0.139[138,139] |L0.139| "
- "L0.140[139,140] |L0.140| "
- "L0.141[140,141] |L0.141| "
- "L0.142[141,142] |L0.142| "
- "L0.143[142,143] |L0.143| "
- "L0.144[143,144] |L0.144| "
- "L0.145[144,145] |L0.145| "
- "L0.146[145,146] |L0.146| "
- "L0.147[146,147] |L0.147| "
- "L0.148[147,148] |L0.148| "
- "L0.149[148,149] |L0.149| "
- "L0.150[149,150] |L0.150| "
- "L0.151[150,151] |L0.151| "
- "L0.152[151,152] |L0.152| "
- "L0.153[152,153] |L0.153| "
- "L0.154[153,154] |L0.154| "
- "L0.155[154,155] |L0.155| "
- "L0.156[155,156] |L0.156| "
- "L0.157[156,157] |L0.157| "
- "L0.158[157,158] |L0.158| "
- "L0.159[158,159] |L0.159| "
- "L0.160[159,160] |L0.160| "
- "L0.161[160,161] |L0.161| "
- "L0.162[161,162] |L0.162| "
- "L0.163[162,163] |L0.163| "
- "L0.164[163,164] |L0.164| "
- "L0.165[164,165] |L0.165| "
- "L0.166[165,166] |L0.166| "
- "L0.167[166,167] |L0.167| "
- "L0.168[167,168] |L0.168| "
- "L0.169[168,169] |L0.169| "
- "L0.170[169,170] |L0.170| "
- "L0.171[170,171] |L0.171| "
- "L0.172[171,172] |L0.172| "
- "L0.173[172,173] |L0.173| "
- "L0.174[173,174] |L0.174| "
- "L0.175[174,175] |L0.175| "
- "L0.176[175,176] |L0.176| "
- "L0.177[176,177] |L0.177| "
- "L0.178[177,178] |L0.178| "
- "L0.179[178,179] |L0.179| "
- "L0.180[179,180] |L0.180| "
- "L0.181[180,181] |L0.181| "
- "L0.182[181,182] |L0.182| "
- "L0.183[182,183] |L0.183| "
- "L0.184[183,184] |L0.184| "
- "L0.185[184,185] |L0.185| "
- "L0.186[185,186] |L0.186| "
- "L0.187[186,187] |L0.187| "
- "L0.188[187,188] |L0.188| "
- "L0.189[188,189] |L0.189| "
- "L0.190[189,190] |L0.190| "
- "L0.191[190,191] |L0.191| "
- "L0.192[191,192] |L0.192| "
- "L0.193[192,193] |L0.193| "
- "L0.194[193,194] |L0.194| "
- "L0.195[194,195] |L0.195| "
- "L0.196[195,196] |L0.196| "
- "L0.197[196,197] |L0.197| "
- "L0.198[197,198] |L0.198| "
- "L0.199[198,199] |L0.199| "
- "L0.200[199,200] |L0.200| "
- "L0.201[200,201] |L0.201| "
- "L0.202[201,202] |L0.202| "
- "L0.203[202,203] |L0.203| "
- "L0.204[203,204] |L0.204| "
- "L0.205[204,205] |L0.205| "
- "L0.206[205,206] |L0.206| "
- "L0.207[206,207] |L0.207| "
- "L0.208[207,208] |L0.208| "
- "L0.209[208,209] |L0.209| "
- "L0.210[209,210] |L0.210| "
- "L0.211[210,211] |L0.211| "
- "L0.212[211,212] |L0.212| "
- "L0.213[212,213] |L0.213| "
- "L0.214[213,214] |L0.214| "
- "L0.215[214,215] |L0.215| "
- "L0.216[215,216] |L0.216| "
- "L0.217[216,217] |L0.217| "
- "L0.218[217,218] |L0.218| "
- "L0.219[218,219] |L0.219| "
- "L0.220[219,220] |L0.220| "
- "L0.221[220,221] |L0.221| "
- "L0.222[221,222] |L0.222| "
- "L0.223[222,223] |L0.223| "
- "L0.224[223,224] |L0.224| "
- "L0.225[224,225] |L0.225| "
- "L0.226[225,226] |L0.226| "
- "L0.227[226,227] |L0.227| "
- "L0.228[227,228] |L0.228| "
- "L0.229[228,229] |L0.229| "
- "L0.3[2,3] |L0.3| "
- "L0.4[3,4] |L0.4| "
- "L0.5[4,5] |L0.5| "
- "L0.6[5,6] |L0.6| "
- "L0.7[6,7] |L0.7| "
- "L0.8[7,8] |L0.8| "
- "L0.9[8,9] |L0.9| "
- "L0.10[9,10] |L0.10| "
- "L0.11[10,11] |L0.11| "
- "L0.12[11,12] |L0.12| "
- "L0.13[12,13] |L0.13| "
- "L0.14[13,14] |L0.14| "
- "L0.15[14,15] |L0.15| "
- "L0.16[15,16] |L0.16| "
- "L0.17[16,17] |L0.17| "
- "L0.18[17,18] |L0.18| "
- "L0.19[18,19] |L0.19| "
- "L0.20[19,20] |L0.20| "
- "L0.21[20,21] |L0.21| "
- "L0.22[21,22] |L0.22| "
- "L0.23[22,23] |L0.23| "
- "L0.24[23,24] |L0.24| "
- "L0.25[24,25] |L0.25| "
- "L0.26[25,26] |L0.26| "
- "L0.27[26,27] |L0.27| "
- "L0.28[27,28] |L0.28| "
- "L0.29[28,29] |L0.29| "
- "L0.30[29,30] |L0.30| "
- "L0.31[30,31] |L0.31| "
- "L0.32[31,32] |L0.32| "
- "L0.33[32,33] |L0.33| "
- "L0.34[33,34] |L0.34| "
- "L0.35[34,35] |L0.35| "
- "L0.36[35,36] |L0.36| "
- "L0.37[36,37] |L0.37| "
- "L0.38[37,38] |L0.38| "
- "L0.39[38,39] |L0.39| "
- "L0.40[39,40] |L0.40| "
- "L0.41[40,41] |L0.41| "
- "L0.42[41,42] |L0.42| "
- "L0.43[42,43] |L0.43| "
- "L0.44[43,44] |L0.44| "
- "L0.45[44,45] |L0.45| "
- "L0.46[45,46] |L0.46| "
- "L0.47[46,47] |L0.47| "
- "L0.48[47,48] |L0.48| "
- "L0.49[48,49] |L0.49| "
- "L0.50[49,50] |L0.50| "
- "L0.51[50,51] |L0.51| "
- "L0.52[51,52] |L0.52| "
- "L0.53[52,53] |L0.53| "
- "L0.54[53,54] |L0.54| "
- "L0.55[54,55] |L0.55| "
- "L0.56[55,56] |L0.56| "
- "L0.57[56,57] |L0.57| "
- "L0.58[57,58] |L0.58| "
- "L0.59[58,59] |L0.59| "
- "L0.60[59,60] |L0.60| "
- "L0.61[60,61] |L0.61| "
- "L0.62[61,62] |L0.62| "
- "L0.63[62,63] |L0.63| "
- "L0.64[63,64] |L0.64| "
- "L0.65[64,65] |L0.65| "
- "L0.66[65,66] |L0.66| "
- "L0.67[66,67] |L0.67| "
- "L0.68[67,68] |L0.68| "
- "L0.69[68,69] |L0.69| "
- "L0.70[69,70] |L0.70| "
- "L0.71[70,71] |L0.71| "
- "L0.72[71,72] |L0.72| "
- "L0.73[72,73] |L0.73| "
- "L0.74[73,74] |L0.74| "
- "L0.75[74,75] |L0.75| "
- "L0.76[75,76] |L0.76| "
- "L0.77[76,77] |L0.77| "
- "L0.78[77,78] |L0.78| "
- "L0.79[78,79] |L0.79| "
- "L0.80[79,80] |L0.80| "
- "L0.81[80,81] |L0.81| "
- "L0.82[81,82] |L0.82| "
- "L0.83[82,83] |L0.83| "
- "L0.84[83,84] |L0.84| "
- "L0.85[84,85] |L0.85| "
- "L0.86[85,86] |L0.86| "
- "L0.87[86,87] |L0.87| "
- "L0.88[87,88] |L0.88| "
- "L0.89[88,89] |L0.89| "
- "L0.90[89,90] |L0.90| "
- "L0.91[90,91] |L0.91| "
- "L0.92[91,92] |L0.92| "
- "L0.93[92,93] |L0.93| "
- "L0.94[93,94] |L0.94| "
- "L0.95[94,95] |L0.95| "
- "L0.96[95,96] |L0.96| "
- "L0.97[96,97] |L0.97| "
- "L0.98[97,98] |L0.98| "
- "L0.99[98,99] |L0.99| "
- "L0.100[99,100] |L0.100| "
- "L0.101[100,101] |L0.101| "
- "L0.102[101,102] |L0.102| "
- "L0.103[102,103] |L0.103| "
- "L0.104[103,104] |L0.104| "
- "L0.105[104,105] |L0.105| "
- "L0.106[105,106] |L0.106| "
- "L0.107[106,107] |L0.107| "
- "L0.108[107,108] |L0.108| "
- "L0.109[108,109] |L0.109| "
- "L0.110[109,110] |L0.110| "
- "L0.111[110,111] |L0.111| "
- "L0.112[111,112] |L0.112| "
- "L0.113[112,113] |L0.113| "
- "L0.114[113,114] |L0.114| "
- "L0.115[114,115] |L0.115| "
- "L0.116[115,116] |L0.116| "
- "L0.117[116,117] |L0.117|"
- "L0.118[117,118] |L0.118|"
- "L0.119[118,119] |L0.119|"
- "L0.120[119,120] |L0.120|"
- "L0.121[120,121] |L0.121|"
- "L0.122[121,122] |L0.122|"
- "L0.123[122,123] |L0.123|"
- "L0.124[123,124] |L0.124|"
- "L0.125[124,125] |L0.125|"
- "L0.126[125,126] |L0.126|"
- "L0.127[126,127] |L0.127|"
- "L0.128[127,128] |L0.128|"
- "**** 3 Output Files (parquet_file_id not yet assigned), 252mb total:"
- "L0 "
- "L0.?[0,50] 100mb |------------L0.?-------------| "
- "L0.?[51,100] 98mb |------------L0.?------------| "
- "L0.?[101,128] 54mb |-----L0.?-----| "
- "Committing partition 1:"
- " Soft Deleting 128 files: L0.1, L0.2, L0.3, L0.4, L0.5, L0.6, L0.7, L0.8, L0.9, L0.10, L0.11, L0.12, L0.13, L0.14, L0.15, L0.16, L0.17, L0.18, L0.19, L0.20, L0.21, L0.22, L0.23, L0.24, L0.25, L0.26, L0.27, L0.28, L0.29, L0.30, L0.31, L0.32, L0.33, L0.34, L0.35, L0.36, L0.37, L0.38, L0.39, L0.40, L0.41, L0.42, L0.43, L0.44, L0.45, L0.46, L0.47, L0.48, L0.49, L0.50, L0.51, L0.52, L0.53, L0.54, L0.55, L0.56, L0.57, L0.58, L0.59, L0.60, L0.61, L0.62, L0.63, L0.64, L0.65, L0.66, L0.67, L0.68, L0.69, L0.70, L0.71, L0.72, L0.73, L0.74, L0.75, L0.76, L0.77, L0.78, L0.79, L0.80, L0.81, L0.82, L0.83, L0.84, L0.85, L0.86, L0.87, L0.88, L0.89, L0.90, L0.91, L0.92, L0.93, L0.94, L0.95, L0.96, L0.97, L0.98, L0.99, L0.100, L0.101, L0.102, L0.103, L0.104, L0.105, L0.106, L0.107, L0.108, L0.109, L0.110, L0.111, L0.112, L0.113, L0.114, L0.115, L0.116, L0.117, L0.118, L0.119, L0.120, L0.121, L0.122, L0.123, L0.124, L0.125, L0.126, L0.127, L0.128"
- " Creating 3 files at level CompactionLevel::L0"
- "**** Simulation run 1, type=split(split_times=[178, 228]). 128 Input Files, 256mb total:"
- "L0, all files 2mb "
- "L0.129[128,129] |L0.129| "
- "L0.130[129,130] |L0.130| "
- "L0.131[130,131] |L0.131| "
- "L0.132[131,132] |L0.132| "
- "L0.133[132,133] |L0.133| "
- "L0.134[133,134] |L0.134| "
- "L0.135[134,135] |L0.135| "
- "L0.136[135,136] |L0.136| "
- "L0.137[136,137] |L0.137| "
- "L0.138[137,138] |L0.138| "
- "L0.139[138,139] |L0.139| "
- "L0.140[139,140] |L0.140| "
- "L0.141[140,141] |L0.141| "
- "L0.142[141,142] |L0.142| "
- "L0.143[142,143] |L0.143| "
- "L0.144[143,144] |L0.144| "
- "L0.145[144,145] |L0.145| "
- "L0.146[145,146] |L0.146| "
- "L0.147[146,147] |L0.147| "
- "L0.148[147,148] |L0.148| "
- "L0.149[148,149] |L0.149| "
- "L0.150[149,150] |L0.150| "
- "L0.151[150,151] |L0.151| "
- "L0.152[151,152] |L0.152| "
- "L0.153[152,153] |L0.153| "
- "L0.154[153,154] |L0.154| "
- "L0.155[154,155] |L0.155| "
- "L0.156[155,156] |L0.156| "
- "L0.157[156,157] |L0.157| "
- "L0.158[157,158] |L0.158| "
- "L0.159[158,159] |L0.159| "
- "L0.160[159,160] |L0.160| "
- "L0.161[160,161] |L0.161| "
- "L0.162[161,162] |L0.162| "
- "L0.163[162,163] |L0.163| "
- "L0.164[163,164] |L0.164| "
- "L0.165[164,165] |L0.165| "
- "L0.166[165,166] |L0.166| "
- "L0.167[166,167] |L0.167| "
- "L0.168[167,168] |L0.168| "
- "L0.169[168,169] |L0.169| "
- "L0.170[169,170] |L0.170| "
- "L0.171[170,171] |L0.171| "
- "L0.172[171,172] |L0.172| "
- "L0.173[172,173] |L0.173| "
- "L0.174[173,174] |L0.174| "
- "L0.175[174,175] |L0.175| "
- "L0.176[175,176] |L0.176| "
- "L0.177[176,177] |L0.177| "
- "L0.178[177,178] |L0.178| "
- "L0.179[178,179] |L0.179| "
- "L0.180[179,180] |L0.180| "
- "L0.181[180,181] |L0.181| "
- "L0.182[181,182] |L0.182| "
- "L0.183[182,183] |L0.183| "
- "L0.184[183,184] |L0.184| "
- "L0.185[184,185] |L0.185| "
- "L0.186[185,186] |L0.186| "
- "L0.187[186,187] |L0.187| "
- "L0.188[187,188] |L0.188| "
- "L0.189[188,189] |L0.189| "
- "L0.190[189,190] |L0.190| "
- "L0.191[190,191] |L0.191| "
- "L0.192[191,192] |L0.192| "
- "L0.193[192,193] |L0.193| "
- "L0.194[193,194] |L0.194| "
- "L0.195[194,195] |L0.195| "
- "L0.196[195,196] |L0.196| "
- "L0.197[196,197] |L0.197| "
- "L0.198[197,198] |L0.198| "
- "L0.199[198,199] |L0.199| "
- "L0.200[199,200] |L0.200| "
- "L0.201[200,201] |L0.201| "
- "L0.202[201,202] |L0.202| "
- "L0.203[202,203] |L0.203| "
- "L0.204[203,204] |L0.204| "
- "L0.205[204,205] |L0.205| "
- "L0.206[205,206] |L0.206| "
- "L0.207[206,207] |L0.207| "
- "L0.208[207,208] |L0.208| "
- "L0.209[208,209] |L0.209| "
- "L0.210[209,210] |L0.210| "
- "L0.211[210,211] |L0.211| "
- "L0.212[211,212] |L0.212| "
- "L0.213[212,213] |L0.213| "
- "L0.214[213,214] |L0.214| "
- "L0.215[214,215] |L0.215| "
- "L0.216[215,216] |L0.216| "
- "L0.217[216,217] |L0.217| "
- "L0.218[217,218] |L0.218| "
- "L0.219[218,219] |L0.219| "
- "L0.220[219,220] |L0.220| "
- "L0.221[220,221] |L0.221| "
- "L0.222[221,222] |L0.222| "
- "L0.223[222,223] |L0.223| "
- "L0.224[223,224] |L0.224| "
- "L0.225[224,225] |L0.225| "
- "L0.226[225,226] |L0.226| "
- "L0.227[226,227] |L0.227| "
- "L0.228[227,228] |L0.228| "
- "L0.229[228,229] |L0.229| "
- "L0.230[229,230] |L0.230| "
- "L0.231[230,231] |L0.231| "
- "L0.232[231,232] |L0.232| "
- "L0.233[232,233] |L0.233| "
- "L0.234[233,234] |L0.234| "
- "L0.235[234,235] |L0.235| "
- "L0.236[235,236] |L0.236| "
- "L0.237[236,237] |L0.237| "
- "L0.238[237,238] |L0.238| "
- "L0.239[238,239] |L0.239| "
- "L0.240[239,240] |L0.240| "
- "L0.241[240,241] |L0.241| "
- "L0.242[241,242] |L0.242| "
- "L0.243[242,243] |L0.243| "
- "L0.244[243,244] |L0.244| "
- "L0.245[244,245] |L0.245| "
- "L0.246[245,246] |L0.246| "
- "L0.247[246,247] |L0.247| "
- "L0.248[247,248] |L0.248| "
- "L0.249[248,249] |L0.249| "
- "L0.250[249,250] |L0.250| "
- "L0.251[250,251] |L0.251| "
- "L0.252[251,252] |L0.252| "
- "L0.253[252,253] |L0.253| "
- "L0.254[253,254] |L0.254| "
- "L0.255[254,255] |L0.255| "
- "L0.256[255,256] |L0.256| "
- "L0.257[256,257] |L0.257| "
- "L0.258[257,258] |L0.258| "
- "L0.259[258,259] |L0.259| "
- "L0.260[259,260] |L0.260| "
- "L0.261[260,261] |L0.261|"
- "L0.262[261,262] |L0.262|"
- "L0.263[262,263] |L0.263|"
- "L0.264[263,264] |L0.264|"
- "L0.265[264,265] |L0.265|"
- "L0.266[265,266] |L0.266|"
- "L0.267[266,267] |L0.267|"
- "L0.268[267,268] |L0.268|"
- "L0.269[268,269] |L0.269|"
- "L0.270[269,270] |L0.270|"
- "L0.271[270,271] |L0.271|"
- "L0.272[271,272] |L0.272|"
- "L0.273[272,273] |L0.273|"
- "L0.274[273,274] |L0.274|"
- "L0.275[274,275] |L0.275|"
- "L0.276[275,276] |L0.276|"
- "L0.277[276,277] |L0.277|"
- "L0.278[277,278] |L0.278|"
- "L0.279[278,279] |L0.279|"
- "L0.280[279,280] |L0.280|"
- "L0.281[280,281] |L0.281|"
- "L0.282[281,282] |L0.282|"
- "L0.283[282,283] |L0.283|"
- "L0.284[283,284] |L0.284|"
- "L0.285[284,285] |L0.285|"
- "L0.286[285,286] |L0.286|"
- "L0.287[286,287] |L0.287|"
- "L0.288[287,288] |L0.288|"
- "L0.233[232,233] |L0.233| "
- "L0.234[233,234] |L0.234| "
- "L0.235[234,235] |L0.235| "
- "L0.236[235,236] |L0.236| "
- "L0.237[236,237] |L0.237| "
- "L0.238[237,238] |L0.238| "
- "L0.239[238,239] |L0.239| "
- "L0.240[239,240] |L0.240| "
- "L0.241[240,241] |L0.241| "
- "L0.242[241,242] |L0.242| "
- "L0.243[242,243] |L0.243| "
- "L0.244[243,244] |L0.244| "
- "L0.245[244,245] |L0.245|"
- "L0.246[245,246] |L0.246|"
- "L0.247[246,247] |L0.247|"
- "L0.248[247,248] |L0.248|"
- "L0.249[248,249] |L0.249|"
- "L0.250[249,250] |L0.250|"
- "L0.251[250,251] |L0.251|"
- "L0.252[251,252] |L0.252|"
- "L0.253[252,253] |L0.253|"
- "L0.254[253,254] |L0.254|"
- "L0.255[254,255] |L0.255|"
- "L0.256[255,256] |L0.256|"
- "**** 3 Output Files (parquet_file_id not yet assigned), 252mb total:"
- "L0 "
- "L0.?[128,178] 100mb |------------L0.?-------------| "
- "L0.?[179,228] 98mb |------------L0.?------------| "
- "L0.?[229,256] 54mb |-----L0.?-----| "
- "Committing partition 1:"
- " Soft Deleting 128 files: L0.129, L0.130, L0.131, L0.132, L0.133, L0.134, L0.135, L0.136, L0.137, L0.138, L0.139, L0.140, L0.141, L0.142, L0.143, L0.144, L0.145, L0.146, L0.147, L0.148, L0.149, L0.150, L0.151, L0.152, L0.153, L0.154, L0.155, L0.156, L0.157, L0.158, L0.159, L0.160, L0.161, L0.162, L0.163, L0.164, L0.165, L0.166, L0.167, L0.168, L0.169, L0.170, L0.171, L0.172, L0.173, L0.174, L0.175, L0.176, L0.177, L0.178, L0.179, L0.180, L0.181, L0.182, L0.183, L0.184, L0.185, L0.186, L0.187, L0.188, L0.189, L0.190, L0.191, L0.192, L0.193, L0.194, L0.195, L0.196, L0.197, L0.198, L0.199, L0.200, L0.201, L0.202, L0.203, L0.204, L0.205, L0.206, L0.207, L0.208, L0.209, L0.210, L0.211, L0.212, L0.213, L0.214, L0.215, L0.216, L0.217, L0.218, L0.219, L0.220, L0.221, L0.222, L0.223, L0.224, L0.225, L0.226, L0.227, L0.228, L0.229, L0.230, L0.231, L0.232, L0.233, L0.234, L0.235, L0.236, L0.237, L0.238, L0.239, L0.240, L0.241, L0.242, L0.243, L0.244, L0.245, L0.246, L0.247, L0.248, L0.249, L0.250, L0.251, L0.252, L0.253, L0.254, L0.255, L0.256"
- " Creating 3 files at level CompactionLevel::L0"
- "**** Simulation run 2, type=split(split_times=[281]). 32 Input Files, 64mb total:"
- "L0, all files 2mb "
- "L0.257[256,257] |L0.257| "
- "L0.258[257,258] |L0.258| "
- "L0.259[258,259] |L0.259| "
- "L0.260[259,260] |L0.260| "
- "L0.261[260,261] |L0.261| "
- "L0.262[261,262] |L0.262| "
- "L0.263[262,263] |L0.263| "
- "L0.264[263,264] |L0.264| "
- "L0.265[264,265] |L0.265| "
- "L0.266[265,266] |L0.266| "
- "L0.267[266,267] |L0.267| "
- "L0.268[267,268] |L0.268| "
- "L0.269[268,269] |L0.269| "
- "L0.270[269,270] |L0.270| "
- "L0.271[270,271] |L0.271| "
- "L0.272[271,272] |L0.272| "
- "L0.273[272,273] |L0.273| "
- "L0.274[273,274] |L0.274| "
- "L0.275[274,275] |L0.275| "
- "L0.276[275,276] |L0.276| "
- "L0.277[276,277] |L0.277| "
- "L0.278[277,278] |L0.278| "
- "L0.279[278,279] |L0.279| "
- "L0.280[279,280] |L0.280| "
- "L0.281[280,281] |L0.281| "
- "L0.282[281,282] |L0.282| "
- "L0.283[282,283] |L0.283| "
- "L0.284[283,284] |L0.284| "
- "L0.285[284,285] |L0.285| "
- "L0.286[285,286] |L0.286|"
- "L0.287[286,287] |L0.287|"
- "L0.288[287,288] |L0.288|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 62mb total:"
- "L0 "
- "L0.?[256,281] 50mb |----------------------------L0.?----------------------------| "
- "L0.?[282,288] 12mb |----L0.?-----|"
- "Committing partition 1:"
- " Soft Deleting 32 files: L0.257, L0.258, L0.259, L0.260, L0.261, L0.262, L0.263, L0.264, L0.265, L0.266, L0.267, L0.268, L0.269, L0.270, L0.271, L0.272, L0.273, L0.274, L0.275, L0.276, L0.277, L0.278, L0.279, L0.280, L0.281, L0.282, L0.283, L0.284, L0.285, L0.286, L0.287, L0.288"
- " Creating 2 files at level CompactionLevel::L0"
- "**** Simulation run 3, type=split(split_times=[230, 281]). 4 Input Files, 214mb total:"
- "L0 "
- "L0.296[282,288] 12mb |L0.296|"
- "L0.295[256,281] 50mb |-----L0.295-----| "
- "L0.294[229,256] 54mb |-----L0.294------| "
- "L0.293[179,228] 98mb|-------------L0.293--------------| "
- "**** 3 Output Files (parquet_file_id not yet assigned), 210.07mb total:"
- "L1 "
- "L1.?[179,230] 100.13mb|---------------L1.?----------------| "
- "L1.?[231,281] 98.17mb |---------------L1.?---------------| "
- "L1.?[282,288] 11.78mb |L1.?|"
- "Committing partition 1:"
- " Soft Deleting 4 files: L0.293, L0.294, L0.295, L0.296"
- " Upgrading 1 files level to CompactionLevel::L1: L0.289"
- " Creating 3 files at level CompactionLevel::L1"
- "**** Simulation run 4, type=split(split_times=[102, 153]). 3 Input Files, 252mb total:"
- "L0 "
- "L0.290[51,100] 98mb |-----------L0.290-----------| "
- "L0.291[101,128] 54mb |----L0.291-----| "
- "L0.292[128,178] 100mb |-----------L0.292------------| "
- "**** 3 Output Files (parquet_file_id not yet assigned), 248.03mb total:"
- "L1 "
- "L1.?[51,102] 101.2mb|-------------L1.?-------------| "
- "L1.?[103,153] 99.21mb |------------L1.?-------------| "
- "L1.?[154,178] 47.62mb |----L1.?-----| "
- "Committing partition 1:"
- " Soft Deleting 3 files: L0.290, L0.291, L0.292"
- " Creating 3 files at level CompactionLevel::L1"
- "**** Simulation run 5, type=split(split_times=[155, 207]). 3 Input Files, 246.96mb total:"
- "L1 "
- "L1.301[103,153] 99.21mb|-----------L1.301------------| "
- "L1.302[154,178] 47.62mb |---L1.302----| "
- "L1.297[179,230] 100.13mb |------------L1.297------------| "
- "**** 3 Output Files (parquet_file_id not yet assigned), 243.07mb total:"
- "L2 "
- "L2.?[103,155] 101.12mb|-------------L2.?-------------| "
- "L2.?[156,207] 99.17mb |-------------L2.?-------------| "
- "L2.?[208,230] 42.78mb |---L2.?----| "
- "Committing partition 1:"
- " Soft Deleting 3 files: L1.297, L1.301, L1.302"
- " Upgrading 2 files level to CompactionLevel::L2: L1.289, L1.300"
- " Creating 3 files at level CompactionLevel::L2"
- "**** Simulation run 6, type=split(split_times=[276]). 2 Input Files, 109.94mb total:"
- "L1 "
- "L1.299[282,288] 11.78mb |L1.299| "
- "L1.298[231,281] 98.17mb|-------------------------------L1.298-------------------------------| "
- "**** 2 Output Files (parquet_file_id not yet assigned), 108.02mb total:"
- "L2 "
- "L2.?[231,276] 86.8mb|----------------------------L2.?-----------------------------| "
- "L2.?[277,288] 21.22mb |----L2.?-----| "
- "Committing partition 1:"
- " Soft Deleting 2 files: L1.298, L1.299"
- " Creating 2 files at level CompactionLevel::L2"
- "**** Final Output Files "
- "L2 "
- "L2.289[0,50] 100mb |--L2.289---| "
- "L2.300[51,102] 101.2mb |---L2.300---| "
- "L2.303[103,155] 101.12mb |---L2.303---| "
- "L2.304[156,207] 99.17mb |---L2.304---| "
- "L2.305[208,230] 42.78mb |L2.305| "
- "L2.306[231,276] 86.8mb |--L2.306--| "
- "L2.307[277,288] 21.22mb |L2.307|"
"###
);
}
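Aside: the split_times in these simulation runs follow directly from size arithmetic. The compactor targets output files of roughly 100mb, so it cuts the input time range in proportion to the cumulative target size, assuming data is evenly distributed over time. A minimal sketch of that idea (the name and signature here are hypothetical, not the actual even_time_split helper, which also has special handling for the trailing chunk):

fn even_split_times(min_time: i64, max_time: i64, total_mb: u64, target_mb: u64) -> Vec<i64> {
    // Place a split each time another target_mb worth of data has
    // accumulated, spread proportionally over [min_time, max_time].
    let range = (max_time - min_time) as u64;
    let mut split_times = Vec::new();
    let mut cumulative_mb = target_mb;
    while cumulative_mb < total_mb {
        split_times.push(min_time + (range * cumulative_mb / total_mb) as i64);
        cumulative_mb += target_mb;
    }
    split_times
}

For simulation run 1 above, even_split_times(128, 256, 256, 100) gives [178, 228] (128 + 128 * 100 / 256 = 178, and 128 + 128 * 200 / 256 = 228), matching the 100mb/98mb/54mb outputs; run 0 gives [50, 100] the same way.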

View File

@ -59,6 +59,7 @@ use std::time::Duration;
use compactor2_test_utils::{format_files, TestSetup, TestSetupBuilder};
use data_types::{CompactionLevel, ParquetFile};
use iox_tests::TestParquetFileBuilder;
use iox_time::Time;
pub(crate) const ONE_MB: u64 = 1024 * 1024;
@ -90,14 +91,15 @@ pub(crate) async fn layout_setup_builder() -> TestSetupBuilder<false> {
/// Creates a scenario with ten overlapping 9MB (9 * ONE_MB) L0 files
pub(crate) async fn all_overlapping_l0_files(setup: TestSetup) -> TestSetup {
for _ in 0..10 {
for i in 0..10 {
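// use a distinct max_l0_created_at per file so the L0s have a
// well-defined creation order (L0s are sorted by max_l0_created_at)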
setup
.partition
.create_parquet_file(
parquet_builder()
.with_min_time(100)
.with_max_time(200)
.with_file_size_bytes(9 * ONE_MB),
.with_max_time(200000)
.with_file_size_bytes(9 * ONE_MB)
.with_max_l0_created_at(Time::from_timestamp_nanos(i + 1)),
)
.await;
}

View File

@ -36,14 +36,11 @@ async fn single_giant_file() {
- "L0, all files 4.88gb "
- "L0.1[100,100] |-------------------------------------L0.1-------------------------------------|"
- "WARNING: file L0.1[100,100] 4.88gb exceeds soft limit 100mb by more than 50%"
- "Committing partition 1:"
- " Upgrading 1 files level to CompactionLevel::L1: L0.1"
- "Committing partition 1:"
- " Upgrading 1 files level to CompactionLevel::L2: L1.1"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 268435456. The may happen if a large amount of data has the same timestamp"
- "**** Final Output Files "
- "L2, all files 4.88gb "
- "L2.1[100,100] |-------------------------------------L2.1-------------------------------------|"
- "WARNING: file L2.1[100,100] 4.88gb exceeds soft limit 100mb by more than 50%"
- "L0, all files 4.88gb "
- "L0.1[100,100] |-------------------------------------L0.1-------------------------------------|"
- "WARNING: file L0.1[100,100] 4.88gb exceeds soft limit 100mb by more than 50%"
"###
);
}
@ -79,7 +76,7 @@ async fn two_giant_files() {
- "L0.2[100,100] |-------------------------------------L0.2-------------------------------------|"
- "WARNING: file L0.1[100,100] 4.88gb exceeds soft limit 100mb by more than 50%"
- "WARNING: file L0.2[100,100] 4.88gb exceeds soft limit 100mb by more than 50%"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has 10485760000 parquet file bytes, limit is 268435456"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 268435456. The may happen if a large amount of data has the same timestamp"
- "**** Final Output Files "
- "L0, all files 4.88gb "
- "L0.1[100,100] |-------------------------------------L0.1-------------------------------------|"
@ -90,6 +87,48 @@ async fn two_giant_files() {
);
}
#[tokio::test]
async fn two_giant_files_time_range_1() {
test_helpers::maybe_start_logging();
let setup = layout_setup_builder().await.build().await;
// This has two large overlapping files covering a time range of a
// single nanosecond, which the compactor can't split. The compactor
// should not panic
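// (each file is 4.88gb, so any overlapping pair far exceeds the
// 256mb max compact size limit and the partition is skipped)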
for _ in 0..2 {
setup
.partition
.create_parquet_file(
parquet_builder()
.with_min_time(100)
.with_max_time(101)
.with_file_size_bytes(5 * 1000 * ONE_MB)
.with_compaction_level(CompactionLevel::Initial),
)
.await;
}
insta::assert_yaml_snapshot!(
run_layout_scenario(&setup).await,
@r###"
---
- "**** Input Files "
- "L0, all files 4.88gb "
- "L0.1[100,101] |-------------------------------------L0.1-------------------------------------|"
- "L0.2[100,101] |-------------------------------------L0.2-------------------------------------|"
- "WARNING: file L0.1[100,101] 4.88gb exceeds soft limit 100mb by more than 50%"
- "WARNING: file L0.2[100,101] 4.88gb exceeds soft limit 100mb by more than 50%"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 268435456. The may happen if a large amount of data has the same timestamp"
- "**** Final Output Files "
- "L0, all files 4.88gb "
- "L0.1[100,101] |-------------------------------------L0.1-------------------------------------|"
- "L0.2[100,101] |-------------------------------------L0.2-------------------------------------|"
- "WARNING: file L0.1[100,101] 4.88gb exceeds soft limit 100mb by more than 50%"
- "WARNING: file L0.2[100,101] 4.88gb exceeds soft limit 100mb by more than 50%"
"###
);
}
#[tokio::test]
async fn many_medium_files() {
test_helpers::maybe_start_logging();
@ -138,29 +177,126 @@ async fn many_medium_files() {
- "L0.18[100,100] |------------------------------------L0.18-------------------------------------|"
- "L0.19[100,100] |------------------------------------L0.19-------------------------------------|"
- "L0.20[100,100] |------------------------------------L0.20-------------------------------------|"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has 629145600 parquet file bytes, limit is 268435456"
- "**** Final Output Files "
- "**** Simulation run 0, type=compact. 8 Input Files, 240mb total:"
- "L0, all files 30mb "
- "L0.1[100,100] |-------------------------------------L0.1-------------------------------------|"
- "L0.2[100,100] |-------------------------------------L0.2-------------------------------------|"
- "L0.3[100,100] |-------------------------------------L0.3-------------------------------------|"
- "L0.4[100,100] |-------------------------------------L0.4-------------------------------------|"
- "L0.5[100,100] |-------------------------------------L0.5-------------------------------------|"
- "L0.6[100,100] |-------------------------------------L0.6-------------------------------------|"
- "L0.7[100,100] |-------------------------------------L0.7-------------------------------------|"
- "L0.8[100,100] |-------------------------------------L0.8-------------------------------------|"
- "L0.9[100,100] |-------------------------------------L0.9-------------------------------------|"
- "L0.10[100,100] |------------------------------------L0.10-------------------------------------|"
- "L0.11[100,100] |------------------------------------L0.11-------------------------------------|"
- "L0.12[100,100] |------------------------------------L0.12-------------------------------------|"
- "L0.13[100,100] |------------------------------------L0.13-------------------------------------|"
- "L0.14[100,100] |------------------------------------L0.14-------------------------------------|"
- "L0.15[100,100] |------------------------------------L0.15-------------------------------------|"
- "L0.16[100,100] |------------------------------------L0.16-------------------------------------|"
- "L0.17[100,100] |------------------------------------L0.17-------------------------------------|"
- "L0.18[100,100] |------------------------------------L0.18-------------------------------------|"
- "L0.19[100,100] |------------------------------------L0.19-------------------------------------|"
- "L0.20[100,100] |------------------------------------L0.20-------------------------------------|"
- "L0.19[100,100] |------------------------------------L0.19-------------------------------------|"
- "L0.18[100,100] |------------------------------------L0.18-------------------------------------|"
- "L0.17[100,100] |------------------------------------L0.17-------------------------------------|"
- "L0.16[100,100] |------------------------------------L0.16-------------------------------------|"
- "L0.15[100,100] |------------------------------------L0.15-------------------------------------|"
- "L0.14[100,100] |------------------------------------L0.14-------------------------------------|"
- "L0.13[100,100] |------------------------------------L0.13-------------------------------------|"
- "**** 1 Output Files (parquet_file_id not yet assigned), 240mb total:"
- "L1, all files 240mb "
- "L1.?[100,100] |-------------------------------------L1.?-------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 8 files: L0.13, L0.14, L0.15, L0.16, L0.17, L0.18, L0.19, L0.20"
- " Creating 1 files at level CompactionLevel::L1"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 268435456. The may happen if a large amount of data has the same timestamp"
- "**** Final Output Files "
- "L0 "
- "L0.1[100,100] 30mb |-------------------------------------L0.1-------------------------------------|"
- "L0.2[100,100] 30mb |-------------------------------------L0.2-------------------------------------|"
- "L0.3[100,100] 30mb |-------------------------------------L0.3-------------------------------------|"
- "L0.4[100,100] 30mb |-------------------------------------L0.4-------------------------------------|"
- "L0.5[100,100] 30mb |-------------------------------------L0.5-------------------------------------|"
- "L0.6[100,100] 30mb |-------------------------------------L0.6-------------------------------------|"
- "L0.7[100,100] 30mb |-------------------------------------L0.7-------------------------------------|"
- "L0.8[100,100] 30mb |-------------------------------------L0.8-------------------------------------|"
- "L0.9[100,100] 30mb |-------------------------------------L0.9-------------------------------------|"
- "L0.10[100,100] 30mb |------------------------------------L0.10-------------------------------------|"
- "L0.11[100,100] 30mb |------------------------------------L0.11-------------------------------------|"
- "L0.12[100,100] 30mb |------------------------------------L0.12-------------------------------------|"
- "L1 "
- "L1.21[100,100] 240mb|------------------------------------L1.21-------------------------------------|"
- "WARNING: file L1.21[100,100] 240mb exceeds soft limit 100mb by more than 50%"
"###
);
}
#[tokio::test]
async fn many_medium_files_time_range_1() {
test_helpers::maybe_start_logging();
let setup = layout_setup_builder().await.build().await;
// The compactor has 20 overlapping files covering a time range of a
// single nanosecond that individually are small enough to be
// processed, but when compacted together are too large and can't be
// split by timestamp
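// (8 x 30mb = 240mb fits under the 256mb limit, so one round compacts
// 8 files into an L1; the remaining overlapping files then still
// exceed the limit and the partition is skipped)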
for _ in 0..20 {
setup
.partition
.create_parquet_file(
parquet_builder()
.with_min_time(100)
.with_max_time(101)
.with_file_size_bytes(30 * ONE_MB)
.with_compaction_level(CompactionLevel::Initial),
)
.await;
}
insta::assert_yaml_snapshot!(
run_layout_scenario(&setup).await,
@r###"
---
- "**** Input Files "
- "L0, all files 30mb "
- "L0.1[100,101] |-------------------------------------L0.1-------------------------------------|"
- "L0.2[100,101] |-------------------------------------L0.2-------------------------------------|"
- "L0.3[100,101] |-------------------------------------L0.3-------------------------------------|"
- "L0.4[100,101] |-------------------------------------L0.4-------------------------------------|"
- "L0.5[100,101] |-------------------------------------L0.5-------------------------------------|"
- "L0.6[100,101] |-------------------------------------L0.6-------------------------------------|"
- "L0.7[100,101] |-------------------------------------L0.7-------------------------------------|"
- "L0.8[100,101] |-------------------------------------L0.8-------------------------------------|"
- "L0.9[100,101] |-------------------------------------L0.9-------------------------------------|"
- "L0.10[100,101] |------------------------------------L0.10-------------------------------------|"
- "L0.11[100,101] |------------------------------------L0.11-------------------------------------|"
- "L0.12[100,101] |------------------------------------L0.12-------------------------------------|"
- "L0.13[100,101] |------------------------------------L0.13-------------------------------------|"
- "L0.14[100,101] |------------------------------------L0.14-------------------------------------|"
- "L0.15[100,101] |------------------------------------L0.15-------------------------------------|"
- "L0.16[100,101] |------------------------------------L0.16-------------------------------------|"
- "L0.17[100,101] |------------------------------------L0.17-------------------------------------|"
- "L0.18[100,101] |------------------------------------L0.18-------------------------------------|"
- "L0.19[100,101] |------------------------------------L0.19-------------------------------------|"
- "L0.20[100,101] |------------------------------------L0.20-------------------------------------|"
- "**** Simulation run 0, type=compact. 8 Input Files, 240mb total:"
- "L0, all files 30mb "
- "L0.20[100,101] |------------------------------------L0.20-------------------------------------|"
- "L0.19[100,101] |------------------------------------L0.19-------------------------------------|"
- "L0.18[100,101] |------------------------------------L0.18-------------------------------------|"
- "L0.17[100,101] |------------------------------------L0.17-------------------------------------|"
- "L0.16[100,101] |------------------------------------L0.16-------------------------------------|"
- "L0.15[100,101] |------------------------------------L0.15-------------------------------------|"
- "L0.14[100,101] |------------------------------------L0.14-------------------------------------|"
- "L0.13[100,101] |------------------------------------L0.13-------------------------------------|"
- "**** 1 Output Files (parquet_file_id not yet assigned), 240mb total:"
- "L1, all files 240mb "
- "L1.?[100,101] |-------------------------------------L1.?-------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 8 files: L0.13, L0.14, L0.15, L0.16, L0.17, L0.18, L0.19, L0.20"
- " Creating 1 files at level CompactionLevel::L1"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 268435456. The may happen if a large amount of data has the same timestamp"
- "**** Final Output Files "
- "L0 "
- "L0.1[100,101] 30mb |-------------------------------------L0.1-------------------------------------|"
- "L0.2[100,101] 30mb |-------------------------------------L0.2-------------------------------------|"
- "L0.3[100,101] 30mb |-------------------------------------L0.3-------------------------------------|"
- "L0.4[100,101] 30mb |-------------------------------------L0.4-------------------------------------|"
- "L0.5[100,101] 30mb |-------------------------------------L0.5-------------------------------------|"
- "L0.6[100,101] 30mb |-------------------------------------L0.6-------------------------------------|"
- "L0.7[100,101] 30mb |-------------------------------------L0.7-------------------------------------|"
- "L0.8[100,101] 30mb |-------------------------------------L0.8-------------------------------------|"
- "L0.9[100,101] 30mb |-------------------------------------L0.9-------------------------------------|"
- "L0.10[100,101] 30mb |------------------------------------L0.10-------------------------------------|"
- "L0.11[100,101] 30mb |------------------------------------L0.11-------------------------------------|"
- "L0.12[100,101] 30mb |------------------------------------L0.12-------------------------------------|"
- "L1 "
- "L1.21[100,101] 240mb|------------------------------------L1.21-------------------------------------|"
- "WARNING: file L1.21[100,101] 240mb exceeds soft limit 100mb by more than 50%"
"###
);
}
@ -240,12 +376,11 @@ async fn many_small_files() {
- "Committing partition 1:"
- " Soft Deleting 20 files: L0.1, L0.2, L0.3, L0.4, L0.5, L0.6, L0.7, L0.8, L0.9, L0.10, L0.11, L0.12, L0.13, L0.14, L0.15, L0.16, L0.17, L0.18, L0.19, L0.20"
- " Creating 1 files at level CompactionLevel::L1"
- "Committing partition 1:"
- " Upgrading 1 files level to CompactionLevel::L2: L1.21"
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 268435456. The may happen if a large amount of data has the same timestamp"
- "**** Final Output Files "
- "L2, all files 200mb "
- "L2.21[100,100] |------------------------------------L2.21-------------------------------------|"
- "WARNING: file L2.21[100,100] 200mb exceeds soft limit 100mb by more than 50%"
- "L1, all files 200mb "
- "L1.21[100,100] |------------------------------------L1.21-------------------------------------|"
- "WARNING: file L1.21[100,100] 200mb exceeds soft limit 100mb by more than 50%"
"###
);
}

View File

@ -39,7 +39,7 @@ use iox_tests::{
ParquetFileBuilder, TestCatalog, TestNamespace, TestParquetFileBuilder, TestPartition,
TestShard, TestTable,
};
use iox_time::{Time, TimeProvider};
use iox_time::{MockProvider, Time, TimeProvider};
use object_store::{path::Path, DynObjectStore};
use parquet_file::storage::{ParquetStorage, StorageId};
use schema::sort::SortKey;
@ -393,7 +393,8 @@ impl TestSetupBuilder<false> {
.with_line_protocol(&lp)
.with_creation_time(Time::from_timestamp_nanos(time.time_3_minutes_future))
.with_max_l0_created_at(Time::from_timestamp_nanos(time.time_3_minutes_future))
.with_min_time(35000)
// the file includes one row of data, so min_time and max_time are the same
.with_min_time(36000)
.with_max_time(36000)
.with_compaction_level(CompactionLevel::Final);
let l2_3 = self.partition.create_parquet_file(builder).await.into();
@ -421,7 +422,7 @@ impl TestSetupBuilder<false> {
.with_line_protocol(&lp)
.with_creation_time(Time::from_timestamp_nanos(time.time_4_minutes_future))
.with_max_l0_created_at(Time::from_timestamp_nanos(time.time_4_minutes_future))
.with_min_time(0)
.with_min_time(6000)
.with_max_time(68000)
.with_compaction_level(CompactionLevel::FileNonOverlapped);
let l1_1 = self.partition.create_parquet_file(builder).await.into();
@ -458,7 +459,7 @@ impl TestSetupBuilder<false> {
.with_line_protocol(&lp)
.with_creation_time(Time::from_timestamp_nanos(time.time_5_minutes_future))
.with_max_l0_created_at(Time::from_timestamp_nanos(time.time_4_minutes_future))
.with_min_time(8000)
.with_min_time(6000)
.with_max_time(25000)
.with_compaction_level(CompactionLevel::FileNonOverlapped);
let l1_1 = self.partition.create_parquet_file(builder).await.into();
@ -975,20 +976,26 @@ pub fn create_l2_files() -> Vec<ParquetFile> {
/// |--L1.1--| |--L1.2--| |--L1.3--|
/// |--L0.1--| |--L0.2--| |--L0.3--|
pub fn create_overlapped_l0_l1_files(size: i64) -> Vec<ParquetFile> {
let time_provider = Arc::new(MockProvider::new(Time::from_timestamp(0, 0).unwrap()));
let time = TestTimes::new(&time_provider);
let l1_1 = ParquetFileBuilder::new(11)
.with_compaction_level(CompactionLevel::FileNonOverlapped)
.with_time_range(250, 350)
.with_file_size_bytes(size)
.with_max_l0_created_at(time.time_1_minute_future)
.build();
let l1_2 = ParquetFileBuilder::new(12)
.with_compaction_level(CompactionLevel::FileNonOverlapped)
.with_time_range(400, 500)
.with_file_size_bytes(size)
.with_max_l0_created_at(time.time_1_minute_future)
.build();
let l1_3 = ParquetFileBuilder::new(13)
.with_compaction_level(CompactionLevel::FileNonOverlapped)
.with_time_range(600, 700)
.with_file_size_bytes(size)
.with_max_l0_created_at(time.time_1_minute_future)
.build();
// L0_1 overlaps with L1_2 and L1_3
@ -996,24 +1003,131 @@ pub fn create_overlapped_l0_l1_files(size: i64) -> Vec<ParquetFile> {
.with_compaction_level(CompactionLevel::Initial)
.with_time_range(450, 620)
.with_file_size_bytes(size)
.with_max_l0_created_at(time.time_2_minutes_future)
.build();
// L0_2 overlaps with L1_3
let l0_2 = ParquetFileBuilder::new(2)
.with_compaction_level(CompactionLevel::Initial)
.with_time_range(650, 750)
.with_file_size_bytes(size)
.with_max_l0_created_at(time.time_3_minutes_future)
.build();
// L0_3 overlaps with nothing
let l0_3 = ParquetFileBuilder::new(3)
.with_compaction_level(CompactionLevel::Initial)
.with_time_range(800, 900)
.with_file_size_bytes(size)
.with_max_l0_created_at(time.time_5_minutes_future)
.build();
// Put the files in random order
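// (callers are expected to re-sort them, e.g. L0s by max_l0_created_at)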
vec![l1_3, l1_2, l0_2, l1_1, l0_1, l0_3]
}
/// This setup will return files with ranges as follows:
/// |--L1.1--| |--L1.2--|
/// |--L0.1--| |--L0.2--| |--L0.3--|
pub fn create_overlapped_l0_l1_files_2(size: i64) -> Vec<ParquetFile> {
let time_provider = Arc::new(MockProvider::new(Time::from_timestamp(0, 0).unwrap()));
let time = TestTimes::new(&time_provider);
let l1_1 = ParquetFileBuilder::new(12)
.with_compaction_level(CompactionLevel::FileNonOverlapped)
.with_time_range(400, 500)
.with_file_size_bytes(size)
.with_max_l0_created_at(time.time_1_minute_future)
.build();
let l1_2 = ParquetFileBuilder::new(13)
.with_compaction_level(CompactionLevel::FileNonOverlapped)
.with_time_range(600, 700)
.with_file_size_bytes(size)
.with_max_l0_created_at(time.time_1_minute_future)
.build();
// L0_1 overlaps with L1_1 and L1_2
let l0_1 = ParquetFileBuilder::new(1)
.with_compaction_level(CompactionLevel::Initial)
.with_time_range(450, 620)
.with_file_size_bytes(size)
.with_max_l0_created_at(time.time_2_minutes_future)
.build();
// L0_2 overlaps with L1_2
let l0_2 = ParquetFileBuilder::new(2)
.with_compaction_level(CompactionLevel::Initial)
.with_time_range(650, 750)
.with_file_size_bytes(size)
.with_max_l0_created_at(time.time_3_minutes_future)
.build();
// L0_3 overlaps with nothing
let l0_3 = ParquetFileBuilder::new(3)
.with_compaction_level(CompactionLevel::Initial)
.with_time_range(800, 900)
.with_file_size_bytes(size)
.with_max_l0_created_at(time.time_5_minutes_future)
.build();
// Put the files in random order
vec![l1_2, l0_2, l1_1, l0_1, l0_3]
}
/// This setup will return files with ranges as follows:
/// |--L1.1--| |--L1.2--| |--L1.3--| : target_level files
/// |--L0.1--| |--L0.3--| |--L0.2--| : start_level files
/// Note that L0.2 is created before L0.3 but has a later time range
pub fn create_overlapped_start_target_files(
size: i64,
start_level: CompactionLevel,
) -> Vec<ParquetFile> {
let time_provider = Arc::new(MockProvider::new(Time::from_timestamp(0, 0).unwrap()));
let time = TestTimes::new(&time_provider);
let target_level = start_level.next();
let l1_1 = ParquetFileBuilder::new(11)
.with_compaction_level(target_level)
.with_time_range(100, 200)
.with_file_size_bytes(size)
.with_max_l0_created_at(time.time_1_minute_future)
.build();
let l1_2 = ParquetFileBuilder::new(12)
.with_compaction_level(target_level)
.with_time_range(300, 400)
.with_file_size_bytes(size)
.with_max_l0_created_at(time.time_1_minute_future)
.build();
let l1_3 = ParquetFileBuilder::new(13)
.with_compaction_level(target_level)
.with_time_range(500, 600)
.with_file_size_bytes(size)
.with_max_l0_created_at(time.time_1_minute_future)
.build();
// L0_1 overlaps with L1_1
let l0_1 = ParquetFileBuilder::new(1)
.with_compaction_level(start_level)
.with_time_range(150, 250)
.with_file_size_bytes(size)
.with_max_l0_created_at(time.time_2_minutes_future)
.build();
// L0_2 overlaps with L1_3
let l0_2 = ParquetFileBuilder::new(2)
.with_compaction_level(start_level)
.with_time_range(550, 650)
.with_file_size_bytes(size)
.with_max_l0_created_at(time.time_3_minutes_future)
.build();
// L0_3 overlaps with L1_2
let l0_3 = ParquetFileBuilder::new(3)
.with_compaction_level(start_level)
.with_time_range(350, 450)
.with_file_size_bytes(size)
.with_max_l0_created_at(time.time_5_minutes_future)
.build();
// Put the files in random order
vec![l1_2, l1_3, l0_2, l1_1, l0_1, l0_3]
}
/// This setup will return files with ranges as follows:
/// |--L2.1--| |--L2.2--|
/// |--L1.1--| |--L1.2--| |--L1.3--|
@ -1050,6 +1164,41 @@ pub fn create_overlapped_l1_l2_files(size: i64) -> Vec<ParquetFile> {
vec![l1_3, l1_2, l2_1, l2_2, l1_1]
}
/// This setup will return files with ranges as follows:
/// |--L2.2--|
/// |--L1.1--| |--L1.2--| |--L1.3--|
pub fn create_overlapped_l1_l2_files_2(size: i64) -> Vec<ParquetFile> {
let l2_2 = ParquetFileBuilder::new(22)
.with_compaction_level(CompactionLevel::Final)
.with_time_range(200, 300)
.with_file_size_bytes(size)
.with_max_l0_created_at(1)
.build();
// L1_1 overlaps with L2_2
let l1_1 = ParquetFileBuilder::new(11)
.with_compaction_level(CompactionLevel::FileNonOverlapped)
.with_time_range(250, 350)
.with_file_size_bytes(size)
.with_max_l0_created_at(2)
.build();
let l1_2 = ParquetFileBuilder::new(12)
.with_compaction_level(CompactionLevel::FileNonOverlapped)
.with_time_range(400, 500)
.with_file_size_bytes(size)
.with_max_l0_created_at(3)
.build();
let l1_3 = ParquetFileBuilder::new(13)
.with_compaction_level(CompactionLevel::FileNonOverlapped)
.with_time_range(600, 700)
.with_file_size_bytes(size)
.with_max_l0_created_at(4)
.build();
// Put the files in random order
vec![l1_3, l1_2, l2_2, l1_1]
}
/// This setup will return files with ranges as follows with mixed sizes:
/// |--L2.1--| |--L2.2--|
/// |--L1.1--| |--L1.2--| |--L1.3--|

View File

@ -12,7 +12,7 @@ use data_types::{
};
use datafusion::physical_plan::SendableRecordBatchStream;
use iox_time::Time;
use observability_deps::tracing::{debug, info};
use observability_deps::tracing::info;
use uuid::Uuid;
use compactor2::{DynError, ParquetFilesSink, PartitionInfo, PlanIR};
@ -312,7 +312,7 @@ fn even_time_split(
// add the entry for the last bucket
time_ranges.push((last_time, overall_max_time));
debug!(
info!(
?overall_min_time,
?overall_max_time,
?overall_time_range,

View File

@ -1223,6 +1223,11 @@ impl ParquetFile {
pub fn overlaps(&self, other: &Self) -> bool {
self.min_time <= other.max_time && self.max_time >= other.min_time
}
/// Return true if the time range of this file overlaps with the given time range
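/// Both bounds are inclusive: a file spanning `[100, 200]` overlaps the
/// range `[200, 300]`, but not `[201, 300]`.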
pub fn overlaps_time_range(&self, min_time: Timestamp, max_time: Timestamp) -> bool {
self.min_time <= max_time && self.max_time >= min_time
}
}
/// Data for a parquet file to be inserted into the catalog.