feat: handle large-size overlapped files (#7079)

* feat: split start-level files that overlap with many files
* test: split files and their split times
* test: split test for L1 and L2 files
* feat: full implementation that supports large-size overlapped files
* chore: modify comments to reflect the changes
* fix: typo
* chore: update test output
* docs: clearer comments
* chore: remove empty test files. Will add them in a separate PR
* chore: Apply suggestions from code review

Co-authored-by: Andrew Lamb <alamb@influxdata.com>

* chore: address review comments
* chore: Apply suggestions from code review

Co-authored-by: Andrew Lamb <alamb@influxdata.com>

* refactor: add a knob to turn large-size overlaps on and off
* fix: typo
* chore: update test output after merging main
* fix: split_times should not include the max_time of the file
* fix: fix an overlap bug while limiting the number of files to compact
* test: unit tests for different overlap cases of limiting files to compact
* chore: increase the time range of the tests to let the split files work correctly
* fix: skip compacting files of tiny ranges
* test: add tests for time range 1
* chore: address review comments
* chore: remove enable_large_size_overlap_files knob
* fix: fix a bug that sorted L1 files on their min_time instead of l0_max_created_at
* refactor: use the same order_files function after merging main into branch
* chore: typos and clearer comments
* chore: remove obsolete comments
* chore: add asserts per review suggestion

---------

Co-authored-by: Andrew Lamb <alamb@influxdata.com>

parent 3f3a47eae9
commit 9e9e689a30
@@ -247,6 +247,7 @@ where
            .get(file_level)
            .expect("all compaction levels covered")
            .record(create.len() as u64);

        self.job_bytes
            .create
            .get(file_level)
@@ -27,15 +27,18 @@ impl DivideInitial for MultipleBranchesDivideInitial {
            RoundInfo::ManySmallFiles {
                start_level,
                max_num_files_to_group,
                max_total_file_size_to_group,
            } => {
                // To split the start_level files correctly so they can be compacted in the right order,
                // the files must be sorted on max_l0_created_at if start_level is 0 or min_time otherwise.
                // the files must be sorted on `max_l0_created_at` if start_level is 0 or `min_time` otherwise.
                //
                // Since L0s can overlap, they can contain duplicate data, which can only be resolved by
                // using `created_at` time, so the `created_at` time must be used to sort so that it is
                // using `max_l0_created_at` time, so the `max_l0_created_at` time must be used to sort so that it is
                // preserved. Since L1s & L2s cannot overlap within their own level, they cannot contain
                // duplicate data within their own level, so they do not need to preserve their `created_at`
                // time, so they do not need to be sorted based on `created_at`. However, sorting by
                // `min_time` makes it easier to avoid introducing overlaps within their levels.
                // duplicate data within their own level, so they do not need to preserve their `max_l0_created_at`
                // time, so they do not need to be sorted based on `max_l0_created_at`. However, sorting by
                // `min_time` is needed to avoid introducing overlaps within their levels.
                //
                // See tests many_l0_files_different_created_order and many_l1_files_different_created_order for examples
                let start_level_files = files
                    .into_iter()
@@ -43,11 +46,33 @@ impl DivideInitial for MultipleBranchesDivideInitial {
                    .collect::<Vec<_>>();
                let start_level_files = order_files(start_level_files, start_level);

                // Split files into many small groups, each has at most max_num_files_to_group files
                let branches = start_level_files
                    .chunks(*max_num_files_to_group)
                    .map(|c| c.to_vec())
                    .collect::<Vec<Vec<_>>>();
                // Split L0s into many small groups, each with at most max_num_files_to_group files and not exceeding max_total_file_size_to_group
                // Collect files until either limit is reached
                let mut branches = vec![];
                let mut current_branch = vec![];
                let mut current_branch_size = 0;
                for f in start_level_files {
                    if current_branch.len() == *max_num_files_to_group
                        || current_branch_size + f.file_size_bytes as usize
                            > *max_total_file_size_to_group
                    {
                        // panic if current_branch is empty
                        if current_branch.is_empty() {
                            panic!("Size of a file {} is larger than the max size limit to compact. Please adjust the settings. See ticket https://github.com/influxdata/idpe/issues/17209", f.file_size_bytes);
                        }

                        branches.push(current_branch);
                        current_branch = vec![];
                        current_branch_size = 0;
                    }
                    current_branch_size += f.file_size_bytes as usize;
                    current_branch.push(f);
                }

                // push the last branch
                if !current_branch.is_empty() {
                    branches.push(current_branch);
                }

                branches
            }
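
The grouping rule in this hunk (cap on file count, cap on total bytes, panic when a single file alone exceeds the byte cap) is easiest to see in isolation. The helper below is a hypothetical sketch, not part of this diff; it only mirrors the loop's policy:

```rust
/// Greedily chunk `sizes` (file sizes in bytes, already ordered) into groups of
/// at most `max_files` entries and at most `max_bytes` total size per group.
fn greedy_groups(sizes: &[usize], max_files: usize, max_bytes: usize) -> Vec<Vec<usize>> {
    let mut groups = vec![];
    let mut current = vec![];
    let mut current_bytes = 0;
    for &s in sizes {
        if current.len() == max_files || current_bytes + s > max_bytes {
            // Mirrors the panic above: a lone file bigger than the byte cap cannot be grouped.
            assert!(!current.is_empty(), "single file larger than max_bytes");
            groups.push(std::mem::take(&mut current));
            current_bytes = 0;
        }
        current_bytes += s;
        current.push(s);
    }
    if !current.is_empty() {
        groups.push(current);
    }
    groups
}

#[test]
fn greedy_groups_respects_both_limits() {
    // 90 + 20 > 100, so the second file starts a new group (same shape as test_divide_size_limit).
    assert_eq!(
        greedy_groups(&[90, 20, 30], 10, 100),
        vec![vec![90], vec![20, 30]]
    );
}
```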
@@ -56,14 +81,14 @@ impl DivideInitial for MultipleBranchesDivideInitial {
    }
}

// Return a sorted files of the given ones.
// The order is used to split the files and form the right groups of files to compact
/// Return a sorted files of the given ones.
/// The order is used to split the files and form the right groups of files to compact
//  and deduplcate correctly to fewer and larger but same level files
//
// All given files are in the same given start_level.
// They will be sorted on their `max_l0_created_at` if the start_level is 0,
// otherwise on their `min_time`
fn order_files(files: Vec<ParquetFile>, start_level: &CompactionLevel) -> Vec<ParquetFile> {
///
/// All given files are in the same given start_level.
/// They will be sorted on their `max_l0_created_at` if the start_level is 0,
/// otherwise on their `min_time`
pub fn order_files(files: Vec<ParquetFile>, start_level: &CompactionLevel) -> Vec<ParquetFile> {
    let mut files = files;
    if *start_level == CompactionLevel::Initial {
        files.sort_by(|a, b| a.max_l0_created_at.cmp(&b.max_l0_created_at));
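
For illustration, a hypothetical test in the style of this module's existing ones (the builder calls are the ones already used in this diff), showing that for L0 the creation order wins regardless of insertion order:

```rust
#[test]
fn order_files_sorts_l0_by_created_at() {
    // Two L0 files passed in out of created order.
    let f1 = ParquetFileBuilder::new(1)
        .with_compaction_level(CompactionLevel::Initial)
        .with_max_l0_created_at(5)
        .build();
    let f2 = ParquetFileBuilder::new(2)
        .with_compaction_level(CompactionLevel::Initial)
        .with_max_l0_created_at(1)
        .build();

    let ordered = order_files(vec![f1, f2], &CompactionLevel::Initial);
    // The file created first must come first so deduplication sees older data first.
    assert!(ordered[0].max_l0_created_at < ordered[1].max_l0_created_at);
}
```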
@@ -89,10 +114,11 @@ mod tests {
    }

    #[test]
    fn test_divide() {
    fn test_divide_num_file() {
        let round_info = RoundInfo::ManySmallFiles {
            start_level: CompactionLevel::Initial,
            max_num_files_to_group: 2,
            max_total_file_size_to_group: 100,
        };
        let divide = MultipleBranchesDivideInitial::new();

@@ -122,4 +148,69 @@ mod tests {
        assert_eq!(branches[0], vec![f1, f2]);
        assert_eq!(branches[1], vec![f3]);
    }

    #[test]
    #[should_panic(
        expected = "Size of a file 50 is larger than the max size limit to compact. Please adjust the settings"
    )]
    fn test_divide_size_limit_too_small() {
        let round_info = RoundInfo::ManySmallFiles {
            start_level: CompactionLevel::Initial,
            max_num_files_to_group: 10,
            max_total_file_size_to_group: 10,
        };
        let divide = MultipleBranchesDivideInitial::new();

        let f1 = ParquetFileBuilder::new(1)
            .with_compaction_level(CompactionLevel::Initial)
            .with_max_l0_created_at(1)
            .with_file_size_bytes(50)
            .build();
        let f2 = ParquetFileBuilder::new(2)
            .with_compaction_level(CompactionLevel::Initial)
            .with_max_l0_created_at(5)
            .with_file_size_bytes(5)
            .build();

        // files in random order of max_l0_created_at
        let files = vec![f2, f1];

        // panic
        let _branches = divide.divide(files, &round_info);
    }

    #[test]
    fn test_divide_size_limit() {
        let round_info = RoundInfo::ManySmallFiles {
            start_level: CompactionLevel::Initial,
            max_num_files_to_group: 10,
            max_total_file_size_to_group: 100,
        };
        let divide = MultipleBranchesDivideInitial::new();

        let f1 = ParquetFileBuilder::new(1)
            .with_compaction_level(CompactionLevel::Initial)
            .with_max_l0_created_at(1)
            .with_file_size_bytes(90)
            .build();
        let f2 = ParquetFileBuilder::new(2)
            .with_compaction_level(CompactionLevel::Initial)
            .with_max_l0_created_at(5)
            .with_file_size_bytes(20)
            .build();
        let f3 = ParquetFileBuilder::new(3)
            .with_compaction_level(CompactionLevel::Initial)
            .with_max_l0_created_at(10)
            .with_file_size_bytes(30)
            .build();

        // files in random order of max_l0_created_at
        let files = vec![f2.clone(), f3.clone(), f1.clone()];

        let branches = divide.divide(files, &round_info);
        // output must be split by their max_l0_created_at
        assert_eq!(branches.len(), 2);
        assert_eq!(branches[0], vec![f1]);
        assert_eq!(branches[1], vec![f2, f3]);
    }
}
@@ -49,7 +49,8 @@ where
            partition_id = partition_info.partition_id.get(),
            target_level = %classification.target_level,
            round_info = %round_info,
            files_to_compacts = classification.files_to_compact.len(),
            files_to_compact = classification.files_to_compact_len(),
            files_to_split = classification.files_to_split_len(),
            files_to_upgrade = classification.files_to_upgrade.len(),
            files_to_keep = classification.files_to_keep.len(),
            "file classification"
@@ -3,8 +3,10 @@ use std::fmt::Display;
use data_types::{CompactionLevel, ParquetFile};

use crate::{
    components::files_split::FilesSplit, file_classification::FileClassification,
    partition_info::PartitionInfo, RoundInfo,
    components::{files_split::FilesSplit, split_or_compact::SplitOrCompact},
    file_classification::{FileClassification, FilesToCompactOrSplit},
    partition_info::PartitionInfo,
    RoundInfo,
};

use super::FileClassifier;
@@ -33,50 +35,70 @@ use super::FileClassifier;
/// [non overlap split (FO)] | :
/// | | | :
/// | | | :
/// | +------------+-->(files keep) :
/// | :
/// | :
/// | +------------+------+ :
/// | | :
/// | | :
/// | +................................+
/// | :
/// V V
/// [upgrade split (FU)]
/// | |
/// | |
/// V V
/// (file compact) (file upgrade)
/// | : | :
/// V V | :
/// [upgrade split (FU)] | :
/// | | | :
/// | | | :
/// | V | :
/// | (files upgrade) | :
/// | | :
/// | +................................+
/// | | |
/// V V |
/// [split or compact (FSC)] |
/// | | |
/// | +-------------------+
/// | |
/// V V
/// (files compact or split) (files keep)
/// ```
#[derive(Debug)]
pub struct SplitBasedFileClassifier<FT, FO, FU>
pub struct SplitBasedFileClassifier<FT, FO, FU, FSC>
where
    FT: FilesSplit,
    FO: FilesSplit,
    FU: FilesSplit,
    FSC: SplitOrCompact,
{
    target_level_split: FT,
    non_overlap_split: FO,
    upgrade_split: FU,
    split_or_compact: FSC,
}

impl<FT, FO, FU> SplitBasedFileClassifier<FT, FO, FU>
impl<FT, FO, FU, FSC> SplitBasedFileClassifier<FT, FO, FU, FSC>
where
    FT: FilesSplit,
    FO: FilesSplit,
    FU: FilesSplit,
    FSC: SplitOrCompact,
{
    pub fn new(target_level_split: FT, non_overlap_split: FO, upgrade_split: FU) -> Self {
    pub fn new(
        target_level_split: FT,
        non_overlap_split: FO,
        upgrade_split: FU,
        split_or_compact: FSC,
    ) -> Self {
        Self {
            target_level_split,
            non_overlap_split,
            upgrade_split,
            split_or_compact,
        }
    }
}

impl<FT, FO, FU> Display for SplitBasedFileClassifier<FT, FO, FU>
impl<FT, FO, FU, FSC> Display for SplitBasedFileClassifier<FT, FO, FU, FSC>
where
    FT: FilesSplit,
    FO: FilesSplit,
    FU: FilesSplit,
    FSC: SplitOrCompact,
{
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
|
@ -87,15 +109,16 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
impl<FT, FO, FU> FileClassifier for SplitBasedFileClassifier<FT, FO, FU>
|
||||
impl<FT, FO, FU, FSC> FileClassifier for SplitBasedFileClassifier<FT, FO, FU, FSC>
|
||||
where
|
||||
FT: FilesSplit,
|
||||
FO: FilesSplit,
|
||||
FU: FilesSplit,
|
||||
FSC: SplitOrCompact,
|
||||
{
|
||||
fn classify(
|
||||
&self,
|
||||
_partition_info: &PartitionInfo,
|
||||
partition_info: &PartitionInfo,
|
||||
round_info: &RoundInfo,
|
||||
files: Vec<ParquetFile>,
|
||||
) -> FileClassification {
|
||||
|
@@ -123,13 +146,23 @@ where
            self.non_overlap_split.apply(files_to_compact, target_level);
        files_to_keep.extend(non_overlapping_files);

        // To have efficient compaction performance, we only need to uprade (catalog update only) eligible files
        // To have efficient compaction performance, we only need to upgrade (catalog update only) eligible files
        let (files_to_compact, files_to_upgrade) =
            self.upgrade_split.apply(files_to_compact, target_level);

        // See if we need to split start-level files because they exceed the compaction size limit
        let (files_to_compact_or_split, other_files) =
            self.split_or_compact
                .apply(partition_info, files_to_compact, target_level);
        files_to_keep.extend(other_files);

        // Target level of split files is the same level as the input files, all of which are in the same level,
        // while target level of compact files is the value of target_level, which is the highest level of the input files
        let target_level = files_to_compact_or_split.target_level(target_level);

        FileClassification {
            target_level,
            files_to_compact,
            files_to_compact_or_split,
            files_to_upgrade,
            files_to_keep,
        }
@@ -154,7 +187,7 @@ fn file_classification_for_many_files(

    FileClassification {
        target_level,
        files_to_compact,
        files_to_compact_or_split: FilesToCompactOrSplit::FilesToCompact(files_to_compact),
        files_to_upgrade: vec![],
        files_to_keep: vec![],
    }
@@ -30,7 +30,6 @@ use super::{
    divide_initial::multiple_branches::MultipleBranchesDivideInitial,
    file_classifier::{
        logging::LoggingFileClassifierWrapper, split_based::SplitBasedFileClassifier,
        FileClassifier,
    },
    file_filter::level_range::LevelRangeFileFilter,
    files_split::{
@@ -57,8 +56,8 @@ use super::{
        greater_size_matching_files::GreaterSizeMatchingFilesPartitionFilter,
        has_files::HasFilesPartitionFilter, has_matching_file::HasMatchingFilePartitionFilter,
        logging::LoggingPartitionFilterWrapper, max_num_columns::MaxNumColumnsPartitionFilter,
        max_parquet_bytes::MaxParquetBytesPartitionFilter, metrics::MetricsPartitionFilterWrapper,
        never_skipped::NeverSkippedPartitionFilter, or::OrPartitionFilter, PartitionFilter,
        metrics::MetricsPartitionFilterWrapper, never_skipped::NeverSkippedPartitionFilter,
        or::OrPartitionFilter, unable_to_compact::UnableToCompactPartitionFilter, PartitionFilter,
    },
    partition_info_source::sub_sources::SubSourcePartitionInfoSource,
    partition_source::{
@@ -80,6 +79,7 @@ use super::{
    round_split::many_files::ManyFilesRoundSplit,
    scratchpad::{noop::NoopScratchpadGen, prod::ProdScratchpadGen, ScratchpadGen},
    skipped_compactions_source::catalog::CatalogSkippedCompactionsSource,
    split_or_compact::{logging::LoggingSplitOrCompactWrapper, split_compact::SplitCompact},
    Components,
};

@@ -135,7 +135,7 @@ pub fn hardcoded_components(config: &Config) -> Arc<Components> {
    partition_filters.append(&mut make_partition_filters(config));

    let partition_resource_limit_filters: Vec<Arc<dyn PartitionFilter>> = vec![Arc::new(
        MaxParquetBytesPartitionFilter::new(config.max_input_parquet_bytes_per_partition),
        UnableToCompactPartitionFilter::new(config.max_input_parquet_bytes_per_partition),
    )];

    let partition_done_sink: Arc<dyn PartitionDoneSink> = if config.shadow_mode {
@@ -277,7 +277,10 @@ pub fn hardcoded_components(config: &Config) -> Arc<Components> {
            Arc::clone(&config.catalog),
        )),
        round_info_source: Arc::new(LoggingRoundInfoWrapper::new(Arc::new(
            LevelBasedRoundInfo::new(config.max_num_files_per_plan),
            LevelBasedRoundInfo::new(
                config.max_num_files_per_plan,
                config.max_input_parquet_bytes_per_partition,
            ),
        ))),
        partition_filter: Arc::new(LoggingPartitionFilterWrapper::new(
            MetricsPartitionFilterWrapper::new(
@@ -308,8 +311,15 @@ pub fn hardcoded_components(config: &Config) -> Arc<Components> {
        round_split: Arc::new(ManyFilesRoundSplit::new()),
        divide_initial: Arc::new(MultipleBranchesDivideInitial::new()),
        scratchpad_gen,
        file_classifier: Arc::new(LoggingFileClassifierWrapper::new(make_file_classifier(
            config,
        file_classifier: Arc::new(LoggingFileClassifierWrapper::new(Arc::new(
            SplitBasedFileClassifier::new(
                TargetLevelSplit::new(),
                NonOverlapSplit::new(),
                UpgradeSplit::new(config.max_desired_file_size_bytes),
                LoggingSplitOrCompactWrapper::new(SplitCompact::new(
                    config.max_input_parquet_bytes_per_partition,
                )),
            ),
        ))),
        partition_resource_limit_filter: Arc::new(LoggingPartitionFilterWrapper::new(
            MetricsPartitionFilterWrapper::new(
@@ -345,11 +355,3 @@ fn make_partition_filters(config: &Config) -> Vec<Arc<dyn PartitionFilter>> {
        )),
    ]))]
}

fn make_file_classifier(config: &Config) -> Arc<dyn FileClassifier> {
    Arc::new(SplitBasedFileClassifier::new(
        TargetLevelSplit::new(),
        NonOverlapSplit::new(),
        UpgradeSplit::new(config.max_desired_file_size_bytes),
    ))
}
@@ -37,7 +37,7 @@ impl<T> IRPlanner for LoggingIRPlannerWrapper<T>
where
    T: IRPlanner,
{
    fn plan(
    fn compact_plan(
        &self,
        files: Vec<ParquetFile>,
        partition: Arc<PartitionInfo>,
@@ -46,7 +46,7 @@ where
        let partition_id = partition.partition_id;
        let n_input_files = files.len();
        let input_file_size_bytes = files.iter().map(|f| f.file_size_bytes).sum::<i64>();
        let plan = self.inner.plan(files, partition, compaction_level);
        let plan = self.inner.compact_plan(files, partition, compaction_level);

        info!(
            partition_id = partition_id.get(),
@@ -55,7 +55,34 @@ where
            n_output_files = plan.n_output_files(),
            compaction_level = compaction_level as i16,
            %plan,
            "created IR plan",
            "created IR compact plan",
        );

        plan
    }

    fn split_plan(
        &self,
        file: ParquetFile,
        split_times: Vec<i64>,
        partition: Arc<PartitionInfo>,
        compaction_level: CompactionLevel,
    ) -> PlanIR {
        let partition_id = partition.partition_id;
        let n_input_files = 1;
        let input_file_size_bytes = file.file_size_bytes;
        let plan = self
            .inner
            .split_plan(file, split_times, partition, compaction_level);

        info!(
            partition_id = partition_id.get(),
            n_input_files,
            input_file_size_bytes,
            n_output_files = plan.n_output_files(),
            compaction_level = compaction_level as i16,
            %plan,
            "created IR split plan",
        );

        plan
@@ -12,10 +12,20 @@ use crate::{partition_info::PartitionInfo, plan_ir::PlanIR};

/// Creates [`PlanIR`] that describes what files should be compacted and updated
pub trait IRPlanner: Debug + Display + Send + Sync {
    fn plan(
    /// Build a plan to compact the given files
    fn compact_plan(
        &self,
        files: Vec<ParquetFile>,
        partition: Arc<PartitionInfo>,
        compaction_level: CompactionLevel,
    ) -> PlanIR;

    /// Build a plan to split a given file at the given split times
    fn split_plan(
        &self,
        file: ParquetFile,
        split_times: Vec<i64>,
        partition: Arc<PartitionInfo>,
        compaction_level: CompactionLevel,
    ) -> PlanIR;
}
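
With planning split into two entry points, a caller picks one per classified work item. A minimal sketch of such a dispatch; the `PlannedWork` enum here is hypothetical and only the two trait calls mirror this diff:

```rust
use std::sync::Arc;

use data_types::{CompactionLevel, ParquetFile};

// Hypothetical work item; not a type from this PR.
enum PlannedWork {
    Compact { files: Vec<ParquetFile> },
    Split { file: ParquetFile, split_times: Vec<i64> },
}

fn build_plan(
    planner: &dyn IRPlanner,
    work: PlannedWork,
    partition: Arc<PartitionInfo>,
    level: CompactionLevel,
) -> PlanIR {
    match work {
        // Many files -> one (possibly split) compaction output
        PlannedWork::Compact { files } => planner.compact_plan(files, partition, level),
        // One file -> several files cut at the given split times
        PlannedWork::Split { file, split_times } => {
            planner.split_plan(file, split_times, partition, level)
        }
    }
}
```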
@@ -113,7 +113,9 @@ impl Display for V1IRPlanner {
}

impl IRPlanner for V1IRPlanner {
    fn plan(
    /// Build a plan to compact many files into a single file. Since we limit the size of the files,
    /// if the compact result is larger than that limit, we will split the output into many files
    fn compact_plan(
        &self,
        files: Vec<ParquetFile>,
        _partition: Arc<PartitionInfo>,
@@ -145,7 +147,11 @@ impl IRPlanner for V1IRPlanner {
        let files = files
            .into_iter()
            .map(|file| {
                let order = order(file.compaction_level, compaction_level, file.created_at);
                let order = order(
                    file.compaction_level,
                    compaction_level,
                    file.max_l0_created_at,
                );
                FileIR { file, order }
            })
            .collect::<Vec<_>>();
@@ -175,26 +181,49 @@ impl IRPlanner for V1IRPlanner {
            // everything into one file
            PlanIR::Compact { files }
        } else {
            // split compact query plan
            // split compact query plan to split the result into multiple files
            PlanIR::Split { files, split_times }
        }
    }
}

    /// Build a plan to split a file into multiple files based on the given split times
    fn split_plan(
        &self,
        file: ParquetFile,
        split_times: Vec<i64>,
        _partition: Arc<PartitionInfo>,
        compaction_level: CompactionLevel,
    ) -> PlanIR {
        let order = order(
            file.compaction_level,
            compaction_level,
            file.max_l0_created_at,
        );

        let file = FileIR { file, order };

        PlanIR::Split {
            files: vec![file],
            split_times,
        }
    }
}

// Order of the chunks so they can be deduplicated correctly
fn order(
    compaction_level: CompactionLevel,
    target_level: CompactionLevel,
    created_at: Timestamp,
    max_l0_created_at: Timestamp,
) -> ChunkOrder {
    // TODO: If we change the design specified in driver.rs's compact functions, we will need to refine this
    // Currently, we only compact files of level_n with level_n+1 and produce level_n+1 files,
    // and with the strict design that:
    // . Level-0 files can overlap with any files.
    // . Level-N files (N > 0) cannot overlap with any files in the same level.
    // . For Level-0 files, we always pick the smaller `created_at` files to compact (with
    //   each other and overlapped L1 files) first.
    // . For Level-0 files, we always pick the smaller `max_l0_created_at` files to compact (with
    //   each other and overlapped L1 files) first. `max_l0_created_at` is the max created time of all L0 files
    //   that were compacted into this given file. This value is used to order chunks for deduplication.
    // . Level-N+1 files are results of compacting Level-N and/or Level-N+1 files, their `created_at`
    //   can be after the `created_at` of other Level-N files but they may include data loaded before
    //   the other Level-N files. Hence we should never use `created_at` of Level-N+1 files to order
@@ -210,7 +239,7 @@ fn order(
        (CompactionLevel::Initial, CompactionLevel::Initial)
        | (CompactionLevel::Initial, CompactionLevel::FileNonOverlapped)
        | (CompactionLevel::FileNonOverlapped, CompactionLevel::Final) => {
            ChunkOrder::new(created_at.get())
            ChunkOrder::new(max_l0_created_at.get())
        }
        (CompactionLevel::FileNonOverlapped, CompactionLevel::FileNonOverlapped)
        | (CompactionLevel::Final, CompactionLevel::Final) => ChunkOrder::new(0),
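
In effect, `order` carries `max_l0_created_at` into the chunk order only when the file can still hold duplicates relative to the target level. A hypothetical test sketch of the two interesting arms (assuming `Timestamp::new` and `ChunkOrder::new` behave as used above):

```rust
#[test]
fn order_uses_max_l0_created_at_only_when_duplicates_are_possible() {
    // An L0 file compacted toward L1: created order matters for deduplication.
    assert_eq!(
        order(
            CompactionLevel::Initial,
            CompactionLevel::FileNonOverlapped,
            Timestamp::new(42),
        ),
        ChunkOrder::new(42),
    );
    // L1 compacted with L1: files cannot overlap within the level, so order is irrelevant.
    assert_eq!(
        order(
            CompactionLevel::FileNonOverlapped,
            CompactionLevel::FileNonOverlapped,
            Timestamp::new(42),
        ),
        ChunkOrder::new(0),
    );
}
```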
@@ -35,6 +35,7 @@ pub mod round_info_source;
pub mod round_split;
pub mod scratchpad;
pub mod skipped_compactions_source;
pub mod split_or_compact;
pub mod tables_source;

/// Pluggable system to determine compactor behavior. Please see
@@ -1,96 +0,0 @@
use std::fmt::Display;

use async_trait::async_trait;
use data_types::ParquetFile;

use crate::{
    error::{DynError, ErrorKind, SimpleError},
    PartitionInfo,
};

use super::PartitionFilter;

#[derive(Debug)]
pub struct MaxParquetBytesPartitionFilter {
    max_parquet_bytes: usize,
}

impl MaxParquetBytesPartitionFilter {
    pub fn new(max_parquet_bytes: usize) -> Self {
        Self { max_parquet_bytes }
    }
}

impl Display for MaxParquetBytesPartitionFilter {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "max_parquet_bytes")
    }
}

#[async_trait]
impl PartitionFilter for MaxParquetBytesPartitionFilter {
    async fn apply(
        &self,
        partition_info: &PartitionInfo,
        files: &[ParquetFile],
    ) -> Result<bool, DynError> {
        let sum = files
            .iter()
            .map(|f| usize::try_from(f.file_size_bytes).unwrap_or(0))
            .sum::<usize>();

        if sum <= self.max_parquet_bytes {
            Ok(true)
        } else {
            Err(SimpleError::new(
                ErrorKind::OutOfMemory,
                format!(
                    "partition {} has {} parquet file bytes, limit is {}",
                    partition_info.partition_id, sum, self.max_parquet_bytes
                ),
            )
            .into())
        }
    }
}

#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use crate::{error::ErrorKindExt, test_utils::PartitionInfoBuilder};
    use iox_tests::ParquetFileBuilder;

    use super::*;

    #[test]
    fn test_display() {
        assert_eq!(
            MaxParquetBytesPartitionFilter::new(10).to_string(),
            "max_parquet_bytes"
        );
    }

    #[tokio::test]
    async fn test_apply() {
        let filter = MaxParquetBytesPartitionFilter::new(10);
        let f1 = ParquetFileBuilder::new(1).with_file_size_bytes(7).build();
        let f2 = ParquetFileBuilder::new(2).with_file_size_bytes(4).build();
        let f3 = ParquetFileBuilder::new(3).with_file_size_bytes(3).build();
        let p_info = Arc::new(PartitionInfoBuilder::new().with_partition_id(1).build());

        assert!(filter.apply(&p_info, &[]).await.unwrap());
        assert!(filter.apply(&p_info, &[f1.clone()]).await.unwrap());
        assert!(filter
            .apply(&p_info, &[f1.clone(), f3.clone()])
            .await
            .unwrap());

        let err = filter.apply(&p_info, &[f1, f2]).await.unwrap_err();
        assert_eq!(err.classify(), ErrorKind::OutOfMemory);
        assert_eq!(
            err.to_string(),
            "partition 1 has 11 parquet file bytes, limit is 10"
        );
    }
}
@@ -12,10 +12,10 @@ pub mod has_files;
pub mod has_matching_file;
pub mod logging;
pub mod max_num_columns;
pub mod max_parquet_bytes;
pub mod metrics;
pub mod never_skipped;
pub mod or;
pub mod unable_to_compact;

/// Filters partition based on ID and parquet files.
///
@@ -0,0 +1,92 @@
use std::fmt::Display;

use async_trait::async_trait;
use data_types::ParquetFile;

use crate::{
    error::{DynError, ErrorKind, SimpleError},
    PartitionInfo,
};

use super::PartitionFilter;

#[derive(Debug)]
pub struct UnableToCompactPartitionFilter {
    max_parquet_bytes: usize,
}

impl UnableToCompactPartitionFilter {
    pub fn new(max_parquet_bytes: usize) -> Self {
        Self { max_parquet_bytes }
    }
}

impl Display for UnableToCompactPartitionFilter {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "unable_to_compact")
    }
}

#[async_trait]
impl PartitionFilter for UnableToCompactPartitionFilter {
    async fn apply(
        &self,
        partition_info: &PartitionInfo,
        files: &[ParquetFile],
    ) -> Result<bool, DynError> {
        if !files.is_empty() {
            // There are some files to compact or split
            Ok(true)
        } else {
            // No files means the split_compact cannot find any reasonable set of files to compact or split
            // TODO: after https://github.com/influxdata/idpe/issues/17208 that renames the size limit and
            // https://github.com/influxdata/idpe/issues/17209 for modifying the knobs, this message should be modified accordingly
            Err(SimpleError::new(
                ErrorKind::OutOfMemory,
                format!(
                    "partition {} has overlapped files that exceed max compact size limit {}. This may happen if a large amount of data has the same timestamp",
                    partition_info.partition_id, self.max_parquet_bytes
                ),
            )
            .into())
        }
    }
}

#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use crate::{error::ErrorKindExt, test_utils::PartitionInfoBuilder};
    use iox_tests::ParquetFileBuilder;

    use super::*;

    #[test]
    fn test_display() {
        assert_eq!(
            UnableToCompactPartitionFilter::new(10).to_string(),
            "unable_to_compact"
        );
    }

    #[tokio::test]
    async fn test_apply_empty() {
        let filter = UnableToCompactPartitionFilter::new(10);
        let p_info = Arc::new(PartitionInfoBuilder::new().with_partition_id(1).build());
        let err = filter.apply(&p_info, &[]).await.unwrap_err();
        assert_eq!(err.classify(), ErrorKind::OutOfMemory);
        assert_eq!(
            err.to_string(),
            "partition 1 has overlapped files that exceed max compact size limit 10. This may happen if a large amount of data has the same timestamp"
        );
    }

    #[tokio::test]
    async fn test_apply_not_empty() {
        let filter = UnableToCompactPartitionFilter::new(10);
        let p_info = Arc::new(PartitionInfoBuilder::new().with_partition_id(1).build());
        let f1 = ParquetFileBuilder::new(1).with_file_size_bytes(7).build();
        assert!(filter.apply(&p_info, &[f1]).await.unwrap());
    }
}
@@ -55,6 +55,7 @@ impl RoundInfoSource for LoggingRoundInfoWrapper {
#[derive(Debug)]
pub struct LevelBasedRoundInfo {
    pub max_num_files_per_plan: usize,
    pub max_total_file_size_per_plan: usize,
}

impl Display for LevelBasedRoundInfo {
@@ -63,9 +64,10 @@ impl Display for LevelBasedRoundInfo {
    }
}
impl LevelBasedRoundInfo {
    pub fn new(max_num_files_per_plan: usize) -> Self {
    pub fn new(max_num_files_per_plan: usize, max_total_file_size_per_plan: usize) -> Self {
        Self {
            max_num_files_per_plan,
            max_total_file_size_per_plan,
        }
    }

@@ -115,6 +117,7 @@ impl RoundInfoSource for LevelBasedRoundInfo {
        return Ok(Arc::new(RoundInfo::ManySmallFiles {
            start_level,
            max_num_files_to_group: self.max_num_files_per_plan,
            max_total_file_size_to_group: self.max_total_file_size_per_plan,
        }));
    }

@@ -221,6 +224,7 @@ mod tests {
        // max 2 files per plan
        let round_info = LevelBasedRoundInfo {
            max_num_files_per_plan: 2,
            max_total_file_size_per_plan: 1000,
        };

        // f1 and f2 are not over limit
@@ -61,6 +61,7 @@ mod tests {
        let round_info = RoundInfo::ManySmallFiles {
            start_level: CompactionLevel::Initial,
            max_num_files_to_group: 2,
            max_total_file_size_to_group: 100,
        };
        let split = ManyFilesRoundSplit::new();

@@ -0,0 +1,634 @@
use std::collections::VecDeque;

use data_types::{CompactionLevel, ParquetFile, Timestamp};

use crate::components::{
    divide_initial::multiple_branches::order_files,
    files_split::{target_level_split::TargetLevelSplit, FilesSplit},
};

/// Return (`[files_to_compact]`, `[files_to_keep]`) of the given files
/// such that `files_to_compact` are files to compact that are under the max_compact_size limit
/// and `files_to_keep` are the rest of the files, which will be considered for compaction in the next round
///
/// To deduplicate data correctly, we need to select start-level files in their max_l0_created_at order
/// and they must be compacted with their overlapped files in the target level. See the example below for the
/// correlation between created order and overlapped time ranges of files
///
/// Example:
///
/// Input Files: three L0 and three L1 files. The ID after the dot is the order the files were created
/// |---L0.1---| |---L0.3---| |---L0.2---| Note that L0.2 is created BEFORE L0.3 but has a LATER time range
/// |---L1.1---| |---L1.2---| |---L1.3---|
///
/// Output of files_to_compact: only 3 possible choices:
/// 1. Smallest compacting set: L0.1 + L1.1
/// 2. Medium size compacting set: L0.1 + L1.1 + L0.2 + L1.2 + L1.3
///    Note that L1.2 overlaps with the time range of L0.1 + L0.2 and must be included here
/// 3. Largest compacting set: All input files
///
pub fn limit_files_to_compact(
    max_compact_size: usize,
    files: Vec<data_types::ParquetFile>,
    target_level: CompactionLevel,
) -> (Vec<ParquetFile>, Vec<ParquetFile>) {
    // panic if not all files are either in target level or start level
    let start_level = target_level.prev();
    assert!(files
        .iter()
        .all(|f| f.compaction_level == target_level || f.compaction_level == start_level));

    // Get start-level and target-level files
    let len = files.len();
    let split = TargetLevelSplit::new();
    let (start_level_files, mut target_level_files) = split.apply(files, start_level);

    // Order start-level files to group and compact them correctly
    let start_level_files = order_files(start_level_files, &start_level);
    let mut start_level_files = start_level_files.iter().collect::<VecDeque<_>>();

    // Go over start-level files and find overlapped files in target level
    let mut start_level_files_to_compact: Vec<ParquetFile> = Vec::new();
    let mut target_level_files_to_compact = Vec::new();
    let mut files_to_keep = Vec::new();
    let mut total_size = 0;
    while let Some(file) = start_level_files.pop_front() {
        // A start-level file, if compacted, must be compacted with all of its overlapped target-level files.
        // Thus compute the size needed before deciding to compact this file and its overlaps or not

        // Time range of start_level_files_to_compact plus this file
        let (min_time, max_time) = time_range(file, &start_level_files_to_compact);

        // Get all target-level files that overlap with the time range and are not yet in target_level_files_to_compact
        let overlapped_files: Vec<&ParquetFile> = target_level_files
            .iter()
            .filter(|f| f.overlaps_time_range(min_time, max_time))
            .filter(|f| !target_level_files_to_compact.iter().any(|x| x == *f))
            .collect();

        // Size of the file and its overlapped files
        let size = file.file_size_bytes
            + overlapped_files
                .iter()
                .map(|f| f.file_size_bytes)
                .sum::<i64>();

        // If total size is under limit, add this file and its overlapped files to files_to_compact
        if total_size + size <= max_compact_size as i64 {
            start_level_files_to_compact.push(file.clone());
            target_level_files_to_compact
                .extend(overlapped_files.into_iter().cloned().collect::<Vec<_>>());
            total_size += size;
        } else {
            // Over limit, stop here
            files_to_keep.push(file.clone());
            break;
        }
    }

    // Remove all files in target_level_files_to_compact from target_level_files
    target_level_files.retain(|f| !target_level_files_to_compact.iter().any(|x| x == f));

    // All files left in start_level_files and target_level_files are kept for the next round
    target_level_files.extend(start_level_files.into_iter().cloned().collect::<Vec<_>>());
    files_to_keep.extend(target_level_files);

    // All files in start_level_files_to_compact and target_level_files_to_compact will be compacted
    let files_to_compact = start_level_files_to_compact
        .into_iter()
        .chain(target_level_files_to_compact.into_iter())
        .collect::<Vec<_>>();

    assert_eq!(files_to_compact.len() + files_to_keep.len(), len);

    (files_to_compact, files_to_keep)
}

/// Return the time range of the given file plus the list of given files
fn time_range(file: &ParquetFile, files: &[ParquetFile]) -> (Timestamp, Timestamp) {
    let mut min_time = file.min_time;
    let mut max_time = file.max_time;
    files.iter().for_each(|f| {
        min_time = min_time.min(f.min_time);
        max_time = max_time.max(f.max_time);
    });

    (min_time, max_time)
}
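
The doc comment's "only 3 possible choices" falls out of the greedy loop: each start-level file brings in all of its not-yet-selected overlapped target-level files, or nothing at all. A self-contained sketch of that all-or-nothing budget check, using hypothetical plain-integer sizes instead of the crate's types:

```rust
// Sketch: all-or-nothing budget selection over (start_file_size, overlapped_target_sizes).
fn selectable(groups: &[(i64, Vec<i64>)], max_compact_size: i64) -> usize {
    let mut total = 0;
    for (i, (start, overlaps)) in groups.iter().enumerate() {
        let group_size = start + overlaps.iter().sum::<i64>();
        if total + group_size > max_compact_size {
            return i; // the first i groups fit; the rest are kept for the next round
        }
        total += group_size;
    }
    groups.len()
}

// Mirrors test_compact_files_limit_3 below: every file is 100 bytes.
// Group 1: L0.1 + L1.11 (200b); group 2: L0.2 + L1.12 + L1.13 (300b); group 3: L0.3 (100b).
fn main() {
    let groups = vec![(100, vec![100]), (100, vec![100, 100]), (100, vec![])];
    assert_eq!(selectable(&groups, 300), 1); // only the smallest set fits
    assert_eq!(selectable(&groups, 400), 1); // still not enough for the medium set
    assert_eq!(selectable(&groups, 500), 2); // medium set
    assert_eq!(selectable(&groups, 600), 3); // everything
}
```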
#[cfg(test)]
mod tests {
    use compactor2_test_utils::{
        create_l1_files, create_overlapped_files, create_overlapped_l0_l1_files_2,
        create_overlapped_start_target_files, format_files, format_files_split,
    };
    use data_types::CompactionLevel;

    use crate::components::split_or_compact::files_to_compact::limit_files_to_compact;

    const MAX_SIZE: usize = 100;

    #[test]
    fn test_compact_empty() {
        let files = vec![];
        let (files_to_compact, files_to_keep) =
            limit_files_to_compact(MAX_SIZE, files, CompactionLevel::Initial);
        assert!(files_to_compact.is_empty());
        assert!(files_to_keep.is_empty());
    }

    #[test]
    #[should_panic]
    fn test_compact_wrong_target_level() {
        // all L1 files
        let files = create_l1_files(1);

        // Target is L0 while all files are in L1 --> panic
        let (_files_to_compact, _files_to_keep) =
            limit_files_to_compact(MAX_SIZE, files, CompactionLevel::Initial);
    }

    #[test]
    #[should_panic]
    fn test_compact_files_three_level_files() {
        // Three level files
        let files = create_overlapped_files();
        insta::assert_yaml_snapshot!(
            format_files("initial", &files),
            @r###"
        ---
        - initial
        - "L0 "
        - "L0.2[650,750] 1b |-L0.2-| "
        - "L0.1[450,620] 1b |----L0.1-----| "
        - "L0.3[800,900] 100b |-L0.3-| "
        - "L1 "
        - "L1.13[600,700] 100b |L1.13-| "
        - "L1.12[400,500] 1b |L1.12-| "
        - "L1.11[250,350] 1b |L1.11-| "
        - "L2 "
        - "L2.21[0,100] 1b |L2.21-| "
        - "L2.22[200,300] 1b |L2.22-| "
        "###
        );

        // panic because it only handles at most 2 levels next to each other
        let (_files_to_compact, _files_to_keep) =
            limit_files_to_compact(MAX_SIZE, files, CompactionLevel::FileNonOverlapped);
    }

    #[test]
    fn test_compact_files_no_limit() {
        let files = create_overlapped_l0_l1_files_2(MAX_SIZE as i64);
        insta::assert_yaml_snapshot!(
            format_files("initial", &files),
            @r###"
        ---
        - initial
        - "L0, all files 100b "
        - "L0.2[650,750] |-----L0.2-----| "
        - "L0.1[450,620] |----------L0.1-----------| "
        - "L0.3[800,900] |-----L0.3-----|"
        - "L1, all files 100b "
        - "L1.13[600,700] |----L1.13-----| "
        - "L1.12[400,500] |----L1.12-----| "
        "###
        );

        // size limit > total size --> files to compact = all L0s and overlapped L1s
        let (files_to_compact, files_to_keep) =
            limit_files_to_compact(MAX_SIZE * 5 + 1, files, CompactionLevel::FileNonOverlapped);

        assert_eq!(files_to_compact.len(), 5);
        assert_eq!(files_to_keep.len(), 0);

        // See layout of the 2 sets of files
        insta::assert_yaml_snapshot!(
            format_files_split("files to compact:", &files_to_compact, "files to keep:", &files_to_keep),
            @r###"
        ---
        - "files to compact:"
        - "L0, all files 100b "
        - "L0.1[450,620] |----------L0.1-----------| "
        - "L0.2[650,750] |-----L0.2-----| "
        - "L0.3[800,900] |-----L0.3-----|"
        - "L1, all files 100b "
        - "L1.13[600,700] |----L1.13-----| "
        - "L1.12[400,500] |----L1.12-----| "
        - "files to keep:"
        "###
        );
    }

    #[test]
    fn test_compact_files_limit_too_small() {
        let files = create_overlapped_l0_l1_files_2(MAX_SIZE as i64);
        insta::assert_yaml_snapshot!(
            format_files("initial", &files),
            @r###"
        ---
        - initial
        - "L0, all files 100b "
        - "L0.2[650,750] |-----L0.2-----| "
        - "L0.1[450,620] |----------L0.1-----------| "
        - "L0.3[800,900] |-----L0.3-----|"
        - "L1, all files 100b "
        - "L1.13[600,700] |----L1.13-----| "
        - "L1.12[400,500] |----L1.12-----| "
        "###
        );

        // size limit too small to compact anything
        let (files_to_compact, files_to_keep) =
            limit_files_to_compact(MAX_SIZE, files, CompactionLevel::FileNonOverlapped);

        assert_eq!(files_to_compact.len(), 0);
        assert_eq!(files_to_keep.len(), 5);

        // See layout of the 2 sets of files
        insta::assert_yaml_snapshot!(
            format_files_split("files to compact:", &files_to_compact, "files to keep:", &files_to_keep),
            @r###"
        ---
        - "files to compact:"
        - "files to keep:"
        - "L0, all files 100b "
        - "L0.1[450,620] |----------L0.1-----------| "
        - "L0.2[650,750] |-----L0.2-----| "
        - "L0.3[800,900] |-----L0.3-----|"
        - "L1, all files 100b "
        - "L1.13[600,700] |----L1.13-----| "
        - "L1.12[400,500] |----L1.12-----| "
        "###
        );
    }

    #[test]
    fn test_compact_files_limit() {
        let files = create_overlapped_l0_l1_files_2(MAX_SIZE as i64);
        insta::assert_yaml_snapshot!(
            format_files("initial", &files),
            @r###"
        ---
        - initial
        - "L0, all files 100b "
        - "L0.2[650,750] |-----L0.2-----| "
        - "L0.1[450,620] |----------L0.1-----------| "
        - "L0.3[800,900] |-----L0.3-----|"
        - "L1, all files 100b "
        - "L1.13[600,700] |----L1.13-----| "
        - "L1.12[400,500] |----L1.12-----| "
        "###
        );

        // size limit < total size --> only enough to compact L0.1 with L1.12 and L1.13
        let (files_to_compact, files_to_keep) =
            limit_files_to_compact(MAX_SIZE * 3, files, CompactionLevel::FileNonOverlapped);

        assert_eq!(files_to_compact.len(), 3);
        assert_eq!(files_to_keep.len(), 2);

        // See layout of the 2 sets of files
        insta::assert_yaml_snapshot!(
            format_files_split("files to compact:", &files_to_compact, "files to keep:", &files_to_keep),
            @r###"
        ---
        - "files to compact:"
        - "L0, all files 100b "
        - "L0.1[450,620] |-------------------L0.1--------------------| "
        - "L1, all files 100b "
        - "L1.13[600,700] |---------L1.13----------| "
        - "L1.12[400,500] |---------L1.12----------| "
        - "files to keep:"
        - "L0, all files 100b "
        - "L0.2[650,750] |-------------L0.2-------------| "
        - "L0.3[800,900] |-------------L0.3-------------|"
        "###
        );
    }

    #[test]
    fn test_compact_files_limit_2() {
        let files = create_overlapped_l0_l1_files_2(MAX_SIZE as i64);
        insta::assert_yaml_snapshot!(
            format_files("initial", &files),
            @r###"
        ---
        - initial
        - "L0, all files 100b "
        - "L0.2[650,750] |-----L0.2-----| "
        - "L0.1[450,620] |----------L0.1-----------| "
        - "L0.3[800,900] |-----L0.3-----|"
        - "L1, all files 100b "
        - "L1.13[600,700] |----L1.13-----| "
        - "L1.12[400,500] |----L1.12-----| "
        "###
        );

        // size limit < total size --> only enough to compact L0.1, L0.2 with L1.12 and L1.13
        let (files_to_compact, files_to_keep) =
            limit_files_to_compact(MAX_SIZE * 4, files, CompactionLevel::FileNonOverlapped);

        assert_eq!(files_to_compact.len(), 4);
        assert_eq!(files_to_keep.len(), 1);

        // See layout of the 2 sets of files
        insta::assert_yaml_snapshot!(
            format_files_split("files to compact:", &files_to_compact, "files to keep:", &files_to_keep),
            @r###"
        ---
        - "files to compact:"
        - "L0, all files 100b "
        - "L0.1[450,620] |----------------L0.1----------------| "
        - "L0.2[650,750] |--------L0.2--------| "
        - "L1, all files 100b "
        - "L1.13[600,700] |-------L1.13--------| "
        - "L1.12[400,500] |-------L1.12--------| "
        - "files to keep:"
        - "L0, all files 100b "
        - "L0.3[800,900] |-------------------------------------L0.3-------------------------------------|"
        "###
        );
    }

    #[test]
    fn test_compact_files_limit_3() {
        let files = create_overlapped_start_target_files(MAX_SIZE as i64, CompactionLevel::Initial);
        insta::assert_yaml_snapshot!(
            format_files("initial", &files),
            @r###"
        ---
        - initial
        - "L0, all files 100b "
        - "L0.2[550,650] |----L0.2----| "
        - "L0.1[150,250] |----L0.1----| "
        - "L0.3[350,450] |----L0.3----| "
        - "L1, all files 100b "
        - "L1.12[300,400] |---L1.12----| "
        - "L1.13[500,600] |---L1.13----| "
        - "L1.11[100,200] |---L1.11----| "
        "###
        );

        // There are only 3 choices for compacting:
        // 1. Smallest set: L0.1 with L1.11
        // 2. Medium size set: L0.1, L0.2 with L1.11, L1.12, L1.13
        // 3. All files: L0.1, L0.2, L0.3 with L1.11, L1.12, L1.13

        // --------------------
        // size limit = MAX_SIZE * 3 to force the first choice, L0.1 with L1.11
        let (files_to_compact, files_to_keep) = limit_files_to_compact(
            MAX_SIZE * 3,
            files.clone(),
            CompactionLevel::FileNonOverlapped,
        );

        assert_eq!(files_to_compact.len(), 2);
        assert_eq!(files_to_keep.len(), 4);

        // See layout of the 2 sets of files
        insta::assert_yaml_snapshot!(
            format_files_split("files to compact:", &files_to_compact, "files to keep:", &files_to_keep),
            @r###"
        ---
        - "files to compact:"
        - "L0, all files 100b "
        - "L0.1[150,250] |-----------------------L0.1------------------------| "
        - "L1, all files 100b "
        - "L1.11[100,200] |-----------------------L1.11-----------------------| "
        - "files to keep:"
        - "L0, all files 100b "
        - "L0.2[550,650] |--------L0.2--------| "
        - "L0.3[350,450] |--------L0.3--------| "
        - "L1, all files 100b "
        - "L1.12[300,400] |-------L1.12--------| "
        - "L1.13[500,600] |-------L1.13--------| "
        "###
        );

        // --------------------
        // size limit = MAX_SIZE * 4 still forces the first choice, L0.1 with L1.11, because it is still not enough for the second choice
        let (files_to_compact, files_to_keep) = limit_files_to_compact(
            MAX_SIZE * 4,
            files.clone(),
            CompactionLevel::FileNonOverlapped,
        );

        assert_eq!(files_to_compact.len(), 2);
        assert_eq!(files_to_keep.len(), 4);

        // See layout of the 2 sets of files
        insta::assert_yaml_snapshot!(
            format_files_split("files to compact:", &files_to_compact, "files to keep:", &files_to_keep),
            @r###"
        ---
        - "files to compact:"
        - "L0, all files 100b "
        - "L0.1[150,250] |-----------------------L0.1------------------------| "
        - "L1, all files 100b "
        - "L1.11[100,200] |-----------------------L1.11-----------------------| "
        - "files to keep:"
        - "L0, all files 100b "
        - "L0.2[550,650] |--------L0.2--------| "
        - "L0.3[350,450] |--------L0.3--------| "
        - "L1, all files 100b "
        - "L1.12[300,400] |-------L1.12--------| "
        - "L1.13[500,600] |-------L1.13--------| "
        "###
        );

        // --------------------
        // size limit = MAX_SIZE * 5 to force the second choice, L0.1, L0.2 with L1.11, L1.12, L1.13
        let (files_to_compact, files_to_keep) = limit_files_to_compact(
            MAX_SIZE * 5,
            files.clone(),
            CompactionLevel::FileNonOverlapped,
        );

        assert_eq!(files_to_compact.len(), 5);
        assert_eq!(files_to_keep.len(), 1);

        // See layout of the 2 sets of files
        insta::assert_yaml_snapshot!(
            format_files_split("files to compact:", &files_to_compact, "files to keep:", &files_to_keep),
            @r###"
        ---
        - "files to compact:"
        - "L0, all files 100b "
        - "L0.1[150,250] |----L0.1----| "
        - "L0.2[550,650] |----L0.2----| "
        - "L1, all files 100b "
        - "L1.11[100,200] |---L1.11----| "
        - "L1.12[300,400] |---L1.12----| "
        - "L1.13[500,600] |---L1.13----| "
        - "files to keep:"
        - "L0, all files 100b "
        - "L0.3[350,450] |-------------------------------------L0.3-------------------------------------|"
        "###
        );

        // --------------------
        // size limit >= total size to force the third choice, compacting everything: L0.1, L0.2, L0.3 with L1.11, L1.12, L1.13
        let (files_to_compact, files_to_keep) =
            limit_files_to_compact(MAX_SIZE * 6, files, CompactionLevel::FileNonOverlapped);

        assert_eq!(files_to_compact.len(), 6);
        assert_eq!(files_to_keep.len(), 0);

        // See layout of the 2 sets of files
        insta::assert_yaml_snapshot!(
            format_files_split("files to compact:", &files_to_compact, "files to keep:", &files_to_keep),
            @r###"
        ---
        - "files to compact:"
        - "L0, all files 100b "
        - "L0.1[150,250] |----L0.1----| "
        - "L0.2[550,650] |----L0.2----| "
        - "L0.3[350,450] |----L0.3----| "
        - "L1, all files 100b "
        - "L1.11[100,200] |---L1.11----| "
        - "L1.12[300,400] |---L1.12----| "
        - "L1.13[500,600] |---L1.13----| "
        - "files to keep:"
        "###
        );
    }

    #[test]
    fn test_compact_files_limit_start_level_1() {
        let files = create_overlapped_start_target_files(
            MAX_SIZE as i64,
            CompactionLevel::FileNonOverlapped,
        );
        insta::assert_yaml_snapshot!(
            format_files("initial", &files),
            @r###"
        ---
        - initial
        - "L1, all files 100b "
        - "L1.2[550,650] |----L1.2----| "
        - "L1.1[150,250] |----L1.1----| "
        - "L1.3[350,450] |----L1.3----| "
        - "L2, all files 100b "
        - "L2.12[300,400] |---L2.12----| "
        - "L2.13[500,600] |---L2.13----| "
        - "L2.11[100,200] |---L2.11----| "
        "###
        );

        // There are only 3 choices for compacting:
        // 1. Smallest set: L1.1 with L2.11
        // 2. Medium size set: L1.1, L1.3 with L2.11, L2.12
        // 3. All files: L1.1, L1.2, L1.3 with L2.11, L2.12, L2.13

        // --------------------
        // size limit = MAX_SIZE * 3 to force the first choice, L1.1 with L2.11
        let (files_to_compact, files_to_keep) =
            limit_files_to_compact(MAX_SIZE * 3, files.clone(), CompactionLevel::Final);

        assert_eq!(files_to_compact.len(), 2);
        assert_eq!(files_to_keep.len(), 4);

        // See layout of the 2 sets of files
        insta::assert_yaml_snapshot!(
            format_files_split("files to compact:", &files_to_compact, "files to keep:", &files_to_keep),
            @r###"
        ---
        - "files to compact:"
        - "L1, all files 100b "
        - "L1.1[150,250] |-----------------------L1.1------------------------| "
        - "L2, all files 100b "
        - "L2.11[100,200] |-----------------------L2.11-----------------------| "
        - "files to keep:"
        - "L1, all files 100b "
        - "L1.3[350,450] |--------L1.3--------| "
        - "L1.2[550,650] |--------L1.2--------| "
        - "L2, all files 100b "
        - "L2.12[300,400] |-------L2.12--------| "
        - "L2.13[500,600] |-------L2.13--------| "
        "###
        );

        // --------------------
        // size limit = MAX_SIZE * 3 still forces the first choice, L1.1 with L2.11, because it is still not enough for the second choice
        let (files_to_compact, files_to_keep) =
            limit_files_to_compact(MAX_SIZE * 3, files.clone(), CompactionLevel::Final);

        assert_eq!(files_to_compact.len(), 2);
        assert_eq!(files_to_keep.len(), 4);

        // See layout of the 2 sets of files
        insta::assert_yaml_snapshot!(
            format_files_split("files to compact:", &files_to_compact, "files to keep:", &files_to_keep),
            @r###"
        ---
        - "files to compact:"
        - "L1, all files 100b "
        - "L1.1[150,250] |-----------------------L1.1------------------------| "
        - "L2, all files 100b "
        - "L2.11[100,200] |-----------------------L2.11-----------------------| "
        - "files to keep:"
        - "L1, all files 100b "
        - "L1.3[350,450] |--------L1.3--------| "
        - "L1.2[550,650] |--------L1.2--------| "
        - "L2, all files 100b "
        - "L2.12[300,400] |-------L2.12--------| "
        - "L2.13[500,600] |-------L2.13--------| "
        "###
        );

        // --------------------
        // size limit = MAX_SIZE * 5 to force the second choice, L1.1, L1.3 with L2.11, L2.12
        let (files_to_compact, files_to_keep) =
            limit_files_to_compact(MAX_SIZE * 5, files.clone(), CompactionLevel::Final);

        assert_eq!(files_to_compact.len(), 4);
        assert_eq!(files_to_keep.len(), 2);

        // See layout of the 2 sets of files
        insta::assert_yaml_snapshot!(
            format_files_split("files to compact:", &files_to_compact, "files to keep:", &files_to_keep),
            @r###"
        ---
        - "files to compact:"
        - "L1, all files 100b "
        - "L1.1[150,250] |--------L1.1--------| "
        - "L1.3[350,450] |--------L1.3--------| "
        - "L2, all files 100b "
        - "L2.11[100,200] |-------L2.11--------| "
        - "L2.12[300,400] |-------L2.12--------| "
        - "files to keep:"
        - "L1, all files 100b "
        - "L1.2[550,650] |-----------------------L1.2------------------------| "
        - "L2, all files 100b "
        - "L2.13[500,600] |-----------------------L2.13-----------------------| "
        "###
        );

        // --------------------
        // size limit >= total size to force the third choice, compacting everything: L1.1, L1.2, L1.3 with L2.11, L2.12, L2.13
        let (files_to_compact, files_to_keep) =
            limit_files_to_compact(MAX_SIZE * 6, files, CompactionLevel::Final);

        assert_eq!(files_to_compact.len(), 6);
        assert_eq!(files_to_keep.len(), 0);

        // See layout of the 2 sets of files
        insta::assert_yaml_snapshot!(
            format_files_split("files to compact:", &files_to_compact, "files to keep:", &files_to_keep),
            @r###"
        ---
        - "files to compact:"
        - "L1, all files 100b "
        - "L1.1[150,250] |----L1.1----| "
        - "L1.3[350,450] |----L1.3----| "
        - "L1.2[550,650] |----L1.2----| "
        - "L2, all files 100b "
        - "L2.11[100,200] |---L2.11----| "
        - "L2.12[300,400] |---L2.12----| "
        - "L2.13[500,600] |---L2.13----| "
        - "files to keep:"
        "###
        );
    }
}
@ -0,0 +1,234 @@
|
|||
use std::collections::VecDeque;
|
||||
|
||||
use data_types::{CompactionLevel, ParquetFile};
|
||||
use itertools::Itertools;
|
||||
use observability_deps::tracing::debug;
|
||||
|
||||
use crate::{
|
||||
components::files_split::{target_level_split::TargetLevelSplit, FilesSplit},
|
||||
file_classification::FileToSplit,
|
||||
};
|
||||
|
||||
/// Return (`[files_to_split]`, `[files_not_to_split]`) of the given files
/// such that `files_to_split` are start-level files that overlap with more than one file in the target level.
///
/// Only split files in the start level if the total size of the given files is greater than max_compact_size
///
/// Example:
/// . Input:
///                |---L0.1---|      |--L0.2--|
///  |--L1.1--|  |--L1.2--|  |--L1.3--|
///
/// L0.1 overlaps with 2 level-1 files (L1.2, L1.3) and should be split into 2 files, one overlapping
/// with L1.2 and one overlapping with L1.3
///
/// . Output:
///     . files_to_split = [L0.1]
///     . files_not_to_split = [L1.1, L1.2, L1.3, L0.2], which are the rest of the files
///
/// Since a start-level file needs to be compacted with all of its overlapped target-level files to retain the invariant that
/// all files in the target level are non-overlapped, splitting start-level files reduces the number of overlapped files
/// at the target level and avoids compacting too many files in the next compaction cycle.
/// To achieve this goal, a start-level file should be split to overlap with at most one target-level file. This reduces the
/// minimum set of compacting files to 2: a start-level file and an overlapped target-level file.
|
||||
pub fn identify_files_to_split(
|
||||
files: Vec<data_types::ParquetFile>,
|
||||
target_level: CompactionLevel,
|
||||
) -> (Vec<FileToSplit>, Vec<ParquetFile>) {
|
||||
// panic if not all files are either in target level or start level
|
||||
let start_level = target_level.prev();
|
||||
assert!(files
|
||||
.iter()
|
||||
.all(|f| f.compaction_level == target_level || f.compaction_level == start_level));
|
||||
|
||||
// Get start-level and target-level files
|
||||
let len = files.len();
|
||||
let split = TargetLevelSplit::new();
|
||||
let (start_level_files, mut target_level_files) = split.apply(files, start_level);
|
||||
|
||||
// sort start_level files by their max_l0_created_at and convert to a VecDeque for pop_front
|
||||
let mut start_level_files: VecDeque<ParquetFile> = start_level_files
|
||||
.into_iter()
|
||||
.sorted_by_key(|f| f.max_l0_created_at)
|
||||
.collect();
|
||||
// sort target level files by their min_time
|
||||
target_level_files.sort_by_key(|f| f.min_time);
|
||||
|
||||
// Find start-level files that overlap with more than one file in the target level
|
||||
let mut files_to_split = Vec::new();
|
||||
let mut files_not_to_split = Vec::new();
|
||||
while let Some(file) = start_level_files.pop_front() {
|
||||
// Get target_level files that overlap with this file
|
||||
let overlapped_target_level_files: Vec<&ParquetFile> = target_level_files
|
||||
.iter()
|
||||
.filter(|f| file.overlaps(f))
|
||||
.collect();
|
||||
|
||||
// Do not split a file that overlaps with at most one file in the target level,
// or that has a single timestamp (splitting it would produce the same file and thus an infinite loop),
// or whose time range is 1 (splitting it would panic because split_time would have to equal min_time/max_time, which is disallowed)
|
||||
if overlapped_target_level_files.len() < 2
|
||||
|| file.min_time == file.max_time
|
||||
|| file.min_time == file.max_time - 1
|
||||
{
|
||||
files_not_to_split.push(file);
|
||||
} else {
|
||||
debug!(?file.min_time, ?file.max_time, ?file.compaction_level, "time range of file to split");
|
||||
overlapped_target_level_files
|
||||
.iter()
|
||||
.for_each(|f| debug!(?f.min_time, ?f.max_time, ?f.compaction_level, "time range of overlap file"));
|
||||
|
||||
// this file will be split; collect the max_time of each overlapped target-level file (below this file's max_time) as a split time
|
||||
let split_times: Vec<i64> = overlapped_target_level_files
|
||||
.iter()
|
||||
.filter(|f| f.max_time < file.max_time)
|
||||
.map(|f| f.max_time.get())
|
||||
.dedup()
|
||||
.collect();
|
||||
|
||||
debug!(?split_times);
|
||||
|
||||
files_to_split.push(FileToSplit { file, split_times });
|
||||
}
|
||||
}
|
||||
|
||||
// keep the rest of the files for next round
|
||||
start_level_files.extend(target_level_files);
|
||||
files_not_to_split.extend(start_level_files);
|
||||
|
||||
assert_eq!(files_to_split.len() + files_not_to_split.len(), len);
|
||||
|
||||
(files_to_split, files_not_to_split)
|
||||
}
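// A minimal, self-contained sketch of the split-time rule above (illustration
// only; `FileRange` is a hypothetical stand-in for `data_types::ParquetFile`,
// which carries many more fields):
#[cfg(test)]
mod split_time_sketch {
    struct FileRange {
        min_time: i64,
        max_time: i64,
    }

    fn overlaps(a: &FileRange, b: &FileRange) -> bool {
        a.min_time <= b.max_time && a.max_time >= b.min_time
    }

    // Split times are the max_times of the overlapped target-level files that
    // end strictly before the file being split does.
    fn split_times(file: &FileRange, target_level: &[FileRange]) -> Vec<i64> {
        let mut times: Vec<i64> = target_level
            .iter()
            .filter(|t| overlaps(file, t) && t.max_time < file.max_time)
            .map(|t| t.max_time)
            .collect();
        times.dedup();
        times
    }

    #[test]
    fn doc_example() {
        // L0.1[450,620] over L1.12[400,500] and L1.13[600,700] splits at 500,
        // yielding [450,500] and [501,620] as in test_split_files_split below.
        let l0_1 = FileRange {
            min_time: 450,
            max_time: 620,
        };
        let l1 = [
            FileRange {
                min_time: 400,
                max_time: 500,
            },
            FileRange {
                min_time: 600,
                max_time: 700,
            },
        ];
        assert_eq!(split_times(&l0_1, &l1), vec![500]);
    }
}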
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use compactor2_test_utils::{
|
||||
create_l1_files, create_overlapped_files, create_overlapped_l0_l1_files_2, format_files,
|
||||
format_files_split,
|
||||
};
|
||||
use data_types::CompactionLevel;
|
||||
|
||||
#[test]
|
||||
fn test_split_empty() {
|
||||
let files = vec![];
|
||||
let (files_to_split, files_not_to_split) =
|
||||
super::identify_files_to_split(files, CompactionLevel::Initial);
|
||||
assert!(files_to_split.is_empty());
|
||||
assert!(files_not_to_split.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_split_files_wrong_target_level() {
|
||||
// all L1 files
|
||||
let files = create_l1_files(1);
|
||||
|
||||
// Target is L0 while all files are in L1 --> panic
|
||||
let (_files_to_split, _files_not_to_split) =
|
||||
super::identify_files_to_split(files, CompactionLevel::Initial);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_split_files_three_level_files() {
|
||||
// Three level files
|
||||
let files = create_overlapped_files();
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L0 "
|
||||
- "L0.2[650,750] 1b |-L0.2-| "
|
||||
- "L0.1[450,620] 1b |----L0.1-----| "
|
||||
- "L0.3[800,900] 100b |-L0.3-| "
|
||||
- "L1 "
|
||||
- "L1.13[600,700] 100b |L1.13-| "
|
||||
- "L1.12[400,500] 1b |L1.12-| "
|
||||
- "L1.11[250,350] 1b |L1.11-| "
|
||||
- "L2 "
|
||||
- "L2.21[0,100] 1b |L2.21-| "
|
||||
- "L2.22[200,300] 1b |L2.22-| "
|
||||
"###
|
||||
);
|
||||
|
||||
// panics because the function only handles at most 2 adjacent levels
|
||||
let (_files_to_split, _files_not_to_split) =
|
||||
super::identify_files_to_split(files, CompactionLevel::FileNonOverlapped);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_split_files_no_split() {
|
||||
let files = create_l1_files(1);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L1, all files 1b "
|
||||
- "L1.13[600,700] |-----L1.13-----| "
|
||||
- "L1.12[400,500] |-----L1.12-----| "
|
||||
- "L1.11[250,350] |-----L1.11-----| "
|
||||
"###
|
||||
);
|
||||
|
||||
let (files_to_split, files_not_to_split) =
|
||||
super::identify_files_to_split(files, CompactionLevel::FileNonOverlapped);
|
||||
assert!(files_to_split.is_empty());
|
||||
assert_eq!(files_not_to_split.len(), 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_split_files_split() {
|
||||
let files = create_overlapped_l0_l1_files_2(1);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L0, all files 1b "
|
||||
- "L0.2[650,750] |-----L0.2-----| "
|
||||
- "L0.1[450,620] |----------L0.1-----------| "
|
||||
- "L0.3[800,900] |-----L0.3-----|"
|
||||
- "L1, all files 1b "
|
||||
- "L1.13[600,700] |----L1.13-----| "
|
||||
- "L1.12[400,500] |----L1.12-----| "
|
||||
"###
|
||||
);
|
||||
|
||||
let (files_to_split, files_not_to_split) =
|
||||
super::identify_files_to_split(files, CompactionLevel::FileNonOverlapped);
|
||||
|
||||
// L0.1 that overlaps with 2 level-1 files will be split
|
||||
assert_eq!(files_to_split.len(), 1);
|
||||
|
||||
// L0.1 [450, 620] will be split at 500 (max of its overlapped L1.12)
|
||||
// The split_times [500] mean that after we execute the split (in later steps), L0.1 will
|
||||
// be split into 2 files with time ranges: [450, 500] and [501, 620]. This means the first file will
|
||||
// overlap with L1.12 and the second file will overlap with L1.13
|
||||
assert_eq!(files_to_split[0].file.id.get(), 1);
|
||||
assert_eq!(files_to_split[0].split_times, vec![500]);
|
||||
|
||||
// The rest go into files_not_to_split
|
||||
assert_eq!(files_not_to_split.len(), 4);
|
||||
|
||||
// See layout of the two sets of files
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("files to split:", &files_to_split.iter().map(|f| f.file.clone()).collect::<Vec<_>>(), "files not to split:", &files_not_to_split),
|
||||
@r###"
|
||||
---
|
||||
- "files to split:"
|
||||
- "L0, all files 1b "
|
||||
- "L0.1[450,620] |-------------------------------------L0.1-------------------------------------|"
|
||||
- "files not to split:"
|
||||
- "L0, all files 1b "
|
||||
- "L0.2[650,750] |-----L0.2-----| "
|
||||
- "L0.3[800,900] |-----L0.3-----|"
|
||||
- "L1, all files 1b "
|
||||
- "L1.12[400,500] |----L1.12-----| "
|
||||
- "L1.13[600,700] |----L1.13-----| "
|
||||
"###
|
||||
);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
use std::fmt::Display;
|
||||
|
||||
use data_types::{CompactionLevel, ParquetFile};
|
||||
use observability_deps::tracing::info;
|
||||
|
||||
use crate::{file_classification::FilesToCompactOrSplit, partition_info::PartitionInfo};
|
||||
|
||||
use super::SplitOrCompact;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct LoggingSplitOrCompactWrapper<T>
|
||||
where
|
||||
T: SplitOrCompact,
|
||||
{
|
||||
inner: T,
|
||||
}
|
||||
|
||||
impl<T> LoggingSplitOrCompactWrapper<T>
|
||||
where
|
||||
T: SplitOrCompact,
|
||||
{
|
||||
pub fn new(inner: T) -> Self {
|
||||
Self { inner }
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Display for LoggingSplitOrCompactWrapper<T>
|
||||
where
|
||||
T: SplitOrCompact,
|
||||
{
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "display({})", self.inner)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> SplitOrCompact for LoggingSplitOrCompactWrapper<T>
|
||||
where
|
||||
T: SplitOrCompact,
|
||||
{
|
||||
fn apply(
|
||||
&self,
|
||||
partition_info: &PartitionInfo,
|
||||
files: Vec<ParquetFile>,
|
||||
target_level: CompactionLevel,
|
||||
) -> (FilesToCompactOrSplit, Vec<ParquetFile>) {
|
||||
let (files_to_split_or_compact, files_to_keep) =
|
||||
self.inner.apply(partition_info, files, target_level);
|
||||
|
||||
info!(
|
||||
partition_id = partition_info.partition_id.get(),
|
||||
target_level = %target_level,
|
||||
files_to_compact = files_to_split_or_compact.files_to_compact_len(),
|
||||
files_to_split = files_to_split_or_compact.files_to_split_len(),
|
||||
files_to_keep = files_to_keep.len(),
|
||||
"split or compact"
|
||||
);
|
||||
|
||||
(files_to_split_or_compact, files_to_keep)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
use std::fmt::{Debug, Display};
|
||||
|
||||
use data_types::{CompactionLevel, ParquetFile};
|
||||
|
||||
use crate::{file_classification::FilesToCompactOrSplit, PartitionInfo};
|
||||
|
||||
pub mod files_to_compact;
|
||||
pub mod files_to_split;
|
||||
pub mod logging;
|
||||
pub mod split_compact;
|
||||
|
||||
pub trait SplitOrCompact: Debug + Display + Send + Sync {
|
||||
/// Return (`[files_to_split_or_compact]`, `[files_to_keep]`) of the given files.
/// `files_to_keep` are files that are not part of this round's compaction but
/// are kept to be compacted in the next round
|
||||
fn apply(
|
||||
&self,
|
||||
partition_info: &PartitionInfo,
|
||||
files: Vec<ParquetFile>,
|
||||
target_level: CompactionLevel,
|
||||
) -> (FilesToCompactOrSplit, Vec<ParquetFile>);
|
||||
}
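// A minimal illustrative implementor of this trait (hypothetical, not part of
// the crate): it always compacts all given files and keeps none, which is what
// `SplitCompact` does when the total size fits within its budget.
#[derive(Debug)]
pub struct CompactAll;

impl Display for CompactAll {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "compact_all")
    }
}

impl SplitOrCompact for CompactAll {
    fn apply(
        &self,
        _partition_info: &PartitionInfo,
        files: Vec<ParquetFile>,
        _target_level: CompactionLevel,
    ) -> (FilesToCompactOrSplit, Vec<ParquetFile>) {
        // Keep nothing back; compact everything in one run.
        (FilesToCompactOrSplit::FilesToCompact(files), vec![])
    }
}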
|
|
@ -0,0 +1,258 @@
|
|||
use std::fmt::Display;
|
||||
|
||||
use data_types::{CompactionLevel, ParquetFile};
|
||||
|
||||
use crate::{file_classification::FilesToCompactOrSplit, partition_info::PartitionInfo};
|
||||
|
||||
use super::{
|
||||
files_to_compact::limit_files_to_compact, files_to_split::identify_files_to_split,
|
||||
SplitOrCompact,
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct SplitCompact {
|
||||
max_compact_size: usize,
|
||||
}
|
||||
|
||||
impl SplitCompact {
|
||||
pub fn new(max_compact_size: usize) -> Self {
|
||||
Self { max_compact_size }
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for SplitCompact {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "split_or_compact({})", self.max_compact_size)
|
||||
}
|
||||
}
|
||||
|
||||
impl SplitOrCompact for SplitCompact {
|
||||
/// Return (`[files_to_split_or_compact]`, `[files_to_keep]`) of the given files
///
/// Verify whether the given files are over the max_compact_size limit.
/// If so, find start-level files that can be split to reduce the number of overlapped files that must be compacted in one run.
/// If no split is needed, pick files to compact that stay under the max_compact_size limit
|
||||
fn apply(
|
||||
&self,
|
||||
_partition_info: &PartitionInfo,
|
||||
files: Vec<ParquetFile>,
|
||||
target_level: CompactionLevel,
|
||||
) -> (FilesToCompactOrSplit, Vec<ParquetFile>) {
|
||||
// Compact all in one run if the total size is at most max_compact_size
|
||||
let total_size: i64 = files.iter().map(|f| f.file_size_bytes).sum();
|
||||
if total_size as usize <= self.max_compact_size {
|
||||
return (FilesToCompactOrSplit::FilesToCompact(files), vec![]);
|
||||
}
|
||||
|
||||
// See if split is needed
|
||||
let (files_to_split, files_not_to_split) = identify_files_to_split(files, target_level);
|
||||
|
||||
if !files_to_split.is_empty() {
|
||||
// These files must be split before further compaction
|
||||
(
|
||||
FilesToCompactOrSplit::FilesToSplit(files_to_split),
|
||||
files_not_to_split,
|
||||
)
|
||||
} else {
|
||||
// No split is needed; limit the number of files to compact to stay under the total size limit
|
||||
let (files_to_compact, files_to_keep) =
|
||||
limit_files_to_compact(self.max_compact_size, files_not_to_split, target_level);
|
||||
|
||||
(
|
||||
FilesToCompactOrSplit::FilesToCompact(files_to_compact),
|
||||
files_to_keep,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
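// Decision sketch with illustrative numbers: given max_compact_size = 300b and
// six 100b files, the total of 600b exceeds the budget, so
// identify_files_to_split runs first; if nothing qualifies for splitting (every
// start-level file overlaps with at most one target-level file),
// limit_files_to_compact then picks the largest set of start/target pairs that
// fits within the 300b budget.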
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use compactor2_test_utils::{
|
||||
create_overlapped_l0_l1_files_2, create_overlapped_l1_l2_files_2, format_files,
|
||||
format_files_split,
|
||||
};
|
||||
use data_types::CompactionLevel;
|
||||
|
||||
use crate::{
|
||||
components::split_or_compact::{split_compact::SplitCompact, SplitOrCompact},
|
||||
test_utils::PartitionInfoBuilder,
|
||||
};
|
||||
|
||||
const MAX_SIZE: usize = 100;
|
||||
|
||||
#[test]
|
||||
fn test_empty() {
|
||||
let files = vec![];
|
||||
let p_info = Arc::new(PartitionInfoBuilder::new().build());
|
||||
let split_compact = SplitCompact::new(MAX_SIZE);
|
||||
let (files_to_compact_or_split, files_to_keep) =
|
||||
split_compact.apply(&p_info, files, CompactionLevel::Initial);
|
||||
|
||||
assert!(files_to_compact_or_split.is_empty());
|
||||
assert!(files_to_keep.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compact_too_small_to_compact() {
|
||||
let files = create_overlapped_l1_l2_files_2(MAX_SIZE as i64);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L1, all files 100b "
|
||||
- "L1.13[600,700] |----L1.13-----|"
|
||||
- "L1.12[400,500] |----L1.12-----| "
|
||||
- "L1.11[250,350] |----L1.11-----| "
|
||||
- "L2, all files 100b "
|
||||
- "L2.22[200,300] |----L2.22-----| "
|
||||
"###
|
||||
);
|
||||
|
||||
let p_info = Arc::new(PartitionInfoBuilder::new().build());
|
||||
let split_compact = SplitCompact::new(MAX_SIZE);
|
||||
let (files_to_compact_or_split, files_to_keep) =
|
||||
split_compact.apply(&p_info, files, CompactionLevel::Final);
|
||||
// nothing to compact or split
|
||||
assert!(files_to_compact_or_split.is_empty());
|
||||
assert_eq!(files_to_keep.len(), 4);
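// (With a 100b budget, even the smallest start/target pair is 200b, so
// limit_files_to_compact cannot afford any pair and returns all 4 files as
// files_to_keep.)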
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compact_files_no_limit() {
|
||||
let files = create_overlapped_l0_l1_files_2(MAX_SIZE as i64);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L0, all files 100b "
|
||||
- "L0.2[650,750] |-----L0.2-----| "
|
||||
- "L0.1[450,620] |----------L0.1-----------| "
|
||||
- "L0.3[800,900] |-----L0.3-----|"
|
||||
- "L1, all files 100b "
|
||||
- "L1.13[600,700] |----L1.13-----| "
|
||||
- "L1.12[400,500] |----L1.12-----| "
|
||||
"###
|
||||
);
|
||||
|
||||
// size limit > total size --> compact all
|
||||
let p_info = Arc::new(PartitionInfoBuilder::new().build());
|
||||
let split_compact = SplitCompact::new(MAX_SIZE * 6 + 1);
|
||||
let (files_to_compact_or_split, files_to_keep) =
|
||||
split_compact.apply(&p_info, files, CompactionLevel::FileNonOverlapped);
|
||||
|
||||
assert_eq!(files_to_compact_or_split.files_to_compact_len(), 5);
|
||||
assert!(files_to_keep.is_empty());
|
||||
|
||||
// See layout of the two sets of files
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("files to compact", &files_to_compact_or_split.files_to_compact() , "files to keep:", &files_to_keep),
|
||||
@r###"
|
||||
---
|
||||
- files to compact
|
||||
- "L0, all files 100b "
|
||||
- "L0.2[650,750] |-----L0.2-----| "
|
||||
- "L0.1[450,620] |----------L0.1-----------| "
|
||||
- "L0.3[800,900] |-----L0.3-----|"
|
||||
- "L1, all files 100b "
|
||||
- "L1.13[600,700] |----L1.13-----| "
|
||||
- "L1.12[400,500] |----L1.12-----| "
|
||||
- "files to keep:"
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_split_files() {
|
||||
let files = create_overlapped_l0_l1_files_2(MAX_SIZE as i64);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L0, all files 100b "
|
||||
- "L0.2[650,750] |-----L0.2-----| "
|
||||
- "L0.1[450,620] |----------L0.1-----------| "
|
||||
- "L0.3[800,900] |-----L0.3-----|"
|
||||
- "L1, all files 100b "
|
||||
- "L1.13[600,700] |----L1.13-----| "
|
||||
- "L1.12[400,500] |----L1.12-----| "
|
||||
"###
|
||||
);
|
||||
|
||||
// hit size limit -> split start_level files that overlap with more than 1 target_level file
|
||||
let p_info = Arc::new(PartitionInfoBuilder::new().build());
|
||||
let split_compact = SplitCompact::new(MAX_SIZE);
|
||||
let (files_to_compact_or_split, files_to_keep) =
|
||||
split_compact.apply(&p_info, files, CompactionLevel::FileNonOverlapped);
|
||||
|
||||
assert_eq!(files_to_compact_or_split.files_to_split_len(), 1);
|
||||
assert_eq!(files_to_keep.len(), 4);
|
||||
|
||||
// See layout of the two sets of files
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("files to compact or split:", &files_to_compact_or_split.files_to_split(), "files to keep:", &files_to_keep),
|
||||
@r###"
|
||||
---
|
||||
- "files to compact or split:"
|
||||
- "L0, all files 100b "
|
||||
- "L0.1[450,620] |-------------------------------------L0.1-------------------------------------|"
|
||||
- "files to keep:"
|
||||
- "L0, all files 100b "
|
||||
- "L0.2[650,750] |-----L0.2-----| "
|
||||
- "L0.3[800,900] |-----L0.3-----|"
|
||||
- "L1, all files 100b "
|
||||
- "L1.12[400,500] |----L1.12-----| "
|
||||
- "L1.13[600,700] |----L1.13-----| "
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compact_files() {
|
||||
let files = create_overlapped_l1_l2_files_2(MAX_SIZE as i64);
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L1, all files 100b "
|
||||
- "L1.13[600,700] |----L1.13-----|"
|
||||
- "L1.12[400,500] |----L1.12-----| "
|
||||
- "L1.11[250,350] |----L1.11-----| "
|
||||
- "L2, all files 100b "
|
||||
- "L2.22[200,300] |----L2.22-----| "
|
||||
"###
|
||||
);
|
||||
|
||||
// hit size limit and nothing to split --> limit the number of files to compact
|
||||
let p_info = Arc::new(PartitionInfoBuilder::new().build());
|
||||
let split_compact = SplitCompact::new(MAX_SIZE * 3);
|
||||
let (files_to_compact_or_split, files_to_keep) =
|
||||
split_compact.apply(&p_info, files, CompactionLevel::Final);
|
||||
|
||||
assert_eq!(files_to_compact_or_split.files_to_compact_len(), 3);
|
||||
assert_eq!(files_to_keep.len(), 1);
|
||||
|
||||
// See layout of the two sets of files
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files_split("files to compact or split:", &files_to_compact_or_split.files_to_compact() , "files to keep:", &files_to_keep),
|
||||
@r###"
|
||||
---
|
||||
- "files to compact or split:"
|
||||
- "L1, all files 100b "
|
||||
- "L1.11[250,350] |---------L1.11----------| "
|
||||
- "L1.12[400,500] |---------L1.12----------| "
|
||||
- "L2, all files 100b "
|
||||
- "L2.22[200,300] |---------L2.22----------| "
|
||||
- "files to keep:"
|
||||
- "L1, all files 100b "
|
||||
- "L1.13[600,700] |------------------------------------L1.13-------------------------------------|"
|
||||
"###
|
||||
);
|
||||
}
|
||||
}
|
|
@ -9,7 +9,9 @@ use tracker::InstrumentedAsyncSemaphore;
|
|||
use crate::{
|
||||
components::{scratchpad::Scratchpad, Components},
|
||||
error::DynError,
|
||||
file_classification::{FileToSplit, FilesToCompactOrSplit},
|
||||
partition_info::PartitionInfo,
|
||||
PlanIR,
|
||||
};
|
||||
|
||||
/// Tries to compact all eligible partitions, up to
|
||||
|
@ -116,18 +118,21 @@ async fn compact_partition(
|
|||
///
|
||||
/// The high level flow is:
|
||||
///
|
||||
/// . Mutiple rounds, each round process mutltiple branches. Each branch inlcudes at most 200 files
|
||||
/// . Each branch will compact files lowest level (aka start-level) into its next level (aka target-level):
|
||||
/// - Many L0s into fewer and larger L0s. Start-level = target-level = 0
|
||||
/// - Many L1s into fewer and larger L1s. Start-level = target-level = 1
|
||||
/// - (L0s & L1s) to L1s if there are L0s. Start-level = 0, target-level = 1
|
||||
/// - (L1s & L2s) to L2s if no L0s. Start-level = 1, target-level = 2
|
||||
/// . Multiple rounds; each round processes multiple branches. Each branch includes at most 200 files
|
||||
/// . Each branch will compact files of the lowest level (aka start level) into its next level (aka target level), either:
|
||||
/// - Compact many L0s into fewer and larger L0s. Start-level = target-level = 0
|
||||
/// - Compact many L1s into fewer and larger L1s. Start-level = target-level = 1
|
||||
/// - Compact (L0s & L1s) to L1s if there are L0s. Start-level = 0, target-level = 1
|
||||
/// - Compact (L1s & L2s) to L2s if no L0s. Start-level = 1, target-level = 2
|
||||
/// - Split L0s, each of which overlaps with more than 1 L1, into smaller L0s that each overlap with at most one L1 file
|
||||
/// - Split L1s, each of which overlaps with more than 1 L2, into smaller L1s that each overlap with at most one L2 file
|
||||
/// . Each branch also finds non-overlapping files and upgrades them to avoid unnecessary recompacting.
|
||||
/// The files are actually divided into:
|
||||
/// 1. files_to_keep: do not compact these files because they are already higher than the target level
|
||||
/// 2. files_to_upgrade: upgrade these initial-level files to the target level because they do not overlap with
|
||||
/// any target-level or initial-level files and are large enough (> desired max size)
|
||||
/// 3. files_to_compact: the rest of the files that must be compacted
|
||||
/// 3. files_to_compact_or_split: the rest of the files, which will be compacted or split accordingly
|
||||
|
||||
///
|
||||
/// Example: 4 files: two L0s, two L1s and one L2
|
||||
/// Input:
|
||||
|
@ -202,24 +207,20 @@ async fn try_compact_partition(
|
|||
.file_classifier
|
||||
.classify(&partition_info, &round_info, branch);
|
||||
|
||||
// Cannot run this plan; skip this partition because the number or size of input files is over the limit.
|
||||
// The partition_resource_limit_filter will throw an error if one of the limits is hit, which leads
|
||||
// to the partition being added to the `skipped_compactions` catalog table so we do not bother
|
||||
// compacting it again.
|
||||
// TODO: After https://github.com/influxdata/idpe/issues/17090 is implemented (aka V3), we will
|
||||
// split files into smaller branches and also compact L0s into fewer L0s to deal with all kinds
|
||||
// of conditions even with limited resources. Then we will remove this resource limit check.
|
||||
if !components
|
||||
.partition_resource_limit_filter
|
||||
.apply(&partition_info, &file_classification.files_to_compact)
|
||||
.apply(
|
||||
&partition_info,
|
||||
&file_classification.files_to_compact_or_split.files(),
|
||||
)
|
||||
.await?
|
||||
{
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Compact
|
||||
let created_file_params = run_compaction_plan(
|
||||
&file_classification.files_to_compact,
|
||||
let created_file_params = run_plans(
|
||||
&file_classification.files_to_compact_or_split,
|
||||
&partition_info,
|
||||
&components,
|
||||
file_classification.target_level,
|
||||
|
@ -237,10 +238,11 @@ async fn try_compact_partition(
|
|||
|
||||
// Update the catalog to reflect the newly created files, soft delete the compacted files and
|
||||
// update the upgraded files
|
||||
let files_to_delete = file_classification.files_to_compact_or_split.files();
|
||||
let (created_files, upgraded_files) = update_catalog(
|
||||
Arc::clone(&components),
|
||||
partition_id,
|
||||
file_classification.files_to_compact,
|
||||
files_to_delete,
|
||||
file_classification.files_to_upgrade,
|
||||
created_file_params,
|
||||
file_classification.target_level,
|
||||
|
@ -257,6 +259,41 @@ async fn try_compact_partition(
|
|||
}
|
||||
}
|
||||
|
||||
/// Compact or split the given files
|
||||
async fn run_plans(
|
||||
files: &FilesToCompactOrSplit,
|
||||
partition_info: &Arc<PartitionInfo>,
|
||||
components: &Arc<Components>,
|
||||
target_level: CompactionLevel,
|
||||
job_semaphore: Arc<InstrumentedAsyncSemaphore>,
|
||||
scratchpad_ctx: &mut dyn Scratchpad,
|
||||
) -> Result<Vec<ParquetFileParams>, DynError> {
|
||||
match files {
|
||||
FilesToCompactOrSplit::FilesToCompact(files) => {
|
||||
run_compaction_plan(
|
||||
files,
|
||||
partition_info,
|
||||
components,
|
||||
target_level,
|
||||
job_semaphore,
|
||||
scratchpad_ctx,
|
||||
)
|
||||
.await
|
||||
}
|
||||
FilesToCompactOrSplit::FilesToSplit(files) => {
|
||||
run_split_plans(
|
||||
files,
|
||||
partition_info,
|
||||
components,
|
||||
target_level,
|
||||
job_semaphore,
|
||||
scratchpad_ctx,
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Compact `files` into a new parquet file of the given target_level
|
||||
async fn run_compaction_plan(
|
||||
files: &[ParquetFile],
|
||||
|
@ -285,8 +322,89 @@ async fn run_compaction_plan(
|
|||
let plan_ir =
|
||||
components
|
||||
.ir_planner
|
||||
.plan(branch_inpad, Arc::clone(partition_info), target_level);
|
||||
.compact_plan(branch_inpad, Arc::clone(partition_info), target_level);
|
||||
|
||||
execute_plan(
|
||||
plan_ir,
|
||||
partition_info,
|
||||
components,
|
||||
target_level,
|
||||
job_semaphore,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Split each of the given files into multiple files
|
||||
async fn run_split_plans(
|
||||
files_to_split: &[FileToSplit],
|
||||
partition_info: &Arc<PartitionInfo>,
|
||||
components: &Arc<Components>,
|
||||
target_level: CompactionLevel,
|
||||
job_semaphore: Arc<InstrumentedAsyncSemaphore>,
|
||||
scratchpad_ctx: &mut dyn Scratchpad,
|
||||
) -> Result<Vec<ParquetFileParams>, DynError> {
|
||||
if files_to_split.is_empty() {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
let mut created_file_params = vec![];
|
||||
for file_to_split in files_to_split {
|
||||
let x = run_split_plan(
|
||||
file_to_split,
|
||||
partition_info,
|
||||
components,
|
||||
target_level,
|
||||
Arc::clone(&job_semaphore),
|
||||
scratchpad_ctx,
|
||||
)
|
||||
.await?;
|
||||
created_file_params.extend(x);
|
||||
}
|
||||
|
||||
Ok(created_file_params)
|
||||
}
|
||||
|
||||
// Split a given file into multiple files
|
||||
async fn run_split_plan(
|
||||
file_to_split: &FileToSplit,
|
||||
partition_info: &Arc<PartitionInfo>,
|
||||
components: &Arc<Components>,
|
||||
target_level: CompactionLevel,
|
||||
job_semaphore: Arc<InstrumentedAsyncSemaphore>,
|
||||
scratchpad_ctx: &mut dyn Scratchpad,
|
||||
) -> Result<Vec<ParquetFileParams>, DynError> {
|
||||
// stage files
|
||||
let input_path = (&file_to_split.file).into();
|
||||
let input_uuids_inpad = scratchpad_ctx.load_to_scratchpad(&[input_path]).await;
|
||||
let file_inpad = ParquetFile {
|
||||
object_store_id: input_uuids_inpad[0],
|
||||
..file_to_split.file.clone()
|
||||
};
|
||||
|
||||
let plan_ir = components.ir_planner.split_plan(
|
||||
file_inpad,
|
||||
file_to_split.split_times.clone(),
|
||||
Arc::clone(partition_info),
|
||||
target_level,
|
||||
);
|
||||
|
||||
execute_plan(
|
||||
plan_ir,
|
||||
partition_info,
|
||||
components,
|
||||
target_level,
|
||||
job_semaphore,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn execute_plan(
|
||||
plan_ir: PlanIR,
|
||||
partition_info: &Arc<PartitionInfo>,
|
||||
components: &Arc<Components>,
|
||||
target_level: CompactionLevel,
|
||||
job_semaphore: Arc<InstrumentedAsyncSemaphore>,
|
||||
) -> Result<Vec<ParquetFileParams>, DynError> {
|
||||
let create = {
|
||||
// draw semaphore BEFORE creating the DataFusion plan and drop it directly AFTER finishing the
|
||||
// DataFusion computation (but BEFORE doing any additional external IO).
|
||||
|
|
|
@ -12,9 +12,8 @@ pub struct FileClassification {
|
|||
/// The target level of file resulting from compaction
|
||||
pub target_level: CompactionLevel,
|
||||
|
||||
/// Files which should be compacted into a new single parquet
|
||||
/// file, often the small and/or overlapped files
|
||||
pub files_to_compact: Vec<ParquetFile>,
|
||||
/// Decision on what files should be compacted or split. See [`FilesToCompactOrSplit`] for more details.
|
||||
pub files_to_compact_or_split: FilesToCompactOrSplit,
|
||||
|
||||
/// Non-overlapped files that should be upgraded to the target
|
||||
/// level without rewriting (for example they are of sufficient
|
||||
|
@ -25,3 +24,99 @@ pub struct FileClassification {
|
|||
/// non-overlapped or higher-target-level files
|
||||
pub files_to_keep: Vec<ParquetFile>,
|
||||
}
|
||||
|
||||
impl FileClassification {
|
||||
pub fn files_to_compact_len(&self) -> usize {
|
||||
match &self.files_to_compact_or_split {
|
||||
FilesToCompactOrSplit::FilesToCompact(files) => files.len(),
|
||||
FilesToCompactOrSplit::FilesToSplit(_) => 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn files_to_split_len(&self) -> usize {
|
||||
match &self.files_to_compact_or_split {
|
||||
FilesToCompactOrSplit::FilesToCompact(_files) => 0,
|
||||
FilesToCompactOrSplit::FilesToSplit(files) => files.len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Files to compact or to split
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum FilesToCompactOrSplit {
|
||||
/// These files should be compacted together, ideally forming a single output file.
|
||||
/// Due to constraints such as the maximum desired output file size and the "leading edge" optimization
|
||||
/// `FilesToCompact` may actually produce multiple output files.
|
||||
FilesToCompact(Vec<ParquetFile>),
|
||||
/// The input files should be split into multiple output files, at the specified times
|
||||
FilesToSplit(Vec<FileToSplit>),
|
||||
}
|
||||
|
||||
impl FilesToCompactOrSplit {
|
||||
// Return true if the list is empty
|
||||
pub fn is_empty(&self) -> bool {
|
||||
match self {
|
||||
Self::FilesToCompact(files) => files.is_empty(),
|
||||
Self::FilesToSplit(files) => files.is_empty(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return length of files to compact
|
||||
pub fn files_to_compact_len(&self) -> usize {
|
||||
match self {
|
||||
Self::FilesToCompact(files) => files.len(),
|
||||
Self::FilesToSplit(_) => 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return length of files to split
|
||||
pub fn files_to_split_len(&self) -> usize {
|
||||
match self {
|
||||
Self::FilesToCompact(_) => 0,
|
||||
Self::FilesToSplit(files) => files.len(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return files to compact
|
||||
pub fn files_to_compact(&self) -> Vec<ParquetFile> {
|
||||
match self {
|
||||
Self::FilesToCompact(files) => files.clone(),
|
||||
Self::FilesToSplit(_) => vec![],
|
||||
}
|
||||
}
|
||||
|
||||
/// Return files to split
|
||||
pub fn files_to_split(&self) -> Vec<ParquetFile> {
|
||||
match self {
|
||||
Self::FilesToCompact(_) => vec![],
|
||||
Self::FilesToSplit(files) => {
|
||||
let files: Vec<ParquetFile> = files.iter().map(|f| f.file.clone()).collect();
|
||||
files
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Return files of either type
|
||||
pub fn files(&self) -> Vec<ParquetFile> {
|
||||
match self {
|
||||
Self::FilesToCompact(files) => files.clone(),
|
||||
Self::FilesToSplit(files) => files.iter().map(|f| f.file.clone()).collect(),
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the target level of the files: the compaction level of the split files if any,
// or else the given target level
|
||||
pub fn target_level(&self, target_level: CompactionLevel) -> CompactionLevel {
|
||||
match self {
|
||||
Self::FilesToCompact(_) => target_level,
|
||||
Self::FilesToSplit(files) => files[0].file.compaction_level,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A file to split and its split times
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub struct FileToSplit {
|
||||
pub file: ParquetFile,
|
||||
pub split_times: Vec<i64>,
|
||||
}
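// For example, per the files_to_split tests: a FileToSplit for L0.1[450,620]
// with split_times = vec![500] asks the executor to produce two files covering
// [450,500] and [501,620].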
|
||||
|
|
|
@ -19,6 +19,8 @@ pub enum RoundInfo {
|
|||
start_level: CompactionLevel,
|
||||
/// max number of files to group in each plan
|
||||
max_num_files_to_group: usize,
|
||||
/// max total size limit of files to group in each plan
|
||||
max_total_file_size_to_group: usize,
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -29,7 +31,8 @@ impl Display for RoundInfo {
|
|||
Self::ManySmallFiles {
|
||||
start_level,
|
||||
max_num_files_to_group,
|
||||
} => write!(f, "ManySmallFiles: {start_level}, {max_num_files_to_group}",),
|
||||
max_total_file_size_to_group,
|
||||
} => write!(f, "ManySmallFiles: {start_level}, {max_num_files_to_group}, {max_total_file_size_to_group}",),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -75,57 +75,6 @@ async fn test_num_files_over_limit() {
|
|||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_total_file_size_over_limit() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
||||
// Create a test setup with 6 files
|
||||
let setup = TestSetup::builder()
|
||||
.await
|
||||
.with_files()
|
||||
.await
|
||||
// Set max size < the input file size --> it won't get compacted
|
||||
.with_max_input_parquet_bytes_per_partition_relative_to_total_size(-1)
|
||||
.build()
|
||||
.await;
|
||||
|
||||
// verify 6 files
|
||||
let files = setup.list_by_table_not_to_delete().await;
|
||||
assert_eq!(files.len(), 6);
|
||||
|
||||
// verify ID and compaction level of the files
|
||||
assert_levels(
|
||||
&files,
|
||||
vec![
|
||||
(1, CompactionLevel::FileNonOverlapped),
|
||||
(2, CompactionLevel::Initial),
|
||||
(3, CompactionLevel::Initial),
|
||||
(4, CompactionLevel::FileNonOverlapped),
|
||||
(5, CompactionLevel::Initial),
|
||||
(6, CompactionLevel::Initial),
|
||||
],
|
||||
);
|
||||
|
||||
setup.run_compact().await;
|
||||
|
||||
// read files and verify they are not compacted
|
||||
let files = setup.list_by_table_not_to_delete().await;
|
||||
assert_eq!(files.len(), 6);
|
||||
|
||||
// verify ID and compaction level of the files
|
||||
assert_levels(
|
||||
&files,
|
||||
vec![
|
||||
(1, CompactionLevel::FileNonOverlapped),
|
||||
(2, CompactionLevel::Initial),
|
||||
(3, CompactionLevel::Initial),
|
||||
(4, CompactionLevel::FileNonOverlapped),
|
||||
(5, CompactionLevel::Initial),
|
||||
(6, CompactionLevel::Initial),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compact_target_level() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
@ -249,7 +198,7 @@ async fn test_compact_large_overlapes() {
|
|||
.await
|
||||
// the test setup does not exceed number of files limit
|
||||
.with_max_num_files_per_plan(10)
|
||||
// the test setup exceed max compact size limit
|
||||
// the test setup has a total file size exceeding the max compact size limit
|
||||
.with_max_input_parquet_bytes_per_partition_relative_to_total_size(-1)
|
||||
.with_min_num_l1_files_to_compact(2)
|
||||
.with_max_desired_file_size_bytes(100 * 1024 * 1024)
|
||||
|
@ -264,51 +213,82 @@ async fn test_compact_large_overlapes() {
|
|||
---
|
||||
- initial
|
||||
- "L1 "
|
||||
- "L1.4[0,68000] 2.66kb|-----------------L1.4-----------------| "
|
||||
- "L1.4[6000,68000] 2.66kb|----------------L1.4----------------| "
|
||||
- "L1.5[136000,136000] 2.17kb |L1.5|"
|
||||
- "L2 "
|
||||
- "L2.1[8000,12000] 1.8kb |L2.1| "
|
||||
- "L2.2[20000,30000] 2.61kb |L2.2| "
|
||||
- "L2.3[35000,36000] 2.17kb |L2.3| "
|
||||
- "L2.1[8000,12000] 1.8kb |L2.1| "
|
||||
- "L2.2[20000,30000] 2.61kb |L2.2| "
|
||||
- "L2.3[36000,36000] 2.17kb |L2.3| "
|
||||
"###
|
||||
);
|
||||
|
||||
// compact
|
||||
setup.run_compact().await;
|
||||
|
||||
// Due to the size limit, compaction skips this partition and 5 files remain in the system
|
||||
// After PR https://github.com/influxdata/influxdb_iox/pull/7079 is in, this test will fail here
|
||||
// and the right result should be similar to the commented-out output below
|
||||
let files = setup.list_by_table_not_to_delete().await;
|
||||
assert_eq!(files.len(), 5);
|
||||
let mut files = setup.list_by_table_not_to_delete().await;
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L2 "
|
||||
- "L2.3[36000,36000] 2.17kb |L2.3| "
|
||||
- "L2.9[6000,30000] 2.68kb|----L2.9----| "
|
||||
- "L2.10[68000,136000] 2.62kb |-----------------L2.10-----------------| "
|
||||
"###
|
||||
);
|
||||
|
||||
// todo: use insta::assert_yaml_snapshot! to verify the output layout
|
||||
assert_eq!(files.len(), 3);
|
||||
|
||||
// // verify the content of files
|
||||
// // Compacted smaller file with the later data
|
||||
// let mut files = setup.list_by_table_not_to_delete().await;
|
||||
// let file1 = files.pop().unwrap();
|
||||
// let batches = setup.read_parquet_file(file1).await;
|
||||
// assert_batches_sorted_eq!(
|
||||
// &[
|
||||
// "+-----------+------+------+------+-----------------------------+",
|
||||
// "| field_int | tag1 | tag2 | tag3 | time |",
|
||||
// "+-----------+------+------+------+-----------------------------+",
|
||||
// "| 10 | VT | | | 1970-01-01T00:00:00.000006Z |",
|
||||
// "| 10 | VT | | | 1970-01-01T00:00:00.000010Z |",
|
||||
// "| 10 | VT | | | 1970-01-01T00:00:00.000068Z |",
|
||||
// "| 1500 | WA | | | 1970-01-01T00:00:00.000008Z |",
|
||||
// "| 1601 | | PA | 15 | 1970-01-01T00:00:00.000028Z |",
|
||||
// "| 1601 | | PA | 15 | 1970-01-01T00:00:00.000030Z |",
|
||||
// "| 21 | | OH | 21 | 1970-01-01T00:00:00.000036Z |",
|
||||
// "| 210 | | OH | 21 | 1970-01-01T00:00:00.000136Z |",
|
||||
// "| 270 | UT | | | 1970-01-01T00:00:00.000025Z |",
|
||||
// "| 70 | UT | | | 1970-01-01T00:00:00.000020Z |",
|
||||
// "| 99 | OR | | | 1970-01-01T00:00:00.000012Z |",
|
||||
// "+-----------+------+------+------+-----------------------------+",
|
||||
// ],
|
||||
// &batches
|
||||
// );
|
||||
// order files by their min_time
|
||||
files.sort_by_key(|f| f.min_time);
|
||||
|
||||
let file = files[0].clone();
|
||||
let batches = setup.read_parquet_file(file).await;
|
||||
assert_batches_sorted_eq!(
|
||||
&[
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| field_int | tag1 | tag2 | tag3 | time |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| 10 | VT | | | 1970-01-01T00:00:00.000006Z |",
|
||||
"| 10 | VT | | | 1970-01-01T00:00:00.000010Z |",
|
||||
"| 1500 | WA | | | 1970-01-01T00:00:00.000008Z |",
|
||||
"| 1601 | | PA | 15 | 1970-01-01T00:00:00.000028Z |",
|
||||
"| 1601 | | PA | 15 | 1970-01-01T00:00:00.000030Z |",
|
||||
"| 270 | UT | | | 1970-01-01T00:00:00.000025Z |",
|
||||
"| 70 | UT | | | 1970-01-01T00:00:00.000020Z |",
|
||||
"| 99 | OR | | | 1970-01-01T00:00:00.000012Z |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
],
|
||||
&batches
|
||||
);
|
||||
|
||||
let file = files[1].clone();
|
||||
let batches = setup.read_parquet_file(file).await;
|
||||
assert_batches_sorted_eq!(
|
||||
&[
|
||||
"+-----------+------+------+-----------------------------+",
|
||||
"| field_int | tag2 | tag3 | time |",
|
||||
"+-----------+------+------+-----------------------------+",
|
||||
"| 21 | OH | 21 | 1970-01-01T00:00:00.000036Z |",
|
||||
"+-----------+------+------+-----------------------------+",
|
||||
],
|
||||
&batches
|
||||
);
|
||||
|
||||
let file = files[2].clone();
|
||||
let batches = setup.read_parquet_file(file).await;
|
||||
assert_batches_sorted_eq!(
|
||||
&[
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| field_int | tag1 | tag2 | tag3 | time |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| 10 | VT | | | 1970-01-01T00:00:00.000068Z |",
|
||||
"| 210 | | OH | 21 | 1970-01-01T00:00:00.000136Z |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
],
|
||||
&batches
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
@ -341,51 +321,83 @@ async fn test_compact_large_overlape_2() {
|
|||
---
|
||||
- initial
|
||||
- "L1 "
|
||||
- "L1.4[8000,25000] 1.8kb|--L1.4--| "
|
||||
- "L1.5[28000,136000] 2.64kb |------------------------------L1.5-------------------------------| "
|
||||
- "L1.4[6000,25000] 1.8kb|--L1.4---| "
|
||||
- "L1.5[28000,136000] 2.64kb |------------------------------L1.5------------------------------| "
|
||||
- "L2 "
|
||||
- "L2.1[8000,12000] 1.8kb|L2.1| "
|
||||
- "L2.2[20000,30000] 2.61kb |L2.2| "
|
||||
- "L2.3[35000,36000] 2.17kb |L2.3| "
|
||||
- "L2.1[8000,12000] 1.8kb |L2.1| "
|
||||
- "L2.2[20000,30000] 2.61kb |L2.2| "
|
||||
- "L2.3[36000,36000] 2.17kb |L2.3| "
|
||||
"###
|
||||
);
|
||||
|
||||
// compact
|
||||
setup.run_compact().await;
|
||||
|
||||
// Due to the size limit, compaction skips this partition and 5 files remain in the system
|
||||
// After PR https://github.com/influxdata/influxdb_iox/pull/7079 is in, this test will fail here
|
||||
// and the right result should be similar to the commented-out output below
|
||||
let files = setup.list_by_table_not_to_delete().await;
|
||||
assert_eq!(files.len(), 5);
|
||||
let mut files = setup.list_by_table_not_to_delete().await;
|
||||
insta::assert_yaml_snapshot!(
|
||||
format_files("initial", &files),
|
||||
@r###"
|
||||
---
|
||||
- initial
|
||||
- "L1 "
|
||||
- "L1.9[68000,136000] 2.62kb |-----------------L1.9------------------| "
|
||||
- "L2 "
|
||||
- "L2.3[36000,36000] 2.17kb |L2.3| "
|
||||
- "L2.10[6000,30000] 2.68kb|---L2.10----| "
|
||||
"###
|
||||
);
|
||||
|
||||
// todo: use insta::assert_yaml_snapshot! to verify the output layout
|
||||
assert_eq!(files.len(), 3);
|
||||
|
||||
// // verify the content of files
|
||||
// // Compacted smaller file with the later data
|
||||
// let mut files = setup.list_by_table_not_to_delete().await;
|
||||
// let file1 = files.pop().unwrap();
|
||||
// let batches = setup.read_parquet_file(file1).await;
|
||||
// assert_batches_sorted_eq!(
|
||||
// &[
|
||||
// "+-----------+------+------+------+-----------------------------+",
|
||||
// "| field_int | tag1 | tag2 | tag3 | time |",
|
||||
// "+-----------+------+------+------+-----------------------------+",
|
||||
// "| 10 | VT | | | 1970-01-01T00:00:00.000006Z |",
|
||||
// "| 10 | VT | | | 1970-01-01T00:00:00.000010Z |",
|
||||
// "| 10 | VT | | | 1970-01-01T00:00:00.000068Z |",
|
||||
// "| 1500 | WA | | | 1970-01-01T00:00:00.000008Z |",
|
||||
// "| 1601 | | PA | 15 | 1970-01-01T00:00:00.000028Z |",
|
||||
// "| 1601 | | PA | 15 | 1970-01-01T00:00:00.000030Z |",
|
||||
// "| 21 | | OH | 21 | 1970-01-01T00:00:00.000036Z |",
|
||||
// "| 210 | | OH | 21 | 1970-01-01T00:00:00.000136Z |",
|
||||
// "| 270 | UT | | | 1970-01-01T00:00:00.000025Z |",
|
||||
// "| 70 | UT | | | 1970-01-01T00:00:00.000020Z |",
|
||||
// "| 99 | OR | | | 1970-01-01T00:00:00.000012Z |",
|
||||
// "+-----------+------+------+------+-----------------------------+",
|
||||
// ],
|
||||
// &batches
|
||||
// );
|
||||
// order files by their min_time
|
||||
files.sort_by_key(|f| f.min_time);
|
||||
|
||||
let file = files[0].clone();
|
||||
let batches = setup.read_parquet_file(file).await;
|
||||
assert_batches_sorted_eq!(
|
||||
&[
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| field_int | tag1 | tag2 | tag3 | time |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| 10 | VT | | | 1970-01-01T00:00:00.000006Z |",
|
||||
"| 10 | VT | | | 1970-01-01T00:00:00.000010Z |",
|
||||
"| 1500 | WA | | | 1970-01-01T00:00:00.000008Z |",
|
||||
"| 1601 | | PA | 15 | 1970-01-01T00:00:00.000028Z |",
|
||||
"| 1601 | | PA | 15 | 1970-01-01T00:00:00.000030Z |",
|
||||
"| 270 | UT | | | 1970-01-01T00:00:00.000025Z |",
|
||||
"| 70 | UT | | | 1970-01-01T00:00:00.000020Z |",
|
||||
"| 99 | OR | | | 1970-01-01T00:00:00.000012Z |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
],
|
||||
&batches
|
||||
);
|
||||
|
||||
let file = files[1].clone();
|
||||
let batches = setup.read_parquet_file(file).await;
|
||||
assert_batches_sorted_eq!(
|
||||
&[
|
||||
"+-----------+------+------+-----------------------------+",
|
||||
"| field_int | tag2 | tag3 | time |",
|
||||
"+-----------+------+------+-----------------------------+",
|
||||
"| 21 | OH | 21 | 1970-01-01T00:00:00.000036Z |",
|
||||
"+-----------+------+------+-----------------------------+",
|
||||
],
|
||||
&batches
|
||||
);
|
||||
|
||||
let file = files[2].clone();
|
||||
let batches = setup.read_parquet_file(file).await;
|
||||
assert_batches_sorted_eq!(
|
||||
&[
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| field_int | tag1 | tag2 | tag3 | time |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
"| 10 | VT | | | 1970-01-01T00:00:00.000068Z |",
|
||||
"| 210 | | OH | 21 | 1970-01-01T00:00:00.000136Z |",
|
||||
"+-----------+------+------+------+-----------------------------+",
|
||||
],
|
||||
&batches
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
|
@ -2,7 +2,10 @@
|
|||
//!
|
||||
//! See [crate::layout] module for detailed documentation
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use data_types::CompactionLevel;
|
||||
use iox_time::{MockProvider, Time, TimeProvider};
|
||||
|
||||
use crate::layouts::{
|
||||
all_overlapping_l0_files, layout_setup_builder, parquet_builder, run_layout_scenario, ONE_MB,
|
||||
|
@ -72,39 +75,39 @@ async fn all_overlapping_l0() {
|
|||
---
|
||||
- "**** Input Files "
|
||||
- "L0, all files 9mb "
|
||||
- "L0.1[100,200] |-------------------------------------L0.1-------------------------------------|"
|
||||
- "L0.2[100,200] |-------------------------------------L0.2-------------------------------------|"
|
||||
- "L0.3[100,200] |-------------------------------------L0.3-------------------------------------|"
|
||||
- "L0.4[100,200] |-------------------------------------L0.4-------------------------------------|"
|
||||
- "L0.5[100,200] |-------------------------------------L0.5-------------------------------------|"
|
||||
- "L0.6[100,200] |-------------------------------------L0.6-------------------------------------|"
|
||||
- "L0.7[100,200] |-------------------------------------L0.7-------------------------------------|"
|
||||
- "L0.8[100,200] |-------------------------------------L0.8-------------------------------------|"
|
||||
- "L0.9[100,200] |-------------------------------------L0.9-------------------------------------|"
|
||||
- "L0.10[100,200] |------------------------------------L0.10-------------------------------------|"
|
||||
- "**** Simulation run 0, type=split(split_times=[180]). 10 Input Files, 90mb total:"
|
||||
- "L0.1[100,200000] |-------------------------------------L0.1-------------------------------------|"
|
||||
- "L0.2[100,200000] |-------------------------------------L0.2-------------------------------------|"
|
||||
- "L0.3[100,200000] |-------------------------------------L0.3-------------------------------------|"
|
||||
- "L0.4[100,200000] |-------------------------------------L0.4-------------------------------------|"
|
||||
- "L0.5[100,200000] |-------------------------------------L0.5-------------------------------------|"
|
||||
- "L0.6[100,200000] |-------------------------------------L0.6-------------------------------------|"
|
||||
- "L0.7[100,200000] |-------------------------------------L0.7-------------------------------------|"
|
||||
- "L0.8[100,200000] |-------------------------------------L0.8-------------------------------------|"
|
||||
- "L0.9[100,200000] |-------------------------------------L0.9-------------------------------------|"
|
||||
- "L0.10[100,200000] |------------------------------------L0.10-------------------------------------|"
|
||||
- "**** Simulation run 0, type=split(split_times=[160020]). 10 Input Files, 90mb total:"
|
||||
- "L0, all files 9mb "
|
||||
- "L0.10[100,200] |------------------------------------L0.10-------------------------------------|"
|
||||
- "L0.9[100,200] |-------------------------------------L0.9-------------------------------------|"
|
||||
- "L0.8[100,200] |-------------------------------------L0.8-------------------------------------|"
|
||||
- "L0.7[100,200] |-------------------------------------L0.7-------------------------------------|"
|
||||
- "L0.6[100,200] |-------------------------------------L0.6-------------------------------------|"
|
||||
- "L0.5[100,200] |-------------------------------------L0.5-------------------------------------|"
|
||||
- "L0.4[100,200] |-------------------------------------L0.4-------------------------------------|"
|
||||
- "L0.3[100,200] |-------------------------------------L0.3-------------------------------------|"
|
||||
- "L0.2[100,200] |-------------------------------------L0.2-------------------------------------|"
|
||||
- "L0.1[100,200] |-------------------------------------L0.1-------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 89.1mb total:"
|
||||
- "L0.10[100,200000] |------------------------------------L0.10-------------------------------------|"
|
||||
- "L0.9[100,200000] |-------------------------------------L0.9-------------------------------------|"
|
||||
- "L0.8[100,200000] |-------------------------------------L0.8-------------------------------------|"
|
||||
- "L0.7[100,200000] |-------------------------------------L0.7-------------------------------------|"
|
||||
- "L0.6[100,200000] |-------------------------------------L0.6-------------------------------------|"
|
||||
- "L0.5[100,200000] |-------------------------------------L0.5-------------------------------------|"
|
||||
- "L0.4[100,200000] |-------------------------------------L0.4-------------------------------------|"
|
||||
- "L0.3[100,200000] |-------------------------------------L0.3-------------------------------------|"
|
||||
- "L0.2[100,200000] |-------------------------------------L0.2-------------------------------------|"
|
||||
- "L0.1[100,200000] |-------------------------------------L0.1-------------------------------------|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 90mb total:"
|
||||
- "L1 "
|
||||
- "L1.?[100,180] 72mb |-----------------------------L1.?-----------------------------| "
|
||||
- "L1.?[181,200] 17.1mb |----L1.?-----| "
|
||||
- "L1.?[100,160020] 72mb|-----------------------------L1.?-----------------------------| "
|
||||
- "L1.?[160021,200000] 18mb |----L1.?-----| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 10 files: L0.1, L0.2, L0.3, L0.4, L0.5, L0.6, L0.7, L0.8, L0.9, L0.10"
|
||||
- " Creating 2 files at level CompactionLevel::L1"
|
||||
- "**** Final Output Files "
|
||||
- "L1 "
|
||||
- "L1.11[100,180] 72mb |----------------------------L1.11-----------------------------| "
|
||||
- "L1.12[181,200] 17.1mb |----L1.12----| "
|
||||
- "L1.11[100,160020] 72mb|----------------------------L1.11-----------------------------| "
|
||||
- "L1.12[160021,200000] 18mb |----L1.12----| "
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
@ -408,6 +411,8 @@ async fn l1_too_much_with_non_overlapping_l0() {
|
|||
|
||||
let setup = layout_setup_builder().await.build().await;
|
||||
|
||||
let time_provider = Arc::new(MockProvider::new(Time::from_timestamp(0, 0).unwrap()));
|
||||
|
||||
// If we wait until we have 10 L1 files, each of which is not large
|
||||
// enough to upgrade, the total size will be > 256MB and we will
|
||||
// skip the partition
|
||||
|
@ -415,6 +420,7 @@ async fn l1_too_much_with_non_overlapping_l0() {
|
|||
// L1: 90MB, 80MB, 70MB, ..., 70MB
|
||||
// L0: ..
|
||||
|
||||
let mut num_l1_files = 0;
|
||||
for (i, sz) in [90, 80, 70, 70, 70, 70, 70, 70, 70, 70].iter().enumerate() {
|
||||
let i = i as i64;
|
||||
setup
|
||||
|
@ -424,19 +430,24 @@ async fn l1_too_much_with_non_overlapping_l0() {
|
|||
.with_min_time(50 + i * 50)
|
||||
.with_max_time(99 + i * 50)
|
||||
.with_compaction_level(CompactionLevel::FileNonOverlapped)
|
||||
.with_file_size_bytes(sz * ONE_MB),
|
||||
.with_file_size_bytes(sz * ONE_MB)
|
||||
.with_max_l0_created_at(time_provider.minutes_into_future(i as u64)),
|
||||
)
|
||||
.await;
|
||||
num_l1_files += 1;
|
||||
}
|
||||
// note these overlap with each other, but not the L1 files
|
||||
for _ in 0..3 {
|
||||
for i in 0..3 {
|
||||
setup
|
||||
.partition
|
||||
.create_parquet_file(
|
||||
parquet_builder()
|
||||
.with_min_time(600)
|
||||
.with_max_time(650)
|
||||
.with_file_size_bytes(5 * ONE_MB),
|
||||
.with_max_time(649)
|
||||
.with_file_size_bytes(5 * ONE_MB)
|
||||
.with_max_l0_created_at(
|
||||
time_provider.minutes_into_future(num_l1_files + i + 1),
|
||||
),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
@ -447,9 +458,9 @@ async fn l1_too_much_with_non_overlapping_l0() {
|
|||
---
|
||||
- "**** Input Files "
|
||||
- "L0 "
|
||||
- "L0.11[600,650] 5mb |L0.11|"
|
||||
- "L0.12[600,650] 5mb |L0.12|"
|
||||
- "L0.13[600,650] 5mb |L0.13|"
|
||||
- "L0.11[600,649] 5mb |L0.11|"
|
||||
- "L0.12[600,649] 5mb |L0.12|"
|
||||
- "L0.13[600,649] 5mb |L0.13|"
|
||||
- "L1 "
|
||||
- "L1.1[50,99] 90mb |L1.1| "
|
||||
- "L1.2[100,149] 80mb |L1.2| "
|
||||
|
@@ -463,29 +474,68 @@ async fn l1_too_much_with_non_overlapping_l0() {
 - "L1.10[500,549] 70mb |L1.10| "
 - "**** Simulation run 0, type=compact. 3 Input Files, 15mb total:"
 - "L0, all files 5mb "
-- "L0.13[600,650] |------------------------------------L0.13-------------------------------------|"
-- "L0.12[600,650] |------------------------------------L0.12-------------------------------------|"
-- "L0.11[600,650] |------------------------------------L0.11-------------------------------------|"
+- "L0.13[600,649] |------------------------------------L0.13-------------------------------------|"
+- "L0.12[600,649] |------------------------------------L0.12-------------------------------------|"
+- "L0.11[600,649] |------------------------------------L0.11-------------------------------------|"
 - "**** 1 Output Files (parquet_file_id not yet assigned), 15mb total:"
 - "L1, all files 15mb "
-- "L1.?[600,650] |-------------------------------------L1.?-------------------------------------|"
+- "L1.?[600,649] |-------------------------------------L1.?-------------------------------------|"
 - "Committing partition 1:"
 - " Soft Deleting 3 files: L0.11, L0.12, L0.13"
 - " Creating 1 files at level CompactionLevel::L1"
-- "SKIPPED COMPACTION for PartitionId(1): partition 1 has 781189120 parquet file bytes, limit is 268435456"
+- "**** Simulation run 1, type=split(split_times=[113, 176]). 3 Input Files, 240mb total:"
+- "L1 "
+- "L1.1[50,99] 90mb |----------L1.1----------| "
+- "L1.2[100,149] 80mb |----------L1.2----------| "
+- "L1.3[150,199] 70mb |----------L1.3----------| "
+- "**** 3 Output Files (parquet_file_id not yet assigned), 236.78mb total:"
+- "L2 "
+- "L2.?[50,113] 101.48mb|-------------L2.?--------------| "
+- "L2.?[114,176] 99.87mb |-------------L2.?--------------| "
+- "L2.?[177,199] 35.44mb |--L2.?---| "
+- "Committing partition 1:"
+- " Soft Deleting 3 files: L1.1, L1.2, L1.3"
+- " Creating 3 files at level CompactionLevel::L2"
+- "**** Simulation run 2, type=split(split_times=[271, 342]). 3 Input Files, 210mb total:"
+- "L1, all files 70mb "
+- "L1.4[200,249] |----------L1.4----------| "
+- "L1.5[250,299] |----------L1.5----------| "
+- "L1.6[300,349] |----------L1.6----------| "
+- "**** 3 Output Files (parquet_file_id not yet assigned), 207.18mb total:"
+- "L2 "
+- "L2.?[200,271] 100.07mb|----------------L2.?----------------| "
+- "L2.?[272,342] 98.66mb |---------------L2.?----------------| "
+- "L2.?[343,349] 8.46mb |L2.?|"
+- "Committing partition 1:"
+- " Soft Deleting 3 files: L1.4, L1.5, L1.6"
+- " Creating 3 files at level CompactionLevel::L2"
+- "**** Simulation run 3, type=split(split_times=[421, 492]). 3 Input Files, 210mb total:"
+- "L1, all files 70mb "
+- "L1.7[350,399] |----------L1.7----------| "
+- "L1.8[400,449] |----------L1.8----------| "
+- "L1.9[450,499] |----------L1.9----------| "
+- "**** 3 Output Files (parquet_file_id not yet assigned), 207.18mb total:"
+- "L2 "
+- "L2.?[350,421] 100.07mb|----------------L2.?----------------| "
+- "L2.?[422,492] 98.66mb |---------------L2.?----------------| "
+- "L2.?[493,499] 8.46mb |L2.?|"
+- "Committing partition 1:"
+- " Soft Deleting 3 files: L1.7, L1.8, L1.9"
+- " Creating 3 files at level CompactionLevel::L2"
 - "**** Final Output Files "
 - "L1 "
-- "L1.1[50,99] 90mb |L1.1| "
-- "L1.2[100,149] 80mb |L1.2| "
-- "L1.3[150,199] 70mb |L1.3| "
-- "L1.4[200,249] 70mb |L1.4| "
-- "L1.5[250,299] 70mb |L1.5| "
-- "L1.6[300,349] 70mb |L1.6| "
-- "L1.7[350,399] 70mb |L1.7| "
-- "L1.8[400,449] 70mb |L1.8| "
-- "L1.9[450,499] 70mb |L1.9| "
 - "L1.10[500,549] 70mb |L1.10| "
-- "L1.14[600,650] 15mb |L1.14|"
+- "L1.14[600,649] 15mb |L1.14|"
+- "L2 "
+- "L2.15[50,113] 101.48mb|L2.15-| "
+- "L2.16[114,176] 99.87mb |L2.16-| "
+- "L2.17[177,199] 35.44mb |L2.17| "
+- "L2.18[200,271] 100.07mb |-L2.18-| "
+- "L2.19[272,342] 98.66mb |-L2.19-| "
+- "L2.20[343,349] 8.46mb |L2.20| "
+- "L2.21[350,421] 100.07mb |-L2.21-| "
+- "L2.22[422,492] 98.66mb |-L2.22-| "
+- "L2.23[493,499] 8.46mb |L2.23| "
 "###
 );
 }
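The `split(split_times=[...])` runs above break an oversized input into roughly target-sized outputs. A minimal sketch of how such split points could be chosen, mirroring two properties visible in the expected output (a file's `max_time` is never a split point, and a tiny time range yields no split points); the names and the even-spacing heuristic are illustrative, not the production algorithm:

```rust
/// Sketch: evenly spaced split points over [min_time, max_time].
fn compute_split_times(min_time: i64, max_time: i64, splits: usize) -> Vec<i64> {
    let range = max_time - min_time;
    if range <= 1 || splits < 2 {
        // A tiny time range cannot be split further.
        return vec![];
    }
    let step = (range as f64 / splits as f64).ceil() as i64;
    (1..splits as i64)
        .map(|i| min_time + i * step)
        .filter(|t| *t < max_time) // never emit the file's own max_time
        .collect()
}
```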
File diff suppressed because it is too large
@@ -14,7 +14,7 @@ async fn one_l1_overlaps_with_many_l2s() {
     // Simulate a production scenario in which there are two L1 files but one overlaps with three L2 files
     // and their total size > limit 256MB
     // |----------L2.1----------||----------L2.2----------||-----L2.3----|
-    // |----------------------------------------L1.1---------------------------||--L1.2--|
+    // |----------------------------------------L1.4---------------------------||--L1.5--|

     test_helpers::maybe_start_logging();
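The ASCII bars above are time ranges; two files overlap exactly when their closed `[min_time, max_time]` intervals intersect. A self-contained sketch of that check (the struct and function names are illustrative, not the crate's API):

```rust
#[derive(Debug, Clone, Copy)]
struct TimeRange {
    min: i64,
    max: i64,
}

/// Closed-interval overlap: true when the two ranges share a timestamp.
fn overlaps(a: TimeRange, b: TimeRange) -> bool {
    a.min <= b.max && b.min <= a.max
}

fn main() {
    // L1.4[1,250] overlaps L2.1[51,100] in the scenario above.
    assert!(overlaps(
        TimeRange { min: 1, max: 250 },
        TimeRange { min: 51, max: 100 }
    ));
}
```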
@@ -72,15 +72,41 @@ async fn one_l1_overlaps_with_many_l2s() {
 - "L2.1[51,100] 100mb |L2.1-| "
 - "L2.2[101,150] 100mb |L2.2-| "
 - "L2.3[151,200] 70mb |L2.3-| "
-- "SKIPPED COMPACTION for PartitionId(1): partition 1 has 419430400 parquet file bytes, limit is 268435456"
+- "**** Simulation run 0, type=split(split_times=[100, 150, 200]). 1 Input Files, 100mb total:"
+- "L1, all files 100mb "
+- "L1.4[1,250] |-------------------------------------L1.4-------------------------------------|"
+- "**** 4 Output Files (parquet_file_id not yet assigned), 98.8mb total:"
+- "L1 "
+- "L1.?[1,100] 39.76mb |------------L1.?-------------| "
+- "L1.?[101,150] 19.68mb |----L1.?-----| "
+- "L1.?[151,200] 19.68mb |----L1.?-----| "
+- "L1.?[201,250] 19.68mb |----L1.?-----| "
+- "Committing partition 1:"
+- " Soft Deleting 1 files: L1.4"
+- " Creating 4 files at level CompactionLevel::L1"
+- "**** Simulation run 1, type=split(split_times=[72]). 2 Input Files, 139.76mb total:"
+- "L1 "
+- "L1.6[1,100] 39.76mb |-------------------------------------L1.6-------------------------------------|"
+- "L2 "
+- "L2.1[51,100] 100mb |----------------L2.1-----------------| "
+- "**** 2 Output Files (parquet_file_id not yet assigned), 138.35mb total:"
+- "L2 "
+- "L2.?[1,72] 100.23mb |-------------------------L2.?--------------------------| "
+- "L2.?[73,100] 38.12mb |-------L2.?--------| "
+- "Committing partition 1:"
+- " Soft Deleting 2 files: L2.1, L1.6"
+- " Creating 2 files at level CompactionLevel::L2"
 - "**** Final Output Files "
 - "L1 "
-- "L1.4[1,250] 100mb |----------------L1.4-----------------| "
 - "L1.5[251,500] 30mb |----------------L1.5-----------------| "
+- "L1.7[101,150] 19.68mb |L1.7-| "
+- "L1.8[151,200] 19.68mb |L1.8-| "
+- "L1.9[201,250] 19.68mb |L1.9-| "
 - "L2 "
-- "L2.1[51,100] 100mb |L2.1-| "
 - "L2.2[101,150] 100mb |L2.2-| "
 - "L2.3[151,200] 70mb |L2.3-| "
+- "L2.10[1,72] 100.23mb|--L2.10--| "
+- "L2.11[73,100] 38.12mb |L2.11| "
 "###
 );
 }
@@ -170,21 +196,45 @@ async fn many_l1_overlaps_with_many_l2s() {
 - "L2.1[51,100] 100mb |--------L2.1---------| "
 - "L2.2[101,150] 100mb |--------L2.2---------| "
 - "L2.3[151,200] 70mb |--------L2.3---------| "
-- "SKIPPED COMPACTION for PartitionId(1): partition 1 has 392167424 parquet file bytes, limit is 268435456"
+- "**** Simulation run 0, type=split(split_times=[100]). 1 Input Files, 13mb total:"
+- "L1, all files 13mb "
+- "L1.6[91,105] |-------------------------------------L1.6-------------------------------------|"
+- "**** 2 Output Files (parquet_file_id not yet assigned), 12.07mb total:"
+- "L1 "
+- "L1.?[91,100] 8.36mb |----------------------L1.?-----------------------| "
+- "L1.?[101,105] 3.71mb |--------L1.?--------| "
+- "Committing partition 1:"
+- " Soft Deleting 1 files: L1.6"
+- " Creating 2 files at level CompactionLevel::L1"
+- "**** Simulation run 1, type=split(split_times=[91, 131]). 7 Input Files, 251.07mb total:"
+- "L1 "
+- "L1.4[61,75] 13mb |--L1.4---| "
+- "L1.5[76,90] 13mb |--L1.5---| "
+- "L1.12[91,100] 8.36mb |L1.12| "
+- "L1.13[101,105] 3.71mb |L1.13| "
+- "L1.7[106,120] 13mb |--L1.7---| "
+- "L2 "
+- "L2.1[51,100] 100mb |----------------L2.1-----------------| "
+- "L2.2[101,150] 100mb |----------------L2.2-----------------| "
+- "**** 3 Output Files (parquet_file_id not yet assigned), 246mb total:"
+- "L2 "
+- "L2.?[51,91] 101.44mb|-------------L2.?-------------| "
+- "L2.?[92,131] 98.91mb |------------L2.?-------------| "
+- "L2.?[132,150] 45.65mb |----L2.?----| "
+- "Committing partition 1:"
+- " Soft Deleting 7 files: L2.1, L2.2, L1.4, L1.5, L1.7, L1.12, L1.13"
+- " Creating 3 files at level CompactionLevel::L2"
 - "**** Final Output Files "
 - "L1 "
-- "L1.4[61,75] 13mb |L1.4| "
-- "L1.5[76,90] 13mb |L1.5| "
-- "L1.6[91,105] 13mb |L1.6| "
-- "L1.7[106,120] 13mb |L1.7| "
 - "L1.8[121,135] 13mb |L1.8| "
 - "L1.9[136,150] 13mb |L1.9| "
 - "L1.10[151,165] 13mb |L1.10| "
 - "L1.11[201,215] 13mb |L1.11|"
 - "L2 "
-- "L2.1[51,100] 100mb |--------L2.1---------| "
-- "L2.2[101,150] 100mb |--------L2.2---------| "
 - "L2.3[151,200] 70mb |--------L2.3---------| "
+- "L2.14[51,91] 101.44mb|------L2.14------| "
+- "L2.15[92,131] 98.91mb |------L2.15------| "
+- "L2.16[132,150] 45.65mb |L2.16-| "
 "###
 );
 }
|
@ -511,297 +561,385 @@ async fn many_good_size_l0_files() {
|
|||
- "L0.286[285,286] |L0.286|"
|
||||
- "L0.287[286,287] |L0.287|"
|
||||
- "L0.288[287,288] |L0.288|"
|
||||
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has 419430400 parquet file bytes, limit is 268435456"
|
||||
- "**** Final Output Files "
|
||||
- "**** Simulation run 0, type=split(split_times=[50, 100]). 128 Input Files, 256mb total:"
|
||||
- "L0, all files 2mb "
|
||||
- "L0.1[0,1] |L0.1| "
|
||||
- "L0.2[1,2] |L0.2| "
|
||||
- "L0.3[2,3] |L0.3| "
|
||||
- "L0.4[3,4] |L0.4| "
|
||||
- "L0.5[4,5] |L0.5| "
|
||||
- "L0.6[5,6] |L0.6| "
|
||||
- "L0.7[6,7] |L0.7| "
|
||||
- "L0.8[7,8] |L0.8| "
|
||||
- "L0.9[8,9] |L0.9| "
|
||||
- "L0.10[9,10] |L0.10| "
|
||||
- "L0.11[10,11] |L0.11| "
|
||||
- "L0.12[11,12] |L0.12| "
|
||||
- "L0.13[12,13] |L0.13| "
|
||||
- "L0.14[13,14] |L0.14| "
|
||||
- "L0.15[14,15] |L0.15| "
|
||||
- "L0.16[15,16] |L0.16| "
|
||||
- "L0.17[16,17] |L0.17| "
|
||||
- "L0.18[17,18] |L0.18| "
|
||||
- "L0.19[18,19] |L0.19| "
|
||||
- "L0.20[19,20] |L0.20| "
|
||||
- "L0.21[20,21] |L0.21| "
|
||||
- "L0.22[21,22] |L0.22| "
|
||||
- "L0.23[22,23] |L0.23| "
|
||||
- "L0.24[23,24] |L0.24| "
|
||||
- "L0.25[24,25] |L0.25| "
|
||||
- "L0.26[25,26] |L0.26| "
|
||||
- "L0.27[26,27] |L0.27| "
|
||||
- "L0.28[27,28] |L0.28| "
|
||||
- "L0.29[28,29] |L0.29| "
|
||||
- "L0.30[29,30] |L0.30| "
|
||||
- "L0.31[30,31] |L0.31| "
|
||||
- "L0.32[31,32] |L0.32| "
|
||||
- "L0.33[32,33] |L0.33| "
|
||||
- "L0.34[33,34] |L0.34| "
|
||||
- "L0.35[34,35] |L0.35| "
|
||||
- "L0.36[35,36] |L0.36| "
|
||||
- "L0.37[36,37] |L0.37| "
|
||||
- "L0.38[37,38] |L0.38| "
|
||||
- "L0.39[38,39] |L0.39| "
|
||||
- "L0.40[39,40] |L0.40| "
|
||||
- "L0.41[40,41] |L0.41| "
|
||||
- "L0.42[41,42] |L0.42| "
|
||||
- "L0.43[42,43] |L0.43| "
|
||||
- "L0.44[43,44] |L0.44| "
|
||||
- "L0.45[44,45] |L0.45| "
|
||||
- "L0.46[45,46] |L0.46| "
|
||||
- "L0.47[46,47] |L0.47| "
|
||||
- "L0.48[47,48] |L0.48| "
|
||||
- "L0.49[48,49] |L0.49| "
|
||||
- "L0.50[49,50] |L0.50| "
|
||||
- "L0.51[50,51] |L0.51| "
|
||||
- "L0.52[51,52] |L0.52| "
|
||||
- "L0.53[52,53] |L0.53| "
|
||||
- "L0.54[53,54] |L0.54| "
|
||||
- "L0.55[54,55] |L0.55| "
|
||||
- "L0.56[55,56] |L0.56| "
|
||||
- "L0.57[56,57] |L0.57| "
|
||||
- "L0.58[57,58] |L0.58| "
|
||||
- "L0.59[58,59] |L0.59| "
|
||||
- "L0.60[59,60] |L0.60| "
|
||||
- "L0.61[60,61] |L0.61| "
|
||||
- "L0.62[61,62] |L0.62| "
|
||||
- "L0.63[62,63] |L0.63| "
|
||||
- "L0.64[63,64] |L0.64| "
|
||||
- "L0.65[64,65] |L0.65| "
|
||||
- "L0.66[65,66] |L0.66| "
|
||||
- "L0.67[66,67] |L0.67| "
|
||||
- "L0.68[67,68] |L0.68| "
|
||||
- "L0.69[68,69] |L0.69| "
|
||||
- "L0.70[69,70] |L0.70| "
|
||||
- "L0.71[70,71] |L0.71| "
|
||||
- "L0.72[71,72] |L0.72| "
|
||||
- "L0.73[72,73] |L0.73| "
|
||||
- "L0.74[73,74] |L0.74| "
|
||||
- "L0.75[74,75] |L0.75| "
|
||||
- "L0.76[75,76] |L0.76| "
|
||||
- "L0.77[76,77] |L0.77| "
|
||||
- "L0.78[77,78] |L0.78| "
|
||||
- "L0.79[78,79] |L0.79| "
|
||||
- "L0.80[79,80] |L0.80| "
|
||||
- "L0.81[80,81] |L0.81| "
|
||||
- "L0.82[81,82] |L0.82| "
|
||||
- "L0.83[82,83] |L0.83| "
|
||||
- "L0.84[83,84] |L0.84| "
|
||||
- "L0.85[84,85] |L0.85| "
|
||||
- "L0.86[85,86] |L0.86| "
|
||||
- "L0.87[86,87] |L0.87| "
|
||||
- "L0.88[87,88] |L0.88| "
|
||||
- "L0.89[88,89] |L0.89| "
|
||||
- "L0.90[89,90] |L0.90| "
|
||||
- "L0.91[90,91] |L0.91| "
|
||||
- "L0.92[91,92] |L0.92| "
|
||||
- "L0.93[92,93] |L0.93| "
|
||||
- "L0.94[93,94] |L0.94| "
|
||||
- "L0.95[94,95] |L0.95| "
|
||||
- "L0.96[95,96] |L0.96| "
|
||||
- "L0.97[96,97] |L0.97| "
|
||||
- "L0.98[97,98] |L0.98| "
|
||||
- "L0.99[98,99] |L0.99| "
|
||||
- "L0.100[99,100] |L0.100| "
|
||||
- "L0.101[100,101] |L0.101| "
|
||||
- "L0.102[101,102] |L0.102| "
|
||||
- "L0.103[102,103] |L0.103| "
|
||||
- "L0.104[103,104] |L0.104| "
|
||||
- "L0.105[104,105] |L0.105| "
|
||||
- "L0.106[105,106] |L0.106| "
|
||||
- "L0.107[106,107] |L0.107| "
|
||||
- "L0.108[107,108] |L0.108| "
|
||||
- "L0.109[108,109] |L0.109| "
|
||||
- "L0.110[109,110] |L0.110| "
|
||||
- "L0.111[110,111] |L0.111| "
|
||||
- "L0.112[111,112] |L0.112| "
|
||||
- "L0.113[112,113] |L0.113| "
|
||||
- "L0.114[113,114] |L0.114| "
|
||||
- "L0.115[114,115] |L0.115| "
|
||||
- "L0.116[115,116] |L0.116| "
|
||||
- "L0.117[116,117] |L0.117| "
|
||||
- "L0.118[117,118] |L0.118| "
|
||||
- "L0.119[118,119] |L0.119| "
|
||||
- "L0.120[119,120] |L0.120| "
|
||||
- "L0.121[120,121] |L0.121| "
|
||||
- "L0.122[121,122] |L0.122| "
|
||||
- "L0.123[122,123] |L0.123| "
|
||||
- "L0.124[123,124] |L0.124| "
|
||||
- "L0.125[124,125] |L0.125| "
|
||||
- "L0.126[125,126] |L0.126| "
|
||||
- "L0.127[126,127] |L0.127| "
|
||||
- "L0.128[127,128] |L0.128| "
|
||||
- "L0.129[128,129] |L0.129| "
|
||||
- "L0.130[129,130] |L0.130| "
|
||||
- "L0.131[130,131] |L0.131| "
|
||||
- "L0.132[131,132] |L0.132| "
|
||||
- "L0.133[132,133] |L0.133| "
|
||||
- "L0.134[133,134] |L0.134| "
|
||||
- "L0.135[134,135] |L0.135| "
|
||||
- "L0.136[135,136] |L0.136| "
|
||||
- "L0.137[136,137] |L0.137| "
|
||||
- "L0.138[137,138] |L0.138| "
|
||||
- "L0.139[138,139] |L0.139| "
|
||||
- "L0.140[139,140] |L0.140| "
|
||||
- "L0.141[140,141] |L0.141| "
|
||||
- "L0.142[141,142] |L0.142| "
|
||||
- "L0.143[142,143] |L0.143| "
|
||||
- "L0.144[143,144] |L0.144| "
|
||||
- "L0.145[144,145] |L0.145| "
|
||||
- "L0.146[145,146] |L0.146| "
|
||||
- "L0.147[146,147] |L0.147| "
|
||||
- "L0.148[147,148] |L0.148| "
|
||||
- "L0.149[148,149] |L0.149| "
|
||||
- "L0.150[149,150] |L0.150| "
|
||||
- "L0.151[150,151] |L0.151| "
|
||||
- "L0.152[151,152] |L0.152| "
|
||||
- "L0.153[152,153] |L0.153| "
|
||||
- "L0.154[153,154] |L0.154| "
|
||||
- "L0.155[154,155] |L0.155| "
|
||||
- "L0.156[155,156] |L0.156| "
|
||||
- "L0.157[156,157] |L0.157| "
|
||||
- "L0.158[157,158] |L0.158| "
|
||||
- "L0.159[158,159] |L0.159| "
|
||||
- "L0.160[159,160] |L0.160| "
|
||||
- "L0.161[160,161] |L0.161| "
|
||||
- "L0.162[161,162] |L0.162| "
|
||||
- "L0.163[162,163] |L0.163| "
|
||||
- "L0.164[163,164] |L0.164| "
|
||||
- "L0.165[164,165] |L0.165| "
|
||||
- "L0.166[165,166] |L0.166| "
|
||||
- "L0.167[166,167] |L0.167| "
|
||||
- "L0.168[167,168] |L0.168| "
|
||||
- "L0.169[168,169] |L0.169| "
|
||||
- "L0.170[169,170] |L0.170| "
|
||||
- "L0.171[170,171] |L0.171| "
|
||||
- "L0.172[171,172] |L0.172| "
|
||||
- "L0.173[172,173] |L0.173| "
|
||||
- "L0.174[173,174] |L0.174| "
|
||||
- "L0.175[174,175] |L0.175| "
|
||||
- "L0.176[175,176] |L0.176| "
|
||||
- "L0.177[176,177] |L0.177| "
|
||||
- "L0.178[177,178] |L0.178| "
|
||||
- "L0.179[178,179] |L0.179| "
|
||||
- "L0.180[179,180] |L0.180| "
|
||||
- "L0.181[180,181] |L0.181| "
|
||||
- "L0.182[181,182] |L0.182| "
|
||||
- "L0.183[182,183] |L0.183| "
|
||||
- "L0.184[183,184] |L0.184| "
|
||||
- "L0.185[184,185] |L0.185| "
|
||||
- "L0.186[185,186] |L0.186| "
|
||||
- "L0.187[186,187] |L0.187| "
|
||||
- "L0.188[187,188] |L0.188| "
|
||||
- "L0.189[188,189] |L0.189| "
|
||||
- "L0.190[189,190] |L0.190| "
|
||||
- "L0.191[190,191] |L0.191| "
|
||||
- "L0.192[191,192] |L0.192| "
|
||||
- "L0.193[192,193] |L0.193| "
|
||||
- "L0.194[193,194] |L0.194| "
|
||||
- "L0.195[194,195] |L0.195| "
|
||||
- "L0.196[195,196] |L0.196| "
|
||||
- "L0.197[196,197] |L0.197| "
|
||||
- "L0.198[197,198] |L0.198| "
|
||||
- "L0.199[198,199] |L0.199| "
|
||||
- "L0.200[199,200] |L0.200| "
|
||||
- "L0.201[200,201] |L0.201| "
|
||||
- "L0.202[201,202] |L0.202| "
|
||||
- "L0.203[202,203] |L0.203| "
|
||||
- "L0.204[203,204] |L0.204| "
|
||||
- "L0.205[204,205] |L0.205| "
|
||||
- "L0.206[205,206] |L0.206| "
|
||||
- "L0.207[206,207] |L0.207| "
|
||||
- "L0.208[207,208] |L0.208| "
|
||||
- "L0.209[208,209] |L0.209| "
|
||||
- "L0.210[209,210] |L0.210| "
|
||||
- "L0.211[210,211] |L0.211| "
|
||||
- "L0.212[211,212] |L0.212| "
|
||||
- "L0.213[212,213] |L0.213| "
|
||||
- "L0.214[213,214] |L0.214| "
|
||||
- "L0.215[214,215] |L0.215| "
|
||||
- "L0.216[215,216] |L0.216| "
|
||||
- "L0.217[216,217] |L0.217| "
|
||||
- "L0.218[217,218] |L0.218| "
|
||||
- "L0.219[218,219] |L0.219| "
|
||||
- "L0.220[219,220] |L0.220| "
|
||||
- "L0.221[220,221] |L0.221| "
|
||||
- "L0.222[221,222] |L0.222| "
|
||||
- "L0.223[222,223] |L0.223| "
|
||||
- "L0.224[223,224] |L0.224| "
|
||||
- "L0.225[224,225] |L0.225| "
|
||||
- "L0.226[225,226] |L0.226| "
|
||||
- "L0.227[226,227] |L0.227| "
|
||||
- "L0.228[227,228] |L0.228| "
|
||||
- "L0.229[228,229] |L0.229| "
|
||||
- "L0.3[2,3] |L0.3| "
|
||||
- "L0.4[3,4] |L0.4| "
|
||||
- "L0.5[4,5] |L0.5| "
|
||||
- "L0.6[5,6] |L0.6| "
|
||||
- "L0.7[6,7] |L0.7| "
|
||||
- "L0.8[7,8] |L0.8| "
|
||||
- "L0.9[8,9] |L0.9| "
|
||||
- "L0.10[9,10] |L0.10| "
|
||||
- "L0.11[10,11] |L0.11| "
|
||||
- "L0.12[11,12] |L0.12| "
|
||||
- "L0.13[12,13] |L0.13| "
|
||||
- "L0.14[13,14] |L0.14| "
|
||||
- "L0.15[14,15] |L0.15| "
|
||||
- "L0.16[15,16] |L0.16| "
|
||||
- "L0.17[16,17] |L0.17| "
|
||||
- "L0.18[17,18] |L0.18| "
|
||||
- "L0.19[18,19] |L0.19| "
|
||||
- "L0.20[19,20] |L0.20| "
|
||||
- "L0.21[20,21] |L0.21| "
|
||||
- "L0.22[21,22] |L0.22| "
|
||||
- "L0.23[22,23] |L0.23| "
|
||||
- "L0.24[23,24] |L0.24| "
|
||||
- "L0.25[24,25] |L0.25| "
|
||||
- "L0.26[25,26] |L0.26| "
|
||||
- "L0.27[26,27] |L0.27| "
|
||||
- "L0.28[27,28] |L0.28| "
|
||||
- "L0.29[28,29] |L0.29| "
|
||||
- "L0.30[29,30] |L0.30| "
|
||||
- "L0.31[30,31] |L0.31| "
|
||||
- "L0.32[31,32] |L0.32| "
|
||||
- "L0.33[32,33] |L0.33| "
|
||||
- "L0.34[33,34] |L0.34| "
|
||||
- "L0.35[34,35] |L0.35| "
|
||||
- "L0.36[35,36] |L0.36| "
|
||||
- "L0.37[36,37] |L0.37| "
|
||||
- "L0.38[37,38] |L0.38| "
|
||||
- "L0.39[38,39] |L0.39| "
|
||||
- "L0.40[39,40] |L0.40| "
|
||||
- "L0.41[40,41] |L0.41| "
|
||||
- "L0.42[41,42] |L0.42| "
|
||||
- "L0.43[42,43] |L0.43| "
|
||||
- "L0.44[43,44] |L0.44| "
|
||||
- "L0.45[44,45] |L0.45| "
|
||||
- "L0.46[45,46] |L0.46| "
|
||||
- "L0.47[46,47] |L0.47| "
|
||||
- "L0.48[47,48] |L0.48| "
|
||||
- "L0.49[48,49] |L0.49| "
|
||||
- "L0.50[49,50] |L0.50| "
|
||||
- "L0.51[50,51] |L0.51| "
|
||||
- "L0.52[51,52] |L0.52| "
|
||||
- "L0.53[52,53] |L0.53| "
|
||||
- "L0.54[53,54] |L0.54| "
|
||||
- "L0.55[54,55] |L0.55| "
|
||||
- "L0.56[55,56] |L0.56| "
|
||||
- "L0.57[56,57] |L0.57| "
|
||||
- "L0.58[57,58] |L0.58| "
|
||||
- "L0.59[58,59] |L0.59| "
|
||||
- "L0.60[59,60] |L0.60| "
|
||||
- "L0.61[60,61] |L0.61| "
|
||||
- "L0.62[61,62] |L0.62| "
|
||||
- "L0.63[62,63] |L0.63| "
|
||||
- "L0.64[63,64] |L0.64| "
|
||||
- "L0.65[64,65] |L0.65| "
|
||||
- "L0.66[65,66] |L0.66| "
|
||||
- "L0.67[66,67] |L0.67| "
|
||||
- "L0.68[67,68] |L0.68| "
|
||||
- "L0.69[68,69] |L0.69| "
|
||||
- "L0.70[69,70] |L0.70| "
|
||||
- "L0.71[70,71] |L0.71| "
|
||||
- "L0.72[71,72] |L0.72| "
|
||||
- "L0.73[72,73] |L0.73| "
|
||||
- "L0.74[73,74] |L0.74| "
|
||||
- "L0.75[74,75] |L0.75| "
|
||||
- "L0.76[75,76] |L0.76| "
|
||||
- "L0.77[76,77] |L0.77| "
|
||||
- "L0.78[77,78] |L0.78| "
|
||||
- "L0.79[78,79] |L0.79| "
|
||||
- "L0.80[79,80] |L0.80| "
|
||||
- "L0.81[80,81] |L0.81| "
|
||||
- "L0.82[81,82] |L0.82| "
|
||||
- "L0.83[82,83] |L0.83| "
|
||||
- "L0.84[83,84] |L0.84| "
|
||||
- "L0.85[84,85] |L0.85| "
|
||||
- "L0.86[85,86] |L0.86| "
|
||||
- "L0.87[86,87] |L0.87| "
|
||||
- "L0.88[87,88] |L0.88| "
|
||||
- "L0.89[88,89] |L0.89| "
|
||||
- "L0.90[89,90] |L0.90| "
|
||||
- "L0.91[90,91] |L0.91| "
|
||||
- "L0.92[91,92] |L0.92| "
|
||||
- "L0.93[92,93] |L0.93| "
|
||||
- "L0.94[93,94] |L0.94| "
|
||||
- "L0.95[94,95] |L0.95| "
|
||||
- "L0.96[95,96] |L0.96| "
|
||||
- "L0.97[96,97] |L0.97| "
|
||||
- "L0.98[97,98] |L0.98| "
|
||||
- "L0.99[98,99] |L0.99| "
|
||||
- "L0.100[99,100] |L0.100| "
|
||||
- "L0.101[100,101] |L0.101| "
|
||||
- "L0.102[101,102] |L0.102| "
|
||||
- "L0.103[102,103] |L0.103| "
|
||||
- "L0.104[103,104] |L0.104| "
|
||||
- "L0.105[104,105] |L0.105| "
|
||||
- "L0.106[105,106] |L0.106| "
|
||||
- "L0.107[106,107] |L0.107| "
|
||||
- "L0.108[107,108] |L0.108| "
|
||||
- "L0.109[108,109] |L0.109| "
|
||||
- "L0.110[109,110] |L0.110| "
|
||||
- "L0.111[110,111] |L0.111| "
|
||||
- "L0.112[111,112] |L0.112| "
|
||||
- "L0.113[112,113] |L0.113| "
|
||||
- "L0.114[113,114] |L0.114| "
|
||||
- "L0.115[114,115] |L0.115| "
|
||||
- "L0.116[115,116] |L0.116| "
|
||||
- "L0.117[116,117] |L0.117|"
|
||||
- "L0.118[117,118] |L0.118|"
|
||||
- "L0.119[118,119] |L0.119|"
|
||||
- "L0.120[119,120] |L0.120|"
|
||||
- "L0.121[120,121] |L0.121|"
|
||||
- "L0.122[121,122] |L0.122|"
|
||||
- "L0.123[122,123] |L0.123|"
|
||||
- "L0.124[123,124] |L0.124|"
|
||||
- "L0.125[124,125] |L0.125|"
|
||||
- "L0.126[125,126] |L0.126|"
|
||||
- "L0.127[126,127] |L0.127|"
|
||||
- "L0.128[127,128] |L0.128|"
|
||||
- "**** 3 Output Files (parquet_file_id not yet assigned), 252mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[0,50] 100mb |------------L0.?-------------| "
|
||||
- "L0.?[51,100] 98mb |------------L0.?------------| "
|
||||
- "L0.?[101,128] 54mb |-----L0.?-----| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 128 files: L0.1, L0.2, L0.3, L0.4, L0.5, L0.6, L0.7, L0.8, L0.9, L0.10, L0.11, L0.12, L0.13, L0.14, L0.15, L0.16, L0.17, L0.18, L0.19, L0.20, L0.21, L0.22, L0.23, L0.24, L0.25, L0.26, L0.27, L0.28, L0.29, L0.30, L0.31, L0.32, L0.33, L0.34, L0.35, L0.36, L0.37, L0.38, L0.39, L0.40, L0.41, L0.42, L0.43, L0.44, L0.45, L0.46, L0.47, L0.48, L0.49, L0.50, L0.51, L0.52, L0.53, L0.54, L0.55, L0.56, L0.57, L0.58, L0.59, L0.60, L0.61, L0.62, L0.63, L0.64, L0.65, L0.66, L0.67, L0.68, L0.69, L0.70, L0.71, L0.72, L0.73, L0.74, L0.75, L0.76, L0.77, L0.78, L0.79, L0.80, L0.81, L0.82, L0.83, L0.84, L0.85, L0.86, L0.87, L0.88, L0.89, L0.90, L0.91, L0.92, L0.93, L0.94, L0.95, L0.96, L0.97, L0.98, L0.99, L0.100, L0.101, L0.102, L0.103, L0.104, L0.105, L0.106, L0.107, L0.108, L0.109, L0.110, L0.111, L0.112, L0.113, L0.114, L0.115, L0.116, L0.117, L0.118, L0.119, L0.120, L0.121, L0.122, L0.123, L0.124, L0.125, L0.126, L0.127, L0.128"
|
||||
- " Creating 3 files at level CompactionLevel::L0"
|
||||
- "**** Simulation run 1, type=split(split_times=[178, 228]). 128 Input Files, 256mb total:"
|
||||
- "L0, all files 2mb "
|
||||
- "L0.129[128,129] |L0.129| "
|
||||
- "L0.130[129,130] |L0.130| "
|
||||
- "L0.131[130,131] |L0.131| "
|
||||
- "L0.132[131,132] |L0.132| "
|
||||
- "L0.133[132,133] |L0.133| "
|
||||
- "L0.134[133,134] |L0.134| "
|
||||
- "L0.135[134,135] |L0.135| "
|
||||
- "L0.136[135,136] |L0.136| "
|
||||
- "L0.137[136,137] |L0.137| "
|
||||
- "L0.138[137,138] |L0.138| "
|
||||
- "L0.139[138,139] |L0.139| "
|
||||
- "L0.140[139,140] |L0.140| "
|
||||
- "L0.141[140,141] |L0.141| "
|
||||
- "L0.142[141,142] |L0.142| "
|
||||
- "L0.143[142,143] |L0.143| "
|
||||
- "L0.144[143,144] |L0.144| "
|
||||
- "L0.145[144,145] |L0.145| "
|
||||
- "L0.146[145,146] |L0.146| "
|
||||
- "L0.147[146,147] |L0.147| "
|
||||
- "L0.148[147,148] |L0.148| "
|
||||
- "L0.149[148,149] |L0.149| "
|
||||
- "L0.150[149,150] |L0.150| "
|
||||
- "L0.151[150,151] |L0.151| "
|
||||
- "L0.152[151,152] |L0.152| "
|
||||
- "L0.153[152,153] |L0.153| "
|
||||
- "L0.154[153,154] |L0.154| "
|
||||
- "L0.155[154,155] |L0.155| "
|
||||
- "L0.156[155,156] |L0.156| "
|
||||
- "L0.157[156,157] |L0.157| "
|
||||
- "L0.158[157,158] |L0.158| "
|
||||
- "L0.159[158,159] |L0.159| "
|
||||
- "L0.160[159,160] |L0.160| "
|
||||
- "L0.161[160,161] |L0.161| "
|
||||
- "L0.162[161,162] |L0.162| "
|
||||
- "L0.163[162,163] |L0.163| "
|
||||
- "L0.164[163,164] |L0.164| "
|
||||
- "L0.165[164,165] |L0.165| "
|
||||
- "L0.166[165,166] |L0.166| "
|
||||
- "L0.167[166,167] |L0.167| "
|
||||
- "L0.168[167,168] |L0.168| "
|
||||
- "L0.169[168,169] |L0.169| "
|
||||
- "L0.170[169,170] |L0.170| "
|
||||
- "L0.171[170,171] |L0.171| "
|
||||
- "L0.172[171,172] |L0.172| "
|
||||
- "L0.173[172,173] |L0.173| "
|
||||
- "L0.174[173,174] |L0.174| "
|
||||
- "L0.175[174,175] |L0.175| "
|
||||
- "L0.176[175,176] |L0.176| "
|
||||
- "L0.177[176,177] |L0.177| "
|
||||
- "L0.178[177,178] |L0.178| "
|
||||
- "L0.179[178,179] |L0.179| "
|
||||
- "L0.180[179,180] |L0.180| "
|
||||
- "L0.181[180,181] |L0.181| "
|
||||
- "L0.182[181,182] |L0.182| "
|
||||
- "L0.183[182,183] |L0.183| "
|
||||
- "L0.184[183,184] |L0.184| "
|
||||
- "L0.185[184,185] |L0.185| "
|
||||
- "L0.186[185,186] |L0.186| "
|
||||
- "L0.187[186,187] |L0.187| "
|
||||
- "L0.188[187,188] |L0.188| "
|
||||
- "L0.189[188,189] |L0.189| "
|
||||
- "L0.190[189,190] |L0.190| "
|
||||
- "L0.191[190,191] |L0.191| "
|
||||
- "L0.192[191,192] |L0.192| "
|
||||
- "L0.193[192,193] |L0.193| "
|
||||
- "L0.194[193,194] |L0.194| "
|
||||
- "L0.195[194,195] |L0.195| "
|
||||
- "L0.196[195,196] |L0.196| "
|
||||
- "L0.197[196,197] |L0.197| "
|
||||
- "L0.198[197,198] |L0.198| "
|
||||
- "L0.199[198,199] |L0.199| "
|
||||
- "L0.200[199,200] |L0.200| "
|
||||
- "L0.201[200,201] |L0.201| "
|
||||
- "L0.202[201,202] |L0.202| "
|
||||
- "L0.203[202,203] |L0.203| "
|
||||
- "L0.204[203,204] |L0.204| "
|
||||
- "L0.205[204,205] |L0.205| "
|
||||
- "L0.206[205,206] |L0.206| "
|
||||
- "L0.207[206,207] |L0.207| "
|
||||
- "L0.208[207,208] |L0.208| "
|
||||
- "L0.209[208,209] |L0.209| "
|
||||
- "L0.210[209,210] |L0.210| "
|
||||
- "L0.211[210,211] |L0.211| "
|
||||
- "L0.212[211,212] |L0.212| "
|
||||
- "L0.213[212,213] |L0.213| "
|
||||
- "L0.214[213,214] |L0.214| "
|
||||
- "L0.215[214,215] |L0.215| "
|
||||
- "L0.216[215,216] |L0.216| "
|
||||
- "L0.217[216,217] |L0.217| "
|
||||
- "L0.218[217,218] |L0.218| "
|
||||
- "L0.219[218,219] |L0.219| "
|
||||
- "L0.220[219,220] |L0.220| "
|
||||
- "L0.221[220,221] |L0.221| "
|
||||
- "L0.222[221,222] |L0.222| "
|
||||
- "L0.223[222,223] |L0.223| "
|
||||
- "L0.224[223,224] |L0.224| "
|
||||
- "L0.225[224,225] |L0.225| "
|
||||
- "L0.226[225,226] |L0.226| "
|
||||
- "L0.227[226,227] |L0.227| "
|
||||
- "L0.228[227,228] |L0.228| "
|
||||
- "L0.229[228,229] |L0.229| "
|
||||
- "L0.230[229,230] |L0.230| "
|
||||
- "L0.231[230,231] |L0.231| "
|
||||
- "L0.232[231,232] |L0.232| "
|
||||
- "L0.233[232,233] |L0.233| "
|
||||
- "L0.234[233,234] |L0.234| "
|
||||
- "L0.235[234,235] |L0.235| "
|
||||
- "L0.236[235,236] |L0.236| "
|
||||
- "L0.237[236,237] |L0.237| "
|
||||
- "L0.238[237,238] |L0.238| "
|
||||
- "L0.239[238,239] |L0.239| "
|
||||
- "L0.240[239,240] |L0.240| "
|
||||
- "L0.241[240,241] |L0.241| "
|
||||
- "L0.242[241,242] |L0.242| "
|
||||
- "L0.243[242,243] |L0.243| "
|
||||
- "L0.244[243,244] |L0.244| "
|
||||
- "L0.245[244,245] |L0.245| "
|
||||
- "L0.246[245,246] |L0.246| "
|
||||
- "L0.247[246,247] |L0.247| "
|
||||
- "L0.248[247,248] |L0.248| "
|
||||
- "L0.249[248,249] |L0.249| "
|
||||
- "L0.250[249,250] |L0.250| "
|
||||
- "L0.251[250,251] |L0.251| "
|
||||
- "L0.252[251,252] |L0.252| "
|
||||
- "L0.253[252,253] |L0.253| "
|
||||
- "L0.254[253,254] |L0.254| "
|
||||
- "L0.255[254,255] |L0.255| "
|
||||
- "L0.256[255,256] |L0.256| "
|
||||
- "L0.257[256,257] |L0.257| "
|
||||
- "L0.258[257,258] |L0.258| "
|
||||
- "L0.259[258,259] |L0.259| "
|
||||
- "L0.260[259,260] |L0.260| "
|
||||
- "L0.261[260,261] |L0.261|"
|
||||
- "L0.262[261,262] |L0.262|"
|
||||
- "L0.263[262,263] |L0.263|"
|
||||
- "L0.264[263,264] |L0.264|"
|
||||
- "L0.265[264,265] |L0.265|"
|
||||
- "L0.266[265,266] |L0.266|"
|
||||
- "L0.267[266,267] |L0.267|"
|
||||
- "L0.268[267,268] |L0.268|"
|
||||
- "L0.269[268,269] |L0.269|"
|
||||
- "L0.270[269,270] |L0.270|"
|
||||
- "L0.271[270,271] |L0.271|"
|
||||
- "L0.272[271,272] |L0.272|"
|
||||
- "L0.273[272,273] |L0.273|"
|
||||
- "L0.274[273,274] |L0.274|"
|
||||
- "L0.275[274,275] |L0.275|"
|
||||
- "L0.276[275,276] |L0.276|"
|
||||
- "L0.277[276,277] |L0.277|"
|
||||
- "L0.278[277,278] |L0.278|"
|
||||
- "L0.279[278,279] |L0.279|"
|
||||
- "L0.280[279,280] |L0.280|"
|
||||
- "L0.281[280,281] |L0.281|"
|
||||
- "L0.282[281,282] |L0.282|"
|
||||
- "L0.283[282,283] |L0.283|"
|
||||
- "L0.284[283,284] |L0.284|"
|
||||
- "L0.285[284,285] |L0.285|"
|
||||
- "L0.286[285,286] |L0.286|"
|
||||
- "L0.287[286,287] |L0.287|"
|
||||
- "L0.288[287,288] |L0.288|"
|
||||
- "L0.233[232,233] |L0.233| "
|
||||
- "L0.234[233,234] |L0.234| "
|
||||
- "L0.235[234,235] |L0.235| "
|
||||
- "L0.236[235,236] |L0.236| "
|
||||
- "L0.237[236,237] |L0.237| "
|
||||
- "L0.238[237,238] |L0.238| "
|
||||
- "L0.239[238,239] |L0.239| "
|
||||
- "L0.240[239,240] |L0.240| "
|
||||
- "L0.241[240,241] |L0.241| "
|
||||
- "L0.242[241,242] |L0.242| "
|
||||
- "L0.243[242,243] |L0.243| "
|
||||
- "L0.244[243,244] |L0.244| "
|
||||
- "L0.245[244,245] |L0.245|"
|
||||
- "L0.246[245,246] |L0.246|"
|
||||
- "L0.247[246,247] |L0.247|"
|
||||
- "L0.248[247,248] |L0.248|"
|
||||
- "L0.249[248,249] |L0.249|"
|
||||
- "L0.250[249,250] |L0.250|"
|
||||
- "L0.251[250,251] |L0.251|"
|
||||
- "L0.252[251,252] |L0.252|"
|
||||
- "L0.253[252,253] |L0.253|"
|
||||
- "L0.254[253,254] |L0.254|"
|
||||
- "L0.255[254,255] |L0.255|"
|
||||
- "L0.256[255,256] |L0.256|"
|
||||
- "**** 3 Output Files (parquet_file_id not yet assigned), 252mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[128,178] 100mb |------------L0.?-------------| "
|
||||
- "L0.?[179,228] 98mb |------------L0.?------------| "
|
||||
- "L0.?[229,256] 54mb |-----L0.?-----| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 128 files: L0.129, L0.130, L0.131, L0.132, L0.133, L0.134, L0.135, L0.136, L0.137, L0.138, L0.139, L0.140, L0.141, L0.142, L0.143, L0.144, L0.145, L0.146, L0.147, L0.148, L0.149, L0.150, L0.151, L0.152, L0.153, L0.154, L0.155, L0.156, L0.157, L0.158, L0.159, L0.160, L0.161, L0.162, L0.163, L0.164, L0.165, L0.166, L0.167, L0.168, L0.169, L0.170, L0.171, L0.172, L0.173, L0.174, L0.175, L0.176, L0.177, L0.178, L0.179, L0.180, L0.181, L0.182, L0.183, L0.184, L0.185, L0.186, L0.187, L0.188, L0.189, L0.190, L0.191, L0.192, L0.193, L0.194, L0.195, L0.196, L0.197, L0.198, L0.199, L0.200, L0.201, L0.202, L0.203, L0.204, L0.205, L0.206, L0.207, L0.208, L0.209, L0.210, L0.211, L0.212, L0.213, L0.214, L0.215, L0.216, L0.217, L0.218, L0.219, L0.220, L0.221, L0.222, L0.223, L0.224, L0.225, L0.226, L0.227, L0.228, L0.229, L0.230, L0.231, L0.232, L0.233, L0.234, L0.235, L0.236, L0.237, L0.238, L0.239, L0.240, L0.241, L0.242, L0.243, L0.244, L0.245, L0.246, L0.247, L0.248, L0.249, L0.250, L0.251, L0.252, L0.253, L0.254, L0.255, L0.256"
|
||||
- " Creating 3 files at level CompactionLevel::L0"
|
||||
- "**** Simulation run 2, type=split(split_times=[281]). 32 Input Files, 64mb total:"
|
||||
- "L0, all files 2mb "
|
||||
- "L0.257[256,257] |L0.257| "
|
||||
- "L0.258[257,258] |L0.258| "
|
||||
- "L0.259[258,259] |L0.259| "
|
||||
- "L0.260[259,260] |L0.260| "
|
||||
- "L0.261[260,261] |L0.261| "
|
||||
- "L0.262[261,262] |L0.262| "
|
||||
- "L0.263[262,263] |L0.263| "
|
||||
- "L0.264[263,264] |L0.264| "
|
||||
- "L0.265[264,265] |L0.265| "
|
||||
- "L0.266[265,266] |L0.266| "
|
||||
- "L0.267[266,267] |L0.267| "
|
||||
- "L0.268[267,268] |L0.268| "
|
||||
- "L0.269[268,269] |L0.269| "
|
||||
- "L0.270[269,270] |L0.270| "
|
||||
- "L0.271[270,271] |L0.271| "
|
||||
- "L0.272[271,272] |L0.272| "
|
||||
- "L0.273[272,273] |L0.273| "
|
||||
- "L0.274[273,274] |L0.274| "
|
||||
- "L0.275[274,275] |L0.275| "
|
||||
- "L0.276[275,276] |L0.276| "
|
||||
- "L0.277[276,277] |L0.277| "
|
||||
- "L0.278[277,278] |L0.278| "
|
||||
- "L0.279[278,279] |L0.279| "
|
||||
- "L0.280[279,280] |L0.280| "
|
||||
- "L0.281[280,281] |L0.281| "
|
||||
- "L0.282[281,282] |L0.282| "
|
||||
- "L0.283[282,283] |L0.283| "
|
||||
- "L0.284[283,284] |L0.284| "
|
||||
- "L0.285[284,285] |L0.285| "
|
||||
- "L0.286[285,286] |L0.286|"
|
||||
- "L0.287[286,287] |L0.287|"
|
||||
- "L0.288[287,288] |L0.288|"
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 62mb total:"
|
||||
- "L0 "
|
||||
- "L0.?[256,281] 50mb |----------------------------L0.?----------------------------| "
|
||||
- "L0.?[282,288] 12mb |----L0.?-----|"
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 32 files: L0.257, L0.258, L0.259, L0.260, L0.261, L0.262, L0.263, L0.264, L0.265, L0.266, L0.267, L0.268, L0.269, L0.270, L0.271, L0.272, L0.273, L0.274, L0.275, L0.276, L0.277, L0.278, L0.279, L0.280, L0.281, L0.282, L0.283, L0.284, L0.285, L0.286, L0.287, L0.288"
|
||||
- " Creating 2 files at level CompactionLevel::L0"
|
||||
- "**** Simulation run 3, type=split(split_times=[230, 281]). 4 Input Files, 214mb total:"
|
||||
- "L0 "
|
||||
- "L0.296[282,288] 12mb |L0.296|"
|
||||
- "L0.295[256,281] 50mb |-----L0.295-----| "
|
||||
- "L0.294[229,256] 54mb |-----L0.294------| "
|
||||
- "L0.293[179,228] 98mb|-------------L0.293--------------| "
|
||||
- "**** 3 Output Files (parquet_file_id not yet assigned), 210.07mb total:"
|
||||
- "L1 "
|
||||
- "L1.?[179,230] 100.13mb|---------------L1.?----------------| "
|
||||
- "L1.?[231,281] 98.17mb |---------------L1.?---------------| "
|
||||
- "L1.?[282,288] 11.78mb |L1.?|"
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 4 files: L0.293, L0.294, L0.295, L0.296"
|
||||
- " Upgrading 1 files level to CompactionLevel::L1: L0.289"
|
||||
- " Creating 3 files at level CompactionLevel::L1"
|
||||
- "**** Simulation run 4, type=split(split_times=[102, 153]). 3 Input Files, 252mb total:"
|
||||
- "L0 "
|
||||
- "L0.290[51,100] 98mb |-----------L0.290-----------| "
|
||||
- "L0.291[101,128] 54mb |----L0.291-----| "
|
||||
- "L0.292[128,178] 100mb |-----------L0.292------------| "
|
||||
- "**** 3 Output Files (parquet_file_id not yet assigned), 248.03mb total:"
|
||||
- "L1 "
|
||||
- "L1.?[51,102] 101.2mb|-------------L1.?-------------| "
|
||||
- "L1.?[103,153] 99.21mb |------------L1.?-------------| "
|
||||
- "L1.?[154,178] 47.62mb |----L1.?-----| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 3 files: L0.290, L0.291, L0.292"
|
||||
- " Creating 3 files at level CompactionLevel::L1"
|
||||
- "**** Simulation run 5, type=split(split_times=[155, 207]). 3 Input Files, 246.96mb total:"
|
||||
- "L1 "
|
||||
- "L1.301[103,153] 99.21mb|-----------L1.301------------| "
|
||||
- "L1.302[154,178] 47.62mb |---L1.302----| "
|
||||
- "L1.297[179,230] 100.13mb |------------L1.297------------| "
|
||||
- "**** 3 Output Files (parquet_file_id not yet assigned), 243.07mb total:"
|
||||
- "L2 "
|
||||
- "L2.?[103,155] 101.12mb|-------------L2.?-------------| "
|
||||
- "L2.?[156,207] 99.17mb |-------------L2.?-------------| "
|
||||
- "L2.?[208,230] 42.78mb |---L2.?----| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 3 files: L1.297, L1.301, L1.302"
|
||||
- " Upgrading 2 files level to CompactionLevel::L2: L1.289, L1.300"
|
||||
- " Creating 3 files at level CompactionLevel::L2"
|
||||
- "**** Simulation run 6, type=split(split_times=[276]). 2 Input Files, 109.94mb total:"
|
||||
- "L1 "
|
||||
- "L1.299[282,288] 11.78mb |L1.299| "
|
||||
- "L1.298[231,281] 98.17mb|-------------------------------L1.298-------------------------------| "
|
||||
- "**** 2 Output Files (parquet_file_id not yet assigned), 108.02mb total:"
|
||||
- "L2 "
|
||||
- "L2.?[231,276] 86.8mb|----------------------------L2.?-----------------------------| "
|
||||
- "L2.?[277,288] 21.22mb |----L2.?-----| "
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 2 files: L1.298, L1.299"
|
||||
- " Creating 2 files at level CompactionLevel::L2"
|
||||
- "**** Final Output Files "
|
||||
- "L2 "
|
||||
- "L2.289[0,50] 100mb |--L2.289---| "
|
||||
- "L2.300[51,102] 101.2mb |---L2.300---| "
|
||||
- "L2.303[103,155] 101.12mb |---L2.303---| "
|
||||
- "L2.304[156,207] 99.17mb |---L2.304---| "
|
||||
- "L2.305[208,230] 42.78mb |L2.305| "
|
||||
- "L2.306[231,276] 86.8mb |--L2.306--| "
|
||||
- "L2.307[277,288] 21.22mb |L2.307|"
|
||||
"###
|
||||
);
|
||||
}
|
||||
@@ -59,6 +59,7 @@ use std::time::Duration;
 use compactor2_test_utils::{format_files, TestSetup, TestSetupBuilder};
 use data_types::{CompactionLevel, ParquetFile};
 use iox_tests::TestParquetFileBuilder;
+use iox_time::Time;

 pub(crate) const ONE_MB: u64 = 1024 * 1024;
@@ -90,14 +91,15 @@ pub(crate) async fn layout_setup_builder() -> TestSetupBuilder<false> {

 /// Creates a scenario with ten 9 * 1MB overlapping L0 files
 pub(crate) async fn all_overlapping_l0_files(setup: TestSetup) -> TestSetup {
-    for _ in 0..10 {
+    for i in 0..10 {
         setup
             .partition
             .create_parquet_file(
                 parquet_builder()
                     .with_min_time(100)
-                    .with_max_time(200)
-                    .with_file_size_bytes(9 * ONE_MB),
+                    .with_max_time(200000)
+                    .with_file_size_bytes(9 * ONE_MB)
+                    .with_max_l0_created_at(Time::from_timestamp_nanos(i + 1)),
             )
             .await;
     }
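For reference, this is roughly how the surrounding layout tests consume the helper (the test name is hypothetical; `run_layout_scenario` is the driver used throughout this file):

```rust
#[tokio::test]
async fn example_all_overlapping_l0() {
    let setup = layout_setup_builder().await.build().await;
    let setup = all_overlapping_l0_files(setup).await;
    // Drives the compactor simulation and snapshots the formatted layout.
    insta::assert_yaml_snapshot!(run_layout_scenario(&setup).await);
}
```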
@@ -36,14 +36,11 @@ async fn single_giant_file() {
 - "L0, all files 4.88gb "
 - "L0.1[100,100] |-------------------------------------L0.1-------------------------------------|"
 - "WARNING: file L0.1[100,100] 4.88gb exceeds soft limit 100mb by more than 50%"
-- "Committing partition 1:"
-- " Upgrading 1 files level to CompactionLevel::L1: L0.1"
-- "Committing partition 1:"
-- " Upgrading 1 files level to CompactionLevel::L2: L1.1"
+- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 268435456. The may happen if a large amount of data has the same timestamp"
 - "**** Final Output Files "
-- "L2, all files 4.88gb "
-- "L2.1[100,100] |-------------------------------------L2.1-------------------------------------|"
-- "WARNING: file L2.1[100,100] 4.88gb exceeds soft limit 100mb by more than 50%"
+- "L0, all files 4.88gb "
+- "L0.1[100,100] |-------------------------------------L0.1-------------------------------------|"
+- "WARNING: file L0.1[100,100] 4.88gb exceeds soft limit 100mb by more than 50%"
 "###
 );
 }
|
|||
- "L0.2[100,100] |-------------------------------------L0.2-------------------------------------|"
|
||||
- "WARNING: file L0.1[100,100] 4.88gb exceeds soft limit 100mb by more than 50%"
|
||||
- "WARNING: file L0.2[100,100] 4.88gb exceeds soft limit 100mb by more than 50%"
|
||||
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has 10485760000 parquet file bytes, limit is 268435456"
|
||||
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 268435456. The may happen if a large amount of data has the same timestamp"
|
||||
- "**** Final Output Files "
|
||||
- "L0, all files 4.88gb "
|
||||
- "L0.1[100,100] |-------------------------------------L0.1-------------------------------------|"
|
||||
|
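The new `SKIPPED COMPACTION` message above reflects a guard on the files that must be compacted together: when the overlapped set exceeds the 256MB budget and cannot be split by time, the partition is skipped instead of attempted. A minimal sketch of that decision (the function name is hypothetical):

```rust
/// 256MB, matching the limit printed in the log lines above.
const MAX_COMPACT_SIZE: u64 = 268_435_456;

/// Sketch: an overlapped set that is over budget and cannot be split
/// further (e.g. all rows share one timestamp) must be skipped so the
/// compactor neither panics nor produces oversized files.
fn must_skip(total_overlapped_bytes: u64, can_split_by_time: bool) -> bool {
    total_overlapped_bytes > MAX_COMPACT_SIZE && !can_split_by_time
}
```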
@@ -90,6 +87,48 @@ async fn two_giant_files() {
     );
 }

+#[tokio::test]
+async fn two_giant_files_time_range_1() {
+    test_helpers::maybe_start_logging();
+    let setup = layout_setup_builder().await.build().await;
+
+    // This has two large overlapping files that the compactor can't
+    // split as they have a single timestamp. The compactor should not
+    // panic
+    for _ in 0..2 {
+        setup
+            .partition
+            .create_parquet_file(
+                parquet_builder()
+                    .with_min_time(100)
+                    .with_max_time(101)
+                    .with_file_size_bytes(5 * 1000 * ONE_MB)
+                    .with_compaction_level(CompactionLevel::Initial),
+            )
+            .await;
+    }
+
+    insta::assert_yaml_snapshot!(
+        run_layout_scenario(&setup).await,
+        @r###"
+    ---
+    - "**** Input Files "
+    - "L0, all files 4.88gb "
+    - "L0.1[100,101] |-------------------------------------L0.1-------------------------------------|"
+    - "L0.2[100,101] |-------------------------------------L0.2-------------------------------------|"
+    - "WARNING: file L0.1[100,101] 4.88gb exceeds soft limit 100mb by more than 50%"
+    - "WARNING: file L0.2[100,101] 4.88gb exceeds soft limit 100mb by more than 50%"
+    - "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 268435456. The may happen if a large amount of data has the same timestamp"
+    - "**** Final Output Files "
+    - "L0, all files 4.88gb "
+    - "L0.1[100,101] |-------------------------------------L0.1-------------------------------------|"
+    - "L0.2[100,101] |-------------------------------------L0.2-------------------------------------|"
+    - "WARNING: file L0.1[100,101] 4.88gb exceeds soft limit 100mb by more than 50%"
+    - "WARNING: file L0.2[100,101] 4.88gb exceeds soft limit 100mb by more than 50%"
+    "###
+    );
+}
+
 #[tokio::test]
 async fn many_medium_files() {
     test_helpers::maybe_start_logging();
@ -138,29 +177,126 @@ async fn many_medium_files() {
|
|||
- "L0.18[100,100] |------------------------------------L0.18-------------------------------------|"
|
||||
- "L0.19[100,100] |------------------------------------L0.19-------------------------------------|"
|
||||
- "L0.20[100,100] |------------------------------------L0.20-------------------------------------|"
|
||||
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has 629145600 parquet file bytes, limit is 268435456"
|
||||
- "**** Final Output Files "
|
||||
- "**** Simulation run 0, type=compact. 8 Input Files, 240mb total:"
|
||||
- "L0, all files 30mb "
|
||||
- "L0.1[100,100] |-------------------------------------L0.1-------------------------------------|"
|
||||
- "L0.2[100,100] |-------------------------------------L0.2-------------------------------------|"
|
||||
- "L0.3[100,100] |-------------------------------------L0.3-------------------------------------|"
|
||||
- "L0.4[100,100] |-------------------------------------L0.4-------------------------------------|"
|
||||
- "L0.5[100,100] |-------------------------------------L0.5-------------------------------------|"
|
||||
- "L0.6[100,100] |-------------------------------------L0.6-------------------------------------|"
|
||||
- "L0.7[100,100] |-------------------------------------L0.7-------------------------------------|"
|
||||
- "L0.8[100,100] |-------------------------------------L0.8-------------------------------------|"
|
||||
- "L0.9[100,100] |-------------------------------------L0.9-------------------------------------|"
|
||||
- "L0.10[100,100] |------------------------------------L0.10-------------------------------------|"
|
||||
- "L0.11[100,100] |------------------------------------L0.11-------------------------------------|"
|
||||
- "L0.12[100,100] |------------------------------------L0.12-------------------------------------|"
|
||||
- "L0.13[100,100] |------------------------------------L0.13-------------------------------------|"
|
||||
- "L0.14[100,100] |------------------------------------L0.14-------------------------------------|"
|
||||
- "L0.15[100,100] |------------------------------------L0.15-------------------------------------|"
|
||||
- "L0.16[100,100] |------------------------------------L0.16-------------------------------------|"
|
||||
- "L0.17[100,100] |------------------------------------L0.17-------------------------------------|"
|
||||
- "L0.18[100,100] |------------------------------------L0.18-------------------------------------|"
|
||||
- "L0.19[100,100] |------------------------------------L0.19-------------------------------------|"
|
||||
- "L0.20[100,100] |------------------------------------L0.20-------------------------------------|"
|
||||
- "L0.19[100,100] |------------------------------------L0.19-------------------------------------|"
|
||||
- "L0.18[100,100] |------------------------------------L0.18-------------------------------------|"
|
||||
- "L0.17[100,100] |------------------------------------L0.17-------------------------------------|"
|
||||
- "L0.16[100,100] |------------------------------------L0.16-------------------------------------|"
|
||||
- "L0.15[100,100] |------------------------------------L0.15-------------------------------------|"
|
||||
- "L0.14[100,100] |------------------------------------L0.14-------------------------------------|"
|
||||
- "L0.13[100,100] |------------------------------------L0.13-------------------------------------|"
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 240mb total:"
|
||||
- "L1, all files 240mb "
|
||||
- "L1.?[100,100] |-------------------------------------L1.?-------------------------------------|"
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 8 files: L0.13, L0.14, L0.15, L0.16, L0.17, L0.18, L0.19, L0.20"
|
||||
- " Creating 1 files at level CompactionLevel::L1"
|
||||
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 268435456. The may happen if a large amount of data has the same timestamp"
|
||||
- "**** Final Output Files "
|
||||
- "L0 "
|
||||
- "L0.1[100,100] 30mb |-------------------------------------L0.1-------------------------------------|"
|
||||
- "L0.2[100,100] 30mb |-------------------------------------L0.2-------------------------------------|"
|
||||
- "L0.3[100,100] 30mb |-------------------------------------L0.3-------------------------------------|"
|
||||
- "L0.4[100,100] 30mb |-------------------------------------L0.4-------------------------------------|"
|
||||
- "L0.5[100,100] 30mb |-------------------------------------L0.5-------------------------------------|"
|
||||
- "L0.6[100,100] 30mb |-------------------------------------L0.6-------------------------------------|"
|
||||
- "L0.7[100,100] 30mb |-------------------------------------L0.7-------------------------------------|"
|
||||
- "L0.8[100,100] 30mb |-------------------------------------L0.8-------------------------------------|"
|
||||
- "L0.9[100,100] 30mb |-------------------------------------L0.9-------------------------------------|"
|
||||
- "L0.10[100,100] 30mb |------------------------------------L0.10-------------------------------------|"
|
||||
- "L0.11[100,100] 30mb |------------------------------------L0.11-------------------------------------|"
|
||||
- "L0.12[100,100] 30mb |------------------------------------L0.12-------------------------------------|"
|
||||
- "L1 "
|
||||
- "L1.21[100,100] 240mb|------------------------------------L1.21-------------------------------------|"
|
||||
- "WARNING: file L1.21[100,100] 240mb exceeds soft limit 100mb by more than 50%"
|
||||
"###
|
||||
);
|
||||
}
|
+#[tokio::test]
+async fn many_medium_files_time_range_1() {
+    test_helpers::maybe_start_logging();
+    let setup = layout_setup_builder().await.build().await;
+
+    // The compactor has 20 overlapping files with a single
+    // timestamp that individually are small enough to be processed,
+    // but when compacted together are too large and can't be split by
+    // timestamp
+    for _ in 0..20 {
+        setup
+            .partition
+            .create_parquet_file(
+                parquet_builder()
+                    .with_min_time(100)
+                    .with_max_time(101)
+                    .with_file_size_bytes(30 * ONE_MB)
+                    .with_compaction_level(CompactionLevel::Initial),
+            )
+            .await;
+    }
+
insta::assert_yaml_snapshot!(
|
||||
run_layout_scenario(&setup).await,
|
||||
@r###"
|
||||
---
|
||||
- "**** Input Files "
|
||||
- "L0, all files 30mb "
|
||||
- "L0.1[100,101] |-------------------------------------L0.1-------------------------------------|"
|
||||
- "L0.2[100,101] |-------------------------------------L0.2-------------------------------------|"
|
||||
- "L0.3[100,101] |-------------------------------------L0.3-------------------------------------|"
|
||||
- "L0.4[100,101] |-------------------------------------L0.4-------------------------------------|"
|
||||
- "L0.5[100,101] |-------------------------------------L0.5-------------------------------------|"
|
||||
- "L0.6[100,101] |-------------------------------------L0.6-------------------------------------|"
|
||||
- "L0.7[100,101] |-------------------------------------L0.7-------------------------------------|"
|
||||
- "L0.8[100,101] |-------------------------------------L0.8-------------------------------------|"
|
||||
- "L0.9[100,101] |-------------------------------------L0.9-------------------------------------|"
|
||||
- "L0.10[100,101] |------------------------------------L0.10-------------------------------------|"
|
||||
- "L0.11[100,101] |------------------------------------L0.11-------------------------------------|"
|
||||
- "L0.12[100,101] |------------------------------------L0.12-------------------------------------|"
|
||||
- "L0.13[100,101] |------------------------------------L0.13-------------------------------------|"
|
||||
- "L0.14[100,101] |------------------------------------L0.14-------------------------------------|"
|
||||
- "L0.15[100,101] |------------------------------------L0.15-------------------------------------|"
|
||||
- "L0.16[100,101] |------------------------------------L0.16-------------------------------------|"
|
||||
- "L0.17[100,101] |------------------------------------L0.17-------------------------------------|"
|
||||
- "L0.18[100,101] |------------------------------------L0.18-------------------------------------|"
|
||||
- "L0.19[100,101] |------------------------------------L0.19-------------------------------------|"
|
||||
- "L0.20[100,101] |------------------------------------L0.20-------------------------------------|"
|
||||
- "**** Simulation run 0, type=compact. 8 Input Files, 240mb total:"
|
||||
- "L0, all files 30mb "
|
||||
- "L0.20[100,101] |------------------------------------L0.20-------------------------------------|"
|
||||
- "L0.19[100,101] |------------------------------------L0.19-------------------------------------|"
|
||||
- "L0.18[100,101] |------------------------------------L0.18-------------------------------------|"
|
||||
- "L0.17[100,101] |------------------------------------L0.17-------------------------------------|"
|
||||
- "L0.16[100,101] |------------------------------------L0.16-------------------------------------|"
|
||||
- "L0.15[100,101] |------------------------------------L0.15-------------------------------------|"
|
||||
- "L0.14[100,101] |------------------------------------L0.14-------------------------------------|"
|
||||
- "L0.13[100,101] |------------------------------------L0.13-------------------------------------|"
|
||||
- "**** 1 Output Files (parquet_file_id not yet assigned), 240mb total:"
|
||||
- "L1, all files 240mb "
|
||||
- "L1.?[100,101] |-------------------------------------L1.?-------------------------------------|"
|
||||
- "Committing partition 1:"
|
||||
- " Soft Deleting 8 files: L0.13, L0.14, L0.15, L0.16, L0.17, L0.18, L0.19, L0.20"
|
||||
- " Creating 1 files at level CompactionLevel::L1"
|
||||
- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 268435456. The may happen if a large amount of data has the same timestamp"
|
||||
- "**** Final Output Files "
|
||||
- "L0 "
|
||||
- "L0.1[100,101] 30mb |-------------------------------------L0.1-------------------------------------|"
|
||||
- "L0.2[100,101] 30mb |-------------------------------------L0.2-------------------------------------|"
|
||||
- "L0.3[100,101] 30mb |-------------------------------------L0.3-------------------------------------|"
|
||||
- "L0.4[100,101] 30mb |-------------------------------------L0.4-------------------------------------|"
|
||||
- "L0.5[100,101] 30mb |-------------------------------------L0.5-------------------------------------|"
|
||||
- "L0.6[100,101] 30mb |-------------------------------------L0.6-------------------------------------|"
|
||||
- "L0.7[100,101] 30mb |-------------------------------------L0.7-------------------------------------|"
|
||||
- "L0.8[100,101] 30mb |-------------------------------------L0.8-------------------------------------|"
|
||||
- "L0.9[100,101] 30mb |-------------------------------------L0.9-------------------------------------|"
|
||||
- "L0.10[100,101] 30mb |------------------------------------L0.10-------------------------------------|"
|
||||
- "L0.11[100,101] 30mb |------------------------------------L0.11-------------------------------------|"
|
||||
- "L0.12[100,101] 30mb |------------------------------------L0.12-------------------------------------|"
|
||||
- "L1 "
|
||||
- "L1.21[100,101] 240mb|------------------------------------L1.21-------------------------------------|"
|
||||
- "WARNING: file L1.21[100,101] 240mb exceeds soft limit 100mb by more than 50%"
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
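In the expected output above, Simulation run 0 takes only 8 of the 20 files (8 × 30MB = 240MB), the most that fits under the 256MB limit. A sketch of that greedy size-capped selection over files that are already ordered (names are illustrative):

```rust
/// Sketch: take files in order while the running total stays within budget.
fn limit_files_to_compact(file_sizes: &[u64], budget: u64) -> usize {
    let mut total = 0u64;
    file_sizes
        .iter()
        .take_while(|size| {
            total += **size;
            total <= budget
        })
        .count()
}

// 20 files of 30MB with a 256MB budget -> 8 files (240MB) are selected.
```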
@@ -240,12 +376,11 @@ async fn many_small_files() {
 - "Committing partition 1:"
 - " Soft Deleting 20 files: L0.1, L0.2, L0.3, L0.4, L0.5, L0.6, L0.7, L0.8, L0.9, L0.10, L0.11, L0.12, L0.13, L0.14, L0.15, L0.16, L0.17, L0.18, L0.19, L0.20"
 - " Creating 1 files at level CompactionLevel::L1"
-- "Committing partition 1:"
-- " Upgrading 1 files level to CompactionLevel::L2: L1.21"
+- "SKIPPED COMPACTION for PartitionId(1): partition 1 has overlapped files that exceed max compact size limit 268435456. The may happen if a large amount of data has the same timestamp"
 - "**** Final Output Files "
-- "L2, all files 200mb "
-- "L2.21[100,100] |------------------------------------L2.21-------------------------------------|"
-- "WARNING: file L2.21[100,100] 200mb exceeds soft limit 100mb by more than 50%"
+- "L1, all files 200mb "
+- "L1.21[100,100] |------------------------------------L1.21-------------------------------------|"
+- "WARNING: file L1.21[100,100] 200mb exceeds soft limit 100mb by more than 50%"
 "###
 );
 }
@@ -39,7 +39,7 @@ use iox_tests::{
     ParquetFileBuilder, TestCatalog, TestNamespace, TestParquetFileBuilder, TestPartition,
     TestShard, TestTable,
 };
-use iox_time::{Time, TimeProvider};
+use iox_time::{MockProvider, Time, TimeProvider};
 use object_store::{path::Path, DynObjectStore};
 use parquet_file::storage::{ParquetStorage, StorageId};
 use schema::sort::SortKey;
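`MockProvider` pins the test clock so the `max_l0_created_at` values minted below are deterministic. The constructor is used exactly as in the fixtures that follow; the wrapper function is illustrative:

```rust
use std::sync::Arc;
use iox_time::{MockProvider, Time, TimeProvider};

/// A clock frozen at the Unix epoch; times derived from it (one minute
/// into the future, etc.) are then stable across test runs.
fn deterministic_clock() -> Arc<dyn TimeProvider> {
    Arc::new(MockProvider::new(Time::from_timestamp(0, 0).unwrap()))
}
```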
@@ -393,7 +393,8 @@ impl TestSetupBuilder<false> {
             .with_line_protocol(&lp)
-            .with_creation_time(Time::from_timestamp_nanos(time.time_3_minutes_future))
+            .with_max_l0_created_at(Time::from_timestamp_nanos(time.time_3_minutes_future))
-            .with_min_time(35000)
+            // the file includes one row of data. min_time and max_time are the same
+            .with_min_time(36000)
             .with_max_time(36000)
             .with_compaction_level(CompactionLevel::Final);
         let l2_3 = self.partition.create_parquet_file(builder).await.into();

@@ -421,7 +422,7 @@ impl TestSetupBuilder<false> {
             .with_line_protocol(&lp)
-            .with_creation_time(Time::from_timestamp_nanos(time.time_4_minutes_future))
+            .with_max_l0_created_at(Time::from_timestamp_nanos(time.time_4_minutes_future))
-            .with_min_time(0)
+            .with_min_time(6000)
             .with_max_time(68000)
             .with_compaction_level(CompactionLevel::FileNonOverlapped);
         let l1_1 = self.partition.create_parquet_file(builder).await.into();

@@ -458,7 +459,7 @@ impl TestSetupBuilder<false> {
             .with_line_protocol(&lp)
-            .with_creation_time(Time::from_timestamp_nanos(time.time_5_minutes_future))
+            .with_max_l0_created_at(Time::from_timestamp_nanos(time.time_4_minutes_future))
-            .with_min_time(8000)
+            .with_min_time(6000)
             .with_max_time(25000)
             .with_compaction_level(CompactionLevel::FileNonOverlapped);
         let l1_1 = self.partition.create_parquet_file(builder).await.into();
@@ -975,20 +976,26 @@ pub fn create_l2_files() -> Vec<ParquetFile> {
 /// |--L1.1--| |--L1.2--| |--L1.3--|
 /// |--L0.1--| |--L0.2--| |--L0.3--|
 pub fn create_overlapped_l0_l1_files(size: i64) -> Vec<ParquetFile> {
+    let time_provider = Arc::new(MockProvider::new(Time::from_timestamp(0, 0).unwrap()));
+    let time = TestTimes::new(&time_provider);
+
     let l1_1 = ParquetFileBuilder::new(11)
         .with_compaction_level(CompactionLevel::FileNonOverlapped)
         .with_time_range(250, 350)
         .with_file_size_bytes(size)
+        .with_max_l0_created_at(time.time_1_minute_future)
         .build();
     let l1_2 = ParquetFileBuilder::new(12)
         .with_compaction_level(CompactionLevel::FileNonOverlapped)
         .with_time_range(400, 500)
         .with_file_size_bytes(size)
+        .with_max_l0_created_at(time.time_1_minute_future)
         .build();
     let l1_3 = ParquetFileBuilder::new(13)
         .with_compaction_level(CompactionLevel::FileNonOverlapped)
         .with_time_range(600, 700)
         .with_file_size_bytes(size)
+        .with_max_l0_created_at(time.time_1_minute_future)
         .build();

     // L0_1 overlaps with L1_2 and L1_3
@@ -996,24 +1003,131 @@ pub fn create_overlapped_l0_l1_files(size: i64) -> Vec<ParquetFile> {
        .with_compaction_level(CompactionLevel::Initial)
        .with_time_range(450, 620)
        .with_file_size_bytes(size)
        .with_max_l0_created_at(time.time_2_minutes_future)
        .build();
    // L0_2 overlaps with L1_3
    let l0_2 = ParquetFileBuilder::new(2)
        .with_compaction_level(CompactionLevel::Initial)
        .with_time_range(650, 750)
        .with_file_size_bytes(size)
        .with_max_l0_created_at(time.time_3_minutes_future)
        .build();
    // L0_3 overlaps with nothing
    let l0_3 = ParquetFileBuilder::new(3)
        .with_compaction_level(CompactionLevel::Initial)
        .with_time_range(800, 900)
        .with_file_size_bytes(size)
        .with_max_l0_created_at(time.time_5_minutes_future)
        .build();

    // Put the files in random order
    vec![l1_3, l1_2, l0_2, l1_1, l0_1, l0_3]
}

/// This setup will return files with ranges as follows:
///    |--L1.1--|    |--L1.2--|
///        |---L0.1---|   |--L0.2--|   |--L0.3--|
pub fn create_overlapped_l0_l1_files_2(size: i64) -> Vec<ParquetFile> {
    let time_provider = Arc::new(MockProvider::new(Time::from_timestamp(0, 0).unwrap()));
    let time = TestTimes::new(&time_provider);

    let l1_1 = ParquetFileBuilder::new(12)
        .with_compaction_level(CompactionLevel::FileNonOverlapped)
        .with_time_range(400, 500)
        .with_file_size_bytes(size)
        .with_max_l0_created_at(time.time_1_minute_future)
        .build();
    let l1_2 = ParquetFileBuilder::new(13)
        .with_compaction_level(CompactionLevel::FileNonOverlapped)
        .with_time_range(600, 700)
        .with_file_size_bytes(size)
        .with_max_l0_created_at(time.time_1_minute_future)
        .build();

    // L0_1 overlaps with L1_1 and L1_2
    let l0_1 = ParquetFileBuilder::new(1)
        .with_compaction_level(CompactionLevel::Initial)
        .with_time_range(450, 620)
        .with_file_size_bytes(size)
        .with_max_l0_created_at(time.time_2_minutes_future)
        .build();
    // L0_2 overlaps with L1_2
    let l0_2 = ParquetFileBuilder::new(2)
        .with_compaction_level(CompactionLevel::Initial)
        .with_time_range(650, 750)
        .with_file_size_bytes(size)
        .with_max_l0_created_at(time.time_3_minutes_future)
        .build();
    // L0_3 overlaps with nothing
    let l0_3 = ParquetFileBuilder::new(3)
        .with_compaction_level(CompactionLevel::Initial)
        .with_time_range(800, 900)
        .with_file_size_bytes(size)
        .with_max_l0_created_at(time.time_5_minutes_future)
        .build();

    // Put the files in random order
    vec![l1_2, l0_2, l1_1, l0_1, l0_3]
}

/// This setup will return files with ranges as follows:
///    |--L1.1--|      |--L1.2--|      |--L1.3--|          : target_level files
///          |--L0.1--|      |--L0.3--|      |--L0.2--|    : start_level files
/// Note that L0.2 is created before L0.3 but has a later time range
pub fn create_overlapped_start_target_files(
    size: i64,
    start_level: CompactionLevel,
) -> Vec<ParquetFile> {
    let time_provider = Arc::new(MockProvider::new(Time::from_timestamp(0, 0).unwrap()));
    let time = TestTimes::new(&time_provider);

    let target_level = start_level.next();

    let l1_1 = ParquetFileBuilder::new(11)
        .with_compaction_level(target_level)
        .with_time_range(100, 200)
        .with_file_size_bytes(size)
        .with_max_l0_created_at(time.time_1_minute_future)
        .build();
    let l1_2 = ParquetFileBuilder::new(12)
        .with_compaction_level(target_level)
        .with_time_range(300, 400)
        .with_file_size_bytes(size)
        .with_max_l0_created_at(time.time_1_minute_future)
        .build();
    let l1_3 = ParquetFileBuilder::new(13)
        .with_compaction_level(target_level)
        .with_time_range(500, 600)
        .with_file_size_bytes(size)
        .with_max_l0_created_at(time.time_1_minute_future)
        .build();

    // L0_1 overlaps with L1_1
    let l0_1 = ParquetFileBuilder::new(1)
        .with_compaction_level(start_level)
        .with_time_range(150, 250)
        .with_file_size_bytes(size)
        .with_max_l0_created_at(time.time_2_minutes_future)
        .build();
    // L0_2 overlaps L1_3
    let l0_2 = ParquetFileBuilder::new(2)
        .with_compaction_level(start_level)
        .with_time_range(550, 650)
        .with_file_size_bytes(size)
        .with_max_l0_created_at(time.time_3_minutes_future)
        .build();
    // L0_3 overlaps with L1_2
    let l0_3 = ParquetFileBuilder::new(3)
        .with_compaction_level(start_level)
        .with_time_range(350, 450)
        .with_file_size_bytes(size)
        .with_max_l0_created_at(time.time_5_minutes_future)
        .build();

    // Put the files in random order
    vec![l1_2, l1_3, l0_2, l1_1, l0_1, l0_3]
}
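The ordering nuance called out in the doc comment above (L0.2 created before L0.3 yet covering a later time range) is why the sort key differs by level: L0 files can overlap and carry duplicate rows, so they must be processed in creation order (max_l0_created_at), while L1/L2 files never overlap within their own level and are ordered by min_time. A minimal sketch of that rule with a simplified file struct; the codebase's real order_files handles more than this:

// Simplified stand-in for ParquetFile; the struct and function below are
// illustrative only.
struct File {
    id: u64,
    min_time: i64,
    max_l0_created_at: i64,
}

// L0 (start_level == 0): sort by creation time so later writes stay later.
// L1/L2: sort by min_time so the level stays non-overlapping.
fn order_files(mut files: Vec<File>, start_level: u8) -> Vec<File> {
    if start_level == 0 {
        files.sort_by_key(|f| f.max_l0_created_at);
    } else {
        files.sort_by_key(|f| f.min_time);
    }
    files
}

fn main() {
    // Mirrors the setup above: L0.2 spans [550, 650] but was created before
    // L0.3, which spans [350, 450].
    let files = vec![
        File { id: 2, min_time: 550, max_l0_created_at: 3 },
        File { id: 3, min_time: 350, max_l0_created_at: 5 },
    ];
    let ordered = order_files(files, 0);
    // Creation order wins over time range for L0: L0.2 first, then L0.3.
    assert_eq!(ordered[0].id, 2);
    assert_eq!(ordered[1].id, 3);
}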

/// This setup will return files with ranges as follows:
/// |--L2.1--|  |--L2.2--|
///                  |--L1.1--|  |--L1.2--|  |--L1.3--|
@@ -1050,6 +1164,41 @@ pub fn create_overlapped_l1_l2_files(size: i64) -> Vec<ParquetFile> {
    vec![l1_3, l1_2, l2_1, l2_2, l1_1]
}

/// This setup will return files with ranges as follows:
///      |--L2.2--|
///         |--L1.1--|  |--L1.2--|  |--L1.3--|
pub fn create_overlapped_l1_l2_files_2(size: i64) -> Vec<ParquetFile> {
    let l2_2 = ParquetFileBuilder::new(22)
        .with_compaction_level(CompactionLevel::Final)
        .with_time_range(200, 300)
        .with_file_size_bytes(size)
        .with_max_l0_created_at(1)
        .build();

    // L1_1 overlaps with L2_2
    let l1_1 = ParquetFileBuilder::new(11)
        .with_compaction_level(CompactionLevel::FileNonOverlapped)
        .with_time_range(250, 350)
        .with_file_size_bytes(size)
        .with_max_l0_created_at(2)
        .build();
    let l1_2 = ParquetFileBuilder::new(12)
        .with_compaction_level(CompactionLevel::FileNonOverlapped)
        .with_time_range(400, 500)
        .with_file_size_bytes(size)
        .with_max_l0_created_at(3)
        .build();
    let l1_3 = ParquetFileBuilder::new(13)
        .with_compaction_level(CompactionLevel::FileNonOverlapped)
        .with_time_range(600, 700)
        .with_file_size_bytes(size)
        .with_max_l0_created_at(4)
        .build();

    // Put the files in random order
    vec![l1_3, l1_2, l2_2, l1_1]
}

/// This setup will return files with ranges as follows with mixed sizes:
/// |--L2.1--|  |--L2.2--|
///                  |--L1.1--|  |--L1.2--|  |--L1.3--|
@@ -12,7 +12,7 @@ use data_types::{
};
use datafusion::physical_plan::SendableRecordBatchStream;
use iox_time::Time;
use observability_deps::tracing::{debug, info};
use observability_deps::tracing::info;
use uuid::Uuid;

use compactor2::{DynError, ParquetFilesSink, PartitionInfo, PlanIR};
@@ -312,7 +312,7 @@ fn even_time_split(
    // add the entry for the last bucket
    time_ranges.push((last_time, overall_max_time));

    debug!(
    info!(
        ?overall_min_time,
        ?overall_max_time,
        ?overall_time_range,
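For context on the hunk above: even_time_split chops an overall time range into contiguous, roughly equal buckets, and the final bucket is pushed separately so it always ends exactly at overall_max_time. A minimal sketch of that bucketing with a hypothetical simplified signature; the real function is driven by files and split counts computed elsewhere:

// Sketch only: evenly split [overall_min, overall_max] into n contiguous
// (start, end) buckets. Interior boundaries never reach overall_max, so the
// explicitly pushed last bucket is never empty.
fn even_time_ranges(overall_min: i64, overall_max: i64, n: usize) -> Vec<(i64, i64)> {
    let span = overall_max - overall_min;
    let mut time_ranges = Vec::with_capacity(n);
    let mut last_time = overall_min;
    for i in 1..n {
        let boundary = overall_min + span * i as i64 / n as i64;
        time_ranges.push((last_time, boundary));
        last_time = boundary;
    }
    // add the entry for the last bucket, as in the diff above
    time_ranges.push((last_time, overall_max));
    time_ranges
}

fn main() {
    // Splitting 0..90 three ways gives boundaries at 30 and 60.
    assert_eq!(
        even_time_ranges(0, 90, 3),
        vec![(0, 30), (30, 60), (60, 90)]
    );
}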
@@ -1223,6 +1223,11 @@ impl ParquetFile {
    pub fn overlaps(&self, other: &Self) -> bool {
        self.min_time <= other.max_time && self.max_time >= other.min_time
    }

    /// Return true if the time range of this file overlaps with the given time range
    pub fn overlaps_time_range(&self, min_time: Timestamp, max_time: Timestamp) -> bool {
        self.min_time <= max_time && self.max_time >= min_time
    }
}

/// Data for a parquet file to be inserted into the catalog.
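Both predicates added above are the standard closed-interval intersection test: [a_min, a_max] and [b_min, b_max] overlap iff a_min <= b_max && a_max >= b_min, so ranges that merely touch at one endpoint still count as overlapping. A quick standalone check, with plain i64 pairs standing in for the Timestamp newtype:

// Same logic as ParquetFile::overlaps above, on bare (min, max) tuples.
fn overlaps(a: (i64, i64), b: (i64, i64)) -> bool {
    a.0 <= b.1 && a.1 >= b.0
}

fn main() {
    assert!(overlaps((450, 620), (400, 500))); // e.g. L0.1 vs L1.2 in the setups above
    assert!(overlaps((100, 200), (200, 300))); // touching endpoints overlap
    assert!(!overlaps((100, 200), (201, 300))); // disjoint
}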