fix: a silly bug that did not capture file limit if a lot of L0 files and very few or non overlapped L1 (#5736)

pull/24376/head
Nga Tran 2022-09-23 17:03:29 -04:00 committed by GitHub
parent c4542d6b21
commit b11da1d98b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 75 additions and 2 deletions

View File

@ -141,8 +141,12 @@ fn filter_parquet_files_inner(
ln_estimated_file_bytes + current_ln_plus_1_estimated_file_bytes.iter().sum::<u64>();
// Over limit of num files
if files_to_return.len() + 1 /* LN file */ + overlaps.len() > max_num_files {
if files_to_return.is_empty() {
// At this stage files_to_return only includes LN+1 files. To get both returning LN+1 and LN files,
// we need to consider both files_to_return and level_n_to_return
if files_to_return.len() + level_n_to_return.len() + 1 /* LN file */ + overlaps.len()
> max_num_files
{
if files_to_return.is_empty() && level_n_to_return.is_empty() {
// Cannot compact this partition because its first set of overlapped files
// exceed the file limit
return FilterResult::OverLimitFileNum {
@ -662,6 +666,75 @@ mod tests {
);
}
// bug: https://github.com/influxdata/conductor/issues/1178
#[test]
fn file_num_limit_3_five_l0_one_l1_files() {
let level_0 = vec![
// Level 0 files that overlap in time slightly.
ParquetFileBuilder::level_0()
.id(1)
.min_time(200)
.max_time(300)
.file_size_bytes(10)
.build(),
ParquetFileBuilder::level_0()
.id(2)
.min_time(280)
.max_time(310)
.file_size_bytes(10)
.build(),
ParquetFileBuilder::level_0()
.id(3)
.min_time(309)
.max_time(350)
.file_size_bytes(10)
.build(),
ParquetFileBuilder::level_0()
.id(4)
.min_time(309)
.max_time(350)
.file_size_bytes(10)
.build(),
ParquetFileBuilder::level_0()
.id(5)
.min_time(309)
.max_time(350)
.file_size_bytes(10)
.build(),
];
let level_1 = vec![
// overlaps in time with first and second L0s
ParquetFileBuilder::level_1()
.id(101)
.min_time(150)
.max_time(250)
.build(),
];
let (files_metric, bytes_metric) = metrics();
let filter_result = filter_parquet_files_inner(
level_0,
level_1,
MEMORY_BUDGET,
3,
&files_metric,
&bytes_metric,
);
// should include 3 files: first 2 L0s (id=1 and id=2) and sthe L1 (id=101)
assert!(
matches!(
&filter_result,
FilterResult::Proceed { files, budget_bytes }
if files.len() == 3
&& files.iter().map(|f| f.id().get()).collect::<Vec<_>>() == [101, 1, 2]
&& *budget_bytes == 3 * 1176
),
"Match failed, got: {filter_result:?}"
);
}
#[test]
fn large_budget_returns_one_level_0_file_and_its_level_1_overlaps() {
let level_0 = vec![ParquetFileBuilder::level_0()