fix: a silly bug that did not capture file limit if a lot of L0 files and very few or non overlapped L1 (#5736)
parent
c4542d6b21
commit
b11da1d98b
|
@ -141,8 +141,12 @@ fn filter_parquet_files_inner(
|
||||||
ln_estimated_file_bytes + current_ln_plus_1_estimated_file_bytes.iter().sum::<u64>();
|
ln_estimated_file_bytes + current_ln_plus_1_estimated_file_bytes.iter().sum::<u64>();
|
||||||
|
|
||||||
// Over limit of num files
|
// Over limit of num files
|
||||||
if files_to_return.len() + 1 /* LN file */ + overlaps.len() > max_num_files {
|
// At this stage files_to_return only includes LN+1 files. To get both returning LN+1 and LN files,
|
||||||
if files_to_return.is_empty() {
|
// we need to consider both files_to_return and level_n_to_return
|
||||||
|
if files_to_return.len() + level_n_to_return.len() + 1 /* LN file */ + overlaps.len()
|
||||||
|
> max_num_files
|
||||||
|
{
|
||||||
|
if files_to_return.is_empty() && level_n_to_return.is_empty() {
|
||||||
// Cannot compact this partition because its first set of overlapped files
|
// Cannot compact this partition because its first set of overlapped files
|
||||||
// exceed the file limit
|
// exceed the file limit
|
||||||
return FilterResult::OverLimitFileNum {
|
return FilterResult::OverLimitFileNum {
|
||||||
|
@ -662,6 +666,75 @@ mod tests {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// bug: https://github.com/influxdata/conductor/issues/1178
|
||||||
|
#[test]
|
||||||
|
fn file_num_limit_3_five_l0_one_l1_files() {
|
||||||
|
let level_0 = vec![
|
||||||
|
// Level 0 files that overlap in time slightly.
|
||||||
|
ParquetFileBuilder::level_0()
|
||||||
|
.id(1)
|
||||||
|
.min_time(200)
|
||||||
|
.max_time(300)
|
||||||
|
.file_size_bytes(10)
|
||||||
|
.build(),
|
||||||
|
ParquetFileBuilder::level_0()
|
||||||
|
.id(2)
|
||||||
|
.min_time(280)
|
||||||
|
.max_time(310)
|
||||||
|
.file_size_bytes(10)
|
||||||
|
.build(),
|
||||||
|
ParquetFileBuilder::level_0()
|
||||||
|
.id(3)
|
||||||
|
.min_time(309)
|
||||||
|
.max_time(350)
|
||||||
|
.file_size_bytes(10)
|
||||||
|
.build(),
|
||||||
|
ParquetFileBuilder::level_0()
|
||||||
|
.id(4)
|
||||||
|
.min_time(309)
|
||||||
|
.max_time(350)
|
||||||
|
.file_size_bytes(10)
|
||||||
|
.build(),
|
||||||
|
ParquetFileBuilder::level_0()
|
||||||
|
.id(5)
|
||||||
|
.min_time(309)
|
||||||
|
.max_time(350)
|
||||||
|
.file_size_bytes(10)
|
||||||
|
.build(),
|
||||||
|
];
|
||||||
|
|
||||||
|
let level_1 = vec![
|
||||||
|
// overlaps in time with first and second L0s
|
||||||
|
ParquetFileBuilder::level_1()
|
||||||
|
.id(101)
|
||||||
|
.min_time(150)
|
||||||
|
.max_time(250)
|
||||||
|
.build(),
|
||||||
|
];
|
||||||
|
let (files_metric, bytes_metric) = metrics();
|
||||||
|
|
||||||
|
let filter_result = filter_parquet_files_inner(
|
||||||
|
level_0,
|
||||||
|
level_1,
|
||||||
|
MEMORY_BUDGET,
|
||||||
|
3,
|
||||||
|
&files_metric,
|
||||||
|
&bytes_metric,
|
||||||
|
);
|
||||||
|
|
||||||
|
// should include 3 files: first 2 L0s (id=1 and id=2) and sthe L1 (id=101)
|
||||||
|
assert!(
|
||||||
|
matches!(
|
||||||
|
&filter_result,
|
||||||
|
FilterResult::Proceed { files, budget_bytes }
|
||||||
|
if files.len() == 3
|
||||||
|
&& files.iter().map(|f| f.id().get()).collect::<Vec<_>>() == [101, 1, 2]
|
||||||
|
&& *budget_bytes == 3 * 1176
|
||||||
|
),
|
||||||
|
"Match failed, got: {filter_result:?}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn large_budget_returns_one_level_0_file_and_its_level_1_overlaps() {
|
fn large_budget_returns_one_level_0_file_and_its_level_1_overlaps() {
|
||||||
let level_0 = vec![ParquetFileBuilder::level_0()
|
let level_0 = vec![ParquetFileBuilder::level_0()
|
||||||
|
|
Loading…
Reference in New Issue