docs: Clarify comments on sort order of input/output of filtering
parent
cde0a94fd5
commit
ca4c5d65e7
|
@ -28,19 +28,19 @@ pub(crate) enum FilterResult {
|
|||
},
|
||||
}
|
||||
|
||||
/// Given a list of level N files sorted by max sequence number and a list of level N + 1 files for
|
||||
/// a partition, select a subset of files that:
|
||||
/// Given a list of sorted level N files and a list of level N + 1 files for a partition, select a
|
||||
/// subset of files that:
|
||||
///
|
||||
/// - Has a subset of the level N files selected, from the start of the sorted level N list
|
||||
/// - Has a total size less than `max_bytes`
|
||||
/// - Has only level N + 1 files that overlap in time with the level N files
|
||||
///
|
||||
/// The returned files will be ordered with the level N + 1 files first, then the level N files
|
||||
/// ordered in ascending order by their max sequence number.
|
||||
/// in the same order as they were in the input.
|
||||
pub(crate) fn filter_parquet_files(
|
||||
// partition of the parquet files
|
||||
partition: Arc<PartitionCompactionCandidateWithInfo>,
|
||||
// Level N files sorted by max sequence number
|
||||
// Level N files, sorted
|
||||
level_n: Vec<CompactorParquetFile>,
|
||||
// Level N + 1 files
|
||||
level_n_plus_1: Vec<CompactorParquetFile>,
|
||||
|
@ -67,7 +67,7 @@ pub(crate) fn filter_parquet_files(
|
|||
}
|
||||
|
||||
fn filter_parquet_files_inner(
|
||||
// Level N files sorted by max sequence number
|
||||
// Level N files, sorted
|
||||
level_n: Vec<CompactorParquetFile>,
|
||||
// Level N + 1 files
|
||||
mut remaining_level_n_plus_1: Vec<CompactorParquetFile>,
|
||||
|
@ -95,8 +95,7 @@ fn filter_parquet_files_inner(
|
|||
let num_level_n_plus_1_considering = remaining_level_n_plus_1.len();
|
||||
|
||||
// This will start by holding the level N + 1 files that are found to overlap an included level
|
||||
// N file. At the end of this function, the level N files are added to the end so they are
|
||||
// sorted last for deduplication purposes.
|
||||
// N file. At the end of this function, the level N files are added to the end.
|
||||
let mut files_to_return = Vec::with_capacity(level_n.len() + remaining_level_n_plus_1.len());
|
||||
// Estimated memory bytes needed to compact returned LN+1 files
|
||||
let mut ln_plus_1_estimated_budget =
|
||||
|
@ -199,8 +198,8 @@ fn filter_parquet_files_inner(
|
|||
ln_plus_1_estimated_budget,
|
||||
);
|
||||
|
||||
// Return the level N+1 files first, followed by the level N files, assuming we've maintained
|
||||
// their ordering by max sequence number.
|
||||
// Return the level N+1 files first, followed by the level N files. The order is arbitrary;
|
||||
// ordering for deduplication happens using `QueryChunk.order`.
|
||||
files_to_return.extend(level_n_to_return);
|
||||
|
||||
FilterResult::Proceed {
|
||||
|
|
Loading…
Reference in New Issue