docs: Update comment to what we'd like this code to do

pull/24376/head
Carol (Nichols || Goulding) 2022-09-15 11:38:45 -04:00
parent 69c9e7b5ff
commit dc64e494bd
No known key found for this signature in database
GPG Key ID: E907EE5A736F87D4
2 changed files with 9 additions and 6 deletions

View File

@ -94,10 +94,13 @@ pub(crate) enum Error {
/// Given a partition that needs to have full compaction run,
///
/// - Select all files in the partition, which this method assumes will only be level 1
/// without overlaps (any level 0 and level 2 files passed into this function will be ignored)
/// - Split the files into groups based on size: take files in the list until the current group size
/// is greater than max_desired_file_size_bytes
/// - Select all level 1 and level 2 files in the partition.
/// - This method assumes the level 1 files don't overlap with each other but might overlap with
/// existing level 2 files.
/// - Any level 0 files will be ignored.
/// - Sort the level 1 files by max_sequence_number.
/// - Take level 1 files from the list, together with any level 2 files that overlap them, until
/// the current group size exceeds the memory budget or `max_desired_file_size_bytes`
/// - Compact each group into a single new level 2 file, without splitting the output
///
/// Uses a hashmap of size overrides to allow mocking of file sizes.

View File

@ -269,8 +269,8 @@ pub(crate) async fn compact_parquet_files(
}
/// Compact all files given, no matter their size, into one level 2 file. When this is called by
/// `full_compaction`, it should only receive a group of level 1 files that has already been
/// selected to be an appropriate size.
/// `full_compaction`, it should only receive a group of level 1 and level 2 files that has already
/// been selected to be an appropriate size.
#[allow(clippy::too_many_arguments)]
pub(crate) async fn compact_final_no_splits(
files: Vec<CompactorParquetFile>,