feat: Make filter_parquet_files more general with regard to compaction level

pull/24376/head
Carol (Nichols || Goulding) 2022-09-15 12:46:59 -04:00
parent 9b99af08e4
commit e05657e8a4
No known key found for this signature in database
GPG Key ID: E907EE5A736F87D4
5 changed files with 645 additions and 425 deletions

View File

@ -384,26 +384,33 @@ mod tests {
// ------------------------------------------------
// Compact
let mut candidates = compactor
let mut partition_candidates = compactor
.cold_partitions_to_compact(compactor.config.max_number_partitions_per_shard)
.await
.unwrap();
assert_eq!(candidates.len(), 1);
let c = candidates.pop().unwrap();
assert_eq!(partition_candidates.len(), 1);
let partition = partition_candidates.pop().unwrap();
let parquet_files_for_compaction =
parquet_file_lookup::ParquetFilesForCompaction::for_partition_with_size_overrides(
Arc::clone(&compactor.catalog),
Arc::clone(&c),
Arc::clone(&partition),
&size_overrides,
)
.await
.unwrap();
let ParquetFilesForCompaction {
level_0,
level_1,
.. // Ignore other levels
} = parquet_files_for_compaction;
let to_compact = parquet_file_filtering::filter_parquet_files(
c,
parquet_files_for_compaction,
partition,
level_0,
level_1,
compactor.config.memory_budget_bytes,
&compactor.parquet_file_candidate_gauge,
&compactor.parquet_file_candidate_bytes,
@ -567,26 +574,33 @@ mod tests {
// ------------------------------------------------
// Compact
let mut candidates = compactor
let mut partition_candidates = compactor
.cold_partitions_to_compact(compactor.config.max_number_partitions_per_shard)
.await
.unwrap();
assert_eq!(candidates.len(), 1);
let c = candidates.pop().unwrap();
assert_eq!(partition_candidates.len(), 1);
let partition = partition_candidates.pop().unwrap();
let parquet_files_for_compaction =
parquet_file_lookup::ParquetFilesForCompaction::for_partition_with_size_overrides(
Arc::clone(&compactor.catalog),
Arc::clone(&c),
Arc::clone(&partition),
&size_overrides,
)
.await
.unwrap();
let ParquetFilesForCompaction {
level_0,
level_1,
.. // Ignore other levels
} = parquet_files_for_compaction;
let to_compact = parquet_file_filtering::filter_parquet_files(
Arc::clone(&c),
parquet_files_for_compaction,
Arc::clone(&partition),
level_0,
level_1,
compactor.config.memory_budget_bytes,
&compactor.parquet_file_candidate_gauge,
&compactor.parquet_file_candidate_bytes,
@ -656,7 +670,7 @@ mod tests {
);
// Full compaction will now combine the two level 1 files into one level 2 file
full_compaction(&compactor, c, &size_overrides)
full_compaction(&compactor, partition, &size_overrides)
.await
.unwrap();

View File

@ -77,6 +77,7 @@ mod tests {
handler::CompactorConfig,
parquet_file_filtering, parquet_file_lookup,
tests::{test_setup, TestSetup},
ParquetFilesForCompaction,
};
use arrow_util::assert_batches_sorted_eq;
use backoff::BackoffConfig;
@ -485,7 +486,7 @@ mod tests {
// ------------------------------------------------
// Compact
let mut candidates = compactor
let mut partition_candidates = compactor
.hot_partitions_to_compact(
compactor.config.max_number_partitions_per_shard,
compactor
@ -495,21 +496,28 @@ mod tests {
.await
.unwrap();
assert_eq!(candidates.len(), 1);
let c = candidates.pop().unwrap();
assert_eq!(partition_candidates.len(), 1);
let partition = partition_candidates.pop().unwrap();
let parquet_files_for_compaction =
parquet_file_lookup::ParquetFilesForCompaction::for_partition_with_size_overrides(
Arc::clone(&compactor.catalog),
Arc::clone(&c),
Arc::clone(&partition),
&size_overrides,
)
.await
.unwrap();
let ParquetFilesForCompaction {
level_0,
level_1,
.. // Ignore other levels
} = parquet_files_for_compaction;
let to_compact = parquet_file_filtering::filter_parquet_files(
c,
parquet_files_for_compaction,
partition,
level_0,
level_1,
compactor.config.memory_budget_bytes,
&compactor.parquet_file_candidate_gauge,
&compactor.parquet_file_candidate_bytes,

View File

@ -27,6 +27,7 @@ use crate::{
compact::{Compactor, PartitionCompactionCandidateWithInfo},
parquet_file::CompactorParquetFile,
parquet_file_filtering::{FilterResult, FilteredFiles},
parquet_file_lookup::ParquetFilesForCompaction,
};
use data_types::CompactionLevel;
use metric::Attributes;
@ -107,9 +108,16 @@ async fn compact_candidates_with_memory_budget<C, Fut>(
Ok(parquet_files_for_compaction) => {
// Return only files under the `remaining_budget_bytes` that should be
// compacted
let ParquetFilesForCompaction {
level_0,
level_1,
.. // Ignore other levels
} = parquet_files_for_compaction;
let to_compact = parquet_file_filtering::filter_parquet_files(
Arc::clone(&partition),
parquet_files_for_compaction,
level_0,
level_1,
remaining_budget_bytes,
&compactor.parquet_file_candidate_gauge,
&compactor.parquet_file_candidate_bytes,

File diff suppressed because it is too large Load Diff

View File

@ -53,11 +53,24 @@ impl TryFrom<i32> for CompactionLevel {
match value {
x if x == Self::Initial as i32 => Ok(Self::Initial),
x if x == Self::FileNonOverlapped as i32 => Ok(Self::FileNonOverlapped),
x if x == Self::Final as i32 => Ok(Self::Final),
_ => Err("invalid compaction level value".into()),
}
}
}
impl CompactionLevel {
    /// When compacting files of this level, return the level that the resulting file should
    /// have. Saturates at the maximum level: compacting `Final` files yields `Final` again.
    pub fn next(&self) -> Self {
        // Exhaustive match (no `_` arm) so that adding a new `CompactionLevel` variant
        // produces a compile error here instead of silently mapping it to `Final`.
        match self {
            Self::Initial => Self::FileNonOverlapped,
            Self::FileNonOverlapped => Self::Final,
            // Already at the maximum available level; stay there.
            Self::Final => Self::Final,
        }
    }
}
/// Unique ID for a `Namespace`
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)]
#[sqlx(transparent)]