feat: Make filter_parquet_files more general with regards to compaction level

pull/24376/head
Carol (Nichols || Goulding) 2022-09-15 12:46:59 -04:00
parent 9b99af08e4
commit e05657e8a4
5 changed files with 645 additions and 425 deletions


@@ -384,26 +384,33 @@ mod tests {
         // ------------------------------------------------
         // Compact
-        let mut candidates = compactor
+        let mut partition_candidates = compactor
             .cold_partitions_to_compact(compactor.config.max_number_partitions_per_shard)
             .await
             .unwrap();
-        assert_eq!(candidates.len(), 1);
-        let c = candidates.pop().unwrap();
+        assert_eq!(partition_candidates.len(), 1);
+        let partition = partition_candidates.pop().unwrap();
         let parquet_files_for_compaction =
             parquet_file_lookup::ParquetFilesForCompaction::for_partition_with_size_overrides(
                 Arc::clone(&compactor.catalog),
-                Arc::clone(&c),
+                Arc::clone(&partition),
                 &size_overrides,
             )
             .await
             .unwrap();
+        let ParquetFilesForCompaction {
+            level_0,
+            level_1,
+            .. // Ignore other levels
+        } = parquet_files_for_compaction;
+
         let to_compact = parquet_file_filtering::filter_parquet_files(
-            c,
-            parquet_files_for_compaction,
+            partition,
+            level_0,
+            level_1,
             compactor.config.memory_budget_bytes,
             &compactor.parquet_file_candidate_gauge,
             &compactor.parquet_file_candidate_bytes,
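
The same pattern repeats at every call site in this commit: instead of handing the whole ParquetFilesForCompaction lookup result to filter_parquet_files, the caller destructures out the two levels being compacted and ignores the rest, so adding another level to the struct later cannot break these calls. Below is a minimal, self-contained sketch of the shape of this refactor; the types are simplified stand-ins, not the crate's real ParquetFilesForCompaction or CompactorParquetFile.

// Simplified stand-in for the crate's lookup struct, for illustration only.
struct ParquetFilesForCompaction {
    level_0: Vec<u64>, // stand-in for Vec<CompactorParquetFile>
    level_1: Vec<u64>,
    level_2: Vec<u64>, // a level this particular filter does not care about
}

// Before: the filter took the whole ParquetFilesForCompaction struct.
// After: it takes only the levels it actually operates on, so it no longer
// has to change when the lookup struct grows a new level.
fn filter_parquet_files(level_0: Vec<u64>, level_1: Vec<u64>) -> Vec<u64> {
    level_0.into_iter().chain(level_1).collect()
}

fn main() {
    let lookup = ParquetFilesForCompaction {
        level_0: vec![1, 2],
        level_1: vec![3],
        level_2: vec![4],
    };

    // The call-site pattern used throughout this commit: take what is
    // needed and ignore any other levels with `..`.
    let ParquetFilesForCompaction {
        level_0,
        level_1,
        .. // Ignore other levels
    } = lookup;

    assert_eq!(filter_parquet_files(level_0, level_1), vec![1, 2, 3]);
}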
@@ -567,26 +574,33 @@ mod tests {
         // ------------------------------------------------
         // Compact
-        let mut candidates = compactor
+        let mut partition_candidates = compactor
             .cold_partitions_to_compact(compactor.config.max_number_partitions_per_shard)
             .await
             .unwrap();
-        assert_eq!(candidates.len(), 1);
-        let c = candidates.pop().unwrap();
+        assert_eq!(partition_candidates.len(), 1);
+        let partition = partition_candidates.pop().unwrap();
         let parquet_files_for_compaction =
             parquet_file_lookup::ParquetFilesForCompaction::for_partition_with_size_overrides(
                 Arc::clone(&compactor.catalog),
-                Arc::clone(&c),
+                Arc::clone(&partition),
                 &size_overrides,
             )
             .await
             .unwrap();
+        let ParquetFilesForCompaction {
+            level_0,
+            level_1,
+            .. // Ignore other levels
+        } = parquet_files_for_compaction;
+
         let to_compact = parquet_file_filtering::filter_parquet_files(
-            Arc::clone(&c),
-            parquet_files_for_compaction,
+            Arc::clone(&partition),
+            level_0,
+            level_1,
             compactor.config.memory_budget_bytes,
             &compactor.parquet_file_candidate_gauge,
             &compactor.parquet_file_candidate_bytes,
@@ -656,7 +670,7 @@ mod tests {
         );
         // Full compaction will now combine the two level 1 files into one level 2 file
-        full_compaction(&compactor, c, &size_overrides)
+        full_compaction(&compactor, partition, &size_overrides)
             .await
             .unwrap();


@@ -77,6 +77,7 @@ mod tests {
         handler::CompactorConfig,
         parquet_file_filtering, parquet_file_lookup,
         tests::{test_setup, TestSetup},
+        ParquetFilesForCompaction,
     };
     use arrow_util::assert_batches_sorted_eq;
     use backoff::BackoffConfig;
@@ -485,7 +486,7 @@ mod tests {
         // ------------------------------------------------
         // Compact
-        let mut candidates = compactor
+        let mut partition_candidates = compactor
             .hot_partitions_to_compact(
                 compactor.config.max_number_partitions_per_shard,
                 compactor
@@ -495,21 +496,28 @@ mod tests {
             .await
             .unwrap();
-        assert_eq!(candidates.len(), 1);
-        let c = candidates.pop().unwrap();
+        assert_eq!(partition_candidates.len(), 1);
+        let partition = partition_candidates.pop().unwrap();
         let parquet_files_for_compaction =
             parquet_file_lookup::ParquetFilesForCompaction::for_partition_with_size_overrides(
                 Arc::clone(&compactor.catalog),
-                Arc::clone(&c),
+                Arc::clone(&partition),
                 &size_overrides,
             )
             .await
             .unwrap();
+        let ParquetFilesForCompaction {
+            level_0,
+            level_1,
+            .. // Ignore other levels
+        } = parquet_files_for_compaction;
+
         let to_compact = parquet_file_filtering::filter_parquet_files(
-            c,
-            parquet_files_for_compaction,
+            partition,
+            level_0,
+            level_1,
             compactor.config.memory_budget_bytes,
             &compactor.parquet_file_candidate_gauge,
             &compactor.parquet_file_candidate_bytes,


@@ -27,6 +27,7 @@ use crate::{
     compact::{Compactor, PartitionCompactionCandidateWithInfo},
     parquet_file::CompactorParquetFile,
     parquet_file_filtering::{FilterResult, FilteredFiles},
+    parquet_file_lookup::ParquetFilesForCompaction,
 };
 use data_types::CompactionLevel;
 use metric::Attributes;
@@ -107,9 +108,16 @@ async fn compact_candidates_with_memory_budget<C, Fut>(
             Ok(parquet_files_for_compaction) => {
                 // Return only files under the `remaining_budget_bytes` that should be
                 // compacted
+                let ParquetFilesForCompaction {
+                    level_0,
+                    level_1,
+                    .. // Ignore other levels
+                } = parquet_files_for_compaction;
+
                 let to_compact = parquet_file_filtering::filter_parquet_files(
                     Arc::clone(&partition),
-                    parquet_files_for_compaction,
+                    level_0,
+                    level_1,
                     remaining_budget_bytes,
                     &compactor.parquet_file_candidate_gauge,
                     &compactor.parquet_file_candidate_bytes,

File diff suppressed because it is too large.


@@ -53,11 +53,24 @@ impl TryFrom<i32> for CompactionLevel {
         match value {
             x if x == Self::Initial as i32 => Ok(Self::Initial),
             x if x == Self::FileNonOverlapped as i32 => Ok(Self::FileNonOverlapped),
+            x if x == Self::Final as i32 => Ok(Self::Final),
             _ => Err("invalid compaction level value".into()),
         }
     }
 }
 
+impl CompactionLevel {
+    /// When compacting files of this level, provide the level that the resulting file should be.
+    /// Does not exceed the maximum available level.
+    pub fn next(&self) -> Self {
+        match self {
+            Self::Initial => Self::FileNonOverlapped,
+            Self::FileNonOverlapped => Self::Final,
+            _ => Self::Final,
+        }
+    }
+}
+
 /// Unique ID for a `Namespace`
 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)]
 #[sqlx(transparent)]
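
Taken together, the two data_types changes round out the level lifecycle: a level-2 (Final) value stored as i32 in the catalog can now be converted back, and next() encodes which level a compaction's output should get, saturating at Final rather than overflowing. A self-contained sketch mirroring the code above follows; the explicit discriminants 0/1/2 are an assumption consistent with the `as i32` comparisons, and the String error type is a simplification.

// Minimal reproduction of the enum as changed in this commit. Discriminant
// values and the error type are assumptions for the sake of a runnable sketch.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CompactionLevel {
    Initial = 0,
    FileNonOverlapped = 1,
    Final = 2,
}

impl TryFrom<i32> for CompactionLevel {
    type Error = String;

    fn try_from(value: i32) -> Result<Self, Self::Error> {
        match value {
            x if x == Self::Initial as i32 => Ok(Self::Initial),
            x if x == Self::FileNonOverlapped as i32 => Ok(Self::FileNonOverlapped),
            x if x == Self::Final as i32 => Ok(Self::Final),
            _ => Err("invalid compaction level value".into()),
        }
    }
}

impl CompactionLevel {
    /// The level that files produced by compacting this level should get.
    /// Saturates at the maximum level instead of overflowing.
    fn next(&self) -> Self {
        match self {
            Self::Initial => Self::FileNonOverlapped,
            Self::FileNonOverlapped => Self::Final,
            _ => Self::Final,
        }
    }
}

fn main() {
    // Newly valid conversion: level 2 round-trips from its i32 form.
    assert_eq!(CompactionLevel::try_from(2), Ok(CompactionLevel::Final));
    assert!(CompactionLevel::try_from(3).is_err());

    // next() walks up one level at a time and stops at Final.
    assert_eq!(CompactionLevel::Initial.next(), CompactionLevel::FileNonOverlapped);
    assert_eq!(CompactionLevel::FileNonOverlapped.next(), CompactionLevel::Final);
    assert_eq!(CompactionLevel::Final.next(), CompactionLevel::Final);
}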