feat: Make filter_parquet_files more general with regards to compaction level
parent 9b99af08e4
commit e05657e8a4
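`filter_parquet_files` used to take the whole `ParquetFilesForCompaction` lookup result and was hard-wired to select level 0 files and their overlapping level 1 files. It now takes two adjacent levels explicitly (`level_n` and `level_n_plus_1`), so the same routine serves both hot/cold compaction (level 0 into level 1) and full compaction (level 1 into level 2). Below is a minimal, self-contained sketch of the selection loop as it stands after this change; the `File` struct, plain `u64` sizes, and the omitted metrics and `FilterResult` handling are simplifications for illustration, not the real types.

```rust
#[derive(Debug, Clone)]
struct File {
    min_time: i64,
    max_time: i64,
    size: u64, // stands in for CompactorParquetFile::estimated_arrow_bytes()
}

fn overlaps_in_time(a: &File, b: &File) -> bool {
    a.min_time <= b.max_time && a.max_time >= b.min_time
}

/// Walk level N files in max-sequence-number order; each selected file pulls
/// in every level N + 1 file it overlaps. Stop when the budget would be
/// exceeded. Returns level N + 1 files first, then level N files, so newer
/// data sorts last for deduplication.
fn filter(level_n: Vec<File>, mut remaining_n_plus_1: Vec<File>, max_bytes: u64) -> Vec<File> {
    let mut files_to_return = Vec::new();
    let mut level_n_to_return = Vec::new();
    let mut total_budget = 0u64;

    for level_n_file in level_n {
        // Split off the level N + 1 files overlapping this level N file.
        let (overlaps, non_overlaps): (Vec<_>, Vec<_>) = remaining_n_plus_1
            .into_iter()
            .partition(|f| overlaps_in_time(f, &level_n_file));
        remaining_n_plus_1 = non_overlaps;

        let cost = level_n_file.size + overlaps.iter().map(|f| f.size).sum::<u64>();
        if total_budget + cost > max_bytes {
            break; // over budget; the real code reports this via FilterResult
        }
        total_budget += cost;
        files_to_return.extend(overlaps);
        level_n_to_return.push(level_n_file);
    }

    files_to_return.extend(level_n_to_return);
    files_to_return
}

fn main() {
    let level_0 = vec![File { min_time: 200, max_time: 300, size: 10 }];
    let level_1 = vec![
        File { min_time: 1, max_time: 50, size: 10 },    // too early: left behind
        File { min_time: 250, max_time: 350, size: 10 }, // overlaps: selected first
    ];
    assert_eq!(filter(level_0, level_1, 100).len(), 2);
}
```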
@@ -384,26 +384,33 @@ mod tests {
 
         // ------------------------------------------------
         // Compact
-        let mut candidates = compactor
+        let mut partition_candidates = compactor
             .cold_partitions_to_compact(compactor.config.max_number_partitions_per_shard)
             .await
             .unwrap();
 
-        assert_eq!(candidates.len(), 1);
-        let c = candidates.pop().unwrap();
+        assert_eq!(partition_candidates.len(), 1);
+        let partition = partition_candidates.pop().unwrap();
 
         let parquet_files_for_compaction =
             parquet_file_lookup::ParquetFilesForCompaction::for_partition_with_size_overrides(
                 Arc::clone(&compactor.catalog),
-                Arc::clone(&c),
+                Arc::clone(&partition),
                 &size_overrides,
             )
             .await
             .unwrap();
+
+        let ParquetFilesForCompaction {
+            level_0,
+            level_1,
+            .. // Ignore other levels
+        } = parquet_files_for_compaction;
 
         let to_compact = parquet_file_filtering::filter_parquet_files(
-            c,
-            parquet_files_for_compaction,
+            partition,
+            level_0,
+            level_1,
             compactor.config.memory_budget_bytes,
             &compactor.parquet_file_candidate_gauge,
             &compactor.parquet_file_candidate_bytes,
@@ -567,26 +574,33 @@ mod tests {
 
         // ------------------------------------------------
         // Compact
-        let mut candidates = compactor
+        let mut partition_candidates = compactor
             .cold_partitions_to_compact(compactor.config.max_number_partitions_per_shard)
             .await
             .unwrap();
 
-        assert_eq!(candidates.len(), 1);
-        let c = candidates.pop().unwrap();
+        assert_eq!(partition_candidates.len(), 1);
+        let partition = partition_candidates.pop().unwrap();
 
         let parquet_files_for_compaction =
             parquet_file_lookup::ParquetFilesForCompaction::for_partition_with_size_overrides(
                 Arc::clone(&compactor.catalog),
-                Arc::clone(&c),
+                Arc::clone(&partition),
                 &size_overrides,
             )
             .await
             .unwrap();
+
+        let ParquetFilesForCompaction {
+            level_0,
+            level_1,
+            .. // Ignore other levels
+        } = parquet_files_for_compaction;
 
         let to_compact = parquet_file_filtering::filter_parquet_files(
-            Arc::clone(&c),
-            parquet_files_for_compaction,
+            Arc::clone(&partition),
+            level_0,
+            level_1,
             compactor.config.memory_budget_bytes,
             &compactor.parquet_file_candidate_gauge,
             &compactor.parquet_file_candidate_bytes,
@@ -656,7 +670,7 @@ mod tests {
         );
 
         // Full compaction will now combine the two level 1 files into one level 2 file
-        full_compaction(&compactor, c, &size_overrides)
+        full_compaction(&compactor, partition, &size_overrides)
             .await
             .unwrap();
 
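The repeated edit in these test hunks: rename the candidate bindings for clarity (`candidates` to `partition_candidates`, `c` to `partition`), then destructure the lookup result and hand just the two relevant levels to the filter. A hypothetical, simplified mirror of that destructuring pattern (plain `u64` ids stand in for `CompactorParquetFile`):

```rust
// Hypothetical stand-in for parquet_file_lookup::ParquetFilesForCompaction;
// the real fields hold Vec<CompactorParquetFile>.
struct ParquetFilesForCompaction {
    level_0: Vec<u64>,
    level_1: Vec<u64>,
    level_2: Vec<u64>,
}

fn main() {
    let parquet_files_for_compaction = ParquetFilesForCompaction {
        level_0: vec![1, 2],
        level_1: vec![10],
        level_2: vec![20],
    };
    // Take only the two adjacent levels being compacted; `..` ignores the rest.
    let ParquetFilesForCompaction { level_0, level_1, .. } = parquet_files_for_compaction;
    assert_eq!((level_0, level_1), (vec![1, 2], vec![10]));
}
```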
@@ -77,6 +77,7 @@ mod tests {
         handler::CompactorConfig,
         parquet_file_filtering, parquet_file_lookup,
         tests::{test_setup, TestSetup},
+        ParquetFilesForCompaction,
     };
     use arrow_util::assert_batches_sorted_eq;
     use backoff::BackoffConfig;
@@ -485,7 +486,7 @@ mod tests {
 
         // ------------------------------------------------
         // Compact
-        let mut candidates = compactor
+        let mut partition_candidates = compactor
             .hot_partitions_to_compact(
                 compactor.config.max_number_partitions_per_shard,
                 compactor
@@ -495,21 +496,28 @@ mod tests {
             .await
             .unwrap();
 
-        assert_eq!(candidates.len(), 1);
-        let c = candidates.pop().unwrap();
+        assert_eq!(partition_candidates.len(), 1);
+        let partition = partition_candidates.pop().unwrap();
 
         let parquet_files_for_compaction =
             parquet_file_lookup::ParquetFilesForCompaction::for_partition_with_size_overrides(
                 Arc::clone(&compactor.catalog),
-                Arc::clone(&c),
+                Arc::clone(&partition),
                 &size_overrides,
             )
             .await
             .unwrap();
+
+        let ParquetFilesForCompaction {
+            level_0,
+            level_1,
+            .. // Ignore other levels
+        } = parquet_files_for_compaction;
 
         let to_compact = parquet_file_filtering::filter_parquet_files(
-            c,
-            parquet_files_for_compaction,
+            partition,
+            level_0,
+            level_1,
             compactor.config.memory_budget_bytes,
             &compactor.parquet_file_candidate_gauge,
             &compactor.parquet_file_candidate_bytes,
@@ -27,6 +27,7 @@ use crate::{
     compact::{Compactor, PartitionCompactionCandidateWithInfo},
     parquet_file::CompactorParquetFile,
     parquet_file_filtering::{FilterResult, FilteredFiles},
+    parquet_file_lookup::ParquetFilesForCompaction,
 };
 use data_types::CompactionLevel;
 use metric::Attributes;
@@ -107,9 +108,16 @@ async fn compact_candidates_with_memory_budget<C, Fut>(
             Ok(parquet_files_for_compaction) => {
                 // Return only files under the `remaining_budget_bytes` that should be
                 // compacted
+                let ParquetFilesForCompaction {
+                    level_0,
+                    level_1,
+                    .. // Ignore other levels
+                } = parquet_files_for_compaction;
+
                 let to_compact = parquet_file_filtering::filter_parquet_files(
                     Arc::clone(&partition),
-                    parquet_files_for_compaction,
+                    level_0,
+                    level_1,
                     remaining_budget_bytes,
                     &compactor.parquet_file_candidate_gauge,
                     &compactor.parquet_file_candidate_bytes,
@@ -1,13 +1,11 @@
 //! Logic for filtering a set of Parquet files to the desired set to be used for an optimal
 //! compaction operation.
 
-use crate::{
-    compact::PartitionCompactionCandidateWithInfo, parquet_file::CompactorParquetFile,
-    parquet_file_lookup::ParquetFilesForCompaction,
-};
+use crate::{compact::PartitionCompactionCandidateWithInfo, parquet_file::CompactorParquetFile};
+use data_types::CompactionLevel;
 use metric::{Attributes, Metric, U64Gauge, U64Histogram};
 use observability_deps::tracing::*;
-use std::sync::Arc;
+use std::{borrow::Cow, sync::Arc};
 
 /// Groups of files, their partition, and the estimated budget for compacting this group
 #[derive(Debug)]
@@ -30,23 +28,24 @@ pub(crate) enum FilterResult {
     },
 }
 
-/// Given a list of level 0 files sorted by max sequence number and a list of level 1 files for
+/// Given a list of level N files sorted by max sequence number and a list of level N + 1 files for
 /// a partition, select a subset set of files that:
 ///
-/// - Has a subset of the level 0 files selected, from the start of the sorted level 0 list
+/// - Has a subset of the level N files selected, from the start of the sorted level N list
 /// - Has a total size less than `max_bytes`
-/// - Has only level 1 files that overlap in time with the level 0 files
+/// - Has only level N + 1 files that overlap in time with the level N files
 ///
-/// The returned files will be ordered with the level 1 files first, then the level 0 files ordered
-/// in ascending order by their max sequence number.
+/// The returned files will be ordered with the level N + 1 files first, then the level N files
+/// ordered in ascending order by their max sequence number.
 pub(crate) fn filter_parquet_files(
     // partition of the parquet files
     partition: Arc<PartitionCompactionCandidateWithInfo>,
-    // Level 0 files sorted by max sequence number and level 1 files in arbitrary order for one
-    // partition
-    parquet_files_for_compaction: ParquetFilesForCompaction,
-    // Stop considering level 0 files when the total size of all files selected for compaction so
-    // far exceeds this value
+    // Level N files sorted by max sequence number
+    level_n: Vec<CompactorParquetFile>,
+    // Level N + 1 files
+    level_n_plus_1: Vec<CompactorParquetFile>,
+    // Stop considering level N files when the total estimated arrow size of all files selected for
+    // compaction so far exceeds this value
     max_bytes: u64,
     // Gauge for the number of Parquet file candidates
     parquet_file_candidate_gauge: &Metric<U64Gauge>,
@@ -54,7 +53,8 @@ pub(crate) fn filter_parquet_files(
     parquet_file_candidate_bytes: &Metric<U64Histogram>,
 ) -> FilteredFiles {
     let filter_result = filter_parquet_files_inner(
-        parquet_files_for_compaction,
+        level_n,
+        level_n_plus_1,
         max_bytes,
         parquet_file_candidate_gauge,
         parquet_file_candidate_bytes,
@@ -67,69 +67,68 @@ pub(crate) fn filter_parquet_files(
 }
 
 fn filter_parquet_files_inner(
-    // Level 0 files sorted by max sequence number and level 1 files in arbitrary order for one
-    // partition
-    parquet_files_for_compaction: ParquetFilesForCompaction,
-    // Stop considering level 0 files when the total size of all files selected for compaction so
-    // far exceeds this value
+    // Level N files sorted by max sequence number
+    level_n: Vec<CompactorParquetFile>,
+    // Level N + 1 files
+    mut remaining_level_n_plus_1: Vec<CompactorParquetFile>,
+    // Stop considering level N files when the total estimated arrow size of all files selected for
+    // compaction so far exceeds this value
     max_bytes: u64,
     // Gauge for the number of Parquet file candidates
     parquet_file_candidate_gauge: &Metric<U64Gauge>,
     // Histogram for the number of bytes of Parquet file candidates
     parquet_file_candidate_bytes: &Metric<U64Histogram>,
 ) -> FilterResult {
-    let ParquetFilesForCompaction {
-        level_0,
-        level_1: mut remaining_level_1,
-        .. // Ignore other levels
-    } = parquet_files_for_compaction;
+    if level_n.is_empty() {
+        info!("No level N files to consider for compaction");
 
-    if level_0.is_empty() {
-        info!("No level 0 files to consider for compaction");
         return FilterResult::NothingToCompact;
     }
 
     // Guaranteed to exist because of the empty check and early return above. Also assuming all
     // files are for the same partition.
-    let partition_id = level_0[0].partition_id();
+    let partition_id = level_n[0].partition_id();
 
-    let num_level_0_considering = level_0.len();
-    let num_level_1_considering = remaining_level_1.len();
+    let compaction_level_n = level_n[0].compaction_level();
+    let compaction_level_n_plus_1 = compaction_level_n.next();
 
-    // This will start by holding the level 1 files that are found to overlap an included level 0
-    // file. At the end of this function, the level 0 files are added to the end so they are sorted
-    // last.
-    let mut files_to_return = Vec::with_capacity(level_0.len() + remaining_level_1.len());
-    // Estimated memory bytes needed to compact returned L1 files
-    let mut l1_estimated_budget = Vec::with_capacity(level_0.len() + remaining_level_1.len());
-    // Keep track of level 0 files to include in this compaction operation; maintain assumed
+    let num_level_n_considering = level_n.len();
+    let num_level_n_plus_1_considering = remaining_level_n_plus_1.len();
+
+    // This will start by holding the level N + 1 files that are found to overlap an included level
+    // N file. At the end of this function, the level N files are added to the end so they are
+    // sorted last for deduplication purposes.
+    let mut files_to_return = Vec::with_capacity(level_n.len() + remaining_level_n_plus_1.len());
+    // Estimated memory bytes needed to compact returned LN+1 files
+    let mut ln_plus_1_estimated_budget =
+        Vec::with_capacity(level_n.len() + remaining_level_n_plus_1.len());
+    // Keep track of level N files to include in this compaction operation; maintain assumed
     // ordering by max sequence number.
-    let mut level_0_to_return = Vec::with_capacity(level_0.len());
-    // Estimated memory bytes needed to compact returned L0 files
-    let mut l0_estimated_budget = Vec::with_capacity(level_0.len());
+    let mut level_n_to_return = Vec::with_capacity(level_n.len());
+    // Estimated memory bytes needed to compact returned LN files
+    let mut ln_estimated_budget = Vec::with_capacity(level_n.len());
 
     // Memory needed to compact the returned files
     let mut total_estimated_budget = 0;
-    for level_0_file in level_0 {
-        // Estimate memory needed for this L0 file
-        let l0_estimated_file_bytes = level_0_file.estimated_arrow_bytes();
+    for level_n_file in level_n {
+        // Estimate memory needed for this LN file
+        let ln_estimated_file_bytes = level_n_file.estimated_arrow_bytes();
 
-        // Note: even though we can stop here if the l0_estimated_file_bytes is larger than the
-        // given budget,we still continue estimated the memory needed for its overlapped L1 to
-        // return the total memory needed to compact this L0 with all of its overlapped L1s
+        // Note: even though we can stop here if the ln_estimated_file_bytes is larger than the
+        // given budget,we still continue estimated the memory needed for its overlapped LN+1 to
+        // return the total memory needed to compact this LN with all of its overlapped LN+1s
 
-        // Find all level 1 files that overlap with this level 0 file.
-        let (overlaps, non_overlaps): (Vec<_>, Vec<_>) = remaining_level_1
+        // Find all level N+1 files that overlap with this level N file.
+        let (overlaps, non_overlaps): (Vec<_>, Vec<_>) = remaining_level_n_plus_1
             .into_iter()
-            .partition(|level_1_file| overlaps_in_time(level_1_file, &level_0_file));
+            .partition(|level_n_plus_1_file| overlaps_in_time(level_n_plus_1_file, &level_n_file));
 
-        // Estimate memory needed for each of L1
-        let current_l1_estimated_file_bytes: Vec<_> = overlaps
+        // Estimate memory needed for each LN+1
+        let current_ln_plus_1_estimated_file_bytes: Vec<_> = overlaps
             .iter()
             .map(|file| file.estimated_arrow_bytes())
             .collect();
         let estimated_file_bytes =
-            l0_estimated_file_bytes + current_l1_estimated_file_bytes.iter().sum::<u64>();
+            ln_estimated_file_bytes + current_ln_plus_1_estimated_file_bytes.iter().sum::<u64>();
 
         // Over budget
         if total_estimated_budget + estimated_file_bytes > max_bytes {
@@ -145,46 +144,50 @@ fn filter_parquet_files_inner(
         } else {
             // still under budget
             total_estimated_budget += estimated_file_bytes;
-            l0_estimated_budget.push(l0_estimated_file_bytes);
-            l1_estimated_budget.extend(current_l1_estimated_file_bytes);
+            ln_estimated_budget.push(ln_estimated_file_bytes);
+            ln_plus_1_estimated_budget.extend(current_ln_plus_1_estimated_file_bytes);
 
-            // Move the overlapping level 1 files to `files_to_return` so they're not considered
-            // again; a level 1 file overlapping with one level 0 file is enough for its inclusion.
-            // This way, we also don't include level 1 files multiple times.
+            // Move the overlapping level N+1 files to `files_to_return` so they're not considered
+            // again; a level N+1 file overlapping with one level N file is enough for its
+            // inclusion. This way, we also don't include level N+1 files multiple times.
             files_to_return.extend(overlaps);
 
-            // The remaining level 1 files to possibly include in future iterations are the
-            // remaining ones that did not overlap with this level 0 file.
-            remaining_level_1 = non_overlaps;
+            // The remaining level N+1 files to possibly include in future iterations are the
+            // remaining ones that did not overlap with this level N file.
+            remaining_level_n_plus_1 = non_overlaps;
 
-            // Move the level 0 file into the list of level 0 files to return
-            level_0_to_return.push(level_0_file);
+            // Move the level N file into the list of level N files to return
+            level_n_to_return.push(level_n_file);
         }
     }
 
-    let num_level_0_compacting = level_0_to_return.len();
-    let num_level_1_compacting = files_to_return.len();
+    let num_level_n_compacting = level_n_to_return.len();
+    let num_level_n_plus_1_compacting = files_to_return.len();
 
     info!(
         partition_id = partition_id.get(),
-        num_level_0_considering,
-        num_level_1_considering,
-        num_level_0_compacting,
-        num_level_1_compacting,
+        num_level_n_considering,
+        num_level_n_plus_1_considering,
+        num_level_n_compacting,
+        num_level_n_plus_1_compacting,
         "filtered Parquet files for compaction",
     );
 
     record_file_metrics(
         parquet_file_candidate_gauge,
-        num_level_0_considering as u64,
-        num_level_1_considering as u64,
-        num_level_0_compacting as u64,
-        num_level_1_compacting as u64,
+        compaction_level_n,
+        compaction_level_n_plus_1,
+        num_level_n_considering as u64,
+        num_level_n_plus_1_considering as u64,
+        num_level_n_compacting as u64,
+        num_level_n_plus_1_compacting as u64,
     );
 
     record_byte_metrics(
         parquet_file_candidate_bytes,
-        level_0_to_return
+        compaction_level_n,
+        compaction_level_n_plus_1,
+        level_n_to_return
             .iter()
             .map(|pf| pf.file_size_bytes() as u64)
             .collect(),
@@ -192,13 +195,13 @@ fn filter_parquet_files_inner(
             .iter()
             .map(|pf| pf.file_size_bytes() as u64)
             .collect(),
-        l0_estimated_budget,
-        l1_estimated_budget,
+        ln_estimated_budget,
+        ln_plus_1_estimated_budget,
     );
 
-    // Return the level 1 files first, followed by the level 0 files assuming we've maintained
+    // Return the level N+1 files first, followed by the level N files, assuming we've maintained
     // their ordering by max sequence number.
-    files_to_return.extend(level_0_to_return);
+    files_to_return.extend(level_n_to_return);
 
     FilterResult::Proceed {
         files: files_to_return,
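One contract worth calling out from the hunks above: the function still returns the higher-level (N + 1) files first and the level N files last, preserving their max-sequence-number order, so that newer data sorts last for deduplication. A tiny sketch with ids in place of files:

```rust
fn main() {
    // Level N + 1 files collected in overlap-inclusion order...
    let mut files_to_return = vec![102, 103];
    // ...then the level N files, still ordered by max sequence number.
    let level_n_to_return = vec![1, 2];
    files_to_return.extend(level_n_to_return);
    assert_eq!(files_to_return, vec![102, 103, 1, 2]);
}
```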
@@ -213,70 +216,87 @@ fn overlaps_in_time(a: &CompactorParquetFile, b: &CompactorParquetFile) -> bool
 
 fn record_file_metrics(
     gauge: &Metric<U64Gauge>,
-    num_level_0_considering: u64,
-    num_level_1_considering: u64,
-    num_level_0_compacting: u64,
-    num_level_1_compacting: u64,
+    compaction_level_n: CompactionLevel,
+    compaction_level_n_plus_1: CompactionLevel,
+    num_level_n_considering: u64,
+    num_level_n_plus_1_considering: u64,
+    num_level_n_compacting: u64,
+    num_level_n_plus_1_compacting: u64,
 ) {
-    let attributes = Attributes::from(&[
-        ("compaction_level", "0"),
-        ("status", "selected_for_compaction"),
-    ]);
-    let recorder = gauge.recorder(attributes);
-    recorder.set(num_level_0_compacting);
+    let compaction_level_n_string = Cow::from(format!("{}", compaction_level_n as i16));
+    let mut level_n_attributes =
+        Attributes::from([("compaction_level", compaction_level_n_string)]);
 
-    let attributes = Attributes::from(&[
-        ("compaction_level", "0"),
-        ("status", "not_selected_for_compaction"),
-    ]);
-    let recorder = gauge.recorder(attributes);
-    recorder.set(num_level_0_considering - num_level_0_compacting);
+    level_n_attributes.insert("status", "selected_for_compaction");
+    let recorder = gauge.recorder(level_n_attributes.clone());
+    recorder.set(num_level_n_compacting);
 
-    let attributes = Attributes::from(&[
-        ("compaction_level", "1"),
-        ("status", "selected_for_compaction"),
-    ]);
-    let recorder = gauge.recorder(attributes);
-    recorder.set(num_level_1_compacting);
+    level_n_attributes.insert("status", "not_selected_for_compaction");
+    let recorder = gauge.recorder(level_n_attributes);
+    recorder.set(num_level_n_considering - num_level_n_compacting);
 
-    let attributes = Attributes::from(&[
-        ("compaction_level", "1"),
-        ("status", "not_selected_for_compaction"),
-    ]);
-    let recorder = gauge.recorder(attributes);
-    recorder.set(num_level_1_considering - num_level_1_compacting);
+    let compaction_level_n_plus_1_string =
+        Cow::from(format!("{}", compaction_level_n_plus_1 as i16));
+    let mut level_n_plus_1_attributes =
+        Attributes::from([("compaction_level", compaction_level_n_plus_1_string)]);
+
+    level_n_plus_1_attributes.insert("status", "selected_for_compaction");
+    let recorder = gauge.recorder(level_n_plus_1_attributes.clone());
+    recorder.set(num_level_n_plus_1_compacting);
+
+    level_n_plus_1_attributes.insert("status", "not_selected_for_compaction");
+    let recorder = gauge.recorder(level_n_plus_1_attributes);
+    recorder.set(num_level_n_plus_1_considering - num_level_n_plus_1_compacting);
 }
 
 fn record_byte_metrics(
     histogram: &Metric<U64Histogram>,
-    level_0_sizes: Vec<u64>,
-    level_1_sizes: Vec<u64>,
-    level_0_estimated_compacting_budgets: Vec<u64>,
-    level_1_estimated_compacting_budgets: Vec<u64>,
+    compaction_level_n: CompactionLevel,
+    compaction_level_n_plus_1: CompactionLevel,
+    level_n_sizes: Vec<u64>,
+    level_n_plus_1_sizes: Vec<u64>,
+    level_n_estimated_compacting_budgets: Vec<u64>,
+    level_n_plus_1_estimated_compacting_budgets: Vec<u64>,
 ) {
-    let attributes = Attributes::from(&[("file_size_compaction_level", "0")]);
-    let recorder = histogram.recorder(attributes);
-    for size in level_0_sizes {
+    let compaction_level_n_string = Cow::from(format!("{}", compaction_level_n as i16));
+    let level_n_attributes = Attributes::from([(
+        "file_size_compaction_level",
+        compaction_level_n_string.clone(),
+    )]);
+
+    let compaction_level_n_plus_1_string =
+        Cow::from(format!("{}", compaction_level_n_plus_1 as i16));
+    let level_n_plus_1_attributes = Attributes::from([(
+        "file_size_compaction_level",
+        compaction_level_n_plus_1_string.clone(),
+    )]);
+
+    let recorder = histogram.recorder(level_n_attributes);
+    for size in level_n_sizes {
         recorder.record(size);
     }
 
-    let attributes = Attributes::from(&[("file_size_compaction_level", "1")]);
-    let recorder = histogram.recorder(attributes);
-    for size in level_1_sizes {
+    let recorder = histogram.recorder(level_n_plus_1_attributes);
+    for size in level_n_plus_1_sizes {
         recorder.record(size);
     }
 
-    let attributes =
-        Attributes::from(&[("file_estimated_compacting_budget_compaction_level", "0")]);
-    let recorder = histogram.recorder(attributes);
-    for size in level_0_estimated_compacting_budgets {
+    let level_n_attributes = Attributes::from([(
+        "file_estimated_compacting_budget_compaction_level",
+        compaction_level_n_string,
+    )]);
+    let recorder = histogram.recorder(level_n_attributes);
+    for size in level_n_estimated_compacting_budgets {
         recorder.record(size);
     }
 
-    let attributes =
-        Attributes::from(&[("file_estimated_compacting_budget_compaction_level", "1")]);
-    let recorder = histogram.recorder(attributes);
-    for size in level_1_estimated_compacting_budgets {
+    let level_n_plus_1_attributes = Attributes::from([(
+        "file_estimated_compacting_budget_compaction_level",
+        compaction_level_n_plus_1_string,
+    )]);
+
+    let recorder = histogram.recorder(level_n_plus_1_attributes);
+    for size in level_n_plus_1_estimated_compacting_budgets {
         recorder.record(size);
     }
 }
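The metric attribute values can no longer be `&'static str` throughout: the compaction level is now formatted at runtime, while the status values remain static, hence the switch to `Cow<'static, str>` and `Attributes::from([...])` above. A self-contained sketch of the same pattern, using a plain `HashMap` in place of `metric::Attributes`:

```rust
use std::borrow::Cow;
use std::collections::HashMap;

fn main() {
    let compaction_level_n = 1i16; // e.g. CompactionLevel::FileNonOverlapped as i16
    let level_string = Cow::from(format!("{}", compaction_level_n));

    let mut attributes: HashMap<&'static str, Cow<'static, str>> = HashMap::new();
    // Owned value, produced at runtime:
    attributes.insert("compaction_level", level_string.clone());
    // Borrowed static value, no allocation:
    attributes.insert("status", "selected_for_compaction".into());

    assert_eq!(attributes["compaction_level"], "1");
    assert_eq!(attributes["status"], "selected_for_compaction");
}
```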
@@ -373,22 +393,17 @@ mod tests {
         (parquet_file_candidate_gauge, parquet_file_candidate_bytes)
     }
 
-    mod hot {
-        use super::*;
-
     const MEMORY_BUDGET: u64 = 1024 * 1024 * 10;
 
     #[test]
     fn empty_in_empty_out() {
-        let parquet_files_for_compaction = ParquetFilesForCompaction {
-            level_0: vec![],
-            level_1: vec![],
-            level_2: vec![],
-        };
+        let level_0 = vec![];
+        let level_1 = vec![];
         let (files_metric, bytes_metric) = metrics();
 
         let filter_result = filter_parquet_files_inner(
-            parquet_files_for_compaction,
+            level_0,
+            level_1,
             MEMORY_BUDGET,
             &files_metric,
             &bytes_metric,
@@ -399,19 +414,12 @@ mod tests {
 
     #[test]
     fn budget_0_returns_over_budget() {
-        let parquet_files_for_compaction = ParquetFilesForCompaction {
-            level_0: vec![ParquetFileBuilder::level_0().id(1).build()],
-            level_1: vec![],
-            level_2: vec![],
-        };
+        let level_0 = vec![ParquetFileBuilder::level_0().id(1).build()];
+        let level_1 = vec![];
         let (files_metric, bytes_metric) = metrics();
 
-        let filter_result = filter_parquet_files_inner(
-            parquet_files_for_compaction,
-            0,
-            &files_metric,
-            &bytes_metric,
-        );
+        let filter_result =
+            filter_parquet_files_inner(level_0, level_1, 0, &files_metric, &bytes_metric);
 
         assert_eq!(
             filter_result,
@@ -421,19 +429,12 @@ mod tests {
 
     #[test]
     fn budget_1000_returns_over_budget() {
-        let parquet_files_for_compaction = ParquetFilesForCompaction {
-            level_0: vec![ParquetFileBuilder::level_0().id(1).build()],
-            level_1: vec![],
-            level_2: vec![],
-        };
+        let level_0 = vec![ParquetFileBuilder::level_0().id(1).build()];
+        let level_1 = vec![];
         let (files_metric, bytes_metric) = metrics();
 
-        let filter_result = filter_parquet_files_inner(
-            parquet_files_for_compaction,
-            1000,
-            &files_metric,
-            &bytes_metric,
-        );
+        let filter_result =
+            filter_parquet_files_inner(level_0, level_1, 1000, &files_metric, &bytes_metric);
 
         assert_eq!(
             filter_result,
@@ -443,13 +444,12 @@ mod tests {
 
     #[test]
     fn large_budget_returns_one_level_0_file_and_its_level_1_overlaps() {
-        let parquet_files_for_compaction = ParquetFilesForCompaction {
-            level_0: vec![ParquetFileBuilder::level_0()
+        let level_0 = vec![ParquetFileBuilder::level_0()
             .id(1)
             .min_time(200)
             .max_time(300)
-            .build()],
-            level_1: vec![
+            .build()];
+        let level_1 = vec![
             // Too early
             ParquetFileBuilder::level_1()
                 .id(101)
@@ -468,13 +468,12 @@ mod tests {
                 .min_time(400)
                 .max_time(500)
                 .build(),
-        ],
-            level_2: vec![],
-        };
+        ];
         let (files_metric, bytes_metric) = metrics();
 
         let filter_result = filter_parquet_files_inner(
-            parquet_files_for_compaction,
+            level_0,
+            level_1,
             MEMORY_BUDGET,
             &files_metric,
             &bytes_metric,
@@ -494,8 +493,7 @@ mod tests {
 
     #[test]
    fn returns_only_overlapping_level_1_files_in_order() {
-        let parquet_files_for_compaction = ParquetFilesForCompaction {
-            level_0: vec![
+        let level_0 = vec![
             // Level 0 files that overlap in time slightly.
             ParquetFileBuilder::level_0()
                 .id(1)
@@ -515,9 +513,10 @@ mod tests {
                 .max_time(350)
                 .file_size_bytes(10)
                 .build(),
-        ],
+        ];
+
         // Level 1 files can be assumed not to overlap each other.
-            level_1: vec![
+        let level_1 = vec![
             // Does not overlap any level 0, times are too early
             ParquetFileBuilder::level_1()
                 .id(101)
@@ -567,14 +566,13 @@ mod tests {
                 .max_time(399)
                 .file_size_bytes(10)
                 .build(),
-        ],
-            level_2: vec![],
-        };
+        ];
 
         let (files_metric, bytes_metric) = metrics();
 
         let filter_result = filter_parquet_files_inner(
-            parquet_files_for_compaction.clone(),
+            level_0.clone(),
+            level_1.clone(),
             1176 * 3 + 5, // enough for 3 files
             &files_metric,
             &bytes_metric,
@@ -595,19 +593,24 @@ mod tests {
         );
 
         assert_eq!(
-            extract_file_metrics(&files_metric),
+            extract_file_metrics(
+                &files_metric,
+                CompactionLevel::Initial,
+                CompactionLevel::FileNonOverlapped
+            ),
             ExtractedFileMetrics {
-                level_0_selected: 1,
-                level_0_not_selected: 2,
-                level_1_selected: 2,
-                level_1_not_selected: 5,
+                level_n_selected: 1,
+                level_n_not_selected: 2,
+                level_n_plus_1_selected: 2,
+                level_n_plus_1_not_selected: 5,
             }
         );
 
         let (files_metric, bytes_metric) = metrics();
 
         let filter_result = filter_parquet_files_inner(
-            parquet_files_for_compaction,
+            level_0,
+            level_1,
             // Increase budget to more than 6 files; 1st two level 0 files & their overlapping
             // level 1 files get returned
             1176 * 6 + 5,
@@ -630,15 +633,174 @@ mod tests {
         );
 
         assert_eq!(
-            extract_file_metrics(&files_metric),
+            extract_file_metrics(
+                &files_metric,
+                CompactionLevel::Initial,
+                CompactionLevel::FileNonOverlapped
+            ),
             ExtractedFileMetrics {
-                level_0_selected: 2,
-                level_0_not_selected: 1,
-                level_1_selected: 4,
-                level_1_not_selected: 3,
+                level_n_selected: 2,
+                level_n_not_selected: 1,
+                level_n_plus_1_selected: 4,
+                level_n_plus_1_not_selected: 3,
             }
         );
     }
+
+    #[test]
+    fn returns_only_overlapping_level_2_files_in_order() {
+        let level_1 = vec![
+            // Level 1 files don't overlap each other.
+            ParquetFileBuilder::level_1()
+                .id(1)
+                .min_time(200)
+                .max_time(300)
+                .file_size_bytes(10)
+                .build(),
+            ParquetFileBuilder::level_1()
+                .id(2)
+                .min_time(310)
+                .max_time(330)
+                .file_size_bytes(10)
+                .build(),
+            ParquetFileBuilder::level_1()
+                .id(3)
+                .min_time(340)
+                .max_time(350)
+                .file_size_bytes(10)
+                .build(),
+        ];
+
+        // Level 2 files can be assumed not to overlap each other.
+        let level_2 = vec![
+            // Does not overlap any level 1, times are too early
+            ParquetFileBuilder::level_2()
+                .id(101)
+                .min_time(1)
+                .max_time(50)
+                .file_size_bytes(10)
+                .build(),
+            // Overlaps file 1
+            ParquetFileBuilder::level_2()
+                .id(102)
+                .min_time(199)
+                .max_time(201)
+                .file_size_bytes(10)
+                .build(),
+            // Overlaps files 1 and 2
+            ParquetFileBuilder::level_2()
+                .id(103)
+                .min_time(290)
+                .max_time(312)
+                .file_size_bytes(10)
+                .build(),
+            // Overlaps file 2
+            ParquetFileBuilder::level_2()
+                .id(104)
+                .min_time(315)
+                .max_time(315)
+                .file_size_bytes(10)
+                .build(),
+            // Overlaps files 2 and 3
+            ParquetFileBuilder::level_2()
+                .id(105)
+                .min_time(329)
+                .max_time(341)
+                .file_size_bytes(10)
+                .build(),
+            // Overlaps file 3
+            ParquetFileBuilder::level_2()
+                .id(106)
+                .min_time(342)
+                .max_time(360)
+                .file_size_bytes(BUCKET_500_KB as i64 + 1) // exercise metrics
+                .build(),
+            // Does not overlap any level 1, times are too late
+            ParquetFileBuilder::level_2()
+                .id(107)
+                .min_time(390)
+                .max_time(399)
+                .file_size_bytes(10)
+                .build(),
+        ];
+
+        let (files_metric, bytes_metric) = metrics();
+
+        let filter_result = filter_parquet_files_inner(
+            level_1.clone(),
+            level_2.clone(),
+            1176 * 3 + 5, // enough for 3 files
+            &files_metric,
+            &bytes_metric,
+        );
+
+        assert!(
+            matches!(
+                &filter_result,
+                FilterResult::Proceed { files, budget_bytes }
+                    if files.len() == 3
+                        && files
+                            .iter()
+                            .map(|f| f.id().get())
+                            .collect::<Vec<_>>() == [102, 103, 1]
+                        && *budget_bytes == 3 * 1176
+            ),
+            "Match failed, got: {filter_result:?}"
+        );
+
+        assert_eq!(
+            extract_file_metrics(
+                &files_metric,
+                CompactionLevel::FileNonOverlapped,
+                CompactionLevel::Final
+            ),
+            ExtractedFileMetrics {
+                level_n_selected: 1,
+                level_n_not_selected: 2,
+                level_n_plus_1_selected: 2,
+                level_n_plus_1_not_selected: 5,
+            }
+        );
+
+        let (files_metric, bytes_metric) = metrics();
+
+        let filter_result = filter_parquet_files_inner(
+            level_1,
+            level_2,
+            // Increase budget to more than 6 files; 1st two level 1 files & their overlapping
+            // level 2 files get returned
+            1176 * 6 + 5,
+            &files_metric,
+            &bytes_metric,
+        );
+
+        assert!(
+            matches!(
+                &filter_result,
+                FilterResult::Proceed { files, budget_bytes }
+                    if files.len() == 6
+                        && files
+                            .iter()
+                            .map(|f| f.id().get())
+                            .collect::<Vec<_>>() == [102, 103, 104, 105, 1, 2]
+                        && *budget_bytes == 6 * 1176
+            ),
+            "Match failed, got: {filter_result:?}"
+        );
+
+        assert_eq!(
+            extract_file_metrics(
+                &files_metric,
+                CompactionLevel::FileNonOverlapped,
+                CompactionLevel::Final
+            ),
+            ExtractedFileMetrics {
+                level_n_selected: 2,
+                level_n_not_selected: 1,
+                level_n_plus_1_selected: 4,
+                level_n_plus_1_not_selected: 3,
+            }
+        );
+    }
 
     /// Create ParquetFile instances for testing. Only sets fields relevant to the filtering; other
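The new test mirrors `returns_only_overlapping_level_1_files_in_order` one level up. Why `[102, 103, 1]` under the smaller budget: level 1 file 1 spans 200..=300 and overlaps level 2 files 102 (199..=201) and 103 (290..=312), and the budgets in these tests work out to 1176 estimated bytes per file (hence `budget_bytes == 3 * 1176`). The arithmetic, assuming that uniform per-file estimate:

```rust
fn main() {
    let per_file = 1176u64; // estimated arrow bytes per test file (assumed uniform)
    let budget = per_file * 3 + 5; // "enough for 3 files"

    // Selecting level 1 file 1 pulls in overlapping level 2 files 102 and 103.
    let cost_file_1 = 3 * per_file;
    assert!(cost_file_1 <= budget);

    // Level 1 file 2 would add itself plus overlapping level 2 files 104 and
    // 105 -- three more files -- which exceeds the budget, so selection stops
    // with files [102, 103, 1] (level N + 1 files first, then level N).
    assert!(cost_file_1 + 3 * per_file > budget);
}
```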
@@ -677,6 +839,17 @@ mod tests {
             }
         }
 
+        // Start building a level 2 file
+        fn level_2() -> Self {
+            Self {
+                compaction_level: CompactionLevel::Final,
+                id: 1,
+                min_time: 8,
+                max_time: 9,
+                file_size_bytes: 10,
+            }
+        }
+
         fn id(mut self, id: i64) -> Self {
             self.id = id;
             self
@@ -730,50 +903,54 @@ mod tests {
 
     #[derive(Debug, PartialEq)]
     struct ExtractedFileMetrics {
-        level_0_selected: u64,
-        level_0_not_selected: u64,
-        level_1_selected: u64,
-        level_1_not_selected: u64,
+        level_n_selected: u64,
+        level_n_not_selected: u64,
+        level_n_plus_1_selected: u64,
+        level_n_plus_1_not_selected: u64,
     }
 
-    fn extract_file_metrics(metric: &Metric<U64Gauge>) -> ExtractedFileMetrics {
-        let level_0_selected = metric
-            .get_observer(&Attributes::from(&[
-                ("compaction_level", "0"),
-                ("status", "selected_for_compaction"),
+    fn extract_file_metrics(
+        metric: &Metric<U64Gauge>,
+        level_n: CompactionLevel,
+        level_n_plus_1: CompactionLevel,
+    ) -> ExtractedFileMetrics {
+        let level_n_string = Cow::from(format!("{}", level_n as i16));
+        let level_n_selected = metric
+            .get_observer(&Attributes::from([
+                ("compaction_level", level_n_string.clone()),
+                ("status", "selected_for_compaction".into()),
+            ]))
+            .unwrap()
+            .fetch();
+        let level_n_not_selected = metric
+            .get_observer(&Attributes::from([
+                ("compaction_level", level_n_string),
+                ("status", "not_selected_for_compaction".into()),
             ]))
             .unwrap()
             .fetch();
 
-        let level_0_not_selected = metric
-            .get_observer(&Attributes::from(&[
-                ("compaction_level", "0"),
-                ("status", "not_selected_for_compaction"),
+        let level_n_plus_1_string = Cow::from(format!("{}", level_n_plus_1 as i16));
+        let level_n_plus_1_selected = metric
+            .get_observer(&Attributes::from([
+                ("compaction_level", level_n_plus_1_string.clone()),
+                ("status", "selected_for_compaction".into()),
             ]))
             .unwrap()
             .fetch();
-
-        let level_1_selected = metric
-            .get_observer(&Attributes::from(&[
-                ("compaction_level", "1"),
-                ("status", "selected_for_compaction"),
-            ]))
-            .unwrap()
-            .fetch();
-
-        let level_1_not_selected = metric
-            .get_observer(&Attributes::from(&[
-                ("compaction_level", "1"),
-                ("status", "not_selected_for_compaction"),
+        let level_n_plus_1_not_selected = metric
+            .get_observer(&Attributes::from([
+                ("compaction_level", level_n_plus_1_string),
+                ("status", "not_selected_for_compaction".into()),
             ]))
             .unwrap()
             .fetch();
 
         ExtractedFileMetrics {
-            level_0_selected,
-            level_0_not_selected,
-            level_1_selected,
-            level_1_not_selected,
+            level_n_selected,
+            level_n_not_selected,
+            level_n_plus_1_selected,
+            level_n_plus_1_not_selected,
        }
    }
 }
@@ -53,11 +53,24 @@ impl TryFrom<i32> for CompactionLevel {
         match value {
             x if x == Self::Initial as i32 => Ok(Self::Initial),
             x if x == Self::FileNonOverlapped as i32 => Ok(Self::FileNonOverlapped),
+            x if x == Self::Final as i32 => Ok(Self::Final),
             _ => Err("invalid compaction level value".into()),
         }
     }
 }
 
+impl CompactionLevel {
+    /// When compacting files of this level, provide the level that the resulting file should be.
+    /// Does not exceed the maximum available level.
+    pub fn next(&self) -> Self {
+        match self {
+            Self::Initial => Self::FileNonOverlapped,
+            Self::FileNonOverlapped => Self::Final,
+            _ => Self::Final,
+        }
+    }
+}
+
 /// Unique ID for a `Namespace`
 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)]
 #[sqlx(transparent)]
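The new `CompactionLevel::next()` is what lets `filter_parquet_files_inner` derive the target level from its input files instead of assuming level 1. A self-contained mirror of the enum and its saturating `next()` (the real definition lives in the `data_types` crate):

```rust
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CompactionLevel {
    Initial = 0,
    FileNonOverlapped = 1,
    Final = 2,
}

impl CompactionLevel {
    /// The level a file produced by compacting this level should get;
    /// saturates at the maximum available level.
    fn next(&self) -> Self {
        match self {
            Self::Initial => Self::FileNonOverlapped,
            Self::FileNonOverlapped => Self::Final,
            _ => Self::Final,
        }
    }
}

fn main() {
    assert_eq!(CompactionLevel::Initial.next(), CompactionLevel::FileNonOverlapped);
    assert_eq!(CompactionLevel::FileNonOverlapped.next(), CompactionLevel::Final);
    // Already at the top: stays at Final rather than exceeding it.
    assert_eq!(CompactionLevel::Final.next(), CompactionLevel::Final);
}
```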