chore(iox/compactor): rename max_input_parquet_bytes_per_partition (#7160)

Joe-Blount 2023-03-08 11:08:08 -06:00 committed by GitHub
parent 309177b750
commit c87113ccbf
10 changed files with 32 additions and 47 deletions


@@ -201,7 +201,7 @@ pub struct Compactor2Config {
     )]
     pub max_num_files_per_plan: usize,

-    /// Maximum input bytes (in parquet) per partition that the
+    /// Maximum input bytes (in parquet) per compaction that the
     /// compactor will attempt to compact in any one plan.
     ///
     /// In the worst case, if the sum of the sizes of all parquet
@@ -213,12 +213,12 @@ pub struct Compactor2Config {
     /// This setting is a self protection mechanism, and it is
     /// expected to be removed in future versions
     #[clap(
-        long = "compaction-max-input-parquet-bytes-per-partition",
-        env = "INFLUXDB_IOX_COMPACTION_MAX_INPUT_PARQUET_BYTES_PER_PARTITION",
-        default_value = "268435456", // 256MB
+        long = "compaction-max-compact-size",
+        env = "INFLUXDB_IOX_COMPACTION_MAX_COMPACT_SIZE",
+        default_value = "314572800", // 300MB
         action
     )]
-    pub max_input_parquet_bytes_per_partition: usize,
+    pub max_compact_size: usize,

     /// Number of shards.
     ///
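For reference, the renamed flag can be exercised in isolation. Below is a minimal, hypothetical sketch (a stand-in, not the real Compactor2Config) that mirrors the clap attributes above; it assumes clap 3.x with the derive and env features enabled:

```rust
use clap::Parser;

/// Hypothetical stand-in for Compactor2Config, reduced to the renamed field.
#[derive(Debug, Parser)]
struct DemoConfig {
    /// Maximum input bytes (in parquet) per compaction.
    #[clap(
        long = "compaction-max-compact-size",
        env = "INFLUXDB_IOX_COMPACTION_MAX_COMPACT_SIZE",
        default_value = "314572800", // 300MB
        action
    )]
    max_compact_size: usize,
}

fn main() {
    // Overridable via `--compaction-max-compact-size <BYTES>` or the env var;
    // otherwise the 300MB default applies.
    let config = DemoConfig::parse();
    println!("max_compact_size = {}", config.max_compact_size);
}
```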


@@ -135,7 +135,7 @@ pub fn hardcoded_components(config: &Config) -> Arc<Components> {
     partition_filters.append(&mut make_partition_filters(config));

     let partition_resource_limit_filters: Vec<Arc<dyn PartitionFilter>> = vec![Arc::new(
-        UnableToCompactPartitionFilter::new(config.max_input_parquet_bytes_per_partition),
+        UnableToCompactPartitionFilter::new(config.max_compact_size),
     )];

     let partition_done_sink: Arc<dyn PartitionDoneSink> = if config.shadow_mode {
@@ -277,10 +277,7 @@ pub fn hardcoded_components(config: &Config) -> Arc<Components> {
             Arc::clone(&config.catalog),
         )),
         round_info_source: Arc::new(LoggingRoundInfoWrapper::new(Arc::new(
-            LevelBasedRoundInfo::new(
-                config.max_num_files_per_plan,
-                config.max_input_parquet_bytes_per_partition,
-            ),
+            LevelBasedRoundInfo::new(config.max_num_files_per_plan, config.max_compact_size),
         ))),
         partition_filter: Arc::new(LoggingPartitionFilterWrapper::new(
             MetricsPartitionFilterWrapper::new(
@@ -316,9 +313,7 @@ pub fn hardcoded_components(config: &Config) -> Arc<Components> {
                 TargetLevelSplit::new(),
                 NonOverlapSplit::new(),
                 UpgradeSplit::new(config.max_desired_file_size_bytes),
-                LoggingSplitOrCompactWrapper::new(SplitCompact::new(
-                    config.max_input_parquet_bytes_per_partition,
-                )),
+                LoggingSplitOrCompactWrapper::new(SplitCompact::new(config.max_compact_size)),
             ),
         ))),
         partition_resource_limit_filter: Arc::new(LoggingPartitionFilterWrapper::new(
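All three call sites above thread the same limit through different components. As a rough illustration (hypothetical types, not the actual UnableToCompactPartitionFilter API), a resource-limit filter of this shape rejects any partition whose candidate input cannot fit into a single compaction:

```rust
/// Hypothetical, simplified stand-in for a partition resource-limit filter.
struct SizeLimitFilter {
    max_compact_size: usize,
}

impl SizeLimitFilter {
    /// Keep the partition only if the combined input fits in one compaction.
    fn apply(&self, file_sizes: &[usize]) -> bool {
        file_sizes.iter().sum::<usize>() <= self.max_compact_size
    }
}

fn main() {
    let filter = SizeLimitFilter {
        max_compact_size: 314_572_800, // the new 300MB default
    };
    assert!(filter.apply(&[100 << 20, 100 << 20])); // 200MB of input fits
    assert!(!filter.apply(&[200 << 20, 200 << 20])); // 400MB exceeds the cap
    println!("size-limit filter behaves as expected");
}
```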


@@ -30,7 +30,7 @@ pub fn log_config(config: &Config) {
         partitions_source,
         shadow_mode,
         ignore_partition_skip_marker,
-        max_input_parquet_bytes_per_partition,
+        max_compact_size,
         shard_config,
         min_num_l1_files_to_compact,
         process_once,
@@ -77,7 +77,7 @@ pub fn log_config(config: &Config) {
         %partitions_source,
         shadow_mode,
         ignore_partition_skip_marker,
-        max_input_parquet_bytes_per_partition,
+        max_compact_size,
         ?shard_cfg_n_shards,
         ?shard_cfg_shard_id,
         min_num_l1_files_to_compact,


@@ -91,9 +91,9 @@ pub struct Config {
     /// This is mostly useful for debugging.
     pub ignore_partition_skip_marker: bool,

-    /// Maximum input bytes (in parquet) per partition. If there is more data, we ignore the partition (for now) as a
-    /// self-protection mechanism.
-    pub max_input_parquet_bytes_per_partition: usize,
+    /// Maximum input bytes (from parquet files) per compaction. If there is more data, we ignore
+    /// the partition (for now) as a self-protection mechanism.
+    pub max_compact_size: usize,

     /// Shard config (if sharding should be enabled).
     pub shard_config: Option<ShardConfig>,


@@ -86,7 +86,7 @@ async fn test_compact_target_level() {
         .await
         // Ensure we have enough resource to compact the files
         .with_max_num_files_per_plan(10)
-        .with_max_input_parquet_bytes_per_partition_relative_to_total_size(1000)
+        .with_max_compact_size_relative_to_total_size(1000)
         .with_min_num_l1_files_to_compact(2)
         .build()
         .await;
@@ -199,7 +199,7 @@ async fn test_compact_large_overlapes() {
         // the test setup does not exceed number of files limit
         .with_max_num_files_per_plan(10)
         // the test setup to have total file size exceed max compact size limit
-        .with_max_input_parquet_bytes_per_partition_relative_to_total_size(-1)
+        .with_max_compact_size_relative_to_total_size(-1)
         .with_min_num_l1_files_to_compact(2)
         .with_max_desired_file_size_bytes(100 * 1024 * 1024)
         .build()
@@ -307,7 +307,7 @@ async fn test_compact_large_overlape_2() {
         // the test setup does not exceed number of files limit
         .with_max_num_files_per_plan(10)
         // the test setup exceed max compact size limit
-        .with_max_input_parquet_bytes_per_partition_relative_to_total_size(-1)
+        .with_max_compact_size_relative_to_total_size(-1)
         .with_min_num_l1_files_to_compact(2)
         .with_max_desired_file_size_bytes(100 * 1024 * 1024)
         .build()


@@ -393,7 +393,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
     // limit the plan to combining 30 mb at a time
     let setup = layout_setup_builder()
         .await
-        .with_max_input_parquet_bytes_per_partition(30 * ONE_MB as usize)
+        .with_max_compact_size(30 * ONE_MB as usize)
+        // implicit max desired file size means it is 100MB and greater than the max compact size
         .build()
         .await;
@@ -1083,7 +1083,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
     // limit the plan to combining 30 mb at a time
     let setup = layout_setup_builder()
         .await
-        .with_max_input_parquet_bytes_per_partition(30 * ONE_MB as usize)
+        .with_max_compact_size(30 * ONE_MB as usize)
+        // max desired file size is smaller than the max compact size
         .with_max_desired_file_size_bytes(10 * ONE_MB)
         .build()


@@ -83,7 +83,7 @@ pub(crate) async fn layout_setup_builder() -> TestSetupBuilder<false> {
         .with_percentage_max_file_size(20)
         .with_split_percentage(80)
         .with_max_num_files_per_plan(200)
-        .with_max_input_parquet_bytes_per_partition(256 * ONE_MB as usize)
+        .with_max_compact_size(256 * ONE_MB as usize)
         .with_min_num_l1_files_to_compact(10)
         .with_max_desired_file_size_bytes(100 * ONE_MB)
         .simulate_without_object_store()


@@ -135,7 +135,7 @@ impl TestSetupBuilder<false> {
             partitions_source: PartitionsSourceConfig::CatalogRecentWrites,
             shadow_mode: false,
             ignore_partition_skip_marker: false,
-            max_input_parquet_bytes_per_partition: usize::MAX,
+            max_compact_size: usize::MAX,
             shard_config: None,
             min_num_l1_files_to_compact: MIN_NUM_L1_FILES_TO_COMPACT,
             process_once: true,
@@ -486,15 +486,12 @@ impl TestSetupBuilder<false> {
 }

 impl TestSetupBuilder<true> {
-    /// Set max_input_parquet_bytes_per_partition
-    pub fn with_max_input_parquet_bytes_per_partition_relative_to_total_size(
-        self,
-        delta: isize,
-    ) -> Self {
+    /// Set max_compact_size
+    pub fn with_max_compact_size_relative_to_total_size(self, delta: isize) -> Self {
         let total_size = self.files.iter().map(|f| f.file_size_bytes).sum::<i64>();
         Self {
             config: Config {
-                max_input_parquet_bytes_per_partition: (total_size as isize + delta) as usize,
+                max_compact_size: (total_size as isize + delta) as usize,
                 ..self.config
             },
             ..self
@@ -548,12 +545,9 @@ impl<const WITH_FILES: bool> TestSetupBuilder<WITH_FILES> {
         self
     }

-    /// Set max_input_parquet_bytes_per_partition
-    pub fn with_max_input_parquet_bytes_per_partition(
-        mut self,
-        max_input_parquet_bytes_per_partition: usize,
-    ) -> Self {
-        self.config.max_input_parquet_bytes_per_partition = max_input_parquet_bytes_per_partition;
+    /// Set max_compact_size
+    pub fn with_max_compact_size(mut self, max_compact_size: usize) -> Self {
+        self.config.max_compact_size = max_compact_size;
         self
     }
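The `_relative_to_total_size` variant sets the limit as an offset from the combined size of the test's files, which is what lets the integration tests above force either side of the limit. A self-contained sketch of that arithmetic, with a hypothetical total size:

```rust
fn main() {
    // Hypothetical stand-in for the sum of file_size_bytes in a test setup.
    let total_size: i64 = 1_000_000;

    for delta in [-1_isize, 1000] {
        // Mirrors the builder: (total_size as isize + delta) as usize.
        let max_compact_size = (total_size as isize + delta) as usize;
        // delta = -1 puts the limit just below the total, guaranteeing the
        // "exceeds max compact size" path; delta = 1000 leaves headroom.
        println!("delta {delta:>5} -> max_compact_size {max_compact_size}");
    }
}
```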


@@ -479,7 +479,7 @@ impl Config {
             partition_filter: None,
             shadow_mode: false,
             ignore_partition_skip_marker: false,
-            max_input_parquet_bytes_per_partition: 268_435_456, // 256 MB
+            max_compact_size: 268_435_456, // 256 MB
             shard_count: None,
             shard_id: None,
             min_num_l1_files_to_compact: 1,


@@ -32,7 +32,7 @@ use trace::TraceCollector;
 const TOPIC: &str = "iox-shared";
 const TRANSITION_SHARD_INDEX: i32 = TRANSITION_SHARD_NUMBER;

-// Minimum multiple between max_desired_file_size_bytes and max_input_parquet_bytes_per_partition
+// Minimum multiple between max_desired_file_size_bytes and max_compact_size
 // Since max_desired_file_size_bytes is softly enforced, actual file sizes can exceed it, and a
 // single compaction job must be able to compact >1 max sized file, so the multiple should be at least 3.
 const MIN_COMPACT_SIZE_MULTIPLE: i64 = 3;
@@ -178,13 +178,10 @@ pub async fn create_compactor2_server_type(
     };

     if compactor_config.max_desired_file_size_bytes as i64 * MIN_COMPACT_SIZE_MULTIPLE
-        > compactor_config
-            .max_input_parquet_bytes_per_partition
-            .try_into()
-            .unwrap()
+        > compactor_config.max_compact_size.try_into().unwrap()
     {
-        panic!("max_input_parquet_bytes_per_partition ({}) must be at least {} times larger than max_desired_file_size_bytes ({})",
-            compactor_config.max_input_parquet_bytes_per_partition,
+        panic!("max_compact_size ({}) must be at least {} times larger than max_desired_file_size_bytes ({})",
+            compactor_config.max_compact_size,
             MIN_COMPACT_SIZE_MULTIPLE,
             compactor_config.max_desired_file_size_bytes);
     }
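With the new 300MB default, this check passes exactly at the boundary, assuming the 100MB `max_desired_file_size_bytes` default referenced by the layout tests above. A quick sketch of the arithmetic:

```rust
// Minimum multiple between max_desired_file_size_bytes and max_compact_size,
// as defined in the hunk above.
const MIN_COMPACT_SIZE_MULTIPLE: i64 = 3;

fn main() {
    let max_desired_file_size_bytes: i64 = 104_857_600; // assumed 100MB default
    let max_compact_size: i64 = 314_572_800; // new 300MB default = 3 * 100MB

    // 100MB * 3 = 300MB, which is not greater than 300MB, so the panic above is
    // not triggered: the new default sits exactly at the minimum allowed ratio.
    assert!(max_desired_file_size_bytes * MIN_COMPACT_SIZE_MULTIPLE <= max_compact_size);
    println!("defaults satisfy the MIN_COMPACT_SIZE_MULTIPLE invariant");
}
```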
@@ -212,8 +209,7 @@ pub async fn create_compactor2_server_type(
         partitions_source,
         shadow_mode: compactor_config.shadow_mode,
         ignore_partition_skip_marker: compactor_config.ignore_partition_skip_marker,
-        max_input_parquet_bytes_per_partition: compactor_config
-            .max_input_parquet_bytes_per_partition,
+        max_compact_size: compactor_config.max_compact_size,
         shard_config,
         min_num_l1_files_to_compact: compactor_config.min_num_l1_files_to_compact,
         process_once: compactor_config.process_once,