Merge branch 'main' into dom/mutex-pushdown

pull/24376/head
kodiakhq[bot] 2022-11-07 15:54:54 +00:00 committed by GitHub
commit 4c590bdb43
8 changed files with 114 additions and 14 deletions


@@ -176,6 +176,26 @@ macro_rules! gen_compactor_config {
action
)]
pub minutes_without_new_writes_to_be_cold: u64,
/// When querying for partitions with data for hot compaction, how many hours to look
/// back for a first pass.
#[clap(
long = "compaction-hot-partition-hours-threshold-1",
env = "INFLUXDB_IOX_COMPACTION_HOT_PARTITION_HOURS_THRESHOLD_1",
default_value = "4",
action
)]
pub hot_compaction_hours_threshold_1: u64,
/// When querying for partitions with data for hot compaction, how many hours to look
/// back for a second pass if we found nothing in the first pass.
#[clap(
long = "compaction-hot-partition-hours-threshold-2",
env = "INFLUXDB_IOX_COMPACTION_HOT_PARTITION_HOURS_THRESHOLD_2",
default_value = "24",
action
)]
pub hot_compaction_hours_threshold_2: u64,
}
};
}
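
The two new knobs above follow the crate's existing clap pattern: a long flag, an environment variable, and a default. Below is a minimal, self-contained sketch (not part of this commit) of how the flags resolve, assuming clap v3 derive with the `env` feature enabled; precedence is CLI value over env var over default.

use clap::Parser;

/// Illustrative standalone struct mirroring the attributes added above.
#[derive(Debug, Parser)]
struct HotCompactionThresholds {
    #[clap(
        long = "compaction-hot-partition-hours-threshold-1",
        env = "INFLUXDB_IOX_COMPACTION_HOT_PARTITION_HOURS_THRESHOLD_1",
        default_value = "4",
        action
    )]
    hot_compaction_hours_threshold_1: u64,

    #[clap(
        long = "compaction-hot-partition-hours-threshold-2",
        env = "INFLUXDB_IOX_COMPACTION_HOT_PARTITION_HOURS_THRESHOLD_2",
        default_value = "24",
        action
    )]
    hot_compaction_hours_threshold_2: u64,
}

fn main() {
    // With no flag and no env var set, this prints "4 24".
    let t = HotCompactionThresholds::parse();
    println!("{} {}", t.hot_compaction_hours_threshold_1, t.hot_compaction_hours_threshold_2);
}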
@@ -204,6 +224,8 @@ impl CompactorOnceConfig {
.min_num_rows_allocated_per_record_batch_to_datafusion_plan,
max_num_compacting_files: self.max_num_compacting_files,
minutes_without_new_writes_to_be_cold: self.minutes_without_new_writes_to_be_cold,
hot_compaction_hours_threshold_1: self.hot_compaction_hours_threshold_1,
hot_compaction_hours_threshold_2: self.hot_compaction_hours_threshold_2,
}
}
}


@@ -112,6 +112,9 @@ mod tests {
use parquet_file::storage::StorageId;
use std::collections::HashMap;
const DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_1: u64 = 4;
const DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_2: u64 = 24;
#[tokio::test]
async fn test_compact_remaining_level_0_files_many_files() {
test_helpers::maybe_start_logging();
@@ -701,6 +704,8 @@ mod tests {
min_num_rows_allocated_per_record_batch_to_datafusion_plan: 1,
max_num_compacting_files: 20,
minutes_without_new_writes_to_be_cold: 10,
hot_compaction_hours_threshold_1: DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_1,
hot_compaction_hours_threshold_2: DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_2,
}
}


@@ -258,7 +258,8 @@ impl Compactor {
/// Return a list of partitions that:
///
/// - Have not received any writes in 8 hours (determined by all level 0 and level 1 parquet
/// files having a created_at time older than 8 hours ago)
/// files having a created_at time older than 8 hours ago). Note that 8 is the default,
/// but it's configurable.
/// - Have some level 0 or level 1 parquet files that need to be upgraded or compacted
/// - Sorted by the number of level 0 files + number of level 1 files descending
pub async fn cold_partitions_to_compact(
@@ -559,6 +560,9 @@ pub mod tests {
use parquet_file::storage::StorageId;
use uuid::Uuid;
const DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_1: u64 = 4;
const DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_2: u64 = 24;
impl PartitionCompactionCandidateWithInfo {
pub(crate) async fn from_test_partition(test_partition: &TestPartition) -> Self {
Self {
@@ -690,6 +694,8 @@ pub mod tests {
min_num_rows_allocated_per_record_batch_to_datafusion_plan: 100,
max_num_compacting_files: 20,
minutes_without_new_writes_to_be_cold: 10,
hot_compaction_hours_threshold_1: DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_1,
hot_compaction_hours_threshold_2: DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_2,
}
}
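
For context on the cold-partition criterion documented above, here is a hypothetical helper (not from this commit) showing how a "cold" cutoff can be derived from the configurable `minutes_without_new_writes_to_be_cold` value. The real code uses the crate's `TimeProvider` and catalog timestamps; `SystemTime` stands in here to keep the sketch self-contained.

use std::time::{Duration, SystemTime};

/// A partition counts as cold when every level 0/1 parquet file's
/// `created_at` is older than this cutoff.
fn cold_cutoff(now: SystemTime, minutes_without_new_writes_to_be_cold: u64) -> SystemTime {
    now - Duration::from_secs(minutes_without_new_writes_to_be_cold * 60)
}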


@@ -159,6 +159,14 @@ pub struct CompactorConfig {
/// Minutes without any new data before a partition is considered cold
pub minutes_without_new_writes_to_be_cold: u64,
/// When querying for partitions with data for hot compaction, how many hours to look
/// back for a first pass.
pub hot_compaction_hours_threshold_1: u64,
/// When querying for partitions with data for hot compaction, how many hours to look
/// back for a second pass if we found nothing in the first pass.
pub hot_compaction_hours_threshold_2: u64,
}
/// How long to pause before checking for more work again if there was


@@ -60,9 +60,10 @@ pub async fn compact(compactor: Arc<Compactor>) -> usize {
/// Return a list of the most recent highest ingested throughput partitions.
/// The highest throughput partitions are prioritized as follows:
/// 1. If there are partitions with new ingested files within the last 4 hours, pick them.
/// 1. If there are partitions with new ingested files within the last 4 hours (the default, but
/// configurable), pick them.
/// 2. If no new ingested files in the last 4 hours, will look for partitions with new writes
/// within the last 24 hours.
/// within the last 24 hours (the default, but configurable).
/// 3. If there are no ingested files within the last 24 hours, will look for partitions
/// with any new ingested files in the past.
///
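
The three-step prioritization above amounts to probing successively wider time windows and stopping at the first that yields candidates. A hypothetical, generic sketch of that control flow (names are illustrative, not from this commit):

/// Probe each cutoff in order (threshold_1's window first, then threshold_2's)
/// and return the first non-empty set of candidates; an empty result tells the
/// caller to fall back to an unbounded search over all past ingested files.
fn first_non_empty<C, T>(cutoffs: &[C], query: impl Fn(&C) -> Vec<T>) -> Vec<T> {
    for cutoff in cutoffs {
        let candidates = query(cutoff);
        if !candidates.is_empty() {
            return candidates;
        }
    }
    Vec::new()
}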
@@ -84,7 +85,11 @@ pub(crate) async fn hot_partitions_to_compact(
// Get the most recent highest ingested throughput partitions within the first configured
// lookback window (default 4 hours); if none are found, widen to the second (default 24 hours).
let query_times = query_times(compactor.time_provider());
let query_times = query_times(
compactor.time_provider(),
compactor.config.hot_compaction_hours_threshold_1,
compactor.config.hot_compaction_hours_threshold_2,
);
for &shard_id in &compactor.shards {
let mut partitions = hot_partitions_for_shard(
@@ -191,8 +196,12 @@ async fn hot_partitions_for_shard(
Ok(Vec::new())
}
fn query_times(time_provider: Arc<dyn TimeProvider>) -> Vec<(u64, Timestamp)> {
[4, 24]
fn query_times(
time_provider: Arc<dyn TimeProvider>,
hours_threshold_1: u64,
hours_threshold_2: u64,
) -> Vec<(u64, Timestamp)> {
[hours_threshold_1, hours_threshold_2]
.iter()
.map(|&num_hours| {
(
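
The hunk above is truncated; for readability, here is a plausible reconstruction of the full `query_times` body. The conversion from hours to a catalog `Timestamp` is an assumption inferred from the visible signature and call sites, not confirmed by this diff.

// Assumes the module's existing imports: std::sync::Arc, std::time::Duration,
// iox_time::TimeProvider, and data_types::Timestamp.
fn query_times(
    time_provider: Arc<dyn TimeProvider>,
    hours_threshold_1: u64,
    hours_threshold_2: u64,
) -> Vec<(u64, Timestamp)> {
    [hours_threshold_1, hours_threshold_2]
        .iter()
        .map(|&num_hours| {
            (
                num_hours,
                // Assumed conversion: now minus N hours, as nanoseconds since the epoch.
                Timestamp::new(
                    (time_provider.now() - Duration::from_secs(60 * 60 * num_hours))
                        .timestamp_nanos(),
                ),
            )
        })
        .collect()
}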
@@ -213,6 +222,9 @@ mod tests {
use parquet_file::storage::{ParquetStorage, StorageId};
use std::sync::Arc;
const DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_1: u64 = 4;
const DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_2: u64 = 24;
struct TestSetup {
catalog: Arc<TestCatalog>,
shard1: Arc<TestShard>,
@@ -247,7 +259,11 @@ mod tests {
let candidates = hot_partitions_for_shard(
Arc::clone(&catalog.catalog),
shard1.shard.id,
&query_times(catalog.time_provider()),
&query_times(
catalog.time_provider(),
DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_1,
DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_2,
),
1,
1,
)
@@ -271,7 +287,11 @@ mod tests {
let candidates = hot_partitions_for_shard(
Arc::clone(&catalog.catalog),
shard1.shard.id,
&query_times(catalog.time_provider()),
&query_times(
catalog.time_provider(),
DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_1,
DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_2,
),
1,
1,
)
@@ -298,7 +318,11 @@ mod tests {
let candidates = hot_partitions_for_shard(
Arc::clone(&catalog.catalog),
shard1.shard.id,
&query_times(catalog.time_provider()),
&query_times(
catalog.time_provider(),
DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_1,
DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_2,
),
1,
1,
)
@@ -326,7 +350,11 @@ mod tests {
let candidates = hot_partitions_for_shard(
Arc::clone(&catalog.catalog),
shard1.shard.id,
&query_times(catalog.time_provider()),
&query_times(
catalog.time_provider(),
DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_1,
DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_2,
),
1,
1,
)
@@ -354,7 +382,11 @@ mod tests {
let candidates = hot_partitions_for_shard(
Arc::clone(&catalog.catalog),
shard1.shard.id,
&query_times(catalog.time_provider()),
&query_times(
catalog.time_provider(),
DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_1,
DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_2,
),
1,
1,
)
@@ -382,7 +414,11 @@ mod tests {
let candidates = hot_partitions_for_shard(
Arc::clone(&catalog.catalog),
shard1.shard.id,
&query_times(catalog.time_provider()),
&query_times(
catalog.time_provider(),
DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_1,
DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_2,
),
1,
1,
)
@@ -410,7 +446,11 @@ mod tests {
let candidates = hot_partitions_for_shard(
Arc::clone(&catalog.catalog),
shard1.shard.id,
&query_times(catalog.time_provider()),
&query_times(
catalog.time_provider(),
DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_1,
DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_2,
),
1,
1,
)
@@ -447,7 +487,11 @@ mod tests {
let candidates = hot_partitions_for_shard(
Arc::clone(&catalog.catalog),
shard1.shard.id,
&query_times(catalog.time_provider()),
&query_times(
catalog.time_provider(),
DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_1,
DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_2,
),
1,
// Even if we ask for 2 partitions per shard, we'll only get the one partition with
// writes within 4 hours
@@ -495,6 +539,8 @@ mod tests {
min_num_rows_allocated_per_record_batch_to_datafusion_plan: 100,
max_num_compacting_files: 20,
minutes_without_new_writes_to_be_cold: 10,
hot_compaction_hours_threshold_1: DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_1,
hot_compaction_hours_threshold_2: DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_2,
};
let compactor = Arc::new(Compactor::new(
vec![shard1.shard.id, shard2.shard.id],


@@ -448,6 +448,9 @@ pub mod tests {
sync::{Arc, Mutex},
};
const DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_1: u64 = 4;
const DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_2: u64 = 24;
// In tests that are verifying successful compaction not affected by the memory budget, this
// converts a `parquet_file_filtering::FilteredFiles` that has a `filter_result` of
// `parquet_file_filtering::FilterResult::Proceed` into a `ReadyToCompact` and panics if it
@@ -562,6 +565,8 @@ pub mod tests {
min_num_rows_allocated_per_record_batch_to_datafusion_plan: 2,
max_num_compacting_files: 20,
minutes_without_new_writes_to_be_cold: 10,
hot_compaction_hours_threshold_1: DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_1,
hot_compaction_hours_threshold_2: DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_2,
}
}
@@ -941,6 +946,8 @@ pub mod tests {
min_num_rows_allocated_per_record_batch_to_datafusion_plan: 100,
max_num_compacting_files: 20,
minutes_without_new_writes_to_be_cold: 10,
hot_compaction_hours_threshold_1: DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_1,
hot_compaction_hours_threshold_2: DEFAULT_HOT_COMPACTION_HOURS_THRESHOLD_2,
};
let metrics = Arc::new(metric::Registry::new());


@@ -427,6 +427,8 @@ impl Config {
min_num_rows_allocated_per_record_batch_to_datafusion_plan: 100,
max_num_compacting_files: 20,
minutes_without_new_writes_to_be_cold: 10,
hot_compaction_hours_threshold_1: 4,
hot_compaction_hours_threshold_2: 24,
};
let querier_config = QuerierConfig {


@@ -211,6 +211,8 @@ pub async fn build_compactor_from_config(
min_num_rows_allocated_per_record_batch_to_datafusion_plan,
max_num_compacting_files,
minutes_without_new_writes_to_be_cold,
hot_compaction_hours_threshold_1,
hot_compaction_hours_threshold_2,
..
} = compactor_config;
@@ -225,6 +227,8 @@ pub async fn build_compactor_from_config(
min_num_rows_allocated_per_record_batch_to_datafusion_plan,
max_num_compacting_files,
minutes_without_new_writes_to_be_cold,
hot_compaction_hours_threshold_1,
hot_compaction_hours_threshold_2,
};
Ok(compactor::compact::Compactor::new(