feat: run many compact partitions in parallel (#5230)
* feat: run many compact partitions in parallel * refactor: Use rust futures fu to run compactor jobs in parallel * chore: Apply suggestions from code review Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org> Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>pull/24376/head
parent
7eebe061a6
commit
fcce00bf09
|
@ -5,7 +5,7 @@ use backoff::{Backoff, BackoffConfig};
|
||||||
use data_types::SequencerId;
|
use data_types::SequencerId;
|
||||||
use futures::{
|
use futures::{
|
||||||
future::{BoxFuture, Shared},
|
future::{BoxFuture, Shared},
|
||||||
FutureExt, TryFutureExt,
|
FutureExt, StreamExt, TryFutureExt,
|
||||||
};
|
};
|
||||||
use iox_catalog::interface::Catalog;
|
use iox_catalog::interface::Catalog;
|
||||||
use iox_query::exec::Executor;
|
use iox_query::exec::Executor;
|
||||||
|
@ -296,23 +296,53 @@ async fn run_compactor(compactor: Arc<Compactor>, shutdown: CancellationToken) {
|
||||||
debug!(n_candidates, "found compaction candidates");
|
debug!(n_candidates, "found compaction candidates");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Serially compact all candidates
|
|
||||||
// TODO: we will parallelize this when everything runs smoothly in serial
|
|
||||||
let start_time = compactor.time_provider.now();
|
let start_time = compactor.time_provider.now();
|
||||||
for c in candidates {
|
|
||||||
let partition_id = c.candidate.partition_id;
|
// Repeat compacting n partitions in parallel until all candidates are compacted.
|
||||||
debug!(?partition_id, "compaction starting");
|
// Concurrency level calculation (this is estimated from previous experiments. The actual resource
|
||||||
let compaction_result = crate::compact_partition(&compactor, c).await;
|
// management will be more complicated and a future feature):
|
||||||
|
// . Each `compact partititon` takes max of this much memory input_size_threshold_bytes
|
||||||
|
// . We have this memory budget: max_concurrent_compaction_size_bytes
|
||||||
|
// --> num_parallel_partitions = max_concurrent_compaction_size_bytes/ input_size_threshold_bytes
|
||||||
|
let num_parallel_partitions = (compactor.config.max_concurrent_compaction_size_bytes
|
||||||
|
/ compactor.config.input_size_threshold_bytes)
|
||||||
|
as usize;
|
||||||
|
|
||||||
|
futures::stream::iter(candidates)
|
||||||
|
.map(|p| {
|
||||||
|
// run compaction in its own task
|
||||||
|
let comp = Arc::clone(&compactor);
|
||||||
|
tokio::task::spawn(async move {
|
||||||
|
let partition_id = p.candidate.partition_id;
|
||||||
|
let compaction_result = crate::compact_partition(&comp, p).await;
|
||||||
|
|
||||||
match compaction_result {
|
match compaction_result {
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
warn!(?partition_id, "compaction failed: {:?}", e);
|
warn!(?e, ?partition_id, "compaction failed");
|
||||||
}
|
}
|
||||||
Ok(_) => {
|
Ok(_) => {
|
||||||
debug!(?partition_id, "compaction complete");
|
debug!(?partition_id, "compaction complete");
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
})
|
||||||
|
})
|
||||||
|
// Assume we have enough resources to run
|
||||||
|
// num_parallel_partitions compactions in parallel
|
||||||
|
.buffer_unordered(num_parallel_partitions)
|
||||||
|
// report any JoinErrors (aka task panics)
|
||||||
|
.map(|join_result| {
|
||||||
|
if let Err(e) = join_result {
|
||||||
|
warn!(?e, "compaction task failed");
|
||||||
}
|
}
|
||||||
}
|
Ok(())
|
||||||
|
})
|
||||||
|
// Errors are reported during execution, so ignore results here
|
||||||
|
// https://stackoverflow.com/questions/64443085/how-to-run-stream-to-completion-in-rust-using-combinators-other-than-for-each
|
||||||
|
.forward(futures::sink::drain())
|
||||||
|
.await
|
||||||
|
.ok();
|
||||||
|
|
||||||
|
// Done compacting all candidates in the cycle, record its time
|
||||||
if let Some(delta) = compactor
|
if let Some(delta) = compactor
|
||||||
.time_provider
|
.time_provider
|
||||||
.now()
|
.now()
|
||||||
|
|
Loading…
Reference in New Issue