feat(persist): Add hot partition persistence counter metric

This counter tracks how many times each partition has been
enqueued for persistence as a result of hitting the pre-configured
limit.
pull/24376/head
Fraser Savage 2023-03-22 11:40:54 +00:00
parent 8518ab502b
commit 3fbab1a335
No known key found for this signature in database
GPG Key ID: DE47C33CE8C5C446
2 changed files with 26 additions and 4 deletions

View File

@ -320,8 +320,11 @@ where
// runs, such as if the configuration of the ingester was changed to persist
// smaller partitions in-between executions because it was OOMing during WAL
// replay (and the configuration was changed to mitigate it).
let hot_partition_persister =
HotPartitionPersister::new(Arc::clone(&persist_handle), persist_hot_partition_cost);
let hot_partition_persister = HotPartitionPersister::new(
Arc::clone(&persist_handle),
persist_hot_partition_cost,
&metrics,
);
let buffer = Arc::new(BufferTree::new(
namespace_name_provider,

View File

@ -1,5 +1,6 @@
use std::{fmt::Debug, sync::Arc};
use metric::U64Counter;
use observability_deps::tracing::info;
use parking_lot::{Mutex, MutexGuard};
@ -13,16 +14,29 @@ use super::queue::PersistQueue;
pub(crate) struct HotPartitionPersister<P> {
persist_handle: P,
max_estimated_persist_cost: usize,
/// A metric tracking the number of partitions persisted as "hot partitions".
persist_count: metric::Metric<U64Counter>,
}
impl<P> HotPartitionPersister<P>
where
P: PersistQueue + Clone + Sync + 'static,
{
pub fn new(persist_handle: P, max_estimated_persist_cost: usize) -> Self {
pub fn new(
persist_handle: P,
max_estimated_persist_cost: usize,
metrics: &metric::Registry,
) -> Self {
let persist_count = metrics
.register_metric::<U64Counter>(
"ingester_persist_hot_partition_enqueue_count",
"number of times persistence of a partition has been triggered because the persist cost exceeded the pre-configured limit",
);
Self {
persist_handle,
max_estimated_persist_cost,
persist_count,
}
}
@ -33,8 +47,9 @@ where
partition: Arc<Mutex<PartitionData>>,
mut guard: MutexGuard<'_, PartitionData>,
) {
let partition_id = guard.partition_id().get();
info!(
partition_id = guard.partition_id().get(),
partition_id = partition_id,
cost_estimate, "marking hot partition for persistence"
);
@ -49,6 +64,10 @@ where
// There is no need to await on the completion handle.
persist_handle.enqueue(partition, data).await;
});
// Update any exported metrics.
let attributes =
metric::Attributes::from([("partition_id", format!("{partition_id}").into())]);
self.persist_count.recorder(attributes).inc(1);
}
}