Merge branch 'main' into crepererum/fix_job_metrics

pull/24376/head
kodiakhq[bot] 2021-09-09 14:53:00 +00:00 committed by GitHub
commit a9e2ed4c14
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 83 additions and 64 deletions

View File

@ -98,11 +98,16 @@ pub struct DurationHistogram {
impl DurationHistogram { impl DurationHistogram {
pub fn record(&self, value: Duration) { pub fn record(&self, value: Duration) {
self.inner.record( self.record_multiple(value, 1)
}
pub fn record_multiple(&self, value: Duration, count: u64) {
self.inner.record_multiple(
value value
.as_nanos() .as_nanos()
.try_into() .try_into()
.expect("cannot fit duration into u64"), .expect("cannot fit duration into u64"),
count,
) )
} }
} }

View File

@ -50,6 +50,10 @@ impl U64Histogram {
} }
pub fn record(&self, value: u64) { pub fn record(&self, value: u64) {
self.record_multiple(value, 1)
}
pub fn record_multiple(&self, value: u64, count: u64) {
let mut state = self.shared.lock(); let mut state = self.shared.lock();
if let Some(bucket) = state if let Some(bucket) = state
.buckets .buckets
@ -57,8 +61,8 @@ impl U64Histogram {
.find(|bucket| value <= bucket.le) .find(|bucket| value <= bucket.le)
.as_mut() .as_mut()
{ {
bucket.count = bucket.count.wrapping_add(1); bucket.count = bucket.count.wrapping_add(count);
state.total = state.total.wrapping_add(value); state.total = state.total.wrapping_add(value * count);
} }
} }
} }

View File

@ -2319,20 +2319,27 @@ mod tests {
summary.record(Utc.timestamp_nanos(650000000000)); summary.record(Utc.timestamp_nanos(650000000000));
summary.record(Utc.timestamp_nanos(650000000010)); summary.record(Utc.timestamp_nanos(650000000010));
for (minute, count) in summary.cumulative_counts() { let mut reporter = metric::RawReporter::default();
let minute = (minute * 60).to_string(); test_db.metrics_registry_v2.report(&mut reporter);
test_db
.metric_registry let observation = reporter
.has_metric_family("catalog_row_time_seconds_bucket") .metric("catalog_row_time")
.with_attributes(&[ .unwrap()
("svr_id", "1"), .observation(&[("db_name", "placeholder"), ("table", "write_metrics_test")])
("db_name", "placeholder"), .unwrap();
("table", "write_metrics_test"),
("le", minute.as_str()), let histogram = match observation {
]) Observation::DurationHistogram(histogram) => histogram,
.counter() _ => unreachable!(),
.eq(count as _) };
.unwrap(); assert_eq!(histogram.buckets.len(), 60);
for ((minute, count), observation) in
summary.counts.iter().enumerate().zip(&histogram.buckets)
{
let minute = Duration::from_secs((minute * 60) as u64);
assert_eq!(observation.le, minute);
assert_eq!(*count as u64, observation.count)
} }
} }

View File

@ -242,7 +242,7 @@ pub struct ChunkMetrics {
pub(super) memory_metrics: StorageGauge, pub(super) memory_metrics: StorageGauge,
/// Track ingested timestamps /// Track ingested timestamps
pub(super) timestamp_histogram: TimestampHistogram, pub(super) timestamp_histogram: Option<TimestampHistogram>,
} }
impl ChunkMetrics { impl ChunkMetrics {
@ -497,7 +497,9 @@ impl CatalogChunk {
/// `time_of_write` is the wall clock time of the write /// `time_of_write` is the wall clock time of the write
/// `timestamps` is a summary of the row timestamps contained in the write /// `timestamps` is a summary of the row timestamps contained in the write
pub fn record_write(&mut self, time_of_write: DateTime<Utc>, timestamps: &TimestampSummary) { pub fn record_write(&mut self, time_of_write: DateTime<Utc>, timestamps: &TimestampSummary) {
self.metrics.timestamp_histogram.add(timestamps); if let Some(timestamp_histogram) = self.metrics.timestamp_histogram.as_ref() {
timestamp_histogram.add(timestamps)
}
self.access_recorder.record_access_now(); self.access_recorder.record_access_now();
self.time_of_first_write = self.time_of_first_write.min(time_of_write); self.time_of_first_write = self.time_of_first_write.min(time_of_write);

View File

@ -1,10 +1,15 @@
use crate::db::catalog::chunk::ChunkMetrics;
use data_types::write_summary::TimestampSummary;
use metrics::{Counter, Gauge, GaugeValue, Histogram, KeyValue, MetricObserverBuilder};
use parking_lot::Mutex;
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration;
use parking_lot::Mutex;
use data_types::write_summary::TimestampSummary;
use metric::{Attributes, DurationHistogram, DurationHistogramOptions};
use metrics::{Counter, Gauge, GaugeValue, Histogram, KeyValue};
use tracker::{LockMetrics, RwLock}; use tracker::{LockMetrics, RwLock};
use crate::db::catalog::chunk::ChunkMetrics;
const TIMESTAMP_METRICS_ENABLE_ENV: &str = "INFLUXDB_IOX_ROW_TIMESTAMP_METRICS"; const TIMESTAMP_METRICS_ENABLE_ENV: &str = "INFLUXDB_IOX_ROW_TIMESTAMP_METRICS";
fn report_timestamp_metrics(table_name: &str) -> bool { fn report_timestamp_metrics(table_name: &str) -> bool {
std::env::var(TIMESTAMP_METRICS_ENABLE_ENV) std::env::var(TIMESTAMP_METRICS_ENABLE_ENV)
@ -53,26 +58,29 @@ impl CatalogMetrics {
} }
pub(super) fn new_table_metrics(&self, table_name: &str) -> TableMetrics { pub(super) fn new_table_metrics(&self, table_name: &str) -> TableMetrics {
let mut attributes = metric::Attributes::from([ let base_attributes = metric::Attributes::from([
("db_name", self.db_name.to_string().into()), ("db_name", self.db_name.to_string().into()),
("table", table_name.to_string().into()), ("table", table_name.to_string().into()),
]); ]);
attributes.insert("lock", "table"); let mut lock_attributes = base_attributes.clone();
lock_attributes.insert("lock", "table");
let table_lock_metrics = Arc::new(LockMetrics::new( let table_lock_metrics = Arc::new(LockMetrics::new(
self.metrics_registry.as_ref(), self.metrics_registry.as_ref(),
attributes.clone(), lock_attributes.clone(),
)); ));
attributes.insert("lock", "partition"); lock_attributes.insert("lock", "partition");
let partition_lock_metrics = Arc::new(LockMetrics::new( let partition_lock_metrics = Arc::new(LockMetrics::new(
self.metrics_registry.as_ref(), self.metrics_registry.as_ref(),
attributes.clone(), lock_attributes.clone(),
)); ));
attributes.insert("lock", "chunk"); lock_attributes.insert("lock", "chunk");
let chunk_lock_metrics = let chunk_lock_metrics = Arc::new(LockMetrics::new(
Arc::new(LockMetrics::new(self.metrics_registry.as_ref(), attributes)); self.metrics_registry.as_ref(),
lock_attributes,
));
let storage_gauge = self.metrics_domain.register_gauge_metric_with_attributes( let storage_gauge = self.metrics_domain.register_gauge_metric_with_attributes(
"loaded", "loaded",
@ -88,14 +96,8 @@ impl CatalogMetrics {
&[KeyValue::new("table", table_name.to_string())], &[KeyValue::new("table", table_name.to_string())],
); );
let timestamp_histogram = Default::default(); let timestamp_histogram = report_timestamp_metrics(table_name)
if report_timestamp_metrics(table_name) { .then(|| TimestampHistogram::new(self.metrics_registry.as_ref(), base_attributes));
self.metrics_domain.register_observer(
None,
&[KeyValue::new("table", table_name.to_string())],
&timestamp_histogram,
);
}
TableMetrics { TableMetrics {
metrics_domain: Arc::clone(&self.metrics_domain), metrics_domain: Arc::clone(&self.metrics_domain),
@ -134,7 +136,7 @@ pub struct TableMetrics {
chunk_lock_metrics: Arc<LockMetrics>, chunk_lock_metrics: Arc<LockMetrics>,
/// Track ingested timestamps /// Track ingested timestamps
timestamp_histogram: TimestampHistogram, timestamp_histogram: Option<TimestampHistogram>,
} }
impl TableMetrics { impl TableMetrics {
@ -192,7 +194,7 @@ pub struct PartitionMetrics {
chunk_lock_metrics: Arc<LockMetrics>, chunk_lock_metrics: Arc<LockMetrics>,
/// Track ingested timestamps /// Track ingested timestamps
timestamp_histogram: TimestampHistogram, timestamp_histogram: Option<TimestampHistogram>,
} }
impl PartitionMetrics { impl PartitionMetrics {
@ -337,35 +339,34 @@ impl StorageGauge {
/// A Histogram-inspired metric for reporting `TimestampSummary` /// A Histogram-inspired metric for reporting `TimestampSummary`
/// ///
/// This is partly to workaround limitations defining custom Histogram bucketing in OTEL
/// and also because it can be implemented more efficiently as the set of values is fixed
///
/// Like `TimestampSummary`, this is bucketed based on minute within the hour /// Like `TimestampSummary`, this is bucketed based on minute within the hour
/// It will therefore wrap around on the hour /// It will therefore wrap around on the hour
#[derive(Debug, Clone, Default)] #[derive(Debug, Clone)]
pub(super) struct TimestampHistogram { pub(super) struct TimestampHistogram {
inner: Arc<Mutex<TimestampSummary>>, inner: metric::DurationHistogram,
} }
impl TimestampHistogram { impl TimestampHistogram {
pub(super) fn add(&self, summary: &TimestampSummary) { fn new(registry: &metric::Registry, attributes: impl Into<Attributes>) -> Self {
self.inner.lock().merge(summary) let histogram: metric::Metric<DurationHistogram> = registry.register_metric_with_options(
} "catalog_row_time",
}
impl metrics::MetricObserver for &TimestampHistogram {
fn register(self, builder: MetricObserverBuilder<'_>) {
let inner = Arc::clone(&self.inner);
builder.register_histogram_bucket(
"row_time",
Some("seconds"),
"The cumulative distribution of the ingested row timestamps", "The cumulative distribution of the ingested row timestamps",
move |result| { || {
let inner = inner.lock(); DurationHistogramOptions::new(
for (min, total) in inner.cumulative_counts() { (0..60).map(|minute| Duration::from_secs(minute * 60)),
result.observe(total, &[KeyValue::new("le", (min * 60).to_string())]) )
}
}, },
) );
Self {
inner: histogram.recorder(attributes),
}
}
pub(super) fn add(&self, summary: &TimestampSummary) {
for (minute, count) in summary.counts.iter().enumerate() {
self.inner
.record_multiple(Duration::from_secs(minute as u64 * 60), *count as u64)
}
} }
} }

View File

@ -33,7 +33,7 @@ pub async fn test_row_timestamp() {
let db_name_attribute = format!("db_name=\"{}\"", db_name); let db_name_attribute = format!("db_name=\"{}\"", db_name);
// Should only be enabled for the system table // Should only be enabled for the system table
assert_eq!(lines.len(), 60); assert_eq!(lines.len(), 61);
assert!(lines assert!(lines
.iter() .iter()
.all(|x| x.contains("table=\"system\"") && x.contains(&db_name_attribute))); .all(|x| x.contains("table=\"system\"") && x.contains(&db_name_attribute)));