Merge pull request #1879 from influxdata/cn/kafka-read-metrics-and-e2e-tests
feat: Collect metrics on failed ingest from Kafka
commit 299269a161
@@ -235,6 +235,9 @@ pub struct Db {
     /// Metric labels
     metric_labels: Vec<KeyValue>,
 
+    /// Metrics for tracking the number of errors that occur while ingesting data
+    ingest_errors: metrics::Counter,
+
     /// Optionally connect to a write buffer for either buffering writes or reading buffered writes
     write_buffer: Option<WriteBufferConfig>,
 
@@ -267,6 +270,12 @@ impl Db {
         let store = Arc::clone(&database_to_commit.object_store);
         let metrics_registry = Arc::clone(&database_to_commit.catalog.metrics_registry);
         let metric_labels = database_to_commit.catalog.metric_labels.clone();
+
+        let ingest_domain =
+            metrics_registry.register_domain_with_labels("ingest", metric_labels.clone());
+        let ingest_errors =
+            ingest_domain.register_counter_metric("errors", None, "Number of errors during ingest");
+
         let catalog = Arc::new(database_to_commit.catalog);
 
         let catalog_access = QueryCatalogAccess::new(
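The interesting part of this hunk is the two-step registration: a domain fixes the metric prefix and label set once, and individual metrics are then registered against it. A minimal sketch of that flow, using only the calls visible in this diff (the label value and the meaning of the `None` argument are assumptions on my part):

```rust
// Sketch of the wiring above, not the actual Db::new body.
// KeyValue::new and the label value are assumed; the test further down
// checks the labels ("db_name", "placeholder") and ("svr_id", "1").
let metric_labels = vec![KeyValue::new("db_name", "placeholder")];

// The "ingest" domain prefixes its metrics, so the counter registered as
// "errors" surfaces as the family "ingest_errors_total".
let ingest_domain =
    metrics_registry.register_domain_with_labels("ingest", metric_labels.clone());

// `None` is presumably an optional unit; the string is the help text.
let ingest_errors =
    ingest_domain.register_counter_metric("errors", None, "Number of errors during ingest");

// Every ingest failure then increments the counter by one:
ingest_errors.add(1);
```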
@@ -294,6 +303,7 @@ impl Db {
             worker_iterations_lifecycle: AtomicUsize::new(0),
             worker_iterations_cleanup: AtomicUsize::new(0),
             metric_labels,
+            ingest_errors,
             write_buffer: database_to_commit.write_buffer,
             cleanup_lock: Default::default(),
         }
@@ -584,8 +594,7 @@ impl Db {
                     Ok(sequenced_entry) => sequenced_entry,
                     Err(e) => {
                         debug!(?e, "Error converting write buffer data to SequencedEntry");
-                        // TODO: add to metrics
-                        // self.ingest_errors.add_with_labels(1, &labels);
+                        self.ingest_errors.add(1);
                         return;
                     }
                 };
@@ -597,8 +606,7 @@ impl Db {
                         ?e,
                         "Error storing SequencedEntry from write buffer in database"
                     );
-                    // TODO: add to metrics
-                    // self.ingest_errors.add_with_labels(1, &labels);
+                    self.ingest_errors.add(1);
                 }
             })
             .await
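Both TODOs above resolve the same way: the commented-out `add_with_labels` call is dropped in favor of a plain `add(1)`, presumably because the labels were already attached when the domain was registered. Condensed, the two error paths look like this (`sequenced_entry_result` and `store_sequenced_entry` are illustrative names for code the diff does not show):

```rust
// Condensed sketch of the two touched error paths; the stream plumbing
// around them is elided.
let sequenced_entry = match sequenced_entry_result {
    // `sequenced_entry_result` is an illustrative name
    Ok(sequenced_entry) => sequenced_entry,
    Err(e) => {
        debug!(?e, "Error converting write buffer data to SequencedEntry");
        self.ingest_errors.add(1); // conversion failed: count it, skip this entry
        return;
    }
};

if let Err(e) = self.store_sequenced_entry(sequenced_entry) {
    // `store_sequenced_entry` is an illustrative name
    debug!(
        ?e,
        "Error storing SequencedEntry from write buffer in database"
    );
    self.ingest_errors.add(1); // storing failed: also an ingest error
}
```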
@@ -1095,6 +1103,53 @@ mod tests {
         assert_batches_eq!(expected, &batches);
     }
 
+    #[tokio::test]
+    async fn error_converting_data_from_write_buffer_to_sequenced_entry_is_reported() {
+        let write_buffer = Arc::new(MockBufferForReading::new(vec![Err(String::from(
+            "Something bad happened on the way to creating a SequencedEntry",
+        )
+        .into())]));
+
+        let test_db = TestDb::builder()
+            .write_buffer(WriteBufferConfig::Reading(Arc::clone(&write_buffer) as _))
+            .build()
+            .await;
+
+        let db = Arc::new(test_db.db);
+        let metrics = test_db.metric_registry;
+
+        // do: start background task loop
+        let shutdown: CancellationToken = Default::default();
+        let shutdown_captured = shutdown.clone();
+        let db_captured = Arc::clone(&db);
+        let join_handle =
+            tokio::spawn(async move { db_captured.background_worker(shutdown_captured).await });
+
+        // check: after a while the error should be reported in the database's metrics
+        let t_0 = Instant::now();
+        loop {
+            let family = metrics.try_has_metric_family("ingest_errors_total");
+
+            if let Ok(metric) = family {
+                if metric
+                    .with_labels(&[("db_name", "placeholder"), ("svr_id", "1")])
+                    .counter()
+                    .eq(1.0)
+                    .is_ok()
+                {
+                    break;
+                }
+            }
+
+            assert!(t_0.elapsed() < Duration::from_secs(10));
+            tokio::time::sleep(Duration::from_millis(100)).await;
+        }
+
+        // do: stop background task loop
+        shutdown.cancel();
+        join_handle.await.unwrap();
+    }
+
     #[tokio::test]
     async fn cant_write_when_reading_from_write_buffer() {
         // Validate that writes are rejected if this database is reading from the write buffer
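The new test can't assert on the counter right away: the background worker drains the mock buffer asynchronously, so it polls the metric family with a 10-second cap instead. The same idiom, pulled out into a standalone helper for clarity (`condition_holds` is a stand-in for the metric check; nothing here beyond the timing constants comes from the PR):

```rust
use std::time::{Duration, Instant};

// Poll-until-true-or-timeout, as used in the test above. `condition_holds`
// stands in for the metric-family check.
async fn eventually(mut condition_holds: impl FnMut() -> bool) {
    let t_0 = Instant::now();
    loop {
        if condition_holds() {
            break;
        }
        // Fail the test instead of hanging if the condition never becomes true.
        assert!(t_0.elapsed() < Duration::from_secs(10));
        tokio::time::sleep(Duration::from_millis(100)).await;
    }
}
```

Cancelling the `CancellationToken` and awaiting the join handle afterwards ensures the worker actually stops before the test returns.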