From fc1870ff766cf35b8320aeb2bbfb41c093966546 Mon Sep 17 00:00:00 2001 From: Marco Neumann Date: Fri, 5 Aug 2022 10:50:31 +0000 Subject: [PATCH] fix: chunk pruning stats (#5319) - emit a warning if we cannot even attempt to prune chunks due to an error. This is always either a missing feature or a bug (even though it does not impact correctness but _only_ performance). Also see https://github.com/influxdata/conductor/issues/1107 - change metrics to clearly differentiate between "could not prune" and "not pruned" - add new "not pruned" observer hook (this was missing for some reason, the "pruned" hook existed though) --- iox_query/src/pruning.rs | 86 ++++++++++++++++----- querier/src/namespace/query_access.rs | 30 +++++++- querier/src/table/query_access.rs | 107 +++++++++++++++----------- 3 files changed, 156 insertions(+), 67 deletions(-) diff --git a/iox_query/src/pruning.rs b/iox_query/src/pruning.rs index 4be2b8d20d..a9489b5848 100644 --- a/iox_query/src/pruning.rs +++ b/iox_query/src/pruning.rs @@ -12,7 +12,7 @@ use datafusion::{ logical_plan::Column, physical_optimizer::pruning::{PruningPredicate, PruningStatistics}, }; -use observability_deps::tracing::{debug, trace}; +use observability_deps::tracing::{debug, trace, warn}; use predicate::Predicate; use query_functions::group_by::Aggregate; use schema::Schema; @@ -52,10 +52,19 @@ impl std::fmt::Display for NotPrunedReason { /// Something that cares to be notified when pruning of chunks occurs pub trait PruningObserver { - /// Called when the specified chunk was pruned from observation + /// Called when the specified chunk was pruned from observation. fn was_pruned(&self, _chunk: &dyn QueryChunk) {} - /// Called when no pruning can happen at all for some reason + /// Called when a chunk was not pruned. + fn was_not_pruned(&self, _chunk: &dyn QueryChunk) {} + + /// Called when no pruning can happen at all for some reason. + /// + /// Since pruning is optional and _only_ improves performance but its lack does not affect correctness, this will + /// NOT lead to a query error. + /// + /// In this case, statistical pruning will not happen and neither [`was_pruned`](Self::was_pruned) nor + /// [`was_not_pruned`](Self::was_not_pruned) will be called. fn could_not_prune(&self, _reason: NotPrunedReason, _chunk: &dyn QueryChunk) {} } @@ -98,7 +107,7 @@ where chunk.as_ref(), ); } - trace!(%e, ?filter_expr, "Can not create pruning predicate"); + warn!(%e, ?filter_expr, "Can not create pruning predicate"); return chunks; } }; @@ -114,7 +123,7 @@ where for chunk in &chunks { observer.could_not_prune(NotPrunedReason::DataFusionPruningFailed, chunk.as_ref()); } - trace!(%e, ?filter_expr, "DataFusion pruning failed"); + warn!(%e, ?filter_expr, "DataFusion pruning failed"); return chunks; } }; @@ -124,7 +133,10 @@ where let mut pruned_chunks = Vec::with_capacity(chunks.len()); for (chunk, keep) in chunks.into_iter().zip(results) { match keep { - true => pruned_chunks.push(chunk), + true => { + observer.was_not_pruned(chunk.as_ref()); + pruned_chunks.push(chunk); + } false => { observer.was_pruned(chunk.as_ref()); } @@ -287,7 +299,7 @@ mod test { assert_eq!( observer.events(), - vec!["Could not prune: No expression on predicate"] + vec!["chunk1: Could not prune: No expression on predicate"] ); assert_eq!(names(&pruned), vec!["chunk1"]); } @@ -412,7 +424,7 @@ mod test { let predicate = Predicate::new().with_expr(col("column1").lt(lit(100.0))); let pruned = prune_chunks(&observer, c1.schema(), vec![c1], &predicate); - assert!(observer.events().is_empty()); + assert_eq!(observer.events(), vec!["chunk1: Not pruned"]); assert_eq!(names(&pruned), vec!["chunk1"]); } @@ -433,7 +445,7 @@ mod test { let pruned = prune_chunks(&observer, c1.schema(), vec![c1], &predicate); - assert!(observer.events().is_empty()); + assert_eq!(observer.events(), vec!["chunk1: Not pruned"]); assert_eq!(names(&pruned), vec!["chunk1"]); } @@ -454,7 +466,7 @@ mod test { let pruned = prune_chunks(&observer, c1.schema(), vec![c1], &predicate); - assert!(observer.events().is_empty()); + assert_eq!(observer.events(), vec!["chunk1: Not pruned"]); assert_eq!(names(&pruned), vec!["chunk1"]); } @@ -475,7 +487,7 @@ mod test { let pruned = prune_chunks(&observer, c1.schema(), vec![c1], &predicate); - assert!(observer.events().is_empty()); + assert_eq!(observer.events(), vec!["chunk1: Not pruned"]); assert_eq!(names(&pruned), vec!["chunk1"]); } @@ -498,7 +510,7 @@ mod test { let pruned = prune_chunks(&observer, c1.schema(), vec![c1], &predicate); - assert!(observer.events().is_empty()); + assert_eq!(observer.events(), vec!["chunk1: Not pruned"]); assert_eq!(names(&pruned), vec!["chunk1"]); } @@ -546,7 +558,15 @@ mod test { let pruned = prune_chunks(&observer, schema, chunks, &predicate); - assert_eq!(observer.events(), vec!["chunk1: Pruned"]); + assert_eq!( + observer.events(), + vec![ + "chunk1: Pruned", + "chunk2: Not pruned", + "chunk3: Not pruned", + "chunk4: Not pruned" + ] + ); assert_eq!(names(&pruned), vec!["chunk2", "chunk3", "chunk4"]); } @@ -605,7 +625,14 @@ mod test { assert_eq!( observer.events(), - vec!["chunk1: Pruned", "chunk3: Pruned", "chunk6: Pruned"] + vec![ + "chunk1: Pruned", + "chunk2: Not pruned", + "chunk3: Pruned", + "chunk4: Not pruned", + "chunk5: Not pruned", + "chunk6: Pruned" + ] ); assert_eq!(names(&pruned), vec!["chunk2", "chunk4", "chunk5"]); } @@ -643,7 +670,10 @@ mod test { let pruned = prune_chunks(&observer, schema, chunks, &predicate); - assert_eq!(observer.events(), vec!["chunk1: Pruned",]); + assert_eq!( + observer.events(), + vec!["chunk1: Pruned", "chunk2: Not pruned", "chunk3: Not pruned"] + ); assert_eq!(names(&pruned), vec!["chunk2", "chunk3"]); } @@ -702,7 +732,10 @@ mod test { let pruned = prune_chunks(&observer, schema, chunks, &predicate); - assert_eq!(observer.events(), vec!["chunk2: Pruned", "chunk3: Pruned"]); + assert_eq!( + observer.events(), + vec!["chunk1: Not pruned", "chunk2: Pruned", "chunk3: Pruned"] + ); assert_eq!(names(&pruned), vec!["chunk1"]); } @@ -764,7 +797,14 @@ mod test { assert_eq!( observer.events(), - vec!["chunk2: Pruned", "chunk3: Pruned", "chunk5: Pruned"] + vec![ + "chunk1: Not pruned", + "chunk2: Pruned", + "chunk3: Pruned", + "chunk4: Not pruned", + "chunk5: Pruned", + "chunk6: Not pruned" + ] ); assert_eq!(names(&pruned), vec!["chunk1", "chunk4", "chunk6"]); } @@ -795,10 +835,18 @@ mod test { .push(format!("{}: Pruned", chunk.table_name())) } - fn could_not_prune(&self, reason: NotPrunedReason, _chunk: &dyn QueryChunk) { + fn was_not_pruned(&self, chunk: &dyn QueryChunk) { self.events .borrow_mut() - .push(format!("Could not prune: {}", reason)) + .push(format!("{}: Not pruned", chunk.table_name())) + } + + fn could_not_prune(&self, reason: NotPrunedReason, chunk: &dyn QueryChunk) { + self.events.borrow_mut().push(format!( + "{}: Could not prune: {}", + chunk.table_name(), + reason + )) } } } diff --git a/querier/src/namespace/query_access.rs b/querier/src/namespace/query_access.rs index 0f86ed09b1..a33f4c67a7 100644 --- a/querier/src/namespace/query_access.rs +++ b/querier/src/namespace/query_access.rs @@ -387,12 +387,36 @@ mod tests { .unwrap(), &Observation::U64Counter(0), ); + assert_eq!( + reporter + .metric("query_pruner_chunks") + .unwrap() + .observation(&[("result", "not_pruned")]) + .unwrap(), + &Observation::U64Counter(0), + ); + assert_eq!( + reporter + .metric("query_pruner_rows") + .unwrap() + .observation(&[("result", "not_pruned")]) + .unwrap(), + &Observation::U64Counter(0), + ); + assert_eq!( + reporter + .metric("query_pruner_bytes") + .unwrap() + .observation(&[("result", "not_pruned")]) + .unwrap(), + &Observation::U64Counter(0), + ); assert_eq!( reporter .metric("query_pruner_chunks") .unwrap() .observation(&[ - ("result", "not_pruned"), + ("result", "could_not_prune"), ("reason", "No expression on predicate") ]) .unwrap(), @@ -403,7 +427,7 @@ mod tests { .metric("query_pruner_rows") .unwrap() .observation(&[ - ("result", "not_pruned"), + ("result", "could_not_prune"), ("reason", "No expression on predicate") ]) .unwrap(), @@ -413,7 +437,7 @@ mod tests { .metric("query_pruner_bytes") .unwrap() .observation(&[ - ("result", "not_pruned"), + ("result", "could_not_prune"), ("reason", "No expression on predicate"), ]) .unwrap() diff --git a/querier/src/table/query_access.rs b/querier/src/table/query_access.rs index 8baa4f021f..5e2e13d9b8 100644 --- a/querier/src/table/query_access.rs +++ b/querier/src/table/query_access.rs @@ -141,22 +141,30 @@ impl PruningObserver for MetricPruningObserver { .inc(chunk_estimate_size(chunk) as u64); } + fn was_not_pruned(&self, chunk: &dyn QueryChunk) { + self.metrics.chunks_not_pruned.inc(1); + self.metrics.rows_not_pruned.inc(chunk_rows(chunk) as u64); + self.metrics + .bytes_not_pruned + .inc(chunk_estimate_size(chunk) as u64); + } + fn could_not_prune(&self, reason: NotPrunedReason, chunk: &dyn QueryChunk) { let (chunks, rows, bytes) = match reason { NotPrunedReason::NoExpressionOnPredicate => ( - &self.metrics.chunks_not_pruned_no_expression, - &self.metrics.rows_not_pruned_no_expression, - &self.metrics.bytes_not_pruned_no_expression, + &self.metrics.chunks_could_not_prune_no_expression, + &self.metrics.rows_could_not_prune_no_expression, + &self.metrics.bytes_could_not_prune_no_expression, ), NotPrunedReason::CanNotCreatePruningPredicate => ( - &self.metrics.chunks_not_pruned_cannot_create_predicate, - &self.metrics.rows_not_pruned_cannot_create_predicate, - &self.metrics.bytes_not_pruned_cannot_create_predicate, + &self.metrics.chunks_could_not_prune_cannot_create_predicate, + &self.metrics.rows_could_not_prune_cannot_create_predicate, + &self.metrics.bytes_could_not_prune_cannot_create_predicate, ), NotPrunedReason::DataFusionPruningFailed => ( - &self.metrics.chunks_not_pruned_df, - &self.metrics.rows_not_pruned_df, - &self.metrics.bytes_not_pruned_df, + &self.metrics.chunks_could_not_prune_df, + &self.metrics.rows_could_not_prune_df, + &self.metrics.bytes_could_not_prune_df, ), }; @@ -170,21 +178,24 @@ impl PruningObserver for MetricPruningObserver { pub struct PruneMetrics { // number of chunks chunks_pruned: U64Counter, - chunks_not_pruned_no_expression: U64Counter, - chunks_not_pruned_cannot_create_predicate: U64Counter, - chunks_not_pruned_df: U64Counter, + chunks_not_pruned: U64Counter, + chunks_could_not_prune_no_expression: U64Counter, + chunks_could_not_prune_cannot_create_predicate: U64Counter, + chunks_could_not_prune_df: U64Counter, // number of rows rows_pruned: U64Counter, - rows_not_pruned_no_expression: U64Counter, - rows_not_pruned_cannot_create_predicate: U64Counter, - rows_not_pruned_df: U64Counter, + rows_not_pruned: U64Counter, + rows_could_not_prune_no_expression: U64Counter, + rows_could_not_prune_cannot_create_predicate: U64Counter, + rows_could_not_prune_df: U64Counter, // size in bytes bytes_pruned: U64Counter, - bytes_not_pruned_no_expression: U64Counter, - bytes_not_pruned_cannot_create_predicate: U64Counter, - bytes_not_pruned_df: U64Counter, + bytes_not_pruned: U64Counter, + bytes_could_not_prune_no_expression: U64Counter, + bytes_could_not_prune_cannot_create_predicate: U64Counter, + bytes_could_not_prune_df: U64Counter, } impl PruneMetrics { @@ -194,19 +205,20 @@ impl PruneMetrics { "Number of chunks seen by the statistics-based chunk pruner", ); let chunks_pruned = chunks.recorder(&[("result", "pruned")]); - let chunks_not_pruned_no_expression = chunks.recorder(&[ - ("result", "not_pruned"), + let chunks_not_pruned = chunks.recorder(&[("result", "not_pruned")]); + let chunks_could_not_prune_no_expression = chunks.recorder(&[ + ("result", "could_not_prune"), ("reason", NotPrunedReason::NoExpressionOnPredicate.name()), ]); - let chunks_not_pruned_cannot_create_predicate = chunks.recorder(&[ - ("result", "not_pruned"), + let chunks_could_not_prune_cannot_create_predicate = chunks.recorder(&[ + ("result", "could_not_prune"), ( "reason", NotPrunedReason::CanNotCreatePruningPredicate.name(), ), ]); - let chunks_not_pruned_df = chunks.recorder(&[ - ("result", "not_pruned"), + let chunks_could_not_prune_df = chunks.recorder(&[ + ("result", "could_not_prune"), ("reason", NotPrunedReason::DataFusionPruningFailed.name()), ]); @@ -215,19 +227,20 @@ impl PruneMetrics { "Number of rows seen by the statistics-based chunk pruner", ); let rows_pruned = rows.recorder(&[("result", "pruned")]); - let rows_not_pruned_no_expression = rows.recorder(&[ - ("result", "not_pruned"), + let rows_not_pruned = rows.recorder(&[("result", "not_pruned")]); + let rows_could_not_prune_no_expression = rows.recorder(&[ + ("result", "could_not_prune"), ("reason", NotPrunedReason::NoExpressionOnPredicate.name()), ]); - let rows_not_pruned_cannot_create_predicate = rows.recorder(&[ - ("result", "not_pruned"), + let rows_could_not_prune_cannot_create_predicate = rows.recorder(&[ + ("result", "could_not_prune"), ( "reason", NotPrunedReason::CanNotCreatePruningPredicate.name(), ), ]); - let rows_not_pruned_df = rows.recorder(&[ - ("result", "not_pruned"), + let rows_could_not_prune_df = rows.recorder(&[ + ("result", "could_not_prune"), ("reason", NotPrunedReason::DataFusionPruningFailed.name()), ]); @@ -236,35 +249,39 @@ impl PruneMetrics { "Size (in bytes) of chunks seen by the statistics-based chunk pruner", ); let bytes_pruned = bytes.recorder(&[("result", "pruned")]); - let bytes_not_pruned_no_expression = bytes.recorder(&[ - ("result", "not_pruned"), + let bytes_not_pruned = bytes.recorder(&[("result", "not_pruned")]); + let bytes_could_not_prune_no_expression = bytes.recorder(&[ + ("result", "could_not_prune"), ("reason", NotPrunedReason::NoExpressionOnPredicate.name()), ]); - let bytes_not_pruned_cannot_create_predicate = bytes.recorder(&[ - ("result", "not_pruned"), + let bytes_could_not_prune_cannot_create_predicate = bytes.recorder(&[ + ("result", "could_not_prune"), ( "reason", NotPrunedReason::CanNotCreatePruningPredicate.name(), ), ]); - let bytes_not_pruned_df = bytes.recorder(&[ - ("result", "not_pruned"), + let bytes_could_not_prune_df = bytes.recorder(&[ + ("result", "could_not_prune"), ("reason", NotPrunedReason::DataFusionPruningFailed.name()), ]); Self { chunks_pruned, - chunks_not_pruned_no_expression, - chunks_not_pruned_cannot_create_predicate, - chunks_not_pruned_df, + chunks_not_pruned, + chunks_could_not_prune_no_expression, + chunks_could_not_prune_cannot_create_predicate, + chunks_could_not_prune_df, rows_pruned, - rows_not_pruned_no_expression, - rows_not_pruned_cannot_create_predicate, - rows_not_pruned_df, + rows_not_pruned, + rows_could_not_prune_no_expression, + rows_could_not_prune_cannot_create_predicate, + rows_could_not_prune_df, bytes_pruned, - bytes_not_pruned_no_expression, - bytes_not_pruned_cannot_create_predicate, - bytes_not_pruned_df, + bytes_not_pruned, + bytes_could_not_prune_no_expression, + bytes_could_not_prune_cannot_create_predicate, + bytes_could_not_prune_df, } } }