fix: chunk pruning stats (#5319)

- emit a warning if we cannot even attempt to prune chunks due to an error. This is always either a missing feature or a bug (even though it does not impact correctness but _only_ performance). Also see https://github.com/influxdata/conductor/issues/1107 - change metrics to clearly differentiate between "could not prune" and "not pruned" - add new "not pruned" observer hook (this was missing for some reason, the "pruned" hook existed though)
2022-08-05 10:50:31 +00:00 · 2022-08-05 10:50:31 +00:00 · fc1870ff76
parent 4898a7f1e3
commit fc1870ff76
3 changed files with 156 additions and 67 deletions
--- a/iox_query/src/pruning.rs
+++ b/iox_query/src/pruning.rs
@ -12,7 +12,7 @@ use datafusion::{
    logical_plan::Column,
    physical_optimizer::pruning::{PruningPredicate, PruningStatistics},
 };
-use observability_deps::tracing::{debug, trace};
+use observability_deps::tracing::{debug, trace, warn};
 use predicate::Predicate;
 use query_functions::group_by::Aggregate;
 use schema::Schema;
@ -52,10 +52,19 @@ impl std::fmt::Display for NotPrunedReason {

 /// Something that cares to be notified when pruning of chunks occurs
 pub trait PruningObserver {
-    /// Called when the specified chunk was pruned from observation
+    /// Called when the specified chunk was pruned from observation.
    fn was_pruned(&self, _chunk: &dyn QueryChunk) {}

-    /// Called when no pruning can happen at all for some reason
+    /// Called when a chunk was not pruned.
+    fn was_not_pruned(&self, _chunk: &dyn QueryChunk) {}
+
+    /// Called when no pruning can happen at all for some reason.
+    ///
+    /// Since pruning is optional and _only_ improves performance but its lack does not affect correctness, this will
+    /// NOT lead to a query error.
+    ///
+    /// In this case, statistical pruning will not happen and neither [`was_pruned`](Self::was_pruned) nor
+    /// [`was_not_pruned`](Self::was_not_pruned) will be called.
    fn could_not_prune(&self, _reason: NotPrunedReason, _chunk: &dyn QueryChunk) {}
 }

@ -98,7 +107,7 @@ where
                        chunk.as_ref(),
                    );
                }
-                trace!(%e, ?filter_expr, "Can not create pruning predicate");
+                warn!(%e, ?filter_expr, "Can not create pruning predicate");
                return chunks;
            }
        };
@ -114,7 +123,7 @@ where
            for chunk in &chunks {
                observer.could_not_prune(NotPrunedReason::DataFusionPruningFailed, chunk.as_ref());
            }
-            trace!(%e, ?filter_expr, "DataFusion pruning failed");
+            warn!(%e, ?filter_expr, "DataFusion pruning failed");
            return chunks;
        }
    };
@ -124,7 +133,10 @@ where
    let mut pruned_chunks = Vec::with_capacity(chunks.len());
    for (chunk, keep) in chunks.into_iter().zip(results) {
        match keep {
-            true => pruned_chunks.push(chunk),
+            true => {
+                observer.was_not_pruned(chunk.as_ref());
+                pruned_chunks.push(chunk);
+            }
            false => {
                observer.was_pruned(chunk.as_ref());
            }
@ -287,7 +299,7 @@ mod test {

        assert_eq!(
            observer.events(),
-            vec!["Could not prune: No expression on predicate"]
+            vec!["chunk1: Could not prune: No expression on predicate"]
        );
        assert_eq!(names(&pruned), vec!["chunk1"]);
    }
@ -412,7 +424,7 @@ mod test {
        let predicate = Predicate::new().with_expr(col("column1").lt(lit(100.0)));

        let pruned = prune_chunks(&observer, c1.schema(), vec![c1], &predicate);
-        assert!(observer.events().is_empty());
+        assert_eq!(observer.events(), vec!["chunk1: Not pruned"]);
        assert_eq!(names(&pruned), vec!["chunk1"]);
    }

@ -433,7 +445,7 @@ mod test {

        let pruned = prune_chunks(&observer, c1.schema(), vec![c1], &predicate);

-        assert!(observer.events().is_empty());
+        assert_eq!(observer.events(), vec!["chunk1: Not pruned"]);
        assert_eq!(names(&pruned), vec!["chunk1"]);
    }

@ -454,7 +466,7 @@ mod test {

        let pruned = prune_chunks(&observer, c1.schema(), vec![c1], &predicate);

-        assert!(observer.events().is_empty());
+        assert_eq!(observer.events(), vec!["chunk1: Not pruned"]);
        assert_eq!(names(&pruned), vec!["chunk1"]);
    }

@ -475,7 +487,7 @@ mod test {

        let pruned = prune_chunks(&observer, c1.schema(), vec![c1], &predicate);

-        assert!(observer.events().is_empty());
+        assert_eq!(observer.events(), vec!["chunk1: Not pruned"]);
        assert_eq!(names(&pruned), vec!["chunk1"]);
    }

@ -498,7 +510,7 @@ mod test {

        let pruned = prune_chunks(&observer, c1.schema(), vec![c1], &predicate);

-        assert!(observer.events().is_empty());
+        assert_eq!(observer.events(), vec!["chunk1: Not pruned"]);
        assert_eq!(names(&pruned), vec!["chunk1"]);
    }

@ -546,7 +558,15 @@ mod test {

        let pruned = prune_chunks(&observer, schema, chunks, &predicate);

-        assert_eq!(observer.events(), vec!["chunk1: Pruned"]);
+        assert_eq!(
+            observer.events(),
+            vec![
+                "chunk1: Pruned",
+                "chunk2: Not pruned",
+                "chunk3: Not pruned",
+                "chunk4: Not pruned"
+            ]
+        );
        assert_eq!(names(&pruned), vec!["chunk2", "chunk3", "chunk4"]);
    }

@ -605,7 +625,14 @@ mod test {

        assert_eq!(
            observer.events(),
-            vec!["chunk1: Pruned", "chunk3: Pruned", "chunk6: Pruned"]
+            vec![
+                "chunk1: Pruned",
+                "chunk2: Not pruned",
+                "chunk3: Pruned",
+                "chunk4: Not pruned",
+                "chunk5: Not pruned",
+                "chunk6: Pruned"
+            ]
        );
        assert_eq!(names(&pruned), vec!["chunk2", "chunk4", "chunk5"]);
    }
@ -643,7 +670,10 @@ mod test {

        let pruned = prune_chunks(&observer, schema, chunks, &predicate);

-        assert_eq!(observer.events(), vec!["chunk1: Pruned",]);
+        assert_eq!(
+            observer.events(),
+            vec!["chunk1: Pruned", "chunk2: Not pruned", "chunk3: Not pruned"]
+        );
        assert_eq!(names(&pruned), vec!["chunk2", "chunk3"]);
    }

@ -702,7 +732,10 @@ mod test {

        let pruned = prune_chunks(&observer, schema, chunks, &predicate);

-        assert_eq!(observer.events(), vec!["chunk2: Pruned", "chunk3: Pruned"]);
+        assert_eq!(
+            observer.events(),
+            vec!["chunk1: Not pruned", "chunk2: Pruned", "chunk3: Pruned"]
+        );
        assert_eq!(names(&pruned), vec!["chunk1"]);
    }

@ -764,7 +797,14 @@ mod test {

        assert_eq!(
            observer.events(),
-            vec!["chunk2: Pruned", "chunk3: Pruned", "chunk5: Pruned"]
+            vec![
+                "chunk1: Not pruned",
+                "chunk2: Pruned",
+                "chunk3: Pruned",
+                "chunk4: Not pruned",
+                "chunk5: Pruned",
+                "chunk6: Not pruned"
+            ]
        );
        assert_eq!(names(&pruned), vec!["chunk1", "chunk4", "chunk6"]);
    }
@ -795,10 +835,18 @@ mod test {
                .push(format!("{}: Pruned", chunk.table_name()))
        }

-        fn could_not_prune(&self, reason: NotPrunedReason, _chunk: &dyn QueryChunk) {
+        fn was_not_pruned(&self, chunk: &dyn QueryChunk) {
            self.events
                .borrow_mut()
-                .push(format!("Could not prune: {}", reason))
+                .push(format!("{}: Not pruned", chunk.table_name()))
+        }
+
+        fn could_not_prune(&self, reason: NotPrunedReason, chunk: &dyn QueryChunk) {
+            self.events.borrow_mut().push(format!(
+                "{}: Could not prune: {}",
+                chunk.table_name(),
+                reason
+            ))
        }
    }
 }
--- a/querier/src/namespace/query_access.rs
+++ b/querier/src/namespace/query_access.rs
@ -387,12 +387,36 @@ mod tests {
                .unwrap(),
            &Observation::U64Counter(0),
        );
+        assert_eq!(
+            reporter
+                .metric("query_pruner_chunks")
+                .unwrap()
+                .observation(&[("result", "not_pruned")])
+                .unwrap(),
+            &Observation::U64Counter(0),
+        );
+        assert_eq!(
+            reporter
+                .metric("query_pruner_rows")
+                .unwrap()
+                .observation(&[("result", "not_pruned")])
+                .unwrap(),
+            &Observation::U64Counter(0),
+        );
+        assert_eq!(
+            reporter
+                .metric("query_pruner_bytes")
+                .unwrap()
+                .observation(&[("result", "not_pruned")])
+                .unwrap(),
+            &Observation::U64Counter(0),
+        );
        assert_eq!(
            reporter
                .metric("query_pruner_chunks")
                .unwrap()
                .observation(&[
-                    ("result", "not_pruned"),
+                    ("result", "could_not_prune"),
                    ("reason", "No expression on predicate")
                ])
                .unwrap(),
@ -403,7 +427,7 @@ mod tests {
                .metric("query_pruner_rows")
                .unwrap()
                .observation(&[
-                    ("result", "not_pruned"),
+                    ("result", "could_not_prune"),
                    ("reason", "No expression on predicate")
                ])
                .unwrap(),
@ -413,7 +437,7 @@ mod tests {
            .metric("query_pruner_bytes")
            .unwrap()
            .observation(&[
-                ("result", "not_pruned"),
+                ("result", "could_not_prune"),
                ("reason", "No expression on predicate"),
            ])
            .unwrap()
--- a/querier/src/table/query_access.rs
+++ b/querier/src/table/query_access.rs
@ -141,22 +141,30 @@ impl PruningObserver for MetricPruningObserver {
            .inc(chunk_estimate_size(chunk) as u64);
    }

+    fn was_not_pruned(&self, chunk: &dyn QueryChunk) {
+        self.metrics.chunks_not_pruned.inc(1);
+        self.metrics.rows_not_pruned.inc(chunk_rows(chunk) as u64);
+        self.metrics
+            .bytes_not_pruned
+            .inc(chunk_estimate_size(chunk) as u64);
+    }
+
    fn could_not_prune(&self, reason: NotPrunedReason, chunk: &dyn QueryChunk) {
        let (chunks, rows, bytes) = match reason {
            NotPrunedReason::NoExpressionOnPredicate => (
-                &self.metrics.chunks_not_pruned_no_expression,
-                &self.metrics.rows_not_pruned_no_expression,
-                &self.metrics.bytes_not_pruned_no_expression,
+                &self.metrics.chunks_could_not_prune_no_expression,
+                &self.metrics.rows_could_not_prune_no_expression,
+                &self.metrics.bytes_could_not_prune_no_expression,
            ),
            NotPrunedReason::CanNotCreatePruningPredicate => (
-                &self.metrics.chunks_not_pruned_cannot_create_predicate,
-                &self.metrics.rows_not_pruned_cannot_create_predicate,
-                &self.metrics.bytes_not_pruned_cannot_create_predicate,
+                &self.metrics.chunks_could_not_prune_cannot_create_predicate,
+                &self.metrics.rows_could_not_prune_cannot_create_predicate,
+                &self.metrics.bytes_could_not_prune_cannot_create_predicate,
            ),
            NotPrunedReason::DataFusionPruningFailed => (
-                &self.metrics.chunks_not_pruned_df,
-                &self.metrics.rows_not_pruned_df,
-                &self.metrics.bytes_not_pruned_df,
+                &self.metrics.chunks_could_not_prune_df,
+                &self.metrics.rows_could_not_prune_df,
+                &self.metrics.bytes_could_not_prune_df,
            ),
        };

@ -170,21 +178,24 @@ impl PruningObserver for MetricPruningObserver {
 pub struct PruneMetrics {
    // number of chunks
    chunks_pruned: U64Counter,
-    chunks_not_pruned_no_expression: U64Counter,
-    chunks_not_pruned_cannot_create_predicate: U64Counter,
-    chunks_not_pruned_df: U64Counter,
+    chunks_not_pruned: U64Counter,
+    chunks_could_not_prune_no_expression: U64Counter,
+    chunks_could_not_prune_cannot_create_predicate: U64Counter,
+    chunks_could_not_prune_df: U64Counter,

    // number of rows
    rows_pruned: U64Counter,
-    rows_not_pruned_no_expression: U64Counter,
-    rows_not_pruned_cannot_create_predicate: U64Counter,
-    rows_not_pruned_df: U64Counter,
+    rows_not_pruned: U64Counter,
+    rows_could_not_prune_no_expression: U64Counter,
+    rows_could_not_prune_cannot_create_predicate: U64Counter,
+    rows_could_not_prune_df: U64Counter,

    // size in bytes
    bytes_pruned: U64Counter,
-    bytes_not_pruned_no_expression: U64Counter,
-    bytes_not_pruned_cannot_create_predicate: U64Counter,
-    bytes_not_pruned_df: U64Counter,
+    bytes_not_pruned: U64Counter,
+    bytes_could_not_prune_no_expression: U64Counter,
+    bytes_could_not_prune_cannot_create_predicate: U64Counter,
+    bytes_could_not_prune_df: U64Counter,
 }

 impl PruneMetrics {
@ -194,19 +205,20 @@ impl PruneMetrics {
            "Number of chunks seen by the statistics-based chunk pruner",
        );
        let chunks_pruned = chunks.recorder(&[("result", "pruned")]);
-        let chunks_not_pruned_no_expression = chunks.recorder(&[
-            ("result", "not_pruned"),
+        let chunks_not_pruned = chunks.recorder(&[("result", "not_pruned")]);
+        let chunks_could_not_prune_no_expression = chunks.recorder(&[
+            ("result", "could_not_prune"),
            ("reason", NotPrunedReason::NoExpressionOnPredicate.name()),
        ]);
-        let chunks_not_pruned_cannot_create_predicate = chunks.recorder(&[
-            ("result", "not_pruned"),
+        let chunks_could_not_prune_cannot_create_predicate = chunks.recorder(&[
+            ("result", "could_not_prune"),
            (
                "reason",
                NotPrunedReason::CanNotCreatePruningPredicate.name(),
            ),
        ]);
-        let chunks_not_pruned_df = chunks.recorder(&[
-            ("result", "not_pruned"),
+        let chunks_could_not_prune_df = chunks.recorder(&[
+            ("result", "could_not_prune"),
            ("reason", NotPrunedReason::DataFusionPruningFailed.name()),
        ]);

@ -215,19 +227,20 @@ impl PruneMetrics {
            "Number of rows seen by the statistics-based chunk pruner",
        );
        let rows_pruned = rows.recorder(&[("result", "pruned")]);
-        let rows_not_pruned_no_expression = rows.recorder(&[
-            ("result", "not_pruned"),
+        let rows_not_pruned = rows.recorder(&[("result", "not_pruned")]);
+        let rows_could_not_prune_no_expression = rows.recorder(&[
+            ("result", "could_not_prune"),
            ("reason", NotPrunedReason::NoExpressionOnPredicate.name()),
        ]);
-        let rows_not_pruned_cannot_create_predicate = rows.recorder(&[
-            ("result", "not_pruned"),
+        let rows_could_not_prune_cannot_create_predicate = rows.recorder(&[
+            ("result", "could_not_prune"),
            (
                "reason",
                NotPrunedReason::CanNotCreatePruningPredicate.name(),
            ),
        ]);
-        let rows_not_pruned_df = rows.recorder(&[
-            ("result", "not_pruned"),
+        let rows_could_not_prune_df = rows.recorder(&[
+            ("result", "could_not_prune"),
            ("reason", NotPrunedReason::DataFusionPruningFailed.name()),
        ]);

@ -236,35 +249,39 @@ impl PruneMetrics {
            "Size (in bytes) of chunks seen by the statistics-based chunk pruner",
        );
        let bytes_pruned = bytes.recorder(&[("result", "pruned")]);
-        let bytes_not_pruned_no_expression = bytes.recorder(&[
-            ("result", "not_pruned"),
+        let bytes_not_pruned = bytes.recorder(&[("result", "not_pruned")]);
+        let bytes_could_not_prune_no_expression = bytes.recorder(&[
+            ("result", "could_not_prune"),
            ("reason", NotPrunedReason::NoExpressionOnPredicate.name()),
        ]);
-        let bytes_not_pruned_cannot_create_predicate = bytes.recorder(&[
-            ("result", "not_pruned"),
+        let bytes_could_not_prune_cannot_create_predicate = bytes.recorder(&[
+            ("result", "could_not_prune"),
            (
                "reason",
                NotPrunedReason::CanNotCreatePruningPredicate.name(),
            ),
        ]);
-        let bytes_not_pruned_df = bytes.recorder(&[
-            ("result", "not_pruned"),
+        let bytes_could_not_prune_df = bytes.recorder(&[
+            ("result", "could_not_prune"),
            ("reason", NotPrunedReason::DataFusionPruningFailed.name()),
        ]);

        Self {
            chunks_pruned,
-            chunks_not_pruned_no_expression,
-            chunks_not_pruned_cannot_create_predicate,
-            chunks_not_pruned_df,
+            chunks_not_pruned,
+            chunks_could_not_prune_no_expression,
+            chunks_could_not_prune_cannot_create_predicate,
+            chunks_could_not_prune_df,
            rows_pruned,
-            rows_not_pruned_no_expression,
-            rows_not_pruned_cannot_create_predicate,
-            rows_not_pruned_df,
+            rows_not_pruned,
+            rows_could_not_prune_no_expression,
+            rows_could_not_prune_cannot_create_predicate,
+            rows_could_not_prune_df,
            bytes_pruned,
-            bytes_not_pruned_no_expression,
-            bytes_not_pruned_cannot_create_predicate,
-            bytes_not_pruned_df,
+            bytes_not_pruned,
+            bytes_could_not_prune_no_expression,
+            bytes_could_not_prune_cannot_create_predicate,
+            bytes_could_not_prune_df,
        }
    }
 }