fix: chunk pruning stats (#5319)
- emit a warning if we cannot even attempt to prune chunks due to an error. This is always either a missing feature or a bug (even though it does not impact correctness but _only_ performance). Also see https://github.com/influxdata/conductor/issues/1107 - change metrics to clearly differentiate between "could not prune" and "not pruned" - add new "not pruned" observer hook (this was missing for some reason, the "pruned" hook existed though)pull/24376/head
parent
4898a7f1e3
commit
fc1870ff76
|
|
@ -12,7 +12,7 @@ use datafusion::{
|
|||
logical_plan::Column,
|
||||
physical_optimizer::pruning::{PruningPredicate, PruningStatistics},
|
||||
};
|
||||
use observability_deps::tracing::{debug, trace};
|
||||
use observability_deps::tracing::{debug, trace, warn};
|
||||
use predicate::Predicate;
|
||||
use query_functions::group_by::Aggregate;
|
||||
use schema::Schema;
|
||||
|
|
@ -52,10 +52,19 @@ impl std::fmt::Display for NotPrunedReason {
|
|||
|
||||
/// Something that cares to be notified when pruning of chunks occurs
|
||||
pub trait PruningObserver {
|
||||
/// Called when the specified chunk was pruned from observation
|
||||
/// Called when the specified chunk was pruned from observation.
|
||||
fn was_pruned(&self, _chunk: &dyn QueryChunk) {}
|
||||
|
||||
/// Called when no pruning can happen at all for some reason
|
||||
/// Called when a chunk was not pruned.
|
||||
fn was_not_pruned(&self, _chunk: &dyn QueryChunk) {}
|
||||
|
||||
/// Called when no pruning can happen at all for some reason.
|
||||
///
|
||||
/// Since pruning is optional and _only_ improves performance but its lack does not affect correctness, this will
|
||||
/// NOT lead to a query error.
|
||||
///
|
||||
/// In this case, statistical pruning will not happen and neither [`was_pruned`](Self::was_pruned) nor
|
||||
/// [`was_not_pruned`](Self::was_not_pruned) will be called.
|
||||
fn could_not_prune(&self, _reason: NotPrunedReason, _chunk: &dyn QueryChunk) {}
|
||||
}
|
||||
|
||||
|
|
@ -98,7 +107,7 @@ where
|
|||
chunk.as_ref(),
|
||||
);
|
||||
}
|
||||
trace!(%e, ?filter_expr, "Can not create pruning predicate");
|
||||
warn!(%e, ?filter_expr, "Can not create pruning predicate");
|
||||
return chunks;
|
||||
}
|
||||
};
|
||||
|
|
@ -114,7 +123,7 @@ where
|
|||
for chunk in &chunks {
|
||||
observer.could_not_prune(NotPrunedReason::DataFusionPruningFailed, chunk.as_ref());
|
||||
}
|
||||
trace!(%e, ?filter_expr, "DataFusion pruning failed");
|
||||
warn!(%e, ?filter_expr, "DataFusion pruning failed");
|
||||
return chunks;
|
||||
}
|
||||
};
|
||||
|
|
@ -124,7 +133,10 @@ where
|
|||
let mut pruned_chunks = Vec::with_capacity(chunks.len());
|
||||
for (chunk, keep) in chunks.into_iter().zip(results) {
|
||||
match keep {
|
||||
true => pruned_chunks.push(chunk),
|
||||
true => {
|
||||
observer.was_not_pruned(chunk.as_ref());
|
||||
pruned_chunks.push(chunk);
|
||||
}
|
||||
false => {
|
||||
observer.was_pruned(chunk.as_ref());
|
||||
}
|
||||
|
|
@ -287,7 +299,7 @@ mod test {
|
|||
|
||||
assert_eq!(
|
||||
observer.events(),
|
||||
vec!["Could not prune: No expression on predicate"]
|
||||
vec!["chunk1: Could not prune: No expression on predicate"]
|
||||
);
|
||||
assert_eq!(names(&pruned), vec!["chunk1"]);
|
||||
}
|
||||
|
|
@ -412,7 +424,7 @@ mod test {
|
|||
let predicate = Predicate::new().with_expr(col("column1").lt(lit(100.0)));
|
||||
|
||||
let pruned = prune_chunks(&observer, c1.schema(), vec![c1], &predicate);
|
||||
assert!(observer.events().is_empty());
|
||||
assert_eq!(observer.events(), vec!["chunk1: Not pruned"]);
|
||||
assert_eq!(names(&pruned), vec!["chunk1"]);
|
||||
}
|
||||
|
||||
|
|
@ -433,7 +445,7 @@ mod test {
|
|||
|
||||
let pruned = prune_chunks(&observer, c1.schema(), vec![c1], &predicate);
|
||||
|
||||
assert!(observer.events().is_empty());
|
||||
assert_eq!(observer.events(), vec!["chunk1: Not pruned"]);
|
||||
assert_eq!(names(&pruned), vec!["chunk1"]);
|
||||
}
|
||||
|
||||
|
|
@ -454,7 +466,7 @@ mod test {
|
|||
|
||||
let pruned = prune_chunks(&observer, c1.schema(), vec![c1], &predicate);
|
||||
|
||||
assert!(observer.events().is_empty());
|
||||
assert_eq!(observer.events(), vec!["chunk1: Not pruned"]);
|
||||
assert_eq!(names(&pruned), vec!["chunk1"]);
|
||||
}
|
||||
|
||||
|
|
@ -475,7 +487,7 @@ mod test {
|
|||
|
||||
let pruned = prune_chunks(&observer, c1.schema(), vec![c1], &predicate);
|
||||
|
||||
assert!(observer.events().is_empty());
|
||||
assert_eq!(observer.events(), vec!["chunk1: Not pruned"]);
|
||||
assert_eq!(names(&pruned), vec!["chunk1"]);
|
||||
}
|
||||
|
||||
|
|
@ -498,7 +510,7 @@ mod test {
|
|||
|
||||
let pruned = prune_chunks(&observer, c1.schema(), vec![c1], &predicate);
|
||||
|
||||
assert!(observer.events().is_empty());
|
||||
assert_eq!(observer.events(), vec!["chunk1: Not pruned"]);
|
||||
assert_eq!(names(&pruned), vec!["chunk1"]);
|
||||
}
|
||||
|
||||
|
|
@ -546,7 +558,15 @@ mod test {
|
|||
|
||||
let pruned = prune_chunks(&observer, schema, chunks, &predicate);
|
||||
|
||||
assert_eq!(observer.events(), vec!["chunk1: Pruned"]);
|
||||
assert_eq!(
|
||||
observer.events(),
|
||||
vec![
|
||||
"chunk1: Pruned",
|
||||
"chunk2: Not pruned",
|
||||
"chunk3: Not pruned",
|
||||
"chunk4: Not pruned"
|
||||
]
|
||||
);
|
||||
assert_eq!(names(&pruned), vec!["chunk2", "chunk3", "chunk4"]);
|
||||
}
|
||||
|
||||
|
|
@ -605,7 +625,14 @@ mod test {
|
|||
|
||||
assert_eq!(
|
||||
observer.events(),
|
||||
vec!["chunk1: Pruned", "chunk3: Pruned", "chunk6: Pruned"]
|
||||
vec![
|
||||
"chunk1: Pruned",
|
||||
"chunk2: Not pruned",
|
||||
"chunk3: Pruned",
|
||||
"chunk4: Not pruned",
|
||||
"chunk5: Not pruned",
|
||||
"chunk6: Pruned"
|
||||
]
|
||||
);
|
||||
assert_eq!(names(&pruned), vec!["chunk2", "chunk4", "chunk5"]);
|
||||
}
|
||||
|
|
@ -643,7 +670,10 @@ mod test {
|
|||
|
||||
let pruned = prune_chunks(&observer, schema, chunks, &predicate);
|
||||
|
||||
assert_eq!(observer.events(), vec!["chunk1: Pruned",]);
|
||||
assert_eq!(
|
||||
observer.events(),
|
||||
vec!["chunk1: Pruned", "chunk2: Not pruned", "chunk3: Not pruned"]
|
||||
);
|
||||
assert_eq!(names(&pruned), vec!["chunk2", "chunk3"]);
|
||||
}
|
||||
|
||||
|
|
@ -702,7 +732,10 @@ mod test {
|
|||
|
||||
let pruned = prune_chunks(&observer, schema, chunks, &predicate);
|
||||
|
||||
assert_eq!(observer.events(), vec!["chunk2: Pruned", "chunk3: Pruned"]);
|
||||
assert_eq!(
|
||||
observer.events(),
|
||||
vec!["chunk1: Not pruned", "chunk2: Pruned", "chunk3: Pruned"]
|
||||
);
|
||||
assert_eq!(names(&pruned), vec!["chunk1"]);
|
||||
}
|
||||
|
||||
|
|
@ -764,7 +797,14 @@ mod test {
|
|||
|
||||
assert_eq!(
|
||||
observer.events(),
|
||||
vec!["chunk2: Pruned", "chunk3: Pruned", "chunk5: Pruned"]
|
||||
vec![
|
||||
"chunk1: Not pruned",
|
||||
"chunk2: Pruned",
|
||||
"chunk3: Pruned",
|
||||
"chunk4: Not pruned",
|
||||
"chunk5: Pruned",
|
||||
"chunk6: Not pruned"
|
||||
]
|
||||
);
|
||||
assert_eq!(names(&pruned), vec!["chunk1", "chunk4", "chunk6"]);
|
||||
}
|
||||
|
|
@ -795,10 +835,18 @@ mod test {
|
|||
.push(format!("{}: Pruned", chunk.table_name()))
|
||||
}
|
||||
|
||||
fn could_not_prune(&self, reason: NotPrunedReason, _chunk: &dyn QueryChunk) {
|
||||
fn was_not_pruned(&self, chunk: &dyn QueryChunk) {
|
||||
self.events
|
||||
.borrow_mut()
|
||||
.push(format!("Could not prune: {}", reason))
|
||||
.push(format!("{}: Not pruned", chunk.table_name()))
|
||||
}
|
||||
|
||||
fn could_not_prune(&self, reason: NotPrunedReason, chunk: &dyn QueryChunk) {
|
||||
self.events.borrow_mut().push(format!(
|
||||
"{}: Could not prune: {}",
|
||||
chunk.table_name(),
|
||||
reason
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -387,12 +387,36 @@ mod tests {
|
|||
.unwrap(),
|
||||
&Observation::U64Counter(0),
|
||||
);
|
||||
assert_eq!(
|
||||
reporter
|
||||
.metric("query_pruner_chunks")
|
||||
.unwrap()
|
||||
.observation(&[("result", "not_pruned")])
|
||||
.unwrap(),
|
||||
&Observation::U64Counter(0),
|
||||
);
|
||||
assert_eq!(
|
||||
reporter
|
||||
.metric("query_pruner_rows")
|
||||
.unwrap()
|
||||
.observation(&[("result", "not_pruned")])
|
||||
.unwrap(),
|
||||
&Observation::U64Counter(0),
|
||||
);
|
||||
assert_eq!(
|
||||
reporter
|
||||
.metric("query_pruner_bytes")
|
||||
.unwrap()
|
||||
.observation(&[("result", "not_pruned")])
|
||||
.unwrap(),
|
||||
&Observation::U64Counter(0),
|
||||
);
|
||||
assert_eq!(
|
||||
reporter
|
||||
.metric("query_pruner_chunks")
|
||||
.unwrap()
|
||||
.observation(&[
|
||||
("result", "not_pruned"),
|
||||
("result", "could_not_prune"),
|
||||
("reason", "No expression on predicate")
|
||||
])
|
||||
.unwrap(),
|
||||
|
|
@ -403,7 +427,7 @@ mod tests {
|
|||
.metric("query_pruner_rows")
|
||||
.unwrap()
|
||||
.observation(&[
|
||||
("result", "not_pruned"),
|
||||
("result", "could_not_prune"),
|
||||
("reason", "No expression on predicate")
|
||||
])
|
||||
.unwrap(),
|
||||
|
|
@ -413,7 +437,7 @@ mod tests {
|
|||
.metric("query_pruner_bytes")
|
||||
.unwrap()
|
||||
.observation(&[
|
||||
("result", "not_pruned"),
|
||||
("result", "could_not_prune"),
|
||||
("reason", "No expression on predicate"),
|
||||
])
|
||||
.unwrap()
|
||||
|
|
|
|||
|
|
@ -141,22 +141,30 @@ impl PruningObserver for MetricPruningObserver {
|
|||
.inc(chunk_estimate_size(chunk) as u64);
|
||||
}
|
||||
|
||||
fn was_not_pruned(&self, chunk: &dyn QueryChunk) {
|
||||
self.metrics.chunks_not_pruned.inc(1);
|
||||
self.metrics.rows_not_pruned.inc(chunk_rows(chunk) as u64);
|
||||
self.metrics
|
||||
.bytes_not_pruned
|
||||
.inc(chunk_estimate_size(chunk) as u64);
|
||||
}
|
||||
|
||||
fn could_not_prune(&self, reason: NotPrunedReason, chunk: &dyn QueryChunk) {
|
||||
let (chunks, rows, bytes) = match reason {
|
||||
NotPrunedReason::NoExpressionOnPredicate => (
|
||||
&self.metrics.chunks_not_pruned_no_expression,
|
||||
&self.metrics.rows_not_pruned_no_expression,
|
||||
&self.metrics.bytes_not_pruned_no_expression,
|
||||
&self.metrics.chunks_could_not_prune_no_expression,
|
||||
&self.metrics.rows_could_not_prune_no_expression,
|
||||
&self.metrics.bytes_could_not_prune_no_expression,
|
||||
),
|
||||
NotPrunedReason::CanNotCreatePruningPredicate => (
|
||||
&self.metrics.chunks_not_pruned_cannot_create_predicate,
|
||||
&self.metrics.rows_not_pruned_cannot_create_predicate,
|
||||
&self.metrics.bytes_not_pruned_cannot_create_predicate,
|
||||
&self.metrics.chunks_could_not_prune_cannot_create_predicate,
|
||||
&self.metrics.rows_could_not_prune_cannot_create_predicate,
|
||||
&self.metrics.bytes_could_not_prune_cannot_create_predicate,
|
||||
),
|
||||
NotPrunedReason::DataFusionPruningFailed => (
|
||||
&self.metrics.chunks_not_pruned_df,
|
||||
&self.metrics.rows_not_pruned_df,
|
||||
&self.metrics.bytes_not_pruned_df,
|
||||
&self.metrics.chunks_could_not_prune_df,
|
||||
&self.metrics.rows_could_not_prune_df,
|
||||
&self.metrics.bytes_could_not_prune_df,
|
||||
),
|
||||
};
|
||||
|
||||
|
|
@ -170,21 +178,24 @@ impl PruningObserver for MetricPruningObserver {
|
|||
pub struct PruneMetrics {
|
||||
// number of chunks
|
||||
chunks_pruned: U64Counter,
|
||||
chunks_not_pruned_no_expression: U64Counter,
|
||||
chunks_not_pruned_cannot_create_predicate: U64Counter,
|
||||
chunks_not_pruned_df: U64Counter,
|
||||
chunks_not_pruned: U64Counter,
|
||||
chunks_could_not_prune_no_expression: U64Counter,
|
||||
chunks_could_not_prune_cannot_create_predicate: U64Counter,
|
||||
chunks_could_not_prune_df: U64Counter,
|
||||
|
||||
// number of rows
|
||||
rows_pruned: U64Counter,
|
||||
rows_not_pruned_no_expression: U64Counter,
|
||||
rows_not_pruned_cannot_create_predicate: U64Counter,
|
||||
rows_not_pruned_df: U64Counter,
|
||||
rows_not_pruned: U64Counter,
|
||||
rows_could_not_prune_no_expression: U64Counter,
|
||||
rows_could_not_prune_cannot_create_predicate: U64Counter,
|
||||
rows_could_not_prune_df: U64Counter,
|
||||
|
||||
// size in bytes
|
||||
bytes_pruned: U64Counter,
|
||||
bytes_not_pruned_no_expression: U64Counter,
|
||||
bytes_not_pruned_cannot_create_predicate: U64Counter,
|
||||
bytes_not_pruned_df: U64Counter,
|
||||
bytes_not_pruned: U64Counter,
|
||||
bytes_could_not_prune_no_expression: U64Counter,
|
||||
bytes_could_not_prune_cannot_create_predicate: U64Counter,
|
||||
bytes_could_not_prune_df: U64Counter,
|
||||
}
|
||||
|
||||
impl PruneMetrics {
|
||||
|
|
@ -194,19 +205,20 @@ impl PruneMetrics {
|
|||
"Number of chunks seen by the statistics-based chunk pruner",
|
||||
);
|
||||
let chunks_pruned = chunks.recorder(&[("result", "pruned")]);
|
||||
let chunks_not_pruned_no_expression = chunks.recorder(&[
|
||||
("result", "not_pruned"),
|
||||
let chunks_not_pruned = chunks.recorder(&[("result", "not_pruned")]);
|
||||
let chunks_could_not_prune_no_expression = chunks.recorder(&[
|
||||
("result", "could_not_prune"),
|
||||
("reason", NotPrunedReason::NoExpressionOnPredicate.name()),
|
||||
]);
|
||||
let chunks_not_pruned_cannot_create_predicate = chunks.recorder(&[
|
||||
("result", "not_pruned"),
|
||||
let chunks_could_not_prune_cannot_create_predicate = chunks.recorder(&[
|
||||
("result", "could_not_prune"),
|
||||
(
|
||||
"reason",
|
||||
NotPrunedReason::CanNotCreatePruningPredicate.name(),
|
||||
),
|
||||
]);
|
||||
let chunks_not_pruned_df = chunks.recorder(&[
|
||||
("result", "not_pruned"),
|
||||
let chunks_could_not_prune_df = chunks.recorder(&[
|
||||
("result", "could_not_prune"),
|
||||
("reason", NotPrunedReason::DataFusionPruningFailed.name()),
|
||||
]);
|
||||
|
||||
|
|
@ -215,19 +227,20 @@ impl PruneMetrics {
|
|||
"Number of rows seen by the statistics-based chunk pruner",
|
||||
);
|
||||
let rows_pruned = rows.recorder(&[("result", "pruned")]);
|
||||
let rows_not_pruned_no_expression = rows.recorder(&[
|
||||
("result", "not_pruned"),
|
||||
let rows_not_pruned = rows.recorder(&[("result", "not_pruned")]);
|
||||
let rows_could_not_prune_no_expression = rows.recorder(&[
|
||||
("result", "could_not_prune"),
|
||||
("reason", NotPrunedReason::NoExpressionOnPredicate.name()),
|
||||
]);
|
||||
let rows_not_pruned_cannot_create_predicate = rows.recorder(&[
|
||||
("result", "not_pruned"),
|
||||
let rows_could_not_prune_cannot_create_predicate = rows.recorder(&[
|
||||
("result", "could_not_prune"),
|
||||
(
|
||||
"reason",
|
||||
NotPrunedReason::CanNotCreatePruningPredicate.name(),
|
||||
),
|
||||
]);
|
||||
let rows_not_pruned_df = rows.recorder(&[
|
||||
("result", "not_pruned"),
|
||||
let rows_could_not_prune_df = rows.recorder(&[
|
||||
("result", "could_not_prune"),
|
||||
("reason", NotPrunedReason::DataFusionPruningFailed.name()),
|
||||
]);
|
||||
|
||||
|
|
@ -236,35 +249,39 @@ impl PruneMetrics {
|
|||
"Size (in bytes) of chunks seen by the statistics-based chunk pruner",
|
||||
);
|
||||
let bytes_pruned = bytes.recorder(&[("result", "pruned")]);
|
||||
let bytes_not_pruned_no_expression = bytes.recorder(&[
|
||||
("result", "not_pruned"),
|
||||
let bytes_not_pruned = bytes.recorder(&[("result", "not_pruned")]);
|
||||
let bytes_could_not_prune_no_expression = bytes.recorder(&[
|
||||
("result", "could_not_prune"),
|
||||
("reason", NotPrunedReason::NoExpressionOnPredicate.name()),
|
||||
]);
|
||||
let bytes_not_pruned_cannot_create_predicate = bytes.recorder(&[
|
||||
("result", "not_pruned"),
|
||||
let bytes_could_not_prune_cannot_create_predicate = bytes.recorder(&[
|
||||
("result", "could_not_prune"),
|
||||
(
|
||||
"reason",
|
||||
NotPrunedReason::CanNotCreatePruningPredicate.name(),
|
||||
),
|
||||
]);
|
||||
let bytes_not_pruned_df = bytes.recorder(&[
|
||||
("result", "not_pruned"),
|
||||
let bytes_could_not_prune_df = bytes.recorder(&[
|
||||
("result", "could_not_prune"),
|
||||
("reason", NotPrunedReason::DataFusionPruningFailed.name()),
|
||||
]);
|
||||
|
||||
Self {
|
||||
chunks_pruned,
|
||||
chunks_not_pruned_no_expression,
|
||||
chunks_not_pruned_cannot_create_predicate,
|
||||
chunks_not_pruned_df,
|
||||
chunks_not_pruned,
|
||||
chunks_could_not_prune_no_expression,
|
||||
chunks_could_not_prune_cannot_create_predicate,
|
||||
chunks_could_not_prune_df,
|
||||
rows_pruned,
|
||||
rows_not_pruned_no_expression,
|
||||
rows_not_pruned_cannot_create_predicate,
|
||||
rows_not_pruned_df,
|
||||
rows_not_pruned,
|
||||
rows_could_not_prune_no_expression,
|
||||
rows_could_not_prune_cannot_create_predicate,
|
||||
rows_could_not_prune_df,
|
||||
bytes_pruned,
|
||||
bytes_not_pruned_no_expression,
|
||||
bytes_not_pruned_cannot_create_predicate,
|
||||
bytes_not_pruned_df,
|
||||
bytes_not_pruned,
|
||||
bytes_could_not_prune_no_expression,
|
||||
bytes_could_not_prune_cannot_create_predicate,
|
||||
bytes_could_not_prune_df,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue