refactor: display column names for predicates in EXPLAIN for metadata cache (#25598)

pull/25599/head
Trevor Hilton 2024-11-28 11:18:12 -05:00 committed by GitHub
parent 13ab41fa1f
commit 81715fbfea
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 35 additions and 18 deletions

View File

@ -490,7 +490,7 @@ mod tests {
"| us-east | b |",
"+---------+------+",
],
explain_contains: "MetaCacheExec: predicates=[[0 IN (us-east)]] inner=MemoryExec: partitions=1, partition_sizes=[1]",
explain_contains: "MetaCacheExec: predicates=[[region@0 IN (us-east)]] inner=MemoryExec: partitions=1, partition_sizes=[1]",
use_sorted_assert: false,
},
TestCase {
@ -504,7 +504,7 @@ mod tests {
"| us-east | a |",
"+---------+------+",
],
explain_contains: "MetaCacheExec: predicates=[[0 IN (us-east)], [1 IN (a)]] inner=MemoryExec: partitions=1, partition_sizes=[1]",
explain_contains: "MetaCacheExec: predicates=[[region@0 IN (us-east)], [host@1 IN (a)]] inner=MemoryExec: partitions=1, partition_sizes=[1]",
use_sorted_assert: false,
},
TestCase {
@ -519,7 +519,7 @@ mod tests {
"| us-east | b |",
"+---------+------+",
],
explain_contains: "MetaCacheExec: predicates=[[0 IN (us-east)], [1 IN (a,b)]] inner=MemoryExec: partitions=1, partition_sizes=[1]",
explain_contains: "MetaCacheExec: predicates=[[region@0 IN (us-east)], [host@1 IN (a,b)]] inner=MemoryExec: partitions=1, partition_sizes=[1]",
use_sorted_assert: false,
},
TestCase {
@ -533,7 +533,7 @@ mod tests {
"| us-east | b |",
"+---------+------+",
],
explain_contains: "MetaCacheExec: predicates=[[0 IN (us-east)], [1 NOT IN (a)]] inner=MemoryExec: partitions=1, partition_sizes=[1]",
explain_contains: "MetaCacheExec: predicates=[[region@0 IN (us-east)], [host@1 NOT IN (a)]] inner=MemoryExec: partitions=1, partition_sizes=[1]",
use_sorted_assert: false,
},
TestCase {
@ -552,7 +552,7 @@ mod tests {
"| us-west | d |",
"+---------+------+",
],
explain_contains: "MetaCacheExec: predicates=[[0 IN (ca-cent,ca-east,us-east,us-west)]] inner=MemoryExec: partitions=1, partition_sizes=[1]",
explain_contains: "MetaCacheExec: predicates=[[region@0 IN (ca-cent,ca-east,us-east,us-west)]] inner=MemoryExec: partitions=1, partition_sizes=[1]",
use_sorted_assert: false,
},
TestCase {
@ -571,7 +571,7 @@ mod tests {
"| eu-west | l |",
"+---------+------+",
],
explain_contains: "MetaCacheExec: predicates=[[0 NOT IN (ca-cent,ca-east,us-east,us-west)]] inner=MemoryExec: partitions=1, partition_sizes=[1]",
explain_contains: "MetaCacheExec: predicates=[[region@0 NOT IN (ca-cent,ca-east,us-east,us-west)]] inner=MemoryExec: partitions=1, partition_sizes=[1]",
use_sorted_assert: false,
},
TestCase {
@ -587,7 +587,7 @@ mod tests {
"| us-east | b |",
"+---------+------+",
],
explain_contains: "MetaCacheExec: predicates=[[0 IN (ca-east,us-east)]] inner=MemoryExec: partitions=1, partition_sizes=[1]",
explain_contains: "MetaCacheExec: predicates=[[region@0 IN (ca-east,us-east)]] inner=MemoryExec: partitions=1, partition_sizes=[1]",
use_sorted_assert: false,
},
TestCase {
@ -602,7 +602,7 @@ mod tests {
"| us-west | d |",
"+---------+------+",
],
explain_contains: "MetaCacheExec: predicates=[[1 IN (d,e)]] inner=MemoryExec: partitions=1, partition_sizes=[1]",
explain_contains: "MetaCacheExec: predicates=[[host@1 IN (d,e)]] inner=MemoryExec: partitions=1, partition_sizes=[1]",
use_sorted_assert: false,
},
TestCase {
@ -632,7 +632,7 @@ mod tests {
"| us-east | b |",
"+---------+------+",
],
explain_contains: "MetaCacheExec: predicates=[[0 IN (us-east)], [1 IN (a,b)]] inner=MemoryExec: partitions=1, partition_sizes=[1]",
explain_contains: "MetaCacheExec: predicates=[[region@0 IN (us-east)], [host@1 IN (a,b)]] inner=MemoryExec: partitions=1, partition_sizes=[1]",
use_sorted_assert: false,
},
TestCase {
@ -783,12 +783,20 @@ mod tests {
// NOTE(hiltontj): this probably can be done a better way?
// The EXPLAIN output will have two columns, the one we are interested in that contains
// the details of the MetaCacheExec is called `plan`...
assert!(explain
.column_by_name("plan")
.unwrap()
.as_string::<i32>()
.iter()
.any(|plan| plan.is_some_and(|plan| plan.contains(tc.explain_contains))),);
assert!(
explain
.column_by_name("plan")
.unwrap()
.as_string::<i32>()
.iter()
.any(|plan| plan.is_some_and(|plan| plan.contains(tc.explain_contains))),
"explain plan did not contain the expression:\n\n\
{expected}\n\n\
instead, the output was:\n\n\
{actual:#?}",
expected = tc.explain_contains,
actual = explain.column_by_name("plan").unwrap().as_string::<i32>(),
);
}
}
}

View File

@ -86,8 +86,13 @@ impl TableProvider for MetaCacheFunctionProvider {
} else {
(vec![], None)
};
let mut exec =
MetaCacheExec::try_new(predicates, &[batches], self.schema(), projection.cloned())?;
let mut exec = MetaCacheExec::try_new(
predicates,
Arc::clone(&self.table_def),
&[batches],
self.schema(),
projection.cloned(),
)?;
let show_sizes = ctx.config_options().explain.show_sizes;
exec = exec.with_show_sizes(show_sizes);
@ -272,12 +277,14 @@ impl TableFunctionImpl for MetaCacheFunction {
/// Wrapper around a [`MemoryExec`] that additionally carries the predicates and table
/// definition used when rendering this node's display output (e.g. in `EXPLAIN`).
#[derive(Debug)]
struct MetaCacheExec {
/// The wrapped in-memory exec, built in `try_new` from the provided partitions, schema,
/// and projection.
inner: MemoryExec,
/// Table definition used to look up a predicate's column name from its column ID when
/// the predicates are displayed.
table_def: Arc<TableDefinition>,
/// Optional predicates keyed by column ID; the `DisplayAs` impl writes each entry as
/// `[name@id predicate]`.
predicates: Option<IndexMap<ColumnId, Predicate>>,
}
impl MetaCacheExec {
fn try_new(
predicates: Option<IndexMap<ColumnId, Predicate>>,
table_def: Arc<TableDefinition>,
partitions: &[Vec<RecordBatch>],
schema: SchemaRef,
projection: Option<Vec<usize>>,
@ -285,6 +292,7 @@ impl MetaCacheExec {
Ok(Self {
inner: MemoryExec::try_new(partitions, schema, projection)?,
predicates,
table_def,
})
}
@ -305,7 +313,8 @@ impl DisplayAs for MetaCacheExec {
write!(f, " predicates=[")?;
let mut p_iter = predicates.iter();
while let Some((col_id, predicate)) = p_iter.next() {
write!(f, "[{col_id} {predicate}]")?;
let col_name = self.table_def.column_id_to_name(col_id).unwrap_or_default();
write!(f, "[{col_name}@{col_id} {predicate}]")?;
if p_iter.size_hint().0 > 0 {
write!(f, ", ")?;
}