refactor: query code clean ups (#5612)

* refactor: remove dead code

* refactor: `Deduplicator::build_scan_plan` consumes `self`

There is no good reason to use the same `Deduplicator` twice. In
contrast I'm quite sure that this would lead to nasty bugs, because
`split_overlapped_chunks` exits early in some cases, so the 2nd plan
would have old and new chunks mixed together.
pull/24376/head
Marco Neumann 2022-09-12 13:00:56 +00:00 committed by GitHub
parent 5936941784
commit caa0dfd1e0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 13 additions and 18 deletions

View File

@ -213,11 +213,6 @@ pub(crate) async fn query(
// Build logical plan for filtering data
// Note that this query will also apply the delete predicates that go with the QueryableBatch
let mut expr = vec![];
if let Some(filter_expr) = predicate.filter_expr() {
expr.push(filter_expr);
}
// TODO: Since we have different type of servers (router,
// ingester, compactor, and querier), we may want to add more
// types into the ExecutorType to have better log and resource

View File

@ -239,7 +239,7 @@ impl TableProvider for ChunkTableProvider {
// the scan for the plans to be correct, they are an extra
// optimization for providers which can offer them
let predicate = Predicate::default().with_pushdown_exprs(filters);
let mut deduplicate = Deduplicater::new(self.ctx.child_ctx("deduplicator"));
let deduplicate = Deduplicater::new(self.ctx.child_ctx("deduplicator"));
let plan = deduplicate.build_scan_plan(
Arc::clone(&self.table_name),
scan_schema,
@ -268,13 +268,13 @@ impl TableProvider for ChunkTableProvider {
/// A deduplicater that deduplicates duplicated data during scan execution
pub(crate) struct Deduplicater {
/// a vector of a vector of overlapped chunks
pub overlapped_chunks_set: Vec<Vec<Arc<dyn QueryChunk>>>,
overlapped_chunks_set: Vec<Vec<Arc<dyn QueryChunk>>>,
/// a vector of non-overlapped chunks, each of which has duplicates within itself
pub in_chunk_duplicates_chunks: Vec<Arc<dyn QueryChunk>>,
in_chunk_duplicates_chunks: Vec<Arc<dyn QueryChunk>>,
/// a vector of non-overlapped and non-duplicates chunks
pub no_duplicates_chunks: Vec<Arc<dyn QueryChunk>>,
no_duplicates_chunks: Vec<Arc<dyn QueryChunk>>,
/// schema interner
schema_interner: SchemaInterner,
@ -370,7 +370,7 @@ impl Deduplicater {
///
///```
pub(crate) fn build_scan_plan(
&mut self,
mut self,
table_name: Arc<str>,
output_schema: Arc<Schema>,
chunks: Vec<Arc<dyn QueryChunk>>,
@ -2303,7 +2303,7 @@ mod test {
];
assert_batches_sorted_eq!(&expected, &raw_data(&chunks).await);
let mut deduplicator = Deduplicater::new(IOxSessionContext::with_testing());
let deduplicator = Deduplicater::new(IOxSessionContext::with_testing());
let plan = deduplicator
.build_scan_plan(Arc::from("t"), schema, chunks, Predicate::default(), None)
.unwrap();
@ -2360,7 +2360,7 @@ mod test {
];
assert_batches_eq!(&expected, &raw_data(&chunks).await);
let mut deduplicator = Deduplicater::new(IOxSessionContext::with_testing());
let deduplicator = Deduplicater::new(IOxSessionContext::with_testing());
let plan = deduplicator
.build_scan_plan(Arc::from("t"), schema, chunks, Predicate::default(), None)
.unwrap();
@ -2434,7 +2434,7 @@ mod test {
.build()
.unwrap();
let mut deduplicator = Deduplicater::new(IOxSessionContext::with_testing());
let deduplicator = Deduplicater::new(IOxSessionContext::with_testing());
let plan = deduplicator
.build_scan_plan(
Arc::from("t"),
@ -2534,7 +2534,7 @@ mod test {
];
assert_batches_eq!(&expected, &raw_data(&chunks).await);
let mut deduplicator = Deduplicater::new(IOxSessionContext::with_testing());
let deduplicator = Deduplicater::new(IOxSessionContext::with_testing());
let plan = deduplicator
.build_scan_plan(Arc::from("t"), schema, chunks, Predicate::default(), None)
.unwrap();
@ -2686,7 +2686,7 @@ mod test {
assert_batches_eq!(&expected, &raw_data(&chunks).await);
// Create scan plan whose output data is only partially sorted
let mut deduplicator = Deduplicater::new(IOxSessionContext::with_testing());
let deduplicator = Deduplicater::new(IOxSessionContext::with_testing());
let plan = deduplicator
.build_scan_plan(Arc::from("t"), schema, chunks, Predicate::default(), None)
.unwrap();
@ -2889,7 +2889,7 @@ mod test {
assert_batches_eq!(&expected, &raw_data(&chunks).await);
let sort_key = compute_sort_key_for_chunks(&schema, &chunks);
let mut deduplicator = Deduplicater::new(IOxSessionContext::with_testing());
let deduplicator = Deduplicater::new(IOxSessionContext::with_testing());
let plan = deduplicator
.build_scan_plan(
Arc::from("t"),
@ -3022,7 +3022,7 @@ mod test {
let schema = chunk1.schema();
let chunks = vec![chunk1, chunk2, chunk3, chunk4];
let mut deduplicator = Deduplicater::new(IOxSessionContext::with_testing());
let deduplicator = Deduplicater::new(IOxSessionContext::with_testing());
let plan = deduplicator
.build_scan_plan(
Arc::from("t"),
@ -3209,7 +3209,7 @@ mod test {
chunk1_1, chunk1_2, chunk1_3, chunk1_4, chunk2_1, chunk2_2, chunk2_3, chunk2_4,
chunk2_5, chunk2_6,
];
let mut deduplicator = Deduplicater::new(IOxSessionContext::with_testing());
let deduplicator = Deduplicater::new(IOxSessionContext::with_testing());
let plan = deduplicator
.build_scan_plan(
Arc::from("t"),