From 8a8de19fb5935045b09477dd71dd51a0d38b2c65 Mon Sep 17 00:00:00 2001
From: Raphael Taylor-Davies <r.taylordavies@googlemail.com>
Date: Wed, 2 Feb 2022 16:40:30 +0000
Subject: [PATCH] feat: combine non-overlapping chunks without deletes

---
 query/src/provider.rs                    | 17 ++++++++---------
 query_tests/cases/in/duplicates.expected | 16 ++++------------
 2 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/query/src/provider.rs b/query/src/provider.rs
index c6deaef89d..966f661cd1 100644
--- a/query/src/provider.rs
+++ b/query/src/provider.rs
@@ -441,15 +441,14 @@ impl<C: QueryChunk + 'static> Deduplicater<C> {
             }
 
             // Go over non_duplicates_chunks, build a plan for it
-            for no_duplicates_chunk in self.no_duplicates_chunks.to_vec() {
-                plans.push(Self::build_plan_for_non_duplicates_chunk(
-                    Arc::clone(&table_name),
-                    Arc::clone(&output_schema),
-                    no_duplicates_chunk.to_owned(),
-                    predicate.clone(),
-                    &output_sort_key,
-                )?);
-            }
+            let mut non_duplicate_plans = Self::build_plans_for_non_duplicates_chunks(
+                Arc::clone(&table_name),
+                Arc::clone(&output_schema),
+                self.no_duplicates_chunks.to_vec(),
+                predicate,
+                &output_sort_key,
+            )?;
+            plans.append(&mut non_duplicate_plans);
         }
 
         if plans.is_empty() {
diff --git a/query_tests/cases/in/duplicates.expected b/query_tests/cases/in/duplicates.expected
index e092b9f0ac..5cc3679ee9 100644
--- a/query_tests/cases/in/duplicates.expected
+++ b/query_tests/cases/in/duplicates.expected
@@ -19,9 +19,7 @@
 |               |                 RepartitionExec: partitioning=RoundRobinBatch(4)                                                                            |
 |               |                   IOxReadFilterNode: table_name=h2o, chunks=1 predicate=Predicate                                                           |
 |               |         RepartitionExec: partitioning=RoundRobinBatch(4)                                                                                    |
-|               |           IOxReadFilterNode: table_name=h2o, chunks=1 predicate=Predicate                                                                   |
-|               |         RepartitionExec: partitioning=RoundRobinBatch(4)                                                                                    |
-|               |           IOxReadFilterNode: table_name=h2o, chunks=1 predicate=Predicate                                                                   |
+|               |           IOxReadFilterNode: table_name=h2o, chunks=2 predicate=Predicate                                                                   |
 |               |                                                                                                                                             |
 +---------------+---------------------------------------------------------------------------------------------------------------------------------------------+
 -- SQL: EXPLAIN select time, state, city, min_temp, max_temp, area from h2o;
@@ -41,9 +39,7 @@
 |               |             RepartitionExec: partitioning=RoundRobinBatch(4)                                                                            |
 |               |               IOxReadFilterNode: table_name=h2o, chunks=1 predicate=Predicate                                                           |
 |               |     RepartitionExec: partitioning=RoundRobinBatch(4)                                                                                    |
-|               |       IOxReadFilterNode: table_name=h2o, chunks=1 predicate=Predicate                                                                   |
-|               |     RepartitionExec: partitioning=RoundRobinBatch(4)                                                                                    |
-|               |       IOxReadFilterNode: table_name=h2o, chunks=1 predicate=Predicate                                                                   |
+|               |       IOxReadFilterNode: table_name=h2o, chunks=2 predicate=Predicate                                                                   |
 |               |                                                                                                                                         |
 +---------------+-----------------------------------------------------------------------------------------------------------------------------------------+
 -- SQL: EXPLAIN select state as name from h2o UNION ALL select city as name from h2o;
@@ -68,9 +64,7 @@
 |               |                 RepartitionExec: partitioning=RoundRobinBatch(4)                  |
 |               |                   IOxReadFilterNode: table_name=h2o, chunks=1 predicate=Predicate |
 |               |       RepartitionExec: partitioning=RoundRobinBatch(4)                            |
-|               |         IOxReadFilterNode: table_name=h2o, chunks=1 predicate=Predicate           |
-|               |       RepartitionExec: partitioning=RoundRobinBatch(4)                            |
-|               |         IOxReadFilterNode: table_name=h2o, chunks=1 predicate=Predicate           |
+|               |         IOxReadFilterNode: table_name=h2o, chunks=2 predicate=Predicate           |
 |               |   ProjectionExec: expr=[city@0 as name]                                           |
 |               |     UnionExec                                                                     |
 |               |       ProjectionExec: expr=[city@0 as city]                                       |
@@ -83,8 +77,6 @@
 |               |                 RepartitionExec: partitioning=RoundRobinBatch(4)                  |
 |               |                   IOxReadFilterNode: table_name=h2o, chunks=1 predicate=Predicate |
 |               |       RepartitionExec: partitioning=RoundRobinBatch(4)                            |
-|               |         IOxReadFilterNode: table_name=h2o, chunks=1 predicate=Predicate           |
-|               |       RepartitionExec: partitioning=RoundRobinBatch(4)                            |
-|               |         IOxReadFilterNode: table_name=h2o, chunks=1 predicate=Predicate           |
+|               |         IOxReadFilterNode: table_name=h2o, chunks=2 predicate=Predicate           |
 |               |                                                                                   |
 +---------------+-----------------------------------------------------------------------------------+