From fce4f3f346a7aa4f89eb5f4dd999ec4c93f10265 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Wed, 19 Jul 2023 14:40:15 -0400 Subject: [PATCH 01/10] test: Remove outdated test comments This is not at all what this test is doing; probably copypasta --- ingester/src/buffer_tree/root.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ingester/src/buffer_tree/root.rs b/ingester/src/buffer_tree/root.rs index 299d3502d2..2d624ee729 100644 --- a/ingester/src/buffer_tree/root.rs +++ b/ingester/src/buffer_tree/root.rs @@ -998,12 +998,8 @@ mod tests { assert_eq!(m, 1, "tables counter mismatch"); } - /// Assert that multiple writes to a single namespace/table results in a - /// single namespace being created, and matching metrics. #[tokio::test] async fn test_partition_iter() { - // Configure the mock partition provider to return a single partition, named - // p1. let partition_provider = Arc::new( MockPartitionProvider::default() .with_partition( From 3dd29384edc6206bc86d946b727b7c01d19964a8 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Thu, 20 Jul 2023 10:51:02 -0400 Subject: [PATCH 02/10] fix: Remove unneeded allow unused_imports and remove unused imports --- ingester/src/buffer_tree/partition/resolver/cache.rs | 5 ++--- ingester/src/buffer_tree/partition/resolver/coalesce.rs | 4 +--- ingester/src/buffer_tree/partition/resolver/mod.rs | 2 -- ingester/src/buffer_tree/partition/resolver/trait.rs | 4 ++-- 4 files changed, 5 insertions(+), 10 deletions(-) diff --git a/ingester/src/buffer_tree/partition/resolver/cache.rs b/ingester/src/buffer_tree/partition/resolver/cache.rs index 047d9176b1..78a32e11e0 100644 --- a/ingester/src/buffer_tree/partition/resolver/cache.rs +++ b/ingester/src/buffer_tree/partition/resolver/cache.rs @@ -2,9 +2,7 @@ use std::{collections::HashMap, sync::Arc, time::Duration}; use async_trait::async_trait; use backoff::BackoffConfig; -use data_types::{ - NamespaceId, Partition, PartitionHashId, PartitionId, PartitionKey, SequenceNumber, TableId, -}; +use data_types::{NamespaceId, Partition, PartitionHashId, PartitionId, PartitionKey, TableId}; use iox_catalog::interface::Catalog; use observability_deps::tracing::debug; use parking_lot::Mutex; @@ -222,6 +220,7 @@ mod tests { // Harmless in tests - saves a bunch of extra vars. 
#![allow(clippy::await_holding_lock)] + use data_types::PartitionId; use iox_catalog::mem::MemCatalog; use super::*; diff --git a/ingester/src/buffer_tree/partition/resolver/coalesce.rs b/ingester/src/buffer_tree/partition/resolver/coalesce.rs index 57b6673e08..5b0a492b9b 100644 --- a/ingester/src/buffer_tree/partition/resolver/coalesce.rs +++ b/ingester/src/buffer_tree/partition/resolver/coalesce.rs @@ -6,7 +6,6 @@ use std::{ }, }; -use arrow::compute::kernels::partition; use async_trait::async_trait; use data_types::{NamespaceId, PartitionKey, TableId}; use futures::{future::Shared, FutureExt}; @@ -267,12 +266,11 @@ mod tests { use assert_matches::assert_matches; use futures::Future; use futures::{stream::FuturesUnordered, StreamExt}; - use lazy_static::lazy_static; use test_helpers::timeout::FutureTimeout; use tokio::sync::{Notify, Semaphore}; use crate::{ - buffer_tree::partition::{resolver::mock::MockPartitionProvider, SortKeyState}, + buffer_tree::partition::resolver::mock::MockPartitionProvider, test_util::{ defer_namespace_name_1_sec, defer_table_metadata_1_sec, PartitionDataBuilder, ARBITRARY_NAMESPACE_ID, ARBITRARY_PARTITION_KEY, ARBITRARY_TABLE_ID, diff --git a/ingester/src/buffer_tree/partition/resolver/mod.rs b/ingester/src/buffer_tree/partition/resolver/mod.rs index bc0ea78ae8..2343bf181f 100644 --- a/ingester/src/buffer_tree/partition/resolver/mod.rs +++ b/ingester/src/buffer_tree/partition/resolver/mod.rs @@ -2,8 +2,6 @@ //! //! [`PartitionData`]: crate::buffer_tree::partition::PartitionData -#![allow(unused_imports)] // Transition time only. - mod cache; pub(crate) use cache::*; diff --git a/ingester/src/buffer_tree/partition/resolver/trait.rs b/ingester/src/buffer_tree/partition/resolver/trait.rs index e525cd5a6f..e6c8276117 100644 --- a/ingester/src/buffer_tree/partition/resolver/trait.rs +++ b/ingester/src/buffer_tree/partition/resolver/trait.rs @@ -49,11 +49,11 @@ where #[cfg(test)] mod tests { - use std::{sync::Arc, time::Duration}; + use std::sync::Arc; use super::*; use crate::{ - buffer_tree::partition::{resolver::mock::MockPartitionProvider, SortKeyState}, + buffer_tree::partition::resolver::mock::MockPartitionProvider, test_util::{ defer_namespace_name_1_sec, defer_table_metadata_1_sec, PartitionDataBuilder, ARBITRARY_NAMESPACE_ID, ARBITRARY_PARTITION_ID, ARBITRARY_PARTITION_KEY, From 3ac0e30ac9f5fd86f65692f19a2eee0e040b3dde Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" <193874+carols10cents@users.noreply.github.com> Date: Fri, 21 Jul 2023 09:22:33 -0400 Subject: [PATCH 03/10] fix: Remove namespace ID from a partition identifier type (#8288) I'm going to make a change in the future that removes the access to the namespace ID from this code, and it's not needed anyway as partitions are uniquely identifiable by only table ID and partition key. 
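As a minimal illustration (not the ingester's actual code), a map keyed
by such a compound key only needs the table ID and partition key; the
`TableId` and `PartitionKey` types below are simplified stand-ins for
the real ones:

    use std::collections::HashMap;

    // Simplified stand-ins for the real ingester types.
    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
    struct TableId(i64);

    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
    struct PartitionKey(String);

    // A compound key of (table, partition_key) uniquely identifies a
    // single partition, so no namespace ID is needed.
    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
    struct Key {
        table_id: TableId,
        partition_key: PartitionKey,
    }

    fn main() {
        let mut ongoing: HashMap<Key, &'static str> = HashMap::new();
        let key = Key {
            table_id: TableId(42),
            partition_key: PartitionKey("2023-07-21".to_string()),
        };
        // The same (table, partition key) pair always maps to the same
        // entry, regardless of which namespace the lookup came from.
        ongoing.insert(key.clone(), "in-flight resolve placeholder");
        assert!(ongoing.contains_key(&key));
    }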
Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com>
---
 ingester/src/buffer_tree/partition/resolver/coalesce.rs | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/ingester/src/buffer_tree/partition/resolver/coalesce.rs b/ingester/src/buffer_tree/partition/resolver/coalesce.rs
index 57b6673e08..cc038c6913 100644
--- a/ingester/src/buffer_tree/partition/resolver/coalesce.rs
+++ b/ingester/src/buffer_tree/partition/resolver/coalesce.rs
@@ -25,11 +25,10 @@ use super::PartitionProvider;
 type BoxedResolveFuture =
     Pin<Box<dyn std::future::Future<Output = Arc<Mutex<PartitionData>>> + Send>>;
 
-/// A compound key of `(namespace, table, partition_key)` which uniquely
+/// A compound key of `(table, partition_key)` which uniquely
 /// identifies a single partition.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 struct Key {
-    namespace_id: NamespaceId,
     table_id: TableId,
     partition_key: PartitionKey,
 }
@@ -149,7 +148,6 @@ where
         table: Arc<DeferredLoad<TableMetadata>>,
     ) -> Arc<Mutex<PartitionData>> {
         let key = Key {
-            namespace_id,
             table_id,
             partition_key: partition_key.clone(), // Ref-counted anyway!
         };

From 5731e012bfae39e2565b32c155e32336e57bcb6e Mon Sep 17 00:00:00 2001
From: Martin Hilton
Date: Fri, 21 Jul 2023 15:48:15 +0100
Subject: [PATCH 04/10] fix(influxql): advanced syntax window functions with
 selector aggregates (#8303)

Ensure that advanced-syntax window functions that contain a selector
function, rather than an aggregate function, are considered valid and
generate a correct plan.

Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com>
---
 .../query_tests/cases/in/window_like.influxql |  89 ++-
 .../cases/in/window_like.influxql.expected    | 534 +++++++++++++++++-
 iox_query_influxql/src/plan/planner.rs        |  13 +
 iox_query_influxql/src/plan/rewriter.rs       |   2 +-
 4 files changed, 633 insertions(+), 5 deletions(-)

diff --git a/influxdb_iox/tests/query_tests/cases/in/window_like.influxql b/influxdb_iox/tests/query_tests/cases/in/window_like.influxql
index 98b99c4ba0..53c836b1aa 100644
--- a/influxdb_iox/tests/query_tests/cases/in/window_like.influxql
+++ b/influxdb_iox/tests/query_tests/cases/in/window_like.influxql
@@ -21,6 +21,19 @@ SELECT difference(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AN
 -- group by time and a tag
 SELECT difference(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
 
+--
+-- difference + selector
+--
+SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
+SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
+-- the input data is regular data at 10s intervals, so 7s windows ensure the `first` generates windows with NULL values to test NULL handling of difference
+SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
+SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
+-- linear filling of selector functions produces an execution error
+-- (see https://github.com/influxdata/influxdb_iox/issues/8302).
+-- SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
+-- group by time and a tag
+SELECT difference(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
 
 --
 -- non_negative_difference
 --
@@ -35,6 +48,11 @@ SELECT non_negative_difference(usage_idle) FROM cpu WHERE time >= 00000001300000
 -- SELECT non_negative_difference(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
 
+--
+-- non_negative_difference + selector
+--
+SELECT non_negative_difference(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
+
 --
 -- moving_average
 --
@@ -61,6 +79,17 @@ SELECT moving_average(mean(writes), 3) FROM diskio WHERE time >= 000000013000000
 SELECT moving_average(mean(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
 SELECT moving_average(mean(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
 
+--
+-- moving_average + selector
+--
+-- the input data is regular data at 10s intervals, so 7s windows ensure the `first` generates windows with NULL values to test NULL handling of moving_average
+SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
+SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
+SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
+-- linear filling of selector functions produces an execution error
+-- (see https://github.com/influxdata/influxdb_iox/issues/8302).
+-- SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear); + -- -- combining window functions -- @@ -109,7 +138,7 @@ SELECT derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AN SELECT derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s); SELECT derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s); SELECT derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s); --- the input data is regular data at 10s intervals, so 7s windows ensure the `mean` generates windows with NULL values to test NULL handling of difference +-- the input data is regular data at 10s intervals, so 7s windows ensure the `mean` generates windows with NULL values to test NULL handling of derivative SELECT derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0); SELECT derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0); SELECT derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous); @@ -120,6 +149,26 @@ SELECT derivative(mean(writes), 500ms) FROM diskio WHERE time >= 000000013000000 SELECT derivative(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; SELECT derivative(mean(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; +-- +-- derivative + selector +-- +SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s); +SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s); +SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s); +SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s); +-- the input data is regular data at 10s intervals, so 7s windows ensure the `first` generates windows with NULL values to test NULL handling of derivative +SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0); +SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0); +SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous); +SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous); +-- linear filling of selector functions produces an execution error +-- (see https://github.com/influxdata/influxdb_iox/issues/8302). 
+-- SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear); +-- SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear); +-- group by time and a tag +SELECT derivative(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; +SELECT derivative(first(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; + -- -- non_negative_derivative -- @@ -138,7 +187,7 @@ SELECT non_negative_derivative(mean(writes)) FROM diskio WHERE time >= 000000013 SELECT non_negative_derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s); SELECT non_negative_derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s); SELECT non_negative_derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s); --- the input data is regular data at 10s intervals, so 7s windows ensure the `mean` generates windows with NULL values to test NULL handling of difference +-- the input data is regular data at 10s intervals, so 7s windows ensure the `mean` generates windows with NULL values to test NULL handling of non_negative_derivative SELECT non_negative_derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0); SELECT non_negative_derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0); SELECT non_negative_derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous); @@ -149,6 +198,26 @@ SELECT non_negative_derivative(mean(writes), 500ms) FROM diskio WHERE time >= 00 SELECT non_negative_derivative(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; SELECT non_negative_derivative(mean(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; +-- +-- non_negative_derivative + selector +-- +SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s); +SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s); +SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s); +SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s); +-- the input data is regular data at 10s intervals, so 7s windows ensure the `first` generates windows with NULL values to test NULL handling of non_negative_derivative +SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0); +SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 
0000000210000000001 GROUP BY time(7s) fill(0); +SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous); +SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous); +-- linear filling of selector functions produces an execution error +-- (see https://github.com/influxdata/influxdb_iox/issues/8302). +-- SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear); +-- SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear); +-- group by time and a tag +SELECT non_negative_derivative(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; +SELECT non_negative_derivative(first(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; + -- -- cumulative_sum -- @@ -167,4 +236,18 @@ SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 000000013000000000 SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous); SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear); -- group by time and a tag -SELECT cumulative_sum(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; \ No newline at end of file +SELECT cumulative_sum(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; + +-- +-- cumulative_sum + selector +-- +SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s); +SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s); +-- the input data is regular data at 10s intervals, so 7s windows ensure the `first` generates windows with NULL values to test NULL handling of cumulative_sum +SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0); +SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous); +-- linear filling of selector functions produces an execution error +-- (see https://github.com/influxdata/influxdb_iox/issues/8302). 
+-- SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear); +-- group by time and a tag +SELECT cumulative_sum(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; \ No newline at end of file diff --git a/influxdb_iox/tests/query_tests/cases/in/window_like.influxql.expected b/influxdb_iox/tests/query_tests/cases/in/window_like.influxql.expected index 4871989912..c194b0da6f 100644 --- a/influxdb_iox/tests/query_tests/cases/in/window_like.influxql.expected +++ b/influxdb_iox/tests/query_tests/cases/in/window_like.influxql.expected @@ -148,6 +148,86 @@ tags: cpu=cpu1 | 1970-01-01T00:02:30 | -0.03333333333334565 | | 1970-01-01T00:03:00 | -0.03333333333333144 | +---------------------+----------------------+ +-- InfluxQL: SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s); +name: diskio ++---------------------+------------+ +| time | difference | ++---------------------+------------+ +| 1970-01-01T00:02:20 | 164 | +| 1970-01-01T00:02:27 | 187 | +| 1970-01-01T00:02:34 | 112 | +| 1970-01-01T00:02:48 | 110 | +| 1970-01-01T00:02:55 | 219 | +| 1970-01-01T00:03:09 | 75 | +| 1970-01-01T00:03:16 | 76 | +| 1970-01-01T00:03:30 | 146 | ++---------------------+------------+ +-- InfluxQL: SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s); +name: diskio ++---------------------+------------+ +| time | difference | ++---------------------+------------+ +| 1970-01-01T00:02:00 | 366 | +| 1970-01-01T00:02:30 | 421 | +| 1970-01-01T00:03:00 | 441 | +| 1970-01-01T00:03:30 | 297 | ++---------------------+------------+ +-- InfluxQL: SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0); +name: diskio ++---------------------+------------+ +| time | difference | ++---------------------+------------+ +| 1970-01-01T00:02:06 | 5592646 | +| 1970-01-01T00:02:13 | -5592646 | +| 1970-01-01T00:02:20 | 5592810 | +| 1970-01-01T00:02:27 | 187 | +| 1970-01-01T00:02:34 | 112 | +| 1970-01-01T00:02:41 | -5593109 | +| 1970-01-01T00:02:48 | 5593219 | +| 1970-01-01T00:02:55 | 219 | +| 1970-01-01T00:03:02 | -5593438 | +| 1970-01-01T00:03:09 | 5593513 | +| 1970-01-01T00:03:16 | 76 | +| 1970-01-01T00:03:23 | -5593589 | +| 1970-01-01T00:03:30 | 5593735 | ++---------------------+------------+ +-- InfluxQL: SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous); +name: diskio ++---------------------+------------+ +| time | difference | ++---------------------+------------+ +| 1970-01-01T00:02:13 | 0 | +| 1970-01-01T00:02:20 | 164 | +| 1970-01-01T00:02:27 | 187 | +| 1970-01-01T00:02:34 | 112 | +| 1970-01-01T00:02:41 | 0 | +| 1970-01-01T00:02:48 | 110 | +| 1970-01-01T00:02:55 | 219 | +| 1970-01-01T00:03:02 | 0 | +| 1970-01-01T00:03:09 | 75 | +| 1970-01-01T00:03:16 | 76 | +| 1970-01-01T00:03:23 | 0 | +| 1970-01-01T00:03:30 | 146 | ++---------------------+------------+ +-- InfluxQL: SELECT difference(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; +name: cpu +tags: cpu=cpu0 ++---------------------+---------------------+ +| time | difference | 
++---------------------+---------------------+ +| 1970-01-01T00:02:00 | -0.7999999999999972 | +| 1970-01-01T00:02:30 | 3.5 | +| 1970-01-01T00:03:00 | -0.4000000000000057 | ++---------------------+---------------------+ +name: cpu +tags: cpu=cpu1 ++---------------------+----------------------+ +| time | difference | ++---------------------+----------------------+ +| 1970-01-01T00:02:00 | 0.20000000000000284 | +| 1970-01-01T00:02:30 | 0.0 | +| 1970-01-01T00:03:00 | -0.10000000000000853 | ++---------------------+----------------------+ -- InfluxQL: SELECT non_negative_difference(usage_system) FROM cpu WHERE time >= 0000000060000000000 AND time < 0000000210000000001 AND cpu = 'cpu0'; name: cpu +---------------------+-------------------------+ @@ -202,6 +282,22 @@ tags: cpu=cpu1 +---------------------+-------------------------+ | 1970-01-01T00:02:00 | 0.36666666666667425 | +---------------------+-------------------------+ +-- InfluxQL: SELECT non_negative_difference(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; +name: cpu +tags: cpu=cpu0 ++---------------------+-------------------------+ +| time | non_negative_difference | ++---------------------+-------------------------+ +| 1970-01-01T00:02:30 | 3.5 | ++---------------------+-------------------------+ +name: cpu +tags: cpu=cpu1 ++---------------------+-------------------------+ +| time | non_negative_difference | ++---------------------+-------------------------+ +| 1970-01-01T00:02:00 | 0.20000000000000284 | +| 1970-01-01T00:02:30 | 0.0 | ++---------------------+-------------------------+ -- InfluxQL: SELECT moving_average(writes, 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001; name: diskio +---------------------+-------------------+ @@ -307,6 +403,54 @@ name: diskio | 1970-01-01T00:03:23 | 5593588.0 | | 1970-01-01T00:03:30 | 5593662.0 | +---------------------+-------------------+ +-- InfluxQL: SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s); +name: diskio ++---------------------+-------------------+ +| time | moving_average | ++---------------------+-------------------+ +| 1970-01-01T00:02:27 | 5592817.666666667 | +| 1970-01-01T00:02:34 | 5592972.0 | +| 1970-01-01T00:02:48 | 5593108.333333333 | +| 1970-01-01T00:02:55 | 5593255.333333333 | +| 1970-01-01T00:03:09 | 5593390.0 | +| 1970-01-01T00:03:16 | 5593513.333333333 | +| 1970-01-01T00:03:30 | 5593612.333333333 | ++---------------------+-------------------+ +-- InfluxQL: SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0); +name: diskio ++---------------------+--------------------+ +| time | moving_average | ++---------------------+--------------------+ +| 1970-01-01T00:02:13 | 1864215.3333333333 | +| 1970-01-01T00:02:20 | 3728485.3333333335 | +| 1970-01-01T00:02:27 | 3728602.3333333335 | +| 1970-01-01T00:02:34 | 5592972.0 | +| 1970-01-01T00:02:41 | 3728702.0 | +| 1970-01-01T00:02:48 | 3728776.0 | +| 1970-01-01T00:02:55 | 3728885.6666666665 | +| 1970-01-01T00:03:02 | 3728885.6666666665 | +| 1970-01-01T00:03:09 | 3728983.6666666665 | +| 1970-01-01T00:03:16 | 3729034.0 | +| 1970-01-01T00:03:23 | 3729034.0 | +| 1970-01-01T00:03:30 | 3729108.0 | ++---------------------+--------------------+ +-- InfluxQL: SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND 
time < 0000000210000000001 GROUP BY time(7s) fill(previous); +name: diskio ++---------------------+-------------------+ +| time | moving_average | ++---------------------+-------------------+ +| 1970-01-01T00:02:20 | 5592700.666666667 | +| 1970-01-01T00:02:27 | 5592817.666666667 | +| 1970-01-01T00:02:34 | 5592972.0 | +| 1970-01-01T00:02:41 | 5593071.666666667 | +| 1970-01-01T00:02:48 | 5593145.666666667 | +| 1970-01-01T00:02:55 | 5593255.333333333 | +| 1970-01-01T00:03:02 | 5593365.0 | +| 1970-01-01T00:03:09 | 5593463.0 | +| 1970-01-01T00:03:16 | 5593513.333333333 | +| 1970-01-01T00:03:23 | 5593563.666666667 | +| 1970-01-01T00:03:30 | 5593637.666666667 | ++---------------------+-------------------+ -- InfluxQL: SELECT difference(usage_idle), non_negative_difference(usage_idle), moving_average(usage_idle, 4) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY cpu; name: cpu tags: cpu=cpu0 @@ -649,6 +793,166 @@ tags: cpu=cpu1 | 1970-01-01T00:02:30 | -0.0005555555555557608 | | 1970-01-01T00:03:00 | -0.000555555555555524 | +---------------------+------------------------+ +-- InfluxQL: SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s); +name: diskio ++---------------------+------------+ +| time | derivative | ++---------------------+------------+ +| 1970-01-01T00:02:20 | 82.0 | +| 1970-01-01T00:02:27 | 187.0 | +| 1970-01-01T00:02:34 | 112.0 | +| 1970-01-01T00:02:48 | 55.0 | +| 1970-01-01T00:02:55 | 219.0 | +| 1970-01-01T00:03:09 | 37.5 | +| 1970-01-01T00:03:16 | 76.0 | +| 1970-01-01T00:03:30 | 73.0 | ++---------------------+------------+ +-- InfluxQL: SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s); +name: diskio ++---------------------+--------------------+ +| time | derivative | ++---------------------+--------------------+ +| 1970-01-01T00:02:20 | 5.857142857142857 | +| 1970-01-01T00:02:27 | 13.357142857142858 | +| 1970-01-01T00:02:34 | 8.0 | +| 1970-01-01T00:02:48 | 3.9285714285714284 | +| 1970-01-01T00:02:55 | 15.642857142857142 | +| 1970-01-01T00:03:09 | 2.6785714285714284 | +| 1970-01-01T00:03:16 | 5.428571428571429 | +| 1970-01-01T00:03:30 | 5.214285714285714 | ++---------------------+--------------------+ +-- InfluxQL: SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s); +name: diskio ++---------------------+------------+ +| time | derivative | ++---------------------+------------+ +| 1970-01-01T00:02:00 | 366.0 | +| 1970-01-01T00:02:30 | 421.0 | +| 1970-01-01T00:03:00 | 441.0 | +| 1970-01-01T00:03:30 | 297.0 | ++---------------------+------------+ +-- InfluxQL: SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s); +name: diskio ++---------------------+-------------------+ +| time | derivative | ++---------------------+-------------------+ +| 1970-01-01T00:02:00 | 6.1 | +| 1970-01-01T00:02:30 | 7.016666666666667 | +| 1970-01-01T00:03:00 | 7.35 | +| 1970-01-01T00:03:30 | 4.95 | ++---------------------+-------------------+ +-- InfluxQL: SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0); +name: diskio ++---------------------+------------+ +| time | derivative | ++---------------------+------------+ +| 1970-01-01T00:02:06 | 
5592646.0 | +| 1970-01-01T00:02:13 | -5592646.0 | +| 1970-01-01T00:02:20 | 5592810.0 | +| 1970-01-01T00:02:27 | 187.0 | +| 1970-01-01T00:02:34 | 112.0 | +| 1970-01-01T00:02:41 | -5593109.0 | +| 1970-01-01T00:02:48 | 5593219.0 | +| 1970-01-01T00:02:55 | 219.0 | +| 1970-01-01T00:03:02 | -5593438.0 | +| 1970-01-01T00:03:09 | 5593513.0 | +| 1970-01-01T00:03:16 | 76.0 | +| 1970-01-01T00:03:23 | -5593589.0 | +| 1970-01-01T00:03:30 | 5593735.0 | ++---------------------+------------+ +-- InfluxQL: SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0); +name: diskio ++---------------------+---------------------+ +| time | derivative | ++---------------------+---------------------+ +| 1970-01-01T00:02:06 | 399474.71428571426 | +| 1970-01-01T00:02:13 | -399474.71428571426 | +| 1970-01-01T00:02:20 | 399486.4285714286 | +| 1970-01-01T00:02:27 | 13.357142857142858 | +| 1970-01-01T00:02:34 | 8.0 | +| 1970-01-01T00:02:41 | -399507.78571428574 | +| 1970-01-01T00:02:48 | 399515.64285714284 | +| 1970-01-01T00:02:55 | 15.642857142857142 | +| 1970-01-01T00:03:02 | -399531.28571428574 | +| 1970-01-01T00:03:09 | 399536.64285714284 | +| 1970-01-01T00:03:16 | 5.428571428571429 | +| 1970-01-01T00:03:23 | -399542.0714285714 | +| 1970-01-01T00:03:30 | 399552.5 | ++---------------------+---------------------+ +-- InfluxQL: SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous); +name: diskio ++---------------------+------------+ +| time | derivative | ++---------------------+------------+ +| 1970-01-01T00:02:13 | 0.0 | +| 1970-01-01T00:02:20 | 164.0 | +| 1970-01-01T00:02:27 | 187.0 | +| 1970-01-01T00:02:34 | 112.0 | +| 1970-01-01T00:02:41 | 0.0 | +| 1970-01-01T00:02:48 | 110.0 | +| 1970-01-01T00:02:55 | 219.0 | +| 1970-01-01T00:03:02 | 0.0 | +| 1970-01-01T00:03:09 | 75.0 | +| 1970-01-01T00:03:16 | 76.0 | +| 1970-01-01T00:03:23 | 0.0 | +| 1970-01-01T00:03:30 | 146.0 | ++---------------------+------------+ +-- InfluxQL: SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous); +name: diskio ++---------------------+--------------------+ +| time | derivative | ++---------------------+--------------------+ +| 1970-01-01T00:02:13 | 0.0 | +| 1970-01-01T00:02:20 | 11.714285714285714 | +| 1970-01-01T00:02:27 | 13.357142857142858 | +| 1970-01-01T00:02:34 | 8.0 | +| 1970-01-01T00:02:41 | 0.0 | +| 1970-01-01T00:02:48 | 7.857142857142857 | +| 1970-01-01T00:02:55 | 15.642857142857142 | +| 1970-01-01T00:03:02 | 0.0 | +| 1970-01-01T00:03:09 | 5.357142857142857 | +| 1970-01-01T00:03:16 | 5.428571428571429 | +| 1970-01-01T00:03:23 | 0.0 | +| 1970-01-01T00:03:30 | 10.428571428571429 | ++---------------------+--------------------+ +-- InfluxQL: SELECT derivative(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; +name: cpu +tags: cpu=cpu0 ++---------------------+---------------------+ +| time | derivative | ++---------------------+---------------------+ +| 1970-01-01T00:02:00 | -0.7999999999999972 | +| 1970-01-01T00:02:30 | 3.5 | +| 1970-01-01T00:03:00 | -0.4000000000000057 | ++---------------------+---------------------+ +name: cpu +tags: cpu=cpu1 ++---------------------+----------------------+ +| time | derivative | ++---------------------+----------------------+ +| 
1970-01-01T00:02:00 | 0.20000000000000284 | +| 1970-01-01T00:02:30 | 0.0 | +| 1970-01-01T00:03:00 | -0.10000000000000853 | ++---------------------+----------------------+ +-- InfluxQL: SELECT derivative(first(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; +name: cpu +tags: cpu=cpu0 ++---------------------+-----------------------+ +| time | derivative | ++---------------------+-----------------------+ +| 1970-01-01T00:02:00 | -0.013333333333333286 | +| 1970-01-01T00:02:30 | 0.058333333333333334 | +| 1970-01-01T00:03:00 | -0.006666666666666762 | ++---------------------+-----------------------+ +name: cpu +tags: cpu=cpu1 ++---------------------+------------------------+ +| time | derivative | ++---------------------+------------------------+ +| 1970-01-01T00:02:00 | 0.003333333333333381 | +| 1970-01-01T00:02:30 | 0.0 | +| 1970-01-01T00:03:00 | -0.0016666666666668088 | ++---------------------+------------------------+ -- InfluxQL: SELECT non_negative_derivative(writes) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001; name: diskio +---------------------+-------------------------+ @@ -918,6 +1222,152 @@ tags: cpu=cpu1 +---------------------+-------------------------+ | 1970-01-01T00:02:00 | 0.006111111111111237 | +---------------------+-------------------------+ +-- InfluxQL: SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s); +name: diskio ++---------------------+-------------------------+ +| time | non_negative_derivative | ++---------------------+-------------------------+ +| 1970-01-01T00:02:20 | 82.0 | +| 1970-01-01T00:02:27 | 187.0 | +| 1970-01-01T00:02:34 | 112.0 | +| 1970-01-01T00:02:48 | 55.0 | +| 1970-01-01T00:02:55 | 219.0 | +| 1970-01-01T00:03:09 | 37.5 | +| 1970-01-01T00:03:16 | 76.0 | +| 1970-01-01T00:03:30 | 73.0 | ++---------------------+-------------------------+ +-- InfluxQL: SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s); +name: diskio ++---------------------+-------------------------+ +| time | non_negative_derivative | ++---------------------+-------------------------+ +| 1970-01-01T00:02:20 | 5.857142857142857 | +| 1970-01-01T00:02:27 | 13.357142857142858 | +| 1970-01-01T00:02:34 | 8.0 | +| 1970-01-01T00:02:48 | 3.9285714285714284 | +| 1970-01-01T00:02:55 | 15.642857142857142 | +| 1970-01-01T00:03:09 | 2.6785714285714284 | +| 1970-01-01T00:03:16 | 5.428571428571429 | +| 1970-01-01T00:03:30 | 5.214285714285714 | ++---------------------+-------------------------+ +-- InfluxQL: SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s); +name: diskio ++---------------------+-------------------------+ +| time | non_negative_derivative | ++---------------------+-------------------------+ +| 1970-01-01T00:02:00 | 366.0 | +| 1970-01-01T00:02:30 | 421.0 | +| 1970-01-01T00:03:00 | 441.0 | +| 1970-01-01T00:03:30 | 297.0 | ++---------------------+-------------------------+ +-- InfluxQL: SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s); +name: diskio ++---------------------+-------------------------+ +| time | non_negative_derivative | ++---------------------+-------------------------+ +| 
1970-01-01T00:02:00 | 6.1 | +| 1970-01-01T00:02:30 | 7.016666666666667 | +| 1970-01-01T00:03:00 | 7.35 | +| 1970-01-01T00:03:30 | 4.95 | ++---------------------+-------------------------+ +-- InfluxQL: SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0); +name: diskio ++---------------------+-------------------------+ +| time | non_negative_derivative | ++---------------------+-------------------------+ +| 1970-01-01T00:02:06 | 5592646.0 | +| 1970-01-01T00:02:20 | 5592810.0 | +| 1970-01-01T00:02:27 | 187.0 | +| 1970-01-01T00:02:34 | 112.0 | +| 1970-01-01T00:02:48 | 5593219.0 | +| 1970-01-01T00:02:55 | 219.0 | +| 1970-01-01T00:03:09 | 5593513.0 | +| 1970-01-01T00:03:16 | 76.0 | +| 1970-01-01T00:03:30 | 5593735.0 | ++---------------------+-------------------------+ +-- InfluxQL: SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0); +name: diskio ++---------------------+-------------------------+ +| time | non_negative_derivative | ++---------------------+-------------------------+ +| 1970-01-01T00:02:06 | 399474.71428571426 | +| 1970-01-01T00:02:20 | 399486.4285714286 | +| 1970-01-01T00:02:27 | 13.357142857142858 | +| 1970-01-01T00:02:34 | 8.0 | +| 1970-01-01T00:02:48 | 399515.64285714284 | +| 1970-01-01T00:02:55 | 15.642857142857142 | +| 1970-01-01T00:03:09 | 399536.64285714284 | +| 1970-01-01T00:03:16 | 5.428571428571429 | +| 1970-01-01T00:03:30 | 399552.5 | ++---------------------+-------------------------+ +-- InfluxQL: SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous); +name: diskio ++---------------------+-------------------------+ +| time | non_negative_derivative | ++---------------------+-------------------------+ +| 1970-01-01T00:02:13 | 0.0 | +| 1970-01-01T00:02:20 | 164.0 | +| 1970-01-01T00:02:27 | 187.0 | +| 1970-01-01T00:02:34 | 112.0 | +| 1970-01-01T00:02:41 | 0.0 | +| 1970-01-01T00:02:48 | 110.0 | +| 1970-01-01T00:02:55 | 219.0 | +| 1970-01-01T00:03:02 | 0.0 | +| 1970-01-01T00:03:09 | 75.0 | +| 1970-01-01T00:03:16 | 76.0 | +| 1970-01-01T00:03:23 | 0.0 | +| 1970-01-01T00:03:30 | 146.0 | ++---------------------+-------------------------+ +-- InfluxQL: SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous); +name: diskio ++---------------------+-------------------------+ +| time | non_negative_derivative | ++---------------------+-------------------------+ +| 1970-01-01T00:02:13 | 0.0 | +| 1970-01-01T00:02:20 | 11.714285714285714 | +| 1970-01-01T00:02:27 | 13.357142857142858 | +| 1970-01-01T00:02:34 | 8.0 | +| 1970-01-01T00:02:41 | 0.0 | +| 1970-01-01T00:02:48 | 7.857142857142857 | +| 1970-01-01T00:02:55 | 15.642857142857142 | +| 1970-01-01T00:03:02 | 0.0 | +| 1970-01-01T00:03:09 | 5.357142857142857 | +| 1970-01-01T00:03:16 | 5.428571428571429 | +| 1970-01-01T00:03:23 | 0.0 | +| 1970-01-01T00:03:30 | 10.428571428571429 | ++---------------------+-------------------------+ +-- InfluxQL: SELECT non_negative_derivative(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; +name: cpu +tags: cpu=cpu0 ++---------------------+-------------------------+ +| time | non_negative_derivative | 
++---------------------+-------------------------+ +| 1970-01-01T00:02:30 | 3.5 | ++---------------------+-------------------------+ +name: cpu +tags: cpu=cpu1 ++---------------------+-------------------------+ +| time | non_negative_derivative | ++---------------------+-------------------------+ +| 1970-01-01T00:02:00 | 0.20000000000000284 | +| 1970-01-01T00:02:30 | 0.0 | ++---------------------+-------------------------+ +-- InfluxQL: SELECT non_negative_derivative(first(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; +name: cpu +tags: cpu=cpu0 ++---------------------+-------------------------+ +| time | non_negative_derivative | ++---------------------+-------------------------+ +| 1970-01-01T00:02:30 | 0.058333333333333334 | ++---------------------+-------------------------+ +name: cpu +tags: cpu=cpu1 ++---------------------+-------------------------+ +| time | non_negative_derivative | ++---------------------+-------------------------+ +| 1970-01-01T00:02:00 | 0.003333333333333381 | +| 1970-01-01T00:02:30 | 0.0 | ++---------------------+-------------------------+ -- InfluxQL: SELECT cumulative_sum(writes) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001; name: diskio +---------------------+----------------+ @@ -1093,4 +1543,86 @@ tags: cpu=cpu1 | 1970-01-01T00:02:00 | 99.85 | | 1970-01-01T00:02:30 | 199.68333333333334 | | 1970-01-01T00:03:00 | 299.48333333333335 | -+---------------------+--------------------+ \ No newline at end of file ++---------------------+--------------------+ +-- InfluxQL: SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s); +name: diskio ++---------------------+----------------+ +| time | cumulative_sum | ++---------------------+----------------+ +| 1970-01-01T00:02:06 | 5592646 | +| 1970-01-01T00:02:20 | 11185456 | +| 1970-01-01T00:02:27 | 16778453 | +| 1970-01-01T00:02:34 | 22371562 | +| 1970-01-01T00:02:48 | 27964781 | +| 1970-01-01T00:02:55 | 33558219 | +| 1970-01-01T00:03:09 | 39151732 | +| 1970-01-01T00:03:16 | 44745321 | +| 1970-01-01T00:03:30 | 50339056 | ++---------------------+----------------+ +-- InfluxQL: SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s); +name: diskio ++---------------------+----------------+ +| time | cumulative_sum | ++---------------------+----------------+ +| 1970-01-01T00:02:00 | 5592646 | +| 1970-01-01T00:02:30 | 11185643 | +| 1970-01-01T00:03:00 | 16779081 | +| 1970-01-01T00:03:30 | 22372816 | ++---------------------+----------------+ +-- InfluxQL: SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0); +name: diskio ++---------------------+----------------+ +| time | cumulative_sum | ++---------------------+----------------+ +| 1970-01-01T00:02:06 | 5592646 | +| 1970-01-01T00:02:13 | 5592646 | +| 1970-01-01T00:02:20 | 11185456 | +| 1970-01-01T00:02:27 | 16778453 | +| 1970-01-01T00:02:34 | 22371562 | +| 1970-01-01T00:02:41 | 22371562 | +| 1970-01-01T00:02:48 | 27964781 | +| 1970-01-01T00:02:55 | 33558219 | +| 1970-01-01T00:03:02 | 33558219 | +| 1970-01-01T00:03:09 | 39151732 | +| 1970-01-01T00:03:16 | 44745321 | +| 1970-01-01T00:03:23 | 44745321 | +| 1970-01-01T00:03:30 | 50339056 | ++---------------------+----------------+ +-- InfluxQL: SELECT 
cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous); +name: diskio ++---------------------+----------------+ +| time | cumulative_sum | ++---------------------+----------------+ +| 1970-01-01T00:02:06 | 5592646 | +| 1970-01-01T00:02:13 | 11185292 | +| 1970-01-01T00:02:20 | 16778102 | +| 1970-01-01T00:02:27 | 22371099 | +| 1970-01-01T00:02:34 | 27964208 | +| 1970-01-01T00:02:41 | 33557317 | +| 1970-01-01T00:02:48 | 39150536 | +| 1970-01-01T00:02:55 | 44743974 | +| 1970-01-01T00:03:02 | 50337412 | +| 1970-01-01T00:03:09 | 55930925 | +| 1970-01-01T00:03:16 | 61524514 | +| 1970-01-01T00:03:23 | 67118103 | +| 1970-01-01T00:03:30 | 72711838 | ++---------------------+----------------+ +-- InfluxQL: SELECT cumulative_sum(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; +name: cpu +tags: cpu=cpu0 ++---------------------+----------------+ +| time | cumulative_sum | ++---------------------+----------------+ +| 1970-01-01T00:02:00 | 89.8 | +| 1970-01-01T00:02:30 | 180.2 | +| 1970-01-01T00:03:00 | 270.2 | ++---------------------+----------------+ +name: cpu +tags: cpu=cpu1 ++---------------------+----------------+ +| time | cumulative_sum | ++---------------------+----------------+ +| 1970-01-01T00:02:00 | 99.8 | +| 1970-01-01T00:02:30 | 199.7 | +| 1970-01-01T00:03:00 | 299.5 | ++---------------------+----------------+ \ No newline at end of file diff --git a/iox_query_influxql/src/plan/planner.rs b/iox_query_influxql/src/plan/planner.rs index d8982db092..fcd11eaa4f 100644 --- a/iox_query_influxql/src/plan/planner.rs +++ b/iox_query_influxql/src/plan/planner.rs @@ -4018,6 +4018,19 @@ mod test { Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] "###); + + // selector + assert_snapshot!(plan("SELECT NON_NEGATIVE_DERIVATIVE(LAST(usage_idle)) FROM cpu GROUP BY TIME(10s)"), @r###" + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N] + Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, non_negative_derivative [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N] + Filter: NOT non_negative_derivative IS NULL [time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N] + Projection: time, non_negative_derivative(selector_last(cpu.usage_idle,cpu.time)[value]) AS non_negative_derivative [time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N] + WindowAggr: windowExpr=[[non_negative_derivative((selector_last(cpu.usage_idle,cpu.time))[value], IntervalMonthDayNano("10000000000"), time) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS non_negative_derivative(selector_last(cpu.usage_idle,cpu.time)[value])]] [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N, non_negative_derivative(selector_last(cpu.usage_idle,cpu.time)[value]):Float64;N]
+          GapFill: groupBy=[time], aggr=[[selector_last(cpu.usage_idle,cpu.time)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
+            Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[selector_last(cpu.usage_idle, cpu.time)]] [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
+              Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
+                TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
+        "###);
     }
 
     #[test]
diff --git a/iox_query_influxql/src/plan/rewriter.rs b/iox_query_influxql/src/plan/rewriter.rs
index 167aaa20df..329fc9a129 100644
--- a/iox_query_influxql/src/plan/rewriter.rs
+++ b/iox_query_influxql/src/plan/rewriter.rs
@@ -1029,7 +1029,7 @@ impl FieldChecker {
             ProjectionType::TopBottomSelector
         } else if self.has_group_by_time {
             if self.window_count > 0 {
-                if self.window_count == self.aggregate_count {
+                if self.window_count == self.aggregate_count + self.selector_count {
                     ProjectionType::WindowAggregate
                 } else {
                     ProjectionType::WindowAggregateMixed

From b1c695d5a29a0118ba30f427abcdd9ae5cc916c9 Mon Sep 17 00:00:00 2001
From: Martin Hilton
Date: Fri, 21 Jul 2023 17:31:10 +0100
Subject: [PATCH 05/10] fix(influxql): fill count aggregates with 0 by default
 (#8284)

* chore: update expected output for `COUNT` aggregates with `FILL(null)`

See #8232

* fix(influxql): fill count aggregates with 0 by default

When gap-filling a COUNT aggregate, any missing rows should be filled
with 0, unless otherwise directed by a FILL clause. To do this, the
projection on the aggregate plan is modified to coalesce any COUNT
fields with 0 unless a FILL value has been specified in the query.
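As a rough sketch of the intended semantics (not the planner's actual
code; the real change wraps the COUNT column in the IOx
`coalesce_struct` UDF), a hypothetical `fill_count` helper behaves like
this:

    // A gap-filled COUNT window that contains no rows is NULL; coalesce
    // it with 0, or with the query's explicit FILL(<value>) if given.
    fn fill_count(count: Option<i64>, explicit_fill: Option<i64>) -> i64 {
        count.unwrap_or_else(|| explicit_fill.unwrap_or(0))
    }

    fn main() {
        // Window with data passes through unchanged.
        assert_eq!(fill_count(Some(6), None), 6);
        // Empty window with no FILL clause defaults to 0.
        assert_eq!(fill_count(None, None), 0);
        // Empty window with FILL(2) uses the query's fill value.
        assert_eq!(fill_count(None, Some(2)), 2);
    }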
* chore: add more tests * chore: add explanation of COUNT gap filling with multiple measurements * fix: update test introduced with merge --------- Co-authored-by: Stuart Carnie Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com> --- .../query_tests/cases/in/issue_6112.influxql | 8 + .../cases/in/issue_6112.influxql.expected | 161 +++++++++++------- iox_query_influxql/src/plan/planner.rs | 75 ++++---- iox_query_influxql/src/plan/util.rs | 1 + 4 files changed, 150 insertions(+), 95 deletions(-) diff --git a/influxdb_iox/tests/query_tests/cases/in/issue_6112.influxql b/influxdb_iox/tests/query_tests/cases/in/issue_6112.influxql index 83719b9c35..1527db08d7 100644 --- a/influxdb_iox/tests/query_tests/cases/in/issue_6112.influxql +++ b/influxdb_iox/tests/query_tests/cases/in/issue_6112.influxql @@ -339,6 +339,12 @@ SELECT COUNT(f64), SUM(f64) FROM m0 GROUP BY TIME(30s) FILL(none); -- supports offset parameter SELECT COUNT(f64), SUM(f64) FROM m0 GROUP BY TIME(30s, 1s) FILL(none); +-- N.B. The gap filling of the COUNT(usage_idle) and COUNT(bytes_free) +-- columns happens before the two measurements are UNIONed together +-- when producing the output table. This means that a COUNT column for +-- a field that is not present for a measurement will contain NULLs, +-- rather than being filled with 0s. This is consistent with older +-- versions of influxdb. SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk; SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk GROUP BY TIME(1s) FILL(none); SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk GROUP BY cpu; @@ -360,7 +366,9 @@ SELECT COUNT(usage_idle), usage_idle FROM cpu; -- Default FILL(null) when FILL is omitted SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s); +SELECT COUNT(usage_idle)+2 FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s); SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s); +SELECT COUNT(usage_idle)+1, COUNT(bytes_free)+2 FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s); SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(null); SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(null); SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(previous); diff --git a/influxdb_iox/tests/query_tests/cases/in/issue_6112.influxql.expected b/influxdb_iox/tests/query_tests/cases/in/issue_6112.influxql.expected index 7aac3adbe7..1aefe7610b 100644 --- a/influxdb_iox/tests/query_tests/cases/in/issue_6112.influxql.expected +++ b/influxdb_iox/tests/query_tests/cases/in/issue_6112.influxql.expected @@ -919,10 +919,10 @@ name: logical_plan plan Sort: iox::measurement ASC NULLS LAST, tag0 ASC NULLS LAST, time ASC NULLS LAST Union - Projection: Dictionary(Int32, Utf8("m0")) AS iox::measurement, TimestampNanosecond(0, None) AS time, m0.tag0 AS tag0, COUNT(m0.f64) AS count, SUM(m0.f64) AS sum, STDDEV(m0.f64) AS stddev + Projection: Dictionary(Int32, Utf8("m0")) AS iox::measurement, TimestampNanosecond(0, None) AS time, m0.tag0 AS tag0, coalesce_struct(COUNT(m0.f64), Int64(0)) AS count, 
SUM(m0.f64) AS sum, STDDEV(m0.f64) AS stddev Aggregate: groupBy=[[m0.tag0]], aggr=[[COUNT(m0.f64), SUM(m0.f64), STDDEV(m0.f64)]] TableScan: m0 projection=[f64, tag0] - Projection: Dictionary(Int32, Utf8("m1")) AS iox::measurement, TimestampNanosecond(0, None) AS time, m1.tag0 AS tag0, COUNT(m1.f64) AS count, SUM(m1.f64) AS sum, STDDEV(m1.f64) AS stddev + Projection: Dictionary(Int32, Utf8("m1")) AS iox::measurement, TimestampNanosecond(0, None) AS time, m1.tag0 AS tag0, coalesce_struct(COUNT(m1.f64), Int64(0)) AS count, SUM(m1.f64) AS sum, STDDEV(m1.f64) AS stddev Aggregate: groupBy=[[m1.tag0]], aggr=[[COUNT(m1.f64), SUM(m1.f64), STDDEV(m1.f64)]] TableScan: m1 projection=[f64, tag0] name: physical_plan @@ -930,7 +930,7 @@ name: physical_plan SortPreservingMergeExec: [iox::measurement@0 ASC NULLS LAST,tag0@2 ASC NULLS LAST,time@1 ASC NULLS LAST] UnionExec SortExec: expr=[iox::measurement@0 ASC NULLS LAST,tag0@2 ASC NULLS LAST,time@1 ASC NULLS LAST] - ProjectionExec: expr=[m0 as iox::measurement, 0 as time, tag0@0 as tag0, COUNT(m0.f64)@1 as count, SUM(m0.f64)@2 as sum, STDDEV(m0.f64)@3 as stddev] + ProjectionExec: expr=[m0 as iox::measurement, 0 as time, tag0@0 as tag0, coalesce_struct(COUNT(m0.f64)@1, 0) as count, SUM(m0.f64)@2 as sum, STDDEV(m0.f64)@3 as stddev] AggregateExec: mode=FinalPartitioned, gby=[tag0@0 as tag0], aggr=[COUNT(m0.f64), SUM(m0.f64), STDDEV(m0.f64)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([tag0@0], 4), input_partitions=4 @@ -938,7 +938,7 @@ name: physical_plan AggregateExec: mode=Partial, gby=[tag0@1 as tag0], aggr=[COUNT(m0.f64), SUM(m0.f64), STDDEV(m0.f64)] ParquetExec: file_groups={1 group: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, projection=[f64, tag0] SortExec: expr=[iox::measurement@0 ASC NULLS LAST,tag0@2 ASC NULLS LAST,time@1 ASC NULLS LAST] - ProjectionExec: expr=[m1 as iox::measurement, 0 as time, tag0@0 as tag0, COUNT(m1.f64)@1 as count, SUM(m1.f64)@2 as sum, STDDEV(m1.f64)@3 as stddev] + ProjectionExec: expr=[m1 as iox::measurement, 0 as time, tag0@0 as tag0, coalesce_struct(COUNT(m1.f64)@1, 0) as count, SUM(m1.f64)@2 as sum, STDDEV(m1.f64)@3 as stddev] RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=4 AggregateExec: mode=FinalPartitioned, gby=[tag0@0 as tag0], aggr=[COUNT(m1.f64), SUM(m1.f64), STDDEV(m1.f64)], ordering_mode=FullyOrdered CoalesceBatchesExec: target_batch_size=8192 @@ -1267,9 +1267,19 @@ name: cpu | time | count | +---------------------+-------+ | 2022-10-31T02:00:00 | 6 | -| 2022-10-31T02:00:30 | | -| 2022-10-31T02:01:00 | | -| 2022-10-31T02:01:30 | | +| 2022-10-31T02:00:30 | 0 | +| 2022-10-31T02:01:00 | 0 | +| 2022-10-31T02:01:30 | 0 | ++---------------------+-------+ +-- InfluxQL: SELECT COUNT(usage_idle)+2 FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s); +name: cpu ++---------------------+-------+ +| time | count | ++---------------------+-------+ +| 2022-10-31T02:00:00 | 8 | +| 2022-10-31T02:00:30 | 2 | +| 2022-10-31T02:01:00 | 2 | +| 2022-10-31T02:01:30 | 2 | +---------------------+-------+ -- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s); name: cpu @@ -1277,18 +1287,37 @@ name: cpu | time | count | count_1 | +---------------------+-------+---------+ | 2022-10-31T02:00:00 | 6 | | -| 2022-10-31T02:00:30 | | | -| 2022-10-31T02:01:00 | | | -| 2022-10-31T02:01:30 | | | +| 2022-10-31T02:00:30 | 0 | | +| 
2022-10-31T02:01:00 | 0 | | +| 2022-10-31T02:01:30 | 0 | | +---------------------+-------+---------+ name: disk +---------------------+-------+---------+ | time | count | count_1 | +---------------------+-------+---------+ | 2022-10-31T02:00:00 | | 6 | -| 2022-10-31T02:00:30 | | | -| 2022-10-31T02:01:00 | | | -| 2022-10-31T02:01:30 | | | +| 2022-10-31T02:00:30 | | 0 | +| 2022-10-31T02:01:00 | | 0 | +| 2022-10-31T02:01:30 | | 0 | ++---------------------+-------+---------+ +-- InfluxQL: SELECT COUNT(usage_idle)+1, COUNT(bytes_free)+2 FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s); +name: cpu ++---------------------+-------+---------+ +| time | count | count_1 | ++---------------------+-------+---------+ +| 2022-10-31T02:00:00 | 7 | | +| 2022-10-31T02:00:30 | 1 | | +| 2022-10-31T02:01:00 | 1 | | +| 2022-10-31T02:01:30 | 1 | | ++---------------------+-------+---------+ +name: disk ++---------------------+-------+---------+ +| time | count | count_1 | ++---------------------+-------+---------+ +| 2022-10-31T02:00:00 | | 8 | +| 2022-10-31T02:00:30 | | 2 | +| 2022-10-31T02:01:00 | | 2 | +| 2022-10-31T02:01:30 | | 2 | +---------------------+-------+---------+ -- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(null); name: cpu @@ -1296,9 +1325,9 @@ name: cpu | time | count | +---------------------+-------+ | 2022-10-31T02:00:00 | 6 | -| 2022-10-31T02:00:30 | | -| 2022-10-31T02:01:00 | | -| 2022-10-31T02:01:30 | | +| 2022-10-31T02:00:30 | 0 | +| 2022-10-31T02:01:00 | 0 | +| 2022-10-31T02:01:30 | 0 | +---------------------+-------+ -- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(null); name: cpu @@ -1306,18 +1335,18 @@ name: cpu | time | count | count_1 | +---------------------+-------+---------+ | 2022-10-31T02:00:00 | 6 | | -| 2022-10-31T02:00:30 | | | -| 2022-10-31T02:01:00 | | | -| 2022-10-31T02:01:30 | | | +| 2022-10-31T02:00:30 | 0 | | +| 2022-10-31T02:01:00 | 0 | | +| 2022-10-31T02:01:30 | 0 | | +---------------------+-------+---------+ name: disk +---------------------+-------+---------+ | time | count | count_1 | +---------------------+-------+---------+ | 2022-10-31T02:00:00 | | 6 | -| 2022-10-31T02:00:30 | | | -| 2022-10-31T02:01:00 | | | -| 2022-10-31T02:01:30 | | | +| 2022-10-31T02:00:30 | | 0 | +| 2022-10-31T02:01:00 | | 0 | +| 2022-10-31T02:01:30 | | 0 | +---------------------+-------+---------+ -- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(previous); name: cpu @@ -1507,9 +1536,9 @@ tags: cpu=cpu-total | time | count | +---------------------+-------+ | 2022-10-31T02:00:00 | 2 | -| 2022-10-31T02:00:30 | | -| 2022-10-31T02:01:00 | | -| 2022-10-31T02:01:30 | | +| 2022-10-31T02:00:30 | 0 | +| 2022-10-31T02:01:00 | 0 | +| 2022-10-31T02:01:30 | 0 | +---------------------+-------+ name: cpu tags: cpu=cpu0 @@ -1517,9 +1546,9 @@ tags: cpu=cpu0 | time | count | +---------------------+-------+ | 2022-10-31T02:00:00 | 2 | -| 2022-10-31T02:00:30 | | -| 2022-10-31T02:01:00 | | -| 2022-10-31T02:01:30 | | +| 2022-10-31T02:00:30 | 0 | +| 2022-10-31T02:01:00 | 0 | +| 2022-10-31T02:01:30 | 0 | +---------------------+-------+ name: cpu tags: cpu=cpu1 @@ -1527,9 +1556,9 @@ tags: cpu=cpu1 | time | count | +---------------------+-------+ | 
2022-10-31T02:00:00 | 2 | -| 2022-10-31T02:00:30 | | -| 2022-10-31T02:01:00 | | -| 2022-10-31T02:01:30 | | +| 2022-10-31T02:00:30 | 0 | +| 2022-10-31T02:01:00 | 0 | +| 2022-10-31T02:01:30 | 0 | +---------------------+-------+ -- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s), cpu FILL(null); name: cpu @@ -1538,9 +1567,9 @@ tags: cpu=cpu-total | time | count | +---------------------+-------+ | 2022-10-31T02:00:00 | 2 | -| 2022-10-31T02:00:30 | | -| 2022-10-31T02:01:00 | | -| 2022-10-31T02:01:30 | | +| 2022-10-31T02:00:30 | 0 | +| 2022-10-31T02:01:00 | 0 | +| 2022-10-31T02:01:30 | 0 | +---------------------+-------+ name: cpu tags: cpu=cpu0 @@ -1548,9 +1577,9 @@ tags: cpu=cpu0 | time | count | +---------------------+-------+ | 2022-10-31T02:00:00 | 2 | -| 2022-10-31T02:00:30 | | -| 2022-10-31T02:01:00 | | -| 2022-10-31T02:01:30 | | +| 2022-10-31T02:00:30 | 0 | +| 2022-10-31T02:01:00 | 0 | +| 2022-10-31T02:01:30 | 0 | +---------------------+-------+ name: cpu tags: cpu=cpu1 @@ -1558,9 +1587,9 @@ tags: cpu=cpu1 | time | count | +---------------------+-------+ | 2022-10-31T02:00:00 | 2 | -| 2022-10-31T02:00:30 | | -| 2022-10-31T02:01:00 | | -| 2022-10-31T02:01:30 | | +| 2022-10-31T02:00:30 | 0 | +| 2022-10-31T02:01:00 | 0 | +| 2022-10-31T02:01:30 | 0 | +---------------------+-------+ -- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s), cpu, device FILL(null); name: cpu @@ -1569,9 +1598,9 @@ tags: cpu=cpu-total, device= | time | count | count_1 | +---------------------+-------+---------+ | 2022-10-31T02:00:00 | 2 | | -| 2022-10-31T02:00:30 | | | -| 2022-10-31T02:01:00 | | | -| 2022-10-31T02:01:30 | | | +| 2022-10-31T02:00:30 | 0 | | +| 2022-10-31T02:01:00 | 0 | | +| 2022-10-31T02:01:30 | 0 | | +---------------------+-------+---------+ name: cpu tags: cpu=cpu0, device= @@ -1579,9 +1608,9 @@ tags: cpu=cpu0, device= | time | count | count_1 | +---------------------+-------+---------+ | 2022-10-31T02:00:00 | 2 | | -| 2022-10-31T02:00:30 | | | -| 2022-10-31T02:01:00 | | | -| 2022-10-31T02:01:30 | | | +| 2022-10-31T02:00:30 | 0 | | +| 2022-10-31T02:01:00 | 0 | | +| 2022-10-31T02:01:30 | 0 | | +---------------------+-------+---------+ name: cpu tags: cpu=cpu1, device= @@ -1589,9 +1618,9 @@ tags: cpu=cpu1, device= | time | count | count_1 | +---------------------+-------+---------+ | 2022-10-31T02:00:00 | 2 | | -| 2022-10-31T02:00:30 | | | -| 2022-10-31T02:01:00 | | | -| 2022-10-31T02:01:30 | | | +| 2022-10-31T02:00:30 | 0 | | +| 2022-10-31T02:01:00 | 0 | | +| 2022-10-31T02:01:30 | 0 | | +---------------------+-------+---------+ name: disk tags: cpu=, device=disk1s1 @@ -1599,9 +1628,9 @@ tags: cpu=, device=disk1s1 | time | count | count_1 | +---------------------+-------+---------+ | 2022-10-31T02:00:00 | | 2 | -| 2022-10-31T02:00:30 | | | -| 2022-10-31T02:01:00 | | | -| 2022-10-31T02:01:30 | | | +| 2022-10-31T02:00:30 | | 0 | +| 2022-10-31T02:01:00 | | 0 | +| 2022-10-31T02:01:30 | | 0 | +---------------------+-------+---------+ name: disk tags: cpu=, device=disk1s2 @@ -1609,9 +1638,9 @@ tags: cpu=, device=disk1s2 | time | count | count_1 | +---------------------+-------+---------+ | 2022-10-31T02:00:00 | | 2 | -| 2022-10-31T02:00:30 | | | -| 2022-10-31T02:01:00 | | | -| 2022-10-31T02:01:30 | | | +| 2022-10-31T02:00:30 | | 0 | +| 2022-10-31T02:01:00 | | 0 | +| 2022-10-31T02:01:30 | | 0 | 
+---------------------+-------+---------+ name: disk tags: cpu=, device=disk1s5 @@ -1619,9 +1648,9 @@ tags: cpu=, device=disk1s5 | time | count | count_1 | +---------------------+-------+---------+ | 2022-10-31T02:00:00 | | 2 | -| 2022-10-31T02:00:30 | | | -| 2022-10-31T02:01:00 | | | -| 2022-10-31T02:01:30 | | | +| 2022-10-31T02:00:30 | | 0 | +| 2022-10-31T02:01:00 | | 0 | +| 2022-10-31T02:01:30 | | 0 | +---------------------+-------+---------+ -- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s), cpu FILL(previous); name: cpu @@ -2202,15 +2231,15 @@ name: cpu | time | count | +---------------------+-------+ | 2022-10-31T02:00:00 | 6 | -| 2022-10-31T02:00:30 | | +| 2022-10-31T02:00:30 | 0 | +---------------------+-------+ -- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:05:00Z' GROUP BY TIME(30s) LIMIT 2 OFFSET 2; name: cpu +---------------------+-------+ | time | count | +---------------------+-------+ -| 2022-10-31T02:01:00 | | -| 2022-10-31T02:01:30 | | +| 2022-10-31T02:01:00 | 0 | +| 2022-10-31T02:01:30 | 0 | +---------------------+-------+ -- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:05:00Z' GROUP BY TIME(30s), cpu LIMIT 2; name: cpu @@ -2219,7 +2248,7 @@ tags: cpu=cpu-total | time | count | +---------------------+-------+ | 2022-10-31T02:00:00 | 2 | -| 2022-10-31T02:00:30 | | +| 2022-10-31T02:00:30 | 0 | +---------------------+-------+ name: cpu tags: cpu=cpu0 @@ -2227,7 +2256,7 @@ tags: cpu=cpu0 | time | count | +---------------------+-------+ | 2022-10-31T02:00:00 | 2 | -| 2022-10-31T02:00:30 | | +| 2022-10-31T02:00:30 | 0 | +---------------------+-------+ name: cpu tags: cpu=cpu1 @@ -2235,7 +2264,7 @@ tags: cpu=cpu1 | time | count | +---------------------+-------+ | 2022-10-31T02:00:00 | 2 | -| 2022-10-31T02:00:30 | | +| 2022-10-31T02:00:30 | 0 | +---------------------+-------+ -- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) LIMIT 1; name: cpu @@ -2268,13 +2297,13 @@ name: cpu +---------------------+-------+---------+ | time | count | count_1 | +---------------------+-------+---------+ -| 2022-10-31T02:01:30 | | | +| 2022-10-31T02:01:30 | 0 | | +---------------------+-------+---------+ name: disk +---------------------+-------+---------+ | time | count | count_1 | +---------------------+-------+---------+ -| 2022-10-31T02:01:30 | | | +| 2022-10-31T02:01:30 | | 0 | +---------------------+-------+---------+ -- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s), cpu, device LIMIT 1; name: cpu diff --git a/iox_query_influxql/src/plan/planner.rs b/iox_query_influxql/src/plan/planner.rs index fcd11eaa4f..7ebfcc8be4 100644 --- a/iox_query_influxql/src/plan/planner.rs +++ b/iox_query_influxql/src/plan/planner.rs @@ -1323,18 +1323,25 @@ impl<'a> InfluxQLToLogicalPlan<'a> { _ => None, }; + // Some aggregates, such as COUNT, should be filled with zero by default + // rather than NULL. 
+        let should_zero_fill_expr = fields
+            .iter()
+            .map(is_zero_filled_aggregate_field)
+            .collect::<Vec<_>>();
+
         // Rewrite the aggregate columns from the projection, so that the expressions
         // refer to the columns from the aggregate projection
         let select_exprs_post_aggr = select_exprs
             .iter()
-            .zip(should_fill_expr)
-            .map(|(expr, should_fill)| {
+            .zip(should_fill_expr.iter().zip(should_zero_fill_expr))
+            .map(|(expr, (should_fill, should_zero_fill))| {
                 // This implements the `FILL(<value>)` strategy, by coalescing any aggregate
                 // expressions to `<value>` when they are `NULL`.
-                let fill_if_null = if fill_if_null.is_some() && should_fill {
-                    fill_if_null
-                } else {
-                    None
+                let fill_if_null = match (fill_if_null, should_fill, should_zero_fill) {
+                    (Some(_), true, _) => fill_if_null,
+                    (None, true, true) => Some(0.into()),
+                    _ => None,
                 };

                 rebase_expr(expr, &aggr_projection_exprs, &fill_if_null, &plan)
@@ -3081,6 +3088,16 @@ fn is_aggregate_field(f: &Field) -> bool {
     .is_break()
 }

+/// A utility function that checks whether `f` is an aggregate field
+/// that should be filled with a 0 rather than a NULL.
+fn is_zero_filled_aggregate_field(f: &Field) -> bool {
+    walk_expr(&f.expr, &mut |e| match e {
+        IQLExpr::Call(Call { name, .. }) if name == "count" => ControlFlow::Break(()),
+        _ => ControlFlow::Continue(()),
+    })
+    .is_break()
+}
+
 fn conditional_op_to_operator(op: ConditionalOperator) -> Result<Operator> {
     match op {
         ConditionalOperator::Eq => Ok(Operator::Eq),
@@ -4091,7 +4108,7 @@ mod test {
         "###);
         assert_snapshot!(plan("SELECT COUNT(DISTINCT usage_idle) FROM cpu"), @r###"
         Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N]
-          Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, COUNT(DISTINCT cpu.usage_idle) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N]
+          Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, coalesce_struct(COUNT(DISTINCT cpu.usage_idle), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N]
            Aggregate: groupBy=[[]], aggr=[[COUNT(DISTINCT cpu.usage_idle)]] [COUNT(DISTINCT cpu.usage_idle):Int64;N]
              TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
         "###);
@@ -4162,7 +4179,7 @@ mod test {
     fn test_selectors_and_aggregate() {
         assert_snapshot!(plan("SELECT LAST(usage_idle), COUNT(usage_idle) FROM cpu"), @r###"
         Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), last:Float64;N, count:Int64;N]
-          Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, (selector_last(cpu.usage_idle,cpu.time))[value] AS last, COUNT(cpu.usage_idle) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), last:Float64;N, count:Int64;N]
+          Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, (selector_last(cpu.usage_idle,cpu.time))[value] AS last, coalesce_struct(COUNT(cpu.usage_idle), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), last:Float64;N, count:Int64;N]
            Aggregate: groupBy=[[]], aggr=[[selector_last(cpu.usage_idle, cpu.time), COUNT(cpu.usage_idle)]]
[selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N, COUNT(cpu.usage_idle):Int64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] "###); @@ -4841,20 +4858,20 @@ mod test { fn no_group_by() { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N] Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY non_existent"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_existent:Null;N, count:Int64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, NULL AS non_existent, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_existent:Null;N, count:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, NULL AS non_existent, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_existent:Null;N, count:Int64;N] Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY foo"), @r###" Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), 
foo:Dictionary(Int32, Utf8);N, count:Int64;N]
            Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N]
              TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
         "###);
@@ -4862,7 +4879,7 @@ mod test {
         // The `COUNT(f64_field)` aggregate is only projected once in the Aggregate and reused in the projection
         assert_snapshot!(plan("SELECT COUNT(f64_field), COUNT(f64_field) + COUNT(f64_field), COUNT(f64_field) * 3 FROM data"), @r###"
         Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_count:Int64;N, count_1:Int64;N]
-          Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, COUNT(data.f64_field) AS count, COUNT(data.f64_field) + COUNT(data.f64_field) AS count_count, COUNT(data.f64_field) * Int64(3) AS count_1 [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_count:Int64;N, count_1:Int64;N]
+          Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count, coalesce_struct(COUNT(data.f64_field), Int64(0)) + coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count_count, coalesce_struct(COUNT(data.f64_field), Int64(0)) * Int64(3) AS count_1 [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_count:Int64;N, count_1:Int64;N]
            Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N]
              TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
         "###);
@@ -4870,7 +4887,7 @@ mod test {
         // non-existent tags are excluded from the Aggregate groupBy and Sort operators
         assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY foo, non_existent"), @r###"
         Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, non_existent:Null;N, count:Int64;N]
-          Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, NULL AS non_existent, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, non_existent:Null;N, count:Int64;N]
+          Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, NULL AS non_existent, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, non_existent:Null;N, count:Int64;N]
            Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N]
              TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
         "###);
@@ -4878,7 +4895,7 @@ mod test {
         // Aggregate
expression is projected once and reused in final projection assert_snapshot!(plan("SELECT COUNT(f64_field), COUNT(f64_field) * 2 FROM data"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_1:Int64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, COUNT(data.f64_field) AS count, COUNT(data.f64_field) * Int64(2) AS count_1 [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_1:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count, coalesce_struct(COUNT(data.f64_field), Int64(0)) * Int64(2) AS count_1 [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_1:Int64;N] Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); @@ -4917,7 +4934,7 @@ mod test { fn group_by_time() { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(none)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] @@ -4926,7 +4943,7 @@ mod test { // supports offset parameter assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s, 5s) FILL(none)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(5000000000, 
None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] @@ -4938,7 +4955,7 @@ mod test { // No time bounds assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] @@ -4951,7 +4968,7 @@ mod test { // No lower time bounds assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time < '2022-10-31T02:02:00Z' GROUP BY TIME(10s)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1667181719999999999, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Filter: data.time <= TimestampNanosecond(1667181719999999999, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, 
i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] @@ -4964,7 +4981,7 @@ mod test { // No upper time bounds assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time >= '2022-10-31T02:00:00Z' GROUP BY TIME(10s)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Included(Literal(TimestampNanosecond(1667181600000000000, None)))..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Filter: data.time >= TimestampNanosecond(1667181600000000000, None) AND data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] @@ -4977,7 +4994,7 @@ mod test { // Default is FILL(null) assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(10s)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Included(Literal(TimestampNanosecond(1667181600000000000, None)))..Included(Literal(TimestampNanosecond(1667181719999999999, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Filter: data.time >= TimestampNanosecond(1667181600000000000, None) AND data.time <= TimestampNanosecond(1667181719999999999, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] @@ -4989,7 +5006,7 @@ mod test { fn group_by_time_gapfill_default_is_fill_null1() { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM 
data GROUP BY TIME(10s)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] @@ -5001,7 +5018,7 @@ mod test { fn group_by_time_gapfill_default_is_fill_null2() { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(null)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] @@ -5013,7 +5030,7 @@ mod test { fn group_by_time_gapfill_default_is_fill_null3() { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(previous)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, 
Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[time], aggr=[[LOCF(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] @@ -5037,7 +5054,7 @@ mod test { fn group_by_time_gapfill_default_is_fill_null5() { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(linear)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[time], aggr=[[INTERPOLATE(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] @@ -5079,7 +5096,7 @@ mod test { Filter: iox::row <= Int64(1) [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N] WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [foo] ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS iox::row]] [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N] Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), 
time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); @@ -5093,7 +5110,7 @@ mod test { Filter: iox::row > Int64(1) [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N] WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [foo] ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS iox::row]] [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N] Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); @@ -5107,7 +5124,7 @@ mod test { Filter: iox::row BETWEEN Int64(4) AND Int64(5) [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N] WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [foo] ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS iox::row]] [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N] Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, 
bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); @@ -5133,7 +5150,7 @@ mod test { fn group_by_time_precision() { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10u) FILL(none)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] @@ -5391,7 +5408,7 @@ mod test { "###); assert_snapshot!(plan("SELECT count(foo) as foo, first(usage_idle) from cpu group by foo"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Null;N, foo_1:Null;N, first:Float64;N] - Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, NULL AS foo, (selector_first(cpu.usage_idle,cpu.time,NULL))[other_1] AS foo_1, (selector_first(cpu.usage_idle,cpu.time,NULL))[value] AS first [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Null;N, foo_1:Null;N, first:Float64;N] + Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, NULL AS foo, (coalesce_struct(selector_first(cpu.usage_idle,cpu.time,NULL), Struct({value:Float64(0),time:TimestampNanosecond(0, None),other_1:NULL})))[other_1] AS foo_1, (selector_first(cpu.usage_idle,cpu.time,NULL))[value] AS first [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Null;N, foo_1:Null;N, first:Float64;N] Aggregate: groupBy=[[]], aggr=[[selector_first(cpu.usage_idle, cpu.time, NULL)]] [selector_first(cpu.usage_idle,cpu.time,NULL):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "other_1", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] "###); diff --git a/iox_query_influxql/src/plan/util.rs 
b/iox_query_influxql/src/plan/util.rs
index dbbbf0fa12..c72123cccc 100644
--- a/iox_query_influxql/src/plan/util.rs
+++ b/iox_query_influxql/src/plan/util.rs
@@ -118,6 +118,7 @@ fn number_to_scalar(n: &Number, data_type: &DataType) -> Result<ScalarValue> {
             ),
             fields.clone(),
         ),
+        (_, DataType::Null) => ScalarValue::Null,
         (n, data_type) => {
             // The only output data types expected are Int64, Float64 or UInt64
             return error::internal(format!("no conversion from {n} to {data_type}"));

From 76223585183403f8f9aec7c0a82e37842a526dac Mon Sep 17 00:00:00 2001
From: Joe-Blount
Date: Fri, 21 Jul 2023 13:44:16 -0500
Subject: [PATCH 06/10] fix: avoid compacting 1 L0 to 1 L0 file (stuck looping)

---
 .../src/components/round_info_source/mod.rs |   4 +-
 compactor/tests/layouts/stuck.rs            | 411 ++++++++++++++++++
 2 files changed, 414 insertions(+), 1 deletion(-)

diff --git a/compactor/src/components/round_info_source/mod.rs b/compactor/src/components/round_info_source/mod.rs
index 6d6ab2f3d4..5f5492ffbb 100644
--- a/compactor/src/components/round_info_source/mod.rs
+++ b/compactor/src/components/round_info_source/mod.rs
@@ -103,7 +103,9 @@ impl LevelBasedRoundInfo {
         // branch in the worst case, thus if that would result in too many files to compact in a single
         // plan, run a pre-phase to reduce the number of files first
         let num_overlapped_files = get_num_overlapped_files(start_level_files, next_level_files);
-        if num_start_level + num_overlapped_files > self.max_num_files_per_plan {
+        if num_start_level > 1
+            && num_start_level + num_overlapped_files > self.max_num_files_per_plan
+        {
             // This scenario meets the simple criteria of start level files + their overlaps being lots of files.
             // But ManySmallFiles implies we must compact only within the start level to reduce the quantity of
             // start level files. There are several reasons why that might be unhelpful.

diff --git a/compactor/tests/layouts/stuck.rs b/compactor/tests/layouts/stuck.rs
index c69b13ae88..342044e96e 100644
--- a/compactor/tests/layouts/stuck.rs
+++ b/compactor/tests/layouts/stuck.rs
@@ -1730,3 +1730,414 @@ async fn stuck_l0_large_l0s() {
     "###
     );
 }
+
+// This case is taken from a catalog where the partition was stuck doing single file L0->L0 compactions with a ManySmallFiles classification.
+// The key point is that there is 1 L0 file, and enough overlapping L1 files such that the sum of L0 and overlapping L1s are too many for
+// a single compaction. So it tried to do L0->L0 compaction, but you can't get less than 1 L0 file...
+#[tokio::test] +async fn single_file_compaction() { + test_helpers::maybe_start_logging(); + + let max_files = 20; + let setup = layout_setup_builder() + .await + .with_max_num_files_per_plan(max_files) + .with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE) + .with_partition_timeout(Duration::from_millis(1000)) + .with_suppress_run_output() // remove this to debug + .build() + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681776057065884000) + .with_max_time(1681848094846357000) + .with_compaction_level(CompactionLevel::Final) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681848108803007952)) + .with_file_size_bytes(148352), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681848059723530000) + .with_max_time(1681849022292840000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681849158083717413)) + .with_file_size_bytes(8532), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681849256770938000) + .with_max_time(1681849612137939000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681849758018522867)) + .with_file_size_bytes(7180), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681849857540998000) + .with_max_time(1681849933405747000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681850058063700468)) + .with_file_size_bytes(6354), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681850155949687000) + .with_max_time(1681850525337964000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681850658095040165)) + .with_file_size_bytes(7224), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681850533564810000) + .with_max_time(1681850800324334000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681850958072081740)) + .with_file_size_bytes(6442), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681850807902300000) + .with_max_time(1681851109057342000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681851258099471556)) + .with_file_size_bytes(6467), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681851356697599000) + .with_max_time(1681851731606438000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681851858069516381)) + .with_file_size_bytes(7202), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681851768198276000) + .with_max_time(1681852656555310000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681852758025054620)) + .with_file_size_bytes(7901), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681852858788440000) + .with_max_time(1681853202074816000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + 
.with_max_l0_created_at(Time::from_timestamp_nanos(1681853358030917913)) + .with_file_size_bytes(7175), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681853216031150000) + .with_max_time(1681853533814380000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681853658084495307)) + .with_file_size_bytes(6461), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681853755089369000) + .with_max_time(1681854114135030000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681854258102937522)) + .with_file_size_bytes(7172), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681854158528835000) + .with_max_time(1681854411758250000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681854558107269518)) + .with_file_size_bytes(6445), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681854656198860000) + .with_max_time(1681855901530453000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681856058068217803)) + .with_file_size_bytes(9388), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681855930016632000) + .with_max_time(1681856215951881000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681856358077776391)) + .with_file_size_bytes(6411), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681856457094364000) + .with_max_time(1681856572199715000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681856658099983774)) + .with_file_size_bytes(6471), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681856755669647000) + .with_max_time(1681856797376786000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681856959540758502)) + .with_file_size_bytes(6347), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681857059467239000) + .with_max_time(1681857411709822000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681857559463607724)) + .with_file_size_bytes(7179), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681857658708732000) + .with_max_time(1681858001258834000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681858159653340111)) + .with_file_size_bytes(7171), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681858259089021000) + .with_max_time(1681858311972651000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681858459694290981)) + .with_file_size_bytes(6417), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681858336136281000) + .with_max_time(1681858611711634000) + 
.with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681858759770566450)) + .with_file_size_bytes(6432), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681858613076367000) + .with_max_time(1681859207290151000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681859359651203045)) + .with_file_size_bytes(7211), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681859212497834000) + .with_max_time(1681859549996540000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681859659796715205)) + .with_file_size_bytes(6408), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681859755984961000) + .with_max_time(1681860397139689000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681860559596560745)) + .with_file_size_bytes(7919), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681860656403220000) + .with_max_time(1681861312602593000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681861463769557785)) + .with_file_size_bytes(7920), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681861557592893000) + .with_max_time(1681861592762435000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681861760075293126)) + .with_file_size_bytes(6432), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681861612304587000) + .with_max_time(1681861928505695000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681862059957822724)) + .with_file_size_bytes(6456), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681862008720364000) + .with_max_time(1681862268794595000) + .with_compaction_level(CompactionLevel::FileNonOverlapped) + .with_max_l0_created_at(Time::from_timestamp_nanos(1681862511938856063)) + .with_file_size_bytes(6453), + ) + .await; + + setup + .partition + .create_parquet_file( + parquet_builder() + .with_min_time(1681776002714783000) + .with_max_time(1681862102913137000) + .with_compaction_level(CompactionLevel::Initial) + .with_max_l0_created_at(Time::from_timestamp_nanos(1683039505904263771)) + .with_file_size_bytes(7225), + ) + .await; + + insta::assert_yaml_snapshot!( + run_layout_scenario(&setup).await, + @r###" + --- + - "**** Input Files " + - "L0 " + - "L0.29[1681776002714783000,1681862102913137000] 1683039505.9s 7kb|-----------------------------------------L0.29-----------------------------------------| " + - "L1 " + - "L1.2[1681848059723530000,1681849022292840000] 1681849158.08s 8kb |L1.2| " + - "L1.3[1681849256770938000,1681849612137939000] 1681849758.02s 7kb |L1.3| " + - "L1.4[1681849857540998000,1681849933405747000] 1681850058.06s 6kb |L1.4| " + - "L1.5[1681850155949687000,1681850525337964000] 1681850658.1s 7kb |L1.5| " + - "L1.6[1681850533564810000,1681850800324334000] 1681850958.07s 6kb |L1.6| " + - "L1.7[1681850807902300000,1681851109057342000] 1681851258.1s 6kb |L1.7| " + - 
"L1.8[1681851356697599000,1681851731606438000] 1681851858.07s 7kb |L1.8| " + - "L1.9[1681851768198276000,1681852656555310000] 1681852758.03s 8kb |L1.9| " + - "L1.10[1681852858788440000,1681853202074816000] 1681853358.03s 7kb |L1.10| " + - "L1.11[1681853216031150000,1681853533814380000] 1681853658.08s 6kb |L1.11| " + - "L1.12[1681853755089369000,1681854114135030000] 1681854258.1s 7kb |L1.12| " + - "L1.13[1681854158528835000,1681854411758250000] 1681854558.11s 6kb |L1.13| " + - "L1.14[1681854656198860000,1681855901530453000] 1681856058.07s 9kb |L1.14| " + - "L1.15[1681855930016632000,1681856215951881000] 1681856358.08s 6kb |L1.15|" + - "L1.16[1681856457094364000,1681856572199715000] 1681856658.1s 6kb |L1.16|" + - "L1.17[1681856755669647000,1681856797376786000] 1681856959.54s 6kb |L1.17|" + - "L1.18[1681857059467239000,1681857411709822000] 1681857559.46s 7kb |L1.18|" + - "L1.19[1681857658708732000,1681858001258834000] 1681858159.65s 7kb |L1.19|" + - "L1.20[1681858259089021000,1681858311972651000] 1681858459.69s 6kb |L1.20|" + - "L1.21[1681858336136281000,1681858611711634000] 1681858759.77s 6kb |L1.21|" + - "L1.22[1681858613076367000,1681859207290151000] 1681859359.65s 7kb |L1.22|" + - "L1.23[1681859212497834000,1681859549996540000] 1681859659.8s 6kb |L1.23|" + - "L1.24[1681859755984961000,1681860397139689000] 1681860559.6s 8kb |L1.24|" + - "L1.25[1681860656403220000,1681861312602593000] 1681861463.77s 8kb |L1.25|" + - "L1.26[1681861557592893000,1681861592762435000] 1681861760.08s 6kb |L1.26|" + - "L1.27[1681861612304587000,1681861928505695000] 1681862059.96s 6kb |L1.27|" + - "L1.28[1681862008720364000,1681862268794595000] 1681862511.94s 6kb |L1.28|" + - "L2 " + - "L2.1[1681776057065884000,1681848094846357000] 1681848108.8s 145kb|----------------------------------L2.1-----------------------------------| " + - "**** Final Output Files (192kb written)" + - "L1 " + - "L1.30[1681776002714783000,1681862268794595000] 1683039505.9s 192kb|-----------------------------------------L1.30------------------------------------------|" + - "L2 " + - "L2.1[1681776057065884000,1681848094846357000] 1681848108.8s 145kb|----------------------------------L2.1-----------------------------------| " + "### + ); +} From 30ac7d86eb26054c8693899a6639e8812e1d44c6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 21 Jul 2023 21:08:33 +0000 Subject: [PATCH 07/10] chore(deps): Bump tower-http from 0.4.2 to 0.4.3 (#8298) Bumps [tower-http](https://github.com/tower-rs/tower-http) from 0.4.2 to 0.4.3. - [Release notes](https://github.com/tower-rs/tower-http/releases) - [Commits](https://github.com/tower-rs/tower-http/compare/tower-http-0.4.2...tower-http-0.4.3) --- updated-dependencies: - dependency-name: tower-http dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com>
---
 Cargo.lock | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index c579c2d9a5..9196abde51 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6090,9 +6090,9 @@ dependencies = [

 [[package]]
 name = "tower-http"
-version = "0.4.2"
+version = "0.4.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7ac8060a61f8758a61562f6fb53ba3cbe1ca906f001df2e53cccddcdbee91e7c"
+checksum = "55ae70283aba8d2a8b411c695c437fe25b8b5e44e23e780662002fc72fb47a82"
 dependencies = [
  "bitflags 2.3.3",
  "bytes",

From b2179e2f8b67ae86feb9745d72d803547f36d114 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 24 Jul 2023 08:11:17 +0000
Subject: [PATCH 08/10] chore(deps): Bump clap from 4.3.17 to 4.3.19 (#8310)

Bumps [clap](https://github.com/clap-rs/clap) from 4.3.17 to 4.3.19.
- [Release notes](https://github.com/clap-rs/clap/releases)
- [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md)
- [Commits](https://github.com/clap-rs/clap/compare/v4.3.17...v4.3.19)

---
updated-dependencies:
- dependency-name: clap
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 Cargo.lock | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 9196abde51..6621bce96f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -855,9 +855,9 @@ dependencies = [

 [[package]]
 name = "clap"
-version = "4.3.17"
+version = "4.3.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b0827b011f6f8ab38590295339817b0d26f344aa4932c3ced71b45b0c54b4a9"
+checksum = "5fd304a20bff958a57f04c4e96a2e7594cc4490a0e809cbd48bb6437edaa452d"
 dependencies = [
  "clap_builder",
  "clap_derive",
@@ -887,9 +887,9 @@ dependencies = [

 [[package]]
 name = "clap_builder"
-version = "4.3.17"
+version = "4.3.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9441b403be87be858db6a23edb493e7f694761acdc3343d5a0fcaafd304cbc9e"
+checksum = "01c6a3f08f1fe5662a35cfe393aec09c4df95f60ee93b7556505260f75eee9e1"
 dependencies = [
  "anstream",
  "anstyle",

From 748e66731c2e80fa88bbbe709254aaf010ea1474 Mon Sep 17 00:00:00 2001
From: Marco Neumann
Date: Mon, 24 Jul 2023 10:24:10 +0200
Subject: [PATCH 09/10] feat: batch partition catalog requests in querier
 (take 2) (#8299)

* feat: batch partition catalog requests in querier

This is mostly wiring that builds on top of the other PRs linked to
#8089. I think we could eventually make the batching code nicer by
adding better wrappers / helpers, but let's do that if we have other
batched caches and this pattern proves to be useful.

Closes #8089.
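To illustrate the batching idea in isolation, here is a minimal, self-contained
sketch (the names `BatchingLoader`, `enqueue`, and `flush` are invented for the
example and are not the actual `cache_system` API): individual lookups are parked
in a pending buffer, and a single flush resolves them all with one backend round
trip, the same way the batch loader coalesces per-partition cache misses into one
batched catalog request.

    use std::collections::HashMap;
    use std::sync::Mutex;

    /// Parks individual keys until a flush resolves them in one batched call.
    struct BatchingLoader {
        pending: Mutex<Vec<i64>>,
    }

    impl BatchingLoader {
        fn new() -> Self {
            Self { pending: Mutex::new(Vec::new()) }
        }

        /// Queue a key; it is resolved on the next flush.
        fn enqueue(&self, key: i64) {
            self.pending.lock().unwrap().push(key);
        }

        /// Resolve all queued keys with a single backend round trip.
        fn flush<F>(&self, backend: F) -> HashMap<i64, String>
        where
            F: Fn(&[i64]) -> HashMap<i64, String>,
        {
            let keys: Vec<i64> = std::mem::take(&mut *self.pending.lock().unwrap());
            if keys.is_empty() {
                return HashMap::new();
            }
            // One batched request instead of `keys.len()` individual ones.
            backend(&keys)
        }
    }

    fn main() {
        let loader = BatchingLoader::new();
        loader.enqueue(1);
        loader.enqueue(2);
        // The stand-in backend answers all queued keys at once.
        let resolved = loader.flush(|keys| {
            keys.iter().map(|k| (*k, format!("partition-{k}"))).collect()
        });
        assert_eq!(resolved.len(), 2);
    }

The wiring below additionally has to route each result back to the caller that
queued it (the `out_map` bookkeeping) and to yield to the runtime while waiting
for a flush to complete.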
* test: extend `test_multi_get`

* test: regression test for #8286

* fix: prevent auto-flush CPU looping

* fix: panic when loading different tables at the same time

---------

Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com>
---
 cache_system/src/loader/batch.rs |  13 +-
 querier/src/cache/partition.rs   | 532 ++++++++++++++++++++++++++-----
 querier/src/parquet/mod.rs       |   9 +-
 querier/src/table/mod.rs         |  90 ++++--
 4 files changed, 532 insertions(+), 112 deletions(-)

diff --git a/cache_system/src/loader/batch.rs b/cache_system/src/loader/batch.rs
index 2ee0fbb2d2..6915711714 100644
--- a/cache_system/src/loader/batch.rs
+++ b/cache_system/src/loader/batch.rs
@@ -105,8 +105,6 @@ where
     L: Loader<K = Vec<K>, Extra = Vec<Extra>, V = Vec<V>>,
 {
     async fn flush(&self) {
-        trace!("flushing batch loader");
-
         let pending: Vec<_> = {
             let mut pending = self.inner.pending.lock();
             std::mem::take(pending.as_mut())
@@ -115,6 +113,8 @@ where
         if pending.is_empty() {
             return;
         }
+        trace!(n_pending = pending.len(), "flush batch loader");
+
         let job_id = self.inner.job_id_counter.fetch_add(1, Ordering::SeqCst);
         let handle_recv = CancellationSafeFutureReceiver::default();
@@ -221,6 +221,15 @@ where
             if !pending.is_empty() {
                 self.flush().await;
+
+                // prevent hot-looping:
+                // It seems that in some cases the underlying loader is ready but the data is not available via the
+                // cache driver yet. This is likely due to the signalling system within the cache driver that prevents
+                // cancellation, but also allows side-loading and at the same time prevents the same key from being
+                // loaded multiple times. Tokio doesn't know that this method here is basically a wait loop, so we
+                // yield back to the tokio worker to allow it to make some progress. Since flush+load take some time
+                // anyway, this yield is not performance-critical overall.
+                tokio::task::yield_now().await;
             }

             futures = pending;

diff --git a/querier/src/cache/partition.rs b/querier/src/cache/partition.rs
index fd5fa52cd3..7226ecd4cb 100644
--- a/querier/src/cache/partition.rs
+++ b/querier/src/cache/partition.rs
@@ -8,7 +8,11 @@ use cache_system::{
         PolicyBackend,
     },
     cache::{driver::CacheDriver, metrics::CacheWithMetrics, Cache},
-    loader::{metrics::MetricsLoader, FunctionLoader},
+    loader::{
+        batch::{BatchLoader, BatchLoaderFlusher, BatchLoaderFlusherExt},
+        metrics::MetricsLoader,
+        FunctionLoader,
+    },
     resource_consumption::FunctionEstimator,
 };
 use data_types::{
@@ -16,17 +20,17 @@ use data_types::{
     ColumnId, Partition, PartitionId, TransitionPartitionId,
 };
 use datafusion::scalar::ScalarValue;
-use iox_catalog::{interface::Catalog, partition_lookup};
+use iox_catalog::{interface::Catalog, partition_lookup_batch};
 use iox_query::chunk_statistics::{ColumnRange, ColumnRanges};
 use iox_time::TimeProvider;
 use observability_deps::tracing::debug;
 use schema::sort::SortKey;
 use std::{
-    collections::{HashMap, HashSet},
+    collections::{hash_map::Entry, HashMap, HashSet},
     mem::{size_of, size_of_val},
     sync::Arc,
 };
-use trace::span::Span;
+use trace::span::{Span, SpanRecorder};

 use super::{namespace::CachedTable, ram::RamSize};
@@ -46,6 +50,7 @@ type CacheT = Box<
 pub struct PartitionCache {
     cache: CacheT,
     remove_if_handle: RemoveIfHandle<PartitionId, Option<CachedPartition>>,
+    flusher: Arc<dyn BatchLoaderFlusher>,
 }

 impl PartitionCache {
@@ -58,24 +63,59 @@ impl PartitionCache {
         ram_pool: Arc<ResourcePool<RamSize>>,
         testing: bool,
     ) -> Self {
-        let loader =
-            FunctionLoader::new(move |partition_id: PartitionId, extra: Arc<CachedTable>| {
+        let loader = FunctionLoader::new(
+            move |partition_ids: Vec<PartitionId>, cached_tables: Vec<Arc<CachedTable>>| {
+                // sanity checks
+                assert_eq!(partition_ids.len(), cached_tables.len());
+
                 let catalog = Arc::clone(&catalog);
                 let backoff_config = backoff_config.clone();

                 async move {
-                    let partition = Backoff::new(&backoff_config)
+                    // prepare output buffer
+                    let mut out = (0..partition_ids.len()).map(|_| None).collect::<Vec<_>>();
+                    let mut out_map =
+                        HashMap::<PartitionId, usize>::with_capacity(partition_ids.len());
+                    for (idx, id) in partition_ids.iter().enumerate() {
+                        match out_map.entry(*id) {
+                            Entry::Occupied(_) => unreachable!("cache system requested same partition from loader concurrently, this should have been prevented by the CacheDriver"),
+                            Entry::Vacant(v) => {
+                                v.insert(idx);
+                            }
+                        }
+                    }
+
+                    // build `&[&TransitionPartitionId]` for batch catalog request
+                    let ids = partition_ids
+                        .iter()
+                        .copied()
+                        .map(TransitionPartitionId::Deprecated)
+                        .collect::<Vec<_>>();
+                    let ids = ids.iter().collect::<Vec<_>>();
+
+                    // fetch catalog data
+                    let partitions = Backoff::new(&backoff_config)
                         .retry_all_errors("get partition_key", || async {
                             let mut repos = catalog.repositories().await;
-                            let id = TransitionPartitionId::Deprecated(partition_id);
-                            partition_lookup(repos.as_mut(), &id).await
+                            partition_lookup_batch(repos.as_mut(), &ids).await
                         })
                         .await
-                        .expect("retry forever")?;
+                        .expect("retry forever");

-                    Some(CachedPartition::new(partition, &extra))
+                    // build output
+                    for p in partitions {
+                        let idx = out_map[&p.id];
+                        let cached_table = &cached_tables[idx];
+                        let p = CachedPartition::new(p, cached_table);
+                        out[idx] = Some(p);
+                    }
+
+                    out
                 }
-            });
+            },
+        );
+        let loader = Arc::new(BatchLoader::new(loader));
+        let flusher = Arc::clone(&loader);
+
         let loader = Arc::new(MetricsLoader::new(
             loader,
             CACHE_ID,
@@ -111,51 +151,79 @@ impl PartitionCache {
         Self {
             cache,
             remove_if_handle,
+            flusher,
         }
     }

     /// Get cached partition.
     ///
+    /// The result only contains existing partitions. The order is undefined.
+    ///
     /// Expire partition if the cached sort key does NOT cover the given set of columns.
     pub async fn get(
         &self,
         cached_table: Arc<CachedTable>,
-        partition_id: PartitionId,
-        sort_key_should_cover: &[ColumnId],
+        partitions: Vec<PartitionRequest>,
         span: Option<Span>,
-    ) -> Option<CachedPartition> {
-        self.remove_if_handle
-            .remove_if_and_get(
-                &self.cache,
-                partition_id,
-                |cached_partition| {
-                    let invalidates =
-                        if let Some(sort_key) = &cached_partition.and_then(|p| p.sort_key) {
-                            sort_key_should_cover
-                                .iter()
-                                .any(|col| !sort_key.column_set.contains(col))
-                        } else {
-                            // no sort key at all => need to update if there is anything to cover
-                            !sort_key_should_cover.is_empty()
-                        };
+    ) -> Vec<CachedPartition> {
+        let span_recorder = SpanRecorder::new(span);

-                    if invalidates {
-                        debug!(
-                            partition_id = partition_id.get(),
-                            "invalidate partition cache",
-                        );
-                    }
+        let futures = partitions
+            .into_iter()
+            .map(
+                |PartitionRequest {
+                     partition_id,
+                     sort_key_should_cover,
+                 }| {
+                    let cached_table = Arc::clone(&cached_table);
+                    let span = span_recorder.child_span("single partition cache lookup");

-                    invalidates
+                    self.remove_if_handle.remove_if_and_get(
+                        &self.cache,
+                        partition_id,
+                        move |cached_partition| {
+                            let invalidates = if let Some(sort_key) =
+                                &cached_partition.and_then(|p| p.sort_key)
+                            {
+                                sort_key_should_cover
+                                    .iter()
+                                    .any(|col| !sort_key.column_set.contains(col))
+                            } else {
+                                // no sort key at all => need to update if there is anything to cover
+                                !sort_key_should_cover.is_empty()
+                            };
+
+                            if invalidates {
+                                debug!(
+                                    partition_id = partition_id.get(),
+                                    "invalidate partition cache",
+                                );
+                            }
+
+                            invalidates
+                        },
+                        (cached_table, span),
+                    )
                 },
-                (cached_table, span),
             )
-            .await
+            .collect();
+
+        let res = self.flusher.auto_flush(futures).await;
+
+        res.into_iter().flatten().collect()
     }
 }

+/// Request for [`PartitionCache::get`].
+#[derive(Debug)]
+pub struct PartitionRequest {
+    pub partition_id: PartitionId,
+    pub sort_key_should_cover: Vec<ColumnId>,
+}
+
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct CachedPartition {
+    pub id: PartitionId,
     pub sort_key: Option<Arc<PartitionSortKey>>,
     pub column_ranges: ColumnRanges,
 }
@@ -231,6 +299,7 @@ impl CachedPartition {
         column_ranges.shrink_to_fit();

         Self {
+            id: partition.id,
             sort_key,
             column_ranges: Arc::new(column_ranges),
         }
@@ -298,12 +367,15 @@ mod tests {
     use crate::cache::{
         ram::test_util::test_ram_pool, test_util::assert_catalog_access_metric_count,
     };
+    use async_trait::async_trait;
     use data_types::{partition_template::TablePartitionTemplateOverride, ColumnType};
+    use futures::StreamExt;
     use generated_types::influxdata::iox::partition_template::v1::{
         template_part::Part, PartitionTemplate, TemplatePart,
     };
-    use iox_tests::TestCatalog;
+    use iox_tests::{TestCatalog, TestNamespace};
     use schema::{Schema, SchemaBuilder};
+    use tokio::sync::Barrier;

     #[tokio::test]
     async fn test_sort_key() {
@@ -348,7 +420,7 @@ mod tests {
         );

         let sort_key1a = cache
-            .get(Arc::clone(&cached_table), p1.id, &Vec::new(), None)
+            .get_one(Arc::clone(&cached_table), p1.id, &Vec::new(), None)
             .await
             .unwrap()
             .sort_key;
@@ -360,18 +432,26 @@ mod tests {
                 column_order: [c1.column.id, c2.column.id].into(),
             }
         );
-        assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 1);
+        assert_catalog_access_metric_count(
+            &catalog.metric_registry,
+            "partition_get_by_id_batch",
+            1,
+        );

         let sort_key2 = cache
-            .get(Arc::clone(&cached_table), p2.id, &Vec::new(), None)
+            .get_one(Arc::clone(&cached_table), p2.id, &Vec::new(), None)
             .await
             .unwrap()
             .sort_key;
         assert_eq!(sort_key2, None);
-        assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 2);
+        assert_catalog_access_metric_count(
+            &catalog.metric_registry,
+            "partition_get_by_id_batch",
+            2,
+        );

         let sort_key1b = cache
-            .get(Arc::clone(&cached_table), p1.id, &Vec::new(), None)
+            .get_one(Arc::clone(&cached_table), p1.id, &Vec::new(), None)
             .await
             .unwrap()
             .sort_key;
         assert!(Arc::ptr_eq(
             sort_key1a.as_ref().unwrap(),
             sort_key1b.as_ref().unwrap()
         ));
-        assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 2);
+        assert_catalog_access_metric_count(
+            &catalog.metric_registry,
+            "partition_get_by_id_batch",
+            2,
+        );

         // non-existing partition
         for _ in 0..2 {
             let res = cache
-                .get(
+                .get_one(
                     Arc::clone(&cached_table),
                     PartitionId::new(i64::MAX),
                     &Vec::new(),
                     None,
                 )
                 .await;
             assert_eq!(res, None);
-            assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 3);
+            assert_catalog_access_metric_count(
+                &catalog.metric_registry,
+                "partition_get_by_id_batch",
+                3,
+            );
         }
     }
@@ -461,7 +549,7 @@
         );

         let ranges1a = cache
-            .get(Arc::clone(&cached_table), p1.id, &[], None)
+            .get_one(Arc::clone(&cached_table), p1.id, &[], None)
             .await
             .unwrap()
             .column_ranges;
@@ -488,10 +576,14 @@
             &ranges1a.get("tag1").unwrap().min_value,
             &ranges1a.get("tag1").unwrap().max_value,
         ));
-        assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 1);
+        assert_catalog_access_metric_count(
+            &catalog.metric_registry,
+            "partition_get_by_id_batch",
+            1,
+        );

         let ranges2 = cache
-            .get(Arc::clone(&cached_table), p2.id, &[], None)
+            .get_one(Arc::clone(&cached_table), p2.id, &[], None)
             .await
             .unwrap()
             .column_ranges;
         assert_eq!(
             ranges2,
             Arc::new(HashMap::from([(
                 Arc::from("tag2"),
                 ColumnRange {
                     min_value: Arc::new(ScalarValue::from("foo")),
                     max_value: Arc::new(ScalarValue::from("foo"))
                 }
             ),]),
         );
-        assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 2);
+        assert_catalog_access_metric_count(
+            &catalog.metric_registry,
+            "partition_get_by_id_batch",
+            2,
+        );

         let ranges3 = cache
-            .get(Arc::clone(&cached_table), p3.id, &[], None)
+            .get_one(Arc::clone(&cached_table), p3.id, &[], None)
             .await
             .unwrap()
             .column_ranges;
         assert_eq!(
             ranges3,
             Arc::new(HashMap::from([
                 (
                     ...
                 ),
                 (
                     ...
                 ),
             ]),
         );
-        assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 3);
+        assert_catalog_access_metric_count(
+            &catalog.metric_registry,
+            "partition_get_by_id_batch",
+            3,
+        );

         let ranges4 = cache
-            .get(Arc::clone(&cached_table), p4.id, &[], None)
+            .get_one(Arc::clone(&cached_table), p4.id, &[], None)
             .await
             .unwrap()
             .column_ranges;
         assert_eq!(
             ranges4,
             Arc::new(HashMap::from([
                 (
                     ...
                 ),
                 (
                     ...
                 ),
             ]),
         );
-        assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 4);
+        assert_catalog_access_metric_count(
+            &catalog.metric_registry,
+            "partition_get_by_id_batch",
+            4,
+        );

         let ranges5 = cache
-            .get(Arc::clone(&cached_table), p5.id, &[], None)
+            .get_one(Arc::clone(&cached_table), p5.id, &[], None)
             .await
             .unwrap()
             .column_ranges;
         assert_eq!(
             ranges5,
             Arc::new(HashMap::from([(
                 ...
             } ),]),
         );
-        assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 5);
+        assert_catalog_access_metric_count(
+            &catalog.metric_registry,
+            "partition_get_by_id_batch",
+            5,
+        );

         let ranges1b = cache
-            .get(Arc::clone(&cached_table), p1.id, &[], None)
+            .get_one(Arc::clone(&cached_table), p1.id, &[], None)
             .await
             .unwrap()
             .column_ranges;
         assert!(Arc::ptr_eq(&ranges1a, &ranges1b));
-        assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 5);
+        assert_catalog_access_metric_count(
+            &catalog.metric_registry,
+            "partition_get_by_id_batch",
+            5,
+        );

         // non-existing partition
         for _ in 0..2 {
             let res = cache
-                .get(
+                .get_one(
                     Arc::clone(&cached_table),
                     PartitionId::new(i64::MAX),
                     &[],
                     None,
                 )
                 .await;
             assert_eq!(res, None);
-            assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 6);
+            assert_catalog_access_metric_count(
+                &catalog.metric_registry,
+                "partition_get_by_id_batch",
+                6,
+            );
         }
     }
@@ -635,31 +751,43 @@
         );

         let sort_key = cache
-            .get(Arc::clone(&cached_table), p_id, &[], None)
+            .get_one(Arc::clone(&cached_table), p_id, &[], None)
             .await
             .unwrap()
             .sort_key;
         assert_eq!(sort_key, None,);
-        assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 1);
+        assert_catalog_access_metric_count(
+            &catalog.metric_registry,
+            "partition_get_by_id_batch",
+            1,
+        );

         // requesting nothing will not expire
         assert!(p_sort_key.is_none());
         let sort_key = cache
-            .get(Arc::clone(&cached_table), p_id, &[], None)
+            .get_one(Arc::clone(&cached_table), p_id, &[], None)
             .await
             .unwrap()
             .sort_key;
         assert_eq!(sort_key, None,);
-        assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 1);
+        assert_catalog_access_metric_count(
+            &catalog.metric_registry,
+            "partition_get_by_id_batch",
+            1,
+        );

         // but requesting something will expire
         let sort_key = cache
-            .get(Arc::clone(&cached_table), p_id, &[c1.column.id], None)
+            .get_one(Arc::clone(&cached_table), p_id, &[c1.column.id], None)
             .await
             .unwrap()
             .sort_key;
         assert_eq!(sort_key, None,);
-        assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 2);
+        assert_catalog_access_metric_count(
+            &catalog.metric_registry,
+            "partition_get_by_id_batch",
+            2,
+        );

         // set sort key
         let p = p
@@ -668,11 +796,12 @@ mod tests {
                 c2.column.name.as_str(),
             ]))
             .await;
+        assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 1);

         // expire & fetch
         let p_sort_key = p.partition.sort_key();
         let sort_key = cache
-            .get(Arc::clone(&cached_table), p_id, &[c1.column.id], None)
+            .get_one(Arc::clone(&cached_table), p_id, &[c1.column.id], None)
             .await
             .unwrap()
             .sort_key;
@@ -684,7 +813,11 @@
                 column_order: [c1.column.id, c2.column.id].into(),
             }
         );
-        assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 4);
+        assert_catalog_access_metric_count(
+            &catalog.metric_registry,
+            "partition_get_by_id_batch",
+            3,
+        );

         // subsets and the full key don't expire
         for should_cover in [
             vec![],
             vec![c1.column.id],
             vec![c2.column.id],
             vec![c1.column.id, c2.column.id],
         ] {
             let sort_key_2 = cache
-                .get(Arc::clone(&cached_table), p_id, &should_cover, None)
+                .get_one(Arc::clone(&cached_table), p_id, &should_cover, None)
                 .await
                 .unwrap()
                 .sort_key;
             assert!(Arc::ptr_eq(
                 sort_key.as_ref().unwrap(),
                 sort_key_2.as_ref().unwrap()
             ));
-            assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 4);
+            assert_catalog_access_metric_count(
+                &catalog.metric_registry,
+                "partition_get_by_id_batch",
+                3,
+            );
         }

         // unknown columns expire
         let c3 = t.create_column("x", ColumnType::Tag).await;
         let sort_key_2 = cache
-            .get(
+            .get_one(
                 Arc::clone(&cached_table),
                 p_id,
                 &[c1.column.id, c3.column.id],
                 None,
             )
             .await
             .unwrap()
             .sort_key;
         assert!(Arc::ptr_eq(
             sort_key.as_ref().unwrap(),
             sort_key_2.as_ref().unwrap()
         ));
         assert_eq!(sort_key, sort_key_2);
-        assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 5);
+        assert_catalog_access_metric_count(
+            &catalog.metric_registry,
+            "partition_get_by_id_batch",
+            4,
+        );
+    }
+
+    #[tokio::test]
+    async fn test_multi_get() {
+        let catalog = TestCatalog::new();
+
+        let ns = catalog.create_namespace_1hr_retention("ns").await;
+        let t = ns.create_table("table").await;
+        let p1 = t.create_partition("k1").await.partition.clone();
+        let p2 = t.create_partition("k2").await.partition.clone();
+        let cached_table = Arc::new(CachedTable {
+            id: t.table.id,
+            schema: schema(),
+            column_id_map: HashMap::default(),
+            column_id_map_rev: HashMap::default(),
+            primary_key_column_ids: [].into(),
+            partition_template: TablePartitionTemplateOverride::default(),
+        });
+
+        let cache = PartitionCache::new(
+            catalog.catalog(),
+            BackoffConfig::default(),
+            catalog.time_provider(),
+            &catalog.metric_registry(),
+            test_ram_pool(),
+            true,
+        );
+
+        let mut res = cache
+            .get(
+                Arc::clone(&cached_table),
+                vec![
+                    PartitionRequest {
+                        partition_id: p1.id,
+                        sort_key_should_cover: vec![],
+                    },
+                    PartitionRequest {
+                        partition_id: p2.id,
+                        sort_key_should_cover: vec![],
+                    },
+                    PartitionRequest {
+                        partition_id: p1.id,
+                        sort_key_should_cover: vec![],
+                    },
+                    PartitionRequest {
+                        // requesting non-existing partitions is fine, they just don't appear in the output
+                        partition_id: PartitionId::new(i64::MAX),
+                        sort_key_should_cover: vec![],
+                    },
+                ],
+                None,
+            )
+            .await;
+        res.sort_by_key(|p| p.id);
+        let ids = res.iter().map(|p| p.id).collect::<Vec<_>>();
+        assert_eq!(ids, vec![p1.id, p1.id, p2.id]);
+        assert_catalog_access_metric_count(
+            &catalog.metric_registry,
+            "partition_get_by_id_batch",
+            1,
+        );
+
+        // empty get
+        let res = cache.get(Arc::clone(&cached_table), vec![], None).await;
+        assert_eq!(res, vec![]);
+    }
+
+    /// This is a regression test for #8286.
+    ///
+    /// The issue happened when requests for multiple (different) tables were made concurrently. The root cause was the
+    /// wrong assumption that when flushing the batched-up requests, there would only be a single table in the flushed set.
+    ///
+    /// To trigger this, we need at least 2 tokio threads.
+    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+    async fn test_multi_table_concurrent_get() {
+        // In most cases, the issue triggers on the first run. However, let's be sure and try multiple times.
+        for _ in 0..10 {
+            test_multi_table_concurrent_get_inner().await;
+        }
+    }
+
+    /// Actual implementation of [`test_multi_table_concurrent_get`] that is tried multiple times.
+    async fn test_multi_table_concurrent_get_inner() {
+        let catalog = TestCatalog::new();
+
+        // prepare catalog state for two tables
+        let ns = catalog.create_namespace_1hr_retention("ns").await;
+        let state_1 = ConcurrencyTestState::prepare(&ns, "t1").await;
+        let state_2 = ConcurrencyTestState::prepare(&ns, "t2").await;
+
+        // sanity checks for test setup
+        assert!(!Arc::ptr_eq(&state_1.cached_table, &state_2.cached_table));
+        assert_ne!(state_1.cached_table.id, state_2.cached_table.id);
+        assert_ne!(state_1.c_id, state_2.c_id);
+        assert_ne!(state_1.partitions, state_2.partitions);
+
+        let cache = Arc::new(PartitionCache::new(
+            catalog.catalog(),
+            BackoffConfig::default(),
+            catalog.time_provider(),
+            &catalog.metric_registry(),
+            test_ram_pool(),
+            true,
+        ));
+
+        // use a barrier to make sure that both tokio tasks are running at the same time
+        let barrier = Arc::new(Barrier::new(2));
+
+        // set up first tokio task
+        let barrier_captured = Arc::clone(&barrier);
+        let cache_captured = Arc::clone(&cache);
+        let handle_1 = tokio::spawn(async move {
+            barrier_captured.wait().await;
+
+            // When running quickly, both tasks will end up on the same tokio worker and will run in sequence. It seems
+            // that tokio tries to avoid costly work-stealing. However, we can trick tokio into actually running both
+            // tasks concurrently with a bit more async work: a simple sleep.
+            tokio::time::sleep(std::time::Duration::from_millis(10)).await;
+
+            state_1.run(cache_captured).await;
+        });
+
+        // set up the 2nd tokio task in the same manner as the first one (but for the other table)
+        let barrier_captured = Arc::clone(&barrier);
+        let cache_captured = Arc::clone(&cache);
+        let handle_2 = tokio::spawn(async move {
+            barrier_captured.wait().await;
+            tokio::time::sleep(std::time::Duration::from_millis(10)).await;
+            state_2.run(cache_captured).await;
+        });
+
+        handle_1.await.unwrap();
+        handle_2.await.unwrap();
+    }
+
+    /// Building block for a single table within the [`test_multi_table_concurrent_get`] test.
+    struct ConcurrencyTestState {
+        /// Cached table that is used for [`PartitionCache::get`].
+        cached_table: Arc<CachedTable>,
+
+        /// ID of the only column within that table.
+        c_id: ColumnId,
+
+        /// Partitions within that table.
+        partitions: Vec<PartitionId>,
+    }
+
+    impl ConcurrencyTestState {
+        /// Prepare catalog state.
+        async fn prepare(ns: &Arc<TestNamespace>, name: &str) -> Self {
+            let t = ns.create_table(name).await;
+            let c = t.create_column("time", ColumnType::Time).await;
+            let cached_table = Arc::new(CachedTable {
+                id: t.table.id,
+                schema: schema(),
+                column_id_map: HashMap::from([(c.column.id, Arc::from(c.column.name.clone()))]),
+                column_id_map_rev: HashMap::from([(Arc::from(c.column.name.clone()), c.column.id)]),
+                primary_key_column_ids: [c.column.id].into(),
+                partition_template: TablePartitionTemplateOverride::default(),
+            });
+            const N_PARTITIONS: usize = 20;
+            let mut partitions = futures::stream::iter(0..N_PARTITIONS)
+                .then(|i| {
+                    let t = Arc::clone(&t);
+                    async move {
+                        t.create_partition_with_sort_key(&format!("p{i}"), &["time"])
+                            .await
+                            .partition
+                            .id
+                    }
+                })
+                .collect::<Vec<_>>()
+                .await;
+            partitions.sort();
+
+            Self {
+                cached_table,
+                c_id: c.column.id,
+                partitions,
+            }
+        }
+
+        /// Perform the actual [`PartitionCache::get`] call and run some basic sanity checks on the result.
+        async fn run(self, cache: Arc<PartitionCache>) {
+            let Self {
+                cached_table,
+                c_id,
+                partitions,
+            } = self;
+
+            let mut results = cache
+                .get(
+                    cached_table,
+                    partitions
+                        .iter()
+                        .map(|p| PartitionRequest {
+                            partition_id: *p,
+                            sort_key_should_cover: vec![],
+                        })
+                        .collect(),
+                    None,
+                )
+                .await;
+            results.sort_by_key(|p| p.id);
+            let partitions_res = results.iter().map(|p| p.id).collect::<Vec<_>>();
+            assert_eq!(partitions, partitions_res);
+            assert!(results
+                .iter()
+                .all(|p| p.sort_key.as_ref().unwrap().column_set == HashSet::from([c_id])));
+        }
     }

     fn schema() -> Schema {
         SchemaBuilder::new().build().unwrap()
     }
+
+    /// Extension methods for simpler testing.
+    #[async_trait]
+    trait PartitionCacheExt {
+        async fn get_one(
+            &self,
+            cached_table: Arc<CachedTable>,
+            partition_id: PartitionId,
+            sort_key_should_cover: &[ColumnId],
+            span: Option<Span>,
+        ) -> Option<CachedPartition>;
+    }
+
+    #[async_trait]
+    impl PartitionCacheExt for PartitionCache {
+        async fn get_one(
+            &self,
+            cached_table: Arc<CachedTable>,
+            partition_id: PartitionId,
+            sort_key_should_cover: &[ColumnId],
+            span: Option<Span>,
+        ) -> Option<CachedPartition> {
+            self.get(
+                cached_table,
+                vec![PartitionRequest {
+                    partition_id,
+                    sort_key_should_cover: sort_key_should_cover.to_vec(),
+                }],
+                span,
+            )
+            .await
+            .into_iter()
+            .next()
+        }
+    }
 }

diff --git a/querier/src/parquet/mod.rs b/querier/src/parquet/mod.rs
index fe75fc3064..c3794a82dc 100644
--- a/querier/src/parquet/mod.rs
+++ b/querier/src/parquet/mod.rs
@@ -106,6 +106,7 @@ pub mod tests {

     use crate::cache::{
         namespace::{CachedNamespace, CachedTable},
+        partition::PartitionRequest,
         CatalogCache,
     };
@@ -249,11 +250,15 @@ pub mod tests {
             .partition()
             .get(
                 Arc::clone(&self.cached_table),
-                self.parquet_file.partition_id,
-                &[],
+                vec![PartitionRequest {
+                    partition_id: self.parquet_file.partition_id,
+                    sort_key_should_cover: vec![],
+                }],
                 None,
             )
             .await
+            .into_iter()
+            .next()
             .unwrap();
         let cached_partitions =
             HashMap::from([(self.parquet_file.partition_id, cached_partition)]);

diff --git a/querier/src/table/mod.rs b/querier/src/table/mod.rs
index add7d855b9..52750ec47e 100644
--- a/querier/src/table/mod.rs
+++ b/querier/src/table/mod.rs
@@ -1,17 +1,19 @@
 use self::query_access::QuerierTableChunkPruner;
 use crate::{
-    cache::{namespace::CachedTable, partition::CachedPartition},
+    cache::{
+        namespace::CachedTable,
+        partition::{CachedPartition, PartitionRequest},
+    },
     ingester::{self, IngesterPartition},
     parquet::ChunkAdapter,
-    IngesterConnection, CONCURRENT_CHUNK_CREATION_JOBS,
+    IngesterConnection,
 };
 use data_types::{ColumnId, NamespaceId, ParquetFile, PartitionId, TableId};
 use datafusion::error::DataFusionError;
-use futures::{join, StreamExt};
+use futures::join;
 use iox_query::{provider, provider::ChunkPruner, QueryChunk};
 use observability_deps::tracing::{debug, trace};
 use predicate::Predicate;
-use rand::{rngs::StdRng, seq::SliceRandom, SeedableRng};
 use schema::Schema;
 use snafu::{ResultExt, Snafu};
 use std::{
@@ -345,33 +347,26 @@ impl QuerierTable {
                 .extend(f.column_set.iter().copied().filter(|id| pk.contains(id)));
         }

-        // shuffle order to even catalog load, because cache hits/misses might be correlated w/ the order of the
-        // partitions.
-        //
-        // Note that we sort before shuffling to achieve a deterministic pseudo-random order
-        let mut partitions = should_cover.into_iter().collect::<Vec<_>>();
-        let mut rng = StdRng::seed_from_u64(cached_table.id.get() as u64);
-        partitions.sort_by(|(a_p_id, _a_cols), (b_p_id, _b_cols)| a_p_id.cmp(b_p_id));
-        partitions.shuffle(&mut rng);
-
-        futures::stream::iter(partitions)
-            .map(|(p_id, cover)| {
-                let catalog_cache = self.chunk_adapter.catalog_cache();
-                let span = span_recorder.child_span("fetch partition");
-
-                async move {
-                    let cover = cover.into_iter().collect::<Vec<_>>();
-                    let cached_partition = catalog_cache
-                        .partition()
-                        .get(Arc::clone(cached_table), p_id, &cover, span)
-                        .await;
-                    cached_partition.map(|p| (p_id, p))
-                }
+        // batch request all partitions
+        let requests = should_cover
+            .into_iter()
+            .map(|(id, cover)| PartitionRequest {
+                partition_id: id,
+                sort_key_should_cover: cover.into_iter().collect(),
             })
-            .buffer_unordered(CONCURRENT_CHUNK_CREATION_JOBS)
-            .filter_map(|x| async move { x })
-            .collect::<HashMap<_, _>>()
-            .await
+            .collect();
+        let partitions = self
+            .chunk_adapter
+            .catalog_cache()
+            .partition()
+            .get(
+                Arc::clone(cached_table),
+                requests,
+                span_recorder.child_span("fetch partitions"),
+            )
+            .await;
+
+        partitions.into_iter().map(|p| (p.id, p)).collect()
     }

     /// Get a chunk pruner that can be used to prune chunks retrieved via [`chunks`](Self::chunks)
@@ -891,12 +886,22 @@ mod tests {

         let chunks = querier_table.chunks().await.unwrap();
         assert_eq!(chunks.len(), 5);
-        assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 6);
+        assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 4);
+        assert_catalog_access_metric_count(
+            &catalog.metric_registry,
+            "partition_get_by_id_batch",
+            1,
+        );
         assert_cache_access_metric_count(&catalog.metric_registry, "partition", 2);

         let chunks = querier_table.chunks().await.unwrap();
         assert_eq!(chunks.len(), 5);
-        assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 6);
+        assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 4);
+        assert_catalog_access_metric_count(
+            &catalog.metric_registry,
+            "partition_get_by_id_batch",
+            1,
+        );
         assert_cache_access_metric_count(&catalog.metric_registry, "partition", 4);

         partition_2
             .create_parquet_file(
                 TestParquetFileBuilder::default().with_line_protocol("table,tag1=a foo=1,bar=1 11"),
             )
             .await;
-        assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 7);
+        assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 5);
+        assert_catalog_access_metric_count(
+            &catalog.metric_registry,
+            "partition_get_by_id_batch",
+            1,
+        );

         // file not visible yet
         let chunks = querier_table.chunks().await.unwrap();
         assert_eq!(chunks.len(), 5);
-        assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 7);
+        assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 5);
+        assert_catalog_access_metric_count(
+            &catalog.metric_registry,
+            "partition_get_by_id_batch",
+            1,
+        );
         assert_cache_access_metric_count(&catalog.metric_registry, "partition", 6);

         // change ingester ID => invalidates cache
         let querier_table = querier_table
             .with_ingester_partition(ingester_partition_builder.build());
         let chunks = querier_table.chunks().await.unwrap();
         assert_eq!(chunks.len(), 6);
-        assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 8);
+        assert_catalog_access_metric_count(&catalog.metric_registry, "partition_get_by_id", 5);
+        assert_catalog_access_metric_count(
+            &catalog.metric_registry,
+            "partition_get_by_id_batch",
+            2,
+        );
         assert_cache_access_metric_count(&catalog.metric_registry, "partition", 8);
     }

From 79bb1347e6ce8e7032c7c892bf1c6770e6a96e4b Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 24 Jul 2023 08:40:05 +0000
Subject: [PATCH 10/10] chore(deps): Bump sysinfo from 0.29.5 to 0.29.6 (#8311)

Bumps [sysinfo](https://github.com/GuillaumeGomez/sysinfo) from 0.29.5 to 0.29.6.
- [Changelog](https://github.com/GuillaumeGomez/sysinfo/blob/master/CHANGELOG.md)
- [Commits](https://github.com/GuillaumeGomez/sysinfo/commits)

---
updated-dependencies:
- dependency-name: sysinfo
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com>
---
 Cargo.lock         | 4 ++--
 tracker/Cargo.toml | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 6621bce96f..29930c2597 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5683,9 +5683,9 @@ checksum = "d3543ca0810e71767052bdcdd5653f23998b192642a22c5164bfa6581e40a4a2"

 [[package]]
 name = "sysinfo"
-version = "0.29.5"
+version = "0.29.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6b949f01f9c23823744b71e0060472ecbde578ef68cc2a9e46d114efd77c3034"
+checksum = "c7cb97a5a85a136d84e75d5c3cf89655090602efb1be0d8d5337b7e386af2908"
 dependencies = [
  "cfg-if",
  "core-foundation-sys",

diff --git a/tracker/Cargo.toml b/tracker/Cargo.toml
index ce9265489c..2d33754e81 100644
--- a/tracker/Cargo.toml
+++ b/tracker/Cargo.toml
@@ -19,7 +19,7 @@ tokio = { version = "1.29", features = ["macros", "parking_lot", "sync", "time"] }
 tokio-util = { version = "0.7.8" }
 trace = { path = "../trace"}
 workspace-hack = { version = "0.1", path = "../workspace-hack" }
-sysinfo = "0.29.5"
+sysinfo = "0.29.6"

 [dev-dependencies]
 tempfile = "3.7.0"