Merge branch 'main' into ntran/table_cli

pull/24376/head
NGA-TRAN 2023-07-21 14:49:02 -04:00
commit 144778430e
98 changed files with 3661 additions and 1449 deletions

Cargo.lock generated (688 lines changed)

File diff suppressed because it is too large.

@ -121,8 +121,8 @@ license = "MIT OR Apache-2.0"
[workspace.dependencies] [workspace.dependencies]
arrow = { version = "43.0.0" } arrow = { version = "43.0.0" }
arrow-flight = { version = "43.0.0" } arrow-flight = { version = "43.0.0" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "46182c894e5106adba7fb53e9848ce666fb6129b", default-features = false } datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "44008d71180f2d03e9d21944788e61cb8845abc7", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "46182c894e5106adba7fb53e9848ce666fb6129b" } datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "44008d71180f2d03e9d21944788e61cb8845abc7" }
hashbrown = { version = "0.14.0" } hashbrown = { version = "0.14.0" }
object_store = { version = "0.6.0" } object_store = { version = "0.6.0" }


@ -315,7 +315,7 @@ struct TestStateTtlAndRefresh {
ttl_provider: Arc<TestTtlProvider>, ttl_provider: Arc<TestTtlProvider>,
refresh_duration_provider: Arc<TestRefreshDurationProvider>, refresh_duration_provider: Arc<TestRefreshDurationProvider>,
time_provider: Arc<MockProvider>, time_provider: Arc<MockProvider>,
loader: Arc<TestLoader<u8, String, ()>>, loader: Arc<TestLoader<u8, (), String>>,
notify_idle: Arc<Notify>, notify_idle: Arc<Notify>,
} }
@ -365,7 +365,7 @@ struct TestStateLRUAndRefresh {
size_estimator: Arc<TestSizeEstimator>, size_estimator: Arc<TestSizeEstimator>,
refresh_duration_provider: Arc<TestRefreshDurationProvider>, refresh_duration_provider: Arc<TestRefreshDurationProvider>,
time_provider: Arc<MockProvider>, time_provider: Arc<MockProvider>,
loader: Arc<TestLoader<u8, String, ()>>, loader: Arc<TestLoader<u8, (), String>>,
pool: Arc<ResourcePool<TestSize>>, pool: Arc<ResourcePool<TestSize>>,
notify_idle: Arc<Notify>, notify_idle: Arc<Notify>,
} }
@ -505,7 +505,7 @@ struct TestStateLruAndRefresh {
size_estimator: Arc<TestSizeEstimator>, size_estimator: Arc<TestSizeEstimator>,
refresh_duration_provider: Arc<TestRefreshDurationProvider>, refresh_duration_provider: Arc<TestRefreshDurationProvider>,
time_provider: Arc<MockProvider>, time_provider: Arc<MockProvider>,
loader: Arc<TestLoader<u8, String, ()>>, loader: Arc<TestLoader<u8, (), String>>,
notify_idle: Arc<Notify>, notify_idle: Arc<Notify>,
} }


@ -963,7 +963,7 @@ mod tests {
metric_registry: metric::Registry, metric_registry: metric::Registry,
refresh_duration_provider: Arc<TestRefreshDurationProvider>, refresh_duration_provider: Arc<TestRefreshDurationProvider>,
time_provider: Arc<MockProvider>, time_provider: Arc<MockProvider>,
loader: Arc<TestLoader<u8, String, ()>>, loader: Arc<TestLoader<u8, (), String>>,
notify_idle: Arc<Notify>, notify_idle: Arc<Notify>,
} }


@ -254,9 +254,10 @@ mod tests {
use crate::{ use crate::{
cache::{ cache::{
driver::CacheDriver, driver::CacheDriver,
test_util::{run_test_generic, AbortAndWaitExt, EnsurePendingExt, TestAdapter}, test_util::{run_test_generic, TestAdapter},
}, },
loader::test_util::TestLoader, loader::test_util::TestLoader,
test_util::{AbortAndWaitExt, EnsurePendingExt},
}; };
use super::*; use super::*;


@ -1,12 +1,11 @@
use std::{sync::Arc, time::Duration}; use std::{sync::Arc, time::Duration};
use async_trait::async_trait; use tokio::sync::Barrier;
use futures::{Future, FutureExt};
use tokio::{sync::Barrier, task::JoinHandle};
use crate::{ use crate::{
cache::{CacheGetStatus, CachePeekStatus}, cache::{CacheGetStatus, CachePeekStatus},
loader::test_util::TestLoader, loader::test_util::TestLoader,
test_util::{AbortAndWaitExt, EnsurePendingExt},
}; };
use super::Cache; use super::Cache;
@ -461,60 +460,3 @@ where
assert_eq!(res, String::from("foo")); assert_eq!(res, String::from("foo"));
assert_eq!(loader.loaded(), vec![(1, true)]); assert_eq!(loader.loaded(), vec![(1, true)]);
} }
#[async_trait]
pub trait EnsurePendingExt {
type Out;
/// Ensure that the future is pending. In the pending case, try to pass the given barrier. Afterwards await the future again.
///
/// This is helpful to ensure a future is in a pending state before continuing with the test setup.
async fn ensure_pending(self, barrier: Arc<Barrier>) -> Self::Out;
}
#[async_trait]
impl<F> EnsurePendingExt for F
where
F: Future + Send + Unpin,
{
type Out = F::Output;
async fn ensure_pending(self, barrier: Arc<Barrier>) -> Self::Out {
let mut fut = self.fuse();
futures::select_biased! {
_ = fut => panic!("fut should be pending"),
_ = barrier.wait().fuse() => (),
}
fut.await
}
}
#[async_trait]
pub trait AbortAndWaitExt {
/// Abort handle and wait for completion.
///
/// Note that this is NOT just a "wait with timeout or panic". This extension is specific to [`JoinHandle`] and will:
///
/// 1. Call [`JoinHandle::abort`].
/// 2. Await the [`JoinHandle`] with a timeout (or panic if the timeout is reached).
/// 3. Check that the handle returned a [`JoinError`] that signals that the tracked task was indeed cancelled and
/// didn't exit otherwise (either by finishing or by panicking).
async fn abort_and_wait(self);
}
#[async_trait]
impl<T> AbortAndWaitExt for JoinHandle<T>
where
T: std::fmt::Debug + Send,
{
async fn abort_and_wait(mut self) {
self.abort();
let join_err = tokio::time::timeout(Duration::from_secs(1), self)
.await
.expect("no timeout")
.expect_err("handle was aborted and therefore MUST fail");
assert!(join_err.is_cancelled());
}
}


@ -24,3 +24,5 @@ pub mod cache;
mod cancellation_safe_future; mod cancellation_safe_future;
pub mod loader; pub mod loader;
pub mod resource_consumption; pub mod resource_consumption;
#[cfg(test)]
mod test_util;


@ -0,0 +1,485 @@
//! Batching of loader requests.
use std::{
collections::HashMap,
fmt::Debug,
future::Future,
hash::Hash,
sync::{
atomic::{AtomicU64, Ordering},
Arc,
},
task::Poll,
};
use async_trait::async_trait;
use futures::FutureExt;
use observability_deps::tracing::trace;
use parking_lot::Mutex;
use tokio::sync::oneshot::{channel, Sender};
use crate::cancellation_safe_future::{CancellationSafeFuture, CancellationSafeFutureReceiver};
use super::Loader;
/// Batch [load](Loader::load) requests.
///
/// Requests against this loader will be [pending](std::task::Poll::Pending) until [flush](BatchLoaderFlusher::flush) is
/// called. To simplify the usage -- esp. in combination with [`Cache::get`] -- use [`BatchLoaderFlusherExt`].
///
///
/// [`Cache::get`]: crate::cache::Cache::get
#[derive(Debug)]
pub struct BatchLoader<K, Extra, V, L>
where
K: Debug + Hash + Send + 'static,
Extra: Debug + Send + 'static,
V: Debug + Send + 'static,
L: Loader<K = Vec<K>, Extra = Vec<Extra>, V = Vec<V>>,
{
inner: Arc<BatchLoaderInner<K, Extra, V, L>>,
}
impl<K, Extra, V, L> BatchLoader<K, Extra, V, L>
where
K: Debug + Hash + Send + 'static,
Extra: Debug + Send + 'static,
V: Debug + Send + 'static,
L: Loader<K = Vec<K>, Extra = Vec<Extra>, V = Vec<V>>,
{
/// Create new batch loader based on a non-batched, vector-based one.
pub fn new(inner: L) -> Self {
Self {
inner: Arc::new(BatchLoaderInner {
inner,
pending: Default::default(),
job_id_counter: Default::default(),
job_handles: Default::default(),
}),
}
}
}
/// State of [`BatchLoader`].
///
/// This is an extra struct so it can be wrapped into an [`Arc`] and shared with the futures that are spawned into
/// [`CancellationSafeFuture`]
#[derive(Debug)]
struct BatchLoaderInner<K, Extra, V, L>
where
K: Debug + Hash + Send + 'static,
Extra: Debug + Send + 'static,
V: Debug + Send + 'static,
L: Loader<K = Vec<K>, Extra = Vec<Extra>, V = Vec<V>>,
{
inner: L,
pending: Mutex<Vec<(K, Extra, Sender<V>)>>,
job_id_counter: AtomicU64,
job_handles: Mutex<HashMap<u64, CancellationSafeFutureReceiver<()>>>,
}
/// Flush interface for [`BatchLoader`].
///
/// This is a trait so you can [type-erase](https://en.wikipedia.org/wiki/Type_erasure) it by putting it into an
/// [`Arc`],
///
/// This trait is object-safe.
#[async_trait]
pub trait BatchLoaderFlusher: Debug + Send + Sync + 'static {
/// Flush all batched requests.
async fn flush(&self);
}
#[async_trait]
impl BatchLoaderFlusher for Arc<dyn BatchLoaderFlusher> {
async fn flush(&self) {
self.as_ref().flush().await;
}
}
#[async_trait]
impl<K, Extra, V, L> BatchLoaderFlusher for BatchLoader<K, Extra, V, L>
where
K: Debug + Hash + Send + 'static,
Extra: Debug + Send + 'static,
V: Debug + Send + 'static,
L: Loader<K = Vec<K>, Extra = Vec<Extra>, V = Vec<V>>,
{
async fn flush(&self) {
trace!("flushing batch loader");
let pending: Vec<_> = {
let mut pending = self.inner.pending.lock();
std::mem::take(pending.as_mut())
};
if pending.is_empty() {
return;
}
let job_id = self.inner.job_id_counter.fetch_add(1, Ordering::SeqCst);
let handle_recv = CancellationSafeFutureReceiver::default();
{
let mut job_handles = self.inner.job_handles.lock();
job_handles.insert(job_id, handle_recv.clone());
}
let inner = Arc::clone(&self.inner);
let fut = CancellationSafeFuture::new(
async move {
let mut keys = Vec::with_capacity(pending.len());
let mut extras = Vec::with_capacity(pending.len());
let mut senders = Vec::with_capacity(pending.len());
for (k, extra, sender) in pending {
keys.push(k);
extras.push(extra);
senders.push(sender);
}
let values = inner.inner.load(keys, extras).await;
assert_eq!(values.len(), senders.len());
for (value, sender) in values.into_iter().zip(senders) {
sender.send(value).unwrap();
}
let mut job_handles = inner.job_handles.lock();
job_handles.remove(&job_id);
},
handle_recv,
);
fut.await;
}
}
#[async_trait]
impl<K, Extra, V, L> Loader for BatchLoader<K, Extra, V, L>
where
K: Debug + Hash + Send + 'static,
Extra: Debug + Send + 'static,
V: Debug + Send + 'static,
L: Loader<K = Vec<K>, Extra = Vec<Extra>, V = Vec<V>>,
{
type K = K;
type Extra = Extra;
type V = V;
async fn load(&self, k: Self::K, extra: Self::Extra) -> Self::V {
let (tx, rx) = channel();
{
let mut pending = self.inner.pending.lock();
pending.push((k, extra, tx));
}
rx.await.unwrap()
}
}
/// Extension trait for [`BatchLoaderFlusher`] because the methods on this extension trait are not object safe.
#[async_trait]
pub trait BatchLoaderFlusherExt {
/// Try to poll all given futures and automatically [flush](BatchLoaderFlusher) if any of them end up in a pending state.
///
/// This guarantees that the order of the results is identical to the order of the futures.
async fn auto_flush<F>(&self, futures: Vec<F>) -> Vec<F::Output>
where
F: Future + Send,
F::Output: Send;
}
#[async_trait]
impl<B> BatchLoaderFlusherExt for B
where
B: BatchLoaderFlusher,
{
async fn auto_flush<F>(&self, futures: Vec<F>) -> Vec<F::Output>
where
F: Future + Send,
F::Output: Send,
{
let mut futures = futures
.into_iter()
.map(|f| f.boxed())
.enumerate()
.collect::<Vec<_>>();
let mut output: Vec<Option<F::Output>> = (0..futures.len()).map(|_| None).collect();
while !futures.is_empty() {
let mut pending = Vec::with_capacity(futures.len());
for (idx, mut f) in futures.into_iter() {
match futures::poll!(&mut f) {
Poll::Ready(res) => {
output[idx] = Some(res);
}
Poll::Pending => {
pending.push((idx, f));
}
}
}
if !pending.is_empty() {
self.flush().await;
}
futures = pending;
}
output
.into_iter()
.map(|o| o.expect("all futures finished"))
.collect()
}
}
#[cfg(test)]
mod tests {
use tokio::sync::Barrier;
use crate::{
cache::{driver::CacheDriver, Cache},
loader::test_util::TestLoader,
test_util::EnsurePendingExt,
};
use super::*;
type TestLoaderT = Arc<TestLoader<Vec<u8>, Vec<bool>, Vec<String>>>;
#[tokio::test]
async fn test_flush_empty() {
let (inner, batch) = setup();
batch.flush().await;
assert_eq!(inner.loaded(), vec![],);
}
#[tokio::test]
async fn test_flush_manual() {
let (inner, batch) = setup();
let pending_barrier_1 = Arc::new(Barrier::new(2));
let pending_barrier_1_captured = Arc::clone(&pending_barrier_1);
let batch_captured = Arc::clone(&batch);
let handle_1 = tokio::spawn(async move {
batch_captured
.load(1, true)
.ensure_pending(pending_barrier_1_captured)
.await
});
pending_barrier_1.wait().await;
let pending_barrier_2 = Arc::new(Barrier::new(2));
let pending_barrier_2_captured = Arc::clone(&pending_barrier_2);
let batch_captured = Arc::clone(&batch);
let handle_2 = tokio::spawn(async move {
batch_captured
.load(2, false)
.ensure_pending(pending_barrier_2_captured)
.await
});
pending_barrier_2.wait().await;
inner.mock_next(vec![1, 2], vec![String::from("foo"), String::from("bar")]);
batch.flush().await;
assert_eq!(inner.loaded(), vec![(vec![1, 2], vec![true, false])],);
assert_eq!(handle_1.await.unwrap(), String::from("foo"));
assert_eq!(handle_2.await.unwrap(), String::from("bar"));
}
/// Simulate the following scenario:
///
/// 1. load `1`, flush it, inner load starts processing `[1]`
/// 2. load `2`, flush it, inner load starts processing `[2]`
/// 3. inner loader returns result for `[2]`, batch loader returns that result as well
/// 4. inner loader returns result for `[1]`, batch loader returns that result as well
#[tokio::test]
async fn test_concurrent_load() {
let (inner, batch) = setup();
let load_barrier_1 = inner.block_next(vec![1], vec![String::from("foo")]);
inner.mock_next(vec![2], vec![String::from("bar")]);
// set up first load
let pending_barrier_1 = Arc::new(Barrier::new(2));
let pending_barrier_1_captured = Arc::clone(&pending_barrier_1);
let batch_captured = Arc::clone(&batch);
let handle_1 = tokio::spawn(async move {
batch_captured
.load(1, true)
.ensure_pending(pending_barrier_1_captured)
.await
});
pending_barrier_1.wait().await;
// flush first load, this is blocked by the load barrier
let pending_barrier_2 = Arc::new(Barrier::new(2));
let pending_barrier_2_captured = Arc::clone(&pending_barrier_2);
let batch_captured = Arc::clone(&batch);
let handle_2 = tokio::spawn(async move {
batch_captured
.flush()
.ensure_pending(pending_barrier_2_captured)
.await;
});
pending_barrier_2.wait().await;
// set up second load
let pending_barrier_3 = Arc::new(Barrier::new(2));
let pending_barrier_3_captured = Arc::clone(&pending_barrier_3);
let batch_captured = Arc::clone(&batch);
let handle_3 = tokio::spawn(async move {
batch_captured
.load(2, false)
.ensure_pending(pending_barrier_3_captured)
.await
});
pending_barrier_3.wait().await;
// flush 2nd load and get result
batch.flush().await;
assert_eq!(handle_3.await.unwrap(), String::from("bar"));
// flush 1st load and get result
load_barrier_1.wait().await;
handle_2.await.unwrap();
assert_eq!(handle_1.await.unwrap(), String::from("foo"));
assert_eq!(
inner.loaded(),
vec![(vec![1], vec![true]), (vec![2], vec![false])],
);
}
#[tokio::test]
async fn test_cancel_flush() {
let (inner, batch) = setup();
let load_barrier_1 = inner.block_next(vec![1], vec![String::from("foo")]);
// set up load
let pending_barrier_1 = Arc::new(Barrier::new(2));
let pending_barrier_1_captured = Arc::clone(&pending_barrier_1);
let batch_captured = Arc::clone(&batch);
let handle_1 = tokio::spawn(async move {
batch_captured
.load(1, true)
.ensure_pending(pending_barrier_1_captured)
.await
});
pending_barrier_1.wait().await;
// flush load, this is blocked by the load barrier
let pending_barrier_2 = Arc::new(Barrier::new(2));
let pending_barrier_2_captured = Arc::clone(&pending_barrier_2);
let batch_captured = Arc::clone(&batch);
let handle_2 = tokio::spawn(async move {
batch_captured
.flush()
.ensure_pending(pending_barrier_2_captured)
.await;
});
pending_barrier_2.wait().await;
// abort flush
handle_2.abort();
// flush load and get result
load_barrier_1.wait().await;
assert_eq!(handle_1.await.unwrap(), String::from("foo"));
assert_eq!(inner.loaded(), vec![(vec![1], vec![true])],);
}
#[tokio::test]
async fn test_cancel_load_and_flush() {
let (inner, batch) = setup();
let load_barrier_1 = inner.block_next(vec![1], vec![String::from("foo")]);
// set up load
let pending_barrier_1 = Arc::new(Barrier::new(2));
let pending_barrier_1_captured = Arc::clone(&pending_barrier_1);
let batch_captured = Arc::clone(&batch);
let handle_1 = tokio::spawn(async move {
batch_captured
.load(1, true)
.ensure_pending(pending_barrier_1_captured)
.await
});
pending_barrier_1.wait().await;
// flush load, this is blocked by the load barrier
let pending_barrier_2 = Arc::new(Barrier::new(2));
let pending_barrier_2_captured = Arc::clone(&pending_barrier_2);
let batch_captured = Arc::clone(&batch);
let handle_2 = tokio::spawn(async move {
batch_captured
.flush()
.ensure_pending(pending_barrier_2_captured)
.await;
});
pending_barrier_2.wait().await;
// abort load and flush
handle_1.abort();
handle_2.abort();
// unblock
load_barrier_1.wait().await;
// load was still driven to completion
assert_eq!(inner.loaded(), vec![(vec![1], vec![true])],);
}
#[tokio::test]
async fn test_auto_flush_with_loader() {
let (inner, batch) = setup();
inner.mock_next(vec![1, 2], vec![String::from("foo"), String::from("bar")]);
assert_eq!(
batch
.auto_flush(vec![batch.load(1, true), batch.load(2, false)])
.await,
vec![String::from("foo"), String::from("bar")],
);
assert_eq!(inner.loaded(), vec![(vec![1, 2], vec![true, false])],);
}
#[tokio::test]
async fn test_auto_flush_integration_with_cache_driver() {
let (inner, batch) = setup();
let cache = CacheDriver::new(Arc::clone(&batch), HashMap::new());
inner.mock_next(vec![1, 2], vec![String::from("foo"), String::from("bar")]);
inner.mock_next(vec![3], vec![String::from("baz")]);
assert_eq!(
batch
.auto_flush(vec![cache.get(1, true), cache.get(2, false)])
.await,
vec![String::from("foo"), String::from("bar")],
);
assert_eq!(
batch
.auto_flush(vec![cache.get(2, true), cache.get(3, true)])
.await,
vec![String::from("bar"), String::from("baz")],
);
assert_eq!(
inner.loaded(),
vec![(vec![1, 2], vec![true, false]), (vec![3], vec![true])],
);
}
fn setup() -> (TestLoaderT, Arc<BatchLoader<u8, bool, String, TestLoaderT>>) {
let inner = TestLoaderT::default();
let batch = Arc::new(BatchLoader::new(Arc::clone(&inner)));
(inner, batch)
}
}
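For orientation, here is a minimal, hypothetical sketch (not part of this commit) of the kind of vector-based loader that `BatchLoader` is meant to wrap, and of issuing requests through `auto_flush`. The `CatalogBatchFetcher` name is invented, and the sketch assumes it lives alongside the module above so that `Loader`, `BatchLoader`, and `BatchLoaderFlusherExt` are in scope.

```rust
use std::sync::Arc;

use async_trait::async_trait;

/// Hypothetical vector-based loader: one call resolves a whole batch of keys.
#[derive(Debug)]
struct CatalogBatchFetcher;

#[async_trait]
impl Loader for CatalogBatchFetcher {
    type K = Vec<u64>;
    type Extra = Vec<()>;
    type V = Vec<String>;

    async fn load(&self, keys: Vec<u64>, _extra: Vec<()>) -> Vec<String> {
        // One round trip for the whole batch. It must return exactly one value
        // per key, in order, because `BatchLoader::flush` zips the values back
        // onto the waiting `load` calls.
        keys.into_iter().map(|k| format!("row-{k}")).collect()
    }
}

#[tokio::test]
async fn sketch_batched_lookup() {
    let batch = Arc::new(BatchLoader::new(CatalogBatchFetcher));

    // Both `load` calls stay pending; `auto_flush` notices that, flushes them
    // as a single `vec![1, 2]` request, and preserves the result order.
    let res = batch
        .auto_flush(vec![batch.load(1, ()), batch.load(2, ())])
        .await;
    assert_eq!(res, vec![String::from("row-1"), String::from("row-2")]);
}
```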


@ -2,6 +2,7 @@
use async_trait::async_trait; use async_trait::async_trait;
use std::{fmt::Debug, future::Future, hash::Hash, marker::PhantomData, sync::Arc}; use std::{fmt::Debug, future::Future, hash::Hash, marker::PhantomData, sync::Arc};
pub mod batch;
pub mod metrics; pub mod metrics;
#[cfg(test)] #[cfg(test)]


@ -14,7 +14,7 @@ enum TestLoaderResponse<V> {
/// An easy-to-mock [`Loader`]. /// An easy-to-mock [`Loader`].
#[derive(Debug, Default)] #[derive(Debug, Default)]
pub struct TestLoader<K = u8, V = String, Extra = bool> pub struct TestLoader<K = u8, Extra = bool, V = String>
where where
K: Clone + Debug + Eq + Hash + Send + 'static, K: Clone + Debug + Eq + Hash + Send + 'static,
Extra: Clone + Debug + Send + 'static, Extra: Clone + Debug + Send + 'static,
@ -25,7 +25,7 @@ where
loaded: Mutex<Vec<(K, Extra)>>, loaded: Mutex<Vec<(K, Extra)>>,
} }
impl<K, V, Extra> TestLoader<K, V, Extra> impl<K, V, Extra> TestLoader<K, Extra, V>
where where
K: Clone + Debug + Eq + Hash + Send + 'static, K: Clone + Debug + Eq + Hash + Send + 'static,
Extra: Clone + Debug + Send + 'static, Extra: Clone + Debug + Send + 'static,
@ -93,7 +93,7 @@ where
} }
} }
impl<K, V, Extra> Drop for TestLoader<K, V, Extra> impl<K, Extra, V> Drop for TestLoader<K, Extra, V>
where where
K: Clone + Debug + Eq + Hash + Send + 'static, K: Clone + Debug + Eq + Hash + Send + 'static,
Extra: Clone + Debug + Send + 'static, Extra: Clone + Debug + Send + 'static,
@ -110,15 +110,15 @@ where
} }
#[async_trait] #[async_trait]
impl<K, V, Extra> Loader for TestLoader<K, V, Extra> impl<K, V, Extra> Loader for TestLoader<K, Extra, V>
where where
K: Clone + Debug + Eq + Hash + Send + 'static, K: Clone + Debug + Eq + Hash + Send + 'static,
Extra: Clone + Debug + Send + 'static, Extra: Clone + Debug + Send + 'static,
V: Clone + Debug + Send + 'static, V: Clone + Debug + Send + 'static,
{ {
type K = K; type K = K;
type V = V;
type Extra = Extra; type Extra = Extra;
type V = V;
async fn load(&self, k: Self::K, extra: Self::Extra) -> Self::V { async fn load(&self, k: Self::K, extra: Self::Extra) -> Self::V {
self.loaded.lock().push((k.clone(), extra)); self.loaded.lock().push((k.clone(), extra));
@ -163,7 +163,7 @@ mod tests {
#[tokio::test] #[tokio::test]
#[should_panic(expected = "entry not mocked")] #[should_panic(expected = "entry not mocked")]
async fn test_loader_panic_entry_unknown() { async fn test_loader_panic_entry_unknown() {
let loader = TestLoader::<u8, String, ()>::default(); let loader = TestLoader::<u8, (), String>::default();
loader.load(1, ()).await; loader.load(1, ()).await;
} }
@ -179,14 +179,14 @@ mod tests {
#[test] #[test]
#[should_panic(expected = "mocked response left")] #[should_panic(expected = "mocked response left")]
fn test_loader_panic_requests_left() { fn test_loader_panic_requests_left() {
let loader = TestLoader::<u8, String, ()>::default(); let loader = TestLoader::<u8, (), String>::default();
loader.mock_next(1, String::from("foo")); loader.mock_next(1, String::from("foo"));
} }
#[test] #[test]
#[should_panic(expected = "panic-by-choice")] #[should_panic(expected = "panic-by-choice")]
fn test_loader_no_double_panic() { fn test_loader_no_double_panic() {
let loader = TestLoader::<u8, String, ()>::default(); let loader = TestLoader::<u8, (), String>::default();
loader.mock_next(1, String::from("foo")); loader.mock_next(1, String::from("foo"));
panic!("panic-by-choice"); panic!("panic-by-choice");
} }


@ -0,0 +1,62 @@
use std::{future::Future, sync::Arc, time::Duration};
use async_trait::async_trait;
use futures::FutureExt;
use tokio::{sync::Barrier, task::JoinHandle};
#[async_trait]
pub trait EnsurePendingExt {
type Out;
/// Ensure that the future is pending. In the pending case, try to pass the given barrier. Afterwards await the future again.
///
/// This is helpful to ensure a future is in a pending state before continuing with the test setup.
async fn ensure_pending(self, barrier: Arc<Barrier>) -> Self::Out;
}
#[async_trait]
impl<F> EnsurePendingExt for F
where
F: Future + Send + Unpin,
{
type Out = F::Output;
async fn ensure_pending(self, barrier: Arc<Barrier>) -> Self::Out {
let mut fut = self.fuse();
futures::select_biased! {
_ = fut => panic!("fut should be pending"),
_ = barrier.wait().fuse() => (),
}
fut.await
}
}
#[async_trait]
pub trait AbortAndWaitExt {
/// Abort handle and wait for completion.
///
/// Note that this is NOT just a "wait with timeout or panic". This extension is specific to [`JoinHandle`] and will:
///
/// 1. Call [`JoinHandle::abort`].
/// 2. Await the [`JoinHandle`] with a timeout (or panic if the timeout is reached).
/// 3. Check that the handle returned a [`JoinError`] that signals that the tracked task was indeed cancelled and
/// didn't exit otherwise (either by finishing or by panicking).
async fn abort_and_wait(self);
}
#[async_trait]
impl<T> AbortAndWaitExt for JoinHandle<T>
where
T: std::fmt::Debug + Send,
{
async fn abort_and_wait(mut self) {
self.abort();
let join_err = tokio::time::timeout(Duration::from_secs(1), self)
.await
.expect("no timeout")
.expect_err("handle was aborted and therefore MUST fail");
assert!(join_err.is_cancelled());
}
}
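As a quick, hypothetical illustration of `AbortAndWaitExt` (mirroring how the batch tests above use `EnsurePendingExt`): spawn a task that never completes, then assert that aborting it really ends in cancellation rather than a normal exit or a panic.

```rust
use crate::test_util::AbortAndWaitExt;

#[tokio::test]
async fn sketch_abort_and_wait() {
    // A task that would run forever unless it is aborted.
    let handle = tokio::spawn(futures::future::pending::<()>());

    // Aborts the handle, awaits it with a one-second timeout, and asserts
    // that the resulting `JoinError` is a cancellation.
    handle.abort_and_wait().await;
}
```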


@ -21,7 +21,7 @@ uuid = { version = "1", features = ["v4"] }
workspace-hack = { version = "0.1", path = "../workspace-hack" } workspace-hack = { version = "0.1", path = "../workspace-hack" }
[dev-dependencies] [dev-dependencies]
tempfile = "3.6.0" tempfile = "3.7.0"
test_helpers = { path = "../test_helpers" } test_helpers = { path = "../test_helpers" }
[features] [features]


@ -9,6 +9,7 @@ license.workspace = true
async-trait = "0.1.71" async-trait = "0.1.71"
backoff = { path = "../backoff" } backoff = { path = "../backoff" }
bytes = "1.4" bytes = "1.4"
chrono = { version = "0.4", default-features = false }
compactor_scheduler = { path = "../compactor_scheduler" } compactor_scheduler = { path = "../compactor_scheduler" }
datafusion = { workspace = true } datafusion = { workspace = true }
data_types = { path = "../data_types" } data_types = { path = "../data_types" }


@ -69,7 +69,7 @@ mod tests {
let err = stream.try_collect::<Vec<_>>().await.unwrap_err(); let err = stream.try_collect::<Vec<_>>().await.unwrap_err();
assert_eq!( assert_eq!(
err.to_string(), err.to_string(),
"Join Error (panic)\ncaused by\nExternal error: foo" "Join Error (panic)\ncaused by\nExternal error: Panic: foo"
); );
} }
} }


@ -2,6 +2,7 @@ use std::{fmt::Display, sync::Arc};
use data_types::{CompactionLevel, ParquetFile}; use data_types::{CompactionLevel, ParquetFile};
use observability_deps::tracing::info; use observability_deps::tracing::info;
use parquet_file::ParquetFilePath;
use uuid::Uuid; use uuid::Uuid;
use crate::{ use crate::{
@ -48,14 +49,21 @@ where
target_level: CompactionLevel, target_level: CompactionLevel,
split_or_compact: FilesToSplitOrCompact, split_or_compact: FilesToSplitOrCompact,
object_store_ids: Vec<Uuid>, object_store_ids: Vec<Uuid>,
object_store_paths: Vec<ParquetFilePath>,
) -> Vec<PlanIR> { ) -> Vec<PlanIR> {
self.inner self.inner.create_plans(
.create_plans(partition, target_level, split_or_compact, object_store_ids) partition,
target_level,
split_or_compact,
object_store_ids,
object_store_paths,
)
} }
fn compact_plan( fn compact_plan(
&self, &self,
files: Vec<ParquetFile>, files: Vec<ParquetFile>,
object_store_paths: Vec<ParquetFilePath>,
object_store_ids: Vec<Uuid>, object_store_ids: Vec<Uuid>,
reason: CompactReason, reason: CompactReason,
partition: Arc<PartitionInfo>, partition: Arc<PartitionInfo>,
@ -65,9 +73,14 @@ where
let n_input_files = files.len(); let n_input_files = files.len();
let column_count = partition.column_count(); let column_count = partition.column_count();
let input_file_size_bytes = files.iter().map(|f| f.file_size_bytes).sum::<i64>(); let input_file_size_bytes = files.iter().map(|f| f.file_size_bytes).sum::<i64>();
let plan = let plan = self.inner.compact_plan(
self.inner files,
.compact_plan(files, object_store_ids, reason, partition, compaction_level); object_store_paths,
object_store_ids,
reason,
partition,
compaction_level,
);
info!( info!(
partition_id = partition_id.get(), partition_id = partition_id.get(),
@ -87,6 +100,7 @@ where
fn split_plan( fn split_plan(
&self, &self,
file_to_split: FileToSplit, file_to_split: FileToSplit,
object_store_path: ParquetFilePath,
object_store_id: Uuid, object_store_id: Uuid,
reason: SplitReason, reason: SplitReason,
partition: Arc<PartitionInfo>, partition: Arc<PartitionInfo>,
@ -98,6 +112,7 @@ where
let input_file_size_bytes = file_to_split.file.file_size_bytes; let input_file_size_bytes = file_to_split.file.file_size_bytes;
let plan = self.inner.split_plan( let plan = self.inner.split_plan(
file_to_split, file_to_split,
object_store_path,
object_store_id, object_store_id,
reason, reason,
partition, partition,


@ -4,6 +4,7 @@ use std::{
}; };
use data_types::{CompactionLevel, ParquetFile}; use data_types::{CompactionLevel, ParquetFile};
use parquet_file::ParquetFilePath;
use uuid::Uuid; use uuid::Uuid;
pub mod logging; pub mod logging;
@ -24,12 +25,14 @@ pub trait IRPlanner: Debug + Display + Send + Sync {
target_level: CompactionLevel, target_level: CompactionLevel,
split_or_compact: FilesToSplitOrCompact, split_or_compact: FilesToSplitOrCompact,
object_store_ids: Vec<Uuid>, object_store_ids: Vec<Uuid>,
object_store_paths: Vec<ParquetFilePath>,
) -> Vec<PlanIR>; ) -> Vec<PlanIR>;
/// Build a plan to compact give files /// Build a plan to compact give files
fn compact_plan( fn compact_plan(
&self, &self,
files: Vec<ParquetFile>, files: Vec<ParquetFile>,
paths: Vec<ParquetFilePath>,
object_store_ids: Vec<Uuid>, object_store_ids: Vec<Uuid>,
reason: CompactReason, reason: CompactReason,
partition: Arc<PartitionInfo>, partition: Arc<PartitionInfo>,
@ -40,6 +43,7 @@ pub trait IRPlanner: Debug + Display + Send + Sync {
fn split_plan( fn split_plan(
&self, &self,
file_to_split: FileToSplit, file_to_split: FileToSplit,
path: ParquetFilePath,
object_store_id: Uuid, object_store_id: Uuid,
reason: SplitReason, reason: SplitReason,
partition: Arc<PartitionInfo>, partition: Arc<PartitionInfo>,


@ -1,6 +1,7 @@
use std::{fmt::Display, sync::Arc}; use std::{fmt::Display, sync::Arc};
use data_types::{ChunkOrder, CompactionLevel, ParquetFile, Timestamp, TimestampMinMax}; use data_types::{ChunkOrder, CompactionLevel, ParquetFile, Timestamp, TimestampMinMax};
use parquet_file::ParquetFilePath;
use uuid::Uuid; use uuid::Uuid;
use crate::{ use crate::{
@ -125,21 +126,31 @@ impl IRPlanner for V1IRPlanner {
target_level: CompactionLevel, target_level: CompactionLevel,
split_or_compact: FilesToSplitOrCompact, split_or_compact: FilesToSplitOrCompact,
object_store_ids: Vec<Uuid>, object_store_ids: Vec<Uuid>,
object_store_paths: Vec<ParquetFilePath>,
) -> Vec<PlanIR> { ) -> Vec<PlanIR> {
match split_or_compact { match split_or_compact {
FilesToSplitOrCompact::Compact(files, reason) => { FilesToSplitOrCompact::Compact(files, reason) => {
vec![self.compact_plan(files, object_store_ids, reason, partition, target_level)] vec![self.compact_plan(
files,
object_store_paths,
object_store_ids,
reason,
partition,
target_level,
)]
} }
FilesToSplitOrCompact::Split(files, reason) => { FilesToSplitOrCompact::Split(files, reason) => {
files files
.into_iter() .into_iter()
.zip(object_store_ids) .zip(object_store_ids)
.map(|(file_to_split, object_store_id)| { .zip(object_store_paths)
.map(|((file_to_split, object_store_id), object_store_path)| {
// target level of a split file is the same as its level // target level of a split file is the same as its level
let target_level = file_to_split.file.compaction_level; let target_level = file_to_split.file.compaction_level;
self.split_plan( self.split_plan(
file_to_split, file_to_split,
object_store_path,
object_store_id, object_store_id,
reason, reason,
Arc::clone(&partition), Arc::clone(&partition),
@ -157,6 +168,7 @@ impl IRPlanner for V1IRPlanner {
fn compact_plan( fn compact_plan(
&self, &self,
files: Vec<ParquetFile>, files: Vec<ParquetFile>,
paths: Vec<ParquetFilePath>,
object_store_ids: Vec<Uuid>, object_store_ids: Vec<Uuid>,
reason: CompactReason, reason: CompactReason,
_partition: Arc<PartitionInfo>, _partition: Arc<PartitionInfo>,
@ -188,13 +200,15 @@ impl IRPlanner for V1IRPlanner {
let files = files let files = files
.into_iter() .into_iter()
.zip(object_store_ids) .zip(object_store_ids)
.map(|(file, object_store_id)| { .zip(paths)
.map(|((file, object_store_id), path)| {
let order = order(file.compaction_level, target_level, file.max_l0_created_at); let order = order(file.compaction_level, target_level, file.max_l0_created_at);
FileIR { FileIR {
file: ParquetFile { file: ParquetFile {
object_store_id, object_store_id,
..file ..file
}, },
path,
order, order,
} }
}) })
@ -248,6 +262,7 @@ impl IRPlanner for V1IRPlanner {
fn split_plan( fn split_plan(
&self, &self,
file_to_split: FileToSplit, file_to_split: FileToSplit,
path: ParquetFilePath,
object_store_id: Uuid, object_store_id: Uuid,
reason: SplitReason, reason: SplitReason,
_partition: Arc<PartitionInfo>, _partition: Arc<PartitionInfo>,
@ -261,6 +276,7 @@ impl IRPlanner for V1IRPlanner {
object_store_id, object_store_id,
..file ..file
}, },
path,
order, order,
}; };


@ -102,6 +102,6 @@ mod tests {
.store(stream, partition, level, max_l0_created_at) .store(stream, partition, level, max_l0_created_at)
.await .await
.unwrap_err(); .unwrap_err();
assert_eq!(err.to_string(), "External error: foo",); assert_eq!(err.to_string(), "External error: Panic: foo",);
} }
} }


@ -49,6 +49,7 @@ pub trait ScratchpadGen: Debug + Display + Send + Sync {
/// SMALLER than the uncompressed Arrow data during compaction itself. /// SMALLER than the uncompressed Arrow data during compaction itself.
#[async_trait] #[async_trait]
pub trait Scratchpad: Debug + Send + Sync + 'static { pub trait Scratchpad: Debug + Send + Sync + 'static {
fn uuids(&self, files: &[ParquetFilePath]) -> Vec<Uuid>;
async fn load_to_scratchpad(&self, files: &[ParquetFilePath]) -> Vec<Uuid>; async fn load_to_scratchpad(&self, files: &[ParquetFilePath]) -> Vec<Uuid>;
async fn make_public(&self, files: &[ParquetFilePath]) -> Vec<Uuid>; async fn make_public(&self, files: &[ParquetFilePath]) -> Vec<Uuid>;
async fn clean_from_scratchpad(&self, files: &[ParquetFilePath]); async fn clean_from_scratchpad(&self, files: &[ParquetFilePath]);


@ -33,6 +33,10 @@ struct NoopScratchpad;
#[async_trait] #[async_trait]
impl Scratchpad for NoopScratchpad { impl Scratchpad for NoopScratchpad {
fn uuids(&self, files: &[ParquetFilePath]) -> Vec<Uuid> {
files.iter().map(|f| f.objest_store_id()).collect()
}
async fn load_to_scratchpad(&self, files: &[ParquetFilePath]) -> Vec<Uuid> { async fn load_to_scratchpad(&self, files: &[ParquetFilePath]) -> Vec<Uuid> {
files.iter().map(|f| f.objest_store_id()).collect() files.iter().map(|f| f.objest_store_id()).collect()
} }


@ -178,6 +178,11 @@ impl Drop for ProdScratchpad {
#[async_trait] #[async_trait]
impl Scratchpad for ProdScratchpad { impl Scratchpad for ProdScratchpad {
fn uuids(&self, files: &[ParquetFilePath]) -> Vec<Uuid> {
let (_, uuids) = self.apply_mask(files);
uuids
}
async fn load_to_scratchpad(&self, files: &[ParquetFilePath]) -> Vec<Uuid> { async fn load_to_scratchpad(&self, files: &[ParquetFilePath]) -> Vec<Uuid> {
let (files_to, uuids) = self.apply_mask(files); let (files_to, uuids) = self.apply_mask(files);
let (files_from, files_to) = self.check_known(files, &files_to, false); let (files_from, files_to) = self.check_known(files, &files_to, false);
@ -323,8 +328,11 @@ mod tests {
assert_content(&store_scratchpad, []).await; assert_content(&store_scratchpad, []).await;
assert_content(&store_output, []).await; assert_content(&store_output, []).await;
let early_get_uuids = pad.uuids(&[f1.clone(), f2.clone()]);
let uuids = pad.load_to_scratchpad(&[f1.clone(), f2.clone()]).await; let uuids = pad.load_to_scratchpad(&[f1.clone(), f2.clone()]).await;
assert_eq!(uuids.len(), 2); assert_eq!(uuids.len(), 2);
assert_eq!(early_get_uuids, uuids);
let f1_masked = f1.clone().with_object_store_id(uuids[0]); let f1_masked = f1.clone().with_object_store_id(uuids[0]);
let f2_masked = f2.clone().with_object_store_id(uuids[1]); let f2_masked = f2.clone().with_object_store_id(uuids[1]);


@ -1,7 +1,9 @@
use std::{num::NonZeroUsize, sync::Arc, time::Duration}; use std::{num::NonZeroUsize, sync::Arc, time::Duration};
use chrono::Utc;
use data_types::{CompactionLevel, ParquetFile, ParquetFileParams, PartitionId}; use data_types::{CompactionLevel, ParquetFile, ParquetFileParams, PartitionId};
use futures::{stream, StreamExt, TryStreamExt}; use futures::{stream, StreamExt, TryStreamExt};
use iox_query::exec::query_tracing::send_metrics_to_tracing;
use observability_deps::tracing::info; use observability_deps::tracing::info;
use parquet_file::ParquetFilePath; use parquet_file::ParquetFilePath;
use tokio::sync::watch::Sender; use tokio::sync::watch::Sender;
@ -17,7 +19,7 @@ use crate::{
Components, Components,
}, },
error::{DynError, ErrorKind, SimpleError}, error::{DynError, ErrorKind, SimpleError},
file_classification::{FileClassification, FilesForProgress, FilesToSplitOrCompact}, file_classification::{FileClassification, FilesForProgress},
partition_info::PartitionInfo, partition_info::PartitionInfo,
PlanIR, RoundInfo, PlanIR, RoundInfo,
}; };
@ -301,8 +303,6 @@ async fn execute_branch(
// throw away the compaction work we've done. // throw away the compaction work we've done.
let saved_parquet_file_state = SavedParquetFileState::from(&branch); let saved_parquet_file_state = SavedParquetFileState::from(&branch);
let input_paths: Vec<ParquetFilePath> = branch.iter().map(ParquetFilePath::from).collect();
// Identify the target level and files that should be // Identify the target level and files that should be
// compacted together, upgraded, and kept for next round of // compacted together, upgraded, and kept for next round of
// compaction // compaction
@ -329,105 +329,128 @@ async fn execute_branch(
} }
let FilesForProgress { let FilesForProgress {
upgrade, mut upgrade,
split_or_compact, split_or_compact,
} = files_to_make_progress_on; } = files_to_make_progress_on;
// Compact & Split let paths = split_or_compact.file_input_paths();
let created_file_params = run_plans( let object_store_ids = scratchpad_ctx.uuids(&paths);
span.child("run_plans"), let plans = components.ir_planner.create_plans(
split_or_compact.clone(), Arc::clone(&partition_info),
&partition_info,
&components,
target_level, target_level,
Arc::clone(&df_semaphore), split_or_compact.clone(),
Arc::<dyn Scratchpad>::clone(&scratchpad_ctx), object_store_ids,
) paths,
.await?; );
// inputs can be removed from the scratchpad as soon as we're done with compaction. let mut files_next: Vec<ParquetFile> = Vec::new();
scratchpad_ctx.clean_from_scratchpad(&input_paths).await;
// upload files to real object store // The number of plans is often small (1), but can be thousands, especially in vertical splitting
let upload_span = span.child("upload_objects"); // scenarios when the partition is highly backlogged. So we chunk the plans into groups to control
let created_file_params = upload_files_to_object_store( // memory usage (all files for all plans in a chunk are loaded to the scratchpad at once), and to
created_file_params, // allow incremental catalog & progress updates. But the chunk size should still be large enough
Arc::<dyn Scratchpad>::clone(&scratchpad_ctx), // to facilitate concurrency in plan execution, which can be accomplished with a small multiple on
) // the concurrency limit.
.await; let mut chunks = plans.into_iter().peekable();
drop(upload_span); while chunks.peek().is_some() {
// 4x run_plans' concurrency limit will allow adequate concurrency.
let chunk: Vec<PlanIR> = chunks
.by_ref()
.take(df_semaphore.total_permits() * 4)
.collect();
for file_param in &created_file_params { let files_to_delete = chunk
info!( .iter()
partition_id = partition_info.partition_id.get(), .flat_map(|plan| plan.input_parquet_files())
uuid = file_param.object_store_id.to_string(), .collect();
bytes = file_param.file_size_bytes,
"uploaded file to objectstore",
);
}
let created_file_paths: Vec<ParquetFilePath> = created_file_params // Compact & Split
.iter() let created_file_params = run_plans(
.map(ParquetFilePath::from) span.child("run_plans"),
.collect(); chunk,
&partition_info,
&components,
Arc::clone(&df_semaphore),
Arc::<dyn Scratchpad>::clone(&scratchpad_ctx),
)
.await?;
// conditionally (if not shaddow mode) remove the newly created files from the scratchpad. // upload files to real object store
scratchpad_ctx let upload_span = span.child("upload_objects");
.clean_written_from_scratchpad(&created_file_paths) let created_file_params = upload_files_to_object_store(
created_file_params,
Arc::<dyn Scratchpad>::clone(&scratchpad_ctx),
)
.await;
drop(upload_span);
for file_param in &created_file_params {
info!(
partition_id = partition_info.partition_id.get(),
uuid = file_param.object_store_id.to_string(),
bytes = file_param.file_size_bytes,
"uploaded file to objectstore",
);
}
let created_file_paths: Vec<ParquetFilePath> = created_file_params
.iter()
.map(ParquetFilePath::from)
.collect();
// conditionally (if not shaddow mode) remove the newly created files from the scratchpad.
scratchpad_ctx
.clean_written_from_scratchpad(&created_file_paths)
.await;
// Update the catalog to reflect the newly created files, soft delete the compacted
// files and update the upgraded files
let (created_files, upgraded_files) = update_catalog(
Arc::clone(&components),
partition_id,
&saved_parquet_file_state,
files_to_delete,
upgrade,
created_file_params,
target_level,
)
.await; .await;
// Update the catalog to reflect the newly created files, soft delete the compacted // we only need to upgrade files on the first iteration, so empty the upgrade list for next loop.
// files and update the upgraded files upgrade = Vec::new();
let files_to_delete = split_or_compact.into_files();
let (created_files, upgraded_files) = update_catalog(
Arc::clone(&components),
partition_id,
saved_parquet_file_state,
files_to_delete,
upgrade,
created_file_params,
target_level,
)
.await;
// Report to `timeout_with_progress_checking` that some progress has been made; stop // Report to `timeout_with_progress_checking` that some progress has been made; stop
// if sending this signal fails because something has gone terribly wrong for the other // if sending this signal fails because something has gone terribly wrong for the other
// end of the channel to not be listening anymore. // end of the channel to not be listening anymore.
if let Err(e) = transmit_progress_signal.send(true) { if let Err(e) = transmit_progress_signal.send(true) {
return Err(Box::new(e)); return Err(Box::new(e));
}
// track this chunk files to return later
files_next.extend(created_files);
files_next.extend(upgraded_files);
} }
// Extend created files, upgraded files and files_to_keep to files_next
let mut files_next = created_files;
files_next.extend(upgraded_files);
files_next.extend(files_to_keep); files_next.extend(files_to_keep);
Ok(files_next) Ok(files_next)
} }
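The `execute_branch` changes above drain the generated plans in chunks of four times the DataFusion semaphore's permit count. As a standalone sketch of just that iterator pattern (placeholder types and values, not the compactor's own):

```rust
// Minimal illustration of chunking with `peekable()` + `by_ref().take(n)`.
fn main() {
    let plans: Vec<u32> = (0..10).collect(); // stand-in for Vec<PlanIR>
    let concurrency_limit = 2; // stand-in for df_semaphore.total_permits()

    let mut chunks = plans.into_iter().peekable();
    while chunks.peek().is_some() {
        // 4x the concurrency limit keeps plan execution busy while bounding
        // how many plan inputs sit in the scratchpad at once.
        let chunk: Vec<u32> = chunks.by_ref().take(concurrency_limit * 4).collect();
        println!("processing {} plans in this chunk", chunk.len());
    }
}
```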
/// Compact or split given files /// Compact or split given files
async fn run_plans( async fn run_plans(
span: SpanRecorder, span: SpanRecorder,
split_or_compact: FilesToSplitOrCompact, plans: Vec<PlanIR>,
partition_info: &Arc<PartitionInfo>, partition_info: &Arc<PartitionInfo>,
components: &Arc<Components>, components: &Arc<Components>,
target_level: CompactionLevel,
df_semaphore: Arc<InstrumentedAsyncSemaphore>, df_semaphore: Arc<InstrumentedAsyncSemaphore>,
scratchpad_ctx: Arc<dyn Scratchpad>, scratchpad_ctx: Arc<dyn Scratchpad>,
) -> Result<Vec<ParquetFileParams>, DynError> { ) -> Result<Vec<ParquetFileParams>, DynError> {
// stage files let paths: Vec<ParquetFilePath> = plans.iter().flat_map(|plan| plan.input_paths()).collect();
let download_span = span.child("download_objects");
let input_uuids_inpad = scratchpad_ctx
.load_to_scratchpad(&split_or_compact.file_input_paths())
.await;
drop(download_span);
let plans = components.ir_planner.create_plans( // stage files. This could move to execute_plan to reduce peak scratchpad memory use, but that would
Arc::clone(partition_info), // cost some concurrency in object downloads.
target_level, let download_span = span.child("download_objects");
split_or_compact, let _ = scratchpad_ctx.load_to_scratchpad(&paths).await;
input_uuids_inpad, drop(download_span);
);
info!( info!(
partition_id = partition_info.partition_id.get(), partition_id = partition_info.partition_id.get(),
@ -448,6 +471,7 @@ async fn run_plans(
partition_info, partition_info,
components, components,
Arc::clone(&df_semaphore), Arc::clone(&df_semaphore),
Arc::<dyn Scratchpad>::clone(&scratchpad_ctx),
) )
}) })
.buffer_unordered(df_semaphore.total_permits()) .buffer_unordered(df_semaphore.total_permits())
@ -463,6 +487,7 @@ async fn execute_plan(
partition_info: &Arc<PartitionInfo>, partition_info: &Arc<PartitionInfo>,
components: &Arc<Components>, components: &Arc<Components>,
df_semaphore: Arc<InstrumentedAsyncSemaphore>, df_semaphore: Arc<InstrumentedAsyncSemaphore>,
scratchpad_ctx: Arc<dyn Scratchpad>,
) -> Result<Vec<ParquetFileParams>, DynError> { ) -> Result<Vec<ParquetFileParams>, DynError> {
span.set_metadata("input_files", plan_ir.input_files().len().to_string()); span.set_metadata("input_files", plan_ir.input_files().len().to_string());
span.set_metadata("input_bytes", plan_ir.input_bytes().to_string()); span.set_metadata("input_bytes", plan_ir.input_bytes().to_string());
@ -508,12 +533,14 @@ async fn execute_plan(
"job semaphore acquired", "job semaphore acquired",
); );
let df_span = span.child("data_fusion"); let df_span = span.child_span("data_fusion");
let plan = components let plan = components
.df_planner .df_planner
.plan(&plan_ir, Arc::clone(partition_info)) .plan(&plan_ir, Arc::clone(partition_info))
.await?; .await?;
let streams = components.df_plan_exec.exec(plan); let streams = components.df_plan_exec.exec(Arc::<
dyn datafusion::physical_plan::ExecutionPlan,
>::clone(&plan));
let job = components.parquet_files_sink.stream_into_file_sink( let job = components.parquet_files_sink.stream_into_file_sink(
streams, streams,
Arc::clone(partition_info), Arc::clone(partition_info),
@ -524,8 +551,18 @@ async fn execute_plan(
// TODO: react to OOM and try to divide branch // TODO: react to OOM and try to divide branch
let res = job.await; let res = job.await;
if let Some(span) = &df_span {
send_metrics_to_tracing(Utc::now(), span, plan.as_ref(), true);
};
drop(permit); drop(permit);
drop(df_span); drop(df_span);
// inputs can be removed from the scratchpad as soon as we're done with compaction.
scratchpad_ctx
.clean_from_scratchpad(&plan_ir.input_paths())
.await;
info!( info!(
partition_id = partition_info.partition_id.get(), partition_id = partition_info.partition_id.get(),
plan_id, "job semaphore released", plan_id, "job semaphore released",
@ -580,7 +617,7 @@ async fn fetch_and_save_parquet_file_state(
async fn update_catalog( async fn update_catalog(
components: Arc<Components>, components: Arc<Components>,
partition_id: PartitionId, partition_id: PartitionId,
saved_parquet_file_state: SavedParquetFileState, saved_parquet_file_state: &SavedParquetFileState,
files_to_delete: Vec<ParquetFile>, files_to_delete: Vec<ParquetFile>,
files_to_upgrade: Vec<ParquetFile>, files_to_upgrade: Vec<ParquetFile>,
file_params_to_create: Vec<ParquetFileParams>, file_params_to_create: Vec<ParquetFileParams>,
@ -592,7 +629,7 @@ async fn update_catalog(
// Right now this only logs; in the future we might decide not to commit these changes // Right now this only logs; in the future we might decide not to commit these changes
let _ignore = components let _ignore = components
.changed_files_filter .changed_files_filter
.apply(&saved_parquet_file_state, &current_parquet_file_state); .apply(saved_parquet_file_state, &current_parquet_file_state);
let created_ids = components let created_ids = components
.commit .commit


@ -1,6 +1,7 @@
use std::fmt::Display; use std::fmt::Display;
use data_types::{ChunkOrder, CompactionLevel, ParquetFile}; use data_types::{ChunkOrder, CompactionLevel, ParquetFile};
use parquet_file::ParquetFilePath;
use crate::file_classification::{CompactReason, NoneReason, SplitReason}; use crate::file_classification::{CompactReason, NoneReason, SplitReason};
@ -78,6 +79,22 @@ impl PlanIR {
} }
} }
/// return the ParquetFiles that will be compacted together
pub fn input_parquet_files(&self) -> Vec<ParquetFile> {
self.input_files()
.iter()
.map(|ir| ir.file.clone())
.collect::<Vec<_>>()
}
/// return the paths of the input files that will be compacted together
pub fn input_paths(&self) -> Vec<ParquetFilePath> {
self.input_files()
.iter()
.map(|ir| ir.path.clone())
.collect::<Vec<_>>()
}
/// return the total bytes of the input files that will be compacted together /// return the total bytes of the input files that will be compacted together
pub fn input_bytes(&self) -> i64 { pub fn input_bytes(&self) -> i64 {
self.input_files() self.input_files()
@ -109,5 +126,6 @@ impl Display for PlanIR {
#[derive(Debug)] #[derive(Debug)]
pub struct FileIR { pub struct FileIR {
pub file: ParquetFile, pub file: ParquetFile,
pub path: ParquetFilePath,
pub order: ChunkOrder, pub order: ChunkOrder,
} }


@ -460,7 +460,7 @@ async fn test_partition_fail() {
&setup, &setup,
[( [(
setup.partition_info.partition_id, setup.partition_info.partition_id,
"serialize\ncaused by\nJoin Error (panic)\ncaused by\nExternal error: foo", "serialize\ncaused by\nJoin Error (panic)\ncaused by\nExternal error: Panic: foo",
)], )],
) )
.await; .await;


@ -17,7 +17,7 @@ once_cell = "1"
ordered-float = "3" ordered-float = "3"
schema = { path = "../schema" } schema = { path = "../schema" }
sha2 = "0.10" sha2 = "0.10"
sqlx = { version = "0.6", features = ["runtime-tokio-rustls", "postgres", "uuid"] } sqlx = { version = "0.7.1", features = ["runtime-tokio-rustls", "postgres", "uuid"] }
thiserror = "1.0.43" thiserror = "1.0.43"
uuid = { version = "1", features = ["v4"] } uuid = { version = "1", features = ["v4"] }
workspace-hack = { version = "0.1", path = "../workspace-hack" } workspace-hack = { version = "0.1", path = "../workspace-hack" }


@ -4,7 +4,6 @@ use super::TableId;
use generated_types::influxdata::iox::schema::v1 as proto; use generated_types::influxdata::iox::schema::v1 as proto;
use influxdb_line_protocol::FieldValue; use influxdb_line_protocol::FieldValue;
use schema::{builder::SchemaBuilder, InfluxColumnType, InfluxFieldType, Schema}; use schema::{builder::SchemaBuilder, InfluxColumnType, InfluxFieldType, Schema};
use sqlx::postgres::PgHasArrayType;
use std::{ use std::{
collections::{BTreeMap, BTreeSet, HashMap}, collections::{BTreeMap, BTreeSet, HashMap},
convert::TryFrom, convert::TryFrom,
@ -26,12 +25,6 @@ impl ColumnId {
} }
} }
impl PgHasArrayType for ColumnId {
fn array_type_info() -> sqlx::postgres::PgTypeInfo {
<i64 as PgHasArrayType>::array_type_info()
}
}
/// Column definitions for a table indexed by their name /// Column definitions for a table indexed by their name
#[derive(Debug, Clone, Eq, PartialEq)] #[derive(Debug, Clone, Eq, PartialEq)]
pub struct ColumnsByName(BTreeMap<String, ColumnSchema>); pub struct ColumnsByName(BTreeMap<String, ColumnSchema>);
@ -328,7 +321,7 @@ impl TryFrom<proto::column_schema::ColumnType> for ColumnType {
/// Set of columns. /// Set of columns.
#[derive(Debug, Clone, PartialEq, Eq, sqlx::Type)] #[derive(Debug, Clone, PartialEq, Eq, sqlx::Type)]
#[sqlx(transparent)] #[sqlx(transparent, no_pg_array)]
pub struct ColumnSet(Vec<ColumnId>); pub struct ColumnSet(Vec<ColumnId>);
impl ColumnSet { impl ColumnSet {


@ -244,7 +244,7 @@ pub static PARTITION_BY_DAY_PROTO: Lazy<Arc<proto::PartitionTemplate>> = Lazy::n
/// A partition template specified by a namespace record. /// A partition template specified by a namespace record.
#[derive(Debug, PartialEq, Clone, Default, sqlx::Type)] #[derive(Debug, PartialEq, Clone, Default, sqlx::Type)]
#[sqlx(transparent)] #[sqlx(transparent, no_pg_array)]
pub struct NamespacePartitionTemplateOverride(Option<serialization::Wrapper>); pub struct NamespacePartitionTemplateOverride(Option<serialization::Wrapper>);
impl TryFrom<proto::PartitionTemplate> for NamespacePartitionTemplateOverride { impl TryFrom<proto::PartitionTemplate> for NamespacePartitionTemplateOverride {
@ -259,7 +259,7 @@ impl TryFrom<proto::PartitionTemplate> for NamespacePartitionTemplateOverride {
/// A partition template specified by a table record. /// A partition template specified by a table record.
#[derive(Debug, PartialEq, Eq, Clone, Default, sqlx::Type)] #[derive(Debug, PartialEq, Eq, Clone, Default, sqlx::Type)]
#[sqlx(transparent)] #[sqlx(transparent, no_pg_array)]
pub struct TablePartitionTemplateOverride(Option<serialization::Wrapper>); pub struct TablePartitionTemplateOverride(Option<serialization::Wrapper>);
impl TablePartitionTemplateOverride { impl TablePartitionTemplateOverride {


@ -7,14 +7,6 @@ yanked = "deny"
unmaintained = "warn" unmaintained = "warn"
notice = "warn" notice = "warn"
ignore = [ ignore = [
# "It was sometimes possible for SQLite versions >= 1.0.12, < 3.39.2 to allow an array-bounds overflow when large
# string were input into SQLite's printf function."
#
# We are not using `printf` with untrusted inputs.
#
# This is currently blocked by upstream:
# https://github.com/launchbadge/sqlx/issues/2346
"RUSTSEC-2022-0090",
] ]
git-fetch-with-cli = true git-fetch-with-cli = true


@ -51,3 +51,4 @@ We hold monthly Tech Talks that explain the project's technical underpinnings. Y
* [Querier <> Ingester Query Protocol](ingester_querier_protocol.md) * [Querier <> Ingester Query Protocol](ingester_querier_protocol.md)
* [Underground Guide to Running IOx Locally](underground_guide.md) * [Underground Guide to Running IOx Locally](underground_guide.md)
* [Query Processing](query_processing.md) * [Query Processing](query_processing.md)
* [How to Reproduce and Debug Production Data Locally](debug.md)

docs/debug.md (new file, 105 lines)

@ -0,0 +1,105 @@
# How to Reproduce and Debug Production Data Locally
Here is a way to reproduce issues using production data locally, with `influxdb_iox` running in all-in-one mode.
## Summary of steps
Reproduce the error locally by building a local catalog from the output of `influxdb_iox remote store get-table`:
1. Download contents of table_name into a directory named 'table_name'
```
influxdb_iox remote store get-table <namespace> <table_name>
```
1. Create a catalog and object_store in /tmp/data_dir
```
influxdb_iox debug build-catalog <table_dir> /tmp/data_dir
```
1. Start iox using this data directory (you can now query `table_name` locally):
```
influxdb_iox --data-dir /tmp/data_dir
```
## Demonstration
## Setup
Running `influxdb_iox` and collecting local Telegraf data
```shell
$ influxdb_iox namespace list
[
{
"id": "1",
"name": "26f7e5a4b7be365b_917b97a92e883afc",
"maxTables": 500,
"maxColumnsPerTable": 200
}
]
```
## Export `cpu` table:
```shell
$ influxdb_iox remote store get-table 26f7e5a4b7be365b_917b97a92e883afc cpu
found 11 Parquet files, exporting...
downloading file 1 of 11 (1b8eb36a-7a34-4635-9156-251efcb1c024.4.parquet)...
downloading file 2 of 11 (1819137f-7cb5-4dc8-8051-6fa0b42990cb.4.parquet)...
downloading file 3 of 11 (4931cad7-7aaf-4b41-8f46-2d3be85c492b.4.parquet)...
downloading file 4 of 11 (be75f5fb-a8bc-4646-893a-70d496b13f3d.4.parquet)...
downloading file 5 of 11 (5235b87d-19ee-48ae-830f-b19d81bfe915.4.parquet)...
downloading file 6 of 11 (a8f7be33-42b6-4353-8735-51b245196d39.4.parquet)...
downloading file 7 of 11 (3b43c4ee-7500-47f9-9c0f-76d4f80b480e.4.parquet)...
downloading file 8 of 11 (081da5be-e0f9-4b42-8cd2-45bfebbd934c.4.parquet)...
downloading file 9 of 11 (f29ba3b4-53b1-4c68-9287-4bcea7c4e86b.4.parquet)...
downloading file 10 of 11 (1ce94ce2-1200-4516-950a-64828a7cebba.4.parquet)...
downloading file 11 of 11 (3a2b5525-3be5-41ef-b082-b279edc32acb.4.parquet)...
Done.
$ ls cpu/
081da5be-e0f9-4b42-8cd2-45bfebbd934c.4.parquet 1ce94ce2-1200-4516-950a-64828a7cebba.4.parquet 4931cad7-7aaf-4b41-8f46-2d3be85c492b.4.parquet be75f5fb-a8bc-4646-893a-70d496b13f3d.4.parquet
081da5be-e0f9-4b42-8cd2-45bfebbd934c.4.parquet.json 1ce94ce2-1200-4516-950a-64828a7cebba.4.parquet.json 4931cad7-7aaf-4b41-8f46-2d3be85c492b.4.parquet.json be75f5fb-a8bc-4646-893a-70d496b13f3d.4.parquet.json
1819137f-7cb5-4dc8-8051-6fa0b42990cb.4.parquet 3a2b5525-3be5-41ef-b082-b279edc32acb.4.parquet 5235b87d-19ee-48ae-830f-b19d81bfe915.4.parquet f29ba3b4-53b1-4c68-9287-4bcea7c4e86b.4.parquet
1819137f-7cb5-4dc8-8051-6fa0b42990cb.4.parquet.json 3a2b5525-3be5-41ef-b082-b279edc32acb.4.parquet.json 5235b87d-19ee-48ae-830f-b19d81bfe915.4.parquet.json f29ba3b4-53b1-4c68-9287-4bcea7c4e86b.4.parquet.json
1b8eb36a-7a34-4635-9156-251efcb1c024.4.parquet 3b43c4ee-7500-47f9-9c0f-76d4f80b480e.4.parquet a8f7be33-42b6-4353-8735-51b245196d39.4.parquet partition.4.json
1b8eb36a-7a34-4635-9156-251efcb1c024.4.parquet.json 3b43c4ee-7500-47f9-9c0f-76d4f80b480e.4.parquet.json a8f7be33-42b6-4353-8735-51b245196d39.4.parquet.json table.1.json
```
## Build a new `new_data_dir` from export:
```shell
$ influxdb_iox debug build-catalog cpu new_data_dir
Beginning catalog / object_store build from "cpu" in "new_data_dir"....
Done
$ ls new_data_dir/
catalog.sqlite object_store/
```
## Run `influxdb_iox` with `new_data_dir`:
```shell
$ influxdb_iox --data-dir new_data_dir/
```
And in a separate shell, you can query the data and see it is present:
```shell
$ influxdb_iox query 26f7e5a4b7be365b_917b97a92e883afc 'select * from cpu limit 10';
+-----------+---------------------+----------------------+-------------+------------------+-------------------+--------------+-----------+------------+---------------+-------------+--------------------+--------------------+
| cpu | host | time | usage_guest | usage_guest_nice | usage_idle | usage_iowait | usage_irq | usage_nice | usage_softirq | usage_steal | usage_system | usage_user |
+-----------+---------------------+----------------------+-------------+------------------+-------------------+--------------+-----------+------------+---------------+-------------+--------------------+--------------------+
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:13:40Z | 0.0 | 0.0 | 95.6668753914105 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.4902943018170824 | 2.8428303068453085 |
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:13:50Z | 0.0 | 0.0 | 95.9551687433697 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.4213261536472683 | 2.6235051029648098 |
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:14:00Z | 0.0 | 0.0 | 96.52108622167991 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.37029157802418 | 2.108622199968126 |
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:14:10Z | 0.0 | 0.0 | 95.26819803491809 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.752519246414341 | 2.979282718922596 |
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:14:20Z | 0.0 | 0.0 | 95.28402329791422 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.6408843239063593 | 3.0750923780335997 |
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:14:30Z | 0.0 | 0.0 | 93.97484827633119 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0271538509716924 | 3.9979978727699588 |
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:14:40Z | 0.0 | 0.0 | 95.69219209824692 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.458894245831095 | 2.848913656031324 |
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:14:50Z | 0.0 | 0.0 | 94.78402607970591 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.9685286188771443 | 3.2474453011797517 |
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:15:00Z | 0.0 | 0.0 | 95.85132344665212 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.5706151054475623 | 2.5780614479731607 |
| cpu0 | MacBook-Pro-8.local | 2023-07-06T17:13:40Z | 0.0 | 0.0 | 78.65055387717186 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 7.452165156077374 | 13.897280966824042 |
+-----------+---------------------+----------------------+-------------+------------------+-------------------+--------------+-----------+------------+---------------+-------------+--------------------+--------------------+
```
View File
@ -327,7 +327,7 @@ Each querier process has a set of in-memory caches. These are:
| ---- | ---- | -------------- | --- | ----- | ------------------------------ | ----- | | ---- | ---- | -------------- | --- | ----- | ------------------------------ | ----- |
| Namespace | Metadata | Catalog | Namespace Name | `CachedNamespace` | refresh policy, TTL, invalidation by unknown table/columns | Unknown entries NOT cached (assumes upstream DDoS protection) | | Namespace | Metadata | Catalog | Namespace Name | `CachedNamespace` | refresh policy, TTL, invalidation by unknown table/columns | Unknown entries NOT cached (assumes upstream DDoS protection) |
| Object Store | Data | Object Store | Path | Raw object store bytes for the entire object | -- | | | Object Store | Data | Object Store | Path | Raw object store bytes for the entire object | -- | |
| Parquet File | Metadata | Catalog | Table ID | Parquet files (all the data that the catalog has, i.e. the entire row) for all files that are NOT marked for deletion. | No refresh yet (see #5718), can be invalidated by ingester watermark. | | | Parquet File | Metadata | Catalog | Table ID | Parquet files (all the data that the catalog has, i.e. the entire row) for all files that are NOT marked for deletion. | TTL, but no refresh yet (see #5718), can be invalidated by ingester watermark. | |
| Partition | Metadata | Catalog | Partition ID | `CachedPartition` | Invalidated if ingester data or any parquet files have columns that are NOT covered by the sort key. | Needs `CachedTable` for access | | Partition | Metadata | Catalog | Partition ID | `CachedPartition` | Invalidated if ingester data or any parquet files have columns that are NOT covered by the sort key. | Needs `CachedTable` for access |
| Projected Schema | Metadata | Querier | Table ID, Column IDs | `ProjectedSchema` | -- | Needs `CachedTable` for access | | Projected Schema | Metadata | Querier | Table ID, Column IDs | `ProjectedSchema` | -- | Needs `CachedTable` for access |
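To make the refresh/TTL/invalidation column above concrete, below is a minimal, self-contained sketch of how a refresh deadline, a TTL, and out-of-band invalidation can be layered on a single cached entry. The names (`CachedEntry`, `Freshness`) are illustrative and are not the querier's actual cache API:
```rust
use std::time::{Duration, Instant};

/// Illustrative only: one cached value plus the policy state that governs it.
struct CachedEntry<V> {
    value: V,
    loaded_at: Instant,
    /// After this age the entry should be reloaded in the background.
    refresh_after: Duration,
    /// After this age the entry must not be served at all.
    ttl: Duration,
    /// Set by out-of-band events, e.g. an ingester watermark showing that the
    /// cached parquet-file list is stale.
    invalidated: bool,
}

#[derive(Debug, PartialEq)]
enum Freshness {
    /// Serve the cached value as-is.
    Fresh,
    /// Serve the cached value, but kick off a background refresh.
    NeedsRefresh,
    /// Drop the entry and reload before answering.
    Expired,
}

impl<V> CachedEntry<V> {
    fn freshness(&self, now: Instant) -> Freshness {
        let age = now.duration_since(self.loaded_at);
        if self.invalidated || age >= self.ttl {
            Freshness::Expired
        } else if age >= self.refresh_after {
            Freshness::NeedsRefresh
        } else {
            Freshness::Fresh
        }
    }
}

fn main() {
    let mut entry = CachedEntry {
        value: "parquet file list",
        loaded_at: Instant::now(),
        refresh_after: Duration::from_secs(30),
        ttl: Duration::from_secs(300),
        invalidated: false,
    };
    println!("cached value: {}", entry.value);
    assert_eq!(entry.freshness(Instant::now()), Freshness::Fresh);

    // Invalidation wins regardless of age.
    entry.invalidated = true;
    assert_eq!(entry.freshness(Instant::now()), Freshness::Expired);
}
```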
View File
@ -12,6 +12,7 @@ observability_deps = { path = "../observability_deps" }
once_cell = { version = "1.18", features = ["parking_lot"] } once_cell = { version = "1.18", features = ["parking_lot"] }
parking_lot = "0.12" parking_lot = "0.12"
pin-project = "1.1" pin-project = "1.1"
snafu = "0.7"
tokio = { version = "1.29" } tokio = { version = "1.29" }
tokio-util = { version = "0.7.8" } tokio-util = { version = "0.7.8" }
tokio_metrics_bridge = { path = "../tokio_metrics_bridge" } tokio_metrics_bridge = { path = "../tokio_metrics_bridge" }
View File
@ -16,6 +16,7 @@
)] )]
use metric::Registry; use metric::Registry;
use snafu::Snafu;
#[cfg(tokio_unstable)] #[cfg(tokio_unstable)]
use tokio_metrics_bridge::setup_tokio_metrics; use tokio_metrics_bridge::setup_tokio_metrics;
// Workaround for "unused crate" lint false positives. // Workaround for "unused crate" lint false positives.
@ -68,8 +69,16 @@ impl Task {
} }
} }
/// The type of error that is returned from tasks in this module /// Errors occurring when polling [`Job`].
pub type Error = String; #[derive(Debug, Snafu)]
#[allow(missing_docs)]
pub enum JobError {
#[snafu(display("Worker thread gone, executor was likely shut down"))]
WorkerGone,
#[snafu(display("Panic: {msg}"))]
Panic { msg: String },
}
/// Job within the executor. /// Job within the executor.
/// ///
@ -80,7 +89,7 @@ pub struct Job<T> {
cancel: CancellationToken, cancel: CancellationToken,
detached: bool, detached: bool,
#[pin] #[pin]
rx: Receiver<Result<T, String>>, rx: Receiver<Result<T, JobError>>,
} }
impl<T> Job<T> { impl<T> Job<T> {
@ -94,7 +103,7 @@ impl<T> Job<T> {
} }
impl<T> Future for Job<T> { impl<T> Future for Job<T> {
type Output = Result<T, Error>; type Output = Result<T, JobError>;
fn poll( fn poll(
self: Pin<&mut Self>, self: Pin<&mut Self>,
@ -103,9 +112,7 @@ impl<T> Future for Job<T> {
let this = self.project(); let this = self.project();
match ready!(this.rx.poll(cx)) { match ready!(this.rx.poll(cx)) {
Ok(res) => std::task::Poll::Ready(res), Ok(res) => std::task::Poll::Ready(res),
Err(_) => std::task::Poll::Ready(Err(String::from( Err(_) => std::task::Poll::Ready(Err(JobError::WorkerGone)),
"Worker thread gone, executor was likely shut down",
))),
} }
} }
} }
@ -315,13 +322,15 @@ impl DedicatedExecutor {
let fut = Box::pin(async move { let fut = Box::pin(async move {
let task_output = AssertUnwindSafe(task).catch_unwind().await.map_err(|e| { let task_output = AssertUnwindSafe(task).catch_unwind().await.map_err(|e| {
if let Some(s) = e.downcast_ref::<String>() { let s = if let Some(s) = e.downcast_ref::<String>() {
s.clone() s.clone()
} else if let Some(s) = e.downcast_ref::<&str>() { } else if let Some(s) = e.downcast_ref::<&str>() {
s.to_string() s.to_string()
} else { } else {
"unknown internal error".to_string() "unknown internal error".to_string()
} };
JobError::Panic { msg: s }
}); });
if tx.send(task_output).is_err() { if tx.send(task_output).is_err() {
@ -571,7 +580,7 @@ mod tests {
let err = dedicated_task.await.unwrap_err(); let err = dedicated_task.await.unwrap_err();
assert_eq!( assert_eq!(
err.to_string(), err.to_string(),
"At the disco, on the dedicated task scheduler", "Panic: At the disco, on the dedicated task scheduler",
); );
exec.join().await; exec.join().await;
@ -590,7 +599,7 @@ mod tests {
// should not be able to get the result // should not be able to get the result
let err = dedicated_task.await.unwrap_err(); let err = dedicated_task.await.unwrap_err();
assert_eq!(err.to_string(), "1 2",); assert_eq!(err.to_string(), "Panic: 1 2",);
exec.join().await; exec.join().await;
} }
@ -608,7 +617,7 @@ mod tests {
// should not be able to get the result // should not be able to get the result
let err = dedicated_task.await.unwrap_err(); let err = dedicated_task.await.unwrap_err();
assert_eq!(err.to_string(), "unknown internal error",); assert_eq!(err.to_string(), "Panic: unknown internal error",);
exec.join().await; exec.join().await;
} }
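For orientation, here is a self-contained sketch of the panic-to-error mapping introduced above, using only `std` rather than the crate's real `DedicatedExecutor`/`Job` types; `run_task` and `panic_msg` are illustrative helpers, while the variant names and messages mirror the diff:
```rust
use std::panic::{catch_unwind, AssertUnwindSafe};

/// Illustrative stand-in for the `JobError` added above; not the crate's type.
#[derive(Debug)]
#[allow(dead_code)]
enum JobError {
    WorkerGone,
    Panic { msg: String },
}

/// Turn a panic payload into a readable message, mirroring the downcast chain
/// in the executor change: `String`, then `&str`, then a generic fallback.
fn panic_msg(e: Box<dyn std::any::Any + Send>) -> String {
    if let Some(s) = e.downcast_ref::<String>() {
        s.clone()
    } else if let Some(s) = e.downcast_ref::<&str>() {
        s.to_string()
    } else {
        "unknown internal error".to_string()
    }
}

/// Run a closure, converting a panic into `JobError::Panic` instead of a bare string.
fn run_task<T>(task: impl FnOnce() -> T) -> Result<T, JobError> {
    catch_unwind(AssertUnwindSafe(task)).map_err(|e| JobError::Panic { msg: panic_msg(e) })
}

fn main() {
    // Silence the default panic hook so only our own output is printed.
    std::panic::set_hook(Box::new(|_| {}));

    let res: Result<(), JobError> = run_task(|| panic!("At the disco"));
    match res {
        Err(JobError::Panic { msg }) => println!("Panic: {msg}"),
        Err(JobError::WorkerGone) => println!("Worker thread gone, executor was likely shut down"),
        Ok(()) => println!("ok"),
    }
}
```
This prints `Panic: At the disco`, matching the message format the updated tests assert on.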
View File
@ -32,5 +32,5 @@ metric = { path = "../metric" }
once_cell = { version = "1.18", features = ["parking_lot"] } once_cell = { version = "1.18", features = ["parking_lot"] }
parquet_file = { path = "../parquet_file" } parquet_file = { path = "../parquet_file" }
tempfile = "3" tempfile = "3"
sqlx = { version = "0.6", features = [ "runtime-tokio-rustls" ] } sqlx = { version = "0.7.1", features = [ "runtime-tokio-rustls" ] }
View File
@ -14,7 +14,7 @@ prost = "0.11.9"
thiserror = "1.0.40" thiserror = "1.0.40"
tokio = { version = "1.28.2", features = ["net", "io-util", "time", "rt", "sync", "macros"] } tokio = { version = "1.28.2", features = ["net", "io-util", "time", "rt", "sync", "macros"] }
tracing = "0.1.37" tracing = "0.1.37"
uuid = { version = "1.3.3", features = ["v4"] } uuid = { version = "1.4.1", features = ["v4"] }
workspace-hack = { version = "0.1", path = "../workspace-hack" } workspace-hack = { version = "0.1", path = "../workspace-hack" }
[build-dependencies] [build-dependencies]
View File
@ -69,7 +69,7 @@ once_cell = { version = "1.18", features = ["parking_lot"] }
rustyline = { version = "12.0", default-features = false, features = ["with-file-history"]} rustyline = { version = "12.0", default-features = false, features = ["with-file-history"]}
serde_json = "1.0.103" serde_json = "1.0.103"
snafu = "0.7" snafu = "0.7"
tempfile = "3.6.0" tempfile = "3.7.0"
thiserror = "1.0.43" thiserror = "1.0.43"
tikv-jemalloc-ctl = { version = "0.5.0", optional = true } tikv-jemalloc-ctl = { version = "0.5.0", optional = true }
tokio = { version = "1.29", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time", "io-std"] } tokio = { version = "1.29", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time", "io-std"] }
@ -93,7 +93,7 @@ predicate = { path = "../predicate" }
predicates = "3.0.3" predicates = "3.0.3"
pretty_assertions = "1.4.0" pretty_assertions = "1.4.0"
proptest = { version = "1.2.0", default-features = false } proptest = { version = "1.2.0", default-features = false }
serde = "1.0.171" serde = "1.0.173"
test_helpers = { path = "../test_helpers", features = ["future_timeout"] } test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
test_helpers_end_to_end = { path = "../test_helpers_end_to_end" } test_helpers_end_to_end = { path = "../test_helpers_end_to_end" }
insta = { version = "1", features = ["yaml"] } insta = { version = "1", features = ["yaml"] }
View File
@ -50,12 +50,12 @@ enum Command {
Schema(schema::Config), Schema(schema::Config),
// NB: The example formatting below is weird so that Clap makes a nice help text // NB: The example formatting below is weird so that Clap makes a nice help text
/// Build a local catalog from the output of `remote get-table`. /// Build a local catalog from the output of `remote store get-table`.
/// ///
/// For example: /// For example:
/// ```text /// ```text
/// # download contents of table_name into a directory named 'table_name' /// # download contents of table_name into a directory named 'table_name'
/// influxdb_iox remote get-table <namespace> <table_name> /// influxdb_iox remote store get-table <namespace> <table_name>
/// ///
/// # Create a catalog and object_store in /tmp/data_dir /// # Create a catalog and object_store in /tmp/data_dir
/// influxdb_iox debug build-catalog <table_dir> /tmp/data_dir /// influxdb_iox debug build-catalog <table_dir> /tmp/data_dir
View File
@ -957,7 +957,7 @@ async fn query_ingester() {
test_helpers::maybe_start_logging(); test_helpers::maybe_start_logging();
let database_url = maybe_skip_integration!(); let database_url = maybe_skip_integration!();
let mut cluster = MiniCluster::create_shared(database_url).await; let mut cluster = MiniCluster::create_shared_never_persist(database_url).await;
StepTest::new( StepTest::new(
&mut cluster, &mut cluster,
View File
@ -1,10 +1,5 @@
//! Tests the `influxdb_iox debug` commands //! Tests the `influxdb_iox debug` commands
use std::{ use std::path::Path;
collections::VecDeque,
io::Write,
path::{Path, PathBuf},
time::Duration,
};
use arrow::record_batch::RecordBatch; use arrow::record_batch::RecordBatch;
use arrow_util::assert_batches_sorted_eq; use arrow_util::assert_batches_sorted_eq;
@ -12,7 +7,6 @@ use assert_cmd::Command;
use futures::FutureExt; use futures::FutureExt;
use predicates::prelude::*; use predicates::prelude::*;
use tempfile::TempDir; use tempfile::TempDir;
use test_helpers::timeout::FutureTimeout;
use test_helpers_end_to_end::{ use test_helpers_end_to_end::{
maybe_skip_integration, run_sql, MiniCluster, ServerFixture, Step, StepTest, StepTestState, maybe_skip_integration, run_sql, MiniCluster, ServerFixture, Step, StepTest, StepTestState,
TestConfig, TestConfig,
@ -52,8 +46,6 @@ async fn test_print_cpu() {
/// 3. Start an all-in-one instance from that rebuilt catalog /// 3. Start an all-in-one instance from that rebuilt catalog
/// 4. Can run a query successfully /// 4. Can run a query successfully
#[tokio::test] #[tokio::test]
// Ignore due to https://github.com/influxdata/influxdb_iox/issues/8203
#[ignore]
async fn build_catalog() { async fn build_catalog() {
test_helpers::maybe_start_logging(); test_helpers::maybe_start_logging();
let database_url = maybe_skip_integration!(); let database_url = maybe_skip_integration!();
@ -111,20 +103,11 @@ async fn build_catalog() {
let table_dir = export_dir.path().join(table_name); let table_dir = export_dir.path().join(table_name);
// We can build a catalog and start up the server and run a query // We can build a catalog and start up the server and run a query
let restarted = RestartedServer::build_catalog_and_start(&table_dir).await; rebuild_and_query(&table_dir, &namespace, sql, &expected).await;
let batches = restarted
.run_sql_until_non_empty(sql, namespace.as_str())
.await;
assert_batches_sorted_eq!(&expected, &batches);
// We can also rebuild a catalog from just the parquet files // We can also rebuild a catalog from just the parquet files
let only_parquet_dir = copy_only_parquet_files(&table_dir); let only_parquet_dir = copy_only_parquet_files(&table_dir);
let restarted = rebuild_and_query(only_parquet_dir.path(), &namespace, sql, &expected).await;
RestartedServer::build_catalog_and_start(only_parquet_dir.path()).await;
let batches = restarted
.run_sql_until_non_empty(sql, namespace.as_str())
.await;
assert_batches_sorted_eq!(&expected, &batches);
} }
.boxed() .boxed()
})), })),
@ -134,6 +117,30 @@ async fn build_catalog() {
.await .await
} }
/// Rebuilds a catalog from an export directory, starts up a server,
/// and verifies that running `sql` in `namespace` produces `expected`.
async fn rebuild_and_query(table_dir: &Path, namespace: &str, sql: &str, expected: &[&str]) {
// Very occasionally, something goes wrong with the sqlite-based
// catalog and it doesn't get the new files, so try a few times.
//
// See https://github.com/influxdata/influxdb_iox/issues/8287
let mut retries = 5;
while retries > 0 {
println!("** Retries remaining: {retries}");
let restarted = RestartedServer::build_catalog_and_start(table_dir).await;
let batches = restarted.run_sql(sql, namespace).await;
// if we got results, great, otherwise try again
if !batches.is_empty() {
assert_batches_sorted_eq!(expected, &batches);
return;
}
retries -= 1;
}
panic!("query did not return results after retries; see https://github.com/influxdata/influxdb_iox/issues/8287");
}
/// An all in one instance, with data directory of `data_dir` /// An all in one instance, with data directory of `data_dir`
struct RestartedServer { struct RestartedServer {
all_in_one: ServerFixture, all_in_one: ServerFixture,
@ -171,7 +178,7 @@ impl RestartedServer {
println!("target_directory: {data_dir:?}"); println!("target_directory: {data_dir:?}");
// call `influxdb_iox debug build-catalog <table_dir> <new_data_dir>` // call `influxdb_iox debug build-catalog <table_dir> <new_data_dir>`
let cmd = Command::cargo_bin("influxdb_iox") Command::cargo_bin("influxdb_iox")
.unwrap() .unwrap()
// use -v to enable logging so we can check the status messages // use -v to enable logging so we can check the status messages
.arg("-vv") .arg("-vv")
@ -180,31 +187,18 @@ impl RestartedServer {
.arg(exported_table_dir.as_os_str().to_str().unwrap()) .arg(exported_table_dir.as_os_str().to_str().unwrap())
.arg(data_dir.path().as_os_str().to_str().unwrap()) .arg(data_dir.path().as_os_str().to_str().unwrap())
.assert() .assert()
.success(); .success()
.stdout(
// debug information to track down https://github.com/influxdata/influxdb_iox/issues/8203 predicate::str::contains("Beginning catalog / object_store build")
println!("***** Begin build-catalog STDOUT ****"); .and(predicate::str::contains(
std::io::stdout() "Begin importing files total_files=1",
.write_all(&cmd.get_output().stdout) ))
.unwrap(); .and(predicate::str::contains(
println!("***** Begin build-catalog STDERR ****"); "Completed importing files total_files=1",
std::io::stdout() )),
.write_all(&cmd.get_output().stderr) );
.unwrap();
println!("***** DONE ****");
cmd.stdout(
predicate::str::contains("Beginning catalog / object_store build")
.and(predicate::str::contains(
"Begin importing files total_files=1",
))
.and(predicate::str::contains(
"Completed importing files total_files=1",
)),
);
println!("Completed rebuild in {data_dir:?}"); println!("Completed rebuild in {data_dir:?}");
RecursiveDirPrinter::new().print(data_dir.path());
// now, start up a new server in all-in-one mode // now, start up a new server in all-in-one mode
// using the newly built data directory // using the newly built data directory
@ -216,27 +210,6 @@ impl RestartedServer {
data_dir, data_dir,
} }
} }
/// Runs the SQL query against this server, in a loop until
/// results are returned. Panics if the results are not produced
/// within a 5 seconds
async fn run_sql_until_non_empty(&self, sql: &str, namespace: &str) -> Vec<RecordBatch> {
let timeout = Duration::from_secs(5);
let loop_sleep = Duration::from_millis(500);
let fut = async {
loop {
let batches = self.run_sql(sql, namespace).await;
if !batches.is_empty() {
return batches;
}
tokio::time::sleep(loop_sleep).await;
}
};
fut.with_timeout(timeout)
.await
.expect("timed out waiting for non-empty batches in result")
}
} }
/// Copies only parquet files from the source directory to a new /// Copies only parquet files from the source directory to a new
@ -262,43 +235,3 @@ fn copy_only_parquet_files(src: &Path) -> TempDir {
} }
target_dir target_dir
} }
/// Prints out the contents of the directory recursively
/// for debugging.
///
/// ```text
/// RecursiveDirPrinter All files rooted at "/tmp/.tmpvf16r0"
/// "/tmp/.tmpvf16r0"
/// "/tmp/.tmpvf16r0/catalog.sqlite"
/// "/tmp/.tmpvf16r0/object_store"
/// "/tmp/.tmpvf16r0/object_store/1"
/// "/tmp/.tmpvf16r0/object_store/1/1"
/// "/tmp/.tmpvf16r0/object_store/1/1/b862a7e9b329ee6a418cde191198eaeb1512753f19b87a81def2ae6c3d0ed237"
/// "/tmp/.tmpvf16r0/object_store/1/1/b862a7e9b329ee6a418cde191198eaeb1512753f19b87a81def2ae6c3d0ed237/d78abef6-6859-48eb-aa62-3518097fbb9b.parquet"
///
struct RecursiveDirPrinter {
paths: VecDeque<PathBuf>,
}
impl RecursiveDirPrinter {
fn new() -> Self {
Self {
paths: VecDeque::new(),
}
}
// print root and all directories
fn print(mut self, root: &Path) {
println!("RecursiveDirPrinter All files rooted at {root:?}");
self.paths.push_back(PathBuf::from(root));
while let Some(path) = self.paths.pop_front() {
println!("{path:?}");
if path.is_dir() {
for entry in std::fs::read_dir(path).unwrap() {
self.paths.push_front(entry.unwrap().path());
}
}
}
}
}
View File
@ -1,8 +1,8 @@
use std::{collections::HashMap, path::PathBuf, sync::Arc}; use std::path::PathBuf;
use arrow::{ use arrow::{
array::as_generic_binary_array, array::as_generic_binary_array,
datatypes::{DataType, Fields, Schema, SchemaRef, TimeUnit}, datatypes::{DataType, Schema, TimeUnit},
record_batch::RecordBatch, record_batch::RecordBatch,
}; };
use arrow_flight::{ use arrow_flight::{
@ -1592,10 +1592,7 @@ async fn assert_schema(client: &mut FlightClient, cmd: Any) {
let mut saw_data = false; let mut saw_data = false;
while let Some(batch) = result_stream.try_next().await.unwrap() { while let Some(batch) = result_stream.try_next().await.unwrap() {
saw_data = true; saw_data = true;
// strip metadata (GetFlightInfo doesn't include metadata for let batch_schema = batch.schema();
// some reason) before comparison
// https://github.com/influxdata/influxdb_iox/issues/7282
let batch_schema = strip_metadata(&batch.schema());
assert_eq!( assert_eq!(
batch_schema.as_ref(), batch_schema.as_ref(),
&flight_info_schema, &flight_info_schema,
@ -1603,10 +1600,6 @@ async fn assert_schema(client: &mut FlightClient, cmd: Any) {
); );
// The stream itself also may report a schema // The stream itself also may report a schema
if let Some(stream_schema) = result_stream.schema() { if let Some(stream_schema) = result_stream.schema() {
// strip metadata (GetFlightInfo doesn't include metadata for
// some reason) before comparison
// https://github.com/influxdata/influxdb_iox/issues/7282
let stream_schema = strip_metadata(stream_schema);
assert_eq!(stream_schema.as_ref(), &flight_info_schema); assert_eq!(stream_schema.as_ref(), &flight_info_schema);
} }
} }
@ -1615,16 +1608,6 @@ async fn assert_schema(client: &mut FlightClient, cmd: Any) {
assert!(saw_data); assert!(saw_data);
} }
fn strip_metadata(schema: &Schema) -> SchemaRef {
let stripped_fields: Fields = schema
.fields()
.iter()
.map(|f| f.as_ref().clone().with_metadata(HashMap::new()))
.collect();
Arc::new(Schema::new(stripped_fields))
}
#[tokio::test] #[tokio::test]
async fn authz() { async fn authz() {
test_helpers::maybe_start_logging(); test_helpers::maybe_start_logging();
View File
@ -235,9 +235,9 @@ async fn test_tracing_create_compactor_trace() {
// "shallow" packet inspection and verify the UDP server got omething that had some expected // "shallow" packet inspection and verify the UDP server got omething that had some expected
// results. We could look for any text of any of the compaction spans. The name of the span // results. We could look for any text of any of the compaction spans. The name of the span
// for data fusion execution is arbitrarily chosen. // for acquiring permit is arbitrarily chosen.
udp_capture udp_capture
.wait_for(|m| m.to_string().contains("data_fusion")) .wait_for(|m| m.to_string().contains("acquire_permit"))
.await; .await;
// debugging assistance // debugging assistance
View File
@ -135,3 +135,28 @@ SELECT
from cpu from cpu
where time between timestamp '2000-05-05T12:00:00Z' and timestamp '2000-05-05T12:59:00Z' where time between timestamp '2000-05-05T12:00:00Z' and timestamp '2000-05-05T12:59:00Z'
group by region, minute; group by region, minute;
-- With a VALUES clause, which affects how the range is found
-- Fix for https://github.com/influxdata/idpe/issues/17880
SELECT
date_bin_gapfill(INTERVAL '1 minute', time) as _time,
pod,
locf(selector_last(image, time))
FROM
(VALUES ('2023-06-10T12:00:00Z'::timestamp, 'pod1', 'imageA'),
('2023-06-10T12:00:00Z'::timestamp, 'pod2', 'imageA'),
('2023-06-10T12:00:01Z'::timestamp, 'pod1', 'imageB'),
('2023-06-10T12:00:02Z'::timestamp, 'pod1', 'imageB'),
('2023-06-10T12:00:02Z'::timestamp, 'pod2', 'imageB')
) AS data(time, pod, image)
WHERE time >= timestamp '2023-06-10T11:55:00Z' AND time < timestamp '2023-06-10T12:05:00Z'
GROUP BY _time, pod;
-- This is not supported since the grouping is not on the values produced by
-- date_bin_gapfill. The query should fail with a reasonable message.
select
date_bin_gapfill('60 seconds'::interval, time)::bigint as time,
sum(idle)
from cpu
WHERE time >= '2020-06-11T16:52:00Z' AND time < '2020-06-11T16:54:00Z'
group by 1;
View File
@ -223,3 +223,32 @@ Error during planning: gap-filling query is missing lower time bound
| b | 2000-05-05T12:40:00Z | 27.049999999999997 | | b | 2000-05-05T12:40:00Z | 27.049999999999997 |
| b | 2000-05-05T12:50:00Z | 27.049999999999997 | | b | 2000-05-05T12:50:00Z | 27.049999999999997 |
+--------+----------------------+--------------------+ +--------+----------------------+--------------------+
-- SQL: SELECT date_bin_gapfill(INTERVAL '1 minute', time) as _time, pod, locf(selector_last(image, time)) FROM (VALUES ('2023-06-10T12:00:00Z'::timestamp, 'pod1', 'imageA'), ('2023-06-10T12:00:00Z'::timestamp, 'pod2', 'imageA'), ('2023-06-10T12:00:01Z'::timestamp, 'pod1', 'imageB'), ('2023-06-10T12:00:02Z'::timestamp, 'pod1', 'imageB'), ('2023-06-10T12:00:02Z'::timestamp, 'pod2', 'imageB') ) AS data(time, pod, image) WHERE time >= timestamp '2023-06-10T11:55:00Z' AND time < timestamp '2023-06-10T12:05:00Z' GROUP BY _time, pod;
+----------------------+------+--------------------------------------------+
| _time | pod | locf(selector_last(image,time)) |
+----------------------+------+--------------------------------------------+
| 2023-06-10T11:55:00Z | pod1 | |
| 2023-06-10T11:56:00Z | pod1 | |
| 2023-06-10T11:57:00Z | pod1 | |
| 2023-06-10T11:58:00Z | pod1 | |
| 2023-06-10T11:59:00Z | pod1 | |
| 2023-06-10T12:00:00Z | pod1 | {value: imageB, time: 2023-06-10T12:00:02} |
| 2023-06-10T12:01:00Z | pod1 | {value: imageB, time: 2023-06-10T12:00:02} |
| 2023-06-10T12:02:00Z | pod1 | {value: imageB, time: 2023-06-10T12:00:02} |
| 2023-06-10T12:03:00Z | pod1 | {value: imageB, time: 2023-06-10T12:00:02} |
| 2023-06-10T12:04:00Z | pod1 | {value: imageB, time: 2023-06-10T12:00:02} |
| 2023-06-10T11:55:00Z | pod2 | |
| 2023-06-10T11:56:00Z | pod2 | |
| 2023-06-10T11:57:00Z | pod2 | |
| 2023-06-10T11:58:00Z | pod2 | |
| 2023-06-10T11:59:00Z | pod2 | |
| 2023-06-10T12:00:00Z | pod2 | {value: imageB, time: 2023-06-10T12:00:02} |
| 2023-06-10T12:01:00Z | pod2 | {value: imageB, time: 2023-06-10T12:00:02} |
| 2023-06-10T12:02:00Z | pod2 | {value: imageB, time: 2023-06-10T12:00:02} |
| 2023-06-10T12:03:00Z | pod2 | {value: imageB, time: 2023-06-10T12:00:02} |
| 2023-06-10T12:04:00Z | pod2 | {value: imageB, time: 2023-06-10T12:00:02} |
+----------------------+------+--------------------------------------------+
-- SQL: select date_bin_gapfill('60 seconds'::interval, time)::bigint as time, sum(idle) from cpu WHERE time >= '2020-06-11T16:52:00Z' AND time < '2020-06-11T16:54:00Z' group by 1;
Error while planning query: Optimizer rule 'handle_gap_fill' failed
caused by
Error during planning: DATE_BIN_GAPFILL must a top-level expression in the GROUP BY clause when gap filling. It cannot be part of another expression or cast
View File
@ -339,6 +339,12 @@ SELECT COUNT(f64), SUM(f64) FROM m0 GROUP BY TIME(30s) FILL(none);
-- supports offset parameter -- supports offset parameter
SELECT COUNT(f64), SUM(f64) FROM m0 GROUP BY TIME(30s, 1s) FILL(none); SELECT COUNT(f64), SUM(f64) FROM m0 GROUP BY TIME(30s, 1s) FILL(none);
-- N.B. The gap filling of the COUNT(usage_idle) and COUNT(bytes_free)
-- columns happens before the two measurements are UNIONed together
-- when producing the output table. This means that a COUNT column for
-- a field that is not present for a measurement will contain NULLs,
-- rather than being filled with 0s. This is consistent with older
-- versions of influxdb.
SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk; SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk;
SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk GROUP BY TIME(1s) FILL(none); SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk GROUP BY TIME(1s) FILL(none);
SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk GROUP BY cpu; SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk GROUP BY cpu;
@ -360,7 +366,9 @@ SELECT COUNT(usage_idle), usage_idle FROM cpu;
-- Default FILL(null) when FILL is omitted -- Default FILL(null) when FILL is omitted
SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s); SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s);
SELECT COUNT(usage_idle)+2 FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s);
SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s); SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s);
SELECT COUNT(usage_idle)+1, COUNT(bytes_free)+2 FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s);
SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(null); SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(null);
SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(null); SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(null);
SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(previous); SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(previous);
@ -655,3 +663,6 @@ SELECT SUM(bytes_free) / SUM(bytes_used) AS result FROM disk WHERE time >= '2022
-- Unsupported: host is a field in one subquery and a tag in the other -- Unsupported: host is a field in one subquery and a tag in the other
SELECT count(host) FROM (SELECT usage_idle AS host FROM cpu), (SELECT host, usage_idle FROM cpu); SELECT count(host) FROM (SELECT usage_idle AS host FROM cpu), (SELECT host, usage_idle FROM cpu);
-- Using a selector or an aggregate function on a tag column returns NULL
SELECT last(host) AS host, first(usage_idle) AS usage_idle FROM cpu GROUP BY host;
View File
@ -919,10 +919,10 @@ name: logical_plan
plan plan
Sort: iox::measurement ASC NULLS LAST, tag0 ASC NULLS LAST, time ASC NULLS LAST Sort: iox::measurement ASC NULLS LAST, tag0 ASC NULLS LAST, time ASC NULLS LAST
Union Union
Projection: Dictionary(Int32, Utf8("m0")) AS iox::measurement, TimestampNanosecond(0, None) AS time, m0.tag0 AS tag0, COUNT(m0.f64) AS count, SUM(m0.f64) AS sum, STDDEV(m0.f64) AS stddev Projection: Dictionary(Int32, Utf8("m0")) AS iox::measurement, TimestampNanosecond(0, None) AS time, m0.tag0 AS tag0, coalesce_struct(COUNT(m0.f64), Int64(0)) AS count, SUM(m0.f64) AS sum, STDDEV(m0.f64) AS stddev
Aggregate: groupBy=[[m0.tag0]], aggr=[[COUNT(m0.f64), SUM(m0.f64), STDDEV(m0.f64)]] Aggregate: groupBy=[[m0.tag0]], aggr=[[COUNT(m0.f64), SUM(m0.f64), STDDEV(m0.f64)]]
TableScan: m0 projection=[f64, tag0] TableScan: m0 projection=[f64, tag0]
Projection: Dictionary(Int32, Utf8("m1")) AS iox::measurement, TimestampNanosecond(0, None) AS time, m1.tag0 AS tag0, COUNT(m1.f64) AS count, SUM(m1.f64) AS sum, STDDEV(m1.f64) AS stddev Projection: Dictionary(Int32, Utf8("m1")) AS iox::measurement, TimestampNanosecond(0, None) AS time, m1.tag0 AS tag0, coalesce_struct(COUNT(m1.f64), Int64(0)) AS count, SUM(m1.f64) AS sum, STDDEV(m1.f64) AS stddev
Aggregate: groupBy=[[m1.tag0]], aggr=[[COUNT(m1.f64), SUM(m1.f64), STDDEV(m1.f64)]] Aggregate: groupBy=[[m1.tag0]], aggr=[[COUNT(m1.f64), SUM(m1.f64), STDDEV(m1.f64)]]
TableScan: m1 projection=[f64, tag0] TableScan: m1 projection=[f64, tag0]
name: physical_plan name: physical_plan
@ -930,7 +930,7 @@ name: physical_plan
SortPreservingMergeExec: [iox::measurement@0 ASC NULLS LAST,tag0@2 ASC NULLS LAST,time@1 ASC NULLS LAST] SortPreservingMergeExec: [iox::measurement@0 ASC NULLS LAST,tag0@2 ASC NULLS LAST,time@1 ASC NULLS LAST]
UnionExec UnionExec
SortExec: expr=[iox::measurement@0 ASC NULLS LAST,tag0@2 ASC NULLS LAST,time@1 ASC NULLS LAST] SortExec: expr=[iox::measurement@0 ASC NULLS LAST,tag0@2 ASC NULLS LAST,time@1 ASC NULLS LAST]
ProjectionExec: expr=[m0 as iox::measurement, 0 as time, tag0@0 as tag0, COUNT(m0.f64)@1 as count, SUM(m0.f64)@2 as sum, STDDEV(m0.f64)@3 as stddev] ProjectionExec: expr=[m0 as iox::measurement, 0 as time, tag0@0 as tag0, coalesce_struct(COUNT(m0.f64)@1, 0) as count, SUM(m0.f64)@2 as sum, STDDEV(m0.f64)@3 as stddev]
AggregateExec: mode=FinalPartitioned, gby=[tag0@0 as tag0], aggr=[COUNT(m0.f64), SUM(m0.f64), STDDEV(m0.f64)] AggregateExec: mode=FinalPartitioned, gby=[tag0@0 as tag0], aggr=[COUNT(m0.f64), SUM(m0.f64), STDDEV(m0.f64)]
CoalesceBatchesExec: target_batch_size=8192 CoalesceBatchesExec: target_batch_size=8192
RepartitionExec: partitioning=Hash([tag0@0], 4), input_partitions=4 RepartitionExec: partitioning=Hash([tag0@0], 4), input_partitions=4
@ -938,7 +938,7 @@ name: physical_plan
AggregateExec: mode=Partial, gby=[tag0@1 as tag0], aggr=[COUNT(m0.f64), SUM(m0.f64), STDDEV(m0.f64)] AggregateExec: mode=Partial, gby=[tag0@1 as tag0], aggr=[COUNT(m0.f64), SUM(m0.f64), STDDEV(m0.f64)]
ParquetExec: file_groups={1 group: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, projection=[f64, tag0] ParquetExec: file_groups={1 group: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, projection=[f64, tag0]
SortExec: expr=[iox::measurement@0 ASC NULLS LAST,tag0@2 ASC NULLS LAST,time@1 ASC NULLS LAST] SortExec: expr=[iox::measurement@0 ASC NULLS LAST,tag0@2 ASC NULLS LAST,time@1 ASC NULLS LAST]
ProjectionExec: expr=[m1 as iox::measurement, 0 as time, tag0@0 as tag0, COUNT(m1.f64)@1 as count, SUM(m1.f64)@2 as sum, STDDEV(m1.f64)@3 as stddev] ProjectionExec: expr=[m1 as iox::measurement, 0 as time, tag0@0 as tag0, coalesce_struct(COUNT(m1.f64)@1, 0) as count, SUM(m1.f64)@2 as sum, STDDEV(m1.f64)@3 as stddev]
RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=4 RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=4
AggregateExec: mode=FinalPartitioned, gby=[tag0@0 as tag0], aggr=[COUNT(m1.f64), SUM(m1.f64), STDDEV(m1.f64)], ordering_mode=FullyOrdered AggregateExec: mode=FinalPartitioned, gby=[tag0@0 as tag0], aggr=[COUNT(m1.f64), SUM(m1.f64), STDDEV(m1.f64)], ordering_mode=FullyOrdered
CoalesceBatchesExec: target_batch_size=8192 CoalesceBatchesExec: target_batch_size=8192
@ -1267,9 +1267,19 @@ name: cpu
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:00:00 | 6 | | 2022-10-31T02:00:00 | 6 |
| 2022-10-31T02:00:30 | | | 2022-10-31T02:00:30 | 0 |
| 2022-10-31T02:01:00 | | | 2022-10-31T02:01:00 | 0 |
| 2022-10-31T02:01:30 | | | 2022-10-31T02:01:30 | 0 |
+---------------------+-------+
-- InfluxQL: SELECT COUNT(usage_idle)+2 FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s);
name: cpu
+---------------------+-------+
| time | count |
+---------------------+-------+
| 2022-10-31T02:00:00 | 8 |
| 2022-10-31T02:00:30 | 2 |
| 2022-10-31T02:01:00 | 2 |
| 2022-10-31T02:01:30 | 2 |
+---------------------+-------+ +---------------------+-------+
-- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s); -- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s);
name: cpu name: cpu
@ -1277,18 +1287,37 @@ name: cpu
| time | count | count_1 | | time | count | count_1 |
+---------------------+-------+---------+ +---------------------+-------+---------+
| 2022-10-31T02:00:00 | 6 | | | 2022-10-31T02:00:00 | 6 | |
| 2022-10-31T02:00:30 | | | | 2022-10-31T02:00:30 | 0 | |
| 2022-10-31T02:01:00 | | | | 2022-10-31T02:01:00 | 0 | |
| 2022-10-31T02:01:30 | | | | 2022-10-31T02:01:30 | 0 | |
+---------------------+-------+---------+ +---------------------+-------+---------+
name: disk name: disk
+---------------------+-------+---------+ +---------------------+-------+---------+
| time | count | count_1 | | time | count | count_1 |
+---------------------+-------+---------+ +---------------------+-------+---------+
| 2022-10-31T02:00:00 | | 6 | | 2022-10-31T02:00:00 | | 6 |
| 2022-10-31T02:00:30 | | | | 2022-10-31T02:00:30 | | 0 |
| 2022-10-31T02:01:00 | | | | 2022-10-31T02:01:00 | | 0 |
| 2022-10-31T02:01:30 | | | | 2022-10-31T02:01:30 | | 0 |
+---------------------+-------+---------+
-- InfluxQL: SELECT COUNT(usage_idle)+1, COUNT(bytes_free)+2 FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s);
name: cpu
+---------------------+-------+---------+
| time | count | count_1 |
+---------------------+-------+---------+
| 2022-10-31T02:00:00 | 7 | |
| 2022-10-31T02:00:30 | 1 | |
| 2022-10-31T02:01:00 | 1 | |
| 2022-10-31T02:01:30 | 1 | |
+---------------------+-------+---------+
name: disk
+---------------------+-------+---------+
| time | count | count_1 |
+---------------------+-------+---------+
| 2022-10-31T02:00:00 | | 8 |
| 2022-10-31T02:00:30 | | 2 |
| 2022-10-31T02:01:00 | | 2 |
| 2022-10-31T02:01:30 | | 2 |
+---------------------+-------+---------+ +---------------------+-------+---------+
-- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(null); -- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(null);
name: cpu name: cpu
@ -1296,9 +1325,9 @@ name: cpu
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:00:00 | 6 | | 2022-10-31T02:00:00 | 6 |
| 2022-10-31T02:00:30 | | | 2022-10-31T02:00:30 | 0 |
| 2022-10-31T02:01:00 | | | 2022-10-31T02:01:00 | 0 |
| 2022-10-31T02:01:30 | | | 2022-10-31T02:01:30 | 0 |
+---------------------+-------+ +---------------------+-------+
-- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(null); -- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(null);
name: cpu name: cpu
@ -1306,18 +1335,18 @@ name: cpu
| time | count | count_1 | | time | count | count_1 |
+---------------------+-------+---------+ +---------------------+-------+---------+
| 2022-10-31T02:00:00 | 6 | | | 2022-10-31T02:00:00 | 6 | |
| 2022-10-31T02:00:30 | | | | 2022-10-31T02:00:30 | 0 | |
| 2022-10-31T02:01:00 | | | | 2022-10-31T02:01:00 | 0 | |
| 2022-10-31T02:01:30 | | | | 2022-10-31T02:01:30 | 0 | |
+---------------------+-------+---------+ +---------------------+-------+---------+
name: disk name: disk
+---------------------+-------+---------+ +---------------------+-------+---------+
| time | count | count_1 | | time | count | count_1 |
+---------------------+-------+---------+ +---------------------+-------+---------+
| 2022-10-31T02:00:00 | | 6 | | 2022-10-31T02:00:00 | | 6 |
| 2022-10-31T02:00:30 | | | | 2022-10-31T02:00:30 | | 0 |
| 2022-10-31T02:01:00 | | | | 2022-10-31T02:01:00 | | 0 |
| 2022-10-31T02:01:30 | | | | 2022-10-31T02:01:30 | | 0 |
+---------------------+-------+---------+ +---------------------+-------+---------+
-- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(previous); -- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(previous);
name: cpu name: cpu
@ -1507,9 +1536,9 @@ tags: cpu=cpu-total
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:00:00 | 2 | | 2022-10-31T02:00:00 | 2 |
| 2022-10-31T02:00:30 | | | 2022-10-31T02:00:30 | 0 |
| 2022-10-31T02:01:00 | | | 2022-10-31T02:01:00 | 0 |
| 2022-10-31T02:01:30 | | | 2022-10-31T02:01:30 | 0 |
+---------------------+-------+ +---------------------+-------+
name: cpu name: cpu
tags: cpu=cpu0 tags: cpu=cpu0
@ -1517,9 +1546,9 @@ tags: cpu=cpu0
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:00:00 | 2 | | 2022-10-31T02:00:00 | 2 |
| 2022-10-31T02:00:30 | | | 2022-10-31T02:00:30 | 0 |
| 2022-10-31T02:01:00 | | | 2022-10-31T02:01:00 | 0 |
| 2022-10-31T02:01:30 | | | 2022-10-31T02:01:30 | 0 |
+---------------------+-------+ +---------------------+-------+
name: cpu name: cpu
tags: cpu=cpu1 tags: cpu=cpu1
@ -1527,9 +1556,9 @@ tags: cpu=cpu1
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:00:00 | 2 | | 2022-10-31T02:00:00 | 2 |
| 2022-10-31T02:00:30 | | | 2022-10-31T02:00:30 | 0 |
| 2022-10-31T02:01:00 | | | 2022-10-31T02:01:00 | 0 |
| 2022-10-31T02:01:30 | | | 2022-10-31T02:01:30 | 0 |
+---------------------+-------+ +---------------------+-------+
-- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s), cpu FILL(null); -- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s), cpu FILL(null);
name: cpu name: cpu
@ -1538,9 +1567,9 @@ tags: cpu=cpu-total
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:00:00 | 2 | | 2022-10-31T02:00:00 | 2 |
| 2022-10-31T02:00:30 | | | 2022-10-31T02:00:30 | 0 |
| 2022-10-31T02:01:00 | | | 2022-10-31T02:01:00 | 0 |
| 2022-10-31T02:01:30 | | | 2022-10-31T02:01:30 | 0 |
+---------------------+-------+ +---------------------+-------+
name: cpu name: cpu
tags: cpu=cpu0 tags: cpu=cpu0
@ -1548,9 +1577,9 @@ tags: cpu=cpu0
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:00:00 | 2 | | 2022-10-31T02:00:00 | 2 |
| 2022-10-31T02:00:30 | | | 2022-10-31T02:00:30 | 0 |
| 2022-10-31T02:01:00 | | | 2022-10-31T02:01:00 | 0 |
| 2022-10-31T02:01:30 | | | 2022-10-31T02:01:30 | 0 |
+---------------------+-------+ +---------------------+-------+
name: cpu name: cpu
tags: cpu=cpu1 tags: cpu=cpu1
@ -1558,9 +1587,9 @@ tags: cpu=cpu1
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:00:00 | 2 | | 2022-10-31T02:00:00 | 2 |
| 2022-10-31T02:00:30 | | | 2022-10-31T02:00:30 | 0 |
| 2022-10-31T02:01:00 | | | 2022-10-31T02:01:00 | 0 |
| 2022-10-31T02:01:30 | | | 2022-10-31T02:01:30 | 0 |
+---------------------+-------+ +---------------------+-------+
-- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s), cpu, device FILL(null); -- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s), cpu, device FILL(null);
name: cpu name: cpu
@ -1569,9 +1598,9 @@ tags: cpu=cpu-total, device=
| time | count | count_1 | | time | count | count_1 |
+---------------------+-------+---------+ +---------------------+-------+---------+
| 2022-10-31T02:00:00 | 2 | | | 2022-10-31T02:00:00 | 2 | |
| 2022-10-31T02:00:30 | | | | 2022-10-31T02:00:30 | 0 | |
| 2022-10-31T02:01:00 | | | | 2022-10-31T02:01:00 | 0 | |
| 2022-10-31T02:01:30 | | | | 2022-10-31T02:01:30 | 0 | |
+---------------------+-------+---------+ +---------------------+-------+---------+
name: cpu name: cpu
tags: cpu=cpu0, device= tags: cpu=cpu0, device=
@ -1579,9 +1608,9 @@ tags: cpu=cpu0, device=
| time | count | count_1 | | time | count | count_1 |
+---------------------+-------+---------+ +---------------------+-------+---------+
| 2022-10-31T02:00:00 | 2 | | | 2022-10-31T02:00:00 | 2 | |
| 2022-10-31T02:00:30 | | | | 2022-10-31T02:00:30 | 0 | |
| 2022-10-31T02:01:00 | | | | 2022-10-31T02:01:00 | 0 | |
| 2022-10-31T02:01:30 | | | | 2022-10-31T02:01:30 | 0 | |
+---------------------+-------+---------+ +---------------------+-------+---------+
name: cpu name: cpu
tags: cpu=cpu1, device= tags: cpu=cpu1, device=
@ -1589,9 +1618,9 @@ tags: cpu=cpu1, device=
| time | count | count_1 | | time | count | count_1 |
+---------------------+-------+---------+ +---------------------+-------+---------+
| 2022-10-31T02:00:00 | 2 | | | 2022-10-31T02:00:00 | 2 | |
| 2022-10-31T02:00:30 | | | | 2022-10-31T02:00:30 | 0 | |
| 2022-10-31T02:01:00 | | | | 2022-10-31T02:01:00 | 0 | |
| 2022-10-31T02:01:30 | | | | 2022-10-31T02:01:30 | 0 | |
+---------------------+-------+---------+ +---------------------+-------+---------+
name: disk name: disk
tags: cpu=, device=disk1s1 tags: cpu=, device=disk1s1
@ -1599,9 +1628,9 @@ tags: cpu=, device=disk1s1
| time | count | count_1 | | time | count | count_1 |
+---------------------+-------+---------+ +---------------------+-------+---------+
| 2022-10-31T02:00:00 | | 2 | | 2022-10-31T02:00:00 | | 2 |
| 2022-10-31T02:00:30 | | | | 2022-10-31T02:00:30 | | 0 |
| 2022-10-31T02:01:00 | | | | 2022-10-31T02:01:00 | | 0 |
| 2022-10-31T02:01:30 | | | | 2022-10-31T02:01:30 | | 0 |
+---------------------+-------+---------+ +---------------------+-------+---------+
name: disk name: disk
tags: cpu=, device=disk1s2 tags: cpu=, device=disk1s2
@ -1609,9 +1638,9 @@ tags: cpu=, device=disk1s2
| time | count | count_1 | | time | count | count_1 |
+---------------------+-------+---------+ +---------------------+-------+---------+
| 2022-10-31T02:00:00 | | 2 | | 2022-10-31T02:00:00 | | 2 |
| 2022-10-31T02:00:30 | | | | 2022-10-31T02:00:30 | | 0 |
| 2022-10-31T02:01:00 | | | | 2022-10-31T02:01:00 | | 0 |
| 2022-10-31T02:01:30 | | | | 2022-10-31T02:01:30 | | 0 |
+---------------------+-------+---------+ +---------------------+-------+---------+
name: disk name: disk
tags: cpu=, device=disk1s5 tags: cpu=, device=disk1s5
@ -1619,9 +1648,9 @@ tags: cpu=, device=disk1s5
| time | count | count_1 | | time | count | count_1 |
+---------------------+-------+---------+ +---------------------+-------+---------+
| 2022-10-31T02:00:00 | | 2 | | 2022-10-31T02:00:00 | | 2 |
| 2022-10-31T02:00:30 | | | | 2022-10-31T02:00:30 | | 0 |
| 2022-10-31T02:01:00 | | | | 2022-10-31T02:01:00 | | 0 |
| 2022-10-31T02:01:30 | | | | 2022-10-31T02:01:30 | | 0 |
+---------------------+-------+---------+ +---------------------+-------+---------+
-- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s), cpu FILL(previous); -- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s), cpu FILL(previous);
name: cpu name: cpu
@ -2202,15 +2231,15 @@ name: cpu
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:00:00 | 6 | | 2022-10-31T02:00:00 | 6 |
| 2022-10-31T02:00:30 | | | 2022-10-31T02:00:30 | 0 |
+---------------------+-------+ +---------------------+-------+
-- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:05:00Z' GROUP BY TIME(30s) LIMIT 2 OFFSET 2; -- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:05:00Z' GROUP BY TIME(30s) LIMIT 2 OFFSET 2;
name: cpu name: cpu
+---------------------+-------+ +---------------------+-------+
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:01:00 | | | 2022-10-31T02:01:00 | 0 |
| 2022-10-31T02:01:30 | | | 2022-10-31T02:01:30 | 0 |
+---------------------+-------+ +---------------------+-------+
-- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:05:00Z' GROUP BY TIME(30s), cpu LIMIT 2; -- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:05:00Z' GROUP BY TIME(30s), cpu LIMIT 2;
name: cpu name: cpu
@ -2219,7 +2248,7 @@ tags: cpu=cpu-total
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:00:00 | 2 | | 2022-10-31T02:00:00 | 2 |
| 2022-10-31T02:00:30 | | | 2022-10-31T02:00:30 | 0 |
+---------------------+-------+ +---------------------+-------+
name: cpu name: cpu
tags: cpu=cpu0 tags: cpu=cpu0
@ -2227,7 +2256,7 @@ tags: cpu=cpu0
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:00:00 | 2 | | 2022-10-31T02:00:00 | 2 |
| 2022-10-31T02:00:30 | | | 2022-10-31T02:00:30 | 0 |
+---------------------+-------+ +---------------------+-------+
name: cpu name: cpu
tags: cpu=cpu1 tags: cpu=cpu1
@ -2235,7 +2264,7 @@ tags: cpu=cpu1
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:00:00 | 2 | | 2022-10-31T02:00:00 | 2 |
| 2022-10-31T02:00:30 | | | 2022-10-31T02:00:30 | 0 |
+---------------------+-------+ +---------------------+-------+
-- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) LIMIT 1; -- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) LIMIT 1;
name: cpu name: cpu
@ -2268,13 +2297,13 @@ name: cpu
+---------------------+-------+---------+ +---------------------+-------+---------+
| time | count | count_1 | | time | count | count_1 |
+---------------------+-------+---------+ +---------------------+-------+---------+
| 2022-10-31T02:01:30 | | | | 2022-10-31T02:01:30 | 0 | |
+---------------------+-------+---------+ +---------------------+-------+---------+
name: disk name: disk
+---------------------+-------+---------+ +---------------------+-------+---------+
| time | count | count_1 | | time | count | count_1 |
+---------------------+-------+---------+ +---------------------+-------+---------+
| 2022-10-31T02:01:30 | | | | 2022-10-31T02:01:30 | | 0 |
+---------------------+-------+---------+ +---------------------+-------+---------+
-- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s), cpu, device LIMIT 1; -- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s), cpu, device LIMIT 1;
name: cpu name: cpu
@ -3088,3 +3117,11 @@ name: disk
+---------------------+----------------------+ +---------------------+----------------------+
-- InfluxQL: SELECT count(host) FROM (SELECT usage_idle AS host FROM cpu), (SELECT host, usage_idle FROM cpu); -- InfluxQL: SELECT count(host) FROM (SELECT usage_idle AS host FROM cpu), (SELECT host, usage_idle FROM cpu);
Error while planning query: This feature is not implemented: cannot mix tag and field columns with the same name: host Error while planning query: This feature is not implemented: cannot mix tag and field columns with the same name: host
-- InfluxQL: SELECT last(host) AS host, first(usage_idle) AS usage_idle FROM cpu GROUP BY host;
name: cpu
tags: host=
+---------------------+------+------------+
| time | host | usage_idle |
+---------------------+------+------------+
| 1970-01-01T00:00:00 | | 2.98 |
+---------------------+------+------------+
View File
@ -21,6 +21,19 @@ SELECT difference(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AN
-- group by time and a tag -- group by time and a tag
SELECT difference(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; SELECT difference(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
--
-- difference + selector
--
SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
-- the input data is regular data at 10s intervals, so 7s windows ensure the `first` generates windows with NULL values to test NULL handling of difference
SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
-- linear filling of selector functions produces an execution error
-- (see https://github.com/influxdata/influxdb_iox/issues/8302).
-- SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
-- group by time and a tag
SELECT difference(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
-- --
-- non_negative_difference -- non_negative_difference
@ -35,6 +48,11 @@ SELECT non_negative_difference(usage_idle) FROM cpu WHERE time >= 00000001300000
-- --
SELECT non_negative_difference(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; SELECT non_negative_difference(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
--
-- non_negative_difference + selector
--
SELECT non_negative_difference(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
-- --
-- moving_average -- moving_average
-- --
@ -61,6 +79,17 @@ SELECT moving_average(mean(writes), 3) FROM diskio WHERE time >= 000000013000000
SELECT moving_average(mean(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous); SELECT moving_average(mean(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
SELECT moving_average(mean(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear); SELECT moving_average(mean(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
--
-- moving_average + selector
--
-- the input data is regular data at 10s intervals, so 7s windows ensure the `first` generates windows with NULL values to test NULL handling of moving_average
SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
-- linear filling of selector functions produces an execution error
-- (see https://github.com/influxdata/influxdb_iox/issues/8302).
-- SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
-- --
-- combining window functions -- combining window functions
-- --
@ -109,7 +138,7 @@ SELECT derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AN
SELECT derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s); SELECT derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
SELECT derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s); SELECT derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
SELECT derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s); SELECT derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
-- the input data is regular data at 10s intervals, so 7s windows ensure the `mean` generates windows with NULL values to test NULL handling of difference -- the input data is regular data at 10s intervals, so 7s windows ensure the `mean` generates windows with NULL values to test NULL handling of derivative
SELECT derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0); SELECT derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
SELECT derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0); SELECT derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
SELECT derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous); SELECT derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
@ -120,6 +149,26 @@ SELECT derivative(mean(writes), 500ms) FROM diskio WHERE time >= 000000013000000
SELECT derivative(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; SELECT derivative(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
SELECT derivative(mean(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; SELECT derivative(mean(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
--
-- derivative + selector
--
SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
-- the input data is regular data at 10s intervals, so 7s windows ensure the `first` generates windows with NULL values to test NULL handling of derivative
SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
-- linear filling of selector functions produces an execution error
-- (see https://github.com/influxdata/influxdb_iox/issues/8302).
-- SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
-- SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
-- group by time and a tag
SELECT derivative(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
SELECT derivative(first(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
-- --
-- non_negative_derivative -- non_negative_derivative
-- --
@ -138,7 +187,7 @@ SELECT non_negative_derivative(mean(writes)) FROM diskio WHERE time >= 000000013
SELECT non_negative_derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s); SELECT non_negative_derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
SELECT non_negative_derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s); SELECT non_negative_derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
SELECT non_negative_derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s); SELECT non_negative_derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
-- the input data is regular data at 10s intervals, so 7s windows ensure the `mean` generates windows with NULL values to test NULL handling of difference -- the input data is regular data at 10s intervals, so 7s windows ensure the `mean` generates windows with NULL values to test NULL handling of non_negative_derivative
SELECT non_negative_derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0); SELECT non_negative_derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
SELECT non_negative_derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0); SELECT non_negative_derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
SELECT non_negative_derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous); SELECT non_negative_derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
@ -148,3 +197,57 @@ SELECT non_negative_derivative(mean(writes), 500ms) FROM diskio WHERE time >= 00
-- group by time and a tag -- group by time and a tag
SELECT non_negative_derivative(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; SELECT non_negative_derivative(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
SELECT non_negative_derivative(mean(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; SELECT non_negative_derivative(mean(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
--
-- non_negative_derivative + selector
--
SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
-- the input data is regular data at 10s intervals, so 7s windows ensure the `first` generates windows with NULL values to test NULL handling of non_negative_derivative
SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
-- linear filling of selector functions produces an execution error
-- (see https://github.com/influxdata/influxdb_iox/issues/8302).
-- SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
-- SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
-- group by time and a tag
SELECT non_negative_derivative(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
SELECT non_negative_derivative(first(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
--
-- cumulative_sum
--
SELECT cumulative_sum(writes) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001;
SELECT cumulative_sum(usage_system) FROM cpu WHERE time >= 0000000060000000000 AND time < 0000000210000000001 AND cpu = 'cpu0';
SELECT cumulative_sum(usage_idle), cumulative_sum(usage_system) FROM cpu WHERE time >= 0000000060000000000 AND time < 0000000210000000001 AND cpu = 'cpu0';
SELECT cumulative_sum(usage_idle) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY cpu;
--
-- cumulative_sum + aggregate
--
SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
-- the input data is regular data at 10s intervals, so 7s windows ensure the `mean` generates windows with NULL values to test NULL handling of cumulative_sum
SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
-- group by time and a tag
SELECT cumulative_sum(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
--
-- cumulative_sum + selector
--
SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
-- the input data is regular data at 10s intervals, so 7s windows ensure the `first` generates windows with NULL values to test NULL handling of cumulative_sum
SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
-- linear filling of selector functions produces an execution error
-- (see https://github.com/influxdata/influxdb_iox/issues/8302).
-- SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
-- group by time and a tag
SELECT cumulative_sum(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
@ -148,6 +148,86 @@ tags: cpu=cpu1
| 1970-01-01T00:02:30 | -0.03333333333334565 | | 1970-01-01T00:02:30 | -0.03333333333334565 |
| 1970-01-01T00:03:00 | -0.03333333333333144 | | 1970-01-01T00:03:00 | -0.03333333333333144 |
+---------------------+----------------------+ +---------------------+----------------------+
-- InfluxQL: SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
name: diskio
+---------------------+------------+
| time | difference |
+---------------------+------------+
| 1970-01-01T00:02:20 | 164 |
| 1970-01-01T00:02:27 | 187 |
| 1970-01-01T00:02:34 | 112 |
| 1970-01-01T00:02:48 | 110 |
| 1970-01-01T00:02:55 | 219 |
| 1970-01-01T00:03:09 | 75 |
| 1970-01-01T00:03:16 | 76 |
| 1970-01-01T00:03:30 | 146 |
+---------------------+------------+
-- InfluxQL: SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
name: diskio
+---------------------+------------+
| time | difference |
+---------------------+------------+
| 1970-01-01T00:02:00 | 366 |
| 1970-01-01T00:02:30 | 421 |
| 1970-01-01T00:03:00 | 441 |
| 1970-01-01T00:03:30 | 297 |
+---------------------+------------+
-- InfluxQL: SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
name: diskio
+---------------------+------------+
| time | difference |
+---------------------+------------+
| 1970-01-01T00:02:06 | 5592646 |
| 1970-01-01T00:02:13 | -5592646 |
| 1970-01-01T00:02:20 | 5592810 |
| 1970-01-01T00:02:27 | 187 |
| 1970-01-01T00:02:34 | 112 |
| 1970-01-01T00:02:41 | -5593109 |
| 1970-01-01T00:02:48 | 5593219 |
| 1970-01-01T00:02:55 | 219 |
| 1970-01-01T00:03:02 | -5593438 |
| 1970-01-01T00:03:09 | 5593513 |
| 1970-01-01T00:03:16 | 76 |
| 1970-01-01T00:03:23 | -5593589 |
| 1970-01-01T00:03:30 | 5593735 |
+---------------------+------------+
-- InfluxQL: SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
name: diskio
+---------------------+------------+
| time | difference |
+---------------------+------------+
| 1970-01-01T00:02:13 | 0 |
| 1970-01-01T00:02:20 | 164 |
| 1970-01-01T00:02:27 | 187 |
| 1970-01-01T00:02:34 | 112 |
| 1970-01-01T00:02:41 | 0 |
| 1970-01-01T00:02:48 | 110 |
| 1970-01-01T00:02:55 | 219 |
| 1970-01-01T00:03:02 | 0 |
| 1970-01-01T00:03:09 | 75 |
| 1970-01-01T00:03:16 | 76 |
| 1970-01-01T00:03:23 | 0 |
| 1970-01-01T00:03:30 | 146 |
+---------------------+------------+
-- InfluxQL: SELECT difference(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
name: cpu
tags: cpu=cpu0
+---------------------+---------------------+
| time | difference |
+---------------------+---------------------+
| 1970-01-01T00:02:00 | -0.7999999999999972 |
| 1970-01-01T00:02:30 | 3.5 |
| 1970-01-01T00:03:00 | -0.4000000000000057 |
+---------------------+---------------------+
name: cpu
tags: cpu=cpu1
+---------------------+----------------------+
| time | difference |
+---------------------+----------------------+
| 1970-01-01T00:02:00 | 0.20000000000000284 |
| 1970-01-01T00:02:30 | 0.0 |
| 1970-01-01T00:03:00 | -0.10000000000000853 |
+---------------------+----------------------+
-- InfluxQL: SELECT non_negative_difference(usage_system) FROM cpu WHERE time >= 0000000060000000000 AND time < 0000000210000000001 AND cpu = 'cpu0'; -- InfluxQL: SELECT non_negative_difference(usage_system) FROM cpu WHERE time >= 0000000060000000000 AND time < 0000000210000000001 AND cpu = 'cpu0';
name: cpu name: cpu
+---------------------+-------------------------+ +---------------------+-------------------------+
@ -202,6 +282,22 @@ tags: cpu=cpu1
+---------------------+-------------------------+ +---------------------+-------------------------+
| 1970-01-01T00:02:00 | 0.36666666666667425 | | 1970-01-01T00:02:00 | 0.36666666666667425 |
+---------------------+-------------------------+ +---------------------+-------------------------+
-- InfluxQL: SELECT non_negative_difference(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
name: cpu
tags: cpu=cpu0
+---------------------+-------------------------+
| time | non_negative_difference |
+---------------------+-------------------------+
| 1970-01-01T00:02:30 | 3.5 |
+---------------------+-------------------------+
name: cpu
tags: cpu=cpu1
+---------------------+-------------------------+
| time | non_negative_difference |
+---------------------+-------------------------+
| 1970-01-01T00:02:00 | 0.20000000000000284 |
| 1970-01-01T00:02:30 | 0.0 |
+---------------------+-------------------------+
-- InfluxQL: SELECT moving_average(writes, 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001; -- InfluxQL: SELECT moving_average(writes, 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001;
name: diskio name: diskio
+---------------------+-------------------+ +---------------------+-------------------+
@ -307,6 +403,54 @@ name: diskio
| 1970-01-01T00:03:23 | 5593588.0 | | 1970-01-01T00:03:23 | 5593588.0 |
| 1970-01-01T00:03:30 | 5593662.0 | | 1970-01-01T00:03:30 | 5593662.0 |
+---------------------+-------------------+ +---------------------+-------------------+
-- InfluxQL: SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
name: diskio
+---------------------+-------------------+
| time | moving_average |
+---------------------+-------------------+
| 1970-01-01T00:02:27 | 5592817.666666667 |
| 1970-01-01T00:02:34 | 5592972.0 |
| 1970-01-01T00:02:48 | 5593108.333333333 |
| 1970-01-01T00:02:55 | 5593255.333333333 |
| 1970-01-01T00:03:09 | 5593390.0 |
| 1970-01-01T00:03:16 | 5593513.333333333 |
| 1970-01-01T00:03:30 | 5593612.333333333 |
+---------------------+-------------------+
-- InfluxQL: SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
name: diskio
+---------------------+--------------------+
| time | moving_average |
+---------------------+--------------------+
| 1970-01-01T00:02:13 | 1864215.3333333333 |
| 1970-01-01T00:02:20 | 3728485.3333333335 |
| 1970-01-01T00:02:27 | 3728602.3333333335 |
| 1970-01-01T00:02:34 | 5592972.0 |
| 1970-01-01T00:02:41 | 3728702.0 |
| 1970-01-01T00:02:48 | 3728776.0 |
| 1970-01-01T00:02:55 | 3728885.6666666665 |
| 1970-01-01T00:03:02 | 3728885.6666666665 |
| 1970-01-01T00:03:09 | 3728983.6666666665 |
| 1970-01-01T00:03:16 | 3729034.0 |
| 1970-01-01T00:03:23 | 3729034.0 |
| 1970-01-01T00:03:30 | 3729108.0 |
+---------------------+--------------------+
-- InfluxQL: SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
name: diskio
+---------------------+-------------------+
| time | moving_average |
+---------------------+-------------------+
| 1970-01-01T00:02:20 | 5592700.666666667 |
| 1970-01-01T00:02:27 | 5592817.666666667 |
| 1970-01-01T00:02:34 | 5592972.0 |
| 1970-01-01T00:02:41 | 5593071.666666667 |
| 1970-01-01T00:02:48 | 5593145.666666667 |
| 1970-01-01T00:02:55 | 5593255.333333333 |
| 1970-01-01T00:03:02 | 5593365.0 |
| 1970-01-01T00:03:09 | 5593463.0 |
| 1970-01-01T00:03:16 | 5593513.333333333 |
| 1970-01-01T00:03:23 | 5593563.666666667 |
| 1970-01-01T00:03:30 | 5593637.666666667 |
+---------------------+-------------------+
-- InfluxQL: SELECT difference(usage_idle), non_negative_difference(usage_idle), moving_average(usage_idle, 4) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY cpu; -- InfluxQL: SELECT difference(usage_idle), non_negative_difference(usage_idle), moving_average(usage_idle, 4) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY cpu;
name: cpu name: cpu
tags: cpu=cpu0 tags: cpu=cpu0
@ -649,6 +793,166 @@ tags: cpu=cpu1
| 1970-01-01T00:02:30 | -0.0005555555555557608 | | 1970-01-01T00:02:30 | -0.0005555555555557608 |
| 1970-01-01T00:03:00 | -0.000555555555555524 | | 1970-01-01T00:03:00 | -0.000555555555555524 |
+---------------------+------------------------+ +---------------------+------------------------+
-- InfluxQL: SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
name: diskio
+---------------------+------------+
| time | derivative |
+---------------------+------------+
| 1970-01-01T00:02:20 | 82.0 |
| 1970-01-01T00:02:27 | 187.0 |
| 1970-01-01T00:02:34 | 112.0 |
| 1970-01-01T00:02:48 | 55.0 |
| 1970-01-01T00:02:55 | 219.0 |
| 1970-01-01T00:03:09 | 37.5 |
| 1970-01-01T00:03:16 | 76.0 |
| 1970-01-01T00:03:30 | 73.0 |
+---------------------+------------+
-- InfluxQL: SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
name: diskio
+---------------------+--------------------+
| time | derivative |
+---------------------+--------------------+
| 1970-01-01T00:02:20 | 5.857142857142857 |
| 1970-01-01T00:02:27 | 13.357142857142858 |
| 1970-01-01T00:02:34 | 8.0 |
| 1970-01-01T00:02:48 | 3.9285714285714284 |
| 1970-01-01T00:02:55 | 15.642857142857142 |
| 1970-01-01T00:03:09 | 2.6785714285714284 |
| 1970-01-01T00:03:16 | 5.428571428571429 |
| 1970-01-01T00:03:30 | 5.214285714285714 |
+---------------------+--------------------+
-- InfluxQL: SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
name: diskio
+---------------------+------------+
| time | derivative |
+---------------------+------------+
| 1970-01-01T00:02:00 | 366.0 |
| 1970-01-01T00:02:30 | 421.0 |
| 1970-01-01T00:03:00 | 441.0 |
| 1970-01-01T00:03:30 | 297.0 |
+---------------------+------------+
-- InfluxQL: SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
name: diskio
+---------------------+-------------------+
| time | derivative |
+---------------------+-------------------+
| 1970-01-01T00:02:00 | 6.1 |
| 1970-01-01T00:02:30 | 7.016666666666667 |
| 1970-01-01T00:03:00 | 7.35 |
| 1970-01-01T00:03:30 | 4.95 |
+---------------------+-------------------+
-- InfluxQL: SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
name: diskio
+---------------------+------------+
| time | derivative |
+---------------------+------------+
| 1970-01-01T00:02:06 | 5592646.0 |
| 1970-01-01T00:02:13 | -5592646.0 |
| 1970-01-01T00:02:20 | 5592810.0 |
| 1970-01-01T00:02:27 | 187.0 |
| 1970-01-01T00:02:34 | 112.0 |
| 1970-01-01T00:02:41 | -5593109.0 |
| 1970-01-01T00:02:48 | 5593219.0 |
| 1970-01-01T00:02:55 | 219.0 |
| 1970-01-01T00:03:02 | -5593438.0 |
| 1970-01-01T00:03:09 | 5593513.0 |
| 1970-01-01T00:03:16 | 76.0 |
| 1970-01-01T00:03:23 | -5593589.0 |
| 1970-01-01T00:03:30 | 5593735.0 |
+---------------------+------------+
-- InfluxQL: SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
name: diskio
+---------------------+---------------------+
| time | derivative |
+---------------------+---------------------+
| 1970-01-01T00:02:06 | 399474.71428571426 |
| 1970-01-01T00:02:13 | -399474.71428571426 |
| 1970-01-01T00:02:20 | 399486.4285714286 |
| 1970-01-01T00:02:27 | 13.357142857142858 |
| 1970-01-01T00:02:34 | 8.0 |
| 1970-01-01T00:02:41 | -399507.78571428574 |
| 1970-01-01T00:02:48 | 399515.64285714284 |
| 1970-01-01T00:02:55 | 15.642857142857142 |
| 1970-01-01T00:03:02 | -399531.28571428574 |
| 1970-01-01T00:03:09 | 399536.64285714284 |
| 1970-01-01T00:03:16 | 5.428571428571429 |
| 1970-01-01T00:03:23 | -399542.0714285714 |
| 1970-01-01T00:03:30 | 399552.5 |
+---------------------+---------------------+
-- InfluxQL: SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
name: diskio
+---------------------+------------+
| time | derivative |
+---------------------+------------+
| 1970-01-01T00:02:13 | 0.0 |
| 1970-01-01T00:02:20 | 164.0 |
| 1970-01-01T00:02:27 | 187.0 |
| 1970-01-01T00:02:34 | 112.0 |
| 1970-01-01T00:02:41 | 0.0 |
| 1970-01-01T00:02:48 | 110.0 |
| 1970-01-01T00:02:55 | 219.0 |
| 1970-01-01T00:03:02 | 0.0 |
| 1970-01-01T00:03:09 | 75.0 |
| 1970-01-01T00:03:16 | 76.0 |
| 1970-01-01T00:03:23 | 0.0 |
| 1970-01-01T00:03:30 | 146.0 |
+---------------------+------------+
-- InfluxQL: SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
name: diskio
+---------------------+--------------------+
| time | derivative |
+---------------------+--------------------+
| 1970-01-01T00:02:13 | 0.0 |
| 1970-01-01T00:02:20 | 11.714285714285714 |
| 1970-01-01T00:02:27 | 13.357142857142858 |
| 1970-01-01T00:02:34 | 8.0 |
| 1970-01-01T00:02:41 | 0.0 |
| 1970-01-01T00:02:48 | 7.857142857142857 |
| 1970-01-01T00:02:55 | 15.642857142857142 |
| 1970-01-01T00:03:02 | 0.0 |
| 1970-01-01T00:03:09 | 5.357142857142857 |
| 1970-01-01T00:03:16 | 5.428571428571429 |
| 1970-01-01T00:03:23 | 0.0 |
| 1970-01-01T00:03:30 | 10.428571428571429 |
+---------------------+--------------------+
-- InfluxQL: SELECT derivative(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
name: cpu
tags: cpu=cpu0
+---------------------+---------------------+
| time | derivative |
+---------------------+---------------------+
| 1970-01-01T00:02:00 | -0.7999999999999972 |
| 1970-01-01T00:02:30 | 3.5 |
| 1970-01-01T00:03:00 | -0.4000000000000057 |
+---------------------+---------------------+
name: cpu
tags: cpu=cpu1
+---------------------+----------------------+
| time | derivative |
+---------------------+----------------------+
| 1970-01-01T00:02:00 | 0.20000000000000284 |
| 1970-01-01T00:02:30 | 0.0 |
| 1970-01-01T00:03:00 | -0.10000000000000853 |
+---------------------+----------------------+
-- InfluxQL: SELECT derivative(first(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
name: cpu
tags: cpu=cpu0
+---------------------+-----------------------+
| time | derivative |
+---------------------+-----------------------+
| 1970-01-01T00:02:00 | -0.013333333333333286 |
| 1970-01-01T00:02:30 | 0.058333333333333334 |
| 1970-01-01T00:03:00 | -0.006666666666666762 |
+---------------------+-----------------------+
name: cpu
tags: cpu=cpu1
+---------------------+------------------------+
| time | derivative |
+---------------------+------------------------+
| 1970-01-01T00:02:00 | 0.003333333333333381 |
| 1970-01-01T00:02:30 | 0.0 |
| 1970-01-01T00:03:00 | -0.0016666666666668088 |
+---------------------+------------------------+
-- InfluxQL: SELECT non_negative_derivative(writes) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001; -- InfluxQL: SELECT non_negative_derivative(writes) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001;
name: diskio name: diskio
+---------------------+-------------------------+ +---------------------+-------------------------+
@ -918,3 +1222,407 @@ tags: cpu=cpu1
+---------------------+-------------------------+ +---------------------+-------------------------+
| 1970-01-01T00:02:00 | 0.006111111111111237 | | 1970-01-01T00:02:00 | 0.006111111111111237 |
+---------------------+-------------------------+ +---------------------+-------------------------+
-- InfluxQL: SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
name: diskio
+---------------------+-------------------------+
| time | non_negative_derivative |
+---------------------+-------------------------+
| 1970-01-01T00:02:20 | 82.0 |
| 1970-01-01T00:02:27 | 187.0 |
| 1970-01-01T00:02:34 | 112.0 |
| 1970-01-01T00:02:48 | 55.0 |
| 1970-01-01T00:02:55 | 219.0 |
| 1970-01-01T00:03:09 | 37.5 |
| 1970-01-01T00:03:16 | 76.0 |
| 1970-01-01T00:03:30 | 73.0 |
+---------------------+-------------------------+
-- InfluxQL: SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
name: diskio
+---------------------+-------------------------+
| time | non_negative_derivative |
+---------------------+-------------------------+
| 1970-01-01T00:02:20 | 5.857142857142857 |
| 1970-01-01T00:02:27 | 13.357142857142858 |
| 1970-01-01T00:02:34 | 8.0 |
| 1970-01-01T00:02:48 | 3.9285714285714284 |
| 1970-01-01T00:02:55 | 15.642857142857142 |
| 1970-01-01T00:03:09 | 2.6785714285714284 |
| 1970-01-01T00:03:16 | 5.428571428571429 |
| 1970-01-01T00:03:30 | 5.214285714285714 |
+---------------------+-------------------------+
-- InfluxQL: SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
name: diskio
+---------------------+-------------------------+
| time | non_negative_derivative |
+---------------------+-------------------------+
| 1970-01-01T00:02:00 | 366.0 |
| 1970-01-01T00:02:30 | 421.0 |
| 1970-01-01T00:03:00 | 441.0 |
| 1970-01-01T00:03:30 | 297.0 |
+---------------------+-------------------------+
-- InfluxQL: SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
name: diskio
+---------------------+-------------------------+
| time | non_negative_derivative |
+---------------------+-------------------------+
| 1970-01-01T00:02:00 | 6.1 |
| 1970-01-01T00:02:30 | 7.016666666666667 |
| 1970-01-01T00:03:00 | 7.35 |
| 1970-01-01T00:03:30 | 4.95 |
+---------------------+-------------------------+
-- InfluxQL: SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
name: diskio
+---------------------+-------------------------+
| time | non_negative_derivative |
+---------------------+-------------------------+
| 1970-01-01T00:02:06 | 5592646.0 |
| 1970-01-01T00:02:20 | 5592810.0 |
| 1970-01-01T00:02:27 | 187.0 |
| 1970-01-01T00:02:34 | 112.0 |
| 1970-01-01T00:02:48 | 5593219.0 |
| 1970-01-01T00:02:55 | 219.0 |
| 1970-01-01T00:03:09 | 5593513.0 |
| 1970-01-01T00:03:16 | 76.0 |
| 1970-01-01T00:03:30 | 5593735.0 |
+---------------------+-------------------------+
-- InfluxQL: SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
name: diskio
+---------------------+-------------------------+
| time | non_negative_derivative |
+---------------------+-------------------------+
| 1970-01-01T00:02:06 | 399474.71428571426 |
| 1970-01-01T00:02:20 | 399486.4285714286 |
| 1970-01-01T00:02:27 | 13.357142857142858 |
| 1970-01-01T00:02:34 | 8.0 |
| 1970-01-01T00:02:48 | 399515.64285714284 |
| 1970-01-01T00:02:55 | 15.642857142857142 |
| 1970-01-01T00:03:09 | 399536.64285714284 |
| 1970-01-01T00:03:16 | 5.428571428571429 |
| 1970-01-01T00:03:30 | 399552.5 |
+---------------------+-------------------------+
-- InfluxQL: SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
name: diskio
+---------------------+-------------------------+
| time | non_negative_derivative |
+---------------------+-------------------------+
| 1970-01-01T00:02:13 | 0.0 |
| 1970-01-01T00:02:20 | 164.0 |
| 1970-01-01T00:02:27 | 187.0 |
| 1970-01-01T00:02:34 | 112.0 |
| 1970-01-01T00:02:41 | 0.0 |
| 1970-01-01T00:02:48 | 110.0 |
| 1970-01-01T00:02:55 | 219.0 |
| 1970-01-01T00:03:02 | 0.0 |
| 1970-01-01T00:03:09 | 75.0 |
| 1970-01-01T00:03:16 | 76.0 |
| 1970-01-01T00:03:23 | 0.0 |
| 1970-01-01T00:03:30 | 146.0 |
+---------------------+-------------------------+
-- InfluxQL: SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
name: diskio
+---------------------+-------------------------+
| time | non_negative_derivative |
+---------------------+-------------------------+
| 1970-01-01T00:02:13 | 0.0 |
| 1970-01-01T00:02:20 | 11.714285714285714 |
| 1970-01-01T00:02:27 | 13.357142857142858 |
| 1970-01-01T00:02:34 | 8.0 |
| 1970-01-01T00:02:41 | 0.0 |
| 1970-01-01T00:02:48 | 7.857142857142857 |
| 1970-01-01T00:02:55 | 15.642857142857142 |
| 1970-01-01T00:03:02 | 0.0 |
| 1970-01-01T00:03:09 | 5.357142857142857 |
| 1970-01-01T00:03:16 | 5.428571428571429 |
| 1970-01-01T00:03:23 | 0.0 |
| 1970-01-01T00:03:30 | 10.428571428571429 |
+---------------------+-------------------------+
-- InfluxQL: SELECT non_negative_derivative(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
name: cpu
tags: cpu=cpu0
+---------------------+-------------------------+
| time | non_negative_derivative |
+---------------------+-------------------------+
| 1970-01-01T00:02:30 | 3.5 |
+---------------------+-------------------------+
name: cpu
tags: cpu=cpu1
+---------------------+-------------------------+
| time | non_negative_derivative |
+---------------------+-------------------------+
| 1970-01-01T00:02:00 | 0.20000000000000284 |
| 1970-01-01T00:02:30 | 0.0 |
+---------------------+-------------------------+
-- InfluxQL: SELECT non_negative_derivative(first(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
name: cpu
tags: cpu=cpu0
+---------------------+-------------------------+
| time | non_negative_derivative |
+---------------------+-------------------------+
| 1970-01-01T00:02:30 | 0.058333333333333334 |
+---------------------+-------------------------+
name: cpu
tags: cpu=cpu1
+---------------------+-------------------------+
| time | non_negative_derivative |
+---------------------+-------------------------+
| 1970-01-01T00:02:00 | 0.003333333333333381 |
| 1970-01-01T00:02:30 | 0.0 |
+---------------------+-------------------------+
-- InfluxQL: SELECT cumulative_sum(writes) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001;
name: diskio
+---------------------+----------------+
| time | cumulative_sum |
+---------------------+----------------+
| 1970-01-01T00:02:10 | 5592646 |
| 1970-01-01T00:02:20 | 11185456 |
| 1970-01-01T00:02:30 | 16778453 |
| 1970-01-01T00:02:40 | 22371562 |
| 1970-01-01T00:02:50 | 27964781 |
| 1970-01-01T00:03:00 | 33558219 |
| 1970-01-01T00:03:10 | 39151732 |
| 1970-01-01T00:03:20 | 44745321 |
| 1970-01-01T00:03:30 | 50339056 |
+---------------------+----------------+
-- InfluxQL: SELECT cumulative_sum(usage_system) FROM cpu WHERE time >= 0000000060000000000 AND time < 0000000210000000001 AND cpu = 'cpu0';
name: cpu
+---------------------+----------------+
| time | cumulative_sum |
+---------------------+----------------+
| 1970-01-01T00:01:00 | 89.5 |
| 1970-01-01T00:01:10 | 178.1 |
| 1970-01-01T00:01:30 | 261.5 |
| 1970-01-01T00:01:40 | 349.2 |
| 1970-01-01T00:02:10 | 439.0 |
| 1970-01-01T00:02:50 | 528.8 |
| 1970-01-01T00:03:00 | 618.8 |
+---------------------+----------------+
-- InfluxQL: SELECT cumulative_sum(usage_idle), cumulative_sum(usage_system) FROM cpu WHERE time >= 0000000060000000000 AND time < 0000000210000000001 AND cpu = 'cpu0';
name: cpu
+---------------------+--------------------+------------------+
| time | cumulative_sum | cumulative_sum_1 |
+---------------------+--------------------+------------------+
| 1970-01-01T00:01:00 | 89.5 | 89.5 |
| 1970-01-01T00:01:10 | 178.1 | 178.1 |
| 1970-01-01T00:01:20 | 266.7 | |
| 1970-01-01T00:01:30 | 350.1 | 261.5 |
| 1970-01-01T00:01:40 | 437.8 | 349.2 |
| 1970-01-01T00:01:50 | 526.5 | |
| 1970-01-01T00:02:00 | 613.4 | |
| 1970-01-01T00:02:10 | 703.1999999999999 | 439.0 |
| 1970-01-01T00:02:20 | 792.1999999999999 | |
| 1970-01-01T00:02:30 | 882.5999999999999 | |
| 1970-01-01T00:02:40 | 972.8 | |
| 1970-01-01T00:02:50 | 1062.6 | 528.8 |
| 1970-01-01T00:03:00 | 1152.6 | 618.8 |
| 1970-01-01T00:03:10 | 1241.3999999999999 | |
+---------------------+--------------------+------------------+
-- InfluxQL: SELECT cumulative_sum(usage_idle) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY cpu;
name: cpu
tags: cpu=cpu0
+---------------------+--------------------+
| time | cumulative_sum |
+---------------------+--------------------+
| 1970-01-01T00:02:10 | 89.8 |
| 1970-01-01T00:02:20 | 178.8 |
| 1970-01-01T00:02:30 | 269.20000000000005 |
| 1970-01-01T00:02:40 | 359.40000000000003 |
| 1970-01-01T00:02:50 | 449.20000000000005 |
| 1970-01-01T00:03:00 | 539.2 |
| 1970-01-01T00:03:10 | 628.0 |
+---------------------+--------------------+
name: cpu
tags: cpu=cpu1
+---------------------+--------------------+
| time | cumulative_sum |
+---------------------+--------------------+
| 1970-01-01T00:02:10 | 99.8 |
| 1970-01-01T00:02:20 | 199.7 |
| 1970-01-01T00:02:30 | 299.6 |
| 1970-01-01T00:02:40 | 399.40000000000003 |
| 1970-01-01T00:02:50 | 499.20000000000005 |
| 1970-01-01T00:03:00 | 599.0 |
| 1970-01-01T00:03:10 | 698.8 |
+---------------------+--------------------+
-- InfluxQL: SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
name: diskio
+---------------------+----------------+
| time | cumulative_sum |
+---------------------+----------------+
| 1970-01-01T00:02:06 | 5592646.0 |
| 1970-01-01T00:02:20 | 11185456.0 |
| 1970-01-01T00:02:27 | 16778453.0 |
| 1970-01-01T00:02:34 | 22371562.0 |
| 1970-01-01T00:02:48 | 27964781.0 |
| 1970-01-01T00:02:55 | 33558219.0 |
| 1970-01-01T00:03:09 | 39151732.0 |
| 1970-01-01T00:03:16 | 44745321.0 |
| 1970-01-01T00:03:30 | 50339056.0 |
+---------------------+----------------+
-- InfluxQL: SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
name: diskio
+---------------------+--------------------+
| time | cumulative_sum |
+---------------------+--------------------+
| 1970-01-01T00:02:00 | 5592728.0 |
| 1970-01-01T00:02:30 | 11185836.333333332 |
| 1970-01-01T00:03:00 | 16779349.666666664 |
| 1970-01-01T00:03:30 | 22373084.666666664 |
+---------------------+--------------------+
-- InfluxQL: SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
name: diskio
+---------------------+----------------+
| time | cumulative_sum |
+---------------------+----------------+
| 1970-01-01T00:02:06 | 5592646.0 |
| 1970-01-01T00:02:13 | 5592646.0 |
| 1970-01-01T00:02:20 | 11185456.0 |
| 1970-01-01T00:02:27 | 16778453.0 |
| 1970-01-01T00:02:34 | 22371562.0 |
| 1970-01-01T00:02:41 | 22371562.0 |
| 1970-01-01T00:02:48 | 27964781.0 |
| 1970-01-01T00:02:55 | 33558219.0 |
| 1970-01-01T00:03:02 | 33558219.0 |
| 1970-01-01T00:03:09 | 39151732.0 |
| 1970-01-01T00:03:16 | 44745321.0 |
| 1970-01-01T00:03:23 | 44745321.0 |
| 1970-01-01T00:03:30 | 50339056.0 |
+---------------------+----------------+
-- InfluxQL: SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
name: diskio
+---------------------+----------------+
| time | cumulative_sum |
+---------------------+----------------+
| 1970-01-01T00:02:06 | 5592646.0 |
| 1970-01-01T00:02:13 | 11185292.0 |
| 1970-01-01T00:02:20 | 16778102.0 |
| 1970-01-01T00:02:27 | 22371099.0 |
| 1970-01-01T00:02:34 | 27964208.0 |
| 1970-01-01T00:02:41 | 33557317.0 |
| 1970-01-01T00:02:48 | 39150536.0 |
| 1970-01-01T00:02:55 | 44743974.0 |
| 1970-01-01T00:03:02 | 50337412.0 |
| 1970-01-01T00:03:09 | 55930925.0 |
| 1970-01-01T00:03:16 | 61524514.0 |
| 1970-01-01T00:03:23 | 67118103.0 |
| 1970-01-01T00:03:30 | 72711838.0 |
+---------------------+----------------+
-- InfluxQL: SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
name: diskio
+---------------------+----------------+
| time | cumulative_sum |
+---------------------+----------------+
| 1970-01-01T00:02:06 | 5592646.0 |
| 1970-01-01T00:02:13 | 11185374.0 |
| 1970-01-01T00:02:20 | 16778184.0 |
| 1970-01-01T00:02:27 | 22371181.0 |
| 1970-01-01T00:02:34 | 27964290.0 |
| 1970-01-01T00:02:41 | 33557454.0 |
| 1970-01-01T00:02:48 | 39150673.0 |
| 1970-01-01T00:02:55 | 44744111.0 |
| 1970-01-01T00:03:02 | 50337586.5 |
| 1970-01-01T00:03:09 | 55931099.5 |
| 1970-01-01T00:03:16 | 61524688.5 |
| 1970-01-01T00:03:23 | 67118350.5 |
| 1970-01-01T00:03:30 | 72712085.5 |
+---------------------+----------------+
-- InfluxQL: SELECT cumulative_sum(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
name: cpu
tags: cpu=cpu0
+---------------------+--------------------+
| time | cumulative_sum |
+---------------------+--------------------+
| 1970-01-01T00:02:00 | 89.4 |
| 1970-01-01T00:02:30 | 179.53333333333336 |
| 1970-01-01T00:03:00 | 268.9333333333334 |
+---------------------+--------------------+
name: cpu
tags: cpu=cpu1
+---------------------+--------------------+
| time | cumulative_sum |
+---------------------+--------------------+
| 1970-01-01T00:02:00 | 99.85 |
| 1970-01-01T00:02:30 | 199.68333333333334 |
| 1970-01-01T00:03:00 | 299.48333333333335 |
+---------------------+--------------------+
-- InfluxQL: SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
name: diskio
+---------------------+----------------+
| time | cumulative_sum |
+---------------------+----------------+
| 1970-01-01T00:02:06 | 5592646 |
| 1970-01-01T00:02:20 | 11185456 |
| 1970-01-01T00:02:27 | 16778453 |
| 1970-01-01T00:02:34 | 22371562 |
| 1970-01-01T00:02:48 | 27964781 |
| 1970-01-01T00:02:55 | 33558219 |
| 1970-01-01T00:03:09 | 39151732 |
| 1970-01-01T00:03:16 | 44745321 |
| 1970-01-01T00:03:30 | 50339056 |
+---------------------+----------------+
-- InfluxQL: SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
name: diskio
+---------------------+----------------+
| time | cumulative_sum |
+---------------------+----------------+
| 1970-01-01T00:02:00 | 5592646 |
| 1970-01-01T00:02:30 | 11185643 |
| 1970-01-01T00:03:00 | 16779081 |
| 1970-01-01T00:03:30 | 22372816 |
+---------------------+----------------+
-- InfluxQL: SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
name: diskio
+---------------------+----------------+
| time | cumulative_sum |
+---------------------+----------------+
| 1970-01-01T00:02:06 | 5592646 |
| 1970-01-01T00:02:13 | 5592646 |
| 1970-01-01T00:02:20 | 11185456 |
| 1970-01-01T00:02:27 | 16778453 |
| 1970-01-01T00:02:34 | 22371562 |
| 1970-01-01T00:02:41 | 22371562 |
| 1970-01-01T00:02:48 | 27964781 |
| 1970-01-01T00:02:55 | 33558219 |
| 1970-01-01T00:03:02 | 33558219 |
| 1970-01-01T00:03:09 | 39151732 |
| 1970-01-01T00:03:16 | 44745321 |
| 1970-01-01T00:03:23 | 44745321 |
| 1970-01-01T00:03:30 | 50339056 |
+---------------------+----------------+
-- InfluxQL: SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
name: diskio
+---------------------+----------------+
| time | cumulative_sum |
+---------------------+----------------+
| 1970-01-01T00:02:06 | 5592646 |
| 1970-01-01T00:02:13 | 11185292 |
| 1970-01-01T00:02:20 | 16778102 |
| 1970-01-01T00:02:27 | 22371099 |
| 1970-01-01T00:02:34 | 27964208 |
| 1970-01-01T00:02:41 | 33557317 |
| 1970-01-01T00:02:48 | 39150536 |
| 1970-01-01T00:02:55 | 44743974 |
| 1970-01-01T00:03:02 | 50337412 |
| 1970-01-01T00:03:09 | 55930925 |
| 1970-01-01T00:03:16 | 61524514 |
| 1970-01-01T00:03:23 | 67118103 |
| 1970-01-01T00:03:30 | 72711838 |
+---------------------+----------------+
-- InfluxQL: SELECT cumulative_sum(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
name: cpu
tags: cpu=cpu0
+---------------------+----------------+
| time | cumulative_sum |
+---------------------+----------------+
| 1970-01-01T00:02:00 | 89.8 |
| 1970-01-01T00:02:30 | 180.2 |
| 1970-01-01T00:03:00 | 270.2 |
+---------------------+----------------+
name: cpu
tags: cpu=cpu1
+---------------------+----------------+
| time | cumulative_sum |
+---------------------+----------------+
| 1970-01-01T00:02:00 | 99.8 |
| 1970-01-01T00:02:30 | 199.7 |
| 1970-01-01T00:03:00 | 299.5 |
+---------------------+----------------+
@ -6,7 +6,7 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
sqlparser = "0.35.0" sqlparser = "0.36.0"
snafu = "0.7.5" snafu = "0.7.5"
generated_types = { path = "../generated_types" } generated_types = { path = "../generated_types" }
@ -45,7 +45,7 @@ tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-threa
tokio-util = "0.7.8" tokio-util = "0.7.8"
tonic = { workspace = true } tonic = { workspace = true }
trace = { version = "0.1.0", path = "../trace" } trace = { version = "0.1.0", path = "../trace" }
uuid = "1.4.0" uuid = "1.4.1"
wal = { version = "0.1.0", path = "../wal" } wal = { version = "0.1.0", path = "../wal" }
workspace-hack = { version = "0.1", path = "../workspace-hack" } workspace-hack = { version = "0.1", path = "../workspace-hack" }
@ -60,7 +60,7 @@ lazy_static = "1.4.0"
mutable_batch_lp = { path = "../mutable_batch_lp" } mutable_batch_lp = { path = "../mutable_batch_lp" }
object_store = { workspace = true } object_store = { workspace = true }
paste = "1.0.14" paste = "1.0.14"
tempfile = "3.6.0" tempfile = "3.7.0"
test_helpers = { path = "../test_helpers", features = ["future_timeout"] } test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
tokio = { version = "1.29", features = ["macros", "time", "test-util"] } tokio = { version = "1.29", features = ["macros", "time", "test-util"] }
@ -2,9 +2,7 @@ use std::{collections::HashMap, sync::Arc, time::Duration};
use async_trait::async_trait; use async_trait::async_trait;
use backoff::BackoffConfig; use backoff::BackoffConfig;
use data_types::{ use data_types::{NamespaceId, Partition, PartitionHashId, PartitionId, PartitionKey, TableId};
NamespaceId, Partition, PartitionHashId, PartitionId, PartitionKey, SequenceNumber, TableId,
};
use iox_catalog::interface::Catalog; use iox_catalog::interface::Catalog;
use observability_deps::tracing::debug; use observability_deps::tracing::debug;
use parking_lot::Mutex; use parking_lot::Mutex;
@ -222,6 +220,7 @@ mod tests {
// Harmless in tests - saves a bunch of extra vars. // Harmless in tests - saves a bunch of extra vars.
#![allow(clippy::await_holding_lock)] #![allow(clippy::await_holding_lock)]
use data_types::PartitionId;
use iox_catalog::mem::MemCatalog; use iox_catalog::mem::MemCatalog;
use super::*; use super::*;
@ -6,7 +6,6 @@ use std::{
}, },
}; };
use arrow::compute::kernels::partition;
use async_trait::async_trait; use async_trait::async_trait;
use data_types::{NamespaceId, PartitionKey, TableId}; use data_types::{NamespaceId, PartitionKey, TableId};
use futures::{future::Shared, FutureExt}; use futures::{future::Shared, FutureExt};
@ -25,11 +24,10 @@ use super::PartitionProvider;
type BoxedResolveFuture = type BoxedResolveFuture =
Pin<Box<dyn std::future::Future<Output = Arc<Mutex<PartitionData>>> + Send>>; Pin<Box<dyn std::future::Future<Output = Arc<Mutex<PartitionData>>> + Send>>;
/// A compound key of `(namespace, table, partition_key)` which uniquely /// A compound key of `(table, partition_key)` which uniquely
/// identifies a single partition. /// identifies a single partition.
#[derive(Debug, Clone, PartialEq, Eq, Hash)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct Key { struct Key {
namespace_id: NamespaceId,
table_id: TableId, table_id: TableId,
partition_key: PartitionKey, partition_key: PartitionKey,
} }
@ -149,7 +147,6 @@ where
table: Arc<DeferredLoad<TableMetadata>>, table: Arc<DeferredLoad<TableMetadata>>,
) -> Arc<Mutex<PartitionData>> { ) -> Arc<Mutex<PartitionData>> {
let key = Key { let key = Key {
namespace_id,
table_id, table_id,
partition_key: partition_key.clone(), // Ref-counted anyway! partition_key: partition_key.clone(), // Ref-counted anyway!
}; };
@ -267,12 +264,11 @@ mod tests {
use assert_matches::assert_matches; use assert_matches::assert_matches;
use futures::Future; use futures::Future;
use futures::{stream::FuturesUnordered, StreamExt}; use futures::{stream::FuturesUnordered, StreamExt};
use lazy_static::lazy_static;
use test_helpers::timeout::FutureTimeout; use test_helpers::timeout::FutureTimeout;
use tokio::sync::{Notify, Semaphore}; use tokio::sync::{Notify, Semaphore};
use crate::{ use crate::{
buffer_tree::partition::{resolver::mock::MockPartitionProvider, SortKeyState}, buffer_tree::partition::resolver::mock::MockPartitionProvider,
test_util::{ test_util::{
defer_namespace_name_1_sec, defer_table_metadata_1_sec, PartitionDataBuilder, defer_namespace_name_1_sec, defer_table_metadata_1_sec, PartitionDataBuilder,
ARBITRARY_NAMESPACE_ID, ARBITRARY_PARTITION_KEY, ARBITRARY_TABLE_ID, ARBITRARY_NAMESPACE_ID, ARBITRARY_PARTITION_KEY, ARBITRARY_TABLE_ID,
@ -2,8 +2,6 @@
//! //!
//! [`PartitionData`]: crate::buffer_tree::partition::PartitionData //! [`PartitionData`]: crate::buffer_tree::partition::PartitionData
#![allow(unused_imports)] // Transition time only.
mod cache; mod cache;
pub(crate) use cache::*; pub(crate) use cache::*;
@ -49,11 +49,11 @@ where
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::{sync::Arc, time::Duration}; use std::sync::Arc;
use super::*; use super::*;
use crate::{ use crate::{
buffer_tree::partition::{resolver::mock::MockPartitionProvider, SortKeyState}, buffer_tree::partition::resolver::mock::MockPartitionProvider,
test_util::{ test_util::{
defer_namespace_name_1_sec, defer_table_metadata_1_sec, PartitionDataBuilder, defer_namespace_name_1_sec, defer_table_metadata_1_sec, PartitionDataBuilder,
ARBITRARY_NAMESPACE_ID, ARBITRARY_PARTITION_ID, ARBITRARY_PARTITION_KEY, ARBITRARY_NAMESPACE_ID, ARBITRARY_PARTITION_ID, ARBITRARY_PARTITION_KEY,
@ -998,12 +998,8 @@ mod tests {
assert_eq!(m, 1, "tables counter mismatch"); assert_eq!(m, 1, "tables counter mismatch");
} }
/// Assert that multiple writes to a single namespace/table results in a
/// single namespace being created, and matching metrics.
#[tokio::test] #[tokio::test]
async fn test_partition_iter() { async fn test_partition_iter() {
// Configure the mock partition provider to return a single partition, named
// p1.
let partition_provider = Arc::new( let partition_provider = Arc::new(
MockPartitionProvider::default() MockPartitionProvider::default()
.with_partition( .with_partition(
@ -27,7 +27,7 @@ object_store = { workspace = true }
observability_deps = { version = "0.1.0", path = "../observability_deps" } observability_deps = { version = "0.1.0", path = "../observability_deps" }
parquet_file = { version = "0.1.0", path = "../parquet_file" } parquet_file = { version = "0.1.0", path = "../parquet_file" }
prost = { version = "0.11.9", default-features = false, features = ["std"] } prost = { version = "0.11.9", default-features = false, features = ["std"] }
tempfile = { version = "3.6.0" } tempfile = { version = "3.7.0" }
test_helpers = { path = "../test_helpers", features = ["future_timeout"] } test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] } tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] }
tokio-util = "0.7.8" tokio-util = "0.7.8"
@ -18,7 +18,7 @@ parking_lot = { version = "0.12" }
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
siphasher = "0.3" siphasher = "0.3"
snafu = "0.7" snafu = "0.7"
sqlx = { version = "0.6", features = [ "runtime-tokio-rustls" , "postgres", "uuid", "sqlite" ] } sqlx = { version = "0.7.1", features = [ "runtime-tokio-rustls" , "postgres", "uuid", "sqlite" ] }
sqlx-hotswap-pool = { path = "../sqlx-hotswap-pool" } sqlx-hotswap-pool = { path = "../sqlx-hotswap-pool" }
thiserror = "1.0.43" thiserror = "1.0.43"
tokio = { version = "1.29", features = ["io-util", "macros", "parking_lot", "rt-multi-thread", "time"] } tokio = { version = "1.29", features = ["io-util", "macros", "parking_lot", "rt-multi-thread", "time"] }
@ -0,0 +1,11 @@
-- Drop the foreign key constraints referencing the various
-- placeholder kafka columns
ALTER TABLE IF EXISTS namespace DROP CONSTRAINT IF EXISTS namespace_kafka_topic_id_fkey, DROP CONSTRAINT IF EXISTS namespace_query_pool_id_fkey;
ALTER TABLE IF EXISTS parquet_file DROP CONSTRAINT IF EXISTS parquet_file_sequencer_id_fkey;
ALTER TABLE IF EXISTS partition DROP CONSTRAINT IF EXISTS partition_sequencer_id_fkey;
ALTER TABLE IF EXISTS tombstone DROP CONSTRAINT IF EXISTS tombstone_sequencer_id_fkey;
-- Allow the ID columns in these tables to be nullable
ALTER TABLE IF EXISTS namespace ALTER COLUMN topic_id DROP NOT NULL, ALTER COLUMN query_pool_id DROP NOT NULL;
ALTER TABLE IF EXISTS parquet_file ALTER COLUMN shard_id DROP NOT NULL;
ALTER TABLE IF EXISTS partition ALTER COLUMN shard_id DROP NOT NULL;
ALTER TABLE IF EXISTS tombstone ALTER COLUMN shard_id DROP NOT NULL;
@ -0,0 +1,13 @@
-- FUNCTION that updates the new_file_at field in the partition table when the update_partition trigger is fired
-- The field new_file_at signals when the last file was added to the partition for compaction.
CREATE OR REPLACE FUNCTION update_partition_on_new_file_at()
RETURNS TRIGGER
LANGUAGE PLPGSQL
AS $$
BEGIN
UPDATE partition SET new_file_at = NEW.created_at WHERE id = NEW.partition_id;
RETURN NEW;
END;
$$;
@ -0,0 +1,9 @@
-- update new_file_at for all compactions, not just L0 & L1
drop trigger update_partition;
create trigger if not exists update_partition
after insert
on parquet_file
for each row
begin
UPDATE partition set new_file_at = NEW.created_at WHERE id = NEW.partition_id;
end;
@ -372,12 +372,25 @@ pub trait PartitionRepo: Send + Sync {
/// get partition by ID /// get partition by ID
async fn get_by_id(&mut self, partition_id: PartitionId) -> Result<Option<Partition>>; async fn get_by_id(&mut self, partition_id: PartitionId) -> Result<Option<Partition>>;
/// get multiple partitions by ID.
///
/// the output order is undefined; non-existing partitions are not part of the output.
async fn get_by_id_batch(&mut self, partition_ids: Vec<PartitionId>) -> Result<Vec<Partition>>;
/// get partition by deterministic hash ID /// get partition by deterministic hash ID
async fn get_by_hash_id( async fn get_by_hash_id(
&mut self, &mut self,
partition_hash_id: &PartitionHashId, partition_hash_id: &PartitionHashId,
) -> Result<Option<Partition>>; ) -> Result<Option<Partition>>;
/// get multiple partitions by deterministic hash ID
///
/// the output order is undefined; non-existing partitions are not part of the output.
async fn get_by_hash_id_batch(
&mut self,
partition_hash_ids: &[&PartitionHashId],
) -> Result<Vec<Partition>>;
/// return the partitions by table id /// return the partitions by table id
async fn list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>>; async fn list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>>;
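To illustrate how the new batch lookups might be consumed, here is a minimal sketch, assuming the `iox_catalog`/`data_types` paths below and a generic `RepoCollection` handle as used elsewhere in the crate; `load_partitions` is a hypothetical helper, not part of this change:
use std::collections::HashMap;
use data_types::{Partition, PartitionId};
use iox_catalog::interface::{Error, PartitionRepo, RepoCollection};
/// Hypothetical caller: fetch many partitions in one catalog round-trip
/// instead of N `get_by_id` calls, then index them by ID. IDs that no
/// longer exist are simply absent from the returned map.
async fn load_partitions<R>(
    repos: &mut R,
    ids: Vec<PartitionId>,
) -> Result<HashMap<PartitionId, Partition>, Error>
where
    R: RepoCollection + ?Sized,
{
    // The batch call returns only the partitions that exist, in undefined order.
    let partitions = repos.partitions().get_by_id_batch(ids).await?;
    Ok(partitions.into_iter().map(|p| (p.id, p)).collect())
}
The same pattern applies to `get_by_hash_id_batch`, which takes `&[&PartitionHashId]` instead of a `Vec<PartitionId>`.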
@ -1487,6 +1500,8 @@ pub(crate) mod test_helpers {
.unwrap(); .unwrap();
// partitions can be retrieved easily // partitions can be retrieved easily
let mut created_sorted = created.values().cloned().collect::<Vec<_>>();
created_sorted.sort_by_key(|p| p.id);
assert_eq!( assert_eq!(
other_partition, other_partition,
repos repos
@ -1505,21 +1520,47 @@ pub(crate) mod test_helpers {
.unwrap() .unwrap()
.unwrap() .unwrap()
); );
let non_existing_partition_id = PartitionId::new(i64::MAX);
let non_existing_partition_hash_id =
PartitionHashId::new(TableId::new(i64::MAX), &PartitionKey::from("arbitrary"));
assert!(repos assert!(repos
.partitions() .partitions()
.get_by_id(PartitionId::new(i64::MAX)) .get_by_id(non_existing_partition_id)
.await .await
.unwrap() .unwrap()
.is_none()); .is_none());
assert!(repos assert!(repos
.partitions() .partitions()
.get_by_hash_id(&PartitionHashId::new( .get_by_hash_id(&non_existing_partition_hash_id)
TableId::new(i64::MAX),
&PartitionKey::from("arbitrary")
))
.await .await
.unwrap() .unwrap()
.is_none()); .is_none());
let mut batch = repos
.partitions()
.get_by_id_batch(
created
.keys()
.cloned()
.chain([non_existing_partition_id])
.collect(),
)
.await
.unwrap();
batch.sort_by_key(|p| p.id);
assert_eq!(created_sorted, batch);
let mut batch = repos
.partitions()
.get_by_hash_id_batch(
&created
.values()
.map(|p| p.hash_id().unwrap())
.chain([&non_existing_partition_hash_id])
.collect::<Vec<_>>(),
)
.await
.unwrap();
batch.sort_by_key(|p| p.id);
assert_eq!(created_sorted, batch);
let listed = repos let listed = repos
.partitions() .partitions()
@ -2534,7 +2575,6 @@ pub(crate) mod test_helpers {
assert!(partitions.is_empty()); assert!(partitions.is_empty());
// Add an L2 file created just now for partition three // Add an L2 file created just now for partition three
// Since the file is L2, the partition won't get updated
let l2_file_params = ParquetFileParams { let l2_file_params = ParquetFileParams {
object_store_id: Uuid::new_v4(), object_store_id: Uuid::new_v4(),
created_at: time_now, created_at: time_now,
@ -2547,16 +2587,17 @@ pub(crate) mod test_helpers {
.create(l2_file_params.clone()) .create(l2_file_params.clone())
.await .await
.unwrap(); .unwrap();
// still should return partition one and two only // now should return partitions one, two and three
let mut partitions = repos let mut partitions = repos
.partitions() .partitions()
.partitions_new_file_between(time_two_hour_ago, None) .partitions_new_file_between(time_two_hour_ago, None)
.await .await
.unwrap(); .unwrap();
assert_eq!(partitions.len(), 2); assert_eq!(partitions.len(), 3);
partitions.sort(); partitions.sort();
assert_eq!(partitions[0], partition1.id); assert_eq!(partitions[0], partition1.id);
assert_eq!(partitions[1], partition2.id); assert_eq!(partitions[1], partition2.id);
assert_eq!(partitions[2], partition3.id);
// Only return partition1: the creation time must be strictly less than the maximum time, // Only return partition1: the creation time must be strictly less than the maximum time,
// not equal // not equal
let partitions = repos let partitions = repos

View File

@ -88,6 +88,48 @@ where
} }
} }
/// Look up multiple partitions in the catalog by either database-assigned ID or deterministic hash ID.
///
/// The output only contains existing partitions; the order is undefined.
///
/// The existence of this function should be temporary; it can be removed once all partition lookup
/// is happening with only the deterministic hash ID.
pub async fn partition_lookup_batch<R>(
repos: &mut R,
ids: &[&TransitionPartitionId],
) -> Result<Vec<Partition>, Error>
where
R: RepoCollection + ?Sized,
{
let mut partition_ids = Vec::with_capacity(ids.len());
let mut partition_hash_ids = Vec::with_capacity(ids.len());
for id in ids {
match id {
TransitionPartitionId::Deprecated(partition_id) => {
partition_ids.push(*partition_id);
}
TransitionPartitionId::Deterministic(partition_hash_id) => {
partition_hash_ids.push(partition_hash_id);
}
}
}
let mut out = Vec::with_capacity(partition_ids.len() + partition_hash_ids.len());
if !partition_ids.is_empty() {
let mut partitions = repos.partitions().get_by_id_batch(partition_ids).await?;
out.append(&mut partitions);
}
if !partition_hash_ids.is_empty() {
let mut partitions = repos
.partitions()
.get_by_hash_id_batch(&partition_hash_ids)
.await?;
out.append(&mut partitions);
}
Ok(out)
}
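For reference, a minimal sketch of how a caller might use the new batch lookup, assuming a `RepoCollection` handle named `repos` and two existing `TransitionPartitionId` values (the function and variable names here are illustrative, not part of this change):

    async fn lookup_example<R: RepoCollection + ?Sized>(
        repos: &mut R,
        id_a: TransitionPartitionId,
        id_b: TransitionPartitionId,
    ) -> Result<(), Error> {
        // Mixed deprecated/deterministic IDs are accepted; the helper splits
        // them internally and issues at most one query per ID kind.
        let ids = vec![&id_a, &id_b];
        let partitions = partition_lookup_batch(repos, &ids).await?;
        // Non-existing IDs are simply absent from the result, and the order
        // of the returned partitions is undefined.
        assert!(partitions.len() <= ids.len());
        Ok(())
    }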
/// Given an iterator of `(table_name, batch)` to validate, this function /// Given an iterator of `(table_name, batch)` to validate, this function
/// ensures all the columns within `batch` match the existing schema for /// ensures all the columns within `batch` match the existing schema for
/// `table_name` in `schema`. If the column does not already exist in `schema`, /// `table_name` in `schema`. If the column does not already exist in `schema`,

View File

@ -586,6 +586,19 @@ impl PartitionRepo for MemTxn {
.cloned()) .cloned())
} }
async fn get_by_id_batch(&mut self, partition_ids: Vec<PartitionId>) -> Result<Vec<Partition>> {
let lookup = partition_ids.into_iter().collect::<HashSet<_>>();
let stage = self.stage();
Ok(stage
.partitions
.iter()
.filter(|p| lookup.contains(&p.id))
.cloned()
.collect())
}
async fn get_by_hash_id( async fn get_by_hash_id(
&mut self, &mut self,
partition_hash_id: &PartitionHashId, partition_hash_id: &PartitionHashId,
@ -603,6 +616,26 @@ impl PartitionRepo for MemTxn {
.cloned()) .cloned())
} }
async fn get_by_hash_id_batch(
&mut self,
partition_hash_ids: &[&PartitionHashId],
) -> Result<Vec<Partition>> {
let lookup = partition_hash_ids.iter().copied().collect::<HashSet<_>>();
let stage = self.stage();
Ok(stage
.partitions
.iter()
.filter(|p| {
p.hash_id()
.map(|hash_id| lookup.contains(hash_id))
.unwrap_or_default()
})
.cloned()
.collect())
}
async fn list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>> { async fn list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>> {
let stage = self.stage(); let stage = self.stage();
@ -962,23 +995,19 @@ async fn create_parquet_file(
parquet_file_params, parquet_file_params,
ParquetFileId::new(stage.parquet_files.len() as i64 + 1), ParquetFileId::new(stage.parquet_files.len() as i64 + 1),
); );
let compaction_level = parquet_file.compaction_level;
let created_at = parquet_file.created_at; let created_at = parquet_file.created_at;
let partition_id = parquet_file.partition_id; let partition_id = parquet_file.partition_id;
stage.parquet_files.push(parquet_file); stage.parquet_files.push(parquet_file);
// Update the new_file_at field of its partition to the time of created_at // Update the new_file_at field of its partition to the time of created_at
// Only update if the compaction level is not Final which signal more compaction needed let partition = stage
if compaction_level < CompactionLevel::Final { .partitions
let partition = stage .iter_mut()
.partitions .find(|p| p.id == partition_id)
.iter_mut() .ok_or(Error::PartitionNotFound {
.find(|p| p.id == partition_id) id: TransitionPartitionId::Deprecated(partition_id),
.ok_or(Error::PartitionNotFound { })?;
id: TransitionPartitionId::Deprecated(partition_id), partition.new_file_at = Some(created_at);
})?;
partition.new_file_at = Some(created_at);
}
Ok(stage.parquet_files.last().unwrap().clone()) Ok(stage.parquet_files.last().unwrap().clone())
} }

View File

@ -171,7 +171,9 @@ decorate!(
methods = [ methods = [
"partition_create_or_get" = create_or_get(&mut self, key: PartitionKey, table_id: TableId) -> Result<Partition>; "partition_create_or_get" = create_or_get(&mut self, key: PartitionKey, table_id: TableId) -> Result<Partition>;
"partition_get_by_id" = get_by_id(&mut self, partition_id: PartitionId) -> Result<Option<Partition>>; "partition_get_by_id" = get_by_id(&mut self, partition_id: PartitionId) -> Result<Option<Partition>>;
"partition_get_by_id_batch" = get_by_id_batch(&mut self, partition_ids: Vec<PartitionId>) -> Result<Vec<Partition>>;
"partition_get_by_hash_id" = get_by_hash_id(&mut self, partition_hash_id: &PartitionHashId) -> Result<Option<Partition>>; "partition_get_by_hash_id" = get_by_hash_id(&mut self, partition_hash_id: &PartitionHashId) -> Result<Option<Partition>>;
"partition_get_by_hash_id_batch" = get_by_hash_id_batch(&mut self, partition_hash_ids: &[&PartitionHashId]) -> Result<Vec<Partition>>;
"partition_list_by_table_id" = list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>>; "partition_list_by_table_id" = list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>>;
"partition_list_ids" = list_ids(&mut self) -> Result<Vec<PartitionId>>; "partition_list_ids" = list_ids(&mut self) -> Result<Vec<PartitionId>>;
"partition_update_sort_key" = cas_sort_key(&mut self, partition_id: &TransitionPartitionId, old_sort_key: Option<Vec<String>>, new_sort_key: &[&str]) -> Result<Partition, CasFailure<Vec<String>>>; "partition_update_sort_key" = cas_sort_key(&mut self, partition_id: &TransitionPartitionId, old_sort_key: Option<Vec<String>>, new_sort_key: &[&str]) -> Result<Partition, CasFailure<Vec<String>>>;

View File

@ -329,9 +329,9 @@ async fn new_raw_pool(
parsed_dsn: &str, parsed_dsn: &str,
) -> Result<sqlx::Pool<Postgres>, sqlx::Error> { ) -> Result<sqlx::Pool<Postgres>, sqlx::Error> {
// sqlx exposes some options as pool options, while other options are available as connection options. // sqlx exposes some options as pool options, while other options are available as connection options.
let mut connect_options = PgConnectOptions::from_str(parsed_dsn)?; let connect_options = PgConnectOptions::from_str(parsed_dsn)?
// the default is INFO, which is frankly surprising. // the default is INFO, which is frankly surprising.
connect_options.log_statements(log::LevelFilter::Trace); .log_statements(log::LevelFilter::Trace);
let app_name = options.app_name.clone(); let app_name = options.app_name.clone();
let app_name2 = options.app_name.clone(); // just to log below let app_name2 = options.app_name.clone(); // just to log below
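The hunk above reflects the newer sqlx builder-style API, where `log_statements` consumes the options and returns them instead of mutating in place. A minimal sketch of the same pattern in isolation (the DSN and function name are placeholders):

    fn build_connect_options(dsn: &str) -> Result<sqlx::postgres::PgConnectOptions, sqlx::Error> {
        use sqlx::ConnectOptions;
        use std::str::FromStr;

        // Parse the DSN, then lower statement logging from the default of
        // INFO down to TRACE, exactly as done above.
        Ok(sqlx::postgres::PgConnectOptions::from_str(dsn)?
            .log_statements(log::LevelFilter::Trace))
    }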
@ -816,7 +816,7 @@ RETURNING *;
.bind(name) // $1 .bind(name) // $1
.bind(partition_template) // $2 .bind(partition_template) // $2
.bind(namespace_id) // $3 .bind(namespace_id) // $3
.fetch_one(&mut tx) .fetch_one(&mut *tx)
.await .await
.map_err(|e| match e { .map_err(|e| match e {
sqlx::Error::RowNotFound => Error::TableCreateLimitError { sqlx::Error::RowNotFound => Error::TableCreateLimitError {
@ -843,7 +843,8 @@ RETURNING *;
// columns with an unsupported type. // columns with an unsupported type.
for template_part in table.partition_template.parts() { for template_part in table.partition_template.parts() {
if let TemplatePart::TagValue(tag_name) = template_part { if let TemplatePart::TagValue(tag_name) = template_part {
insert_column_with_connection(&mut tx, tag_name, table.id, ColumnType::Tag).await?; insert_column_with_connection(&mut *tx, tag_name, table.id, ColumnType::Tag)
.await?;
} }
} }
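The repeated `&mut tx` to `&mut *tx` changes in this file follow from the updated sqlx API: a `Transaction` no longer acts as an `Executor` itself, but it derefs to the underlying connection, which does. A minimal sketch of the pattern, assuming a Postgres pool and placeholder statements:

    async fn two_statements(pool: &sqlx::PgPool) -> Result<(), sqlx::Error> {
        let mut tx = pool.begin().await?;
        // `&mut *tx` reborrows through Deref to a `&mut PgConnection`,
        // which implements `Executor`; the transaction itself stays usable
        // for the next statement and for the final commit.
        sqlx::query("SELECT 1").execute(&mut *tx).await?;
        sqlx::query("SELECT 2").execute(&mut *tx).await?;
        tx.commit().await
    }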
@ -1095,6 +1096,22 @@ WHERE id = $1;
Ok(Some(partition)) Ok(Some(partition))
} }
async fn get_by_id_batch(&mut self, partition_ids: Vec<PartitionId>) -> Result<Vec<Partition>> {
let ids: Vec<_> = partition_ids.iter().map(|p| p.get()).collect();
sqlx::query_as::<_, Partition>(
r#"
SELECT id, hash_id, table_id, partition_key, sort_key, new_file_at
FROM partition
WHERE id = ANY($1);
"#,
)
.bind(&ids[..]) // $1
.fetch_all(&mut self.inner)
.await
.map_err(|e| Error::SqlxError { source: e })
}
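The `= ANY($1)` binding used by `get_by_id_batch` above is the Postgres-side batch technique recommended in the sqlx FAQ. A standalone sketch against a hypothetical `example` table with a BIGINT `id` column:

    async fn existing_ids(pool: &sqlx::PgPool, candidates: &[i64]) -> Result<Vec<i64>, sqlx::Error> {
        // The whole slice is bound as a single array parameter, so one
        // round trip covers an arbitrary number of IDs.
        let found: Vec<i64> = sqlx::query_scalar("SELECT id FROM example WHERE id = ANY($1)")
            .bind(candidates)
            .fetch_all(pool)
            .await?;
        Ok(found)
    }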
async fn get_by_hash_id( async fn get_by_hash_id(
&mut self, &mut self,
partition_hash_id: &PartitionHashId, partition_hash_id: &PartitionHashId,
@ -1119,6 +1136,25 @@ WHERE hash_id = $1;
Ok(Some(partition)) Ok(Some(partition))
} }
async fn get_by_hash_id_batch(
&mut self,
partition_ids: &[&PartitionHashId],
) -> Result<Vec<Partition>> {
let ids: Vec<_> = partition_ids.iter().map(|p| p.as_bytes()).collect();
sqlx::query_as::<_, Partition>(
r#"
SELECT id, hash_id, table_id, partition_key, sort_key, new_file_at
FROM partition
WHERE hash_id = ANY($1);
"#,
)
.bind(&ids[..]) // $1
.fetch_all(&mut self.inner)
.await
.map_err(|e| Error::SqlxError { source: e })
}
async fn list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>> { async fn list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>> {
sqlx::query_as::<_, Partition>( sqlx::query_as::<_, Partition>(
r#" r#"
@ -1538,15 +1574,14 @@ WHERE object_store_id = $1;
) -> Result<Vec<Uuid>> { ) -> Result<Vec<Uuid>> {
sqlx::query( sqlx::query(
// sqlx's readme suggests using PG's ANY operator instead of IN; see link below. // sqlx's readme suggests using PG's ANY operator instead of IN; see link below.
// https://github.com/launchbadge/sqlx/blob/main/FAQ.md#how-can-i-do-a-select--where-foo-in--query
r#" r#"
SELECT object_store_id SELECT object_store_id
FROM parquet_file FROM parquet_file
WHERE object_store_id = ANY($1); WHERE object_store_id = ANY($1);
"#, "#,
) )
// from https://github.com/launchbadge/sqlx/blob/main/FAQ.md#how-can-i-do-a-select--where-foo-in--query .bind(object_store_ids) // $1
// a bug of the parameter typechecking code requires all array parameters to be slices
.bind(&object_store_ids[..]) // $1
.map(|pgr| pgr.get::<Uuid, _>("object_store_id")) .map(|pgr| pgr.get::<Uuid, _>("object_store_id"))
.fetch_all(&mut self.inner) .fetch_all(&mut self.inner)
.await .await
@ -1576,13 +1611,13 @@ WHERE object_store_id = ANY($1);
.map_err(|e| Error::StartTransaction { source: e })?; .map_err(|e| Error::StartTransaction { source: e })?;
let marked_at = Timestamp::from(self.time_provider.now()); let marked_at = Timestamp::from(self.time_provider.now());
flag_for_delete(&mut tx, delete, marked_at).await?; flag_for_delete(&mut *tx, delete, marked_at).await?;
update_compaction_level(&mut tx, upgrade, target_level).await?; update_compaction_level(&mut *tx, upgrade, target_level).await?;
let mut ids = Vec::with_capacity(create.len()); let mut ids = Vec::with_capacity(create.len());
for file in create { for file in create {
let id = create_parquet_file(&mut tx, file).await?; let id = create_parquet_file(&mut *tx, file).await?;
ids.push(id); ids.push(id);
} }
@ -1667,12 +1702,9 @@ async fn flag_for_delete<'q, E>(
where where
E: Executor<'q, Database = Postgres>, E: Executor<'q, Database = Postgres>,
{ {
// If I try to do `.bind(parquet_file_ids)` directly, I get a compile error from sqlx.
// See https://github.com/launchbadge/sqlx/issues/1744
let ids: Vec<_> = ids.iter().map(|p| p.get()).collect();
let query = sqlx::query(r#"UPDATE parquet_file SET to_delete = $1 WHERE id = ANY($2);"#) let query = sqlx::query(r#"UPDATE parquet_file SET to_delete = $1 WHERE id = ANY($2);"#)
.bind(marked_at) // $1 .bind(marked_at) // $1
.bind(&ids[..]); // $2 .bind(ids); // $2
query query
.execute(executor) .execute(executor)
.await .await
@ -1689,9 +1721,6 @@ async fn update_compaction_level<'q, E>(
where where
E: Executor<'q, Database = Postgres>, E: Executor<'q, Database = Postgres>,
{ {
// If I try to do `.bind(parquet_file_ids)` directly, I get a compile error from sqlx.
// See https://github.com/launchbadge/sqlx/issues/1744
let ids: Vec<_> = parquet_file_ids.iter().map(|p| p.get()).collect();
let query = sqlx::query( let query = sqlx::query(
r#" r#"
UPDATE parquet_file UPDATE parquet_file
@ -1700,7 +1729,7 @@ WHERE id = ANY($2);
"#, "#,
) )
.bind(compaction_level) // $1 .bind(compaction_level) // $1
.bind(&ids[..]); // $2 .bind(parquet_file_ids); // $2
query query
.execute(executor) .execute(executor)
.await .await

View File

@ -24,8 +24,8 @@ use data_types::{
Table, TableId, Timestamp, TransitionPartitionId, Table, TableId, Timestamp, TransitionPartitionId,
}; };
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::{collections::HashMap, fmt::Display}; use std::{collections::HashMap, fmt::Display};
use std::{collections::HashSet, fmt::Write};
use crate::interface::MAX_PARQUET_FILES_SELECTED_ONCE_FOR_DELETE; use crate::interface::MAX_PARQUET_FILES_SELECTED_ONCE_FOR_DELETE;
use iox_time::{SystemProvider, TimeProvider}; use iox_time::{SystemProvider, TimeProvider};
@ -577,7 +577,7 @@ RETURNING *;
.bind(name) // $1 .bind(name) // $1
.bind(partition_template) // $2 .bind(partition_template) // $2
.bind(namespace_id) // $3 .bind(namespace_id) // $3
.fetch_one(&mut tx) .fetch_one(&mut *tx)
.await .await
.map_err(|e| match e { .map_err(|e| match e {
sqlx::Error::RowNotFound => Error::TableCreateLimitError { sqlx::Error::RowNotFound => Error::TableCreateLimitError {
@ -604,7 +604,8 @@ RETURNING *;
// columns with an unsupported type. // columns with an unsupported type.
for template_part in table.partition_template.parts() { for template_part in table.partition_template.parts() {
if let TemplatePart::TagValue(tag_name) = template_part { if let TemplatePart::TagValue(tag_name) = template_part {
insert_column_with_connection(&mut tx, tag_name, table.id, ColumnType::Tag).await?; insert_column_with_connection(&mut *tx, tag_name, table.id, ColumnType::Tag)
.await?;
} }
} }
@ -891,6 +892,24 @@ WHERE id = $1;
Ok(Some(partition.into())) Ok(Some(partition.into()))
} }
async fn get_by_id_batch(&mut self, partition_ids: Vec<PartitionId>) -> Result<Vec<Partition>> {
// We use a JSON-based "IS IN" check.
let ids: Vec<_> = partition_ids.iter().map(|p| p.get()).collect();
sqlx::query_as::<_, PartitionPod>(
r#"
SELECT id, hash_id, table_id, partition_key, sort_key, new_file_at
FROM partition
WHERE id IN (SELECT value FROM json_each($1));
"#,
)
.bind(Json(&ids[..])) // $1
.fetch_all(self.inner.get_mut())
.await
.map(|vals| vals.into_iter().map(Partition::from).collect())
.map_err(|e| Error::SqlxError { source: e })
}
async fn get_by_hash_id( async fn get_by_hash_id(
&mut self, &mut self,
partition_hash_id: &PartitionHashId, partition_hash_id: &PartitionHashId,
@ -915,6 +934,38 @@ WHERE hash_id = $1;
Ok(Some(partition.into())) Ok(Some(partition.into()))
} }
async fn get_by_hash_id_batch(
&mut self,
partition_hash_ids: &[&PartitionHashId],
) -> Result<Vec<Partition>> {
// We use a JSON-based "IS IN" check.
let ids: Vec<_> = partition_hash_ids
.iter()
.map(|id| {
// convert partition hash ID to uppercase hex string
let bytes = id.as_bytes();
let mut s = String::with_capacity(bytes.len() * 2);
for b in bytes {
write!(&mut s, "{:02X}", b).expect("never fails");
}
s
})
.collect();
sqlx::query_as::<_, PartitionPod>(
r#"
SELECT id, hash_id, table_id, partition_key, sort_key, new_file_at
FROM partition
WHERE hex(hash_id) IN (SELECT value FROM json_each($1));
"#,
)
.bind(Json(&ids[..])) // $1
.fetch_all(self.inner.get_mut())
.await
.map(|vals| vals.into_iter().map(Partition::from).collect())
.map_err(|e| Error::SqlxError { source: e })
}
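SQLite has no array bind like Postgres' ANY, so the implementations above serialize the ID list to JSON, bind it as one parameter, and expand it with `json_each` (hex-encoding the BLOB hash IDs so they can be compared as text). A reduced sketch of the technique, with placeholder table and column names:

    async fn rows_in_id_set(pool: &sqlx::SqlitePool, ids: &[i64]) -> Result<Vec<i64>, sqlx::Error> {
        // One JSON parameter stands in for the whole set; `json_each`
        // turns it back into rows on the SQLite side.
        sqlx::query_scalar("SELECT id FROM example WHERE id IN (SELECT value FROM json_each($1))")
            .bind(sqlx::types::Json(ids))
            .fetch_all(pool)
            .await
    }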
async fn list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>> { async fn list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>> {
Ok(sqlx::query_as::<_, PartitionPod>( Ok(sqlx::query_as::<_, PartitionPod>(
r#" r#"
@ -1451,14 +1502,14 @@ WHERE object_store_id IN ({v});",
for id in delete { for id in delete {
let marked_at = Timestamp::from(self.time_provider.now()); let marked_at = Timestamp::from(self.time_provider.now());
flag_for_delete(&mut tx, *id, marked_at).await?; flag_for_delete(&mut *tx, *id, marked_at).await?;
} }
update_compaction_level(&mut tx, upgrade, target_level).await?; update_compaction_level(&mut *tx, upgrade, target_level).await?;
let mut ids = Vec::with_capacity(create.len()); let mut ids = Vec::with_capacity(create.len());
for file in create { for file in create {
let res = create_parquet_file(&mut tx, file.clone()).await?; let res = create_parquet_file(&mut *tx, file.clone()).await?;
ids.push(res.id); ids.push(res.id);
} }
tx.commit() tx.commit()
@ -1562,8 +1613,7 @@ async fn update_compaction_level<'q, E>(
where where
E: Executor<'q, Database = Sqlite>, E: Executor<'q, Database = Sqlite>,
{ {
// If I try to do `.bind(parquet_file_ids)` directly, I get a compile error from sqlx. // We use a JSON-based "IS IN" check.
// See https://github.com/launchbadge/sqlx/issues/1744
let ids: Vec<_> = parquet_file_ids.iter().map(|p| p.get()).collect(); let ids: Vec<_> = parquet_file_ids.iter().map(|p| p.get()).collect();
let query = sqlx::query( let query = sqlx::query(
r#" r#"

View File

@ -6,7 +6,7 @@ pub mod field;
pub mod fieldlist; pub mod fieldlist;
pub mod gapfill; pub mod gapfill;
mod non_null_checker; mod non_null_checker;
mod query_tracing; pub mod query_tracing;
mod schema_pivot; mod schema_pivot;
pub mod seriesset; pub mod seriesset;
pub(crate) mod split; pub(crate) mod split;

View File

@ -648,7 +648,7 @@ impl IOxSessionContext {
exec.spawn(fut).await.unwrap_or_else(|e| { exec.spawn(fut).await.unwrap_or_else(|e| {
Err(Error::Context( Err(Error::Context(
"Join Error".to_string(), "Join Error".to_string(),
Box::new(Error::External(e.into())), Box::new(Error::External(Box::new(e))),
)) ))
}) })
} }

View File

@ -74,11 +74,11 @@ where
/// Create new stream based on an existing stream that transports [`Result`]s. /// Create new stream based on an existing stream that transports [`Result`]s.
/// ///
/// Also receives an executor that actually executes the underlying stream as well as a converter that converts /// Also receives an executor that actually executes the underlying stream as well as a converter that converts
/// [`executor::Error`] to the error type of the stream (so we can send potential crashes/panics). /// [`executor::JobError`] to the error type of the stream (so we can send potential crashes/panics).
fn new_with_error_stream<S, C>(stream: S, exec: DedicatedExecutor, converter: C) -> Self fn new_with_error_stream<S, C>(stream: S, exec: DedicatedExecutor, converter: C) -> Self
where where
S: Stream<Item = Result<X, E>> + Send + 'static, S: Stream<Item = Result<X, E>> + Send + 'static,
C: Fn(executor::Error) -> E + Send + 'static, C: Fn(executor::JobError) -> E + Send + 'static,
{ {
Self::new_with_tx(|tx| { Self::new_with_tx(|tx| {
// future to be run in the other runtime // future to be run in the other runtime
@ -177,7 +177,7 @@ mod tests {
let barrier1_captured = Arc::clone(&barrier1); let barrier1_captured = Arc::clone(&barrier1);
let barrier2 = Arc::new(tokio::sync::Barrier::new(2)); let barrier2 = Arc::new(tokio::sync::Barrier::new(2));
let barrier2_captured = Arc::clone(&barrier2); let barrier2_captured = Arc::clone(&barrier2);
let mut stream = CrossRtStream::<Result<u8, executor::Error>>::new_with_error_stream( let mut stream = CrossRtStream::<Result<u8, executor::JobError>>::new_with_error_stream(
futures::stream::once(async move { futures::stream::once(async move {
barrier1_captured.wait().await; barrier1_captured.wait().await;
barrier2_captured.wait().await; barrier2_captured.wait().await;
@ -195,7 +195,7 @@ mod tests {
barrier2.wait().await; barrier2.wait().await;
let res = f.await.expect("streamed data"); let res = f.await.expect("streamed data");
assert_eq!(res, Ok(1)); assert_eq!(res.unwrap(), 1);
} }
#[tokio::test] #[tokio::test]
@ -212,7 +212,7 @@ mod tests {
let barrier1_captured = Arc::clone(&barrier1); let barrier1_captured = Arc::clone(&barrier1);
let barrier2 = Arc::new(std::sync::Barrier::new(2)); let barrier2 = Arc::new(std::sync::Barrier::new(2));
let barrier2_captured = Arc::clone(&barrier2); let barrier2_captured = Arc::clone(&barrier2);
let mut stream = CrossRtStream::<Result<u8, executor::Error>>::new_with_error_stream( let mut stream = CrossRtStream::<Result<u8, executor::JobError>>::new_with_error_stream(
futures::stream::once(async move { futures::stream::once(async move {
barrier1_captured.wait(); barrier1_captured.wait();
barrier2_captured.wait(); barrier2_captured.wait();
@ -230,13 +230,13 @@ mod tests {
barrier2.wait(); barrier2.wait();
let res = f.await.expect("streamed data"); let res = f.await.expect("streamed data");
assert_eq!(res, Ok(1)); assert_eq!(res.unwrap(), 1);
} }
#[tokio::test] #[tokio::test]
async fn test_panic() { async fn test_panic() {
let exec = DedicatedExecutor::new_testing(); let exec = DedicatedExecutor::new_testing();
let mut stream = CrossRtStream::<Result<(), executor::Error>>::new_with_error_stream( let mut stream = CrossRtStream::<Result<(), executor::JobError>>::new_with_error_stream(
futures::stream::once(async { panic!("foo") }), futures::stream::once(async { panic!("foo") }),
exec, exec,
std::convert::identity, std::convert::identity,
@ -247,7 +247,7 @@ mod tests {
.await .await
.expect("stream not finished") .expect("stream not finished")
.unwrap_err(); .unwrap_err();
assert_eq!(e.to_string(), "foo"); assert_eq!(e.to_string(), "Panic: foo");
let none = stream.next().await; let none = stream.next().await;
assert!(none.is_none()); assert!(none.is_none());
@ -260,7 +260,7 @@ mod tests {
let barrier1_captured = Arc::clone(&barrier1); let barrier1_captured = Arc::clone(&barrier1);
let barrier2 = Arc::new(tokio::sync::Barrier::new(2)); let barrier2 = Arc::new(tokio::sync::Barrier::new(2));
let barrier2_captured = Arc::clone(&barrier2); let barrier2_captured = Arc::clone(&barrier2);
let mut stream = CrossRtStream::<Result<u8, executor::Error>>::new_with_error_stream( let mut stream = CrossRtStream::<Result<u8, executor::JobError>>::new_with_error_stream(
futures::stream::once(async move { futures::stream::once(async move {
barrier1_captured.wait().await; barrier1_captured.wait().await;
barrier2_captured.wait().await; barrier2_captured.wait().await;
@ -281,7 +281,7 @@ mod tests {
barrier2.wait().await; barrier2.wait().await;
let res = stream.next().await.expect("streamed data"); let res = stream.next().await.expect("streamed data");
assert_eq!(res, Ok(1)); assert_eq!(res.unwrap(), 1);
} }
#[tokio::test] #[tokio::test]
@ -289,7 +289,7 @@ mod tests {
let exec = DedicatedExecutor::new_testing(); let exec = DedicatedExecutor::new_testing();
let barrier = Arc::new(tokio::sync::Barrier::new(2)); let barrier = Arc::new(tokio::sync::Barrier::new(2));
let barrier_captured = Arc::clone(&barrier); let barrier_captured = Arc::clone(&barrier);
let mut stream = CrossRtStream::<Result<u8, executor::Error>>::new_with_error_stream( let mut stream = CrossRtStream::<Result<u8, executor::JobError>>::new_with_error_stream(
futures::stream::once(async move { futures::stream::once(async move {
barrier_captured.wait().await; barrier_captured.wait().await;

View File

@ -109,7 +109,7 @@ impl Drop for TracedStream {
/// 1. If the ExecutionPlan had no metrics /// 1. If the ExecutionPlan had no metrics
/// 2. The total number of rows produced by the ExecutionPlan (if available) /// 2. The total number of rows produced by the ExecutionPlan (if available)
/// 3. The elapsed compute time taken by the ExecutionPlan /// 3. The elapsed compute time taken by the ExecutionPlan
fn send_metrics_to_tracing( pub fn send_metrics_to_tracing(
default_end_time: DateTime<Utc>, default_end_time: DateTime<Utc>,
parent_span: &Span, parent_span: &Span,
physical_plan: &dyn ExecutionPlan, physical_plan: &dyn ExecutionPlan,

View File

@ -8,7 +8,7 @@ use datafusion::{
common::tree_node::{RewriteRecursion, TreeNode, TreeNodeRewriter, VisitRecursion}, common::tree_node::{RewriteRecursion, TreeNode, TreeNodeRewriter, VisitRecursion},
error::{DataFusionError, Result}, error::{DataFusionError, Result},
logical_expr::{ logical_expr::{
expr::{ScalarFunction, ScalarUDF}, expr::{Alias, ScalarFunction, ScalarUDF},
utils::expr_to_columns, utils::expr_to_columns,
Aggregate, BuiltinScalarFunction, Extension, LogicalPlan, Projection, Aggregate, BuiltinScalarFunction, Extension, LogicalPlan, Projection,
}, },
@ -293,13 +293,26 @@ fn replace_date_bin_gapfill(group_expr: &[Expr]) -> Result<Option<RewriteInfo>>
})?; })?;
match date_bin_gapfill_count { match date_bin_gapfill_count {
0 => return Ok(None), 0 => return Ok(None),
2.. => { 1 => {
// Make sure that the call to DATE_BIN_GAPFILL is root expression
// excluding aliases.
let dbg_idx = dbg_idx.expect("should have found exactly one call");
if !matches_udf(
unwrap_alias(&group_expr[dbg_idx]),
DATE_BIN_GAPFILL_UDF_NAME,
) {
return Err(DataFusionError::Plan(
"DATE_BIN_GAPFILL must a top-level expression in the GROUP BY clause when gap filling. It cannot be part of another expression or cast".to_string(),
));
}
}
_ => {
return Err(DataFusionError::Plan( return Err(DataFusionError::Plan(
"DATE_BIN_GAPFILL specified more than once".to_string(), "DATE_BIN_GAPFILL specified more than once".to_string(),
)) ))
} }
_ => (),
} }
let date_bin_gapfill_index = dbg_idx.expect("should be found exactly one call"); let date_bin_gapfill_index = dbg_idx.expect("should be found exactly one call");
let mut rewriter = DateBinGapfillRewriter { args: None }; let mut rewriter = DateBinGapfillRewriter { args: None };
@ -323,6 +336,15 @@ fn replace_date_bin_gapfill(group_expr: &[Expr]) -> Result<Option<RewriteInfo>>
})) }))
} }
fn unwrap_alias(mut e: &Expr) -> &Expr {
loop {
match e {
Expr::Alias(Alias { expr, .. }) => e = expr.as_ref(),
e => break e,
}
}
}
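A small, self-contained illustration of what the relocated `unwrap_alias` does, using DataFusion's expression builders (the helper is repeated here verbatim so the snippet stands alone; the test name is illustrative):

    use datafusion::logical_expr::expr::Alias;
    use datafusion::prelude::{col, Expr};

    fn unwrap_alias(mut e: &Expr) -> &Expr {
        loop {
            match e {
                Expr::Alias(Alias { expr, .. }) => e = expr.as_ref(),
                e => break e,
            }
        }
    }

    #[test]
    fn unwrap_alias_peels_nested_aliases() {
        // `time AS t AS t2` unwraps back to the bare column reference.
        let nested = col("time").alias("t").alias("t2");
        assert_eq!(unwrap_alias(&nested), &col("time"));
    }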
struct DateBinGapfillRewriter { struct DateBinGapfillRewriter {
args: Option<Vec<Expr>>, args: Option<Vec<Expr>>,
} }
@ -486,17 +508,21 @@ impl FillFnRewriter {
fn count_udf(e: &Expr, name: &str) -> Result<usize> { fn count_udf(e: &Expr, name: &str) -> Result<usize> {
let mut count = 0; let mut count = 0;
e.apply(&mut |expr| { e.apply(&mut |expr| {
match expr { if matches_udf(expr, name) {
Expr::ScalarUDF(ScalarUDF { fun, .. }) if fun.name == name => { count += 1;
count += 1; }
}
_ => (),
};
Ok(VisitRecursion::Continue) Ok(VisitRecursion::Continue)
})?; })?;
Ok(count) Ok(count)
} }
fn matches_udf(e: &Expr, name: &str) -> bool {
matches!(
e,
Expr::ScalarUDF(ScalarUDF { fun, .. }) if fun.name == name
)
}
fn check_node(node: &LogicalPlan) -> Result<()> { fn check_node(node: &LogicalPlan) -> Result<()> {
node.expressions().iter().try_for_each(|expr| { node.expressions().iter().try_for_each(|expr| {
let dbg_count = count_udf(expr, DATE_BIN_GAPFILL_UDF_NAME)?; let dbg_count = count_udf(expr, DATE_BIN_GAPFILL_UDF_NAME)?;

View File

@ -7,11 +7,13 @@ use datafusion::{
DFSchema, DFSchema,
}, },
error::Result, error::Result,
logical_expr::{expr::Alias, Between, BinaryExpr, LogicalPlan, Operator}, logical_expr::{Between, BinaryExpr, LogicalPlan, Operator},
optimizer::utils::split_conjunction, optimizer::utils::split_conjunction,
prelude::{Column, Expr}, prelude::{Column, Expr},
}; };
use super::unwrap_alias;
/// Given a plan and a column, finds the predicates that use that column /// Given a plan and a column, finds the predicates that use that column
/// and return a range with expressions for upper and lower bounds. /// and return a range with expressions for upper and lower bounds.
pub fn find_time_range(plan: &LogicalPlan, time_col: &Column) -> Result<Range<Bound<Expr>>> { pub fn find_time_range(plan: &LogicalPlan, time_col: &Column) -> Result<Range<Bound<Expr>>> {
@ -65,6 +67,12 @@ impl TreeNodeVisitor for TimeRangeVisitor {
self.range = range; self.range = range;
Ok(VisitRecursion::Continue) Ok(VisitRecursion::Continue)
} }
LogicalPlan::SubqueryAlias(_) => {
// The nodes below this one refer to the column with a different table name,
// just unset the relation so we match on the column name.
self.col.relation = None;
Ok(VisitRecursion::Continue)
}
// These nodes do not alter their schema, so we can recurse through them // These nodes do not alter their schema, so we can recurse through them
LogicalPlan::Sort(_) LogicalPlan::Sort(_)
| LogicalPlan::Repartition(_) | LogicalPlan::Repartition(_)
@ -76,15 +84,6 @@ impl TreeNodeVisitor for TimeRangeVisitor {
} }
} }
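To make the SubqueryAlias handling above concrete: "unset the relation" just clears the table qualifier on the tracked column, so the visitor keeps matching by bare column name below the alias. A tiny sketch using DataFusion's `Column` type (the qualifier string and test name are placeholders):

    use datafusion::prelude::Column;

    #[test]
    fn unqualified_column_matches_by_name() {
        // A column qualified by a subquery alias...
        let mut time_col = Column::new(Some("aliased_cpu"), "time");
        assert_eq!(time_col.flat_name(), "aliased_cpu.time");
        // ...is matched purely by name once the relation is cleared, which is
        // what the SubqueryAlias arm above relies on.
        time_col.relation = None;
        assert_eq!(time_col.flat_name(), "time");
    }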
fn unwrap_alias(mut e: &Expr) -> &Expr {
loop {
match e {
Expr::Alias(Alias { expr, .. }) => e = expr.as_ref(),
e => break e,
}
}
}
/// Encapsulates the upper and lower bounds of a time column /// Encapsulates the upper and lower bounds of a time column
/// in a logical plan. /// in a logical plan.
#[derive(Clone)] #[derive(Clone)]

View File

@ -39,7 +39,7 @@ pub(super) fn accumulator(dt: &DataType) -> Result<Box<dyn Accumulator>> {
/// Calculate the intermediate merge state for the aggregator. /// Calculate the intermediate merge state for the aggregator.
pub(super) fn state_type(dt: &DataType) -> Result<Arc<Vec<DataType>>> { pub(super) fn state_type(dt: &DataType) -> Result<Arc<Vec<DataType>>> {
Ok(Arc::new(vec![ Ok(Arc::new(vec![
DataType::List(Arc::new(Field::new("state", dt.clone(), false))), DataType::List(Arc::new(Field::new("item", dt.clone(), true))),
DataType::Float64, DataType::Float64,
])) ]))
} }

View File

@ -9,18 +9,18 @@ use crate::plan::planner::select::{
}; };
use crate::plan::planner_time_range_expression::time_range_to_df_expr; use crate::plan::planner_time_range_expression::time_range_to_df_expr;
use crate::plan::rewriter::{find_table_names, rewrite_statement, ProjectionType}; use crate::plan::rewriter::{find_table_names, rewrite_statement, ProjectionType};
use crate::plan::udaf::{ use crate::plan::udaf::MOVING_AVERAGE;
derivative_udf, non_negative_derivative_udf, DIFFERENCE, MOVING_AVERAGE,
NON_NEGATIVE_DIFFERENCE,
};
use crate::plan::udf::{ use crate::plan::udf::{
derivative, difference, find_window_udfs, moving_average, non_negative_derivative, cumulative_sum, derivative, difference, find_window_udfs, moving_average,
non_negative_difference, non_negative_derivative, non_negative_difference,
}; };
use crate::plan::util::{binary_operator_to_df_operator, rebase_expr, Schemas}; use crate::plan::util::{binary_operator_to_df_operator, rebase_expr, IQLSchema};
use crate::plan::var_ref::var_ref_data_type_to_data_type; use crate::plan::var_ref::var_ref_data_type_to_data_type;
use crate::plan::{planner_rewrite_expression, udf, util_copy}; use crate::plan::{planner_rewrite_expression, udf, util_copy};
use crate::window::PERCENT_ROW_NUMBER; use crate::window::{
CUMULATIVE_SUM, DERIVATIVE, DIFFERENCE, NON_NEGATIVE_DERIVATIVE, NON_NEGATIVE_DIFFERENCE,
PERCENT_ROW_NUMBER,
};
use arrow::array::{StringBuilder, StringDictionaryBuilder}; use arrow::array::{StringBuilder, StringDictionaryBuilder};
use arrow::datatypes::{DataType, Field as ArrowField, Int32Type, Schema as ArrowSchema}; use arrow::datatypes::{DataType, Field as ArrowField, Int32Type, Schema as ArrowSchema};
use arrow::record_batch::RecordBatch; use arrow::record_batch::RecordBatch;
@ -94,7 +94,6 @@ use std::ops::{Bound, ControlFlow, Deref, Not, Range};
use std::str::FromStr; use std::str::FromStr;
use std::sync::Arc; use std::sync::Arc;
use super::ir::DataSourceSchema;
use super::parse_regex; use super::parse_regex;
use super::util::contains_expr; use super::util::contains_expr;
use super::util_copy::clone_with_replacement; use super::util_copy::clone_with_replacement;
@ -712,16 +711,14 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
continue; continue;
}; };
let schemas = Schemas::new(plan.schema())?; let schema = IQLSchema::new_from_ds_schema(plan.schema(), ds.schema(self.s)?)?;
let ds_schema = ds.schema(self.s)?;
let plan = self.plan_condition_time_range( let plan = self.plan_condition_time_range(
ctx.condition, ctx.condition,
ctx.extended_time_range(), ctx.extended_time_range(),
plan, plan,
&schemas, &schema,
&ds_schema,
)?; )?;
plans.push((plan, ds_schema)); plans.push((plan, schema));
} }
Ok(match plans.len() { Ok(match plans.len() {
@ -797,10 +794,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
/// Plan "Raw" SELECT queriers, These are queries that have no grouping /// Plan "Raw" SELECT queriers, These are queries that have no grouping
/// and call only scalar functions. /// and call only scalar functions.
fn project_select_raw(&self, input: LogicalPlan, fields: &[Field]) -> Result<LogicalPlan> { fn project_select_raw(&self, input: LogicalPlan, fields: &[Field]) -> Result<LogicalPlan> {
let schemas = Schemas::new(input.schema())?; let schema = IQLSchema::new_from_fields(input.schema(), fields)?;
// Transform InfluxQL AST field expressions to a list of DataFusion expressions. // Transform InfluxQL AST field expressions to a list of DataFusion expressions.
let select_exprs = self.field_list_to_exprs(&input, fields, &schemas)?; let select_exprs = self.field_list_to_exprs(&input, fields, &schema)?;
// Wrap the plan in a `LogicalPlan::Projection` from the select expressions // Wrap the plan in a `LogicalPlan::Projection` from the select expressions
project(input, select_exprs) project(input, select_exprs)
@ -813,10 +810,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
input: LogicalPlan, input: LogicalPlan,
fields: &[Field], fields: &[Field],
) -> Result<LogicalPlan> { ) -> Result<LogicalPlan> {
let schemas = Schemas::new(input.schema())?; let schema = IQLSchema::new_from_fields(input.schema(), fields)?;
// Transform InfluxQL AST field expressions to a list of DataFusion expressions. // Transform InfluxQL AST field expressions to a list of DataFusion expressions.
let mut select_exprs = self.field_list_to_exprs(&input, fields, &schemas)?; let mut select_exprs = self.field_list_to_exprs(&input, fields, &schema)?;
// This is a special case, where exactly one column can be projected with a `DISTINCT` // This is a special case, where exactly one column can be projected with a `DISTINCT`
// clause or the `distinct` function. // clause or the `distinct` function.
@ -850,10 +847,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
fields: &[Field], fields: &[Field],
group_by_tag_set: &[&str], group_by_tag_set: &[&str],
) -> Result<LogicalPlan> { ) -> Result<LogicalPlan> {
let schemas = Schemas::new(input.schema())?; let schema = IQLSchema::new_from_fields(input.schema(), fields)?;
// Transform InfluxQL AST field expressions to a list of DataFusion expressions. // Transform InfluxQL AST field expressions to a list of DataFusion expressions.
let select_exprs = self.field_list_to_exprs(&input, fields, &schemas)?; let select_exprs = self.field_list_to_exprs(&input, fields, &schema)?;
let (plan, select_exprs) = let (plan, select_exprs) =
self.select_aggregate(ctx, input, fields, select_exprs, group_by_tag_set)?; self.select_aggregate(ctx, input, fields, select_exprs, group_by_tag_set)?;
@ -871,10 +868,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
fields: &[Field], fields: &[Field],
group_by_tag_set: &[&str], group_by_tag_set: &[&str],
) -> Result<LogicalPlan> { ) -> Result<LogicalPlan> {
let schemas = Schemas::new(input.schema())?; let schema = IQLSchema::new_from_fields(input.schema(), fields)?;
// Transform InfluxQL AST field expressions to a list of DataFusion expressions. // Transform InfluxQL AST field expressions to a list of DataFusion expressions.
let select_exprs = self.field_list_to_exprs(&input, fields, &schemas)?; let select_exprs = self.field_list_to_exprs(&input, fields, &schema)?;
let (plan, select_exprs) = let (plan, select_exprs) =
self.select_window(ctx, input, select_exprs, group_by_tag_set)?; self.select_window(ctx, input, select_exprs, group_by_tag_set)?;
@ -909,10 +906,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
fields: &[Field], fields: &[Field],
group_by_tag_set: &[&str], group_by_tag_set: &[&str],
) -> Result<LogicalPlan> { ) -> Result<LogicalPlan> {
let schemas = Schemas::new(input.schema())?; let schema = IQLSchema::new_from_fields(input.schema(), fields)?;
// Transform InfluxQL AST field expressions to a list of DataFusion expressions. // Transform InfluxQL AST field expressions to a list of DataFusion expressions.
let select_exprs = self.field_list_to_exprs(&input, fields, &schemas)?; let select_exprs = self.field_list_to_exprs(&input, fields, &schema)?;
let (plan, select_exprs) = let (plan, select_exprs) =
self.select_aggregate(ctx, input, fields, select_exprs, group_by_tag_set)?; self.select_aggregate(ctx, input, fields, select_exprs, group_by_tag_set)?;
@ -953,7 +950,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
fields: &[Field], fields: &[Field],
group_by_tag_set: &[&str], group_by_tag_set: &[&str],
) -> Result<LogicalPlan> { ) -> Result<LogicalPlan> {
let schemas = Schemas::new(input.schema())?; let schema = IQLSchema::new_from_fields(input.schema(), fields)?;
let (selector_index, field_key, plan) = match Selector::find_enumerated(fields)? { let (selector_index, field_key, plan) = match Selector::find_enumerated(fields)? {
(_, Selector::First { .. }) (_, Selector::First { .. })
@ -1027,7 +1024,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
}); });
// Transform InfluxQL AST field expressions to a list of DataFusion expressions. // Transform InfluxQL AST field expressions to a list of DataFusion expressions.
let select_exprs = self.field_list_to_exprs(&plan, fields_vec.as_slice(), &schemas)?; let select_exprs = self.field_list_to_exprs(&plan, fields_vec.as_slice(), &schema)?;
// Wrap the plan in a `LogicalPlan::Projection` from the select expressions // Wrap the plan in a `LogicalPlan::Projection` from the select expressions
project(plan, select_exprs) project(plan, select_exprs)
@ -1043,7 +1040,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
fields: &[Field], fields: &[Field],
group_by_tag_set: &[&str], group_by_tag_set: &[&str],
) -> Result<LogicalPlan> { ) -> Result<LogicalPlan> {
let schemas = Schemas::new(input.schema())?; let schema = IQLSchema::new_from_fields(input.schema(), fields)?;
let (selector_index, is_bottom, field_key, tag_keys, narg) = let (selector_index, is_bottom, field_key, tag_keys, narg) =
match Selector::find_enumerated(fields)? { match Selector::find_enumerated(fields)? {
@ -1098,7 +1095,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
} }
// Transform InfluxQL AST field expressions to a list of DataFusion expressions. // Transform InfluxQL AST field expressions to a list of DataFusion expressions.
let select_exprs = self.field_list_to_exprs(&input, fields_vec.as_slice(), &schemas)?; let select_exprs = self.field_list_to_exprs(&input, fields_vec.as_slice(), &schema)?;
let plan = if !tag_keys.is_empty() { let plan = if !tag_keys.is_empty() {
self.select_first(ctx, input, order_by, internal_group_by.as_slice(), 1)? self.select_first(ctx, input, order_by, internal_group_by.as_slice(), 1)?
@ -1326,18 +1323,25 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
_ => None, _ => None,
}; };
// Some aggregates, such as COUNT, should be filled with zero by default
// rather than NULL.
let should_zero_fill_expr = fields
.iter()
.map(is_zero_filled_aggregate_field)
.collect::<Vec<_>>();
// Rewrite the aggregate columns from the projection, so that the expressions // Rewrite the aggregate columns from the projection, so that the expressions
// refer to the columns from the aggregate projection // refer to the columns from the aggregate projection
let select_exprs_post_aggr = select_exprs let select_exprs_post_aggr = select_exprs
.iter() .iter()
.zip(should_fill_expr) .zip(should_fill_expr.iter().zip(should_zero_fill_expr))
.map(|(expr, should_fill)| { .map(|(expr, (should_fill, should_zero_fill))| {
// This implements the `FILL(<value>)` strategy, by coalescing any aggregate // This implements the `FILL(<value>)` strategy, by coalescing any aggregate
// expressions to `<value>` when they are `NULL`. // expressions to `<value>` when they are `NULL`.
let fill_if_null = if fill_if_null.is_some() && should_fill { let fill_if_null = match (fill_if_null, should_fill, should_zero_fill) {
fill_if_null (Some(_), true, _) => fill_if_null,
} else { (None, true, true) => Some(0.into()),
None _ => None,
}; };
rebase_expr(expr, &aggr_projection_exprs, &fill_if_null, &plan) rebase_expr(expr, &aggr_projection_exprs, &fill_if_null, &plan)
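The zipped fill logic above can be hard to read inline; restated as a free function over a simplified value type (this helper is illustrative only, not part of the planner):

    /// Decide what an aggregate expression should be coalesced to when it is
    /// NULL: an explicit FILL(<value>) wins, zero-fillable aggregates such as
    /// COUNT default to 0, and everything else stays NULL.
    fn effective_fill(
        fill_if_null: Option<f64>,
        should_fill: bool,
        should_zero_fill: bool,
    ) -> Option<f64> {
        match (fill_if_null, should_fill, should_zero_fill) {
            (Some(v), true, _) => Some(v),
            (None, true, true) => Some(0.0),
            _ => None,
        }
    }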
@ -1450,17 +1454,17 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
return error::internal(format!("udf_to_expr: unexpected expression: {e}")) return error::internal(format!("udf_to_expr: unexpected expression: {e}"))
}; };
fn derivative_unit(ctx: &Context<'_>, args: &Vec<Expr>) -> Result<i64> { fn derivative_unit(ctx: &Context<'_>, args: &Vec<Expr>) -> Result<ScalarValue> {
if args.len() > 1 { if args.len() > 1 {
if let Expr::Literal(ScalarValue::IntervalMonthDayNano(Some(v))) = args[1] { if let Expr::Literal(v) = &args[1] {
Ok(v as i64) Ok(v.clone())
} else { } else {
error::internal(format!("udf_to_expr: unexpected expression: {}", args[1])) error::internal(format!("udf_to_expr: unexpected expression: {}", args[1]))
} }
} else if let Some(interval) = ctx.interval { } else if let Some(interval) = ctx.interval {
Ok(interval.duration) Ok(ScalarValue::new_interval_mdn(0, 0, interval.duration))
} else { } else {
Ok(1000000000) // 1s Ok(ScalarValue::new_interval_mdn(0, 0, 1_000_000_000)) // 1s
} }
} }
@ -1478,63 +1482,77 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
}) })
.alias(alias)), .alias(alias)),
Some(udf::WindowFunction::Difference) => Ok(Expr::WindowFunction(WindowFunction { Some(udf::WindowFunction::Difference) => Ok(Expr::WindowFunction(WindowFunction {
fun: window_function::WindowFunction::AggregateUDF(DIFFERENCE.clone()), fun: DIFFERENCE.clone(),
args, args,
partition_by, partition_by,
order_by, order_by,
window_frame: WindowFrame { window_frame: WindowFrame {
units: WindowFrameUnits::Rows, units: WindowFrameUnits::Rows,
start_bound: WindowFrameBound::Preceding(ScalarValue::Null), start_bound: WindowFrameBound::Preceding(ScalarValue::Null),
end_bound: WindowFrameBound::CurrentRow, end_bound: WindowFrameBound::Following(ScalarValue::Null),
}, },
}) })
.alias(alias)), .alias(alias)),
Some(udf::WindowFunction::NonNegativeDifference) => { Some(udf::WindowFunction::NonNegativeDifference) => {
Ok(Expr::WindowFunction(WindowFunction { Ok(Expr::WindowFunction(WindowFunction {
fun: window_function::WindowFunction::AggregateUDF( fun: NON_NEGATIVE_DIFFERENCE.clone(),
NON_NEGATIVE_DIFFERENCE.clone(),
),
args, args,
partition_by, partition_by,
order_by, order_by,
window_frame: WindowFrame { window_frame: WindowFrame {
units: WindowFrameUnits::Rows, units: WindowFrameUnits::Rows,
start_bound: WindowFrameBound::Preceding(ScalarValue::Null), start_bound: WindowFrameBound::Preceding(ScalarValue::Null),
end_bound: WindowFrameBound::CurrentRow, end_bound: WindowFrameBound::Following(ScalarValue::Null),
}, },
}) })
.alias(alias)) .alias(alias))
} }
Some(udf::WindowFunction::Derivative) => Ok(Expr::WindowFunction(WindowFunction { Some(udf::WindowFunction::Derivative) => Ok(Expr::WindowFunction(WindowFunction {
fun: window_function::WindowFunction::AggregateUDF( fun: DERIVATIVE.clone(),
derivative_udf(derivative_unit(ctx, &args)?).into(), args: vec![
), args[0].clone(),
args: vec!["time".as_expr(), args[0].clone()], lit(derivative_unit(ctx, &args)?),
"time".as_expr(),
],
partition_by, partition_by,
order_by, order_by,
window_frame: WindowFrame { window_frame: WindowFrame {
units: WindowFrameUnits::Rows, units: WindowFrameUnits::Rows,
start_bound: WindowFrameBound::Preceding(ScalarValue::Null), start_bound: WindowFrameBound::Preceding(ScalarValue::Null),
end_bound: WindowFrameBound::CurrentRow, end_bound: WindowFrameBound::Following(ScalarValue::Null),
}, },
}) })
.alias(alias)), .alias(alias)),
Some(udf::WindowFunction::NonNegativeDerivative) => { Some(udf::WindowFunction::NonNegativeDerivative) => {
Ok(Expr::WindowFunction(WindowFunction { Ok(Expr::WindowFunction(WindowFunction {
fun: window_function::WindowFunction::AggregateUDF( fun: NON_NEGATIVE_DERIVATIVE.clone(),
non_negative_derivative_udf(derivative_unit(ctx, &args)?).into(), args: vec![
), args[0].clone(),
args: vec!["time".as_expr(), args[0].clone()], lit(derivative_unit(ctx, &args)?),
"time".as_expr(),
],
partition_by, partition_by,
order_by, order_by,
window_frame: WindowFrame { window_frame: WindowFrame {
units: WindowFrameUnits::Rows, units: WindowFrameUnits::Rows,
start_bound: WindowFrameBound::Preceding(ScalarValue::Null), start_bound: WindowFrameBound::Preceding(ScalarValue::Null),
end_bound: WindowFrameBound::CurrentRow, end_bound: WindowFrameBound::Following(ScalarValue::Null),
}, },
}) })
.alias(alias)) .alias(alias))
} }
Some(udf::WindowFunction::CumulativeSum) => Ok(Expr::WindowFunction(WindowFunction {
fun: CUMULATIVE_SUM.clone(),
args,
partition_by,
order_by,
window_frame: WindowFrame {
units: WindowFrameUnits::Rows,
start_bound: WindowFrameBound::Preceding(ScalarValue::Null),
end_bound: WindowFrameBound::Following(ScalarValue::Null),
},
})
.alias(alias)),
None => error::internal(format!( None => error::internal(format!(
"unexpected user-defined window function: {}", "unexpected user-defined window function: {}",
fun.name fun.name
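One detail worth noting in the window-function arms above: the end bound changed from `CurrentRow` to `Following(ScalarValue::Null)`. In DataFusion a null bound value encodes UNBOUNDED, so these UDFs now see the whole partition rather than only the rows up to the current one. A sketch of the frame they construct, with the imports spelled out:

    use datafusion::logical_expr::{WindowFrame, WindowFrameBound, WindowFrameUnits};
    use datafusion::scalar::ScalarValue;

    fn whole_partition_frame() -> WindowFrame {
        // ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: a null
        // bound value is DataFusion's encoding of UNBOUNDED.
        WindowFrame {
            units: WindowFrameUnits::Rows,
            start_bound: WindowFrameBound::Preceding(ScalarValue::Null),
            end_bound: WindowFrameBound::Following(ScalarValue::Null),
        }
    }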
@ -1688,7 +1706,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
&self, &self,
plan: &LogicalPlan, plan: &LogicalPlan,
fields: &[Field], fields: &[Field],
schemas: &Schemas, schema: &IQLSchema<'_>,
) -> Result<Vec<Expr>> { ) -> Result<Vec<Expr>> {
let mut names: HashMap<&str, usize> = HashMap::new(); let mut names: HashMap<&str, usize> = HashMap::new();
fields fields
@ -1708,7 +1726,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
}; };
new_field new_field
}) })
.map(|field| self.field_to_df_expr(&field, plan, schemas)) .map(|field| self.field_to_df_expr(&field, plan, schema))
.collect() .collect()
} }
@ -1719,10 +1737,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
&self, &self,
field: &Field, field: &Field,
plan: &LogicalPlan, plan: &LogicalPlan,
schemas: &Schemas, schema: &IQLSchema<'_>,
) -> Result<Expr> { ) -> Result<Expr> {
let expr = self.expr_to_df_expr(ExprScope::Projection, &field.expr, schemas)?; let expr = self.expr_to_df_expr(ExprScope::Projection, &field.expr, schema)?;
let expr = planner_rewrite_expression::rewrite_field_expr(expr, schemas)?; let expr = planner_rewrite_expression::rewrite_field_expr(expr, schema)?;
normalize_col(expr.alias(&field.name), plan) normalize_col(expr.alias(&field.name), plan)
} }
@ -1730,16 +1748,14 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
fn conditional_to_df_expr( fn conditional_to_df_expr(
&self, &self,
iql: &ConditionalExpression, iql: &ConditionalExpression,
schemas: &Schemas, schema: &IQLSchema<'_>,
) -> Result<Expr> { ) -> Result<Expr> {
match iql { match iql {
ConditionalExpression::Expr(expr) => { ConditionalExpression::Expr(expr) => {
self.expr_to_df_expr(ExprScope::Where, expr, schemas) self.expr_to_df_expr(ExprScope::Where, expr, schema)
} }
ConditionalExpression::Binary(expr) => { ConditionalExpression::Binary(expr) => self.binary_conditional_to_df_expr(expr, schema),
self.binary_conditional_to_df_expr(expr, schemas) ConditionalExpression::Grouped(e) => self.conditional_to_df_expr(e, schema),
}
ConditionalExpression::Grouped(e) => self.conditional_to_df_expr(e, schemas),
} }
} }
@ -1747,20 +1763,25 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
fn binary_conditional_to_df_expr( fn binary_conditional_to_df_expr(
&self, &self,
expr: &ConditionalBinary, expr: &ConditionalBinary,
schemas: &Schemas, schema: &IQLSchema<'_>,
) -> Result<Expr> { ) -> Result<Expr> {
let ConditionalBinary { lhs, op, rhs } = expr; let ConditionalBinary { lhs, op, rhs } = expr;
Ok(binary_expr( Ok(binary_expr(
self.conditional_to_df_expr(lhs, schemas)?, self.conditional_to_df_expr(lhs, schema)?,
conditional_op_to_operator(*op)?, conditional_op_to_operator(*op)?,
self.conditional_to_df_expr(rhs, schemas)?, self.conditional_to_df_expr(rhs, schema)?,
)) ))
} }
/// Map an InfluxQL [`IQLExpr`] to a DataFusion [`Expr`]. /// Map an InfluxQL [`IQLExpr`] to a DataFusion [`Expr`].
fn expr_to_df_expr(&self, scope: ExprScope, iql: &IQLExpr, schemas: &Schemas) -> Result<Expr> { fn expr_to_df_expr(
let schema = &schemas.df_schema; &self,
scope: ExprScope,
iql: &IQLExpr,
schema: &IQLSchema<'_>,
) -> Result<Expr> {
let df_schema = &schema.df_schema;
match iql { match iql {
// rewriter is expected to expand wildcard expressions // rewriter is expected to expand wildcard expressions
IQLExpr::Wildcard(_) => error::internal("unexpected wildcard in projection"), IQLExpr::Wildcard(_) => error::internal("unexpected wildcard in projection"),
@ -1777,7 +1798,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
"time".as_expr() "time".as_expr()
} }
(ExprScope::Projection, "time") => "time".as_expr(), (ExprScope::Projection, "time") => "time".as_expr(),
(_, name) => match schema (_, name) => match df_schema
.fields_with_unqualified_name(name) .fields_with_unqualified_name(name)
.first() .first()
.map(|f| f.data_type().clone()) .map(|f| f.data_type().clone())
@ -1801,7 +1822,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
// and it is safe to unconditionally unwrap, as the // and it is safe to unconditionally unwrap, as the
// `is_numeric_type` call guarantees it can be mapped to // `is_numeric_type` call guarantees it can be mapped to
// an Arrow DataType // an Arrow DataType
column.cast_to(&dst_type, &schemas.df_schema)? column.cast_to(&dst_type, &schema.df_schema)?
} else { } else {
// If the cast is incompatible, evaluates to NULL // If the cast is incompatible, evaluates to NULL
Expr::Literal(ScalarValue::Null) Expr::Literal(ScalarValue::Null)
@ -1839,9 +1860,9 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
}, },
// A DISTINCT <ident> clause should have been replaced by `rewrite_statement`. // A DISTINCT <ident> clause should have been replaced by `rewrite_statement`.
IQLExpr::Distinct(_) => error::internal("distinct expression"), IQLExpr::Distinct(_) => error::internal("distinct expression"),
IQLExpr::Call(call) => self.call_to_df_expr(scope, call, schemas), IQLExpr::Call(call) => self.call_to_df_expr(scope, call, schema),
IQLExpr::Binary(expr) => self.arithmetic_expr_to_df_expr(scope, expr, schemas), IQLExpr::Binary(expr) => self.arithmetic_expr_to_df_expr(scope, expr, schema),
IQLExpr::Nested(e) => self.expr_to_df_expr(scope, e, schemas), IQLExpr::Nested(e) => self.expr_to_df_expr(scope, e, schema),
} }
} }
@ -1861,9 +1882,14 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
/// > * <https://github.com/influxdata/influxdb_iox/issues/6939> /// > * <https://github.com/influxdata/influxdb_iox/issues/6939>
/// ///
/// [docs]: https://docs.influxdata.com/influxdb/v1.8/query_language/functions/ /// [docs]: https://docs.influxdata.com/influxdb/v1.8/query_language/functions/
fn call_to_df_expr(&self, scope: ExprScope, call: &Call, schemas: &Schemas) -> Result<Expr> { fn call_to_df_expr(
&self,
scope: ExprScope,
call: &Call,
schema: &IQLSchema<'_>,
) -> Result<Expr> {
if is_scalar_math_function(call.name.as_str()) { if is_scalar_math_function(call.name.as_str()) {
return self.scalar_math_func_to_df_expr(scope, call, schemas); return self.scalar_math_func_to_df_expr(scope, call, schema);
} }
match scope { match scope {
@ -1875,7 +1901,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
error::query(format!("invalid function call in condition: {name}")) error::query(format!("invalid function call in condition: {name}"))
} }
} }
ExprScope::Projection => self.function_to_df_expr(scope, call, schemas), ExprScope::Projection => self.function_to_df_expr(scope, call, schema),
} }
} }
@ -1883,7 +1909,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
&self, &self,
scope: ExprScope, scope: ExprScope,
call: &Call, call: &Call,
schemas: &Schemas, schema: &IQLSchema<'_>,
) -> Result<Expr> { ) -> Result<Expr> {
fn check_arg_count(name: &str, args: &[IQLExpr], count: usize) -> Result<()> { fn check_arg_count(name: &str, args: &[IQLExpr], count: usize) -> Result<()> {
let got = args.len(); let got = args.len();
@ -1918,13 +1944,13 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
// The DISTINCT function is handled as a `ProjectionType::RawDistinct` // The DISTINCT function is handled as a `ProjectionType::RawDistinct`
// query, so the planner only needs to project the single column // query, so the planner only needs to project the single column
// argument. // argument.
"distinct" => self.expr_to_df_expr(scope, &args[0], schemas), "distinct" => self.expr_to_df_expr(scope, &args[0], schema),
"count" => { "count" => {
let (expr, distinct) = match &args[0] { let (expr, distinct) = match &args[0] {
IQLExpr::Call(c) if c.name == "distinct" => { IQLExpr::Call(c) if c.name == "distinct" => {
(self.expr_to_df_expr(scope, &c.args[0], schemas)?, true) (self.expr_to_df_expr(scope, &c.args[0], schema)?, true)
} }
expr => (self.expr_to_df_expr(scope, expr, schemas)?, false), expr => (self.expr_to_df_expr(scope, expr, schema)?, false),
}; };
if let Expr::Literal(ScalarValue::Null) = expr { if let Expr::Literal(ScalarValue::Null) = expr {
return Ok(expr); return Ok(expr);
@ -1940,7 +1966,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
))) )))
} }
"sum" | "stddev" | "mean" | "median" => { "sum" | "stddev" | "mean" | "median" => {
let expr = self.expr_to_df_expr(scope, &args[0], schemas)?; let expr = self.expr_to_df_expr(scope, &args[0], schema)?;
if let Expr::Literal(ScalarValue::Null) = expr { if let Expr::Literal(ScalarValue::Null) = expr {
return Ok(expr); return Ok(expr);
} }
@ -1955,13 +1981,13 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
))) )))
} }
"percentile" => { "percentile" => {
let expr = self.expr_to_df_expr(scope, &args[0], schemas)?; let expr = self.expr_to_df_expr(scope, &args[0], schema)?;
if let Expr::Literal(ScalarValue::Null) = expr { if let Expr::Literal(ScalarValue::Null) = expr {
return Ok(expr); return Ok(expr);
} }
check_arg_count(name, args, 2)?; check_arg_count(name, args, 2)?;
let nexpr = self.expr_to_df_expr(scope, &args[1], schemas)?; let nexpr = self.expr_to_df_expr(scope, &args[1], schema)?;
Ok(Expr::AggregateUDF(expr::AggregateUDF::new( Ok(Expr::AggregateUDF(expr::AggregateUDF::new(
PERCENTILE.clone(), PERCENTILE.clone(),
vec![expr, nexpr], vec![expr, nexpr],
@ -1970,7 +1996,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
))) )))
} }
name @ ("first" | "last" | "min" | "max") => { name @ ("first" | "last" | "min" | "max") => {
let expr = self.expr_to_df_expr(scope, &args[0], schemas)?; let expr = self.expr_to_df_expr(scope, &args[0], schema)?;
if let Expr::Literal(ScalarValue::Null) = expr { if let Expr::Literal(ScalarValue::Null) = expr {
return Ok(expr); return Ok(expr);
} }
@ -1993,7 +2019,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
check_arg_count(name, args, 1)?; check_arg_count(name, args, 1)?;
// arg0 should be a column or function // arg0 should be a column or function
let arg0 = self.expr_to_df_expr(scope, &args[0], schemas)?; let arg0 = self.expr_to_df_expr(scope, &args[0], schema)?;
if let Expr::Literal(ScalarValue::Null) = arg0 { if let Expr::Literal(ScalarValue::Null) = arg0 {
return Ok(arg0); return Ok(arg0);
} }
@ -2004,7 +2030,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
check_arg_count(name, args, 1)?; check_arg_count(name, args, 1)?;
// arg0 should be a column or function // arg0 should be a column or function
let arg0 = self.expr_to_df_expr(scope, &args[0], schemas)?; let arg0 = self.expr_to_df_expr(scope, &args[0], schema)?;
if let Expr::Literal(ScalarValue::Null) = arg0 { if let Expr::Literal(ScalarValue::Null) = arg0 {
return Ok(arg0); return Ok(arg0);
} }
@ -2015,14 +2041,14 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
check_arg_count(name, args, 2)?; check_arg_count(name, args, 2)?;
// arg0 should be a column or function // arg0 should be a column or function
let arg0 = self.expr_to_df_expr(scope, &args[0], schemas)?; let arg0 = self.expr_to_df_expr(scope, &args[0], schema)?;
if let Expr::Literal(ScalarValue::Null) = arg0 { if let Expr::Literal(ScalarValue::Null) = arg0 {
return Ok(arg0); return Ok(arg0);
} }
// arg1 should be an integer. // arg1 should be an integer.
let arg1 = ScalarValue::Int64(Some( let arg1 = ScalarValue::Int64(Some(
match self.expr_to_df_expr(scope, &args[1], schemas)? { match self.expr_to_df_expr(scope, &args[1], schema)? {
Expr::Literal(ScalarValue::Int64(Some(v))) => v, Expr::Literal(ScalarValue::Int64(Some(v))) => v,
Expr::Literal(ScalarValue::UInt64(Some(v))) => v as i64, Expr::Literal(ScalarValue::UInt64(Some(v))) => v as i64,
_ => { _ => {
@ -2039,13 +2065,13 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
check_arg_count_range(name, args, 1, 2)?; check_arg_count_range(name, args, 1, 2)?;
// arg0 should be a column or function // arg0 should be a column or function
let arg0 = self.expr_to_df_expr(scope, &args[0], schemas)?; let arg0 = self.expr_to_df_expr(scope, &args[0], schema)?;
if let Expr::Literal(ScalarValue::Null) = arg0 { if let Expr::Literal(ScalarValue::Null) = arg0 {
return Ok(arg0); return Ok(arg0);
} }
let mut eargs = vec![arg0]; let mut eargs = vec![arg0];
if args.len() > 1 { if args.len() > 1 {
let arg1 = self.expr_to_df_expr(scope, &args[1], schemas)?; let arg1 = self.expr_to_df_expr(scope, &args[1], schema)?;
eargs.push(arg1); eargs.push(arg1);
} }
@ -2055,22 +2081,33 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
check_arg_count_range(name, args, 1, 2)?; check_arg_count_range(name, args, 1, 2)?;
// arg0 should be a column or function // arg0 should be a column or function
let arg0 = self.expr_to_df_expr(scope, &args[0], schemas)?; let arg0 = self.expr_to_df_expr(scope, &args[0], schema)?;
if let Expr::Literal(ScalarValue::Null) = arg0 { if let Expr::Literal(ScalarValue::Null) = arg0 {
return Ok(arg0); return Ok(arg0);
} }
let mut eargs = vec![arg0]; let mut eargs = vec![arg0];
if args.len() > 1 { if args.len() > 1 {
let arg1 = self.expr_to_df_expr(scope, &args[1], schemas)?; let arg1 = self.expr_to_df_expr(scope, &args[1], schema)?;
eargs.push(arg1); eargs.push(arg1);
} }
Ok(non_negative_derivative(eargs)) Ok(non_negative_derivative(eargs))
} }
"cumulative_sum" => {
check_arg_count(name, args, 1)?;
// arg0 should be a column or function
let arg0 = self.expr_to_df_expr(scope, &args[0], schema)?;
if let Expr::Literal(ScalarValue::Null) = arg0 {
return Ok(arg0);
}
Ok(cumulative_sum(vec![arg0]))
}
// The TOP/BOTTOM function is handled as a `ProjectionType::TopBottomSelector` // The TOP/BOTTOM function is handled as a `ProjectionType::TopBottomSelector`
// query, so the planner only needs to project the single column // query, so the planner only needs to project the single column
// argument. // argument.
"top" | "bottom" => self.expr_to_df_expr(scope, &args[0], schemas), "top" | "bottom" => self.expr_to_df_expr(scope, &args[0], schema),
_ => error::query(format!("Invalid function '{name}'")), _ => error::query(format!("Invalid function '{name}'")),
} }
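All of the window-style arms above, including the new `cumulative_sum` one, share the same shape: validate the argument count, lower the first argument, pass a NULL literal straight through (as elsewhere in this planner, a reference to a missing field lowers to NULL), and otherwise wrap the lowered argument in the corresponding window expression. A minimal sketch of that shared shape; `plan_window_arg` and the `make_call` closure are illustrative names, not part of this change:

    use datafusion_common::ScalarValue;
    use datafusion_expr::Expr;

    // Illustrative only: mirrors the arm pattern used above.
    fn plan_window_arg(arg0: Expr, make_call: impl FnOnce(Vec<Expr>) -> Expr) -> Expr {
        if let Expr::Literal(ScalarValue::Null) = arg0 {
            // Keep the NULL literal untouched so the projection simply
            // yields NULL rather than erroring.
            return arg0;
        }
        make_call(vec![arg0])
    }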
@ -2081,12 +2118,12 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
&self, &self,
scope: ExprScope, scope: ExprScope,
call: &Call, call: &Call,
schemas: &Schemas, schema: &IQLSchema<'a>,
) -> Result<Expr> { ) -> Result<Expr> {
let args = call let args = call
.args .args
.iter() .iter()
.map(|e| self.expr_to_df_expr(scope, e, schemas)) .map(|e| self.expr_to_df_expr(scope, e, schema))
.collect::<Result<Vec<Expr>>>()?; .collect::<Result<Vec<Expr>>>()?;
match BuiltinScalarFunction::from_str(call.name.as_str())? { match BuiltinScalarFunction::from_str(call.name.as_str())? {
@ -2109,12 +2146,12 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
&self, &self,
scope: ExprScope, scope: ExprScope,
expr: &Binary, expr: &Binary,
schemas: &Schemas, schema: &IQLSchema<'_>,
) -> Result<Expr> { ) -> Result<Expr> {
Ok(binary_expr( Ok(binary_expr(
self.expr_to_df_expr(scope, &expr.lhs, schemas)?, self.expr_to_df_expr(scope, &expr.lhs, schema)?,
binary_operator_to_df_operator(expr.op), binary_operator_to_df_operator(expr.op),
self.expr_to_df_expr(scope, &expr.rhs, schemas)?, self.expr_to_df_expr(scope, &expr.rhs, schema)?,
)) ))
} }
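For reference, the output of this helper is an ordinary DataFusion binary expression built over the already-lowered operands. A self-contained sketch using the public `datafusion_expr` builders; the column and literal here are arbitrary examples, not taken from this change:

    use datafusion_expr::{binary_expr, col, lit, Expr, Operator};

    // e.g. an InfluxQL expression like `usage_idle + 5` lowers to roughly:
    fn lowered_example() -> Expr {
        binary_expr(col("usage_idle"), Operator::Plus, lit(5_i64))
    }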
@ -2123,17 +2160,15 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
condition: Option<&ConditionalExpression>, condition: Option<&ConditionalExpression>,
time_range: TimeRange, time_range: TimeRange,
plan: LogicalPlan, plan: LogicalPlan,
schemas: &Schemas, schema: &IQLSchema<'a>,
ds_schema: &DataSourceSchema<'_>,
) -> Result<LogicalPlan> { ) -> Result<LogicalPlan> {
let filter_expr = condition let filter_expr = condition
.map(|condition| { .map(|condition| {
let filter_expr = self.conditional_to_df_expr(condition, schemas)?; let filter_expr = self.conditional_to_df_expr(condition, schema)?;
planner_rewrite_expression::rewrite_conditional_expr( planner_rewrite_expression::rewrite_conditional_expr(
self.s.execution_props(), self.s.execution_props(),
filter_expr, filter_expr,
schemas, schema,
ds_schema,
) )
}) })
.transpose()?; .transpose()?;
@ -2156,8 +2191,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
plan: LogicalPlan, plan: LogicalPlan,
condition: &Option<WhereClause>, condition: &Option<WhereClause>,
cutoff: MetadataCutoff, cutoff: MetadataCutoff,
schemas: &Schemas, schema: &IQLSchema<'_>,
ds_schema: &DataSourceSchema<'_>,
) -> Result<LogicalPlan> { ) -> Result<LogicalPlan> {
let start_time = Timestamp::from(self.s.execution_props().query_execution_start_time); let start_time = Timestamp::from(self.s.execution_props().query_execution_start_time);
@ -2189,7 +2223,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
time_range time_range
}; };
self.plan_condition_time_range(cond.as_ref(), time_range, plan, schemas, ds_schema) self.plan_condition_time_range(cond.as_ref(), time_range, plan, schema)
} }
/// Generate a logical plan for the specified `DataSource`. /// Generate a logical plan for the specified `DataSource`.
@ -2363,16 +2397,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
let Some(table_schema) = self.s.table_schema(&table) else {continue}; let Some(table_schema) = self.s.table_schema(&table) else {continue};
let Some((plan, measurement_expr)) = self.create_table_ref(&table)? else {continue;}; let Some((plan, measurement_expr)) = self.create_table_ref(&table)? else {continue;};
let schemas = Schemas::new(plan.schema())?;
let ds = DataSource::Table(table.clone()); let ds = DataSource::Table(table.clone());
let ds_schema = ds.schema(self.s)?; let schema = IQLSchema::new_from_ds_schema(plan.schema(), ds.schema(self.s)?)?;
let plan = self.plan_where_clause( let plan =
plan, self.plan_where_clause(plan, &condition, metadata_cutoff, &schema)?;
&condition,
metadata_cutoff,
&schemas,
&ds_schema,
)?;
let tags = table_schema let tags = table_schema
.iter() .iter()
@ -2616,16 +2644,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
let Some((plan, measurement_expr)) = self.create_table_ref(&table)? else {continue;}; let Some((plan, measurement_expr)) = self.create_table_ref(&table)? else {continue;};
let schemas = Schemas::new(plan.schema())?;
let ds = DataSource::Table(table.clone()); let ds = DataSource::Table(table.clone());
let ds_schema = ds.schema(self.s)?; let schema = IQLSchema::new_from_ds_schema(plan.schema(), ds.schema(self.s)?)?;
let plan = self.plan_where_clause( let plan =
plan, self.plan_where_clause(plan, &show_tag_values.condition, metadata_cutoff, &schema)?;
&show_tag_values.condition,
metadata_cutoff,
&schemas,
&ds_schema,
)?;
for key in keys { for key in keys {
let idx = plan let idx = plan
@ -2722,16 +2744,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
for table in tables { for table in tables {
let Some((plan, _measurement_expr)) = self.create_table_ref(&table)? else {continue;}; let Some((plan, _measurement_expr)) = self.create_table_ref(&table)? else {continue;};
let schemas = Schemas::new(plan.schema())?;
let ds = DataSource::Table(table.clone()); let ds = DataSource::Table(table.clone());
let ds_schema = ds.schema(self.s)?; let schema = IQLSchema::new_from_ds_schema(plan.schema(), ds.schema(self.s)?)?;
let plan = self.plan_where_clause( let plan =
plan, self.plan_where_clause(plan, &condition, metadata_cutoff, &schema)?;
&condition,
metadata_cutoff,
&schemas,
&ds_schema,
)?;
let plan = LogicalPlanBuilder::from(plan) let plan = LogicalPlanBuilder::from(plan)
.limit(0, Some(1))? .limit(0, Some(1))?
@ -3072,6 +3088,16 @@ fn is_aggregate_field(f: &Field) -> bool {
.is_break() .is_break()
} }
/// A utility function that checks whether `f` is an aggregate field
/// that should be filled with 0 rather than NULL.
fn is_zero_filled_aggregate_field(f: &Field) -> bool {
walk_expr(&f.expr, &mut |e| match e {
IQLExpr::Call(Call { name, .. }) if name == "count" => ControlFlow::Break(()),
_ => ControlFlow::Continue(()),
})
.is_break()
}
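The effect of this check is visible in the snapshots further down, where COUNT projections become `coalesce_struct(COUNT(...), Int64(0))` so that an empty group produces 0 instead of NULL. The detection itself uses the `walk_expr`/`ControlFlow` idiom: break out of the walk as soon as a `count` call is seen, then turn the walk result into a boolean with `.is_break()`. A small self-contained illustration of that idiom; the helper and inputs are made up for the example:

    use std::ops::ControlFlow;

    // Break as soon as a "count" call name is seen; `.is_break()` then
    // converts the walk result into the boolean answer.
    fn contains_count(call_names: &[&str]) -> bool {
        call_names
            .iter()
            .try_for_each(|name| {
                if *name == "count" {
                    ControlFlow::Break(())
                } else {
                    ControlFlow::Continue(())
                }
            })
            .is_break()
    }

    fn main() {
        assert!(contains_count(&["floor", "count"]));
        assert!(!contains_count(&["mean"]));
    }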
fn conditional_op_to_operator(op: ConditionalOperator) -> Result<Operator> { fn conditional_op_to_operator(op: ConditionalOperator) -> Result<Operator> {
match op { match op {
ConditionalOperator::Eq => Ok(Operator::Eq), ConditionalOperator::Eq => Ok(Operator::Eq),
@ -3886,7 +3912,7 @@ mod test {
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, difference [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), difference:Float64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, difference [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), difference:Float64;N]
Filter: NOT difference IS NULL [time:Timestamp(Nanosecond, None), difference:Float64;N] Filter: NOT difference IS NULL [time:Timestamp(Nanosecond, None), difference:Float64;N]
Projection: cpu.time AS time, difference(cpu.usage_idle) AS difference [time:Timestamp(Nanosecond, None), difference:Float64;N] Projection: cpu.time AS time, difference(cpu.usage_idle) AS difference [time:Timestamp(Nanosecond, None), difference:Float64;N]
WindowAggr: windowExpr=[[AggregateUDF { name: "difference", signature: Signature { type_signature: OneOf([Exact([Int64]), Exact([UInt64]), Exact([Float64])]), volatility: Immutable }, fun: "<FUNC>" }(cpu.usage_idle) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS difference(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, difference(cpu.usage_idle):Float64;N] WindowAggr: windowExpr=[[difference(cpu.usage_idle) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS difference(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, difference(cpu.usage_idle):Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###); "###);
@ -3896,7 +3922,7 @@ mod test {
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, difference [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, difference:Float64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, difference [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, difference:Float64;N]
Filter: NOT difference IS NULL [time:Timestamp(Nanosecond, None);N, difference:Float64;N] Filter: NOT difference IS NULL [time:Timestamp(Nanosecond, None);N, difference:Float64;N]
Projection: time, difference(AVG(cpu.usage_idle)) AS difference [time:Timestamp(Nanosecond, None);N, difference:Float64;N] Projection: time, difference(AVG(cpu.usage_idle)) AS difference [time:Timestamp(Nanosecond, None);N, difference:Float64;N]
WindowAggr: windowExpr=[[AggregateUDF { name: "difference", signature: Signature { type_signature: OneOf([Exact([Int64]), Exact([UInt64]), Exact([Float64])]), volatility: Immutable }, fun: "<FUNC>" }(AVG(cpu.usage_idle)) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS difference(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, difference(AVG(cpu.usage_idle)):Float64;N] WindowAggr: windowExpr=[[difference(AVG(cpu.usage_idle)) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS difference(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, difference(AVG(cpu.usage_idle)):Float64;N]
GapFill: groupBy=[time], aggr=[[AVG(cpu.usage_idle)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N] GapFill: groupBy=[time], aggr=[[AVG(cpu.usage_idle)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
@ -3912,7 +3938,7 @@ mod test {
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, non_negative_difference [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_negative_difference:Float64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, non_negative_difference [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_negative_difference:Float64;N]
Filter: NOT non_negative_difference IS NULL [time:Timestamp(Nanosecond, None), non_negative_difference:Float64;N] Filter: NOT non_negative_difference IS NULL [time:Timestamp(Nanosecond, None), non_negative_difference:Float64;N]
Projection: cpu.time AS time, non_negative_difference(cpu.usage_idle) AS non_negative_difference [time:Timestamp(Nanosecond, None), non_negative_difference:Float64;N] Projection: cpu.time AS time, non_negative_difference(cpu.usage_idle) AS non_negative_difference [time:Timestamp(Nanosecond, None), non_negative_difference:Float64;N]
WindowAggr: windowExpr=[[AggregateUDF { name: "non_negative_difference", signature: Signature { type_signature: OneOf([Exact([Int64]), Exact([UInt64]), Exact([Float64])]), volatility: Immutable }, fun: "<FUNC>" }(cpu.usage_idle) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS non_negative_difference(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, non_negative_difference(cpu.usage_idle):Float64;N] WindowAggr: windowExpr=[[non_negative_difference(cpu.usage_idle) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS non_negative_difference(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, non_negative_difference(cpu.usage_idle):Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###); "###);
@ -3922,7 +3948,7 @@ mod test {
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, non_negative_difference [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, non_negative_difference:Float64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, non_negative_difference [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, non_negative_difference:Float64;N]
Filter: NOT non_negative_difference IS NULL [time:Timestamp(Nanosecond, None);N, non_negative_difference:Float64;N] Filter: NOT non_negative_difference IS NULL [time:Timestamp(Nanosecond, None);N, non_negative_difference:Float64;N]
Projection: time, non_negative_difference(AVG(cpu.usage_idle)) AS non_negative_difference [time:Timestamp(Nanosecond, None);N, non_negative_difference:Float64;N] Projection: time, non_negative_difference(AVG(cpu.usage_idle)) AS non_negative_difference [time:Timestamp(Nanosecond, None);N, non_negative_difference:Float64;N]
WindowAggr: windowExpr=[[AggregateUDF { name: "non_negative_difference", signature: Signature { type_signature: OneOf([Exact([Int64]), Exact([UInt64]), Exact([Float64])]), volatility: Immutable }, fun: "<FUNC>" }(AVG(cpu.usage_idle)) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS non_negative_difference(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, non_negative_difference(AVG(cpu.usage_idle)):Float64;N] WindowAggr: windowExpr=[[non_negative_difference(AVG(cpu.usage_idle)) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS non_negative_difference(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, non_negative_difference(AVG(cpu.usage_idle)):Float64;N]
GapFill: groupBy=[time], aggr=[[AVG(cpu.usage_idle)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N] GapFill: groupBy=[time], aggr=[[AVG(cpu.usage_idle)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
@ -3967,7 +3993,7 @@ mod test {
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, derivative [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), derivative:Float64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, derivative [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), derivative:Float64;N]
Filter: NOT derivative IS NULL [time:Timestamp(Nanosecond, None), derivative:Float64;N] Filter: NOT derivative IS NULL [time:Timestamp(Nanosecond, None), derivative:Float64;N]
Projection: cpu.time AS time, derivative(cpu.usage_idle) AS derivative [time:Timestamp(Nanosecond, None), derivative:Float64;N] Projection: cpu.time AS time, derivative(cpu.usage_idle) AS derivative [time:Timestamp(Nanosecond, None), derivative:Float64;N]
WindowAggr: windowExpr=[[AggregateUDF { name: "derivative(unit: 1000000000)", signature: Signature { type_signature: OneOf([Exact([Timestamp(Nanosecond, None), Int64]), Exact([Timestamp(Nanosecond, None), UInt64]), Exact([Timestamp(Nanosecond, None), Float64])]), volatility: Immutable }, fun: "<FUNC>" }(cpu.time, cpu.usage_idle) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS derivative(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, derivative(cpu.usage_idle):Float64;N] WindowAggr: windowExpr=[[derivative(cpu.usage_idle, IntervalMonthDayNano("1000000000"), cpu.time) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS derivative(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, derivative(cpu.usage_idle):Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###); "###);
@ -3977,7 +4003,7 @@ mod test {
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, derivative [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, derivative:Float64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, derivative [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, derivative:Float64;N]
Filter: NOT derivative IS NULL [time:Timestamp(Nanosecond, None);N, derivative:Float64;N] Filter: NOT derivative IS NULL [time:Timestamp(Nanosecond, None);N, derivative:Float64;N]
Projection: time, derivative(AVG(cpu.usage_idle)) AS derivative [time:Timestamp(Nanosecond, None);N, derivative:Float64;N] Projection: time, derivative(AVG(cpu.usage_idle)) AS derivative [time:Timestamp(Nanosecond, None);N, derivative:Float64;N]
WindowAggr: windowExpr=[[AggregateUDF { name: "derivative(unit: 10000000000)", signature: Signature { type_signature: OneOf([Exact([Timestamp(Nanosecond, None), Int64]), Exact([Timestamp(Nanosecond, None), UInt64]), Exact([Timestamp(Nanosecond, None), Float64])]), volatility: Immutable }, fun: "<FUNC>" }(time, AVG(cpu.usage_idle)) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS derivative(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, derivative(AVG(cpu.usage_idle)):Float64;N] WindowAggr: windowExpr=[[derivative(AVG(cpu.usage_idle), IntervalMonthDayNano("10000000000"), time) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS derivative(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, derivative(AVG(cpu.usage_idle)):Float64;N]
GapFill: groupBy=[time], aggr=[[AVG(cpu.usage_idle)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N] GapFill: groupBy=[time], aggr=[[AVG(cpu.usage_idle)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
@ -3993,7 +4019,7 @@ mod test {
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, non_negative_derivative [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_negative_derivative:Float64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, non_negative_derivative [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_negative_derivative:Float64;N]
Filter: NOT non_negative_derivative IS NULL [time:Timestamp(Nanosecond, None), non_negative_derivative:Float64;N] Filter: NOT non_negative_derivative IS NULL [time:Timestamp(Nanosecond, None), non_negative_derivative:Float64;N]
Projection: cpu.time AS time, non_negative_derivative(cpu.usage_idle) AS non_negative_derivative [time:Timestamp(Nanosecond, None), non_negative_derivative:Float64;N] Projection: cpu.time AS time, non_negative_derivative(cpu.usage_idle) AS non_negative_derivative [time:Timestamp(Nanosecond, None), non_negative_derivative:Float64;N]
WindowAggr: windowExpr=[[AggregateUDF { name: "non_negative_derivative(unit: 1000000000)", signature: Signature { type_signature: OneOf([Exact([Timestamp(Nanosecond, None), Int64]), Exact([Timestamp(Nanosecond, None), UInt64]), Exact([Timestamp(Nanosecond, None), Float64])]), volatility: Immutable }, fun: "<FUNC>" }(cpu.time, cpu.usage_idle) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS non_negative_derivative(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, non_negative_derivative(cpu.usage_idle):Float64;N] WindowAggr: windowExpr=[[non_negative_derivative(cpu.usage_idle, IntervalMonthDayNano("1000000000"), cpu.time) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS non_negative_derivative(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, non_negative_derivative(cpu.usage_idle):Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###); "###);
@ -4003,7 +4029,46 @@ mod test {
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, non_negative_derivative [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, non_negative_derivative [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N]
Filter: NOT non_negative_derivative IS NULL [time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N] Filter: NOT non_negative_derivative IS NULL [time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N]
Projection: time, non_negative_derivative(AVG(cpu.usage_idle)) AS non_negative_derivative [time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N] Projection: time, non_negative_derivative(AVG(cpu.usage_idle)) AS non_negative_derivative [time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N]
WindowAggr: windowExpr=[[AggregateUDF { name: "non_negative_derivative(unit: 10000000000)", signature: Signature { type_signature: OneOf([Exact([Timestamp(Nanosecond, None), Int64]), Exact([Timestamp(Nanosecond, None), UInt64]), Exact([Timestamp(Nanosecond, None), Float64])]), volatility: Immutable }, fun: "<FUNC>" }(time, AVG(cpu.usage_idle)) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS non_negative_derivative(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, non_negative_derivative(AVG(cpu.usage_idle)):Float64;N] WindowAggr: windowExpr=[[non_negative_derivative(AVG(cpu.usage_idle), IntervalMonthDayNano("10000000000"), time) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS non_negative_derivative(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, non_negative_derivative(AVG(cpu.usage_idle)):Float64;N]
GapFill: groupBy=[time], aggr=[[AVG(cpu.usage_idle)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###);
// selector
assert_snapshot!(plan("SELECT NON_NEGATIVE_DERIVATIVE(LAST(usage_idle)) FROM cpu GROUP BY TIME(10s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, non_negative_derivative [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N]
Filter: NOT non_negative_derivative IS NULL [time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N]
Projection: time, non_negative_derivative(selector_last(cpu.usage_idle,cpu.time)[value]) AS non_negative_derivative [time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N]
WindowAggr: windowExpr=[[non_negative_derivative((selector_last(cpu.usage_idle,cpu.time))[value], IntervalMonthDayNano("10000000000"), time) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS non_negative_derivative(selector_last(cpu.usage_idle,cpu.time)[value])]] [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N, non_negative_derivative(selector_last(cpu.usage_idle,cpu.time)[value]):Float64;N]
GapFill: groupBy=[time], aggr=[[selector_last(cpu.usage_idle,cpu.time)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[selector_last(cpu.usage_idle, cpu.time)]] [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###);
}
#[test]
fn test_cumulative_sum() {
// no aggregates
assert_snapshot!(plan("SELECT CUMULATIVE_SUM(usage_idle) FROM cpu"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cumulative_sum:Float64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, cumulative_sum [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cumulative_sum:Float64;N]
Filter: NOT cumulative_sum IS NULL [time:Timestamp(Nanosecond, None), cumulative_sum:Float64;N]
Projection: cpu.time AS time, cumulative_sum(cpu.usage_idle) AS cumulative_sum [time:Timestamp(Nanosecond, None), cumulative_sum:Float64;N]
WindowAggr: windowExpr=[[cumumlative_sum(cpu.usage_idle) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS cumulative_sum(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, cumulative_sum(cpu.usage_idle):Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###);
// aggregate
assert_snapshot!(plan("SELECT CUMULATIVE_SUM(MEAN(usage_idle)) FROM cpu GROUP BY TIME(10s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, cumulative_sum:Float64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, cumulative_sum [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, cumulative_sum:Float64;N]
Filter: NOT cumulative_sum IS NULL [time:Timestamp(Nanosecond, None);N, cumulative_sum:Float64;N]
Projection: time, cumulative_sum(AVG(cpu.usage_idle)) AS cumulative_sum [time:Timestamp(Nanosecond, None);N, cumulative_sum:Float64;N]
WindowAggr: windowExpr=[[cumumlative_sum(AVG(cpu.usage_idle)) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS cumulative_sum(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, cumulative_sum(AVG(cpu.usage_idle)):Float64;N]
GapFill: groupBy=[time], aggr=[[AVG(cpu.usage_idle)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N] GapFill: groupBy=[time], aggr=[[AVG(cpu.usage_idle)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
@ -4043,7 +4108,7 @@ mod test {
"###); "###);
assert_snapshot!(plan("SELECT COUNT(DISTINCT usage_idle) FROM cpu"), @r###" assert_snapshot!(plan("SELECT COUNT(DISTINCT usage_idle) FROM cpu"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, COUNT(DISTINCT cpu.usage_idle) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, coalesce_struct(COUNT(DISTINCT cpu.usage_idle), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N]
Aggregate: groupBy=[[]], aggr=[[COUNT(DISTINCT cpu.usage_idle)]] [COUNT(DISTINCT cpu.usage_idle):Int64;N] Aggregate: groupBy=[[]], aggr=[[COUNT(DISTINCT cpu.usage_idle)]] [COUNT(DISTINCT cpu.usage_idle):Int64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###); "###);
@ -4114,7 +4179,7 @@ mod test {
fn test_selectors_and_aggregate() { fn test_selectors_and_aggregate() {
assert_snapshot!(plan("SELECT LAST(usage_idle), COUNT(usage_idle) FROM cpu"), @r###" assert_snapshot!(plan("SELECT LAST(usage_idle), COUNT(usage_idle) FROM cpu"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), last:Float64;N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), last:Float64;N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, (selector_last(cpu.usage_idle,cpu.time))[value] AS last, COUNT(cpu.usage_idle) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), last:Float64;N, count:Int64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, (selector_last(cpu.usage_idle,cpu.time))[value] AS last, coalesce_struct(COUNT(cpu.usage_idle), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), last:Float64;N, count:Int64;N]
Aggregate: groupBy=[[]], aggr=[[selector_last(cpu.usage_idle, cpu.time), COUNT(cpu.usage_idle)]] [selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N, COUNT(cpu.usage_idle):Int64;N] Aggregate: groupBy=[[]], aggr=[[selector_last(cpu.usage_idle, cpu.time), COUNT(cpu.usage_idle)]] [selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N, COUNT(cpu.usage_idle):Int64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###); "###);
@ -4793,20 +4858,20 @@ mod test {
fn no_group_by() { fn no_group_by() {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N]
Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###); "###);
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY non_existent"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY non_existent"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_existent:Null;N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_existent:Null;N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, NULL AS non_existent, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_existent:Null;N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, NULL AS non_existent, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_existent:Null;N, count:Int64;N]
Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###); "###);
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY foo"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY foo"), @r###"
Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###); "###);
@ -4814,7 +4879,7 @@ mod test {
// The `COUNT(f64_field)` aggregate is only projected once in the Aggregate and reused in the projection // The `COUNT(f64_field)` aggregate is only projected once in the Aggregate and reused in the projection
assert_snapshot!(plan("SELECT COUNT(f64_field), COUNT(f64_field) + COUNT(f64_field), COUNT(f64_field) * 3 FROM data"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field), COUNT(f64_field) + COUNT(f64_field), COUNT(f64_field) * 3 FROM data"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_count:Int64;N, count_1:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_count:Int64;N, count_1:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, COUNT(data.f64_field) AS count, COUNT(data.f64_field) + COUNT(data.f64_field) AS count_count, COUNT(data.f64_field) * Int64(3) AS count_1 [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_count:Int64;N, count_1:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count, coalesce_struct(COUNT(data.f64_field), Int64(0)) + coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count_count, coalesce_struct(COUNT(data.f64_field), Int64(0)) * Int64(3) AS count_1 [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_count:Int64;N, count_1:Int64;N]
Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###); "###);
@ -4822,7 +4887,7 @@ mod test {
// non-existent tags are excluded from the Aggregate groupBy and Sort operators // non-existent tags are excluded from the Aggregate groupBy and Sort operators
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY foo, non_existent"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY foo, non_existent"), @r###"
Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, non_existent:Null;N, count:Int64;N] Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, non_existent:Null;N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, NULL AS non_existent, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, non_existent:Null;N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, NULL AS non_existent, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, non_existent:Null;N, count:Int64;N]
Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###); "###);
@ -4830,7 +4895,7 @@ mod test {
// Aggregate expression is projected once and reused in final projection // Aggregate expression is projected once and reused in final projection
assert_snapshot!(plan("SELECT COUNT(f64_field), COUNT(f64_field) * 2 FROM data"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field), COUNT(f64_field) * 2 FROM data"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_1:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_1:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, COUNT(data.f64_field) AS count, COUNT(data.f64_field) * Int64(2) AS count_1 [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_1:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count, coalesce_struct(COUNT(data.f64_field), Int64(0)) * Int64(2) AS count_1 [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_1:Int64;N]
Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###); "###);
@ -4869,7 +4934,7 @@ mod test {
fn group_by_time() { fn group_by_time() {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(none)"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(none)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
@ -4878,7 +4943,7 @@ mod test {
// supports offset parameter // supports offset parameter
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s, 5s) FILL(none)"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s, 5s) FILL(none)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(5000000000, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(5000000000, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
@ -4890,7 +4955,7 @@ mod test {
// No time bounds // No time bounds
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s)"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
@ -4903,7 +4968,7 @@ mod test {
// No lower time bounds // No lower time bounds
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time < '2022-10-31T02:02:00Z' GROUP BY TIME(10s)"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time < '2022-10-31T02:02:00Z' GROUP BY TIME(10s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1667181719999999999, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1667181719999999999, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time <= TimestampNanosecond(1667181719999999999, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Filter: data.time <= TimestampNanosecond(1667181719999999999, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
@ -4916,7 +4981,7 @@ mod test {
// No upper time bounds // No upper time bounds
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time >= '2022-10-31T02:00:00Z' GROUP BY TIME(10s)"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time >= '2022-10-31T02:00:00Z' GROUP BY TIME(10s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Included(Literal(TimestampNanosecond(1667181600000000000, None)))..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Included(Literal(TimestampNanosecond(1667181600000000000, None)))..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time >= TimestampNanosecond(1667181600000000000, None) AND data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Filter: data.time >= TimestampNanosecond(1667181600000000000, None) AND data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
@ -4929,7 +4994,7 @@ mod test {
// Default is FILL(null) // Default is FILL(null)
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(10s)"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(10s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Included(Literal(TimestampNanosecond(1667181600000000000, None)))..Included(Literal(TimestampNanosecond(1667181719999999999, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Included(Literal(TimestampNanosecond(1667181600000000000, None)))..Included(Literal(TimestampNanosecond(1667181719999999999, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time >= TimestampNanosecond(1667181600000000000, None) AND data.time <= TimestampNanosecond(1667181719999999999, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Filter: data.time >= TimestampNanosecond(1667181600000000000, None) AND data.time <= TimestampNanosecond(1667181719999999999, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
@ -4941,7 +5006,7 @@ mod test {
fn group_by_time_gapfill_default_is_fill_null1() { fn group_by_time_gapfill_default_is_fill_null1() {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s)"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
@ -4953,7 +5018,7 @@ mod test {
fn group_by_time_gapfill_default_is_fill_null2() { fn group_by_time_gapfill_default_is_fill_null2() {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(null)"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(null)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
@ -4965,7 +5030,7 @@ mod test {
fn group_by_time_gapfill_default_is_fill_null3() { fn group_by_time_gapfill_default_is_fill_null3() {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(previous)"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(previous)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[time], aggr=[[LOCF(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] GapFill: groupBy=[time], aggr=[[LOCF(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
@ -4989,7 +5054,7 @@ mod test {
fn group_by_time_gapfill_default_is_fill_null5() { fn group_by_time_gapfill_default_is_fill_null5() {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(linear)"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(linear)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[time], aggr=[[INTERPOLATE(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] GapFill: groupBy=[time], aggr=[[INTERPOLATE(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
@ -5031,7 +5096,7 @@ mod test {
Filter: iox::row <= Int64(1) [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N] Filter: iox::row <= Int64(1) [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N]
WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [foo] ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS iox::row]] [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N] WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [foo] ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS iox::row]] [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N]
Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###); "###);
@ -5045,7 +5110,7 @@ mod test {
Filter: iox::row > Int64(1) [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N] Filter: iox::row > Int64(1) [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N]
WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [foo] ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS iox::row]] [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N] WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [foo] ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS iox::row]] [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N]
Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###); "###);
@ -5059,7 +5124,7 @@ mod test {
Filter: iox::row BETWEEN Int64(4) AND Int64(5) [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N] Filter: iox::row BETWEEN Int64(4) AND Int64(5) [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N]
WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [foo] ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS iox::row]] [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N] WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [foo] ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS iox::row]] [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N]
Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###); "###);
@ -5085,7 +5150,7 @@ mod test {
fn group_by_time_precision() { fn group_by_time_precision() {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10u) FILL(none)"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10u) FILL(none)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
@ -5333,6 +5398,22 @@ mod test {
"###); "###);
} }
#[test]
fn test_select_function_tag_column() {
assert_snapshot!(plan("SELECT last(foo) as foo, first(usage_idle) from cpu group by foo"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Null;N, first:Float64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, NULL AS foo, (selector_first(cpu.usage_idle,cpu.time))[value] AS first [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Null;N, first:Float64;N]
Aggregate: groupBy=[[]], aggr=[[selector_first(cpu.usage_idle, cpu.time)]] [selector_first(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###);
assert_snapshot!(plan("SELECT count(foo) as foo, first(usage_idle) from cpu group by foo"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Null;N, foo_1:Null;N, first:Float64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, NULL AS foo, (coalesce_struct(selector_first(cpu.usage_idle,cpu.time,NULL), Struct({value:Float64(0),time:TimestampNanosecond(0, None),other_1:NULL})))[other_1] AS foo_1, (selector_first(cpu.usage_idle,cpu.time,NULL))[value] AS first [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Null;N, foo_1:Null;N, first:Float64;N]
Aggregate: groupBy=[[]], aggr=[[selector_first(cpu.usage_idle, cpu.time, NULL)]] [selector_first(cpu.usage_idle,cpu.time,NULL):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "other_1", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###);
}
// The following is an outline of additional scenarios to develop // The following is an outline of additional scenarios to develop
// as the planner learns more features. // as the planner learns more features.
// This is not an exhaustive list and is expected to grow as the // This is not an exhaustive list and is expected to grow as the


@ -27,11 +27,8 @@ pub(super) fn make_tag_key_column_meta(
let index_map = fields let index_map = fields
.iter() .iter()
.enumerate() .enumerate()
.filter_map(|(index, f)| match &f.expr { .filter_map(|(index, f)| match &f.data_type {
IQLExpr::VarRef(VarRef { Some(InfluxColumnType::Tag) | None => Some((f.name.as_str(), index + START_INDEX)),
name,
data_type: Some(VarRefDataType::Tag) | None,
}) => Some((name.as_str(), index + START_INDEX)),
_ => None, _ => None,
}) })
.collect::<HashMap<_, _>>(); .collect::<HashMap<_, _>>();


@ -123,44 +123,42 @@
//! [`Eval`]: https://github.com/influxdata/influxql/blob/1ba470371ec093d57a726b143fe6ccbacf1b452b/ast.go#L4137 //! [`Eval`]: https://github.com/influxdata/influxql/blob/1ba470371ec093d57a726b143fe6ccbacf1b452b/ast.go#L4137
use std::sync::Arc; use std::sync::Arc;
use crate::plan::util::Schemas; use crate::plan::util::IQLSchema;
use arrow::datatypes::DataType; use arrow::datatypes::DataType;
use datafusion::common::tree_node::{Transformed, TreeNode, TreeNodeRewriter}; use datafusion::common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
use datafusion::common::{Result, ScalarValue}; use datafusion::common::{Result, ScalarValue};
use datafusion::logical_expr::expr::{AggregateFunction, AggregateUDF, WindowFunction};
use datafusion::logical_expr::{ use datafusion::logical_expr::{
binary_expr, cast, coalesce, lit, BinaryExpr, Expr, ExprSchemable, Operator, binary_expr, cast, coalesce, lit, BinaryExpr, Expr, ExprSchemable, GetIndexedField, Operator,
}; };
use datafusion::optimizer::simplify_expressions::{ExprSimplifier, SimplifyContext}; use datafusion::optimizer::simplify_expressions::{ExprSimplifier, SimplifyContext};
use datafusion::physical_expr::execution_props::ExecutionProps; use datafusion::physical_expr::execution_props::ExecutionProps;
use datafusion::prelude::when; use datafusion::prelude::{when, Column};
use observability_deps::tracing::trace; use observability_deps::tracing::trace;
use predicate::rpc_predicate::{iox_expr_rewrite, simplify_predicate}; use predicate::rpc_predicate::{iox_expr_rewrite, simplify_predicate};
use super::ir::DataSourceSchema;
/// Perform a series of passes to rewrite `expr` in compliance with InfluxQL behavior /// Perform a series of passes to rewrite `expr` in compliance with InfluxQL behavior
/// in an effort to ensure the query executes without error. /// in an effort to ensure the query executes without error.
pub(super) fn rewrite_conditional_expr( pub(super) fn rewrite_conditional_expr(
exec_props: &ExecutionProps, exec_props: &ExecutionProps,
expr: Expr, expr: Expr,
schemas: &Schemas, schema: &IQLSchema<'_>,
ds_schema: &DataSourceSchema<'_>,
) -> Result<Expr> { ) -> Result<Expr> {
let simplify_context = let simplify_context =
SimplifyContext::new(exec_props).with_schema(Arc::clone(&schemas.df_schema)); SimplifyContext::new(exec_props).with_schema(Arc::clone(&schema.df_schema));
let simplifier = ExprSimplifier::new(simplify_context); let simplifier = ExprSimplifier::new(simplify_context);
Ok(expr) Ok(expr)
.map(|expr| log_rewrite(expr, "original")) .map(|expr| log_rewrite(expr, "original"))
// make regex matching with invalid types produce false // make regex matching with invalid types produce false
.and_then(|expr| expr.rewrite(&mut FixRegularExpressions { schemas })) .and_then(|expr| expr.rewrite(&mut FixRegularExpressions { schema }))
.map(|expr| log_rewrite(expr, "after fix_regular_expressions")) .map(|expr| log_rewrite(expr, "after fix_regular_expressions"))
// rewrite exprs with incompatible operands to NULL or FALSE // rewrite exprs with incompatible operands to NULL or FALSE
// (seems like FixRegularExpressions could be combined into this pass) // (seems like FixRegularExpressions could be combined into this pass)
.and_then(|expr| rewrite_expr(expr, schemas)) .and_then(|expr| rewrite_expr(expr, schema))
.map(|expr| log_rewrite(expr, "after rewrite_expr")) .map(|expr| log_rewrite(expr, "after rewrite_expr"))
// Convert tag column references to CASE WHEN <tag> IS NULL THEN '' ELSE <tag> END // Convert tag column references to CASE WHEN <tag> IS NULL THEN '' ELSE <tag> END
.and_then(|expr| rewrite_tag_columns(expr, schemas, ds_schema)) .and_then(|expr| rewrite_tag_columns(expr, schema))
.map(|expr| log_rewrite(expr, "after rewrite_tag_columns")) .map(|expr| log_rewrite(expr, "after rewrite_tag_columns"))
// Push comparison operators into CASE exprs: // Push comparison operators into CASE exprs:
// CASE WHEN tag0 IS NULL THEN '' ELSE tag0 END = 'foo' // CASE WHEN tag0 IS NULL THEN '' ELSE tag0 END = 'foo'
@ -172,7 +170,7 @@ pub(super) fn rewrite_conditional_expr(
// - convert numeric types so that operands agree // - convert numeric types so that operands agree
// - convert Utf8 to Dictionary as needed // - convert Utf8 to Dictionary as needed
// The next step will fail with type errors if we don't do this. // The next step will fail with type errors if we don't do this.
.and_then(|expr| simplifier.coerce(expr, Arc::clone(&schemas.df_schema))) .and_then(|expr| simplifier.coerce(expr, Arc::clone(&schema.df_schema)))
.map(|expr| log_rewrite(expr, "after coerce")) .map(|expr| log_rewrite(expr, "after coerce"))
// DataFusion expression simplification. This is important here because: // DataFusion expression simplification. This is important here because:
// CASE WHEN tag0 IS NULL THEN '' = 'foo' ELSE tag0 = 'foo' END // CASE WHEN tag0 IS NULL THEN '' = 'foo' ELSE tag0 = 'foo' END
@ -206,8 +204,8 @@ fn log_rewrite(expr: Expr, description: &str) -> Expr {
/// Perform a series of passes to rewrite `expr`, used as a column projection, /// Perform a series of passes to rewrite `expr`, used as a column projection,
/// to match the behavior of InfluxQL. /// to match the behavior of InfluxQL.
pub(super) fn rewrite_field_expr(expr: Expr, schemas: &Schemas) -> Result<Expr> { pub(super) fn rewrite_field_expr(expr: Expr, schema: &IQLSchema<'_>) -> Result<Expr> {
rewrite_expr(expr, schemas) rewrite_expr(expr, schema)
} }
/// The expression was rewritten /// The expression was rewritten
@ -225,7 +223,7 @@ fn no(expr: Expr) -> Result<Transformed<Expr>> {
/// ///
/// Rewrite and coerce the expression tree to model the behavior /// Rewrite and coerce the expression tree to model the behavior
/// of an InfluxQL query. /// of an InfluxQL query.
fn rewrite_expr(expr: Expr, schemas: &Schemas) -> Result<Expr> { fn rewrite_expr(expr: Expr, schema: &IQLSchema<'_>) -> Result<Expr> {
expr.transform(&|expr| { expr.transform(&|expr| {
match expr { match expr {
Expr::BinaryExpr(BinaryExpr { Expr::BinaryExpr(BinaryExpr {
@ -233,8 +231,8 @@ fn rewrite_expr(expr: Expr, schemas: &Schemas) -> Result<Expr> {
op, op,
ref right, ref right,
}) => { }) => {
let lhs_type = left.get_type(&schemas.df_schema)?; let lhs_type = left.get_type(&schema.df_schema)?;
let rhs_type = right.get_type(&schemas.df_schema)?; let rhs_type = right.get_type(&schema.df_schema)?;
match (lhs_type, op, rhs_type) { match (lhs_type, op, rhs_type) {
// //
@ -422,6 +420,23 @@ fn rewrite_expr(expr: Expr, schemas: &Schemas) -> Result<Expr> {
_ => yes(lit(ScalarValue::Null)), _ => yes(lit(ScalarValue::Null)),
} }
} }
// Invoking an aggregate or window function on a tag column should return `NULL`
// to be consistent with OG.
Expr::AggregateFunction(AggregateFunction { ref args, .. } )
| Expr::AggregateUDF(AggregateUDF { ref args, .. } )
| Expr::WindowFunction(WindowFunction { ref args, .. } ) => match &args[0] {
Expr::Column(Column { ref name, .. }) if schema.is_tag_field(name) => yes(lit(ScalarValue::Null)),
_ => no(expr),
}
// If the InfluxQL query used a selector on a tag column, like `last(tag_col)`
// then there will be an indexed field. Convert this to `NULL` as well.
Expr::GetIndexedField(GetIndexedField { expr: ref e, .. }) => match e.as_ref() {
Expr::Literal(ScalarValue::Null) => yes(lit(ScalarValue::Null)),
_ => no(expr),
}
// //
// Literals and other expressions are passed through to DataFusion, // Literals and other expressions are passed through to DataFusion,
// as it will handle evaluating function calls, etc // as it will handle evaluating function calls, etc
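
The two new arms above make any aggregate, aggregate UDF, or window call whose first argument is a tag column collapse to a NULL literal, and the GetIndexedField arm then propagates that NULL when a selector such as last(tag_col) is indexed into; this is what the new test_select_function_tag_column snapshots earlier in the diff exercise. A reduced, self-contained sketch of the same transform, with a plain slice of tag names standing in for IQLSchema::is_tag_field:

    use datafusion::common::tree_node::{Transformed, TreeNode};
    use datafusion::common::{Result, ScalarValue};
    use datafusion::logical_expr::expr::AggregateFunction;
    use datafusion::logical_expr::{lit, Expr};
    use datafusion::prelude::Column;

    // Replace `agg(tag_col)` with NULL and leave every other expression untouched.
    fn null_out_tag_aggregates(expr: Expr, tag_names: &[&str]) -> Result<Expr> {
        expr.transform(&|e| {
            let is_tag_agg = match &e {
                Expr::AggregateFunction(AggregateFunction { args, .. }) => matches!(
                    args.first(),
                    Some(Expr::Column(Column { name, .. })) if tag_names.contains(&name.as_str())
                ),
                _ => false,
            };
            Ok(if is_tag_agg {
                Transformed::Yes(lit(ScalarValue::Null))
            } else {
                Transformed::No(e)
            })
        })
    }
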
@ -467,7 +482,7 @@ fn rewrite_boolean(lhs: Expr, op: Operator, rhs: Expr) -> Expr {
/// Rewrite regex conditional expressions to match InfluxQL behaviour. /// Rewrite regex conditional expressions to match InfluxQL behaviour.
struct FixRegularExpressions<'a> { struct FixRegularExpressions<'a> {
schemas: &'a Schemas, schema: &'a IQLSchema<'a>,
} }
impl<'a> TreeNodeRewriter for FixRegularExpressions<'a> { impl<'a> TreeNodeRewriter for FixRegularExpressions<'a> {
@ -483,7 +498,7 @@ impl<'a> TreeNodeRewriter for FixRegularExpressions<'a> {
right, right,
}) => { }) => {
Ok(if let Expr::Column(ref col) = *left { Ok(if let Expr::Column(ref col) = *left {
match self.schemas.df_schema.field_from_column(col)?.data_type() { match self.schema.df_schema.field_from_column(col)?.data_type() {
DataType::Dictionary(..) | DataType::Utf8 => { DataType::Dictionary(..) | DataType::Utf8 => {
Expr::BinaryExpr(BinaryExpr { left, op, right }) Expr::BinaryExpr(BinaryExpr { left, op, right })
} }
@ -517,13 +532,9 @@ impl<'a> TreeNodeRewriter for FixRegularExpressions<'a> {
/// case when tag0 is null then "" else tag0 end /// case when tag0 is null then "" else tag0 end
/// ``` /// ```
/// This ensures that we treat tags with the same semantics as OG InfluxQL. /// This ensures that we treat tags with the same semantics as OG InfluxQL.
fn rewrite_tag_columns( fn rewrite_tag_columns(expr: Expr, schema: &IQLSchema<'_>) -> Result<Expr> {
expr: Expr,
_schemas: &Schemas,
ds_schema: &DataSourceSchema<'_>,
) -> Result<Expr> {
expr.transform(&|expr| match expr { expr.transform(&|expr| match expr {
Expr::Column(ref c) if ds_schema.is_tag_field(&c.name) => { Expr::Column(ref c) if schema.is_tag_field(&c.name) => {
yes(when(expr.clone().is_null(), lit("")).otherwise(expr)?) yes(when(expr.clone().is_null(), lit("")).otherwise(expr)?)
} }
e => no(e), e => no(e),
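
Seen from the query's side, the rewrite above turns every bare tag reference into CASE WHEN tag IS NULL THEN '' ELSE tag END, so rows where the tag is absent compare like an empty string, as OG InfluxQL does. A small sketch of building that expression directly with DataFusion's when/otherwise helpers (the function name is illustrative, not part of the crate):

    use datafusion::common::Result;
    use datafusion::logical_expr::{col, lit, Expr};
    use datafusion::prelude::when;

    // CASE WHEN <tag> IS NULL THEN '' ELSE <tag> END
    fn tag_with_influxql_semantics(tag: &str) -> Result<Expr> {
        when(col(tag).is_null(), lit("")).otherwise(col(tag))
    }
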
@ -532,6 +543,8 @@ fn rewrite_tag_columns(
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use crate::plan::ir::DataSourceSchema;
use super::*; use super::*;
use datafusion::logical_expr::lit_timestamp_nano; use datafusion::logical_expr::lit_timestamp_nano;
use datafusion::prelude::col; use datafusion::prelude::col;
@ -542,7 +555,7 @@ mod test {
use schema::{InfluxFieldType, SchemaBuilder}; use schema::{InfluxFieldType, SchemaBuilder};
use std::sync::Arc; use std::sync::Arc;
fn new_schemas() -> (Schemas, DataSourceSchema<'static>) { fn new_schema() -> IQLSchema<'static> {
let iox_schema = SchemaBuilder::new() let iox_schema = SchemaBuilder::new()
.measurement("m0") .measurement("m0")
.timestamp() .timestamp()
@ -556,7 +569,8 @@ mod test {
.build() .build()
.expect("schema failed"); .expect("schema failed");
let df_schema: DFSchemaRef = Arc::clone(iox_schema.inner()).to_dfschema_ref().unwrap(); let df_schema: DFSchemaRef = Arc::clone(iox_schema.inner()).to_dfschema_ref().unwrap();
(Schemas { df_schema }, DataSourceSchema::Table(iox_schema)) let ds_schema = DataSourceSchema::Table(iox_schema);
IQLSchema::new_from_ds_schema(&df_schema, ds_schema).unwrap()
} }
/// Tests which validate that division is coalesced to `0`, to handle division by zero, /// Tests which validate that division is coalesced to `0`, to handle division by zero,
@ -566,7 +580,7 @@ mod test {
/// binary expression to a scalar value, `0`. /// binary expression to a scalar value, `0`.
#[test] #[test]
fn test_division() { fn test_division() {
let (schemas, _) = new_schemas(); let schemas = new_schema();
let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string(); let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string();
// Float64 // Float64
@ -627,7 +641,7 @@ mod test {
#[test] #[test]
fn test_pass_thru() { fn test_pass_thru() {
test_helpers::maybe_start_logging(); test_helpers::maybe_start_logging();
let (schemas, _) = new_schemas(); let schemas = new_schema();
let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string(); let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string();
let expr = lit(5.5).gt(lit(1_i64)); let expr = lit(5.5).gt(lit(1_i64));
@ -664,9 +678,9 @@ mod test {
#[test] #[test]
fn test_string_operations() { fn test_string_operations() {
let props = execution_props(); let props = execution_props();
let (schemas, ds_schema) = new_schemas(); let schemas = new_schema();
let rewrite = |expr| { let rewrite = |expr| {
rewrite_conditional_expr(&props, expr, &schemas, &ds_schema) rewrite_conditional_expr(&props, expr, &schemas)
.unwrap() .unwrap()
.to_string() .to_string()
}; };
@ -688,7 +702,7 @@ mod test {
/// to the supported bitwise operators. /// to the supported bitwise operators.
#[test] #[test]
fn test_boolean_operations() { fn test_boolean_operations() {
let (schemas, _) = new_schemas(); let schemas = new_schema();
let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string(); let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string();
let expr = "boolean_field".as_expr().and(lit(true)); let expr = "boolean_field".as_expr().and(lit(true));
@ -743,7 +757,7 @@ mod test {
/// Tests cases to validate Boolean and NULL data types /// Tests cases to validate Boolean and NULL data types
#[test] #[test]
fn test_rewrite_conditional_null() { fn test_rewrite_conditional_null() {
let (schemas, _) = new_schemas(); let schemas = new_schema();
let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string(); let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string();
// NULL on either side and boolean on the other of a binary expression // NULL on either side and boolean on the other of a binary expression
@ -779,7 +793,7 @@ mod test {
#[test] #[test]
fn test_time_range() { fn test_time_range() {
let (schemas, _) = new_schemas(); let schemas = new_schema();
let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string(); let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string();
let expr = "time".as_expr().gt_eq(lit_timestamp_nano(1000)); let expr = "time".as_expr().gt_eq(lit_timestamp_nano(1000));
@ -811,7 +825,7 @@ mod test {
/// valid operation for the given the operands. These are used when projecting columns. /// valid operation for the given the operands. These are used when projecting columns.
#[test] #[test]
fn test_rewrite_expr_coercion_reduce_to_null() { fn test_rewrite_expr_coercion_reduce_to_null() {
let (schemas, _) = new_schemas(); let schemas = new_schema();
let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string(); let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string();
// //
@ -851,9 +865,9 @@ mod test {
fn test_rewrite_tag_columns_eq() { fn test_rewrite_tag_columns_eq() {
test_helpers::maybe_start_logging(); test_helpers::maybe_start_logging();
let props = execution_props(); let props = execution_props();
let (schemas, ds_schema) = new_schemas(); let schemas = new_schema();
let rewrite = |expr| { let rewrite = |expr| {
rewrite_conditional_expr(&props, expr, &schemas, &ds_schema) rewrite_conditional_expr(&props, expr, &schemas)
.unwrap() .unwrap()
.to_string() .to_string()
}; };
@ -904,9 +918,9 @@ mod test {
fn test_rewrite_tag_columns_regex() { fn test_rewrite_tag_columns_regex() {
let props = execution_props(); let props = execution_props();
test_helpers::maybe_start_logging(); test_helpers::maybe_start_logging();
let (schemas, ds_schema) = new_schemas(); let schemas = new_schema();
let rewrite = |expr| { let rewrite = |expr| {
rewrite_conditional_expr(&props, expr, &schemas, &ds_schema) rewrite_conditional_expr(&props, expr, &schemas)
.unwrap() .unwrap()
.to_string() .to_string()
}; };
@ -931,9 +945,9 @@ mod test {
fn test_fields_pass_thru() { fn test_fields_pass_thru() {
test_helpers::maybe_start_logging(); test_helpers::maybe_start_logging();
let props = execution_props(); let props = execution_props();
let (schemas, ds_schema) = new_schemas(); let schemas = new_schema();
let rewrite = |expr| { let rewrite = |expr| {
rewrite_conditional_expr(&props, expr, &schemas, &ds_schema) rewrite_conditional_expr(&props, expr, &schemas)
.unwrap() .unwrap()
.to_string() .to_string()
}; };


@ -1029,7 +1029,7 @@ impl FieldChecker {
ProjectionType::TopBottomSelector ProjectionType::TopBottomSelector
} else if self.has_group_by_time { } else if self.has_group_by_time {
if self.window_count > 0 { if self.window_count > 0 {
if self.window_count == self.aggregate_count { if self.window_count == self.aggregate_count + self.selector_count {
ProjectionType::WindowAggregate ProjectionType::WindowAggregate
} else { } else {
ProjectionType::WindowAggregateMixed ProjectionType::WindowAggregateMixed
@ -1338,11 +1338,8 @@ impl FieldChecker {
} }
fn check_cumulative_sum(&mut self, args: &[Expr]) -> Result<()> { fn check_cumulative_sum(&mut self, args: &[Expr]) -> Result<()> {
self.inc_aggregate_count(); self.inc_window_count();
check_exp_args!("cumulative_sum", 1, args); check_exp_args!("cumulative_sum", 1, args);
set_extra_intervals!(self, 1);
self.check_nested_symbol("cumulative_sum", &args[0]) self.check_nested_symbol("cumulative_sum", &args[0])
} }
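
Together, the two hunks above stop counting cumulative_sum as an aggregate (it is now tracked as a window call, and its extra-interval bookkeeping is dropped) and relax the projection-type check so selector calls also offset the window count. The adjusted rule, reduced to a free function for illustration (the names are not the FieldChecker API):

    // With GROUP BY TIME(..), the query is a plain window-aggregate projection
    // only if every window call is matched by an aggregate or selector call;
    // otherwise it is classified as the mixed window/aggregate case.
    fn is_pure_window_aggregate(window_count: usize, aggregate_count: usize, selector_count: usize) -> bool {
        window_count == aggregate_count + selector_count
    }
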


@ -1,13 +1,12 @@
use crate::{error, NUMERICS}; use crate::{error, NUMERICS};
use arrow::array::{Array, ArrayRef, Int64Array}; use arrow::array::{Array, ArrayRef, Int64Array};
use arrow::datatypes::{DataType, TimeUnit}; use arrow::datatypes::DataType;
use datafusion::common::{downcast_value, DataFusionError, Result, ScalarValue}; use datafusion::common::{downcast_value, DataFusionError, Result, ScalarValue};
use datafusion::logical_expr::{ use datafusion::logical_expr::{
Accumulator, AccumulatorFactoryFunction, AggregateUDF, ReturnTypeFunction, Signature, Accumulator, AccumulatorFactoryFunction, AggregateUDF, ReturnTypeFunction, Signature,
StateTypeFunction, TypeSignature, Volatility, StateTypeFunction, TypeSignature, Volatility,
}; };
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use std::mem::replace;
use std::sync::Arc; use std::sync::Arc;
/// Name of the `MOVING_AVERAGE` user-defined aggregate function. /// Name of the `MOVING_AVERAGE` user-defined aggregate function.
@ -148,339 +147,3 @@ impl Accumulator for AvgNAccumulator {
- std::mem::size_of_val(&self.data_type) - std::mem::size_of_val(&self.data_type)
} }
} }
/// Name of the `DIFFERENCE` user-defined aggregate function.
pub(crate) const DIFFERENCE_NAME: &str = "difference";
/// Definition of the `DIFFERENCE` user-defined aggregate function.
pub(crate) static DIFFERENCE: Lazy<Arc<AggregateUDF>> = Lazy::new(|| {
let return_type: ReturnTypeFunction = Arc::new(|dt| Ok(Arc::new(dt[0].clone())));
let accumulator: AccumulatorFactoryFunction =
Arc::new(|dt| Ok(Box::new(DifferenceAccumulator::new(dt))));
let state_type: StateTypeFunction = Arc::new(|_| Ok(Arc::new(vec![])));
Arc::new(AggregateUDF::new(
DIFFERENCE_NAME,
&Signature::one_of(
NUMERICS
.iter()
.map(|dt| TypeSignature::Exact(vec![dt.clone()]))
.collect(),
Volatility::Immutable,
),
&return_type,
&accumulator,
// State shouldn't be called, so no schema to report
&state_type,
))
});
#[derive(Debug)]
struct DifferenceAccumulator {
data_type: DataType,
last: ScalarValue,
diff: ScalarValue,
}
impl DifferenceAccumulator {
fn new(data_type: &DataType) -> Self {
let last: ScalarValue = data_type.try_into().expect("data_type → ScalarValue");
let diff = last.clone();
Self {
data_type: data_type.clone(),
last,
diff,
}
}
}
impl Accumulator for DifferenceAccumulator {
/// `state` is only called when used as an aggregate function. It can be
/// safely left unimplemented, as this accumulator is only used as a window aggregate.
///
/// See: <https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.Accumulator.html#tymethod.state>
fn state(&self) -> Result<Vec<ScalarValue>> {
error::internal("unexpected call to DifferenceAccumulator::state")
}
fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
if values.is_empty() {
return Ok(());
}
let arr = &values[0];
for index in 0..arr.len() {
let scalar = ScalarValue::try_from_array(arr, index)?;
if !scalar.is_null() {
if !self.last.is_null() {
self.diff = scalar.sub(self.last.clone())?
}
self.last = scalar;
} else {
self.diff = ScalarValue::try_from(&self.data_type).unwrap()
}
}
Ok(())
}
/// `merge_batch` is only called when used as an aggregate function. It can
/// safely be left unimplemented, as this accumulator is only used as a window aggregate.
///
/// See: <https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.Accumulator.html#tymethod.state>
fn merge_batch(&mut self, _states: &[ArrayRef]) -> Result<()> {
error::internal("unexpected call to DifferenceAccumulator::merge_batch")
}
fn evaluate(&self) -> Result<ScalarValue> {
Ok(self.diff.clone())
}
fn size(&self) -> usize {
std::mem::size_of_val(self)
}
}
/// Name of the `NON_NEGATIVE_DIFFERENCE` user-defined aggregate function.
pub(crate) const NON_NEGATIVE_DIFFERENCE_NAME: &str = "non_negative_difference";
/// Definition of the `NON_NEGATIVE_DIFFERENCE` user-defined aggregate function.
pub(crate) static NON_NEGATIVE_DIFFERENCE: Lazy<Arc<AggregateUDF>> = Lazy::new(|| {
let return_type: ReturnTypeFunction = Arc::new(|dt| Ok(Arc::new(dt[0].clone())));
let accumulator: AccumulatorFactoryFunction = Arc::new(|dt| {
Ok(Box::new(NonNegative::<_>::new(DifferenceAccumulator::new(
dt,
))))
});
let state_type: StateTypeFunction = Arc::new(|_| Ok(Arc::new(vec![])));
Arc::new(AggregateUDF::new(
NON_NEGATIVE_DIFFERENCE_NAME,
&Signature::one_of(
NUMERICS
.iter()
.map(|dt| TypeSignature::Exact(vec![dt.clone()]))
.collect(),
Volatility::Immutable,
),
&return_type,
&accumulator,
// State shouldn't be called, so no schema to report
&state_type,
))
});
/// NonNegative is a wrapper around an Accumulator that replaces
/// negative values with NULL.
#[derive(Debug)]
struct NonNegative<T> {
acc: T,
}
impl<T> NonNegative<T> {
fn new(acc: T) -> Self {
Self { acc }
}
}
impl<T: Accumulator> Accumulator for NonNegative<T> {
fn state(&self) -> Result<Vec<ScalarValue>> {
self.acc.state()
}
fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
self.acc.update_batch(values)
}
fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
self.acc.merge_batch(states)
}
fn evaluate(&self) -> Result<ScalarValue> {
Ok(match self.acc.evaluate()? {
ScalarValue::Float64(Some(v)) if v < 0.0 => ScalarValue::Float64(None),
ScalarValue::Int64(Some(v)) if v < 0 => ScalarValue::Int64(None),
v => v,
})
}
fn size(&self) -> usize {
self.acc.size()
}
}
/// Name of the `DERIVATIVE` user-defined aggregate function.
pub(crate) const DERIVATIVE_NAME: &str = "derivative";
pub(crate) fn derivative_udf(unit: i64) -> AggregateUDF {
let return_type: ReturnTypeFunction = Arc::new(|_| Ok(Arc::new(DataType::Float64)));
let accumulator: AccumulatorFactoryFunction =
Arc::new(move |_| Ok(Box::new(DerivativeAccumulator::new(unit))));
let state_type: StateTypeFunction = Arc::new(|_| Ok(Arc::new(vec![])));
let sig = Signature::one_of(
NUMERICS
.iter()
.map(|dt| {
TypeSignature::Exact(vec![
DataType::Timestamp(TimeUnit::Nanosecond, None),
dt.clone(),
])
})
.collect(),
Volatility::Immutable,
);
AggregateUDF::new(
format!("{DERIVATIVE_NAME}(unit: {unit})").as_str(),
&sig,
&return_type,
&accumulator,
// State shouldn't be called, so no schema to report
&state_type,
)
}
/// Name of the `NON_NEGATIVE_DERIVATIVE` user-defined aggregate function.
pub(crate) const NON_NEGATIVE_DERIVATIVE_NAME: &str = "non_negative_derivative";
pub(crate) fn non_negative_derivative_udf(unit: i64) -> AggregateUDF {
let return_type: ReturnTypeFunction = Arc::new(|_| Ok(Arc::new(DataType::Float64)));
let accumulator: AccumulatorFactoryFunction = Arc::new(move |_| {
Ok(Box::new(NonNegative::<_>::new(DerivativeAccumulator::new(
unit,
))))
});
let state_type: StateTypeFunction = Arc::new(|_| Ok(Arc::new(vec![])));
let sig = Signature::one_of(
NUMERICS
.iter()
.map(|dt| {
TypeSignature::Exact(vec![
DataType::Timestamp(TimeUnit::Nanosecond, None),
dt.clone(),
])
})
.collect(),
Volatility::Immutable,
);
AggregateUDF::new(
format!("{NON_NEGATIVE_DERIVATIVE_NAME}(unit: {unit})").as_str(),
&sig,
&return_type,
&accumulator,
// State shouldn't be called, so no schema to report
&state_type,
)
}
#[derive(Debug)]
struct DerivativeAccumulator {
unit: i64,
prev: Option<Point>,
curr: Option<Point>,
}
impl DerivativeAccumulator {
fn new(unit: i64) -> Self {
Self {
unit,
prev: None,
curr: None,
}
}
}
impl Accumulator for DerivativeAccumulator {
/// `state` is only called when used as an aggregate function. It can
/// safely be left unimplemented, as this accumulator is only used as a window aggregate.
///
/// See: <https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.Accumulator.html#tymethod.state>
fn state(&self) -> Result<Vec<ScalarValue>> {
error::internal("unexpected call to DerivativeAccumulator::state")
}
fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
if values.is_empty() {
return Ok(());
}
let times = &values[0];
let arr = &values[1];
for index in 0..arr.len() {
let time = match ScalarValue::try_from_array(times, index)? {
ScalarValue::TimestampNanosecond(Some(ts), _) => ts,
v => {
return Err(DataFusionError::Internal(format!(
"invalid time value: {}",
v
)))
}
};
let curr = Point::new(time, ScalarValue::try_from_array(arr, index)?);
let prev = replace(&mut self.curr, curr);
// don't replace the previous value if the current value has the same timestamp.
if self.prev.is_none()
|| prev
.as_ref()
.is_some_and(|prev| prev.time > self.prev.as_ref().unwrap().time)
{
self.prev = prev
}
}
Ok(())
}
/// `merge_batch` is only called when used as an aggregate function. It can
/// safely be left unimplemented, as this accumulator is only used as a window aggregate.
///
/// See: <https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.Accumulator.html#tymethod.state>
fn merge_batch(&mut self, _states: &[ArrayRef]) -> Result<()> {
error::internal("unexpected call to DerivativeAccumulator::merge_batch")
}
fn evaluate(&self) -> Result<ScalarValue> {
Ok(ScalarValue::Float64(
self.curr
.as_ref()
.and_then(|c| c.derivative(self.prev.as_ref(), self.unit)),
))
}
fn size(&self) -> usize {
std::mem::size_of_val(self)
}
}
#[derive(Debug)]
struct Point {
time: i64,
value: ScalarValue,
}
impl Point {
fn new(time: i64, value: ScalarValue) -> Option<Self> {
if value.is_null() {
None
} else {
Some(Self { time, value })
}
}
fn value_as_f64(&self) -> f64 {
match self.value {
ScalarValue::Int64(Some(v)) => v as f64,
ScalarValue::Float64(Some(v)) => v,
ScalarValue::UInt64(Some(v)) => v as f64,
_ => panic!("invalid point {:?}", self),
}
}
fn derivative(&self, prev: Option<&Self>, unit: i64) -> Option<f64> {
prev.and_then(|prev| {
let diff = self.value_as_f64() - prev.value_as_f64();
let elapsed = match self.time - prev.time {
// if the time hasn't changed then the result is NULL.
0 => return None,
0 => return None,
v => v,
} as f64;
let divisor = elapsed / (unit as f64);
Some(diff / divisor)
})
}
}

View File

@ -7,7 +7,7 @@
use crate::plan::util_copy::find_exprs_in_exprs; use crate::plan::util_copy::find_exprs_in_exprs;
use crate::{error, NUMERICS}; use crate::{error, NUMERICS};
use arrow::datatypes::DataType; use arrow::datatypes::{DataType, TimeUnit};
use datafusion::logical_expr::{ use datafusion::logical_expr::{
Expr, ReturnTypeFunction, ScalarFunctionImplementation, ScalarUDF, Signature, TypeSignature, Expr, ReturnTypeFunction, ScalarFunctionImplementation, ScalarUDF, Signature, TypeSignature,
Volatility, Volatility,
@ -21,6 +21,7 @@ pub(super) enum WindowFunction {
NonNegativeDifference, NonNegativeDifference,
Derivative, Derivative,
NonNegativeDerivative, NonNegativeDerivative,
CumulativeSum,
} }
impl WindowFunction { impl WindowFunction {
@ -32,6 +33,7 @@ impl WindowFunction {
NON_NEGATIVE_DIFFERENCE_UDF_NAME => Some(Self::NonNegativeDifference), NON_NEGATIVE_DIFFERENCE_UDF_NAME => Some(Self::NonNegativeDifference),
DERIVATIVE_UDF_NAME => Some(Self::Derivative), DERIVATIVE_UDF_NAME => Some(Self::Derivative),
NON_NEGATIVE_DERIVATIVE_UDF_NAME => Some(Self::NonNegativeDerivative), NON_NEGATIVE_DERIVATIVE_UDF_NAME => Some(Self::NonNegativeDerivative),
CUMULATIVE_SUM_UDF_NAME => Some(Self::CumulativeSum),
_ => None, _ => None,
} }
} }
@ -129,13 +131,21 @@ pub(crate) fn derivative(args: Vec<Expr>) -> Expr {
/// Definition of the `DERIVATIVE` function. /// Definition of the `DERIVATIVE` function.
static DERIVATIVE: Lazy<Arc<ScalarUDF>> = Lazy::new(|| { static DERIVATIVE: Lazy<Arc<ScalarUDF>> = Lazy::new(|| {
let return_type_fn: ReturnTypeFunction = Arc::new(|args| Ok(Arc::new(args[0].clone()))); let return_type_fn: ReturnTypeFunction = Arc::new(|_| Ok(Arc::new(DataType::Float64)));
Arc::new(ScalarUDF::new( Arc::new(ScalarUDF::new(
DERIVATIVE_UDF_NAME, DERIVATIVE_UDF_NAME,
&Signature::one_of( &Signature::one_of(
NUMERICS NUMERICS
.iter() .iter()
.map(|dt| TypeSignature::Exact(vec![dt.clone()])) .flat_map(|dt| {
vec![
TypeSignature::Exact(vec![dt.clone()]),
TypeSignature::Exact(vec![
dt.clone(),
DataType::Duration(TimeUnit::Nanosecond),
]),
]
})
.collect(), .collect(),
Volatility::Immutable, Volatility::Immutable,
), ),
@ -153,13 +163,21 @@ pub(crate) fn non_negative_derivative(args: Vec<Expr>) -> Expr {
/// Definition of the `NON_NEGATIVE_DERIVATIVE` function. /// Definition of the `NON_NEGATIVE_DERIVATIVE` function.
static NON_NEGATIVE_DERIVATIVE: Lazy<Arc<ScalarUDF>> = Lazy::new(|| { static NON_NEGATIVE_DERIVATIVE: Lazy<Arc<ScalarUDF>> = Lazy::new(|| {
let return_type_fn: ReturnTypeFunction = Arc::new(|args| Ok(Arc::new(args[0].clone()))); let return_type_fn: ReturnTypeFunction = Arc::new(|_| Ok(Arc::new(DataType::Float64)));
Arc::new(ScalarUDF::new( Arc::new(ScalarUDF::new(
NON_NEGATIVE_DERIVATIVE_UDF_NAME, NON_NEGATIVE_DERIVATIVE_UDF_NAME,
&Signature::one_of( &Signature::one_of(
NUMERICS NUMERICS
.iter() .iter()
.map(|dt| TypeSignature::Exact(vec![dt.clone()])) .flat_map(|dt| {
vec![
TypeSignature::Exact(vec![dt.clone()]),
TypeSignature::Exact(vec![
dt.clone(),
DataType::Duration(TimeUnit::Nanosecond),
]),
]
})
.collect(), .collect(),
Volatility::Immutable, Volatility::Immutable,
), ),
@ -168,6 +186,29 @@ static NON_NEGATIVE_DERIVATIVE: Lazy<Arc<ScalarUDF>> = Lazy::new(|| {
)) ))
}); });
const CUMULATIVE_SUM_UDF_NAME: &str = "cumulative_sum";
/// Create an expression to represent the `CUMULATIVE_SUM` function.
pub(crate) fn cumulative_sum(args: Vec<Expr>) -> Expr {
CUMULATIVE_SUM.call(args)
}
/// Definition of the `CUMULATIVE_SUM` function.
static CUMULATIVE_SUM: Lazy<Arc<ScalarUDF>> = Lazy::new(|| {
let return_type_fn: ReturnTypeFunction = Arc::new(|args| Ok(Arc::new(args[0].clone())));
Arc::new(ScalarUDF::new(
CUMULATIVE_SUM_UDF_NAME,
&Signature::one_of(
NUMERICS
.iter()
.map(|dt| TypeSignature::Exact(vec![dt.clone()]))
.collect(),
Volatility::Immutable,
),
&return_type_fn,
&stand_in_impl(CUMULATIVE_SUM_UDF_NAME),
))
});
/// Returns an implementation that always returns an error. /// Returns an implementation that always returns an error.
fn stand_in_impl(name: &'static str) -> ScalarFunctionImplementation { fn stand_in_impl(name: &'static str) -> ScalarFunctionImplementation {
Arc::new(move |_| error::internal(format!("{name} should not exist in the final logical plan"))) Arc::new(move |_| error::internal(format!("{name} should not exist in the final logical plan")))

View File

@ -11,8 +11,11 @@ use influxdb_influxql_parser::literal::Number;
use influxdb_influxql_parser::string::Regex; use influxdb_influxql_parser::string::Regex;
use query_functions::clean_non_meta_escapes; use query_functions::clean_non_meta_escapes;
use query_functions::coalesce_struct::coalesce_struct; use query_functions::coalesce_struct::coalesce_struct;
use schema::InfluxColumnType;
use std::sync::Arc; use std::sync::Arc;
use super::ir::{DataSourceSchema, Field};
pub(in crate::plan) fn binary_operator_to_df_operator(op: BinaryOperator) -> Operator { pub(in crate::plan) fn binary_operator_to_df_operator(op: BinaryOperator) -> Operator {
match op { match op {
BinaryOperator::Add => Operator::Plus, BinaryOperator::Add => Operator::Plus,
@ -26,17 +29,62 @@ pub(in crate::plan) fn binary_operator_to_df_operator(op: BinaryOperator) -> Ope
} }
} }
/// Container for both the DataFusion and equivalent IOx schema. /// Container for the DataFusion schema as well as
pub(in crate::plan) struct Schemas { /// info on which columns are tags.
pub(in crate::plan) struct IQLSchema<'a> {
pub(in crate::plan) df_schema: DFSchemaRef, pub(in crate::plan) df_schema: DFSchemaRef,
tag_info: TagInfo<'a>,
} }
impl Schemas { impl<'a> IQLSchema<'a> {
pub(in crate::plan) fn new(df_schema: &DFSchemaRef) -> Result<Self> { /// Create a new IQLSchema from a [`DataSourceSchema`] from the
/// FROM clause of a query or subquery.
pub(in crate::plan) fn new_from_ds_schema(
df_schema: &DFSchemaRef,
ds_schema: DataSourceSchema<'a>,
) -> Result<Self> {
Ok(Self { Ok(Self {
df_schema: Arc::clone(df_schema), df_schema: Arc::clone(df_schema),
tag_info: TagInfo::DataSourceSchema(ds_schema),
}) })
} }
/// Create a new IQLSchema from a list of [`Field`]s on the SELECT list
/// of a subquery.
pub(in crate::plan) fn new_from_fields(
df_schema: &DFSchemaRef,
fields: &'a [Field],
) -> Result<Self> {
Ok(Self {
df_schema: Arc::clone(df_schema),
tag_info: TagInfo::FieldList(fields),
})
}
/// Returns `true` if the schema contains a tag column with the specified name.
pub fn is_tag_field(&self, name: &str) -> bool {
match self.tag_info {
TagInfo::DataSourceSchema(ref ds_schema) => ds_schema.is_tag_field(name),
TagInfo::FieldList(fields) => fields
.iter()
.any(|f| f.name == name && f.data_type == Some(InfluxColumnType::Tag)),
}
}
/// Returns `true` if the schema contains a tag column with the specified name.
/// If the underlying data source is a subquery, it will apply any aliases in the
/// projection that represents the SELECT list.
pub fn is_projected_tag_field(&self, name: &str) -> bool {
match self.tag_info {
TagInfo::DataSourceSchema(ref ds_schema) => ds_schema.is_projected_tag_field(name),
_ => self.is_tag_field(name),
}
}
}
pub(in crate::plan) enum TagInfo<'a> {
DataSourceSchema(DataSourceSchema<'a>),
FieldList(&'a [Field]),
} }
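To make the tag lookup above concrete, here is a minimal, self-contained sketch of the `FieldList` case. The `Field` and `InfluxColumnType` definitions below are hypothetical simplifications of the planner's types, used only to show how a column name is matched against the SELECT-list fields of a subquery.

// Hypothetical, simplified stand-ins for the planner's `Field` and
// `InfluxColumnType` types; only the parts needed for the lookup are modeled.
#[derive(Clone, Copy, PartialEq, Debug)]
enum InfluxColumnType {
    Tag,
    Field,
}

struct Field {
    name: String,
    data_type: Option<InfluxColumnType>,
}

// Mirrors the `TagInfo::FieldList` arm of `is_tag_field`: a column is a tag
// if a SELECT-list field with that name is typed as a tag.
fn is_tag_field(fields: &[Field], name: &str) -> bool {
    fields
        .iter()
        .any(|f| f.name == name && f.data_type == Some(InfluxColumnType::Tag))
}

fn main() {
    let fields = vec![
        Field { name: "region".into(), data_type: Some(InfluxColumnType::Tag) },
        Field { name: "usage".into(), data_type: Some(InfluxColumnType::Field) },
    ];
    assert!(is_tag_field(&fields, "region"));
    assert!(!is_tag_field(&fields, "usage"));
}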
/// Sanitize an InfluxQL regular expression and create a compiled [`regex::Regex`]. /// Sanitize an InfluxQL regular expression and create a compiled [`regex::Regex`].
@ -70,6 +118,7 @@ fn number_to_scalar(n: &Number, data_type: &DataType) -> Result<ScalarValue> {
), ),
fields.clone(), fields.clone(),
), ),
(_, DataType::Null) => ScalarValue::Null,
(n, data_type) => { (n, data_type) => {
// The only output data types expected are Int64, Float64 or UInt64 // The only output data types expected are Int64, Float64 or UInt64
return error::internal(format!("no conversion from {n} to {data_type}")); return error::internal(format!("no conversion from {n} to {data_type}"));

View File

@ -145,34 +145,27 @@ where
negated, negated,
expr, expr,
pattern, pattern,
case_insensitive,
escape_char, escape_char,
}) => Ok(Expr::Like(Like::new( }) => Ok(Expr::Like(Like::new(
*negated, *negated,
Box::new(clone_with_replacement(expr, replacement_fn)?), Box::new(clone_with_replacement(expr, replacement_fn)?),
Box::new(clone_with_replacement(pattern, replacement_fn)?), Box::new(clone_with_replacement(pattern, replacement_fn)?),
*escape_char, *escape_char,
))), *case_insensitive,
Expr::ILike(Like {
negated,
expr,
pattern,
escape_char,
}) => Ok(Expr::ILike(Like::new(
*negated,
Box::new(clone_with_replacement(expr, replacement_fn)?),
Box::new(clone_with_replacement(pattern, replacement_fn)?),
*escape_char,
))), ))),
Expr::SimilarTo(Like { Expr::SimilarTo(Like {
negated, negated,
expr, expr,
pattern, pattern,
case_insensitive,
escape_char, escape_char,
}) => Ok(Expr::SimilarTo(Like::new( }) => Ok(Expr::SimilarTo(Like::new(
*negated, *negated,
Box::new(clone_with_replacement(expr, replacement_fn)?), Box::new(clone_with_replacement(expr, replacement_fn)?),
Box::new(clone_with_replacement(pattern, replacement_fn)?), Box::new(clone_with_replacement(pattern, replacement_fn)?),
*escape_char, *escape_char,
*case_insensitive,
))), ))),
Expr::Case(case) => Ok(Expr::Case(Case::new( Expr::Case(case) => Ok(Expr::Case(Case::new(
match &case.expr { match &case.expr {

View File

@ -6,8 +6,92 @@ use datafusion::logical_expr::{
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use std::sync::Arc; use std::sync::Arc;
mod cumulative_sum;
mod derivative;
mod difference;
mod non_negative;
mod percent_row_number; mod percent_row_number;
/// Definition of the `CUMULATIVE_SUM` user-defined window function.
pub(crate) static CUMULATIVE_SUM: Lazy<WindowFunction> = Lazy::new(|| {
let return_type: ReturnTypeFunction = Arc::new(cumulative_sum::return_type);
let partition_evaluator_factory: PartitionEvaluatorFactory =
Arc::new(cumulative_sum::partition_evaluator_factory);
WindowFunction::WindowUDF(Arc::new(WindowUDF::new(
cumulative_sum::NAME,
&cumulative_sum::SIGNATURE,
&return_type,
&partition_evaluator_factory,
)))
});
/// Definition of the `DERIVATIVE` user-defined window function.
pub(crate) static DERIVATIVE: Lazy<WindowFunction> = Lazy::new(|| {
let return_type: ReturnTypeFunction = Arc::new(derivative::return_type);
let partition_evaluator_factory: PartitionEvaluatorFactory =
Arc::new(derivative::partition_evaluator_factory);
WindowFunction::WindowUDF(Arc::new(WindowUDF::new(
derivative::NAME,
&derivative::SIGNATURE,
&return_type,
&partition_evaluator_factory,
)))
});
/// Definition of the `DIFFERENCE` user-defined window function.
pub(crate) static DIFFERENCE: Lazy<WindowFunction> = Lazy::new(|| {
let return_type: ReturnTypeFunction = Arc::new(difference::return_type);
let partition_evaluator_factory: PartitionEvaluatorFactory =
Arc::new(difference::partition_evaluator_factory);
WindowFunction::WindowUDF(Arc::new(WindowUDF::new(
difference::NAME,
&difference::SIGNATURE,
&return_type,
&partition_evaluator_factory,
)))
});
const NON_NEGATIVE_DERIVATIVE_NAME: &str = "non_negative_derivative";
/// Definition of the `NON_NEGATIVE_DERIVATIVE` user-defined window function.
pub(crate) static NON_NEGATIVE_DERIVATIVE: Lazy<WindowFunction> = Lazy::new(|| {
let return_type: ReturnTypeFunction = Arc::new(derivative::return_type);
let partition_evaluator_factory: PartitionEvaluatorFactory = Arc::new(|| {
Ok(non_negative::wrapper(
derivative::partition_evaluator_factory()?,
))
});
WindowFunction::WindowUDF(Arc::new(WindowUDF::new(
NON_NEGATIVE_DERIVATIVE_NAME,
&derivative::SIGNATURE,
&return_type,
&partition_evaluator_factory,
)))
});
const NON_NEGATIVE_DIFFERENCE_NAME: &str = "non_negative_difference";
/// Definition of the `NON_NEGATIVE_DIFFERENCE` user-defined window function.
pub(crate) static NON_NEGATIVE_DIFFERENCE: Lazy<WindowFunction> = Lazy::new(|| {
let return_type: ReturnTypeFunction = Arc::new(difference::return_type);
let partition_evaluator_factory: PartitionEvaluatorFactory = Arc::new(|| {
Ok(non_negative::wrapper(
difference::partition_evaluator_factory()?,
))
});
WindowFunction::WindowUDF(Arc::new(WindowUDF::new(
NON_NEGATIVE_DIFFERENCE_NAME,
&difference::SIGNATURE,
&return_type,
&partition_evaluator_factory,
)))
});
/// Definition of the `PERCENT_ROW_NUMBER` user-defined window function. /// Definition of the `PERCENT_ROW_NUMBER` user-defined window function.
pub(crate) static PERCENT_ROW_NUMBER: Lazy<WindowFunction> = Lazy::new(|| { pub(crate) static PERCENT_ROW_NUMBER: Lazy<WindowFunction> = Lazy::new(|| {
let return_type: ReturnTypeFunction = Arc::new(percent_row_number::return_type); let return_type: ReturnTypeFunction = Arc::new(percent_row_number::return_type);

View File

@ -0,0 +1,64 @@
use crate::NUMERICS;
use arrow::array::{Array, ArrayRef};
use arrow::datatypes::DataType;
use datafusion::common::{Result, ScalarValue};
use datafusion::logical_expr::{PartitionEvaluator, Signature, TypeSignature, Volatility};
use once_cell::sync::Lazy;
use std::sync::Arc;
/// The name of the cumulative_sum window function.
pub(super) const NAME: &str = "cumulative_sum";
/// Valid signatures for the cumulative_sum window function.
pub(super) static SIGNATURE: Lazy<Signature> = Lazy::new(|| {
Signature::one_of(
NUMERICS
.iter()
.map(|dt| TypeSignature::Exact(vec![dt.clone()]))
.collect(),
Volatility::Immutable,
)
});
/// Calculate the return type given the function signature.
pub(super) fn return_type(sig: &[DataType]) -> Result<Arc<DataType>> {
Ok(Arc::new(sig[0].clone()))
}
/// Create a new partition_evaluator_factory.
pub(super) fn partition_evaluator_factory() -> Result<Box<dyn PartitionEvaluator>> {
Ok(Box::new(CumulativeSumPartitionEvaluator {}))
}
/// PartitionEvaluator which returns the cumulative sum of the input.
#[derive(Debug)]
struct CumulativeSumPartitionEvaluator {}
impl PartitionEvaluator for CumulativeSumPartitionEvaluator {
fn evaluate_all(&mut self, values: &[ArrayRef], num_rows: usize) -> Result<Arc<dyn Array>> {
assert_eq!(values.len(), 1);
let array = Arc::clone(&values[0]);
let mut sum = ScalarValue::new_zero(array.data_type())?;
let mut cumulative: Vec<ScalarValue> = vec![];
for idx in 0..num_rows {
let v = ScalarValue::try_from_array(&array, idx)?;
let res = if v.is_null() {
v
} else {
sum = sum.add(&v)?;
sum.clone()
};
cumulative.push(res);
}
Ok(Arc::new(ScalarValue::iter_to_array(cumulative)?))
}
fn uses_window_frame(&self) -> bool {
false
}
fn include_rank(&self) -> bool {
false
}
}
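As a quick illustration of the evaluator above, independent of DataFusion and Arrow: the running sum carries across rows, and a NULL input produces a NULL output without resetting the sum. The sketch below models values as plain `Option<i64>` and only demonstrates the intended semantics, not the `ScalarValue`-based implementation.

// Minimal sketch of the cumulative_sum semantics over nullable values:
// NULL inputs yield NULL outputs, but the running sum is preserved.
fn cumulative_sum(values: &[Option<i64>]) -> Vec<Option<i64>> {
    let mut sum = 0;
    values
        .iter()
        .map(|v| {
            v.map(|v| {
                sum += v;
                sum
            })
        })
        .collect()
}

fn main() {
    let input = [Some(1), None, Some(2), Some(3)];
    assert_eq!(cumulative_sum(&input), vec![Some(1), None, Some(3), Some(6)]);
}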

View File

@ -0,0 +1,125 @@
use crate::{error, NUMERICS};
use arrow::array::{Array, ArrayRef};
use arrow::datatypes::{DataType, TimeUnit};
use datafusion::common::{Result, ScalarValue};
use datafusion::logical_expr::{PartitionEvaluator, Signature, TypeSignature, Volatility};
use once_cell::sync::Lazy;
use std::borrow::Borrow;
use std::sync::Arc;
/// The name of the derivative window function.
pub(super) const NAME: &str = "derivative";
/// Valid signatures for the derivative window function.
pub(super) static SIGNATURE: Lazy<Signature> = Lazy::new(|| {
Signature::one_of(
NUMERICS
.iter()
.map(|dt| {
TypeSignature::Exact(vec![
dt.clone(),
DataType::Duration(TimeUnit::Nanosecond),
DataType::Timestamp(TimeUnit::Nanosecond, None),
])
})
.collect(),
Volatility::Immutable,
)
});
/// Calculate the return type given the function signature.
pub(super) fn return_type(_: &[DataType]) -> Result<Arc<DataType>> {
Ok(Arc::new(DataType::Float64))
}
/// Create a new partition_evaluator_factory.
pub(super) fn partition_evaluator_factory() -> Result<Box<dyn PartitionEvaluator>> {
Ok(Box::new(DerivativePartitionEvaluator {}))
}
/// PartitionEvaluator which returns the derivative between input values,
/// in the provided units.
#[derive(Debug)]
struct DerivativePartitionEvaluator {}
impl PartitionEvaluator for DerivativePartitionEvaluator {
fn evaluate_all(&mut self, values: &[ArrayRef], _num_rows: usize) -> Result<Arc<dyn Array>> {
assert_eq!(values.len(), 3);
let array = Arc::clone(&values[0]);
let times = Arc::clone(&values[2]);
// The second element of the values array is the second argument to
// the 'derivative' function. It specifies the unit duration to use
// when computing the derivative.
//
// INVARIANT:
// The planner guarantees that the second argument is always a duration
// literal.
let unit = ScalarValue::try_from_array(&values[1], 0)?;
let mut idx: usize = 0;
let mut last: ScalarValue = array.data_type().try_into()?;
let mut last_time: ScalarValue = times.data_type().try_into()?;
let mut derivative: Vec<ScalarValue> = vec![];
while idx < array.len() {
last = ScalarValue::try_from_array(&array, idx)?;
last_time = ScalarValue::try_from_array(&times, idx)?;
derivative.push(ScalarValue::Float64(None));
idx += 1;
if !last.is_null() {
break;
}
}
while idx < array.len() {
let v = ScalarValue::try_from_array(&array, idx)?;
let t = ScalarValue::try_from_array(&times, idx)?;
if v.is_null() {
derivative.push(ScalarValue::Float64(None));
} else {
derivative.push(ScalarValue::Float64(Some(
delta(&v, &last)? / delta_time(&t, &last_time, &unit)?,
)));
last = v.clone();
last_time = t.clone();
}
idx += 1;
}
Ok(Arc::new(ScalarValue::iter_to_array(derivative)?))
}
fn uses_window_frame(&self) -> bool {
false
}
fn include_rank(&self) -> bool {
false
}
}
fn delta(curr: &ScalarValue, prev: &ScalarValue) -> Result<f64> {
match (curr.borrow(), prev.borrow()) {
(ScalarValue::Float64(Some(curr)), ScalarValue::Float64(Some(prev))) => Ok(*curr - *prev),
(ScalarValue::Int64(Some(curr)), ScalarValue::Int64(Some(prev))) => {
Ok(*curr as f64 - *prev as f64)
}
(ScalarValue::UInt64(Some(curr)), ScalarValue::UInt64(Some(prev))) => {
Ok(*curr as f64 - *prev as f64)
}
_ => error::internal("derivative attempted on unsupported values"),
}
}
fn delta_time(curr: &ScalarValue, prev: &ScalarValue, unit: &ScalarValue) -> Result<f64> {
if let (
ScalarValue::TimestampNanosecond(Some(curr), _),
ScalarValue::TimestampNanosecond(Some(prev), _),
ScalarValue::IntervalMonthDayNano(Some(unit)),
) = (curr, prev, unit)
{
Ok((*curr as f64 - *prev as f64) / *unit as f64)
} else {
error::internal("derivative attempted on unsupported values")
}
}
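For reference, the per-row arithmetic performed by `delta` and `delta_time` reduces to `(v_curr - v_prev) / ((t_curr - t_prev) / unit)`, i.e. the value delta per `unit` of elapsed time. The sketch below illustrates just that formula over plain `(i64, f64)` points, assuming nanosecond timestamps and a nanosecond `unit`; it is not the Arrow-based implementation.

// Minimal sketch of the per-row derivative arithmetic: the value delta is
// normalised by the elapsed time expressed in multiples of `unit_nanos`.
fn derivative(prev: (i64, f64), curr: (i64, f64), unit_nanos: i64) -> f64 {
    let dv = curr.1 - prev.1;
    let dt_units = (curr.0 - prev.0) as f64 / unit_nanos as f64;
    dv / dt_units
}

fn main() {
    // Two points 2 seconds apart, value rising by 10, unit = 1 second:
    // the derivative is 5 (per second).
    let per_second = 1_000_000_000;
    assert_eq!(derivative((0, 0.0), (2 * per_second, 10.0), per_second), 5.0);
}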

View File

@ -0,0 +1,79 @@
use crate::NUMERICS;
use arrow::array::{Array, ArrayRef};
use arrow::compute::{shift, subtract_dyn};
use arrow::datatypes::DataType;
use datafusion::common::{Result, ScalarValue};
use datafusion::logical_expr::{PartitionEvaluator, Signature, TypeSignature, Volatility};
use once_cell::sync::Lazy;
use std::sync::Arc;
/// The name of the difference window function.
pub(super) const NAME: &str = "difference";
/// Valid signatures for the difference window function.
pub(super) static SIGNATURE: Lazy<Signature> = Lazy::new(|| {
Signature::one_of(
NUMERICS
.iter()
.map(|dt| TypeSignature::Exact(vec![dt.clone()]))
.collect(),
Volatility::Immutable,
)
});
/// Calculate the return type given the function signature.
pub(super) fn return_type(sig: &[DataType]) -> Result<Arc<DataType>> {
Ok(Arc::new(sig[0].clone()))
}
/// Create a new partition_evaluator_factory.
pub(super) fn partition_evaluator_factory() -> Result<Box<dyn PartitionEvaluator>> {
Ok(Box::new(DifferencePartitionEvaluator {}))
}
/// PartitionEvaluator which returns the difference between input values.
#[derive(Debug)]
struct DifferencePartitionEvaluator {}
impl PartitionEvaluator for DifferencePartitionEvaluator {
fn evaluate_all(&mut self, values: &[ArrayRef], _num_rows: usize) -> Result<Arc<dyn Array>> {
assert_eq!(values.len(), 1);
let array = Arc::clone(&values[0]);
if array.null_count() == 0 {
// If there are no gaps then use arrow kernels.
Ok(subtract_dyn(&array, &shift(&array, 1)?)?)
} else {
let mut idx: usize = 0;
let mut last: ScalarValue = array.data_type().try_into()?;
let mut difference: Vec<ScalarValue> = vec![];
while idx < array.len() {
last = ScalarValue::try_from_array(&array, idx)?;
difference.push(array.data_type().try_into()?);
idx += 1;
if !last.is_null() {
break;
}
}
while idx < array.len() {
let v = ScalarValue::try_from_array(&array, idx)?;
if v.is_null() {
difference.push(array.data_type().try_into()?);
} else {
difference.push(v.sub(last)?);
last = v;
}
idx += 1;
}
Ok(Arc::new(ScalarValue::iter_to_array(difference)?))
}
}
fn uses_window_frame(&self) -> bool {
false
}
fn include_rank(&self) -> bool {
false
}
}
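The null-handling branch above can be summarised as: every NULL input and the first non-NULL value map to NULL, and each later non-NULL value is subtracted from the most recent non-NULL value. Below is a small sketch of just those semantics over `Option<i64>`; the fast path in the real code uses the Arrow `subtract_dyn` and `shift` kernels instead.

// Minimal sketch of the difference semantics with NULLs: output NULL until a
// previous non-NULL value exists, then emit `current - last_non_null`.
fn difference(values: &[Option<i64>]) -> Vec<Option<i64>> {
    let mut last: Option<i64> = None;
    values
        .iter()
        .map(|v| match (*v, last) {
            (Some(curr), Some(prev)) => {
                last = Some(curr);
                Some(curr - prev)
            }
            (Some(curr), None) => {
                last = Some(curr);
                None
            }
            (None, _) => None,
        })
        .collect()
}

fn main() {
    let input = [None, Some(1), None, Some(4), Some(6)];
    assert_eq!(difference(&input), vec![None, None, None, Some(3), Some(2)]);
}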

View File

@ -0,0 +1,74 @@
use arrow::array::Array;
use arrow::compute::{lt_dyn_scalar, nullif};
use datafusion::common::{Result, ScalarValue};
use datafusion::logical_expr::window_state::WindowAggState;
use datafusion::logical_expr::PartitionEvaluator;
use std::ops::Range;
use std::sync::Arc;
/// Wrap a PartitionEvaluator in a non-negative filter.
pub(super) fn wrapper(
partition_evaluator: Box<dyn PartitionEvaluator>,
) -> Box<dyn PartitionEvaluator> {
Box::new(NonNegative {
partition_evaluator,
})
}
#[derive(Debug)]
struct NonNegative {
partition_evaluator: Box<dyn PartitionEvaluator>,
}
impl PartitionEvaluator for NonNegative {
fn memoize(&mut self, state: &mut WindowAggState) -> Result<()> {
self.partition_evaluator.memoize(state)
}
fn get_range(&self, idx: usize, n_rows: usize) -> Result<Range<usize>> {
self.partition_evaluator.get_range(idx, n_rows)
}
fn evaluate_all(
&mut self,
values: &[Arc<dyn Array>],
num_rows: usize,
) -> Result<Arc<dyn Array>> {
let array = self.partition_evaluator.evaluate_all(values, num_rows)?;
let predicate = lt_dyn_scalar(&array, 0)?;
Ok(nullif(&array, &predicate)?)
}
fn evaluate(&mut self, values: &[Arc<dyn Array>], range: &Range<usize>) -> Result<ScalarValue> {
let value = self.partition_evaluator.evaluate(values, range)?;
Ok(match value {
ScalarValue::Float64(Some(v)) if v < 0.0 => ScalarValue::Float64(None),
ScalarValue::Int64(Some(v)) if v < 0 => ScalarValue::Int64(None),
v => v,
})
}
fn evaluate_all_with_rank(
&self,
num_rows: usize,
ranks_in_partition: &[Range<usize>],
) -> Result<Arc<dyn Array>> {
let array = self
.partition_evaluator
.evaluate_all_with_rank(num_rows, ranks_in_partition)?;
let predicate = lt_dyn_scalar(&array, 0)?;
Ok(nullif(&array, &predicate)?)
}
fn supports_bounded_execution(&self) -> bool {
self.partition_evaluator.supports_bounded_execution()
}
fn uses_window_frame(&self) -> bool {
self.partition_evaluator.uses_window_frame()
}
fn include_rank(&self) -> bool {
self.partition_evaluator.include_rank()
}
}
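The wrapper's effect is purely a post-filter on the wrapped evaluator's output: any negative result becomes NULL, and everything else passes through unchanged. A tiny sketch of that mapping over `Option<f64>` (the real implementation does this vectorised via the `lt_dyn_scalar` and `nullif` kernels):

// Minimal sketch of the non-negative post-filter: negative results are
// replaced with NULL, non-negative results and NULLs pass through.
fn non_negative(values: &[Option<f64>]) -> Vec<Option<f64>> {
    values.iter().map(|v| v.filter(|v| *v >= 0.0)).collect()
}

fn main() {
    let diffs = [Some(2.0), Some(-1.5), None, Some(0.0)];
    assert_eq!(non_negative(&diffs), vec![Some(2.0), None, None, Some(0.0)]);
}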

View File

@ -6,10 +6,10 @@ use datafusion::logical_expr::{PartitionEvaluator, Signature, TypeSignature, Vol
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use std::sync::Arc; use std::sync::Arc;
/// The name of the percent_row_number aggregate function. /// The name of the percent_row_number window function.
pub(super) const NAME: &str = "percent_row_number"; pub(super) const NAME: &str = "percent_row_number";
/// Valid signatures for the percent_row_number aggregate function. /// Valid signatures for the percent_row_number window function.
pub(super) static SIGNATURE: Lazy<Signature> = Lazy::new(|| { pub(super) static SIGNATURE: Lazy<Signature> = Lazy::new(|| {
Signature::one_of( Signature::one_of(
vec![ vec![

View File

@ -16,7 +16,7 @@ observability_deps = { path = "../observability_deps" }
query_functions = { path = "../query_functions"} query_functions = { path = "../query_functions"}
schema = { path = "../schema" } schema = { path = "../schema" }
snafu = "0.7" snafu = "0.7"
sqlparser = "0.35.0" sqlparser = "0.36.0"
workspace-hack = { version = "0.1", path = "../workspace-hack" } workspace-hack = { version = "0.1", path = "../workspace-hack" }
[dev-dependencies] [dev-dependencies]

View File

@ -496,7 +496,6 @@ impl TreeNodeVisitor for RowBasedVisitor {
| Expr::Column(_) | Expr::Column(_)
| Expr::Exists { .. } | Expr::Exists { .. }
| Expr::GetIndexedField { .. } | Expr::GetIndexedField { .. }
| Expr::ILike { .. }
| Expr::InList { .. } | Expr::InList { .. }
| Expr::InSubquery { .. } | Expr::InSubquery { .. }
| Expr::IsFalse(_) | Expr::IsFalse(_)

View File

@ -515,6 +515,7 @@ mod tests {
expr, expr,
pattern, pattern,
escape_char: None, escape_char: None,
case_insensitive: false,
}) })
} }

View File

@ -64,9 +64,7 @@ impl From<&SingleTenantExtractError> for hyper::StatusCode {
SingleTenantExtractError::NoBucketSpecified => Self::BAD_REQUEST, SingleTenantExtractError::NoBucketSpecified => Self::BAD_REQUEST,
SingleTenantExtractError::InvalidNamespace(_) => Self::BAD_REQUEST, SingleTenantExtractError::InvalidNamespace(_) => Self::BAD_REQUEST,
SingleTenantExtractError::ParseV1Request( SingleTenantExtractError::ParseV1Request(
V1WriteParseError::NoQueryParams V1WriteParseError::NoQueryParams | V1WriteParseError::DecodeFail(_),
| V1WriteParseError::DecodeFail(_)
| V1WriteParseError::ContainsRpSeparator,
) => Self::BAD_REQUEST, ) => Self::BAD_REQUEST,
SingleTenantExtractError::ParseV2Request( SingleTenantExtractError::ParseV2Request(
V2WriteParseError::NoQueryParams | V2WriteParseError::DecodeFail(_), V2WriteParseError::NoQueryParams | V2WriteParseError::DecodeFail(_),
@ -125,10 +123,6 @@ async fn parse_v1(
// Extract the write parameters. // Extract the write parameters.
let write_params = WriteParamsV1::try_from(req)?; let write_params = WriteParamsV1::try_from(req)?;
// Extracting the write parameters validates the db field never contains the
// '/' separator to avoid ambiguity with the "namespace/rp" construction.
debug_assert!(!write_params.db.contains(V1_NAMESPACE_RP_SEPARATOR));
// Extract or construct the namespace name string from the write parameters // Extract or construct the namespace name string from the write parameters
let namespace = NamespaceName::new(match write_params.rp { let namespace = NamespaceName::new(match write_params.rp {
RetentionPolicy::Unspecified | RetentionPolicy::Autogen => write_params.db, RetentionPolicy::Unspecified | RetentionPolicy::Autogen => write_params.db,
@ -316,22 +310,65 @@ mod tests {
} }
); );
// Prevent ambiguity by denying the `/` character in the DB // Permit `/` character in the DB
test_parse_v1!( test_parse_v1!(
no_rp_db_with_rp_separator, no_rp_db_with_rp_separator,
query_string = "?db=bananas/are/great", query_string = "?db=bananas/are/great",
want = Err(Error::SingleTenantError( want = Ok(WriteParams{ namespace, precision }) => {
SingleTenantExtractError::ParseV1Request(V1WriteParseError::ContainsRpSeparator) assert_eq!(namespace.as_str(), "bananas/are/great");
)) assert_matches!(precision, Precision::Nanoseconds);
}
); );
// Prevent ambiguity by denying the `/` character in the RP // Permit the `/` character in the RP
test_parse_v1!( test_parse_v1!(
rp_with_rp_separator, rp_with_rp_separator,
query_string = "?db=bananas&rp=are/great", query_string = "?db=bananas&rp=are/great",
want = Err(Error::SingleTenantError( want = Ok(WriteParams{ namespace, precision }) => {
SingleTenantExtractError::ParseV1Request(V1WriteParseError::ContainsRpSeparator) assert_eq!(namespace.as_str(), "bananas/are/great");
)) assert_matches!(precision, Precision::Nanoseconds);
}
);
// `/` character is allowed in the DB, if a named RP is specified
test_parse_v1!(
db_with_rp_separator_and_rp,
query_string = "?db=foo/bar&rp=my_rp",
want = Ok(WriteParams{ namespace, precision }) => {
assert_eq!(namespace.as_str(), "foo/bar/my_rp");
assert_matches!(precision, Precision::Nanoseconds);
}
);
// Always concat, even if this results in duplicating the rp within the namespace.
// ** this matches the query API behavior **
test_parse_v1!(
db_with_rp_separator_and_duplicate_rp,
query_string = "?db=foo/my_rp&rp=my_rp",
want = Ok(WriteParams{ namespace, precision }) => {
assert_eq!(namespace.as_str(), "foo/my_rp/my_rp");
assert_matches!(precision, Precision::Nanoseconds);
}
);
// `/` character is allowed in the DB, if an autogen RP is specified
test_parse_v1!(
db_with_rp_separator_and_rp_autogen,
query_string = "?db=foo/bar&rp=autogen",
want = Ok(WriteParams{ namespace, precision }) => {
assert_eq!(namespace.as_str(), "foo/bar");
assert_matches!(precision, Precision::Nanoseconds);
}
);
// `/` character is allowed in the DB, if a default RP is specified
test_parse_v1!(
db_with_rp_separator_and_rp_default,
query_string = "?db=foo/bar&rp=default",
want = Ok(WriteParams{ namespace, precision }) => {
assert_eq!(namespace.as_str(), "foo/bar");
assert_matches!(precision, Precision::Nanoseconds);
}
); );
test_parse_v1!( test_parse_v1!(

View File

@ -29,12 +29,6 @@ pub enum V1WriteParseError {
/// The request contains invalid parameters. /// The request contains invalid parameters.
#[error("failed to deserialize db/rp/precision in request: {0}")] #[error("failed to deserialize db/rp/precision in request: {0}")]
DecodeFail(#[from] serde::de::value::Error), DecodeFail(#[from] serde::de::value::Error),
/// The provided "db" or "rp" value contains the reserved `/` character.
///
/// See [`V1_NAMESPACE_RP_SEPARATOR`].
#[error("db cannot contain the reserved character '/'")]
ContainsRpSeparator,
} }
/// May be empty string, explicit rp name, or `autogen`. As provided at the /// May be empty string, explicit rp name, or `autogen`. As provided at the
@ -61,7 +55,7 @@ impl<'de> Deserialize<'de> for RetentionPolicy {
Ok(match s.as_str() { Ok(match s.as_str() {
"" => RetentionPolicy::Unspecified, "" => RetentionPolicy::Unspecified,
"''" => RetentionPolicy::Unspecified, "''" => RetentionPolicy::Unspecified,
"autogen" => RetentionPolicy::Autogen, "autogen" | "default" => RetentionPolicy::Autogen,
_ => RetentionPolicy::Named(s), _ => RetentionPolicy::Named(s),
}) })
} }
@ -90,20 +84,6 @@ impl<T> TryFrom<&Request<T>> for WriteParamsV1 {
let query = req.uri().query().ok_or(V1WriteParseError::NoQueryParams)?; let query = req.uri().query().ok_or(V1WriteParseError::NoQueryParams)?;
let params: WriteParamsV1 = serde_urlencoded::from_str(query)?; let params: WriteParamsV1 = serde_urlencoded::from_str(query)?;
// No namespace (db) is ever allowed to contain a `/` to prevent
// ambiguity with the namespace/rp NamespaceName construction.
if params.db.contains(V1_NAMESPACE_RP_SEPARATOR) {
return Err(V1WriteParseError::ContainsRpSeparator);
}
// Likewise the "rp" field itself cannot contain the `/` character if
// specified.
if let RetentionPolicy::Named(s) = &params.rp {
if s.contains(V1_NAMESPACE_RP_SEPARATOR) {
return Err(V1WriteParseError::ContainsRpSeparator);
}
}
Ok(params) Ok(params)
} }
} }
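Based on the test expectations above, the v1 `db`/`rp` to namespace mapping now permits the `/` character in both values: an unspecified rp, `autogen`, and `default` all resolve to the database name alone, while any other rp is concatenated as `db/rp`. The function below is a hypothetical sketch of that rule only; the production handler builds a validated `NamespaceName` rather than a plain string.

// Hypothetical sketch of the v1 namespace construction rule implied by the
// tests above; the production code validates the result as a NamespaceName.
fn v1_namespace(db: &str, rp: Option<&str>) -> String {
    match rp {
        // Unspecified, autogen, and default all resolve to the db alone.
        None | Some("") | Some("autogen") | Some("default") => db.to_string(),
        // Any other retention policy is appended with the '/' separator,
        // even if that duplicates a suffix already present in db.
        Some(named) => format!("{db}/{named}"),
    }
}

fn main() {
    assert_eq!(v1_namespace("bananas/are/great", None), "bananas/are/great");
    assert_eq!(v1_namespace("foo/bar", Some("my_rp")), "foo/bar/my_rp");
    assert_eq!(v1_namespace("foo/my_rp", Some("my_rp")), "foo/my_rp/my_rp");
    assert_eq!(v1_namespace("foo/bar", Some("autogen")), "foo/bar");
    assert_eq!(v1_namespace("foo/bar", Some("default")), "foo/bar");
}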

View File

@ -9,6 +9,7 @@ license.workspace = true
async-trait = "0.1.71" async-trait = "0.1.71"
bytes = "1.4" bytes = "1.4"
datafusion = { workspace = true } datafusion = { workspace = true }
executor = { path = "../executor" }
iox_query = { path = "../iox_query" } iox_query = { path = "../iox_query" }
iox_query_influxql = { path = "../iox_query_influxql" } iox_query_influxql = { path = "../iox_query_influxql" }
iox_query_influxrpc = { path = "../iox_query_influxrpc" } iox_query_influxrpc = { path = "../iox_query_influxrpc" }

View File

@ -53,16 +53,25 @@ pub fn datafusion_error_to_tonic_code(e: &DataFusionError) -> tonic::Code {
| DataFusionError::NotImplemented(_) | DataFusionError::NotImplemented(_)
| DataFusionError::Plan(_) => tonic::Code::InvalidArgument, | DataFusionError::Plan(_) => tonic::Code::InvalidArgument,
DataFusionError::Context(_,_) => unreachable!("handled in chain traversal above"), DataFusionError::Context(_,_) => unreachable!("handled in chain traversal above"),
// External errors are mostly already traversed by DataFusion, except for some IOx errors
DataFusionError::External(e) => {
if let Some(e) = e.downcast_ref::<executor::JobError>() {
match e {
executor::JobError::WorkerGone => tonic::Code::Unavailable,
executor::JobError::Panic { .. } => tonic::Code::Internal,
}
} else {
// All other, unclassified cases are signalled as "internal error" to the user since they cannot do
// anything about it (except for reporting a bug). Note that DataFusion "external" error is only from
// DataFusion's PoV, not from a user's PoV.
tonic::Code::Internal
}
}
// Map as many as possible back into user visible // Map as many as possible back into user visible
// (non internal) errors and only treat the ones // (non internal) errors and only treat the ones
// the user likely can't do anything about as internal // the user likely can't do anything about as internal
DataFusionError::ObjectStore(_) DataFusionError::ObjectStore(_)
| DataFusionError::IoError(_) | DataFusionError::IoError(_)
// External originate from outside DataFusions core codebase.
// As of 2022-10-17, these always come external object store
// errors (e.g. misconfiguration or bad path) which would be
// an internal error and thus we classify them as such.
| DataFusionError::External(_)
// Substrait errors come from internal code and are unused // Substrait errors come from internal code and are unused
// with DataFusion at the moment // with DataFusion at the moment
| DataFusionError::Substrait(_) | DataFusionError::Substrait(_)
@ -100,7 +109,7 @@ mod test {
tonic::Code::InvalidArgument, tonic::Code::InvalidArgument,
); );
do_transl_test(DataFusionError::Internal(s), tonic::Code::Internal); do_transl_test(DataFusionError::Internal(s.clone()), tonic::Code::Internal);
// traversal // traversal
do_transl_test( do_transl_test(
@ -110,6 +119,29 @@ mod test {
), ),
tonic::Code::ResourceExhausted, tonic::Code::ResourceExhausted,
); );
// inspect "external" errors
do_transl_test(
DataFusionError::External(s.clone().into()),
tonic::Code::Internal,
);
do_transl_test(
DataFusionError::External(Box::new(executor::JobError::Panic { msg: s })),
tonic::Code::Internal,
);
do_transl_test(
DataFusionError::External(Box::new(executor::JobError::WorkerGone)),
tonic::Code::Unavailable,
);
do_transl_test(
DataFusionError::Context(
"ctx".into(),
Box::new(DataFusionError::External(Box::new(
executor::JobError::WorkerGone,
))),
),
tonic::Code::Unavailable,
);
} }
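The new `DataFusionError::External` arm works by downcasting the boxed error to the IOx `executor::JobError` type and mapping `WorkerGone` to `Unavailable`; everything else remains `Internal`. Below is a self-contained sketch of that downcast pattern using hypothetical stand-ins for `executor::JobError` and `tonic::Code`.

// Hypothetical stand-ins for executor::JobError and tonic::Code, used only to
// illustrate the downcast-based classification of DataFusion "external" errors.
use std::error::Error;
use std::fmt;

#[derive(Debug)]
enum JobError {
    WorkerGone,
    Panic { msg: String },
}

impl fmt::Display for JobError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::WorkerGone => write!(f, "worker gone"),
            Self::Panic { msg } => write!(f, "panic: {msg}"),
        }
    }
}

impl Error for JobError {}

#[derive(Debug, PartialEq)]
enum Code {
    Internal,
    Unavailable,
}

// Downcast the boxed external error; only a vanished executor worker is
// surfaced as an unavailable (retryable) condition, everything else is internal.
fn classify_external(e: &(dyn Error + 'static)) -> Code {
    match e.downcast_ref::<JobError>() {
        Some(JobError::WorkerGone) => Code::Unavailable,
        _ => Code::Internal,
    }
}

fn main() {
    let gone: Box<dyn Error + Send + Sync> = Box::new(JobError::WorkerGone);
    let other: Box<dyn Error + Send + Sync> = "misconfigured object store".into();
    assert_eq!(classify_external(gone.as_ref()), Code::Unavailable);
    assert_eq!(classify_external(other.as_ref()), Code::Internal);
}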
fn do_transl_test(e: DataFusionError, code: tonic::Code) { fn do_transl_test(e: DataFusionError, code: tonic::Code) {

View File

@ -10,7 +10,7 @@ license.workspace = true
publish = false publish = false
[dependencies] [dependencies]
sqlx = { version = "0.6.3", features = ["runtime-tokio-rustls", "postgres", "json", "tls"] } sqlx = { version = "0.7.1", features = ["runtime-tokio-rustls", "postgres", "json", "tls-rustls"] }
either = "1.8.1" either = "1.8.1"
futures = "0.3" futures = "0.3"
workspace-hack = { version = "0.1", path = "../workspace-hack" } workspace-hack = { version = "0.1", path = "../workspace-hack" }

View File

@ -8,7 +8,7 @@ license.workspace = true
[dependencies] # In alphabetical order [dependencies] # In alphabetical order
dotenvy = "0.15.7" dotenvy = "0.15.7"
parking_lot = "0.12" parking_lot = "0.12"
tempfile = "3.6.0" tempfile = "3.7.0"
tracing-log = "0.1" tracing-log = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] } tracing-subscriber = { version = "0.3", features = ["env-filter"] }
observability_deps = { path = "../observability_deps" } observability_deps = { path = "../observability_deps" }

View File

@ -31,8 +31,8 @@ rand = "0.8.3"
regex = "1.9" regex = "1.9"
reqwest = { version = "0.11", default-features = false, features = ["json", "rustls-tls"] } reqwest = { version = "0.11", default-features = false, features = ["json", "rustls-tls"] }
snafu = "0.7" snafu = "0.7"
sqlx = { version = "0.6", features = [ "runtime-tokio-rustls" , "postgres", "uuid" ] } sqlx = { version = "0.7.1", features = [ "runtime-tokio-rustls" , "postgres", "uuid" ] }
tempfile = "3.6.0" tempfile = "3.7.0"
test_helpers = { path = "../test_helpers", features = ["future_timeout"] } test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
tokio = { version = "1.29", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] } tokio = { version = "1.29", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] }
tokio-util = "0.7" tokio-util = "0.7"

View File

@ -22,6 +22,6 @@ workspace-hack = { version = "0.1", path = "../workspace-hack" }
sysinfo = "0.29.5" sysinfo = "0.29.5"
[dev-dependencies] [dev-dependencies]
tempfile = "3.6.0" tempfile = "3.7.0"
# Need the multi-threaded executor for testing # Need the multi-threaded executor for testing
tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-thread", "time"] } tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-thread", "time"] }

View File

@ -16,25 +16,23 @@ license.workspace = true
### BEGIN HAKARI SECTION ### BEGIN HAKARI SECTION
[dependencies] [dependencies]
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] } ahash = { version = "0.8" }
arrow = { version = "43", features = ["dyn_cmp_dict", "prettyprint"] } arrow = { version = "43", features = ["dyn_cmp_dict", "prettyprint"] }
arrow-array = { version = "43", default-features = false, features = ["chrono-tz"] } arrow-array = { version = "43", default-features = false, features = ["chrono-tz"] }
arrow-flight = { version = "43", features = ["flight-sql-experimental"] } arrow-flight = { version = "43", features = ["flight-sql-experimental"] }
arrow-ord = { version = "43", default-features = false, features = ["dyn_cmp_dict"] } arrow-ord = { version = "43", default-features = false, features = ["dyn_cmp_dict"] }
arrow-string = { version = "43", default-features = false, features = ["dyn_cmp_dict"] } arrow-string = { version = "43", default-features = false, features = ["dyn_cmp_dict"] }
base64-594e8ee84c453af0 = { package = "base64", version = "0.13" } base64 = { version = "0.21" }
base64-647d43efb71741da = { package = "base64", version = "0.21" }
bitflags = { version = "1" }
byteorder = { version = "1" } byteorder = { version = "1" }
bytes = { version = "1" } bytes = { version = "1" }
chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "serde"] } chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "serde"] }
crossbeam-utils = { version = "0.8" } crossbeam-utils = { version = "0.8" }
crypto-common = { version = "0.1", default-features = false, features = ["std"] } crypto-common = { version = "0.1", default-features = false, features = ["std"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "46182c894e5106adba7fb53e9848ce666fb6129b" } datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "44008d71180f2d03e9d21944788e61cb8845abc7" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "46182c894e5106adba7fb53e9848ce666fb6129b", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "44008d71180f2d03e9d21944788e61cb8845abc7", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "46182c894e5106adba7fb53e9848ce666fb6129b", default-features = false, features = ["crypto_expressions", "encoding_expressions", "regex_expressions", "unicode_expressions"] } datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "44008d71180f2d03e9d21944788e61cb8845abc7", default-features = false, features = ["crypto_expressions", "encoding_expressions", "regex_expressions", "unicode_expressions"] }
digest = { version = "0.10", features = ["mac", "std"] } digest = { version = "0.10", features = ["mac", "std"] }
either = { version = "1" } either = { version = "1", features = ["serde"] }
fixedbitset = { version = "0.4" } fixedbitset = { version = "0.4" }
flatbuffers = { version = "23" } flatbuffers = { version = "23" }
flate2 = { version = "1" } flate2 = { version = "1" }
@ -47,8 +45,7 @@ futures-task = { version = "0.3", default-features = false, features = ["std"] }
futures-util = { version = "0.3", features = ["channel", "io", "sink"] } futures-util = { version = "0.3", features = ["channel", "io", "sink"] }
getrandom = { version = "0.2", default-features = false, features = ["std"] } getrandom = { version = "0.2", default-features = false, features = ["std"] }
hashbrown = { version = "0.14", features = ["raw"] } hashbrown = { version = "0.14", features = ["raw"] }
indexmap-dff4ba8e3ae991db = { package = "indexmap", version = "1", default-features = false, features = ["std"] } indexmap = { version = "2" }
indexmap-f595c2ba2a3f28df = { package = "indexmap", version = "2" }
itertools = { version = "0.10" } itertools = { version = "0.10" }
libc = { version = "0.2", features = ["extra_traits"] } libc = { version = "0.2", features = ["extra_traits"] }
lock_api = { version = "0.4", features = ["arc_lock"] } lock_api = { version = "0.4", features = ["arc_lock"] }
@ -74,14 +71,16 @@ regex-automata = { version = "0.3", default-features = false, features = ["dfa-o
regex-syntax = { version = "0.7" } regex-syntax = { version = "0.7" }
reqwest = { version = "0.11", default-features = false, features = ["json", "rustls-tls", "stream"] } reqwest = { version = "0.11", default-features = false, features = ["json", "rustls-tls", "stream"] }
ring = { version = "0.16", features = ["std"] } ring = { version = "0.16", features = ["std"] }
rustls = { version = "0.21", default-features = false, features = ["dangerous_configuration", "logging", "tls12"] }
serde = { version = "1", features = ["derive", "rc"] } serde = { version = "1", features = ["derive", "rc"] }
serde_json = { version = "1", features = ["raw_value"] } serde_json = { version = "1", features = ["raw_value"] }
sha2 = { version = "0.10" } sha2 = { version = "0.10" }
similar = { version = "2", features = ["inline"] } similar = { version = "2", features = ["inline"] }
smallvec = { version = "1", default-features = false, features = ["union"] } smallvec = { version = "1", default-features = false, features = ["union"] }
sqlparser = { version = "0.35", features = ["visitor"] } sqlx = { version = "0.7", features = ["postgres", "runtime-tokio-rustls", "sqlite", "uuid"] }
sqlx = { version = "0.6", features = ["json", "postgres", "runtime-tokio-rustls", "sqlite", "tls", "uuid"] } sqlx-core = { version = "0.7", features = ["_rt-tokio", "_tls-rustls", "any", "json", "migrate", "offline", "uuid"] }
sqlx-core = { version = "0.6", default-features = false, features = ["any", "migrate", "postgres", "runtime-tokio-rustls", "sqlite", "uuid"] } sqlx-postgres = { version = "0.7", default-features = false, features = ["any", "json", "migrate", "offline", "uuid"] }
sqlx-sqlite = { version = "0.7", default-features = false, features = ["any", "json", "migrate", "offline", "uuid"] }
thrift = { version = "0.17" } thrift = { version = "0.17" }
tokio = { version = "1", features = ["full", "test-util", "tracing"] } tokio = { version = "1", features = ["full", "test-util", "tracing"] }
tokio-stream = { version = "0.1", features = ["fs", "net"] } tokio-stream = { version = "0.1", features = ["fs", "net"] }
@ -101,17 +100,15 @@ zstd-safe = { version = "6", default-features = false, features = ["arrays", "le
zstd-sys = { version = "2", default-features = false, features = ["legacy", "std", "zdict_builder"] } zstd-sys = { version = "2", default-features = false, features = ["legacy", "std", "zdict_builder"] }
[build-dependencies] [build-dependencies]
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] } ahash = { version = "0.8" }
base64-594e8ee84c453af0 = { package = "base64", version = "0.13" } base64 = { version = "0.21" }
base64-647d43efb71741da = { package = "base64", version = "0.21" }
bitflags = { version = "1" }
byteorder = { version = "1" } byteorder = { version = "1" }
bytes = { version = "1" } bytes = { version = "1" }
cc = { version = "1", default-features = false, features = ["parallel"] } cc = { version = "1", default-features = false, features = ["parallel"] }
crossbeam-utils = { version = "0.8" } crossbeam-utils = { version = "0.8" }
crypto-common = { version = "0.1", default-features = false, features = ["std"] } crypto-common = { version = "0.1", default-features = false, features = ["std"] }
digest = { version = "0.10", features = ["mac", "std"] } digest = { version = "0.10", features = ["mac", "std"] }
either = { version = "1" } either = { version = "1", features = ["serde"] }
fixedbitset = { version = "0.4" } fixedbitset = { version = "0.4" }
futures-channel = { version = "0.3", features = ["sink"] } futures-channel = { version = "0.3", features = ["sink"] }
futures-core = { version = "0.3" } futures-core = { version = "0.3" }
@ -123,7 +120,7 @@ futures-util = { version = "0.3", features = ["channel", "io", "sink"] }
getrandom = { version = "0.2", default-features = false, features = ["std"] } getrandom = { version = "0.2", default-features = false, features = ["std"] }
hashbrown = { version = "0.14", features = ["raw"] } hashbrown = { version = "0.14", features = ["raw"] }
heck = { version = "0.4", features = ["unicode"] } heck = { version = "0.4", features = ["unicode"] }
indexmap-dff4ba8e3ae991db = { package = "indexmap", version = "1", default-features = false, features = ["std"] } indexmap = { version = "2" }
itertools = { version = "0.10" } itertools = { version = "0.10" }
libc = { version = "0.2", features = ["extra_traits"] } libc = { version = "0.2", features = ["extra_traits"] }
lock_api = { version = "0.4", features = ["arc_lock"] } lock_api = { version = "0.4", features = ["arc_lock"] }
@ -144,67 +141,78 @@ regex = { version = "1" }
regex-automata = { version = "0.3", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] } regex-automata = { version = "0.3", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] }
regex-syntax = { version = "0.7" } regex-syntax = { version = "0.7" }
ring = { version = "0.16", features = ["std"] } ring = { version = "0.16", features = ["std"] }
rustls = { version = "0.21", default-features = false, features = ["dangerous_configuration", "logging", "tls12"] }
serde = { version = "1", features = ["derive", "rc"] } serde = { version = "1", features = ["derive", "rc"] }
serde_json = { version = "1", features = ["raw_value"] } serde_json = { version = "1", features = ["raw_value"] }
sha2 = { version = "0.10" } sha2 = { version = "0.10" }
smallvec = { version = "1", default-features = false, features = ["union"] } smallvec = { version = "1", default-features = false, features = ["union"] }
sqlx-core = { version = "0.6", default-features = false, features = ["any", "migrate", "postgres", "runtime-tokio-rustls", "sqlite", "uuid"] } sqlx-core = { version = "0.7", features = ["_rt-tokio", "_tls-rustls", "any", "json", "migrate", "offline", "uuid"] }
sqlx-macros = { version = "0.6", default-features = false, features = ["json", "migrate", "postgres", "runtime-tokio-rustls", "sqlite", "uuid"] } sqlx-macros = { version = "0.7", features = ["_rt-tokio", "_tls-rustls", "json", "migrate", "postgres", "sqlite", "uuid"] }
sqlx-macros-core = { version = "0.7", features = ["_rt-tokio", "_tls-rustls", "json", "migrate", "postgres", "sqlite", "uuid"] }
sqlx-postgres = { version = "0.7", default-features = false, features = ["any", "json", "migrate", "offline", "uuid"] }
sqlx-sqlite = { version = "0.7", default-features = false, features = ["any", "json", "migrate", "offline", "uuid"] }
syn-dff4ba8e3ae991db = { package = "syn", version = "1", features = ["extra-traits", "full"] } syn-dff4ba8e3ae991db = { package = "syn", version = "1", features = ["extra-traits", "full"] }
syn-f595c2ba2a3f28df = { package = "syn", version = "2", features = ["extra-traits", "full", "visit-mut"] } syn-f595c2ba2a3f28df = { package = "syn", version = "2", features = ["extra-traits", "full", "visit-mut"] }
tokio = { version = "1", features = ["full", "test-util", "tracing"] } tokio = { version = "1", features = ["full", "test-util", "tracing"] }
tokio-stream = { version = "0.1", features = ["fs", "net"] } tokio-stream = { version = "0.1", features = ["fs", "net"] }
tracing = { version = "0.1", features = ["log", "max_level_trace", "release_max_level_trace"] }
tracing-core = { version = "0.1" }
unicode-bidi = { version = "0.3" } unicode-bidi = { version = "0.3" }
unicode-normalization = { version = "0.1" } unicode-normalization = { version = "0.1" }
url = { version = "2" } url = { version = "2" }
uuid = { version = "1", features = ["v4"] } uuid = { version = "1", features = ["v4"] }
[target.x86_64-unknown-linux-gnu.dependencies] [target.x86_64-unknown-linux-gnu.dependencies]
bitflags = { version = "2", default-features = false, features = ["std"] }
nix = { version = "0.26" } nix = { version = "0.26" }
once_cell = { version = "1", default-features = false, features = ["unstable"] } once_cell = { version = "1", default-features = false, features = ["unstable"] }
rustix = { version = "0.38", features = ["fs", "termios"] } rustix = { version = "0.38", features = ["fs", "termios"] }
rustls = { version = "0.21", features = ["dangerous_configuration"] } rustls = { version = "0.21" }
webpki = { version = "0.22", default-features = false, features = ["std"] }
[target.x86_64-unknown-linux-gnu.build-dependencies] [target.x86_64-unknown-linux-gnu.build-dependencies]
bitflags = { version = "2", default-features = false, features = ["std"] }
once_cell = { version = "1", default-features = false, features = ["unstable"] } once_cell = { version = "1", default-features = false, features = ["unstable"] }
webpki = { version = "0.22", default-features = false, features = ["std"] } rustix = { version = "0.38", features = ["fs", "termios"] }
rustls = { version = "0.21" }
[target.x86_64-apple-darwin.dependencies] [target.x86_64-apple-darwin.dependencies]
bitflags = { version = "2", default-features = false, features = ["std"] }
nix = { version = "0.26" }
once_cell = { version = "1", default-features = false, features = ["unstable"] }
rustix = { version = "0.38", features = ["fs", "termios"] }
rustls = { version = "0.21", features = ["dangerous_configuration"] }
rustls = { version = "0.21" }
webpki = { version = "0.22", default-features = false, features = ["std"] }
[target.x86_64-apple-darwin.build-dependencies]
bitflags = { version = "2", default-features = false, features = ["std"] }
once_cell = { version = "1", default-features = false, features = ["unstable"] }
webpki = { version = "0.22", default-features = false, features = ["std"] }
rustix = { version = "0.38", features = ["fs", "termios"] }
rustls = { version = "0.21" }
[target.aarch64-apple-darwin.dependencies]
bitflags = { version = "2", default-features = false, features = ["std"] }
nix = { version = "0.26" }
once_cell = { version = "1", default-features = false, features = ["unstable"] }
rustix = { version = "0.38", features = ["fs", "termios"] }
rustls = { version = "0.21", features = ["dangerous_configuration"] }
rustls = { version = "0.21" }
webpki = { version = "0.22", default-features = false, features = ["std"] }
[target.aarch64-apple-darwin.build-dependencies]
bitflags = { version = "2", default-features = false, features = ["std"] }
once_cell = { version = "1", default-features = false, features = ["unstable"] }
webpki = { version = "0.22", default-features = false, features = ["std"] }
rustix = { version = "0.38", features = ["fs", "termios"] }
rustls = { version = "0.21" }
[target.x86_64-pc-windows-msvc.dependencies]
once_cell = { version = "1", default-features = false, features = ["unstable"] }
rustls = { version = "0.21", features = ["dangerous_configuration"] }
rustls = { version = "0.21" }
scopeguard = { version = "1" }
webpki = { version = "0.22", default-features = false, features = ["std"] }
winapi = { version = "0.3", default-features = false, features = ["basetsd", "cfg", "combaseapi", "consoleapi", "errhandlingapi", "evntrace", "fileapi", "handleapi", "heapapi", "ifdef", "impl-debug", "impl-default", "in6addr", "inaddr", "ioapiset", "iphlpapi", "lmaccess", "lmapibuf", "lmcons", "memoryapi", "minwinbase", "minwindef", "netioapi", "ntlsa", "ntsecapi", "objidl", "oleauto", "pdh", "powerbase", "processenv", "psapi", "rpcdce", "sddl", "securitybaseapi", "shellapi", "std", "stringapiset", "synchapi", "sysinfoapi", "timezoneapi", "wbemcli", "winbase", "wincon", "windef", "winerror", "winioctl", "winnt", "winreg", "winsock2", "winuser", "ws2ipdef", "ws2tcpip", "wtypesbase"] }
winapi = { version = "0.3", default-features = false, features = ["basetsd", "cfg", "combaseapi", "consoleapi", "errhandlingapi", "evntrace", "fileapi", "handleapi", "heapapi", "ifdef", "impl-debug", "impl-default", "in6addr", "inaddr", "ioapiset", "iphlpapi", "knownfolders", "lmaccess", "lmapibuf", "lmcons", "memoryapi", "minwinbase", "minwindef", "netioapi", "ntlsa", "ntsecapi", "ntstatus", "objbase", "objidl", "oleauto", "pdh", "powerbase", "processenv", "psapi", "rpcdce", "sddl", "securitybaseapi", "shellapi", "shlobj", "std", "stringapiset", "synchapi", "sysinfoapi", "timezoneapi", "wbemcli", "winbase", "wincon", "windef", "winerror", "winioctl", "winnt", "winreg", "winsock2", "winuser", "ws2ipdef", "ws2tcpip", "wtypesbase"] }
windows-sys = { version = "0.48", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_Threading", "Win32_System_WindowsProgramming", "Win32_UI_Shell"] }
[target.x86_64-pc-windows-msvc.build-dependencies]
once_cell = { version = "1", default-features = false, features = ["unstable"] }
rustls = { version = "0.21" }
scopeguard = { version = "1" }
webpki = { version = "0.22", default-features = false, features = ["std"] }
winapi = { version = "0.3", default-features = false, features = ["basetsd", "cfg", "combaseapi", "consoleapi", "errhandlingapi", "evntrace", "fileapi", "handleapi", "heapapi", "ifdef", "impl-debug", "impl-default", "in6addr", "inaddr", "ioapiset", "iphlpapi", "lmaccess", "lmapibuf", "lmcons", "memoryapi", "minwinbase", "minwindef", "netioapi", "ntlsa", "ntsecapi", "objidl", "oleauto", "pdh", "powerbase", "processenv", "psapi", "rpcdce", "sddl", "securitybaseapi", "shellapi", "std", "stringapiset", "synchapi", "sysinfoapi", "timezoneapi", "wbemcli", "winbase", "wincon", "windef", "winerror", "winioctl", "winnt", "winreg", "winsock2", "winuser", "ws2ipdef", "ws2tcpip", "wtypesbase"] }
winapi = { version = "0.3", default-features = false, features = ["basetsd", "cfg", "combaseapi", "consoleapi", "errhandlingapi", "evntrace", "fileapi", "handleapi", "heapapi", "ifdef", "impl-debug", "impl-default", "in6addr", "inaddr", "ioapiset", "iphlpapi", "knownfolders", "lmaccess", "lmapibuf", "lmcons", "memoryapi", "minwinbase", "minwindef", "netioapi", "ntlsa", "ntsecapi", "ntstatus", "objbase", "objidl", "oleauto", "pdh", "powerbase", "processenv", "psapi", "rpcdce", "sddl", "securitybaseapi", "shellapi", "shlobj", "std", "stringapiset", "synchapi", "sysinfoapi", "timezoneapi", "wbemcli", "winbase", "wincon", "windef", "winerror", "winioctl", "winnt", "winreg", "winsock2", "winuser", "ws2ipdef", "ws2tcpip", "wtypesbase"] }
windows-sys = { version = "0.48", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_Threading", "Win32_System_WindowsProgramming", "Win32_UI_Shell"] }
### END HAKARI SECTION
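The sqlx entries above are the visible part of the 0.6 → 0.7 bump this merge brings in from main: the Postgres and SQLite drivers move into the separate sqlx-postgres and sqlx-sqlite crates, sqlx-macros-core appears as the new proc-macro backend, and the Any driver now has to install its compiled-in backends explicitly at startup. A minimal sketch of that startup call, assuming a crate that depends on sqlx 0.7 with the any, sqlite, and Tokio runtime features enabled; the pool settings and connection URL below are illustrative, not taken from this repository:

use sqlx::any::{install_default_drivers, AnyPoolOptions};

#[tokio::main]
async fn main() -> Result<(), sqlx::Error> {
    // Sketch only: with sqlx 0.7 the Any driver does not know about the
    // compiled-in backends until they are installed explicitly.
    install_default_drivers();

    // Illustrative in-memory SQLite pool; a postgres:// URL works the same way.
    let pool = AnyPoolOptions::new()
        .max_connections(5)
        .connect("sqlite::memory:")
        .await?;

    // Smoke query; nothing is decoded, so it runs against any backend.
    sqlx::query("SELECT 1").execute(&pool).await?;
    Ok(())
}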