diff --git a/Cargo.lock b/Cargo.lock index 2394a12240..33b41a73c8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1822,6 +1822,18 @@ dependencies = [ "num-traits", ] +[[package]] +name = "flume" +version = "0.10.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1657b4441c3403d9f7b3409e47575237dac27b1b5726df654a6ecbf92f0f7577" +dependencies = [ + "futures-core", + "futures-sink", + "pin-project", + "spin 0.9.4", +] + [[package]] name = "fnv" version = "1.0.7" @@ -2820,9 +2832,11 @@ dependencies = [ "mutable_batch", "mutable_batch_lp", "observability_deps", + "parking_lot 0.12.1", "paste", "pretty_assertions", "rand", + "serde", "snafu", "sqlx", "sqlx-hotswap-pool", @@ -3316,6 +3330,17 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb" +[[package]] +name = "libsqlite3-sys" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "898745e570c7d0453cc1fbc4a701eb6c662ed54e8fec8b7d14be137ebeeb9d14" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + [[package]] name = "link-cplusplus" version = "1.0.8" @@ -5442,8 +5467,10 @@ dependencies = [ "dotenvy", "either", "event-listener", + "flume", "futures-channel", "futures-core", + "futures-executor", "futures-intrusive", "futures-util", "hashlink", @@ -5453,6 +5480,7 @@ dependencies = [ "indexmap", "itoa 1.0.5", "libc", + "libsqlite3-sys", "log", "md-5", "memchr", @@ -6372,6 +6400,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "version_check" version = "0.9.4" @@ -6724,6 +6758,7 @@ dependencies = [ "flate2", "futures-channel", "futures-core", + "futures-executor", "futures-io", "futures-sink", "futures-task", diff --git a/clap_blocks/src/catalog_dsn.rs b/clap_blocks/src/catalog_dsn.rs index c23882004e..40ef51c23d 100644 --- a/clap_blocks/src/catalog_dsn.rs +++ b/clap_blocks/src/catalog_dsn.rs @@ -1,4 +1,5 @@ //! Catalog-DSN-related configs. +use iox_catalog::sqlite::{SqliteCatalog, SqliteConnectionOptions}; use iox_catalog::{ create_or_get_default_records, interface::Catalog, @@ -15,6 +16,9 @@ pub enum Error { #[snafu(display("A Postgres connection string in --catalog-dsn is required."))] ConnectionStringRequired, + #[snafu(display("A SQLite connection string in --catalog-dsn is required."))] + ConnectionStringSqliteRequired, + #[snafu(display("A catalog error occurred: {}", source))] Catalog { source: iox_catalog::interface::Error, @@ -44,7 +48,7 @@ fn default_hotswap_poll_interval_timeout() -> &'static str { } /// CLI config for catalog DSN. -#[derive(Debug, Clone, clap::Parser)] +#[derive(Debug, Clone, Default, clap::Parser)] pub struct CatalogDsnConfig { /// The type of catalog to use. "memory" is only useful for testing purposes. #[clap( @@ -110,13 +114,17 @@ pub struct CatalogDsnConfig { } /// Catalog type. -#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, clap::ValueEnum)] +#[derive(Debug, Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, clap::ValueEnum)] pub enum CatalogType { /// PostgreSQL. + #[default] Postgres, /// In-memory. Memory, + + /// SQLite. 
+ Sqlite, } impl CatalogDsnConfig { @@ -127,12 +135,7 @@ impl CatalogDsnConfig { Self { catalog_type_: CatalogType::Memory, - dsn: None, - max_catalog_connections: PostgresConnectionOptions::DEFAULT_MAX_CONNS, - postgres_schema_name: PostgresConnectionOptions::DEFAULT_SCHEMA_NAME.to_string(), - connect_timeout: PostgresConnectionOptions::DEFAULT_CONNECT_TIMEOUT, - idle_timeout: PostgresConnectionOptions::DEFAULT_IDLE_TIMEOUT, - hotswap_poll_interval: PostgresConnectionOptions::DEFAULT_HOTSWAP_POLL_INTERVAL, + ..Self::default() } } @@ -151,6 +154,17 @@ impl CatalogDsnConfig { } } + /// Create a new Postgres instance for all-in-one mode if a catalog DSN is specified + pub fn new_sqlite(dsn: String) -> Self { + info!("Catalog: SQLite at `{}`", dsn); + + Self { + catalog_type_: CatalogType::Sqlite, + dsn: Some(dsn), + ..Self::default() + } + } + /// Get config-dependent catalog. pub async fn get_catalog( &self, @@ -189,6 +203,20 @@ impl CatalogDsnConfig { Arc::new(mem) as Arc } + CatalogType::Sqlite => { + let options = SqliteConnectionOptions { + dsn: self + .dsn + .as_ref() + .context(ConnectionStringSqliteRequiredSnafu)? + .clone(), + }; + Arc::new( + SqliteCatalog::connect(options, metrics) + .await + .context(CatalogSnafu)?, + ) as Arc + } }; Ok(catalog) diff --git a/compactor2/src/compactor_tests.rs b/compactor2/src/compactor_tests.rs index 6afa42729d..45aa2eb16a 100644 --- a/compactor2/src/compactor_tests.rs +++ b/compactor2/src/compactor_tests.rs @@ -3,7 +3,7 @@ mod tests { use std::{num::NonZeroUsize, sync::Arc, time::Duration}; use arrow_util::assert_batches_sorted_eq; - use data_types::CompactionLevel; + use data_types::{CompactionLevel, ParquetFile}; use iox_query::exec::ExecutorType; use tracker::AsyncSemaphoreMetrics; @@ -46,16 +46,10 @@ mod tests { setup.set_compact_version(AlgoVersion::AllAtOnce); // verify 6 files - let files = setup.list_by_table_not_to_delete().await; - assert_eq!(files.len(), 6); - // // verify ID and compaction level of the files - let files_and_levels: Vec<_> = files - .iter() - .map(|f| (f.id.get(), f.compaction_level)) - .collect(); - assert_eq!( - files_and_levels, + let files = setup.list_by_table_not_to_delete().await; + assert_levels( + &files, vec![ (1, CompactionLevel::FileNonOverlapped), (2, CompactionLevel::Initial), @@ -63,30 +57,21 @@ mod tests { (4, CompactionLevel::FileNonOverlapped), (5, CompactionLevel::Initial), (6, CompactionLevel::Initial), - ] + ], ); + // verify ID and max_l0_created_at - let time_provider = Arc::clone(&setup.config.time_provider); - - let time_1_minute_future = time_provider.minutes_into_future(1).timestamp_nanos(); - let time_2_minutes_future = time_provider.minutes_into_future(2).timestamp_nanos(); - let time_3_minutes_future = time_provider.minutes_into_future(3).timestamp_nanos(); - let time_5_minutes_future = time_provider.minutes_into_future(5).timestamp_nanos(); - - let files_and_max_l0_created_ats: Vec<_> = files - .iter() - .map(|f| (f.id.get(), f.max_l0_created_at.get())) - .collect(); - assert_eq!( - files_and_max_l0_created_ats, + let times = setup.test_times(); + assert_max_l0_created_at( + &files, vec![ - (1, time_1_minute_future), - (2, time_2_minutes_future), - (3, time_5_minutes_future), - (4, time_3_minutes_future), - (5, time_5_minutes_future), - (6, time_2_minutes_future), - ] + (1, times.time_1_minute_future), + (2, times.time_2_minutes_future), + (3, times.time_5_minutes_future), + (4, times.time_3_minutes_future), + (5, times.time_5_minutes_future), + (6, times.time_2_minutes_future), + ], ); 
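
[Editorial note on the `clap_blocks/src/catalog_dsn.rs` hunk above: a minimal, hypothetical usage sketch of the new SQLite catalog type, shown here for illustration only and not part of the patch. The DSN value, the `open_sqlite_catalog` helper, the `"example"` app name, the registry construction, and the exact `get_catalog(app_name, metrics)` parameter list are assumptions inferred from the surrounding code.]

    use std::sync::Arc;
    use clap_blocks::catalog_dsn::CatalogDsnConfig;

    async fn open_sqlite_catalog() -> Result<(), Box<dyn std::error::Error>> {
        // `new_sqlite` sets `CatalogType::Sqlite` plus the DSN, and fills the
        // remaining fields from the new `Default` impl introduced above.
        // The file path here is purely illustrative.
        let dsn_config = CatalogDsnConfig::new_sqlite("iox_catalog.sqlite3".to_string());

        // `get_catalog` dispatches on the catalog type; for `Sqlite` it builds
        // `SqliteConnectionOptions { dsn }` and calls `SqliteCatalog::connect`.
        let metrics = Arc::new(metric::Registry::default());
        let catalog = dsn_config.get_catalog("example", Arc::clone(&metrics)).await?;

        // Run the bundled `sqlite/migrations` to create the schema.
        catalog.setup().await?;

        Ok(())
    }
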
// compact @@ -94,30 +79,21 @@ mod tests { // verify number of files: 6 files are compacted into 2 files let files = setup.list_by_table_not_to_delete().await; - assert_eq!(files.len(), 2); - // - // verify ID and compaction level of the files - let files_and_levels: Vec<_> = files - .iter() - .map(|f| (f.id.get(), f.compaction_level)) - .collect(); - println!("{files_and_levels:?}"); - assert_eq!( - files_and_levels, + assert_levels( + &files, vec![ (7, CompactionLevel::FileNonOverlapped), (8, CompactionLevel::FileNonOverlapped), - ] + ], ); - // verify ID and max_l0_created_at - let files_and_max_l0_created_ats: Vec<_> = files - .iter() - .map(|f| (f.id.get(), f.max_l0_created_at.get())) - .collect(); - // both files have max_l0_created time_5_minutes_future which is the max of all L0 input's max_l0_created_at - assert_eq!( - files_and_max_l0_created_ats, - vec![(7, time_5_minutes_future), (8, time_5_minutes_future),] + assert_max_l0_created_at( + &files, + // both files have max_l0_created time_5_minutes_future + // which is the max of all L0 input's max_l0_created_at + vec![ + (7, times.time_5_minutes_future), + (8, times.time_5_minutes_future), + ], ); // verify the content of files @@ -170,15 +146,8 @@ mod tests { // verify 6 files let files = setup.list_by_table_not_to_delete().await; - assert_eq!(files.len(), 6); - // - // verify ID and compaction level of the files - let files_and_levels: Vec<_> = files - .iter() - .map(|f| (f.id.get(), f.compaction_level)) - .collect(); - assert_eq!( - files_and_levels, + assert_levels( + &files, vec![ (1, CompactionLevel::FileNonOverlapped), (2, CompactionLevel::Initial), @@ -186,30 +155,21 @@ mod tests { (4, CompactionLevel::FileNonOverlapped), (5, CompactionLevel::Initial), (6, CompactionLevel::Initial), - ] + ], ); + // verify ID and max_l0_created_at - let time_provider = Arc::clone(&setup.config.time_provider); - - let time_1_minute_future = time_provider.minutes_into_future(1).timestamp_nanos(); - let time_2_minutes_future = time_provider.minutes_into_future(2).timestamp_nanos(); - let time_3_minutes_future = time_provider.minutes_into_future(3).timestamp_nanos(); - let time_5_minutes_future = time_provider.minutes_into_future(5).timestamp_nanos(); - - let files_and_max_l0_created_ats: Vec<_> = files - .iter() - .map(|f| (f.id.get(), f.max_l0_created_at.get())) - .collect(); - assert_eq!( - files_and_max_l0_created_ats, + let times = setup.test_times(); + assert_max_l0_created_at( + &files, vec![ - (1, time_1_minute_future), - (2, time_2_minutes_future), - (3, time_5_minutes_future), - (4, time_3_minutes_future), - (5, time_5_minutes_future), - (6, time_2_minutes_future), - ] + (1, times.time_1_minute_future), + (2, times.time_2_minutes_future), + (3, times.time_5_minutes_future), + (4, times.time_3_minutes_future), + (5, times.time_5_minutes_future), + (6, times.time_2_minutes_future), + ], ); // compact @@ -218,29 +178,23 @@ mod tests { // verify number of files: 6 files are compacted into 2 files let files = setup.list_by_table_not_to_delete().await; assert_eq!(files.len(), 2); - // - // verify ID and compaction level of the files - let files_and_levels: Vec<_> = files - .iter() - .map(|f| (f.id.get(), f.compaction_level)) - .collect(); - println!("{files_and_levels:?}"); - // This is the result of 2-round compaction fomr L0s -> L1s and then L1s -> L2s - // The first round will create two L1 files IDs 7 and 8 - // The second round will create tow L2 file IDs 9 and 10 - assert_eq!( - files_and_levels, - vec![(9, CompactionLevel::Final), 
(10, CompactionLevel::Final),] + + assert_levels( + &files, + // This is the result of 2-round compaction fomr L0s -> L1s and then L1s -> L2s + // The first round will create two L1 files IDs 7 and 8 + // The second round will create tow L2 file IDs 9 and 10 + vec![(9, CompactionLevel::Final), (10, CompactionLevel::Final)], ); - // verify ID and max_l0_created_at - let files_and_max_l0_created_ats: Vec<_> = files - .iter() - .map(|f| (f.id.get(), f.max_l0_created_at.get())) - .collect(); - // both files have max_l0_created time_5_minutes_future which is the max of all L0 input's max_l0_created_at - assert_eq!( - files_and_max_l0_created_ats, - vec![(9, time_5_minutes_future), (10, time_5_minutes_future),] + + assert_max_l0_created_at( + &files, + // both files have max_l0_created time_5_minutes_future + // which is the max of all L0 input's max_l0_created_at + vec![ + (9, times.time_5_minutes_future), + (10, times.time_5_minutes_future), + ], ); // verify the content of files @@ -289,26 +243,18 @@ mod tests { // Create a test setup with 6 files let setup = TestSetup::builder().with_files().build().await; + let expected_files_and_levels = vec![ + (1, CompactionLevel::FileNonOverlapped), + (2, CompactionLevel::Initial), + (3, CompactionLevel::Initial), + (4, CompactionLevel::FileNonOverlapped), + (5, CompactionLevel::Initial), + (6, CompactionLevel::Initial), + ]; + // verify 6 files let files = setup.list_by_table_not_to_delete().await; - assert_eq!(files.len(), 6); - // - // verify ID and compaction level of the files - let files_and_levels: Vec<_> = files - .iter() - .map(|f| (f.id.get(), f.compaction_level)) - .collect(); - assert_eq!( - files_and_levels, - vec![ - (1, CompactionLevel::FileNonOverlapped), - (2, CompactionLevel::Initial), - (3, CompactionLevel::Initial), - (4, CompactionLevel::FileNonOverlapped), - (5, CompactionLevel::Initial), - (6, CompactionLevel::Initial), - ] - ); + assert_levels(&files, expected_files_and_levels.clone()); // add the partition into skipped compaction setup @@ -321,24 +267,7 @@ mod tests { // verify still 6 files let files = setup.list_by_table_not_to_delete().await; - assert_eq!(files.len(), 6); - // - // verify ID and compaction level of the files - let files_and_levels: Vec<_> = files - .iter() - .map(|f| (f.id.get(), f.compaction_level)) - .collect(); - assert_eq!( - files_and_levels, - vec![ - (1, CompactionLevel::FileNonOverlapped), - (2, CompactionLevel::Initial), - (3, CompactionLevel::Initial), - (4, CompactionLevel::FileNonOverlapped), - (5, CompactionLevel::Initial), - (6, CompactionLevel::Initial), - ] - ); + assert_levels(&files, expected_files_and_levels.clone()); } #[tokio::test] @@ -478,4 +407,39 @@ mod tests { ) .await; } + + #[track_caller] + fn assert_levels<'a>( + files: impl IntoIterator, + expected_files_and_levels: impl IntoIterator, + ) { + let files_and_levels: Vec<_> = files + .into_iter() + .map(|f| (f.id.get(), f.compaction_level)) + .collect(); + + let expected_files_and_levels: Vec<_> = expected_files_and_levels.into_iter().collect(); + + assert_eq!(files_and_levels, expected_files_and_levels); + } + + #[track_caller] + /// Asserts each parquet file has (id, max_l0_created_at) + fn assert_max_l0_created_at<'a>( + files: impl IntoIterator, + expected_files_and_max_l0_created_ats: impl IntoIterator, + ) { + let files_and_max_l0_created_ats: Vec<_> = files + .into_iter() + .map(|f| (f.id.get(), f.max_l0_created_at.get())) + .collect(); + + let expected_files_and_max_l0_created_ats: Vec<_> = + 
expected_files_and_max_l0_created_ats.into_iter().collect(); + + assert_eq!( + files_and_max_l0_created_ats, + expected_files_and_max_l0_created_ats + ); + } } diff --git a/compactor2/src/components/divide_initial/mod.rs b/compactor2/src/components/divide_initial/mod.rs index c0c0be7018..237c000dab 100644 --- a/compactor2/src/components/divide_initial/mod.rs +++ b/compactor2/src/components/divide_initial/mod.rs @@ -5,5 +5,10 @@ use data_types::ParquetFile; pub mod single_branch; pub trait DivideInitial: Debug + Display + Send + Sync { + /// Divides a group of files that should be compacted into + /// potentially smaller groups called "branches", + /// + /// Each branch is compacted together in a single plan, and each + /// compact plan may produce one or more parquet files. fn divide(&self, files: Vec) -> Vec>; } diff --git a/compactor2/src/components/files_split/mod.rs b/compactor2/src/components/files_split/mod.rs index 964ae6e436..bb75fa61cb 100644 --- a/compactor2/src/components/files_split/mod.rs +++ b/compactor2/src/components/files_split/mod.rs @@ -10,7 +10,12 @@ pub mod target_level_target_level_split; pub mod target_level_upgrade_split; pub trait FilesSplit: Debug + Display + Send + Sync { - /// Split provided files into 2 groups of files. There will be different split needs: + /// Split provided files into 2 groups of files: + /// (files_to_compact, files_to_keep) + /// + /// Only files in files_to_compact are considered for compaction this round + /// + /// There will be different split needs: /// . `[files <= target_level]` and `[files > target_level]` /// . `[overlapping_files]` and `[non_overlapping_files]` /// . `[files_to_upgrade]` and `[files_to_compact]` diff --git a/compactor2/src/components/partition_filter/mod.rs b/compactor2/src/components/partition_filter/mod.rs index e7b02523bc..e9afa9a25d 100644 --- a/compactor2/src/components/partition_filter/mod.rs +++ b/compactor2/src/components/partition_filter/mod.rs @@ -20,11 +20,16 @@ pub mod or; /// /// May return an error. In this case, the partition will be marked as "skipped". /// -/// If you only plan to inspect the ID but not the files and not perform any IO, check -/// [`IdOnlyPartitionFilter`](crate::components::id_only_partition_filter::IdOnlyPartitionFilter) which usually runs -/// earlier in the pipeline and hence is more efficient. +/// If you only plan to inspect the ID but not the files and not +/// perform any IO, check +/// [`IdOnlyPartitionFilter`](crate::components::id_only_partition_filter::IdOnlyPartitionFilter) +/// which usually runs earlier in the pipeline and hence is more +/// efficient. #[async_trait] pub trait PartitionFilter: Debug + Display + Send + Sync { + /// Return `true` if the if the compactor should run a + /// compaction on this partition. Return `false` if this partition + /// does not need any more compaction. 
async fn apply( &self, partition_id: PartitionId, diff --git a/compactor2/src/driver.rs b/compactor2/src/driver.rs index cd07ba8554..7f7253802c 100644 --- a/compactor2/src/driver.rs +++ b/compactor2/src/driver.rs @@ -4,6 +4,7 @@ use data_types::{CompactionLevel, ParquetFile, ParquetFileParams, PartitionId}; use datafusion::physical_plan::SendableRecordBatchStream; use futures::{stream::FuturesOrdered, StreamExt, TryFutureExt, TryStreamExt}; use iox_time::Time; +use observability_deps::tracing::info; use parquet_file::ParquetFilePath; use tracker::InstrumentedAsyncSemaphore; @@ -100,8 +101,8 @@ async fn compact_partition( /// The files are split into non-time-overlaped branches, each is compacted in parallel. /// The output of each branch is then combined and re-branch in next round until /// they should not be compacted based on defined stop conditions. -// -// Example: Partition has 7 files: f1, f2, f3, f4, f5, f6, f7 +/// +/// Example: Partition has 7 files: f1, f2, f3, f4, f5, f6, f7 /// Input: shown by their time range /// |--f1--| |----f3----| |-f4-||-f5-||-f7-| /// |------f2----------| |--f6--| @@ -193,6 +194,7 @@ async fn try_compact_partition( // fetch partition info only if we need it let mut lazy_partition_info = None; + // loop for each "Round", consider each file in the partition loop { files = components.files_filter.apply(files); @@ -218,15 +220,18 @@ async fn try_compact_partition( let mut branches = components.divide_initial.divide(files_now); let mut files_next = files_later; + // loop for each "Branch" while let Some(branch) = branches.pop() { - let input_paths: Vec = branch.iter().map(|f| f.into()).collect(); + let input_paths: Vec = + branch.iter().map(ParquetFilePath::from).collect(); - // Identify the target level and files that should be compacted, upgraded, and - // kept for next round of compaction - let compaction_plan = buil_compaction_plan(branch, Arc::clone(&components))?; + // Identify the target level and files that should be + // compacted together, upgraded, and kept for next round of + // compaction + let compaction_plan = build_compaction_plan(branch, Arc::clone(&components))?; // Compact - let created_file_params = compact_files( + let created_file_params = run_compaction_plan( &compaction_plan.files_to_compact, partition_info, &components, @@ -265,24 +270,30 @@ async fn try_compact_partition( } } -/// Each CompactionPlan specifies the target level and files that should be compacted, upgraded, and -/// kept for next round of compaction +/// A CompactionPlan specifies the parameters for a single, which may +/// generate one or more new parquet files. It includes the target +/// [`CompactionLevel`], the specific files that should be compacted +/// together to form new file(s), files that should be upgraded +/// without chainging, files that should be left unmodified. struct CompactionPlan { - /// Target level to compact to + /// The target level of file resulting from compaction target_level: CompactionLevel, - /// Small and/or overlapped files to compact + /// Files which should be compacted into a new single parquet + /// file, often the small and/or overlapped files files_to_compact: Vec, - /// Non-overlapped and large enough files to upgrade + /// Non-overlapped files that should be upgraded to the target + /// level without rewriting (for example they are of sufficient + /// size) files_to_upgrade: Vec, - /// Non-overlapped or higher-target-level files to keep for next round of compaction + /// files which should not be modified. 
For example, + /// non-overlapped or higher-target-level files files_to_keep: Vec, } -/// Build compaction plan for a given set of files -/// This function will determine the target level to compact to and split the files into -/// files_to_compact, files_to_upgrade, and files_to_keep +/// Build [`CompactionPlan`] for a for a given set of files. +/// +/// # Example: /// -/// Example: /// . Input: /// |--L0.1--| |--L0.2--| |--L0.3--| |--L0.4--| --L0.5--| /// |--L1.1--| |--L1.2--| |--L1.3--| |--L1.4--| @@ -294,7 +305,7 @@ struct CompactionPlan { /// . files_to_upgrade = [L0.1, L0.5] /// . files_to_compact = [L0.2, L0.3, L0.4, L1.2, L1.3] /// -fn buil_compaction_plan( +fn build_compaction_plan( files: Vec, components: Arc, ) -> Result { @@ -308,7 +319,7 @@ fn buil_compaction_plan( // Since output of one compaction is used as input of next compaction, all files that are not // compacted or upgraded are still kept to consider in next round of compaction - // Split atctual files to compact from its higher-target-level files + // Split actual files to compact from its higher-target-level files // The higher-target-level files are kept for next round of compaction let (files_to_compact, mut files_to_keep) = components .target_level_split @@ -326,6 +337,14 @@ fn buil_compaction_plan( .upgrade_split .apply(files_to_compact, target_level); + info!( + target_level = target_level.to_string(), + files_to_compacts = files_to_compact.len(), + files_to_upgrade = files_to_upgrade.len(), + files_to_keep = files_to_keep.len(), + "Compaction Plan" + ); + Ok(CompactionPlan { target_level, files_to_compact, @@ -334,10 +353,8 @@ fn buil_compaction_plan( }) } -/// Compact into the given target_level -/// This function assumes the input files only include overlapped files of `target_level - 1` -/// and files of target_level. 
-async fn compact_files( +/// Compact `files` into a new parquet file of the the given target_level +async fn run_compaction_plan( files: &[ParquetFile], partition_info: &Arc, components: &Arc, diff --git a/compactor2/src/test_util.rs b/compactor2/src/test_util.rs index 7b675b9178..1be928c6ff 100644 --- a/compactor2/src/test_util.rs +++ b/compactor2/src/test_util.rs @@ -550,6 +550,34 @@ impl TestSetup { let mut config = Arc::get_mut(&mut self.config).unwrap(); config.min_num_l1_files_to_compact = min_num_l1_files_to_compact; } + + /// return a set of times relative to config.time_provider.now() + pub fn test_times(&self) -> TestTimes { + TestTimes::new(self.config.time_provider.as_ref()) + } +} + +/// A collection of nanosecond timestamps relative to now +pub struct TestTimes { + pub time_1_minute_future: i64, + pub time_2_minutes_future: i64, + pub time_3_minutes_future: i64, + pub time_5_minutes_future: i64, +} + +impl TestTimes { + fn new(time_provider: &dyn TimeProvider) -> Self { + let time_1_minute_future = time_provider.minutes_into_future(1).timestamp_nanos(); + let time_2_minutes_future = time_provider.minutes_into_future(2).timestamp_nanos(); + let time_3_minutes_future = time_provider.minutes_into_future(3).timestamp_nanos(); + let time_5_minutes_future = time_provider.minutes_into_future(5).timestamp_nanos(); + Self { + time_1_minute_future, + time_2_minutes_future, + time_3_minutes_future, + time_5_minutes_future, + } + } } pub async fn list_object_store(store: &Arc) -> HashSet { diff --git a/data_types/src/lib.rs b/data_types/src/lib.rs index d5ff391953..402ab6e125 100644 --- a/data_types/src/lib.rs +++ b/data_types/src/lib.rs @@ -858,11 +858,8 @@ impl From<&str> for PartitionKey { } } -impl sqlx::Type for PartitionKey -where - DB: sqlx::Database, -{ - fn type_info() -> DB::TypeInfo { +impl sqlx::Type for PartitionKey { + fn type_info() -> sqlx::postgres::PgTypeInfo { // Store this type as VARCHAR sqlx::postgres::PgTypeInfo::with_name("VARCHAR") } @@ -887,6 +884,31 @@ impl sqlx::Decode<'_, sqlx::Postgres> for PartitionKey { } } +impl sqlx::Type for PartitionKey { + fn type_info() -> sqlx::sqlite::SqliteTypeInfo { + >::type_info() + } +} + +impl sqlx::Encode<'_, sqlx::Sqlite> for PartitionKey { + fn encode_by_ref( + &self, + buf: &mut >::ArgumentBuffer, + ) -> sqlx::encode::IsNull { + >::encode(self.0.to_string(), buf) + } +} + +impl sqlx::Decode<'_, sqlx::Sqlite> for PartitionKey { + fn decode( + value: >::ValueRef, + ) -> Result> { + Ok(Self( + >::decode(value)?.into(), + )) + } +} + /// Data object for a partition. The combination of shard, table and key are unique (i.e. only /// one record can exist for each combo) #[derive(Debug, Clone, PartialEq, Eq, sqlx::FromRow)] diff --git a/influxdb_iox/src/commands/compactor.rs b/influxdb_iox/src/commands/compactor.rs index c8a3d3f25e..9a28a7d9e4 100644 --- a/influxdb_iox/src/commands/compactor.rs +++ b/influxdb_iox/src/commands/compactor.rs @@ -14,8 +14,6 @@ use std::{collections::HashMap, sync::Arc}; use crate::process_info::{setup_metric_registry, USIZE_MAX}; -mod generate; - #[derive(Debug, clap::Parser)] pub struct Config { #[clap(subcommand)] @@ -53,21 +51,6 @@ pub enum Command { )] exec_mem_pool_bytes: usize, }, - - /// Generate Parquet files and catalog entries with different characteristics for the purposes - /// of investigating how the compactor handles them. - /// - /// Only works with `--object-store file` because this is for generating local development - /// data. 
- /// - /// Within the directory specified by `--data-dir`, will generate a - /// `compactor_data/line_protocol` subdirectory to avoid interfering with other existing IOx - /// files that may be in the `--data-dir`. - /// - /// WARNING: On every run of this tool, the `compactor_data/line_protocol` subdirectory will be - /// removed. If you want to keep any previously generated files, move or copy them before - /// running this tool again. - Generate(generate::Config), } pub async fn command(config: Config) -> Result<()> { @@ -121,9 +104,6 @@ pub async fn command(config: Config) -> Result<()> { compactor::handler::run_compactor_once(compactor).await; } - Command::Generate(config) => { - generate::run(config).await?; - } } Ok(()) @@ -143,9 +123,6 @@ pub enum Error { #[snafu(context(false))] Compacting { source: ioxd_compactor::Error }, - - #[snafu(context(false))] - Generating { source: generate::Error }, } pub type Result = std::result::Result; diff --git a/influxdb_iox/src/commands/compactor/generate.rs b/influxdb_iox/src/commands/compactor/generate.rs deleted file mode 100644 index de6107790d..0000000000 --- a/influxdb_iox/src/commands/compactor/generate.rs +++ /dev/null @@ -1,685 +0,0 @@ -//! Implements the `compactor generate` command. - -use bytes::Bytes; -use clap::ValueEnum; -use clap_blocks::{ - catalog_dsn::CatalogDsnConfig, - object_store::{make_object_store, ObjectStoreConfig, ObjectStoreType}, -}; -use object_store::DynObjectStore; -use snafu::prelude::*; -use std::{ - ffi::OsStr, fmt::Write, fs, num::NonZeroUsize, path::PathBuf, process::Command, sync::Arc, -}; - -#[derive(Debug, clap::Parser)] -pub struct Config { - #[clap(flatten)] - object_store_config: ObjectStoreConfig, - - #[clap(flatten)] - catalog_dsn: CatalogDsnConfig, - - /// The type of compaction to be done on the files. If `hot` is specified, the generated - /// files will have compaction level 0, will overlap with each other slightly, and will be - /// marked that they were created within the last (approximately) 30 minutes. If `cold` is - /// specified, the generated files will have compaction level 1, won't overlap with each other, - /// and will be marked that they were created between 8 and 24 hours ago. - #[clap( - value_enum, - value_parser, - long = "compaction-type", - env = "INFLUXDB_IOX_COMPACTOR_GENERATE_TYPE", - default_value = "hot", - action - )] - compaction_type: CompactionType, - - /// The number of IOx partitions to generate files for. Each partition will have the number - /// of files specified by `--num-files` generated. - #[clap( - long = "num-partitions", - env = "INFLUXDB_IOX_COMPACTOR_GENERATE_NUM_PARTITIONS", - default_value = "1", - action - )] - num_partitions: NonZeroUsize, - - /// The number of parquet files to generate per partition. - #[clap( - long = "num-files", - env = "INFLUXDB_IOX_COMPACTOR_GENERATE_NUM_FILES", - default_value = "1", - action - )] - num_files: NonZeroUsize, - - /// The number of columns to generate in each file. One column will always be the - /// timestamp. Additional columns will be given a type in I64, F64, String, Bool, and - /// Tag in equal proportion. - #[clap( - long = "num-cols", - env = "INFLUXDB_IOX_COMPACTOR_GENERATE_NUM_COLS", - default_value = "6", - action - )] - num_columns: NonZeroUsize, - - /// The number of rows to generate in each file. 
- #[clap( - long = "num-rows", - env = "INFLUXDB_IOX_COMPACTOR_GENERATE_NUM_ROWS", - default_value = "1", - action - )] - num_rows: NonZeroUsize, -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)] -pub enum CompactionType { - Hot, - Cold, -} - -pub async fn run(config: Config) -> Result<()> { - if !matches!( - &config.object_store_config.object_store, - Some(ObjectStoreType::File) - ) { - panic!("Sorry, this tool only works with 'file' object stores."); - } - - let object_store = make_object_store(&config.object_store_config)?; - - let root_dir: PathBuf = config - .object_store_config - .database_directory - .as_ref() - .expect("--data-dir is required and has already been checked") - .into(); - - let compactor_data_dir = root_dir.join("compactor_data"); - let parquet_dir = compactor_data_dir.join("parquet"); - - if compactor_data_dir - .try_exists() - .context(FileExistenceSnafu { - path: &compactor_data_dir, - })? - { - fs::remove_dir_all(&compactor_data_dir).context(RemoveSnafu { - path: &compactor_data_dir, - })?; - } - - let spec_location = "compactor_data/spec.toml"; - let spec_in_root = compactor_data_dir.join("spec.toml"); - - let Config { - compaction_type, - num_rows, - num_files, - .. - } = config; - - let TimeValues { - sampling_interval_ns, - start_end_args, - } = TimeValues::new(compaction_type, num_rows.get(), num_files.get()); - - for (file_id, &start_end) in start_end_args - .iter() - .enumerate() - .take(config.num_files.get()) - { - write_data_generation_spec( - file_id, - Arc::clone(&object_store), - config.num_columns.get(), - sampling_interval_ns, - spec_location, - ) - .await?; - - let StartEndMinutesAgo { start, end } = start_end; - - generate_data(&spec_in_root, &parquet_dir, num_rows.get(), start, end)?; - } - - Ok(()) -} - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Could not parse the object store configuration"))] - #[snafu(context(false))] - ObjectStoreConfigParsing { - source: clap_blocks::object_store::ParseError, - }, - - #[snafu(display("Could not write file to object storage"))] - ObjectStoreWriting { source: object_store::Error }, - - #[snafu(display("Could not parse object store path"))] - ObjectStorePathParsing { source: object_store::path::Error }, - - #[snafu(display("Subcommand failed: {status}"))] - Subcommand { status: String }, - - #[snafu(display("Could not check for existence of path {}", path.display()))] - FileExistence { - path: PathBuf, - source: std::io::Error, - }, - - #[snafu(display("Could not remove directory {}", path.display()))] - Remove { - path: PathBuf, - source: std::io::Error, - }, -} - -pub type Result = std::result::Result; - -async fn write_data_generation_spec( - file_id: usize, - object_store: Arc, - num_columns: usize, - sampling_interval_ns: usize, - spec_location: &str, -) -> Result<()> { - let object_store_spec_path = - object_store::path::Path::parse(spec_location).context(ObjectStorePathParsingSnafu)?; - - let contents = data_generation_spec_contents(file_id, sampling_interval_ns, num_columns); - let data = Bytes::from(contents); - - object_store - .put(&object_store_spec_path, data) - .await - .context(ObjectStoreWritingSnafu)?; - - Ok(()) -} - -fn generate_data( - spec_in_root: impl AsRef, - parquet_dir: impl AsRef, - num_rows: usize, - start: usize, - end: usize, -) -> Result<()> { - let status = Command::new("cargo") - .arg("run") - .arg("-p") - .arg("iox_data_generator") - .arg("--") - .arg("--specification") - .arg(&spec_in_root) - .arg("--parquet") - 
.arg(&parquet_dir) - .arg("--start") - .arg(&format!("{start} minutes ago")) - .arg("--end") - .arg(&format!("{end} minutes ago")) - .arg("--batch-size") - .arg(num_rows.to_string()) - .status() - .expect("Running the data generator should have worked"); - - ensure!( - status.success(), - SubcommandSnafu { - status: status.to_string() - } - ); - - Ok(()) -} - -fn data_generation_spec_contents( - file_id: usize, - sampling_interval_ns: usize, - num_columns: usize, -) -> String { - let mut spec = format!( - r#" -name = "for_compaction" - -[[database_writers]] -database_ratio = 1.0 -agents = [{{name = "data_{file_id}", sampling_interval = "{sampling_interval_ns}ns"}}] - -[[agents]] -name = "data_{file_id}" - -[[agents.measurements]] -name = "measure" -"# - ); - - // The 1st column is always time, and the data generator always generates a timestamp without - // any configuration needed, so the number of columns that need configuration is one less. - let num_columns = num_columns - 1; - - // Every 5th column will be a tag. - let num_tags = num_columns / 5; - // The remaining columns will be fields of various types. - let num_fields = num_columns - num_tags; - - // Tags go with the measurement, so they have to be specified in the config first. - if num_tags > 0 { - spec.push_str("tag_pairs = [\n"); - for tag_id in 1..=num_tags { - let _ = write!( - spec, - r#" {{key = "tag_{tag_id}", template = "{{{{random 1}}}}", regenerate_after_lines = 1}},"# - ); - spec.push('\n'); - } - spec.push_str("]\n") - } - - for field_id in 0..num_fields { - spec.push_str(&field_spec(field_id)); - spec.push('\n'); - } - - spec -} - -fn field_spec(field_id: usize) -> String { - match field_id % 4 { - 0 => format!( - r#" -[[agents.measurements.fields]] -name = "i64_{field_id}" -i64_range = [0, 100]"# - ), - 1 => format!( - r#" -[[agents.measurements.fields]] -name = "f64_{field_id}" -f64_range = [0.0, 100.0]"# - ), - 2 => format!( - r#" -[[agents.measurements.fields]] -name = "string_{field_id}" -template = "{{{{random 4}}}}""# - ), - 3 => format!( - r#" -[[agents.measurements.fields]] -name = "bool_{field_id}" -bool = true"# - ), - _ => unreachable!("% 4 can only result in 0 - 3"), - } -} - -#[derive(Debug, PartialEq, Clone)] -struct TimeValues { - sampling_interval_ns: usize, - start_end_args: Vec, -} - -#[derive(Debug, PartialEq, Copy, Clone)] -struct StartEndMinutesAgo { - start: usize, - end: usize, -} - -impl TimeValues { - fn new(compaction_type: CompactionType, num_rows: usize, num_files: usize) -> Self { - match compaction_type { - CompactionType::Hot => { - // Make the range approximately 30 min ago to now. - let full_range_start_minutes = 30; - let full_range_end_minutes = 0; - - // Overlap each file by this many minutes on the start and end with other files to - // create realistic level 0 files for hot compaction. - let overlap_minutes = 1; - - Self::inner( - full_range_start_minutes, - full_range_end_minutes, - overlap_minutes, - num_rows, - num_files, - ) - } - CompactionType::Cold => { - // Make the range approximately 24 hours ago to 8 hours ago. 
- let full_range_start_minutes = 24 * 60; - let full_range_end_minutes = 8 * 60; - - // Don't overlap level 1 files - let overlap_minutes = 0; - - Self::inner( - full_range_start_minutes, - full_range_end_minutes, - overlap_minutes, - num_rows, - num_files, - ) - } - } - } - - // Clippy suggests changing `if overlap_minutes == 0 { 1 } else { 0 }` to - // `usize::from(overlap_minutes == 0)`, but I think the original is clearer - #[allow(clippy::bool_to_int_with_if)] - fn inner( - full_range_start_minutes: usize, - full_range_end_minutes: usize, - overlap_minutes: usize, - num_rows: usize, - num_files: usize, - ) -> Self { - // Divide the full range evenly across all files, plus the overlap on each end. - let full_range_length_minutes = full_range_start_minutes - full_range_end_minutes; - let minutes_per_file = full_range_length_minutes / num_files + overlap_minutes * 2; - - // Tell the generator to create one point every this many nanoseconds to create the - // specified number of rows in each file. - let fencepost_num_rows = if num_rows != 1 { - num_rows - 1 - } else { - num_rows - }; - let sampling_interval_ns = (minutes_per_file * 60 * 1_000_000_000) / fencepost_num_rows; - - let start_end_args = (0..num_files) - .rev() - .map(|file_id| StartEndMinutesAgo { - start: minutes_per_file * (file_id + 1) - overlap_minutes * file_id - + full_range_end_minutes, - end: minutes_per_file * file_id - overlap_minutes * file_id - + full_range_end_minutes - // When the overlap is 0, subtract 1 because the data generator is inclusive - - (if overlap_minutes == 0 { 1 } else { 0 }), - }) - .collect(); - - Self { - sampling_interval_ns, - start_end_args, - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - mod hot { - use super::*; - - const COMPACTION_TYPE: CompactionType = CompactionType::Hot; - - #[test] - fn one_row_one_file() { - let num_rows = 1; - let num_files = 1; - let TimeValues { - sampling_interval_ns, - start_end_args, - } = TimeValues::new(COMPACTION_TYPE, num_rows, num_files); - - assert_eq!(sampling_interval_ns, 1_920_000_000_000); - assert_eq!( - start_end_args, - vec![StartEndMinutesAgo { start: 32, end: 0 }] - ); - } - - #[test] - fn one_thousand_rows_one_file() { - let num_rows = 1_000; - let num_files = 1; - let TimeValues { - sampling_interval_ns, - start_end_args, - } = TimeValues::new(COMPACTION_TYPE, num_rows, num_files); - - assert_eq!(sampling_interval_ns, 1_921_921_921); - assert_eq!( - start_end_args, - vec![StartEndMinutesAgo { start: 32, end: 0 }] - ); - } - - #[test] - fn one_row_three_files() { - let num_rows = 1; - let num_files = 3; - let TimeValues { - sampling_interval_ns, - start_end_args, - } = TimeValues::new(COMPACTION_TYPE, num_rows, num_files); - - assert_eq!(sampling_interval_ns, 720_000_000_000); - assert_eq!( - start_end_args, - vec![ - StartEndMinutesAgo { start: 34, end: 22 }, - StartEndMinutesAgo { start: 23, end: 11 }, - StartEndMinutesAgo { start: 12, end: 0 }, - ] - ); - } - - #[test] - fn one_thousand_rows_three_files() { - let num_rows = 1_000; - let num_files = 3; - let TimeValues { - sampling_interval_ns, - start_end_args, - } = TimeValues::new(COMPACTION_TYPE, num_rows, num_files); - - assert_eq!(sampling_interval_ns, 720_720_720); - assert_eq!( - start_end_args, - vec![ - StartEndMinutesAgo { start: 34, end: 22 }, - StartEndMinutesAgo { start: 23, end: 11 }, - StartEndMinutesAgo { start: 12, end: 0 }, - ] - ); - } - } - - mod cold { - use super::*; - - const COMPACTION_TYPE: CompactionType = CompactionType::Cold; - - #[test] - fn 
one_row_one_file() { - let num_rows = 1; - let num_files = 1; - let TimeValues { - sampling_interval_ns, - start_end_args, - } = TimeValues::new(COMPACTION_TYPE, num_rows, num_files); - - assert_eq!(sampling_interval_ns, 57_600_000_000_000); - assert_eq!( - start_end_args, - vec![StartEndMinutesAgo { - start: 24 * 60, - end: 8 * 60 - 1, - }] - ); - } - - #[test] - fn one_thousand_rows_one_file() { - let num_rows = 1_000; - let num_files = 1; - let TimeValues { - sampling_interval_ns, - start_end_args, - } = TimeValues::new(COMPACTION_TYPE, num_rows, num_files); - - assert_eq!(sampling_interval_ns, 57_657_657_657); - assert_eq!( - start_end_args, - vec![StartEndMinutesAgo { - start: 24 * 60, - end: 8 * 60 - 1, - }] - ); - } - - #[test] - fn one_row_three_files() { - let num_rows = 1; - let num_files = 3; - let TimeValues { - sampling_interval_ns, - start_end_args, - } = TimeValues::new(COMPACTION_TYPE, num_rows, num_files); - - assert_eq!(sampling_interval_ns, 19_200_000_000_000); - assert_eq!( - start_end_args, - vec![ - StartEndMinutesAgo { - start: 1440, - end: 1119, - }, - StartEndMinutesAgo { - start: 1120, - end: 799, - }, - StartEndMinutesAgo { - start: 800, - end: 479, - }, - ] - ); - } - - #[test] - fn one_thousand_rows_three_files() { - let num_rows = 1_000; - let num_files = 3; - let TimeValues { - sampling_interval_ns, - start_end_args, - } = TimeValues::new(COMPACTION_TYPE, num_rows, num_files); - - assert_eq!(sampling_interval_ns, 19_219_219_219); - assert_eq!( - start_end_args, - vec![ - StartEndMinutesAgo { - start: 1440, - end: 1119, - }, - StartEndMinutesAgo { - start: 1120, - end: 799, - }, - StartEndMinutesAgo { - start: 800, - end: 479, - }, - ] - ); - } - } - - #[test] - fn minimal_spec_contents() { - let spec = data_generation_spec_contents(1, 1, 2); - - assert_eq!( - spec, - r#" -name = "for_compaction" - -[[database_writers]] -database_ratio = 1.0 -agents = [{name = "data_1", sampling_interval = "1ns"}] - -[[agents]] -name = "data_1" - -[[agents.measurements]] -name = "measure" - -[[agents.measurements.fields]] -name = "i64_0" -i64_range = [0, 100] -"# - ); - } - - #[test] - fn many_columns_spec_contents() { - let spec = data_generation_spec_contents(3, 100, 12); - - assert_eq!( - spec, - r#" -name = "for_compaction" - -[[database_writers]] -database_ratio = 1.0 -agents = [{name = "data_3", sampling_interval = "100ns"}] - -[[agents]] -name = "data_3" - -[[agents.measurements]] -name = "measure" -tag_pairs = [ - {key = "tag_1", template = "{{random 1}}", regenerate_after_lines = 1}, - {key = "tag_2", template = "{{random 1}}", regenerate_after_lines = 1}, -] - -[[agents.measurements.fields]] -name = "i64_0" -i64_range = [0, 100] - -[[agents.measurements.fields]] -name = "f64_1" -f64_range = [0.0, 100.0] - -[[agents.measurements.fields]] -name = "string_2" -template = "{{random 4}}" - -[[agents.measurements.fields]] -name = "bool_3" -bool = true - -[[agents.measurements.fields]] -name = "i64_4" -i64_range = [0, 100] - -[[agents.measurements.fields]] -name = "f64_5" -f64_range = [0.0, 100.0] - -[[agents.measurements.fields]] -name = "string_6" -template = "{{random 4}}" - -[[agents.measurements.fields]] -name = "bool_7" -bool = true - -[[agents.measurements.fields]] -name = "i64_8" -i64_range = [0, 100] -"# - ); - } -} diff --git a/influxdb_iox/tests/end_to_end_cases/compactor.rs b/influxdb_iox/tests/end_to_end_cases/compactor.rs deleted file mode 100644 index f1796ddafc..0000000000 --- a/influxdb_iox/tests/end_to_end_cases/compactor.rs +++ /dev/null @@ -1,153 +0,0 @@ 
-use arrow::record_batch::RecordBatch; -use assert_cmd::Command; -use datafusion::datasource::object_store::ObjectStoreUrl; -use futures::TryStreamExt; -use object_store::{local::LocalFileSystem, path::Path as ObjectStorePath, ObjectStore}; -use parquet_to_line_protocol::ParquetFileReader; -use predicates::prelude::*; -use std::sync::Arc; -use test_helpers_end_to_end::maybe_skip_integration; - -#[tokio::test] -async fn compactor_generate_has_defaults() { - let database_url = maybe_skip_integration!(); - let dir = tempfile::tempdir() - .expect("could not get temporary directory") - .into_path(); - - Command::cargo_bin("influxdb_iox") - .unwrap() - .arg("compactor") - .arg("generate") - .arg("--catalog-dsn") - .arg(&database_url) - .arg("--object-store") - .arg("file") - .arg("--data-dir") - .arg(&dir) - .assert() - .success(); - let data_generation_spec = dir.join("compactor_data/spec.toml"); - assert!(data_generation_spec.exists()); -} - -#[tokio::test] -async fn compactor_generate_zeroes_are_invalid() { - let database_url = maybe_skip_integration!(); - let dir = tempfile::tempdir().expect("could not get temporary directory"); - - Command::cargo_bin("influxdb_iox") - .unwrap() - .arg("compactor") - .arg("generate") - .arg("--catalog-dsn") - .arg(&database_url) - .arg("--object-store") - .arg("file") - .arg("--data-dir") - .arg(dir.path()) - .arg("--num-partitions") - .arg("0") - .arg("--num-files") - .arg("0") - .arg("--num-cols") - .arg("0") - .arg("--num-rows") - .arg("0") - .assert() - .failure() - .stderr(predicate::str::contains( - "number would be zero for non-zero type", - )); -} - -#[tokio::test] -async fn compactor_generate_creates_files_and_catalog_entries() { - let database_url = maybe_skip_integration!(); - let dir = tempfile::tempdir().expect("could not get temporary directory"); - - Command::cargo_bin("influxdb_iox") - .unwrap() - .arg("compactor") - .arg("generate") - .arg("--catalog-dsn") - .arg(&database_url) - .arg("--object-store") - .arg("file") - .arg("--data-dir") - .arg(dir.path()) - .assert() - .success(); - - let data_generation_spec = dir.path().join("compactor_data/spec.toml"); - assert!(data_generation_spec.exists()); -} - -#[tokio::test] -async fn running_compactor_generate_twice_overwrites_existing_files() { - let database_url = maybe_skip_integration!(); - let dir = tempfile::tempdir().expect("could not get temporary directory"); - - Command::cargo_bin("influxdb_iox") - .unwrap() - .arg("compactor") - .arg("generate") - .arg("--catalog-dsn") - .arg(&database_url) - .arg("--object-store") - .arg("file") - .arg("--data-dir") - .arg(dir.path()) - .assert() - .success(); - - let first_run_data_path = dir - .path() - .join("compactor_data/parquet/data_0_measure.parquet"); - let first_run_record_batches = read_record_batches(&first_run_data_path).await; - assert_eq!(first_run_record_batches.len(), 1); - - let first_run_record_batch = &first_run_record_batches[0]; - let first_run_num_lines = first_run_record_batch.num_rows(); - - Command::cargo_bin("influxdb_iox") - .unwrap() - .arg("compactor") - .arg("generate") - .arg("--catalog-dsn") - .arg(&database_url) - .arg("--object-store") - .arg("file") - .arg("--data-dir") - .arg(dir.path()) - .assert() - .success(); - - let second_run_data_path = dir - .path() - .join("compactor_data/parquet/data_0_measure.parquet"); - let second_run_record_batches = read_record_batches(&second_run_data_path).await; - assert_eq!(second_run_record_batches.len(), 1); - - let second_run_record_batch = &second_run_record_batches[0]; - let 
second_run_num_lines = second_run_record_batch.num_rows(); - - // If generation is appending instead of overwriting, this will fail. - assert_eq!(first_run_num_lines, second_run_num_lines); - - // If generation isn't creating different data every time it's invoked, this will fail. - assert_ne!(first_run_record_batch, second_run_record_batch); -} - -async fn read_record_batches(path: impl AsRef) -> Vec { - let object_store_path = ObjectStorePath::from_filesystem_path(path).unwrap(); - let object_store = Arc::new(LocalFileSystem::new()) as Arc; - let object_store_url = ObjectStoreUrl::local_filesystem(); - let object_meta = object_store.head(&object_store_path).await.unwrap(); - - let reader = ParquetFileReader::try_new(object_store, object_store_url, object_meta) - .await - .unwrap(); - - reader.read().await.unwrap().try_collect().await.unwrap() -} diff --git a/influxdb_iox/tests/end_to_end_cases/mod.rs b/influxdb_iox/tests/end_to_end_cases/mod.rs index 26bb23ccfd..1b3a11d2a2 100644 --- a/influxdb_iox/tests/end_to_end_cases/mod.rs +++ b/influxdb_iox/tests/end_to_end_cases/mod.rs @@ -3,7 +3,6 @@ mod all_in_one; // loading shared libraries: libjemalloc.so.2: cannot open shared object file: No such file or directory" #[cfg(not(feature = "heappy"))] mod cli; -mod compactor; mod debug; mod error; mod flightsql; diff --git a/iox_catalog/.gitignore b/iox_catalog/.gitignore new file mode 100644 index 0000000000..1dc091a735 --- /dev/null +++ b/iox_catalog/.gitignore @@ -0,0 +1 @@ +iox_catalog.sqlite3 \ No newline at end of file diff --git a/iox_catalog/Cargo.toml b/iox_catalog/Cargo.toml index d31c19ab24..8d28999b89 100644 --- a/iox_catalog/Cargo.toml +++ b/iox_catalog/Cargo.toml @@ -14,8 +14,10 @@ log = "0.4" metric = { version = "0.1.0", path = "../metric" } mutable_batch = { path = "../mutable_batch" } observability_deps = { path = "../observability_deps" } +parking_lot = { version = "0.12" } +serde = { version = "1.0", features = ["derive"] } snafu = "0.7" -sqlx = { version = "0.6", features = [ "runtime-tokio-rustls" , "postgres", "uuid" ] } +sqlx = { version = "0.6", features = [ "runtime-tokio-rustls" , "postgres", "uuid", "sqlite" ] } sqlx-hotswap-pool = { path = "../sqlx-hotswap-pool" } thiserror = "1.0.38" tokio = { version = "1.25", features = ["io-util", "macros", "parking_lot", "rt-multi-thread", "time"] } diff --git a/iox_catalog/sqlite/migrations/20230203080000_initial_schema.sql b/iox_catalog/sqlite/migrations/20230203080000_initial_schema.sql new file mode 100644 index 0000000000..3196f80cce --- /dev/null +++ b/iox_catalog/sqlite/migrations/20230203080000_initial_schema.sql @@ -0,0 +1,233 @@ +create table if not exists topic +( + id INTEGER not null + constraint kafka_topic_pkey + primary key autoincrement, + name VARCHAR not null + constraint topic_name_unique unique +); + +create table if not exists query_pool +( + id INTEGER NOT NULL + constraint query_pool_pkey + primary key autoincrement, + name varchar not null + constraint query_pool_name_unique + unique +); + +create table if not exists namespace +( + id INTEGER + constraint namespace_pkey + primary key autoincrement, + name varchar not null + constraint namespace_name_unique + unique, + topic_id numeric not null + constraint namespace_kafka_topic_id_fkey + references topic, + query_pool_id numeric not null + references query_pool, + max_tables integer default 10000 not null, + max_columns_per_table integer default 200 not null, + retention_period_ns numeric +); + +create table if not exists table_name +( + id INTEGER + 
constraint table_name_pkey + primary key autoincrement, + namespace_id numeric not null + references namespace + on delete cascade, + name varchar not null, + constraint table_name_unique + unique (namespace_id, name) +); + + +create index if not exists table_name_namespace_idx + on table_name (namespace_id); + +create table if not exists column_name +( + id INTEGER + constraint column_name_pkey + primary key autoincrement, + table_id numeric not null + references table_name + on delete cascade, + name varchar not null, + column_type smallint not null, + constraint column_name_unique + unique (table_id, name) +); + + +create index if not exists column_name_table_idx + on column_name (table_id); + +create table if not exists shard +( + id INTEGER + constraint sequencer_pkey + primary key autoincrement, + topic_id numeric not null + constraint sequencer_kafka_topic_id_fkey + references topic, + shard_index integer not null, + min_unpersisted_sequence_number numeric, + constraint shard_unique + unique (topic_id, shard_index) +); + + +create table if not exists sharding_rule_override +( + id INTEGER + constraint sharding_rule_override_pkey + primary key autoincrement, + namespace_id numeric not null + references namespace, + table_id numeric not null + references table_name, + column_id numeric not null + references column_name +); + + +create table if not exists partition +( + id INTEGER + constraint partition_pkey + primary key autoincrement, + shard_id numeric not null + constraint partition_sequencer_id_fkey + references shard, + table_id numeric not null + references table_name + on delete cascade, + partition_key varchar not null, + sort_key text [] not null, + persisted_sequence_number numeric, + to_delete numeric, + new_file_at numeric, + constraint partition_key_unique + unique (table_id, partition_key) +); + + +create table if not exists parquet_file +( + id INTEGER + constraint parquet_file_pkey + primary key autoincrement, + shard_id numeric not null + constraint parquet_file_sequencer_id_fkey + references shard, + table_id numeric not null + references table_name, + partition_id numeric not null + references partition, + object_store_id uuid not null + constraint parquet_location_unique + unique, + max_sequence_number numeric, + min_time numeric, + max_time numeric, + to_delete numeric, + row_count numeric default 0 not null, + file_size_bytes numeric default 0 not null, + compaction_level smallint default 0 not null, + created_at numeric, + namespace_id numeric not null + references namespace + on delete cascade, + column_set numeric[] not null, + max_l0_created_at numeric default 0 not null +); + + +create index if not exists parquet_file_deleted_at_idx + on parquet_file (to_delete); + +create index if not exists parquet_file_partition_idx + on parquet_file (partition_id); + +create index if not exists parquet_file_table_idx + on parquet_file (table_id); + +create index if not exists parquet_file_shard_compaction_delete_idx + on parquet_file (shard_id, compaction_level, to_delete); + +create index if not exists parquet_file_shard_compaction_delete_created_idx + on parquet_file (shard_id, compaction_level, to_delete, created_at); + +create index if not exists parquet_file_partition_created_idx + on parquet_file (partition_id, created_at); + +create table if not exists tombstone +( + id INTEGER + constraint tombstone_pkey + primary key autoincrement, + table_id numeric not null + references table_name + on delete cascade, + shard_id numeric not null + constraint 
tombstone_sequencer_id_fkey + references shard, + sequence_number numeric not null, + min_time numeric not null, + max_time numeric not null, + serialized_predicate text not null, + constraint tombstone_unique + unique (table_id, shard_id, sequence_number) +); + + +create table if not exists processed_tombstone +( + tombstone_id INTEGER not null + references tombstone, + parquet_file_id numeric not null + references parquet_file + on delete cascade, + primary key (tombstone_id, parquet_file_id) +); + + +create table if not exists skipped_compactions +( + partition_id INTEGER not null + constraint skipped_compactions_pkey + primary key + references partition + on delete cascade, + reason text not null, + skipped_at numeric not null, + num_files numeric, + limit_num_files numeric, + estimated_bytes numeric, + limit_bytes numeric, + limit_num_files_first_in_partition numeric +); + + +create table if not exists billing_summary +( + namespace_id integer not null + constraint billing_summary_pkey + primary key + references namespace + on delete cascade, + total_file_size_bytes numeric not null +); + + +create index if not exists billing_summary_namespace_idx + on billing_summary (namespace_id); + diff --git a/iox_catalog/sqlite/migrations/20230204082400_parquet_file_triggers.sql b/iox_catalog/sqlite/migrations/20230204082400_parquet_file_triggers.sql new file mode 100644 index 0000000000..dfea3ace46 --- /dev/null +++ b/iox_catalog/sqlite/migrations/20230204082400_parquet_file_triggers.sql @@ -0,0 +1,31 @@ +create trigger if not exists update_partition + after insert + on parquet_file + for each row + when NEW.compaction_level < 2 +begin + UPDATE partition set new_file_at = NEW.created_at WHERE id = NEW.partition_id; +end; + +create trigger if not exists update_billing + after insert + on parquet_file + for each row +begin + INSERT INTO billing_summary (namespace_id, total_file_size_bytes) + VALUES (NEW.namespace_id, NEW.file_size_bytes) + ON CONFLICT (namespace_id) DO UPDATE + SET total_file_size_bytes = billing_summary.total_file_size_bytes + NEW.file_size_bytes + WHERE billing_summary.namespace_id = NEW.namespace_id; +end; + +create trigger if not exists decrement_summary + after update + on parquet_file + for each row + when OLD.to_delete IS NULL AND NEW.to_delete IS NOT NULL +begin + UPDATE billing_summary + SET total_file_size_bytes = billing_summary.total_file_size_bytes - OLD.file_size_bytes + WHERE billing_summary.namespace_id = OLD.namespace_id; +end; \ No newline at end of file diff --git a/iox_catalog/src/lib.rs b/iox_catalog/src/lib.rs index 21cc2030ab..19d54ddf63 100644 --- a/iox_catalog/src/lib.rs +++ b/iox_catalog/src/lib.rs @@ -42,6 +42,7 @@ pub mod interface; pub mod mem; pub mod metrics; pub mod postgres; +pub mod sqlite; /// An [`crate::interface::Error`] scoped to a single table for schema validation errors. #[derive(Debug, Error)] diff --git a/iox_catalog/src/sqlite.rs b/iox_catalog/src/sqlite.rs new file mode 100644 index 0000000000..3380c810c4 --- /dev/null +++ b/iox_catalog/src/sqlite.rs @@ -0,0 +1,2920 @@ +//! 
A SQLite backed implementation of the Catalog + +use crate::{ + interface::{ + self, sealed::TransactionFinalize, CasFailure, Catalog, ColumnRepo, + ColumnTypeMismatchSnafu, Error, NamespaceRepo, ParquetFileRepo, PartitionRepo, + ProcessedTombstoneRepo, QueryPoolRepo, RepoCollection, Result, ShardRepo, TableRepo, + TombstoneRepo, TopicMetadataRepo, Transaction, + }, + metrics::MetricDecorator, + DEFAULT_MAX_COLUMNS_PER_TABLE, DEFAULT_MAX_TABLES, SHARED_TOPIC_ID, SHARED_TOPIC_NAME, +}; +use async_trait::async_trait; +use data_types::{ + Column, ColumnId, ColumnSet, ColumnType, ColumnTypeCount, CompactionLevel, Namespace, + NamespaceId, ParquetFile, ParquetFileId, ParquetFileParams, Partition, PartitionId, + PartitionKey, PartitionParam, ProcessedTombstone, QueryPool, QueryPoolId, SequenceNumber, + Shard, ShardId, ShardIndex, SkippedCompaction, Table, TableId, TablePartition, Timestamp, + Tombstone, TombstoneId, TopicId, TopicMetadata, TRANSITION_SHARD_ID, TRANSITION_SHARD_INDEX, +}; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::ops::Deref; + +use iox_time::{SystemProvider, TimeProvider}; +use metric::Registry; +use observability_deps::tracing::{debug, warn}; +use parking_lot::Mutex; +use snafu::prelude::*; +use sqlx::types::Json; +use sqlx::{ + migrate::Migrator, sqlite::SqliteConnectOptions, types::Uuid, Executor, Pool, Row, Sqlite, + SqlitePool, +}; +use std::str::FromStr; +use std::sync::Arc; + +static MIGRATOR: Migrator = sqlx::migrate!("sqlite/migrations"); + +/// Maximum number of files deleted by [`ParquetFileRepo::delete_old_ids_only]. +const MAX_PARQUET_FILES_DELETED_ONCE: i64 = 1_000; + +/// SQLite connection options. +#[derive(Debug, Clone)] +pub struct SqliteConnectionOptions { + /// DSN. + pub dsn: String, +} + +/// SQLite catalog. +#[derive(Debug)] +pub struct SqliteCatalog { + metrics: Arc, + pool: Pool, + time_provider: Arc, +} + +// struct to get return value from "select count(id) ..." query +#[derive(sqlx::FromRow)] +struct Count { + count: i64, +} + +/// transaction for [`SqliteCatalog`]. 
+#[derive(Debug)] +pub struct SqliteTxn { + inner: Mutex, + time_provider: Arc, +} + +#[derive(Debug)] +#[allow(clippy::large_enum_variant)] +enum SqliteTxnInner { + Txn(Option>), + Oneshot(Pool), +} + +impl<'c> Executor<'c> for &'c mut SqliteTxnInner { + type Database = Sqlite; + + #[allow(clippy::type_complexity)] + fn fetch_many<'e, 'q: 'e, E: 'q>( + self, + query: E, + ) -> futures::stream::BoxStream< + 'e, + Result< + sqlx::Either< + ::QueryResult, + ::Row, + >, + sqlx::Error, + >, + > + where + 'c: 'e, + E: sqlx::Execute<'q, Self::Database>, + { + match self { + SqliteTxnInner::Txn(txn) => txn.as_mut().expect("Not yet finalized").fetch_many(query), + SqliteTxnInner::Oneshot(pool) => pool.fetch_many(query), + } + } + + fn fetch_optional<'e, 'q: 'e, E: 'q>( + self, + query: E, + ) -> futures::future::BoxFuture< + 'e, + Result::Row>, sqlx::Error>, + > + where + 'c: 'e, + E: sqlx::Execute<'q, Self::Database>, + { + match self { + SqliteTxnInner::Txn(txn) => txn + .as_mut() + .expect("Not yet finalized") + .fetch_optional(query), + SqliteTxnInner::Oneshot(pool) => pool.fetch_optional(query), + } + } + + fn prepare_with<'e, 'q: 'e>( + self, + sql: &'q str, + parameters: &'e [::TypeInfo], + ) -> futures::future::BoxFuture< + 'e, + Result<>::Statement, sqlx::Error>, + > + where + 'c: 'e, + { + match self { + SqliteTxnInner::Txn(txn) => txn + .as_mut() + .expect("Not yet finalized") + .prepare_with(sql, parameters), + SqliteTxnInner::Oneshot(pool) => pool.prepare_with(sql, parameters), + } + } + + fn describe<'e, 'q: 'e>( + self, + sql: &'q str, + ) -> futures::future::BoxFuture<'e, Result, sqlx::Error>> + where + 'c: 'e, + { + match self { + SqliteTxnInner::Txn(txn) => txn.as_mut().expect("Not yet finalized").describe(sql), + SqliteTxnInner::Oneshot(pool) => pool.describe(sql), + } + } +} + +impl Drop for SqliteTxn { + fn drop(&mut self) { + if let SqliteTxnInner::Txn(Some(_)) = self.inner.lock().deref() { + warn!("Dropping SqliteTxn w/o finalizing (commit or abort)"); + + // SQLx ensures that the inner transaction enqueues a rollback when it is dropped, so + // we don't need to spawn a task here to call `rollback` manually. + } + } +} + +#[async_trait] +impl TransactionFinalize for SqliteTxn { + async fn commit_inplace(&mut self) -> Result<(), Error> { + match self.inner.get_mut() { + SqliteTxnInner::Txn(txn) => txn + .take() + .expect("Not yet finalized") + .commit() + .await + .map_err(|e| Error::SqlxError { source: e }), + SqliteTxnInner::Oneshot(_) => { + panic!("cannot commit oneshot"); + } + } + } + + async fn abort_inplace(&mut self) -> Result<(), Error> { + match self.inner.get_mut() { + SqliteTxnInner::Txn(txn) => txn + .take() + .expect("Not yet finalized") + .rollback() + .await + .map_err(|e| Error::SqlxError { source: e }), + SqliteTxnInner::Oneshot(_) => { + panic!("cannot abort oneshot"); + } + } + } +} + +impl SqliteCatalog { + /// Connect to the catalog store. + pub async fn connect(options: SqliteConnectionOptions, metrics: Arc) -> Result { + let opts = SqliteConnectOptions::from_str(&options.dsn) + .map_err(|e| Error::SqlxError { source: e })? 
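+            // `create_if_missing(true)` lets a file-backed DSN create the database file on
+            // first connect instead of erroring; the schema itself comes from the migrations
+            // run in `setup()`.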
+ .create_if_missing(true); + + let pool = SqlitePool::connect_with(opts) + .await + .map_err(|e| Error::SqlxError { source: e })?; + Ok(Self { + metrics, + pool, + time_provider: Arc::new(SystemProvider::new()), + }) + } +} + +#[async_trait] +impl Catalog for SqliteCatalog { + async fn setup(&self) -> Result<()> { + MIGRATOR + .run(&self.pool) + .await + .map_err(|e| Error::Setup { source: e.into() })?; + + if std::env::var("INFLUXDB_IOX_RPC_MODE").is_ok() { + // We need to manually insert the topic here so that we can create the transition shard below. + sqlx::query( + r#" +INSERT INTO topic (name) +VALUES ($1) +ON CONFLICT (name) +DO NOTHING; + "#, + ) + .bind(SHARED_TOPIC_NAME) + .execute(&self.pool) + .await + .map_err(|e| Error::Setup { source: e })?; + + // The transition shard must exist and must have magic ID and INDEX. + sqlx::query( + r#" +INSERT INTO shard (id, topic_id, shard_index, min_unpersisted_sequence_number) +VALUES ($1, $2, $3, 0) +ON CONFLICT (topic_id, shard_index) +DO NOTHING; + "#, + ) + .bind(TRANSITION_SHARD_ID) + .bind(SHARED_TOPIC_ID) + .bind(TRANSITION_SHARD_INDEX) + .execute(&self.pool) + .await + .map_err(|e| Error::Setup { source: e })?; + } + + Ok(()) + } + + async fn start_transaction(&self) -> Result> { + let transaction = self + .pool + .begin() + .await + .map_err(|e| Error::SqlxError { source: e })?; + + Ok(Box::new(MetricDecorator::new( + SqliteTxn { + inner: Mutex::new(SqliteTxnInner::Txn(Some(transaction))), + time_provider: Arc::clone(&self.time_provider), + }, + Arc::clone(&self.metrics), + ))) + } + + async fn repositories(&self) -> Box { + Box::new(MetricDecorator::new( + SqliteTxn { + inner: Mutex::new(SqliteTxnInner::Oneshot(self.pool.clone())), + time_provider: Arc::clone(&self.time_provider), + }, + Arc::clone(&self.metrics), + )) + } + + fn metrics(&self) -> Arc { + Arc::clone(&self.metrics) + } + + fn time_provider(&self) -> Arc { + Arc::clone(&self.time_provider) + } +} + +#[async_trait] +impl RepoCollection for SqliteTxn { + fn topics(&mut self) -> &mut dyn TopicMetadataRepo { + self + } + + fn query_pools(&mut self) -> &mut dyn QueryPoolRepo { + self + } + + fn namespaces(&mut self) -> &mut dyn NamespaceRepo { + self + } + + fn tables(&mut self) -> &mut dyn TableRepo { + self + } + + fn columns(&mut self) -> &mut dyn ColumnRepo { + self + } + + fn shards(&mut self) -> &mut dyn ShardRepo { + self + } + + fn partitions(&mut self) -> &mut dyn PartitionRepo { + self + } + + fn tombstones(&mut self) -> &mut dyn TombstoneRepo { + self + } + + fn parquet_files(&mut self) -> &mut dyn ParquetFileRepo { + self + } + + fn processed_tombstones(&mut self) -> &mut dyn ProcessedTombstoneRepo { + self + } +} + +#[async_trait] +impl TopicMetadataRepo for SqliteTxn { + async fn create_or_get(&mut self, name: &str) -> Result { + let rec = sqlx::query_as::<_, TopicMetadata>( + r#" +INSERT INTO topic ( name ) +VALUES ( $1 ) +ON CONFLICT (name) +DO UPDATE SET name = topic.name +RETURNING *; + "#, + ) + .bind(name) // $1 + .fetch_one(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + Ok(rec) + } + + async fn get_by_name(&mut self, name: &str) -> Result> { + let rec = sqlx::query_as::<_, TopicMetadata>( + r#" +SELECT * +FROM topic +WHERE name = $1; + "#, + ) + .bind(name) // $1 + .fetch_one(self.inner.get_mut()) + .await; + + if let Err(sqlx::Error::RowNotFound) = rec { + return Ok(None); + } + + let topic = rec.map_err(|e| Error::SqlxError { source: e })?; + + Ok(Some(topic)) + } +} + +#[async_trait] +impl QueryPoolRepo for 
SqliteTxn { + async fn create_or_get(&mut self, name: &str) -> Result { + let rec = sqlx::query_as::<_, QueryPool>( + r#" +INSERT INTO query_pool ( name ) +VALUES ( $1 ) +ON CONFLICT (name) +DO UPDATE SET name = query_pool.name +RETURNING *; + "#, + ) + .bind(name) // $1 + .fetch_one(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + Ok(rec) + } +} + +#[async_trait] +impl NamespaceRepo for SqliteTxn { + async fn create( + &mut self, + name: &str, + retention_period_ns: Option, + topic_id: TopicId, + query_pool_id: QueryPoolId, + ) -> Result { + let rec = sqlx::query_as::<_, Namespace>( + r#" + INSERT INTO namespace ( name, topic_id, query_pool_id, retention_period_ns, max_tables ) + VALUES ( $1, $2, $3, $4, $5 ) + RETURNING *; + "#, + ) + .bind(name) // $1 + .bind(topic_id) // $2 + .bind(query_pool_id) // $3 + .bind(retention_period_ns) // $4 + .bind(DEFAULT_MAX_TABLES); // $5 + + let rec = rec.fetch_one(self.inner.get_mut()).await.map_err(|e| { + if is_unique_violation(&e) { + Error::NameExists { + name: name.to_string(), + } + } else if is_fk_violation(&e) { + Error::ForeignKeyViolation { source: e } + } else { + Error::SqlxError { source: e } + } + })?; + + // Ensure the column default values match the code values. + debug_assert_eq!(rec.max_tables, DEFAULT_MAX_TABLES); + debug_assert_eq!(rec.max_columns_per_table, DEFAULT_MAX_COLUMNS_PER_TABLE); + + Ok(rec) + } + + async fn list(&mut self) -> Result> { + let rec = sqlx::query_as::<_, Namespace>( + r#" +SELECT * +FROM namespace; + "#, + ) + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + Ok(rec) + } + + async fn get_by_id(&mut self, id: NamespaceId) -> Result> { + let rec = sqlx::query_as::<_, Namespace>( + r#" +SELECT * +FROM namespace +WHERE id = $1; + "#, + ) + .bind(id) // $1 + .fetch_one(self.inner.get_mut()) + .await; + + if let Err(sqlx::Error::RowNotFound) = rec { + return Ok(None); + } + + let namespace = rec.map_err(|e| Error::SqlxError { source: e })?; + + Ok(Some(namespace)) + } + + async fn get_by_name(&mut self, name: &str) -> Result> { + let rec = sqlx::query_as::<_, Namespace>( + r#" +SELECT * +FROM namespace +WHERE name = $1; + "#, + ) + .bind(name) // $1 + .fetch_one(self.inner.get_mut()) + .await; + + if let Err(sqlx::Error::RowNotFound) = rec { + return Ok(None); + } + + let namespace = rec.map_err(|e| Error::SqlxError { source: e })?; + + Ok(Some(namespace)) + } + + async fn delete(&mut self, name: &str) -> Result<()> { + // note that there is a uniqueness constraint on the name column in the DB + sqlx::query( + r#" +DELETE FROM namespace +WHERE name = $1; + "#, + ) + .bind(name) + .execute(self.inner.get_mut()) + .await + .context(interface::CouldNotDeleteNamespaceSnafu) + .map(|_| ()) + } + + async fn update_table_limit(&mut self, name: &str, new_max: i32) -> Result { + let rec = sqlx::query_as::<_, Namespace>( + r#" +UPDATE namespace +SET max_tables = $1 +WHERE name = $2 +RETURNING *; + "#, + ) + .bind(new_max) + .bind(name) + .fetch_one(self.inner.get_mut()) + .await; + + let namespace = rec.map_err(|e| match e { + sqlx::Error::RowNotFound => Error::NamespaceNotFoundByName { + name: name.to_string(), + }, + _ => Error::SqlxError { source: e }, + })?; + + Ok(namespace) + } + + async fn update_column_limit(&mut self, name: &str, new_max: i32) -> Result { + let rec = sqlx::query_as::<_, Namespace>( + r#" +UPDATE namespace +SET max_columns_per_table = $1 +WHERE name = $2 +RETURNING *; + "#, + ) + .bind(new_max) + .bind(name) + 
.fetch_one(self.inner.get_mut()) + .await; + + let namespace = rec.map_err(|e| match e { + sqlx::Error::RowNotFound => Error::NamespaceNotFoundByName { + name: name.to_string(), + }, + _ => Error::SqlxError { source: e }, + })?; + + Ok(namespace) + } + + async fn update_retention_period( + &mut self, + name: &str, + retention_period_ns: Option, + ) -> Result { + let rec = sqlx::query_as::<_, Namespace>( + r#"UPDATE namespace SET retention_period_ns = $1 WHERE name = $2 RETURNING *;"#, + ) + .bind(retention_period_ns) // $1 + .bind(name) // $2 + .fetch_one(self.inner.get_mut()) + .await; + + let namespace = rec.map_err(|e| match e { + sqlx::Error::RowNotFound => Error::NamespaceNotFoundByName { + name: name.to_string(), + }, + _ => Error::SqlxError { source: e }, + })?; + + Ok(namespace) + } +} + +#[async_trait] +impl TableRepo for SqliteTxn { + async fn create_or_get(&mut self, name: &str, namespace_id: NamespaceId) -> Result { + // A simple insert statement becomes quite complicated in order to avoid checking the table + // limits in a select and then conditionally inserting (which would be racey). + // + // from https://www.postgresql.org/docs/current/sql-insert.html + // "INSERT inserts new rows into a table. One can insert one or more rows specified by + // value expressions, or zero or more rows resulting from a query." + // By using SELECT rather than VALUES it will insert zero rows if it finds a null in the + // subquery, i.e. if count >= max_tables. fetch_one() will return a RowNotFound error if + // nothing was inserted. Not pretty! + let rec = sqlx::query_as::<_, Table>( + r#" +INSERT INTO table_name ( name, namespace_id ) +SELECT $1, id FROM ( + SELECT namespace.id AS id, max_tables, COUNT(table_name.id) AS count + FROM namespace LEFT JOIN table_name ON namespace.id = table_name.namespace_id + WHERE namespace.id = $2 + GROUP BY namespace.max_tables, table_name.namespace_id, namespace.id +) AS get_count WHERE count < max_tables +ON CONFLICT (namespace_id, name) +DO UPDATE SET name = table_name.name +RETURNING *; + "#, + ) + .bind(name) // $1 + .bind(namespace_id) // $2 + .fetch_one(self.inner.get_mut()) + .await + .map_err(|e| match e { + sqlx::Error::RowNotFound => Error::TableCreateLimitError { + table_name: name.to_string(), + namespace_id, + }, + _ => { + if is_fk_violation(&e) { + Error::ForeignKeyViolation { source: e } + } else { + Error::SqlxError { source: e } + } + } + })?; + + Ok(rec) + } + + async fn get_by_id(&mut self, table_id: TableId) -> Result> { + let rec = sqlx::query_as::<_, Table>( + r#" +SELECT * +FROM table_name +WHERE id = $1; + "#, + ) + .bind(table_id) // $1 + .fetch_one(self.inner.get_mut()) + .await; + + if let Err(sqlx::Error::RowNotFound) = rec { + return Ok(None); + } + + let table = rec.map_err(|e| Error::SqlxError { source: e })?; + + Ok(Some(table)) + } + + async fn get_by_namespace_and_name( + &mut self, + namespace_id: NamespaceId, + name: &str, + ) -> Result> { + let rec = sqlx::query_as::<_, Table>( + r#" +SELECT * +FROM table_name +WHERE namespace_id = $1 AND name = $2; + "#, + ) + .bind(namespace_id) // $1 + .bind(name) // $2 + .fetch_one(self.inner.get_mut()) + .await; + + if let Err(sqlx::Error::RowNotFound) = rec { + return Ok(None); + } + + let table = rec.map_err(|e| Error::SqlxError { source: e })?; + + Ok(Some(table)) + } + + async fn list_by_namespace_id(&mut self, namespace_id: NamespaceId) -> Result> { + let rec = sqlx::query_as::<_, Table>( + r#" +SELECT * +FROM table_name +WHERE namespace_id = $1; + "#, + ) + 
.bind(namespace_id) + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + Ok(rec) + } + + async fn list(&mut self) -> Result> { + let rec = sqlx::query_as::<_, Table>("SELECT * FROM table_name;") + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + Ok(rec) + } +} + +#[async_trait] +impl ColumnRepo for SqliteTxn { + async fn create_or_get( + &mut self, + name: &str, + table_id: TableId, + column_type: ColumnType, + ) -> Result { + let rec = sqlx::query_as::<_, Column>( + r#" +INSERT INTO column_name ( name, table_id, column_type ) +SELECT $1, table_id, $3 FROM ( + SELECT max_columns_per_table, namespace.id, table_name.id as table_id, COUNT(column_name.id) AS count + FROM namespace LEFT JOIN table_name ON namespace.id = table_name.namespace_id + LEFT JOIN column_name ON table_name.id = column_name.table_id + WHERE table_name.id = $2 + GROUP BY namespace.max_columns_per_table, namespace.id, table_name.id +) AS get_count WHERE count < max_columns_per_table +ON CONFLICT (table_id, name) +DO UPDATE SET name = column_name.name +RETURNING *; + "#, + ) + .bind(name) // $1 + .bind(table_id) // $2 + .bind(column_type) // $3 + .fetch_one(self.inner.get_mut()) + .await + .map_err(|e| match e { + sqlx::Error::RowNotFound => Error::ColumnCreateLimitError { + column_name: name.to_string(), + table_id, + }, + _ => { + if is_fk_violation(&e) { + Error::ForeignKeyViolation { source: e } + } else { + Error::SqlxError { source: e } + } + }})?; + + ensure!( + rec.column_type == column_type, + ColumnTypeMismatchSnafu { + name, + existing: rec.column_type, + new: column_type, + } + ); + + Ok(rec) + } + + async fn list_by_namespace_id(&mut self, namespace_id: NamespaceId) -> Result> { + let rec = sqlx::query_as::<_, Column>( + r#" +SELECT column_name.* FROM table_name +INNER JOIN column_name on column_name.table_id = table_name.id +WHERE table_name.namespace_id = $1; + "#, + ) + .bind(namespace_id) + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + Ok(rec) + } + + async fn list_by_table_id(&mut self, table_id: TableId) -> Result> { + let rec = sqlx::query_as::<_, Column>( + r#" +SELECT * FROM column_name +WHERE table_id = $1; + "#, + ) + .bind(table_id) + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + Ok(rec) + } + + async fn list(&mut self) -> Result> { + let rec = sqlx::query_as::<_, Column>("SELECT * FROM column_name;") + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + Ok(rec) + } + + async fn create_or_get_many_unchecked( + &mut self, + table_id: TableId, + columns: HashMap<&str, ColumnType>, + ) -> Result> { + let num_columns = columns.len(); + #[derive(Deserialize, Serialize)] + struct NameType<'a> { + name: &'a str, + column_type: i8, + } + impl<'a> NameType<'a> { + fn from(value: (&&'a str, &ColumnType)) -> Self { + Self { + name: value.0, + column_type: *value.1 as i8, + } + } + } + let cols = columns.iter().map(NameType::<'_>::from).collect::>(); + + // The `ORDER BY` in this statement is important to avoid deadlocks during concurrent + // writes to the same IOx table that each add many new columns. 
See: + // + // - + // - + // - + let out = sqlx::query_as::<_, Column>( + r#" +INSERT INTO column_name ( name, table_id, column_type ) +SELECT a.value ->> 'name' AS name, $1, a.value ->> 'column_type' AS column_type +FROM json_each($2) as a +ORDER BY name +ON CONFLICT (table_id, name) +DO UPDATE SET name = column_name.name +RETURNING *; + "#, + ) + .bind(table_id) // $1 + .bind(&Json(cols)) // $2 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| { + if is_fk_violation(&e) { + Error::ForeignKeyViolation { source: e } + } else { + Error::SqlxError { source: e } + } + })?; + + assert_eq!(num_columns, out.len()); + + for existing in &out { + let want = columns.get(existing.name.as_str()).unwrap(); + ensure!( + existing.column_type == *want, + ColumnTypeMismatchSnafu { + name: &existing.name, + existing: existing.column_type, + new: *want, + } + ); + } + + Ok(out) + } + + async fn list_type_count_by_table_id( + &mut self, + table_id: TableId, + ) -> Result> { + sqlx::query_as::<_, ColumnTypeCount>( + r#" +select column_type as col_type, count(1) AS count from column_name where table_id = $1 group by 1; + "#, + ) + .bind(table_id) // $1 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e }) + } +} + +#[async_trait] +impl ShardRepo for SqliteTxn { + async fn create_or_get( + &mut self, + topic: &TopicMetadata, + shard_index: ShardIndex, + ) -> Result { + sqlx::query_as::<_, Shard>( + r#" +INSERT INTO shard + ( topic_id, shard_index, min_unpersisted_sequence_number ) +VALUES + ( $1, $2, 0 ) +ON CONFLICT (topic_id, shard_index) +DO UPDATE SET topic_id = shard.topic_id +RETURNING *; + "#, + ) + .bind(topic.id) // $1 + .bind(shard_index) // $2 + .fetch_one(self.inner.get_mut()) + .await + .map_err(|e| { + if is_fk_violation(&e) { + Error::ForeignKeyViolation { source: e } + } else { + Error::SqlxError { source: e } + } + }) + } + + async fn get_by_topic_id_and_shard_index( + &mut self, + topic_id: TopicId, + shard_index: ShardIndex, + ) -> Result> { + let rec = sqlx::query_as::<_, Shard>( + r#" +SELECT * +FROM shard +WHERE topic_id = $1 + AND shard_index = $2; + "#, + ) + .bind(topic_id) // $1 + .bind(shard_index) // $2 + .fetch_one(self.inner.get_mut()) + .await; + + if let Err(sqlx::Error::RowNotFound) = rec { + return Ok(None); + } + + let shard = rec.map_err(|e| Error::SqlxError { source: e })?; + + Ok(Some(shard)) + } + + async fn list(&mut self) -> Result> { + sqlx::query_as::<_, Shard>(r#"SELECT * FROM shard;"#) + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e }) + } + + async fn list_by_topic(&mut self, topic: &TopicMetadata) -> Result> { + sqlx::query_as::<_, Shard>(r#"SELECT * FROM shard WHERE topic_id = $1;"#) + .bind(topic.id) // $1 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e }) + } + + async fn update_min_unpersisted_sequence_number( + &mut self, + shard_id: ShardId, + sequence_number: SequenceNumber, + ) -> Result<()> { + let _ = sqlx::query( + r#" +UPDATE shard +SET min_unpersisted_sequence_number = $1 +WHERE id = $2; + "#, + ) + .bind(sequence_number.get()) // $1 + .bind(shard_id) // $2 + .execute(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + Ok(()) + } +} + +// We can't use [`Partition`], as uses Vec which the Sqlite +// driver cannot serialise + +#[derive(Debug, Clone, PartialEq, Eq, sqlx::FromRow)] +struct PartitionPod { + id: PartitionId, + shard_id: ShardId, + table_id: TableId, + partition_key: PartitionKey, + 
sort_key: Json>, + persisted_sequence_number: Option, + new_file_at: Option, +} + +impl From for Partition { + fn from(value: PartitionPod) -> Self { + Self { + id: value.id, + shard_id: value.shard_id, + table_id: value.table_id, + partition_key: value.partition_key, + sort_key: value.sort_key.0, + persisted_sequence_number: value.persisted_sequence_number, + new_file_at: value.new_file_at, + } + } +} + +#[async_trait] +impl PartitionRepo for SqliteTxn { + async fn create_or_get( + &mut self, + key: PartitionKey, + shard_id: ShardId, + table_id: TableId, + ) -> Result { + // Note: since sort_key is now an array, we must explicitly insert '{}' which is an empty + // array rather than NULL which sqlx will throw `UnexpectedNullError` while is is doing + // `ColumnDecode` + + let v = sqlx::query_as::<_, PartitionPod>( + r#" +INSERT INTO partition + ( partition_key, shard_id, table_id, sort_key) +VALUES + ( $1, $2, $3, '[]') +ON CONFLICT (table_id, partition_key) +DO UPDATE SET partition_key = partition.partition_key +RETURNING *; + "#, + ) + .bind(key) // $1 + .bind(shard_id) // $2 + .bind(table_id) // $3 + .fetch_one(self.inner.get_mut()) + .await + .map_err(|e| { + if is_fk_violation(&e) { + Error::ForeignKeyViolation { source: e } + } else { + Error::SqlxError { source: e } + } + })?; + + // If the partition_key_unique constraint was hit because there was an + // existing record for (table_id, partition_key) ensure the partition + // key in the DB is mapped to the same shard_id the caller + // requested. + assert_eq!( + v.shard_id, shard_id, + "attempted to overwrite partition with different shard ID" + ); + + Ok(v.into()) + } + + async fn get_by_id(&mut self, partition_id: PartitionId) -> Result> { + let rec = sqlx::query_as::<_, PartitionPod>(r#"SELECT * FROM partition WHERE id = $1;"#) + .bind(partition_id) // $1 + .fetch_one(self.inner.get_mut()) + .await; + + if let Err(sqlx::Error::RowNotFound) = rec { + return Ok(None); + } + + let partition = rec.map_err(|e| Error::SqlxError { source: e })?; + + Ok(Some(partition.into())) + } + + async fn list_by_shard(&mut self, shard_id: ShardId) -> Result> { + Ok( + sqlx::query_as::<_, PartitionPod>(r#"SELECT * FROM partition WHERE shard_id = $1;"#) + .bind(shard_id) // $1 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })? + .into_iter() + .map(Into::into) + .collect(), + ) + } + + async fn list_by_namespace(&mut self, namespace_id: NamespaceId) -> Result> { + Ok(sqlx::query_as::<_, PartitionPod>( + r#" +SELECT partition.* +FROM table_name +INNER JOIN partition on partition.table_id = table_name.id +WHERE table_name.namespace_id = $1; + "#, + ) + .bind(namespace_id) // $1 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })? + .into_iter() + .map(Into::into) + .collect()) + } + + async fn list_by_table_id(&mut self, table_id: TableId) -> Result> { + Ok(sqlx::query_as::<_, PartitionPod>( + r#" +SELECT * +FROM partition +WHERE table_id = $1; + "#, + ) + .bind(table_id) // $1 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })? + .into_iter() + .map(Into::into) + .collect()) + } + + /// Update the sort key for `partition_id` if and only if `old_sort_key` + /// matches the current value in the database. + /// + /// This compare-and-swap operation is allowed to spuriously return + /// [`CasFailure::ValueMismatch`] for performance reasons (avoiding multiple + /// round trips to service a transaction in the happy path). 
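+    ///
+    /// A usage sketch (hypothetical caller, not part of this change):
+    ///
+    /// ```ignore
+    /// match repos.partitions().cas_sort_key(partition_id, None, &["host", "time"]).await {
+    ///     Ok(p) => println!("sort key set to {:?}", p.sort_key),
+    ///     Err(CasFailure::ValueMismatch(observed)) => {
+    ///         // Another writer got there first; `observed` is the sort key now in the catalog.
+    ///     }
+    ///     Err(CasFailure::QueryError(e)) => return Err(e),
+    /// }
+    /// ```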
+ async fn cas_sort_key( + &mut self, + partition_id: PartitionId, + old_sort_key: Option>, + new_sort_key: &[&str], + ) -> Result>> { + let old_sort_key = old_sort_key.unwrap_or_default(); + let res = sqlx::query_as::<_, PartitionPod>( + r#" +UPDATE partition +SET sort_key = $1 +WHERE id = $2 AND sort_key = $3 +RETURNING *; + "#, + ) + .bind(Json(new_sort_key)) // $1 + .bind(partition_id) // $2 + .bind(Json(&old_sort_key)) // $3 + .fetch_one(self.inner.get_mut()) + .await; + + let partition = match res { + Ok(v) => v, + Err(sqlx::Error::RowNotFound) => { + // This update may have failed either because: + // + // * A row with the specified ID did not exist at query time + // (but may exist now!) + // * The sort key does not match. + // + // To differentiate, we submit a get partition query, returning + // the actual sort key if successful. + // + // NOTE: this is racy, but documented - this might return "Sort + // key differs! Old key: " + return Err(CasFailure::ValueMismatch( + PartitionRepo::get_by_id(self, partition_id) + .await + .map_err(CasFailure::QueryError)? + .ok_or(CasFailure::QueryError(Error::PartitionNotFound { + id: partition_id, + }))? + .sort_key, + )); + } + Err(e) => return Err(CasFailure::QueryError(Error::SqlxError { source: e })), + }; + + debug!( + ?partition_id, + ?old_sort_key, + ?new_sort_key, + "partition sort key cas successful" + ); + + Ok(partition.into()) + } + + async fn record_skipped_compaction( + &mut self, + partition_id: PartitionId, + reason: &str, + num_files: usize, + limit_num_files: usize, + limit_num_files_first_in_partition: usize, + estimated_bytes: u64, + limit_bytes: u64, + ) -> Result<()> { + sqlx::query( + r#" +INSERT INTO skipped_compactions + ( partition_id, reason, num_files, limit_num_files, limit_num_files_first_in_partition, estimated_bytes, limit_bytes, skipped_at ) +VALUES + ( $1, $2, $3, $4, $5, $6, $7, $8 ) +ON CONFLICT ( partition_id ) +DO UPDATE +SET +reason = EXCLUDED.reason, +num_files = EXCLUDED.num_files, +limit_num_files = EXCLUDED.limit_num_files, +limit_num_files_first_in_partition = EXCLUDED.limit_num_files_first_in_partition, +estimated_bytes = EXCLUDED.estimated_bytes, +limit_bytes = EXCLUDED.limit_bytes, +skipped_at = EXCLUDED.skipped_at; + "#, + ) + .bind(partition_id) // $1 + .bind(reason) + .bind(num_files as i64) + .bind(limit_num_files as i64) + .bind(limit_num_files_first_in_partition as i64) + .bind(estimated_bytes as i64) + .bind(limit_bytes as i64) + .bind(std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).unwrap().as_secs() as i64) + .execute(self.inner.get_mut()) + .await + .context(interface::CouldNotRecordSkippedCompactionSnafu { partition_id })?; + Ok(()) + } + + async fn get_in_skipped_compaction( + &mut self, + partition_id: PartitionId, + ) -> Result> { + let rec = sqlx::query_as::<_, SkippedCompaction>( + r#"SELECT * FROM skipped_compactions WHERE partition_id = $1;"#, + ) + .bind(partition_id) // $1 + .fetch_one(self.inner.get_mut()) + .await; + + if let Err(sqlx::Error::RowNotFound) = rec { + return Ok(None); + } + + let skipped_partition_record = rec.map_err(|e| Error::SqlxError { source: e })?; + + Ok(Some(skipped_partition_record)) + } + + async fn list_skipped_compactions(&mut self) -> Result> { + sqlx::query_as::<_, SkippedCompaction>( + r#" +SELECT * FROM skipped_compactions + "#, + ) + .fetch_all(self.inner.get_mut()) + .await + .context(interface::CouldNotListSkippedCompactionsSnafu) + } + + async fn delete_skipped_compactions( + &mut self, + partition_id: PartitionId, + ) -> 
Result> { + sqlx::query_as::<_, SkippedCompaction>( + r#" +DELETE FROM skipped_compactions +WHERE partition_id = $1 +RETURNING * + "#, + ) + .bind(partition_id) + .fetch_optional(self.inner.get_mut()) + .await + .context(interface::CouldNotDeleteSkippedCompactionsSnafu) + } + + async fn update_persisted_sequence_number( + &mut self, + partition_id: PartitionId, + sequence_number: SequenceNumber, + ) -> Result<()> { + let _ = sqlx::query( + r#" +UPDATE partition +SET persisted_sequence_number = $1 +WHERE id = $2; + "#, + ) + .bind(sequence_number.get()) // $1 + .bind(partition_id) // $2 + .execute(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + Ok(()) + } + + async fn most_recent_n(&mut self, n: usize, shards: &[ShardId]) -> Result> { + Ok(sqlx::query_as::<_, PartitionPod>( + r#"SELECT * FROM partition WHERE shard_id IN (SELECT value FROM json_each($1)) ORDER BY id DESC LIMIT $2;"#, + ) + .bind(&Json(shards.iter().map(|v| v.get()).collect::>())) + .bind(n as i64) + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })? + .into_iter() + .map(Into::into) + .collect()) + } + + async fn partitions_with_recent_created_files( + &mut self, + time_in_the_past: Timestamp, + max_num_partitions: usize, + ) -> Result> { + sqlx::query_as( + r#" + SELECT p.id as partition_id, p.table_id, t.namespace_id, p.shard_id + FROM partition p, table_name t + WHERE p.new_file_at > $1 + AND p.table_id = t.id + LIMIT $2; + "#, + ) + .bind(time_in_the_past) // $1 + .bind(max_num_partitions as i64) // $2 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e }) + } + + async fn partitions_to_compact(&mut self, recent_time: Timestamp) -> Result> { + sqlx::query_as( + r#" + SELECT p.id as partition_id + FROM partition p + WHERE p.new_file_at > $1 + "#, + ) + .bind(recent_time) // $1 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e }) + } +} + +#[async_trait] +impl TombstoneRepo for SqliteTxn { + async fn create_or_get( + &mut self, + table_id: TableId, + shard_id: ShardId, + sequence_number: SequenceNumber, + min_time: Timestamp, + max_time: Timestamp, + predicate: &str, + ) -> Result { + let v = sqlx::query_as::<_, Tombstone>( + r#" +INSERT INTO tombstone + ( table_id, shard_id, sequence_number, min_time, max_time, serialized_predicate ) +VALUES + ( $1, $2, $3, $4, $5, $6 ) +ON CONFLICT (table_id, shard_id, sequence_number) +DO UPDATE SET table_id = tombstone.table_id +RETURNING *; + "#, + ) + .bind(table_id) // $1 + .bind(shard_id) // $2 + .bind(sequence_number) // $3 + .bind(min_time) // $4 + .bind(max_time) // $5 + .bind(predicate) // $6 + .fetch_one(self.inner.get_mut()) + .await + .map_err(|e| { + if is_fk_violation(&e) { + Error::ForeignKeyViolation { source: e } + } else { + Error::SqlxError { source: e } + } + })?; + + // If tombstone_unique is hit, a record with (table_id, shard_id, + // sequence_number) already exists. + // + // Ensure the caller does not falsely believe they have created the + // record with the provided values if the DB row contains different + // values. 
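+        //
+        // (The predicate assertion is exercised by `test_tombstone_create_or_get_no_overwrite`
+        // in the tests below.)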
+ assert_eq!( + v.min_time, min_time, + "attempted to overwrite min_time in tombstone record" + ); + assert_eq!( + v.max_time, max_time, + "attempted to overwrite max_time in tombstone record" + ); + assert_eq!( + v.serialized_predicate, predicate, + "attempted to overwrite predicate in tombstone record" + ); + + Ok(v) + } + + async fn list_by_namespace(&mut self, namespace_id: NamespaceId) -> Result> { + sqlx::query_as::<_, Tombstone>( + r#" +SELECT + tombstone.id as id, + tombstone.table_id as table_id, + tombstone.shard_id as shard_id, + tombstone.sequence_number as sequence_number, + tombstone.min_time as min_time, + tombstone.max_time as max_time, + tombstone.serialized_predicate as serialized_predicate +FROM table_name +INNER JOIN tombstone on tombstone.table_id = table_name.id +WHERE table_name.namespace_id = $1; + "#, + ) + .bind(namespace_id) // $1 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e }) + } + + async fn list_by_table(&mut self, table_id: TableId) -> Result> { + sqlx::query_as::<_, Tombstone>( + r#" +SELECT * +FROM tombstone +WHERE table_id = $1 +ORDER BY id; + "#, + ) + .bind(table_id) // $1 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e }) + } + + async fn get_by_id(&mut self, id: TombstoneId) -> Result> { + let rec = sqlx::query_as::<_, Tombstone>( + r#" +SELECT * +FROM tombstone +WHERE id = $1; + "#, + ) + .bind(id) // $1 + .fetch_one(self.inner.get_mut()) + .await; + + if let Err(sqlx::Error::RowNotFound) = rec { + return Ok(None); + } + + let tombstone = rec.map_err(|e| Error::SqlxError { source: e })?; + + Ok(Some(tombstone)) + } + + async fn list_tombstones_by_shard_greater_than( + &mut self, + shard_id: ShardId, + sequence_number: SequenceNumber, + ) -> Result> { + sqlx::query_as::<_, Tombstone>( + r#" +SELECT * +FROM tombstone +WHERE shard_id = $1 + AND sequence_number > $2 +ORDER BY id; + "#, + ) + .bind(shard_id) // $1 + .bind(sequence_number) // $2 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e }) + } + + async fn remove(&mut self, tombstone_ids: &[TombstoneId]) -> Result<()> { + let ids: Vec<_> = tombstone_ids.iter().map(|t| t.get()).collect(); + + // Remove processed tombstones first + sqlx::query( + r#" +DELETE +FROM processed_tombstone +WHERE tombstone_id IN (SELECT value FROM json_each($1)); + "#, + ) + .bind(Json(&ids[..])) // $1 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + // Remove tombstones + sqlx::query( + r#" +DELETE +FROM tombstone +WHERE id IN (SELECT value FROM json_each($1)); + "#, + ) + .bind(Json(&ids[..])) // $1 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + Ok(()) + } + + async fn list_tombstones_for_time_range( + &mut self, + shard_id: ShardId, + table_id: TableId, + sequence_number: SequenceNumber, + min_time: Timestamp, + max_time: Timestamp, + ) -> Result> { + sqlx::query_as::<_, Tombstone>( + r#" +SELECT * +FROM tombstone +WHERE shard_id = $1 + AND table_id = $2 + AND sequence_number > $3 + AND ((min_time <= $4 AND max_time >= $4) + OR (min_time > $4 AND min_time <= $5)) +ORDER BY id; + "#, + ) + .bind(shard_id) // $1 + .bind(table_id) // $2 + .bind(sequence_number) // $3 + .bind(min_time) // $4 + .bind(max_time) // $5 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e }) + } +} + +fn from_column_set(v: &ColumnSet) -> Json> { + Json((*v).iter().map(ColumnId::get).collect()) +} 
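+
+// `ColumnSet` values are persisted as a JSON array of i64 column IDs, since the SQLite driver
+// has no native array type: `from_column_set` serialises on write and `to_column_set` rebuilds
+// the set when rows are read back, e.g. `ColumnSet::new([ColumnId::new(1), ColumnId::new(2)])`
+// round-trips through the text `[1,2]`.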
+ +fn to_column_set(v: &Json>) -> ColumnSet { + ColumnSet::new(v.0.iter().map(|v| ColumnId::new(*v))) +} + +#[derive(Debug, Clone, PartialEq, Eq, sqlx::FromRow)] +struct ParquetFilePod { + id: ParquetFileId, + shard_id: ShardId, + namespace_id: NamespaceId, + table_id: TableId, + partition_id: PartitionId, + object_store_id: Uuid, + max_sequence_number: SequenceNumber, + min_time: Timestamp, + max_time: Timestamp, + to_delete: Option, + file_size_bytes: i64, + row_count: i64, + compaction_level: CompactionLevel, + created_at: Timestamp, + column_set: Json>, + max_l0_created_at: Timestamp, +} + +impl From for ParquetFile { + fn from(value: ParquetFilePod) -> Self { + Self { + id: value.id, + shard_id: value.shard_id, + namespace_id: value.namespace_id, + table_id: value.table_id, + partition_id: value.partition_id, + object_store_id: value.object_store_id, + max_sequence_number: value.max_sequence_number, + min_time: value.min_time, + max_time: value.max_time, + to_delete: value.to_delete, + file_size_bytes: value.file_size_bytes, + row_count: value.row_count, + compaction_level: value.compaction_level, + created_at: value.created_at, + column_set: to_column_set(&value.column_set), + max_l0_created_at: value.max_l0_created_at, + } + } +} + +#[async_trait] +impl ParquetFileRepo for SqliteTxn { + async fn create(&mut self, parquet_file_params: ParquetFileParams) -> Result { + let ParquetFileParams { + shard_id, + namespace_id, + table_id, + partition_id, + object_store_id, + max_sequence_number, + min_time, + max_time, + file_size_bytes, + row_count, + compaction_level, + created_at, + column_set, + max_l0_created_at, + } = parquet_file_params; + + let rec = sqlx::query_as::<_, ParquetFilePod>( + r#" +INSERT INTO parquet_file ( + shard_id, table_id, partition_id, object_store_id, + max_sequence_number, min_time, max_time, file_size_bytes, + row_count, compaction_level, created_at, namespace_id, column_set, max_l0_created_at ) +VALUES ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14 ) +RETURNING *; + "#, + ) + .bind(shard_id) // $1 + .bind(table_id) // $2 + .bind(partition_id) // $3 + .bind(object_store_id) // $4 + .bind(max_sequence_number) // $5 + .bind(min_time) // $6 + .bind(max_time) // $7 + .bind(file_size_bytes) // $8 + .bind(row_count) // $9 + .bind(compaction_level) // $10 + .bind(created_at) // $11 + .bind(namespace_id) // $12 + .bind(from_column_set(&column_set)) // $13 + .bind(max_l0_created_at) // $14 + .fetch_one(self.inner.get_mut()) + .await + .map_err(|e| { + if is_unique_violation(&e) { + Error::FileExists { object_store_id } + } else if is_fk_violation(&e) { + Error::ForeignKeyViolation { source: e } + } else { + Error::SqlxError { source: e } + } + })?; + + Ok(rec.into()) + } + + async fn flag_for_delete(&mut self, id: ParquetFileId) -> Result<()> { + let marked_at = Timestamp::from(self.time_provider.now()); + + let _ = sqlx::query(r#"UPDATE parquet_file SET to_delete = $1 WHERE id = $2;"#) + .bind(marked_at) // $1 + .bind(id) // $2 + .execute(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + Ok(()) + } + + async fn flag_for_delete_by_retention(&mut self) -> Result> { + let flagged_at = Timestamp::from(self.time_provider.now()); + // TODO - include check of table retention period once implemented + let flagged = sqlx::query( + r#" + UPDATE parquet_file + SET to_delete = $1 + FROM namespace + WHERE namespace.retention_period_ns IS NOT NULL + AND parquet_file.to_delete IS NULL + AND parquet_file.max_time < $1 - 
namespace.retention_period_ns + AND namespace.id = parquet_file.namespace_id + RETURNING parquet_file.id; + "#, + ) + .bind(flagged_at) // $1 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + let flagged = flagged.into_iter().map(|row| row.get("id")).collect(); + Ok(flagged) + } + + async fn list_by_shard_greater_than( + &mut self, + shard_id: ShardId, + sequence_number: SequenceNumber, + ) -> Result> { + // Deliberately doesn't use `SELECT *` to avoid the performance hit of fetching the large + // `parquet_metadata` column!! + Ok(sqlx::query_as::<_, ParquetFilePod>( + r#" +SELECT id, shard_id, namespace_id, table_id, partition_id, object_store_id, + max_sequence_number, min_time, max_time, to_delete, file_size_bytes, + row_count, compaction_level, created_at, column_set, max_l0_created_at +FROM parquet_file +WHERE shard_id = $1 + AND max_sequence_number > $2 +ORDER BY id; + "#, + ) + .bind(shard_id) // $1 + .bind(sequence_number) // $2 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })? + .into_iter() + .map(Into::into) + .collect()) + } + + async fn list_by_namespace_not_to_delete( + &mut self, + namespace_id: NamespaceId, + ) -> Result> { + // Deliberately doesn't use `SELECT *` to avoid the performance hit of fetching the large + // `parquet_metadata` column!! + Ok(sqlx::query_as::<_, ParquetFilePod>( + r#" +SELECT parquet_file.id, parquet_file.shard_id, parquet_file.namespace_id, + parquet_file.table_id, parquet_file.partition_id, parquet_file.object_store_id, + parquet_file.max_sequence_number, parquet_file.min_time, + parquet_file.max_time, parquet_file.to_delete, parquet_file.file_size_bytes, + parquet_file.row_count, parquet_file.compaction_level, parquet_file.created_at, parquet_file.column_set, + parquet_file.max_l0_created_at +FROM parquet_file +INNER JOIN table_name on table_name.id = parquet_file.table_id +WHERE table_name.namespace_id = $1 + AND parquet_file.to_delete IS NULL; + "#, + ) + .bind(namespace_id) // $1 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })? + .into_iter() + .map(Into::into) + .collect()) + } + + async fn list_by_table_not_to_delete(&mut self, table_id: TableId) -> Result> { + // Deliberately doesn't use `SELECT *` to avoid the performance hit of fetching the large + // `parquet_metadata` column!! + Ok(sqlx::query_as::<_, ParquetFilePod>( + r#" +SELECT id, shard_id, namespace_id, table_id, partition_id, object_store_id, + max_sequence_number, min_time, max_time, to_delete, file_size_bytes, + row_count, compaction_level, created_at, column_set, max_l0_created_at +FROM parquet_file +WHERE table_id = $1 AND to_delete IS NULL; + "#, + ) + .bind(table_id) // $1 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })? + .into_iter() + .map(Into::into) + .collect()) + } + + async fn delete_old(&mut self, older_than: Timestamp) -> Result> { + Ok(sqlx::query_as::<_, ParquetFilePod>( + r#" +DELETE FROM parquet_file +WHERE to_delete < $1 +RETURNING *; + "#, + ) + .bind(older_than) // $1 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })? 
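+        // Rows come back as `ParquetFilePod` and are converted into `ParquetFile`, rebuilding
+        // the column set from its JSON representation.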
+ .into_iter() + .map(Into::into) + .collect()) + } + + async fn delete_old_ids_only(&mut self, older_than: Timestamp) -> Result> { + // see https://www.crunchydata.com/blog/simulating-update-or-delete-with-limit-in-sqlite-ctes-to-the-rescue + let deleted = sqlx::query( + r#" +WITH parquet_file_ids as ( + SELECT id + FROM parquet_file + WHERE to_delete < $1 + LIMIT $2 +) +DELETE FROM parquet_file +WHERE id IN (SELECT id FROM parquet_file_ids) +RETURNING id; + "#, + ) + .bind(older_than) // $1 + .bind(MAX_PARQUET_FILES_DELETED_ONCE) // $2 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + let deleted = deleted.into_iter().map(|row| row.get("id")).collect(); + Ok(deleted) + } + + async fn level_0(&mut self, shard_id: ShardId) -> Result> { + // this intentionally limits the returned files to 10,000 as it is used to make + // a decision on the highest priority partitions. If compaction has never been + // run this could end up returning millions of results and taking too long to run. + // Deliberately doesn't use `SELECT *` to avoid the performance hit of fetching the large + // `parquet_metadata` column!! + Ok(sqlx::query_as::<_, ParquetFilePod>( + r#" +SELECT id, shard_id, namespace_id, table_id, partition_id, object_store_id, + max_sequence_number, min_time, max_time, to_delete, file_size_bytes, + row_count, compaction_level, created_at, column_set, max_l0_created_at +FROM parquet_file +WHERE parquet_file.shard_id = $1 + AND parquet_file.compaction_level = $2 + AND parquet_file.to_delete IS NULL + LIMIT 1000; + "#, + ) + .bind(shard_id) // $1 + .bind(CompactionLevel::Initial) // $2 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })? + .into_iter() + .map(Into::into) + .collect()) + } + + async fn level_1( + &mut self, + table_partition: TablePartition, + min_time: Timestamp, + max_time: Timestamp, + ) -> Result> { + // Deliberately doesn't use `SELECT *` to avoid the performance hit of fetching the large + // `parquet_metadata` column!! + Ok(sqlx::query_as::<_, ParquetFilePod>( + r#" +SELECT id, shard_id, namespace_id, table_id, partition_id, object_store_id, + max_sequence_number, min_time, max_time, to_delete, file_size_bytes, + row_count, compaction_level, created_at, column_set, max_l0_created_at +FROM parquet_file +WHERE parquet_file.shard_id = $1 + AND parquet_file.table_id = $2 + AND parquet_file.partition_id = $3 + AND parquet_file.compaction_level = $4 + AND parquet_file.to_delete IS NULL + AND ((parquet_file.min_time <= $5 AND parquet_file.max_time >= $5) + OR (parquet_file.min_time > $5 AND parquet_file.min_time <= $6)); + "#, + ) + .bind(table_partition.shard_id) // $1 + .bind(table_partition.table_id) // $2 + .bind(table_partition.partition_id) // $3 + .bind(CompactionLevel::FileNonOverlapped) // $4 + .bind(min_time) // $5 + .bind(max_time) // $6 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })? 
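+        // The two OR-ed range conditions above select files whose [min_time, max_time]
+        // interval overlaps the requested [$5, $6] window.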
+ .into_iter() + .map(Into::into) + .collect()) + } + + async fn recent_highest_throughput_partitions( + &mut self, + shard_id: Option, + time_in_the_past: Timestamp, + min_num_files: usize, + num_partitions: usize, + ) -> Result> { + let min_num_files = min_num_files as i32; + let num_partitions = num_partitions as i32; + + match shard_id { + Some(shard_id) => { + sqlx::query_as::<_, PartitionParam>( + r#" +SELECT parquet_file.partition_id, parquet_file.table_id, parquet_file.shard_id, + parquet_file.namespace_id, count(parquet_file.id) +FROM parquet_file +LEFT OUTER JOIN skipped_compactions ON parquet_file.partition_id = skipped_compactions.partition_id +WHERE compaction_level = $5 +AND to_delete is null +AND shard_id = $1 +AND created_at > $2 +AND skipped_compactions.partition_id IS NULL +GROUP BY 1, 2, 3, 4 +HAVING count(id) >= $3 +ORDER BY 5 DESC +LIMIT $4; + "#, + ) + .bind(shard_id) // $1 + .bind(time_in_the_past) //$2 + .bind(min_num_files) // $3 + .bind(num_partitions) // $4 + .bind(CompactionLevel::Initial) // $5 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e }) + } + None => { + sqlx::query_as::<_, PartitionParam>( + r#" +SELECT parquet_file.partition_id, parquet_file.table_id, parquet_file.shard_id, + parquet_file.namespace_id, count(parquet_file.id) +FROM parquet_file +LEFT OUTER JOIN skipped_compactions ON parquet_file.partition_id = skipped_compactions.partition_id +WHERE compaction_level = $4 +AND to_delete is null +AND created_at > $1 +AND skipped_compactions.partition_id IS NULL +GROUP BY 1, 2, 3, 4 +HAVING count(id) >= $2 +ORDER BY 5 DESC +LIMIT $3; + "#, + ) + .bind(time_in_the_past) //$1 + .bind(min_num_files) // $2 + .bind(num_partitions) // $3 + .bind(CompactionLevel::Initial) // $4 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e }) + } + } + } + + async fn partitions_with_small_l1_file_count( + &mut self, + shard_id: Option, + small_size_threshold_bytes: i64, + min_small_file_count: usize, + num_partitions: usize, + ) -> Result> { + // This query returns partitions with at least `min_small_file_count` small L1 files, + // where "small" means no bigger than `small_size_threshold_bytes`, limited to the top `num_partitions`. 
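+        //
+        // Note: `shard_id` is bound directly rather than branched on as in
+        // `recent_highest_throughput_partitions`; when it is `None`, SQL NULL is bound and
+        // `shard_id = $1` matches no rows.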
+ sqlx::query_as::<_, PartitionParam>( + r#" +SELECT parquet_file.partition_id, parquet_file.shard_id, parquet_file.namespace_id, + parquet_file.table_id, + COUNT(1) AS l1_file_count +FROM parquet_file +LEFT OUTER JOIN skipped_compactions ON parquet_file.partition_id = skipped_compactions.partition_id +WHERE compaction_level = $5 +AND to_delete IS NULL +AND shard_id = $1 +AND skipped_compactions.partition_id IS NULL +AND file_size_bytes < $3 +GROUP BY 1, 2, 3, 4 +HAVING COUNT(1) >= $2 +ORDER BY l1_file_count DESC +LIMIT $4; + "#, + ) + .bind(shard_id) // $1 + .bind(min_small_file_count as i32) // $2 + .bind(small_size_threshold_bytes) // $3 + .bind(num_partitions as i32) // $4 + .bind(CompactionLevel::FileNonOverlapped) // $5 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e }) + } + + async fn most_cold_files_partitions( + &mut self, + shard_id: Option, + time_in_the_past: Timestamp, + num_partitions: usize, + ) -> Result> { + let num_partitions = num_partitions as i32; + + // This query returns partitions with most L0+L1 files and all L0 files (both deleted and + // non deleted) are either created before the given time ($2) or not available (removed by + // garbage collector) + match shard_id { + Some(shard_id) => { + sqlx::query_as::<_, PartitionParam>( + r#" +SELECT parquet_file.partition_id, parquet_file.shard_id, parquet_file.namespace_id, + parquet_file.table_id, + count(case when to_delete is null then 1 end) total_count, + max(case when compaction_level= $4 then parquet_file.created_at end) +FROM parquet_file +LEFT OUTER JOIN skipped_compactions ON parquet_file.partition_id = skipped_compactions.partition_id +WHERE (compaction_level = $4 OR compaction_level = $5) +AND shard_id = $1 +AND skipped_compactions.partition_id IS NULL +GROUP BY 1, 2, 3, 4 +HAVING count(case when to_delete is null then 1 end) > 0 + AND ( max(case when compaction_level= $4 then parquet_file.created_at end) < $2 OR + max(case when compaction_level= $4 then parquet_file.created_at end) is null) +ORDER BY total_count DESC +LIMIT $3; + "#, + ) + .bind(shard_id) // $1 + .bind(time_in_the_past) // $2 + .bind(num_partitions) // $3 + .bind(CompactionLevel::Initial) // $4 + .bind(CompactionLevel::FileNonOverlapped) // $5 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e }) + } + None => { + sqlx::query_as::<_, PartitionParam>( + r#" +SELECT parquet_file.partition_id, parquet_file.shard_id, parquet_file.namespace_id, + parquet_file.table_id, + count(case when to_delete is null then 1 end) total_count, + max(case when compaction_level= $4 then parquet_file.created_at end) +FROM parquet_file +LEFT OUTER JOIN skipped_compactions ON parquet_file.partition_id = skipped_compactions.partition_id +WHERE (compaction_level = $3 OR compaction_level = $4) +AND skipped_compactions.partition_id IS NULL +GROUP BY 1, 2, 3, 4 +HAVING count(case when to_delete is null then 1 end) > 0 + AND ( max(case when compaction_level= $3 then parquet_file.created_at end) < $1 OR + max(case when compaction_level= $3 then parquet_file.created_at end) is null) +ORDER BY total_count DESC +LIMIT $2; + "#, + ) + .bind(time_in_the_past) // $1 + .bind(num_partitions) // $2 + .bind(CompactionLevel::Initial) // $3 + .bind(CompactionLevel::FileNonOverlapped) // $4 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e }) + } + } + } + + async fn list_by_partition_not_to_delete( + &mut self, + partition_id: PartitionId, + ) -> Result> { + // 
Deliberately doesn't use `SELECT *` to avoid the performance hit of fetching the large + // `parquet_metadata` column!! + Ok(sqlx::query_as::<_, ParquetFilePod>( + r#" +SELECT id, shard_id, namespace_id, table_id, partition_id, object_store_id, + max_sequence_number, min_time, max_time, to_delete, file_size_bytes, + row_count, compaction_level, created_at, column_set, max_l0_created_at +FROM parquet_file +WHERE parquet_file.partition_id = $1 + AND parquet_file.to_delete IS NULL; + "#, + ) + .bind(partition_id) // $1 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })? + .into_iter() + .map(Into::into) + .collect()) + } + + async fn update_compaction_level( + &mut self, + parquet_file_ids: &[ParquetFileId], + compaction_level: CompactionLevel, + ) -> Result> { + // If I try to do `.bind(parquet_file_ids)` directly, I get a compile error from sqlx. + // See https://github.com/launchbadge/sqlx/issues/1744 + let ids: Vec<_> = parquet_file_ids.iter().map(|p| p.get()).collect(); + let updated = sqlx::query( + r#" +UPDATE parquet_file +SET compaction_level = $1 +WHERE id IN (SELECT value FROM json_each($2)) +RETURNING id; + "#, + ) + .bind(compaction_level) // $1 + .bind(Json(&ids[..])) // $2 + .fetch_all(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + let updated = updated.into_iter().map(|row| row.get("id")).collect(); + Ok(updated) + } + + async fn exist(&mut self, id: ParquetFileId) -> Result { + let read_result = sqlx::query_as::<_, Count>( + r#"SELECT count(1) as count FROM parquet_file WHERE id = $1;"#, + ) + .bind(id) // $1 + .fetch_one(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + Ok(read_result.count > 0) + } + + async fn count(&mut self) -> Result { + let read_result = + sqlx::query_as::<_, Count>(r#"SELECT count(1) as count FROM parquet_file;"#) + .fetch_one(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + Ok(read_result.count) + } + + async fn count_by_overlaps_with_level_0( + &mut self, + table_id: TableId, + shard_id: ShardId, + min_time: Timestamp, + max_time: Timestamp, + sequence_number: SequenceNumber, + ) -> Result { + let read_result = sqlx::query_as::<_, Count>( + r#" +SELECT count(1) as count +FROM parquet_file +WHERE table_id = $1 + AND shard_id = $2 + AND max_sequence_number < $3 + AND parquet_file.to_delete IS NULL + AND compaction_level = $6 + AND ((parquet_file.min_time <= $4 AND parquet_file.max_time >= $4) + OR (parquet_file.min_time > $4 AND parquet_file.min_time <= $5)); + "#, + ) + .bind(table_id) // $1 + .bind(shard_id) // $2 + .bind(sequence_number) // $3 + .bind(min_time) // $4 + .bind(max_time) // $5 + .bind(CompactionLevel::Initial) // $6 + .fetch_one(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + Ok(read_result.count) + } + + async fn count_by_overlaps_with_level_1( + &mut self, + table_id: TableId, + shard_id: ShardId, + min_time: Timestamp, + max_time: Timestamp, + ) -> Result { + let read_result = sqlx::query_as::<_, Count>( + r#" +SELECT count(1) as count +FROM parquet_file +WHERE table_id = $1 + AND shard_id = $2 + AND parquet_file.to_delete IS NULL + AND compaction_level = $5 + AND ((parquet_file.min_time <= $3 AND parquet_file.max_time >= $3) + OR (parquet_file.min_time > $3 AND parquet_file.min_time <= $4)); + "#, + ) + .bind(table_id) // $1 + .bind(shard_id) // $2 + .bind(min_time) // $3 + .bind(max_time) // $4 + .bind(CompactionLevel::FileNonOverlapped) // $5 + 
.fetch_one(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + Ok(read_result.count) + } + + async fn get_by_object_store_id( + &mut self, + object_store_id: Uuid, + ) -> Result> { + // Deliberately doesn't use `SELECT *` to avoid the performance hit of fetching the large + // `parquet_metadata` column!! + let rec = sqlx::query_as::<_, ParquetFilePod>( + r#" +SELECT id, shard_id, namespace_id, table_id, partition_id, object_store_id, + max_sequence_number, min_time, max_time, to_delete, file_size_bytes, + row_count, compaction_level, created_at, column_set, max_l0_created_at +FROM parquet_file +WHERE object_store_id = $1; + "#, + ) + .bind(object_store_id) // $1 + .fetch_one(self.inner.get_mut()) + .await; + + if let Err(sqlx::Error::RowNotFound) = rec { + return Ok(None); + } + + let parquet_file = rec.map_err(|e| Error::SqlxError { source: e })?; + + Ok(Some(parquet_file.into())) + } +} + +#[async_trait] +impl ProcessedTombstoneRepo for SqliteTxn { + async fn create( + &mut self, + parquet_file_id: ParquetFileId, + tombstone_id: TombstoneId, + ) -> Result { + sqlx::query_as::<_, ProcessedTombstone>( + r#" +INSERT INTO processed_tombstone ( tombstone_id, parquet_file_id ) +VALUES ( $1, $2 ) +RETURNING *; + "#, + ) + .bind(tombstone_id) // $1 + .bind(parquet_file_id) // $2 + .fetch_one(self.inner.get_mut()) + .await + .map_err(|e| { + if is_unique_violation(&e) { + Error::ProcessTombstoneExists { + tombstone_id: tombstone_id.get(), + parquet_file_id: parquet_file_id.get(), + } + } else if is_fk_violation(&e) { + Error::ForeignKeyViolation { source: e } + } else { + Error::SqlxError { source: e } + } + }) + } + + async fn exist( + &mut self, + parquet_file_id: ParquetFileId, + tombstone_id: TombstoneId, + ) -> Result { + let read_result = sqlx::query_as::<_, Count>( + r#" +SELECT count(1) as count +FROM processed_tombstone +WHERE parquet_file_id = $1 + AND tombstone_id = $2; + "#, + ) + .bind(parquet_file_id) // $1 + .bind(tombstone_id) // $2 + .fetch_one(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + Ok(read_result.count > 0) + } + + async fn count(&mut self) -> Result { + let read_result = + sqlx::query_as::<_, Count>(r#"SELECT count(1) as count FROM processed_tombstone;"#) + .fetch_one(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + Ok(read_result.count) + } + + async fn count_by_tombstone_id(&mut self, tombstone_id: TombstoneId) -> Result { + let read_result = sqlx::query_as::<_, Count>( + r#"SELECT count(1) as count FROM processed_tombstone WHERE tombstone_id = $1;"#, + ) + .bind(tombstone_id) // $1 + .fetch_one(self.inner.get_mut()) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + Ok(read_result.count) + } +} + +/// The error code returned by SQLite for a unique constraint violation. +/// +/// See +const SQLITE_UNIQUE_VIOLATION: &str = "2067"; + +/// Error code returned by SQLite for a foreign key constraint violation. +/// See +const SQLITE_FK_VIOLATION: &str = "787"; + +fn is_fk_violation(e: &sqlx::Error) -> bool { + if let sqlx::Error::Database(inner) = e { + if let Some(code) = inner.code() { + if code == SQLITE_FK_VIOLATION { + return true; + } + } + } + + false +} + +/// Returns true if `e` is a unique constraint violation error. 
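+///
+/// The SQLite driver surfaces extended result codes as strings, so the comparison is against
+/// the textual form of `SQLITE_CONSTRAINT_UNIQUE` (2067); `is_fk_violation` does the same for
+/// `SQLITE_CONSTRAINT_FOREIGNKEY` (787).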
+fn is_unique_violation(e: &sqlx::Error) -> bool { + if let sqlx::Error::Database(inner) = e { + if let Some(code) = inner.code() { + if code == SQLITE_UNIQUE_VIOLATION { + return true; + } + } + } + + false +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::create_or_get_default_records; + use assert_matches::assert_matches; + use metric::{Attributes, DurationHistogram, Metric}; + use std::{ops::DerefMut, sync::Arc}; + + fn assert_metric_hit(metrics: &Registry, name: &'static str) { + let histogram = metrics + .get_instrument::>("catalog_op_duration") + .expect("failed to read metric") + .get_observer(&Attributes::from(&[("op", name), ("result", "success")])) + .expect("failed to get observer") + .fetch(); + + let hit_count = histogram.sample_count(); + assert!(hit_count > 0, "metric did not record any calls"); + } + + async fn setup_db() -> SqliteCatalog { + let dsn = + std::env::var("TEST_INFLUXDB_SQLITE_DSN").unwrap_or("sqlite::memory:".to_string()); + let options = SqliteConnectionOptions { dsn }; + let metrics = Arc::new(Registry::default()); + let cat = SqliteCatalog::connect(options, metrics) + .await + .expect("failed to connect to catalog"); + cat.setup().await.expect("failed to initialise database"); + cat + } + + #[tokio::test] + async fn test_catalog() { + let sqlite = setup_db().await; + let sqlite: Arc = Arc::new(sqlite); + interface::test_helpers::test_catalog(sqlite).await; + } + + #[tokio::test] + async fn test_tombstone_create_or_get_idempotent() { + let sqlite = setup_db().await; + let sqlite: Arc = Arc::new(sqlite); + + let mut txn = sqlite.start_transaction().await.expect("txn start"); + let (kafka, query, shards) = create_or_get_default_records(1, txn.deref_mut()) + .await + .expect("db init failed"); + txn.commit().await.expect("txn commit"); + + let namespace_id = sqlite + .repositories() + .await + .namespaces() + .create("ns", None, kafka.id, query.id) + .await + .expect("namespace create failed") + .id; + let table_id = sqlite + .repositories() + .await + .tables() + .create_or_get("table", namespace_id) + .await + .expect("create table failed") + .id; + + let shard_id = *shards.keys().next().expect("no shard"); + let sequence_number = SequenceNumber::new(3); + let min_timestamp = Timestamp::new(10); + let max_timestamp = Timestamp::new(100); + let predicate = "bananas"; + + let a = sqlite + .repositories() + .await + .tombstones() + .create_or_get( + table_id, + shard_id, + sequence_number, + min_timestamp, + max_timestamp, + predicate, + ) + .await + .expect("should create OK"); + + // Call create_or_get for the same (table_id, shard_id, + // sequence_number) triplet, setting the same metadata to ensure the + // write is idempotent. 
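+        //
+        // The `ON CONFLICT ... DO UPDATE` clause in `TombstoneRepo::create_or_get` means the
+        // second call returns the existing row instead of inserting a duplicate.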
+ let b = sqlite + .repositories() + .await + .tombstones() + .create_or_get( + table_id, + shard_id, + sequence_number, + min_timestamp, + max_timestamp, + predicate, + ) + .await + .expect("idempotent write should succeed"); + + assert_eq!(a, b); + } + + #[tokio::test] + #[should_panic = "attempted to overwrite predicate"] + async fn test_tombstone_create_or_get_no_overwrite() { + let sqlite = setup_db().await; + let sqlite: Arc = Arc::new(sqlite); + + let mut txn = sqlite.start_transaction().await.expect("txn start"); + let (kafka, query, shards) = create_or_get_default_records(1, txn.deref_mut()) + .await + .expect("db init failed"); + txn.commit().await.expect("txn commit"); + + let namespace_id = sqlite + .repositories() + .await + .namespaces() + .create("ns2", None, kafka.id, query.id) + .await + .expect("namespace create failed") + .id; + let table_id = sqlite + .repositories() + .await + .tables() + .create_or_get("table2", namespace_id) + .await + .expect("create table failed") + .id; + + let shard_id = *shards.keys().next().expect("no shard"); + let sequence_number = SequenceNumber::new(3); + let min_timestamp = Timestamp::new(10); + let max_timestamp = Timestamp::new(100); + + let a = sqlite + .repositories() + .await + .tombstones() + .create_or_get( + table_id, + shard_id, + sequence_number, + min_timestamp, + max_timestamp, + "bananas", + ) + .await + .expect("should create OK"); + + // Call create_or_get for the same (table_id, shard_id, + // sequence_number) triplet with different metadata. + // + // The caller should not falsely believe it has persisted the incorrect + // predicate. + let b = sqlite + .repositories() + .await + .tombstones() + .create_or_get( + table_id, + shard_id, + sequence_number, + min_timestamp, + max_timestamp, + "some other serialized predicate which is different", + ) + .await + .expect("should panic before result evaluated"); + + assert_eq!(a, b); + } + + #[tokio::test] + async fn test_partition_create_or_get_idempotent() { + let sqlite = setup_db().await; + + let sqlite: Arc = Arc::new(sqlite); + let mut txn = sqlite.start_transaction().await.expect("txn start"); + let (kafka, query, shards) = create_or_get_default_records(1, txn.deref_mut()) + .await + .expect("db init failed"); + txn.commit().await.expect("txn commit"); + + let namespace_id = sqlite + .repositories() + .await + .namespaces() + .create("ns4", None, kafka.id, query.id) + .await + .expect("namespace create failed") + .id; + let table_id = sqlite + .repositories() + .await + .tables() + .create_or_get("table", namespace_id) + .await + .expect("create table failed") + .id; + + let key = "bananas"; + let shard_id = *shards.keys().next().expect("no shard"); + + let a = sqlite + .repositories() + .await + .partitions() + .create_or_get(key.into(), shard_id, table_id) + .await + .expect("should create OK"); + + // Call create_or_get for the same (key, table_id, shard_id) + // triplet, setting the same shard ID to ensure the write is + // idempotent. 
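The idempotency asserted by the second `create_or_get` call below (and by the tombstone test above) boils down to the classic SQLite upsert shape: insert, and on a key conflict hand back the existing row unchanged. A hedged, self-contained sketch with a hypothetical table — not the statement used by `sqlite.rs`, which is outside this hunk — relying on `ON CONFLICT ... DO UPDATE ... RETURNING`, which the bundled SQLite supports:

```rust
use sqlx::{Connection, Row, SqliteConnection};

#[tokio::main]
async fn main() -> Result<(), sqlx::Error> {
    let mut conn = SqliteConnection::connect("sqlite::memory:").await?;
    sqlx::query(
        "CREATE TABLE partition_like (key TEXT NOT NULL, table_id INTEGER NOT NULL, \
         shard_id INTEGER NOT NULL, UNIQUE (key, table_id))",
    )
    .execute(&mut conn)
    .await?;

    // On conflict, perform a no-op update so RETURNING yields the existing row.
    let upsert = r#"
INSERT INTO partition_like ( key, table_id, shard_id )
VALUES ( $1, $2, $3 )
ON CONFLICT ( key, table_id )
DO UPDATE SET key = key
RETURNING *;
    "#;

    let a = sqlx::query(upsert)
        .bind("bananas")
        .bind(42_i64)
        .bind(1_i64)
        .fetch_one(&mut conn)
        .await?;
    let b = sqlx::query(upsert)
        .bind("bananas")
        .bind(42_i64)
        .bind(1_i64)
        .fetch_one(&mut conn)
        .await?;

    // The second call inserted nothing; it just fetched what was already there.
    assert_eq!(a.get::<i64, _>("shard_id"), b.get::<i64, _>("shard_id"));
    Ok(())
}
```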
+ let b = sqlite + .repositories() + .await + .partitions() + .create_or_get(key.into(), shard_id, table_id) + .await + .expect("idempotent write should succeed"); + + assert_eq!(a, b); + } + + #[tokio::test] + #[should_panic = "attempted to overwrite partition"] + async fn test_partition_create_or_get_no_overwrite() { + let sqlite = setup_db().await; + + let sqlite: Arc = Arc::new(sqlite); + let mut txn = sqlite.start_transaction().await.expect("txn start"); + let (kafka, query, _) = create_or_get_default_records(2, txn.deref_mut()) + .await + .expect("db init failed"); + txn.commit().await.expect("txn commit"); + + let namespace_id = sqlite + .repositories() + .await + .namespaces() + .create("ns3", None, kafka.id, query.id) + .await + .expect("namespace create failed") + .id; + let table_id = sqlite + .repositories() + .await + .tables() + .create_or_get("table", namespace_id) + .await + .expect("create table failed") + .id; + + let key = "bananas"; + + let shards = sqlite + .repositories() + .await + .shards() + .list() + .await + .expect("failed to list shards"); + assert!( + shards.len() > 1, + "expected more shards to be created, got {}", + shards.len() + ); + + let a = sqlite + .repositories() + .await + .partitions() + .create_or_get(key.into(), shards[0].id, table_id) + .await + .expect("should create OK"); + + // Call create_or_get for the same (key, table_id) tuple, setting a + // different shard ID + let b = sqlite + .repositories() + .await + .partitions() + .create_or_get(key.into(), shards[1].id, table_id) + .await + .expect("result should not be evaluated"); + + assert_eq!(a, b); + } + + macro_rules! test_column_create_or_get_many_unchecked { + ( + $name:ident, + calls = {$([$($col_name:literal => $col_type:expr),+ $(,)?]),+}, + want = $($want:tt)+ + ) => { + paste::paste! { + #[tokio::test] + async fn []() { + let sqlite = setup_db().await; + let metrics = Arc::clone(&sqlite.metrics); + + let sqlite: Arc = Arc::new(sqlite); + let mut txn = sqlite.start_transaction().await.expect("txn start"); + let (kafka, query, _shards) = create_or_get_default_records(1, txn.deref_mut()) + .await + .expect("db init failed"); + txn.commit().await.expect("txn commit"); + + let namespace_id = sqlite + .repositories() + .await + .namespaces() + .create("ns4", None, kafka.id, query.id) + .await + .expect("namespace create failed") + .id; + let table_id = sqlite + .repositories() + .await + .tables() + .create_or_get("table", namespace_id) + .await + .expect("create table failed") + .id; + + $( + let mut insert = HashMap::new(); + $( + insert.insert($col_name, $col_type); + )+ + + let got = sqlite + .repositories() + .await + .columns() + .create_or_get_many_unchecked(table_id, insert.clone()) + .await; + + // The returned columns MUST always match the requested + // column values if successful. + if let Ok(got) = &got { + assert_eq!(insert.len(), got.len()); + + for got in got { + assert_eq!(table_id, got.table_id); + let requested_column_type = insert + .get(got.name.as_str()) + .expect("Should have gotten back a column that was inserted"); + assert_eq!( + *requested_column_type, + ColumnType::try_from(got.column_type) + .expect("invalid column type") + ); + } + + assert_metric_hit(&metrics, "column_create_or_get_many_unchecked"); + } + )+ + + assert_matches!(got, $($want)+); + } + } + } + } + + // Issue a few calls to create_or_get_many that contain distinct columns and + // covers the full set of column types. 
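The invocations that follow get their test names from `paste::paste!` inside the macro above, which splices identifier fragments into one function name. A stripped-down version of the same trick (hypothetical macro, assuming only the `paste` crate the macro already uses):

```rust
macro_rules! make_named_test {
    ($name:ident) => {
        paste::paste! {
            // `[<...>]` concatenates its pieces into a single identifier.
            #[test]
            fn [<test_generated_ $name>]() {
                assert_eq!(2 + 2, 4);
            }
        }
    };
}

// Expands to `fn test_generated_insert()` and `fn test_generated_upsert()`.
make_named_test!(insert);
make_named_test!(upsert);
```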
+ test_column_create_or_get_many_unchecked!( + insert, + calls = { + [ + "test1" => ColumnType::I64, + "test2" => ColumnType::U64, + "test3" => ColumnType::F64, + "test4" => ColumnType::Bool, + "test5" => ColumnType::String, + "test6" => ColumnType::Time, + "test7" => ColumnType::Tag, + ], + [ + "test8" => ColumnType::String, + "test9" => ColumnType::Bool, + ] + }, + want = Ok(_) + ); + + // Issue two calls with overlapping columns - request should succeed (upsert + // semantics). + test_column_create_or_get_many_unchecked!( + partial_upsert, + calls = { + [ + "test1" => ColumnType::I64, + "test2" => ColumnType::U64, + "test3" => ColumnType::F64, + "test4" => ColumnType::Bool, + ], + [ + "test1" => ColumnType::I64, + "test2" => ColumnType::U64, + "test3" => ColumnType::F64, + "test4" => ColumnType::Bool, + "test5" => ColumnType::String, + "test6" => ColumnType::Time, + "test7" => ColumnType::Tag, + "test8" => ColumnType::String, + ] + }, + want = Ok(_) + ); + + // Issue two calls with the same columns and types. + test_column_create_or_get_many_unchecked!( + full_upsert, + calls = { + [ + "test1" => ColumnType::I64, + "test2" => ColumnType::U64, + "test3" => ColumnType::F64, + "test4" => ColumnType::Bool, + ], + [ + "test1" => ColumnType::I64, + "test2" => ColumnType::U64, + "test3" => ColumnType::F64, + "test4" => ColumnType::Bool, + ] + }, + want = Ok(_) + ); + + // Issue two calls with overlapping columns with conflicting types and + // observe a correctly populated ColumnTypeMismatch error. + test_column_create_or_get_many_unchecked!( + partial_type_conflict, + calls = { + [ + "test1" => ColumnType::String, + "test2" => ColumnType::String, + "test3" => ColumnType::String, + "test4" => ColumnType::String, + ], + [ + "test1" => ColumnType::String, + "test2" => ColumnType::Bool, // This one differs + "test3" => ColumnType::String, + // 4 is missing. 
+ "test5" => ColumnType::String, + "test6" => ColumnType::Time, + "test7" => ColumnType::Tag, + "test8" => ColumnType::String, + ] + }, + want = Err(e) => { + assert_matches!(e, Error::ColumnTypeMismatch { name, existing, new } => { + assert_eq!(name, "test2"); + assert_eq!(existing, ColumnType::String); + assert_eq!(new, ColumnType::Bool); + }) + } + ); + + #[tokio::test] + async fn test_billing_summary_on_parqet_file_creation() { + let sqlite = setup_db().await; + let pool = sqlite.pool.clone(); + + let sqlite: Arc = Arc::new(sqlite); + let mut txn = sqlite.start_transaction().await.expect("txn start"); + let (kafka, query, shards) = create_or_get_default_records(1, txn.deref_mut()) + .await + .expect("db init failed"); + txn.commit().await.expect("txn commit"); + + let namespace_id = sqlite + .repositories() + .await + .namespaces() + .create("ns4", None, kafka.id, query.id) + .await + .expect("namespace create failed") + .id; + let table_id = sqlite + .repositories() + .await + .tables() + .create_or_get("table", namespace_id) + .await + .expect("create table failed") + .id; + + let key = "bananas"; + let shard_id = *shards.keys().next().expect("no shard"); + + let partition_id = sqlite + .repositories() + .await + .partitions() + .create_or_get(key.into(), shard_id, table_id) + .await + .expect("should create OK") + .id; + + // parquet file to create- all we care about here is the size, the rest is to satisfy DB + // constraints + let time_provider = Arc::new(SystemProvider::new()); + let time_now = Timestamp::from(time_provider.now()); + let mut p1 = ParquetFileParams { + shard_id, + namespace_id, + table_id, + partition_id, + object_store_id: Uuid::new_v4(), + max_sequence_number: SequenceNumber::new(100), + min_time: Timestamp::new(1), + max_time: Timestamp::new(5), + file_size_bytes: 1337, + row_count: 0, + compaction_level: CompactionLevel::Initial, // level of file of new writes + created_at: time_now, + column_set: ColumnSet::new([ColumnId::new(1), ColumnId::new(2)]), + max_l0_created_at: time_now, + }; + let f1 = sqlite + .repositories() + .await + .parquet_files() + .create(p1.clone()) + .await + .expect("create parquet file should succeed"); + // insert the same again with a different size; we should then have 3x1337 as total file size + p1.object_store_id = Uuid::new_v4(); + p1.file_size_bytes *= 2; + let _f2 = sqlite + .repositories() + .await + .parquet_files() + .create(p1.clone()) + .await + .expect("create parquet file should succeed"); + + // after adding two files we should have 3x1337 in the summary + let total_file_size_bytes: i64 = + sqlx::query_scalar("SELECT total_file_size_bytes FROM billing_summary;") + .fetch_one(&pool) + .await + .expect("fetch total file size failed"); + assert_eq!(total_file_size_bytes, 1337 * 3); + + // flag f1 for deletion and assert that the total file size is reduced accordingly. 
+ sqlite + .repositories() + .await + .parquet_files() + .flag_for_delete(f1.id) + .await + .expect("flag parquet file for deletion should succeed"); + let total_file_size_bytes: i64 = + sqlx::query_scalar("SELECT total_file_size_bytes FROM billing_summary;") + .fetch_one(&pool) + .await + .expect("fetch total file size failed"); + // we marked the first file of size 1337 for deletion leaving only the second that was 2x that + assert_eq!(total_file_size_bytes, 1337 * 2); + + // actually deleting shouldn't change the total + let now = Timestamp::from(time_provider.now()); + sqlite + .repositories() + .await + .parquet_files() + .delete_old(now) + .await + .expect("parquet file deletion should succeed"); + let total_file_size_bytes: i64 = + sqlx::query_scalar("SELECT total_file_size_bytes FROM billing_summary;") + .fetch_one(&pool) + .await + .expect("fetch total file size failed"); + assert_eq!(total_file_size_bytes, 1337 * 2); + } +} diff --git a/iox_query/src/exec/gapfill.rs b/iox_query/src/exec/gapfill.rs index a192d3822f..38c1af9da4 100644 --- a/iox_query/src/exec/gapfill.rs +++ b/iox_query/src/exec/gapfill.rs @@ -1,6 +1,8 @@ //! This module contains code that implements //! a gap-filling extension to DataFusion +mod algo; + use std::{ fmt::{self, Debug}, ops::{Bound, Range}, @@ -15,11 +17,15 @@ use datafusion::{ logical_expr::{LogicalPlan, UserDefinedLogicalNode}, physical_expr::{create_physical_expr, execution_props::ExecutionProps, PhysicalSortExpr}, physical_plan::{ - expressions::Column, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, - PhysicalExpr, SendableRecordBatchStream, Statistics, + expressions::Column, + metrics::{BaselineMetrics, ExecutionPlanMetricsSet}, + DisplayFormatType, Distribution, ExecutionPlan, Partitioning, PhysicalExpr, + SendableRecordBatchStream, Statistics, }, prelude::Expr, }; +use datafusion_util::{watch::WatchedTask, AdapterStream}; +use tokio::sync::mpsc; /// A logical node that represents the gap filling operation. #[derive(Clone, Debug)] @@ -31,17 +37,60 @@ pub struct GapFill { } /// Parameters to the GapFill operation -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub(crate) struct GapFillParams { /// The stride argument from the call to DATE_BIN_GAPFILL pub stride: Expr, /// The source time column pub time_column: Expr, + /// The origin argument from the call to DATE_BIN_GAPFILL + pub origin: Expr, /// The time range of the time column inferred from predicates - /// in overall the query + /// in the overall query pub time_range: Range>, } +impl GapFillParams { + // Extract the expressions so they can be optimized. 
+ fn expressions(&self) -> Vec { + vec![ + self.stride.clone(), + self.time_column.clone(), + self.origin.clone(), + bound_extract(&self.time_range.start) + .unwrap_or_else(|| panic!("lower time bound is required")) + .clone(), + bound_extract(&self.time_range.end) + .unwrap_or_else(|| panic!("upper time bound is required")) + .clone(), + ] + } + + #[allow(clippy::wrong_self_convention)] // follows convention of UserDefinedLogicalNode + fn from_template(&self, exprs: &[Expr]) -> Self { + assert!( + exprs.len() >= 3, + "should be a at least stride, source and origin in params" + ); + let mut iter = exprs.iter().cloned(); + let stride = iter.next().unwrap(); + let time_column = iter.next().unwrap(); + let origin = iter.next().unwrap(); + let time_range = try_map_range(&self.time_range, |b| { + try_map_bound(b.as_ref(), |_| { + Ok(iter.next().expect("expr count should match template")) + }) + }) + .unwrap(); + Self { + stride, + time_column, + origin, + time_range, + } + } +} + impl GapFill { pub(crate) fn try_new( input: Arc, @@ -74,7 +123,8 @@ impl UserDefinedLogicalNode for GapFill { fn expressions(&self) -> Vec { self.group_expr .iter() - .chain(self.aggr_expr.iter()) + .chain(&self.aggr_expr) + .chain(&self.params.expressions()) .cloned() .collect() } @@ -97,14 +147,11 @@ impl UserDefinedLogicalNode for GapFill { inputs: &[LogicalPlan], ) -> Arc { let mut group_expr: Vec<_> = exprs.to_vec(); - let aggr_expr = group_expr.split_off(self.group_expr.len()); - let gapfill = Self::try_new( - Arc::new(inputs[0].clone()), - group_expr, - aggr_expr, - self.params.clone(), - ) - .expect("should not fail"); + let mut aggr_expr = group_expr.split_off(self.group_expr.len()); + let param_expr = aggr_expr.split_off(self.aggr_expr.len()); + let params = self.params.from_template(¶m_expr); + let gapfill = Self::try_new(Arc::new(inputs[0].clone()), group_expr, aggr_expr, params) + .expect("should not fail"); Arc::new(gapfill) } } @@ -162,9 +209,17 @@ pub(crate) fn plan_gap_fill( }) })?; + let origin = create_physical_expr( + &gap_fill.params.origin, + input_dfschema, + input_schema, + execution_props, + )?; + let params = GapFillExecParams { stride, time_column, + origin, time_range, }; GapFillExec::try_new( @@ -175,9 +230,9 @@ pub(crate) fn plan_gap_fill( ) } -fn try_map_range(tr: &Range, f: F) -> Result> +fn try_map_range(tr: &Range, mut f: F) -> Result> where - F: Fn(&T) -> Result, + F: FnMut(&T) -> Result, { Ok(Range { start: f(&tr.start)?, @@ -185,9 +240,9 @@ where }) } -fn try_map_bound(bt: Bound, f: F) -> Result> +fn try_map_bound(bt: Bound, mut f: F) -> Result> where - F: FnOnce(T) -> Result, + F: FnMut(T) -> Result, { Ok(match bt { Bound::Excluded(t) => Bound::Excluded(f(t)?), @@ -196,6 +251,12 @@ where }) } +fn bound_extract(b: &Bound) -> Option<&T> { + match b { + Bound::Included(t) | Bound::Excluded(t) => Some(t), + Bound::Unbounded => None, + } +} /// A physical node for the gap-fill operation. pub struct GapFillExec { input: Arc, @@ -208,6 +269,8 @@ pub struct GapFillExec { sort_expr: Vec, // Parameters (besides streaming data) to gap filling params: GapFillExecParams, + /// Metrics reporting behavior during execution. + metrics: ExecutionPlanMetricsSet, } #[derive(Clone, Debug)] @@ -216,7 +279,10 @@ struct GapFillExecParams { stride: Arc, /// The timestamp column produced by date_bin time_column: Column, - /// The time range of timestamps in the time column + /// The origin argument from the all to DATE_BIN_GAPFILL + origin: Arc, + /// The time range of source input to DATE_BIN_GAPFILL. 
+ /// Inferred from predicates in the overall query. time_range: Range>>, } @@ -242,11 +308,9 @@ impl GapFillExec { .iter() .enumerate() .find(|(_i, e)| { - if let Some(col) = e.as_any().downcast_ref::() { - col.index() == params.time_column.index() - } else { - false - } + e.as_any() + .downcast_ref::() + .map_or(false, |c| c.index() == params.time_column.index()) }) .map(|(i, _)| i); @@ -268,6 +332,7 @@ impl GapFillExec { aggr_expr, sort_expr, params, + metrics: ExecutionPlanMetricsSet::new(), }) } } @@ -333,14 +398,29 @@ impl ExecutionPlan for GapFillExec { fn execute( &self, partition: usize, - _context: Arc, + context: Arc, ) -> Result { if self.output_partitioning().partition_count() <= partition { return Err(DataFusionError::Internal(format!( "GapFillExec invalid partition {partition}" ))); } - Err(DataFusionError::NotImplemented("gap filling".to_string())) + + let baseline_metrics = BaselineMetrics::new(&self.metrics, partition); + let output_batch_size = context.session_config().batch_size(); + let input_stream = self.input.execute(partition, context)?; + let (tx, rx) = mpsc::channel(1); + let fut = algo::fill_gaps( + output_batch_size, + input_stream, + self.sort_expr.clone(), + self.aggr_expr.clone(), + self.params.clone(), + tx.clone(), + baseline_metrics, + ); + let handle = WatchedTask::new(fut, vec![tx], "gapfill batches"); + Ok(AdapterStream::adapt(self.schema(), rx, handle)) } fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -404,6 +484,36 @@ mod test { logical_plan::table_scan(Some("temps"), &schema, None)?.build() } + #[test] + fn test_from_template() -> Result<()> { + let scan = table_scan()?; + let gapfill = GapFill::try_new( + Arc::new(scan.clone()), + vec![col("loc"), col("time")], + vec![col("temp")], + GapFillParams { + stride: lit(ScalarValue::IntervalDayTime(Some(60_000))), + time_column: col("time"), + origin: lit_timestamp_nano(0), + time_range: Range { + start: Bound::Included(lit_timestamp_nano(1000)), + end: Bound::Excluded(lit_timestamp_nano(2000)), + }, + }, + )?; + let exprs = gapfill.expressions(); + assert_eq!(8, exprs.len()); + let gapfill_ft = gapfill.from_template(&exprs, &[scan]); + let gapfill_ft = gapfill_ft + .as_any() + .downcast_ref::() + .expect("should be a GapFill"); + assert_eq!(gapfill.group_expr, gapfill_ft.group_expr); + assert_eq!(gapfill.aggr_expr, gapfill_ft.aggr_expr); + assert_eq!(gapfill.params, gapfill_ft.params); + Ok(()) + } + #[test] fn fmt_logical_plan() -> Result<()> { // This test case does not make much sense but @@ -417,6 +527,7 @@ mod test { GapFillParams { stride: lit(ScalarValue::IntervalDayTime(Some(60_000))), time_column: col("time"), + origin: lit_timestamp_nano(0), time_range: Range { start: Bound::Included(lit_timestamp_nano(1000)), end: Bound::Excluded(lit_timestamp_nano(2000)), diff --git a/iox_query/src/exec/gapfill/algo.rs b/iox_query/src/exec/gapfill/algo.rs new file mode 100644 index 0000000000..916e722833 --- /dev/null +++ b/iox_query/src/exec/gapfill/algo.rs @@ -0,0 +1,289 @@ +use std::{ops::Bound, sync::Arc}; + +use arrow::{datatypes::IntervalDayTimeType, record_batch::RecordBatch}; +use chrono::Duration; +use datafusion::{ + error::DataFusionError, + error::Result, + physical_expr::{datetime_expressions::date_bin, PhysicalSortExpr}, + physical_plan::{ + metrics::BaselineMetrics, ColumnarValue, PhysicalExpr, SendableRecordBatchStream, + }, + scalar::ScalarValue, +}; +use tokio::sync::mpsc; +use tokio_stream::StreamExt; + +use super::{try_map_bound, try_map_range, 
GapFillExecParams}; + +/// Fill in the gaps in a stream of records that represent +/// one or more time series. +/// +/// # Arguments +/// +/// * `output_batch_size` +/// * `input_stream` +/// * `_sort_expr` - The incoming records will be sorted by these +/// expressions. They will all be simple column references, +/// with the last one being the timestamp value for each row. +/// The last column will already have been normalized by a previous +/// call to DATE_BIN. +/// * `_aggr_expr` - A set of column expressions that are the aggregate values +/// computed by an upstream Aggregate node. +/// * `params` - The parameters for gap filling, including the stride and the +/// start and end of the time range for this operation. +/// * `_tx` - The transmit end of the channel for output. +/// * `_baseline_metrics` +pub(super) async fn fill_gaps( + _output_batch_size: usize, + mut input_stream: SendableRecordBatchStream, + _sort_expr: Vec, + _aggr_expr: Vec>, + params: GapFillExecParams, + _tx: mpsc::Sender>, + _baseline_metrics: BaselineMetrics, +) -> Result<()> { + while let Some(batch) = input_stream.next().await { + let batch = batch?; + let _params = evaluate_params(&batch, ¶ms); + } + Err(DataFusionError::NotImplemented("gap_filling".to_string())) +} + +#[derive(Debug, PartialEq)] +struct GapFillParams { + #[allow(unused)] + pub stride: i64, + #[allow(unused)] + pub first_ts: i64, + #[allow(unused)] + pub last_ts: i64, +} + +/// Figure out the actual values (as native i64) for the stride, +/// first and last timestamp for gap filling. +fn evaluate_params( + batch: &RecordBatch, + params: &super::GapFillExecParams, +) -> Result { + let stride = params.stride.evaluate(batch)?; + let origin = params.origin.evaluate(batch)?; + + // Evaluate the upper and lower bounds of the time range + let range = try_map_range(¶ms.time_range, |b| { + try_map_bound(b.as_ref(), |pe| { + extract_timestamp_nanos(&pe.evaluate(batch)?) 
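While reading `evaluate_params` (the function this hunk is in the middle of), it helps to see what `date_bin` does to the two range endpoints: for a fixed day-time stride it is plain integer truncation relative to the origin. The hypothetical helper below reproduces the values expected by the tests added later in this file; it is a worked example only, not the code path used at runtime.

```rust
/// Start of the stride-aligned bin containing `ts`, all in nanoseconds.
/// Roughly mirrors what DataFusion's `date_bin` computes for a scalar
/// timestamp and a fixed (day-time) stride.
fn bin_start(ts: i64, stride: i64, origin: i64) -> i64 {
    origin + (ts - origin).div_euclid(stride) * stride
}

fn main() {
    let minute = 60_000_000_000_i64;
    // 1984-01-01T15:55:00Z in nanoseconds since the epoch.
    let ts = 441_820_500_000_000_000_i64;

    // With the epoch as origin the timestamp is already bin-aligned.
    assert_eq!(bin_start(ts, minute, 0), 441_820_500_000_000_000);

    // With an origin 9s past the epoch every bin shifts by 9s, so the same
    // timestamp falls into the bin starting at 15:54:09.
    assert_eq!(bin_start(ts, minute, 9_000_000_000), 441_820_449_000_000_000);
}
```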
+ }) + })?; + + // Find the smallest timestamp that might appear in the + // range + let first_ts = match range.start { + Bound::Included(v) => v, + Bound::Excluded(v) => v + 1, + Bound::Unbounded => { + return Err(DataFusionError::Execution( + "missing lower time bound for gap filling".to_string(), + )) + } + }; + + // Find the largest timestamp that might appear in the + // range + let last_ts = match range.end { + Bound::Included(v) => v, + Bound::Excluded(v) => v - 1, + Bound::Unbounded => { + return Err(DataFusionError::Execution( + "missing upper time bound for gap filling".to_string(), + )) + } + }; + + // Call date_bin on the timestamps to find the first and last time bins + // for each series + let mut args = vec![stride, i64_to_columnar_ts(first_ts), origin]; + let first_ts = extract_timestamp_nanos(&date_bin(&args)?)?; + args[1] = i64_to_columnar_ts(last_ts); + let last_ts = extract_timestamp_nanos(&date_bin(&args)?)?; + + Ok(GapFillParams { + stride: extract_interval_nanos(&args[0])?, + first_ts, + last_ts, + }) +} + +fn i64_to_columnar_ts(i: i64) -> ColumnarValue { + ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(i), None)) +} + +fn extract_timestamp_nanos(cv: &ColumnarValue) -> Result { + Ok(match cv { + ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(v), _)) => *v, + _ => { + return Err(DataFusionError::Execution( + "gap filling argument must be a scalar timestamp".to_string(), + )) + } + }) +} + +fn extract_interval_nanos(cv: &ColumnarValue) -> Result { + match cv { + ColumnarValue::Scalar(ScalarValue::IntervalDayTime(Some(v))) => { + let (days, ms) = IntervalDayTimeType::to_parts(*v); + let nanos = + (Duration::days(days as i64) + Duration::milliseconds(ms as i64)).num_nanoseconds(); + nanos.ok_or_else(|| { + DataFusionError::Execution("gap filling argument is too large".to_string()) + }) + } + _ => Err(DataFusionError::Execution( + "gap filling expects a stride parameter to be a scalar interval".to_string(), + )), + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use arrow::{ + array::{ArrayRef, Float64Array, StringArray, TimestampNanosecondArray}, + datatypes::{DataType, Field, Schema, TimeUnit}, + error::Result as ArrowResult, + record_batch::RecordBatch, + }; + use datafusion::{ + datasource::empty::EmptyTable, error::Result, from_slice::FromSlice, sql::TableReference, + }; + + use crate::exec::{gapfill::GapFillExec, Executor, ExecutorType}; + + use super::GapFillParams; + + fn schema() -> Schema { + Schema::new(vec![ + Field::new( + "time", + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + ), + Field::new( + "other_time", + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + ), + Field::new("loc", DataType::Utf8, false), + Field::new("temp", DataType::Float64, false), + ]) + } + + fn record_batch() -> ArrowResult { + let columns: Vec = vec![ + Arc::new(TimestampNanosecondArray::from_slice([1000])), + Arc::new(TimestampNanosecondArray::from_slice([2000])), + Arc::new(StringArray::from_slice(["kitchen"])), + Arc::new(Float64Array::from_slice([27.1])), + ]; + RecordBatch::try_new(Arc::new(schema()), columns) + } + + async fn plan_statement_and_get_params(sql: &str) -> Result { + let executor = Executor::new_testing(); + let context = executor.new_context(ExecutorType::Query); + context.inner().register_table( + TableReference::Bare { table: "t" }, + Arc::new(EmptyTable::new(Arc::new(schema()))), + )?; + let physical_plan = context.prepare_sql(sql).await?; + let gapfill_node = &physical_plan.children()[0]; + let 
gapfill_node = gapfill_node.as_any().downcast_ref::().unwrap(); + let exec_params = &gapfill_node.params; + super::evaluate_params(&record_batch()?, exec_params) + } + + #[tokio::test] + async fn test_evaluate_params() -> Result<()> { + test_helpers::maybe_start_logging(); + let actual = plan_statement_and_get_params( + "select\ + \n date_bin_gapfill(interval '1 minute', time, timestamp '1970-01-01T00:00:00Z') minute\ + \nfrom t\ + \nwhere time >= timestamp '1984-01-01T16:00:00Z' - interval '5 minutes'\ + \n and time <= timestamp '1984-01-01T16:00:00Z'\ + \ngroup by minute", + ).await?; + let expected = GapFillParams { + stride: 60_000_000_000, // 1 minute + first_ts: 441_820_500_000_000_000, // Sunday, January 1, 1984 3:55:00 PM + last_ts: 441_820_800_000_000_000, // Sunday, January 1, 1984 3:59:00 PM + }; + assert_eq!(expected, actual); + Ok(()) + } + + #[tokio::test] + async fn test_evaluate_params_exclude_end() -> Result<()> { + test_helpers::maybe_start_logging(); + let actual = plan_statement_and_get_params( + "select\ + \n date_bin_gapfill(interval '1 minute', time, timestamp '1970-01-01T00:00:00Z') minute\ + \nfrom t\ + \nwhere time >= timestamp '1984-01-01T16:00:00Z' - interval '5 minutes'\ + \n and time < timestamp '1984-01-01T16:00:00Z'\ + \ngroup by minute", + ).await?; + let expected = GapFillParams { + stride: 60_000_000_000, // 1 minute + first_ts: 441_820_500_000_000_000, // Sunday, January 1, 1984 3:55:00 PM + // Last bin at 16:00 is excluded + last_ts: 441_820_740_000_000_000, // Sunday, January 1, 1984 3:59:00 PM + }; + assert_eq!(expected, actual); + Ok(()) + } + + #[tokio::test] + async fn test_evaluate_params_exclude_start() -> Result<()> { + test_helpers::maybe_start_logging(); + let actual = plan_statement_and_get_params( + "select\ + \n date_bin_gapfill(interval '1 minute', time, timestamp '1970-01-01T00:00:00Z') minute\ + \nfrom t\ + \nwhere time > timestamp '1984-01-01T16:00:00Z' - interval '5 minutes'\ + \n and time <= timestamp '1984-01-01T16:00:00Z'\ + \ngroup by minute", + ).await?; + let expected = GapFillParams { + stride: 60_000_000_000, // 1 minute + // First bin not exluded since it truncates to 15:55:00 + first_ts: 441_820_500_000_000_000, // Sunday, January 1, 1984 3:55:00 PM + last_ts: 441_820_800_000_000_000, // Sunday, January 1, 1984 3:59:00 PM + }; + assert_eq!(expected, actual); + Ok(()) + } + + #[tokio::test] + async fn test_evaluate_params_origin() -> Result<()> { + test_helpers::maybe_start_logging(); + let actual = plan_statement_and_get_params( + // origin is 9s after the epoch + "select\ + \n date_bin_gapfill(interval '1 minute', time, timestamp '1970-01-01T00:00:09Z') minute\ + \nfrom t\ + \nwhere time >= timestamp '1984-01-01T16:00:00Z' - interval '5 minutes'\ + \n and time <= timestamp '1984-01-01T16:00:00Z'\ + \ngroup by minute", + ).await?; + let expected = GapFillParams { + stride: 60_000_000_000, // 1 minute + first_ts: 441_820_449_000_000_000, // Sunday, January 1, 1984 3:54:09 PM + last_ts: 441_820_749_000_000_000, // Sunday, January 1, 1984 3:59:09 PM + }; + assert_eq!(expected, actual); + Ok(()) + } +} diff --git a/iox_query/src/logical_optimizer/handle_gapfill.rs b/iox_query/src/logical_optimizer/handle_gapfill.rs index dc488d37e8..9d21ce8e34 100644 --- a/iox_query/src/logical_optimizer/handle_gapfill.rs +++ b/iox_query/src/logical_optimizer/handle_gapfill.rs @@ -9,6 +9,7 @@ use datafusion::{ logical_expr::{ expr_rewriter::{ExprRewritable, ExprRewriter, RewriteRecursion}, expr_visitor::{ExprVisitable, ExpressionVisitor, 
Recursion}, + utils::expr_to_columns, Aggregate, BuiltinScalarFunction, Extension, LogicalPlan, }, optimizer::{optimizer::ApplyOrder, OptimizerConfig, OptimizerRule}, @@ -16,6 +17,7 @@ use datafusion::{ }; use query_functions::gapfill::DATE_BIN_GAPFILL_UDF_NAME; use std::{ + collections::HashSet, ops::{Bound, Range}, sync::Arc, }; @@ -112,24 +114,6 @@ fn handle_aggregate(aggr: &Aggregate) -> Result> { return Ok(None); }; - if date_bin_gapfill_args.len() != 3 { - return Err(DataFusionError::Plan(format!( - "DATE_BIN_GAPFILL expects 3 arguments, got {}", - date_bin_gapfill_args.len() - ))); - } - - let time_col = match &date_bin_gapfill_args[1] { - Expr::Column(c) => c, - _ => { - return Err(DataFusionError::Plan( - "DATE_BIN_GAPFILL requires a column as the source argument".to_string(), - )) - } - }; - let time_range = range_predicate::find_time_range(input, time_col)?; - validate_time_range(&time_range)?; - let new_aggr_plan = { // Create the aggregate node with the same output schema as the orignal // one. This means that there will be an output column called `date_bin_gapfill(...)` @@ -146,49 +130,105 @@ fn handle_aggregate(aggr: &Aggregate) -> Result> { new_aggr_plan }; - let new_gap_fill_plan = { - let mut new_group_expr: Vec<_> = new_aggr_plan - .schema() - .fields() - .iter() - .map(|f| Expr::Column(f.qualified_column())) - .collect(); - let aggr_expr = new_group_expr.split_off(group_expr.len()); - let time_column = - col(new_aggr_plan.schema().fields()[date_bin_gapfill_index].qualified_column()); - let stride = date_bin_gapfill_args - .into_iter() - .next() - .expect("there are three args"); - LogicalPlan::Extension(Extension { - node: Arc::new(GapFill::try_new( - Arc::new(new_aggr_plan), - new_group_expr, - aggr_expr, - GapFillParams { - stride, - time_column, - time_range, - }, - )?), - }) - }; + let new_gap_fill_plan = + build_gapfill_node(new_aggr_plan, date_bin_gapfill_index, date_bin_gapfill_args)?; Ok(Some(new_gap_fill_plan)) } +fn build_gapfill_node( + new_aggr_plan: LogicalPlan, + date_bin_gapfill_index: usize, + date_bin_gapfill_args: Vec, +) -> Result { + if date_bin_gapfill_args.len() != 3 { + return Err(DataFusionError::Plan(format!( + "DATE_BIN_GAPFILL expects 3 arguments, got {}", + date_bin_gapfill_args.len() + ))); + } + + let mut args_iter = date_bin_gapfill_args.into_iter(); + + // Ensure that stride argument is a scalar + let stride = args_iter.next().unwrap(); + validate_scalar_expr("stride argument to DATE_BIN_GAPFILL", &stride)?; + + // Ensure that the source argument is a column + let time_col = args_iter.next().unwrap().try_into_col().map_err(|_| { + DataFusionError::Plan( + "DATE_BIN_GAPFILL requires a column as the source argument".to_string(), + ) + })?; + + // Ensure that a time range was specified and is valid for gap filling + let time_range = range_predicate::find_time_range(new_aggr_plan.inputs()[0], &time_col)?; + validate_time_range(&time_range)?; + + // Ensure that origin argument is a scalar + let origin = args_iter.next().unwrap(); + validate_scalar_expr("origin argument to DATE_BIN_GAPFILL", &origin)?; + + // Make sure the time output to the gapfill node matches what the + // aggregate output was. 
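The `validate_scalar_expr` checks invoked above (the function itself is defined a little further down in this hunk) reduce to asking DataFusion's `expr_to_columns` whether the expression mentions any column. In isolation, and assuming the same `datafusion` crate this module already depends on:

```rust
use std::collections::HashSet;

use datafusion::{
    error::Result,
    logical_expr::utils::expr_to_columns,
    prelude::{col, lit},
};

fn main() -> Result<()> {
    // A literal stride/origin references no columns, so it is accepted.
    let mut cols = HashSet::new();
    expr_to_columns(&lit(60_000_i64), &mut cols)?;
    assert!(cols.is_empty());

    // A column-valued origin (e.g. `time2`) leaves column references behind
    // and is rejected with a plan error.
    expr_to_columns(&col("time2"), &mut cols)?;
    assert!(!cols.is_empty());
    Ok(())
}
```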
+ let time_column = + col(new_aggr_plan.schema().fields()[date_bin_gapfill_index].qualified_column()); + + let aggr = Aggregate::try_from_plan(&new_aggr_plan)?; + let mut new_group_expr: Vec<_> = aggr + .schema + .fields() + .iter() + .map(|f| Expr::Column(f.qualified_column())) + .collect(); + let aggr_expr = new_group_expr.split_off(aggr.group_expr.len()); + + Ok(LogicalPlan::Extension(Extension { + node: Arc::new(GapFill::try_new( + Arc::new(new_aggr_plan), + new_group_expr, + aggr_expr, + GapFillParams { + stride, + time_column, + origin, + time_range, + }, + )?), + })) +} + fn validate_time_range(range: &Range>) -> Result<()> { let Range { ref start, ref end } = range; - match (start, end) { - (Bound::Unbounded, Bound::Unbounded) => Err(DataFusionError::Plan( - "no time bounds found for gap fill query".to_string(), - )), + let (start, end) = match (start, end) { + (Bound::Unbounded, Bound::Unbounded) => { + return Err(DataFusionError::Plan( + "no time bounds found for gap fill query".to_string(), + )) + } (Bound::Unbounded, _) => Err(DataFusionError::Plan( "no lower time bound found for gap fill query".to_string(), )), (_, Bound::Unbounded) => Err(DataFusionError::Plan( "no upper time bound found for gap fill query".to_string(), )), - _ => Ok(()), + ( + Bound::Included(start) | Bound::Excluded(start), + Bound::Included(end) | Bound::Excluded(end), + ) => Ok((start, end)), + }?; + validate_scalar_expr("lower time bound", start)?; + validate_scalar_expr("upper time bound", end) +} + +fn validate_scalar_expr(what: &str, e: &Expr) -> Result<()> { + let mut cols = HashSet::new(); + expr_to_columns(e, &mut cols)?; + if !cols.is_empty() { + Err(DataFusionError::Plan(format!( + "{what} for gap fill query must evaluate to a scalar" + ))) + } else { + Ok(()) } } @@ -323,7 +363,7 @@ mod test { use datafusion::logical_expr::{logical_plan, LogicalPlan, LogicalPlanBuilder}; use datafusion::optimizer::optimizer::Optimizer; use datafusion::optimizer::OptimizerContext; - use datafusion::prelude::{avg, col, lit, lit_timestamp_nano, Expr}; + use datafusion::prelude::{avg, case, col, lit, lit_timestamp_nano, Expr}; use datafusion::scalar::ScalarValue; use query_functions::gapfill::DATE_BIN_GAPFILL_UDF_NAME; @@ -334,6 +374,11 @@ mod test { DataType::Timestamp(TimeUnit::Nanosecond, None), false, ), + Field::new( + "time2", + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + ), Field::new("loc", DataType::Utf8, false), Field::new("temp", DataType::Float64, false), ]); @@ -341,9 +386,13 @@ mod test { } fn date_bin_gapfill(interval: Expr, time: Expr) -> Result { + date_bin_gapfill_with_origin(interval, time, lit_timestamp_nano(0)) + } + + fn date_bin_gapfill_with_origin(interval: Expr, time: Expr, origin: Expr) -> Result { Ok(Expr::ScalarUDF { fun: query_functions::registry().udf(DATE_BIN_GAPFILL_UDF_NAME)?, - args: vec![interval, time, lit_timestamp_nano(0)], + args: vec![interval, time, origin], }) } @@ -417,7 +466,59 @@ mod test { } #[test] - fn no_time_range_err() -> Result<()> { + fn nonscalar_origin() -> Result<()> { + let plan = LogicalPlanBuilder::from(table_scan()?) + .filter( + col("time") + .gt_eq(lit_timestamp_nano(1000)) + .and(col("time").lt(lit_timestamp_nano(2000))), + )? + .aggregate( + vec![date_bin_gapfill_with_origin( + lit(ScalarValue::IntervalDayTime(Some(60_000))), + col("time"), + col("time2"), + )?], + vec![avg(col("temp"))], + )? 
+ .build()?; + assert_optimizer_err( + &plan, + "Error during planning: origin argument to DATE_BIN_GAPFILL for gap fill query must evaluate to a scalar", + ); + Ok(()) + } + + #[test] + fn nonscalar_stride() -> Result<()> { + let stride = case(col("loc")) + .when( + lit("kitchen"), + lit(ScalarValue::IntervalDayTime(Some(60_000))), + ) + .otherwise(lit(ScalarValue::IntervalDayTime(Some(30_000)))) + .unwrap(); + + let plan = LogicalPlanBuilder::from(table_scan()?) + .filter( + col("time") + .gt_eq(lit_timestamp_nano(1000)) + .and(col("time").lt(lit_timestamp_nano(2000))), + )? + .aggregate( + vec![date_bin_gapfill(stride, col("time"))?], + vec![avg(col("temp"))], + )? + .build()?; + assert_optimizer_err( + &plan, + "Error during planning: stride argument to DATE_BIN_GAPFILL for gap fill query must evaluate to a scalar", + ); + Ok(()) + } + + #[test] + fn time_range_errs() -> Result<()> { let cases = vec![ ( lit(true), @@ -431,6 +532,16 @@ mod test { col("time").lt(lit_timestamp_nano(2000)), "Error during planning: no lower time bound found for gap fill query", ), + ( + col("time").gt_eq(col("time2")).and( + col("time").lt(lit_timestamp_nano(2000))), + "Error during planning: lower time bound for gap fill query must evaluate to a scalar", + ), + ( + col("time").gt_eq(lit_timestamp_nano(2000)).and( + col("time").lt(col("time2"))), + "Error during planning: upper time bound for gap fill query must evaluate to a scalar", + ) ]; for c in cases { let plan = LogicalPlanBuilder::from(table_scan()?) diff --git a/test_helpers_end_to_end/src/snapshot_comparison.rs b/test_helpers_end_to_end/src/snapshot_comparison.rs index e2bf769db8..118b33db54 100644 --- a/test_helpers_end_to_end/src/snapshot_comparison.rs +++ b/test_helpers_end_to_end/src/snapshot_comparison.rs @@ -3,7 +3,6 @@ mod queries; use crate::snapshot_comparison::queries::TestQueries; use crate::{run_influxql, run_sql, MiniCluster}; -use arrow_util::{display::pretty_format_batches, test_util::sort_record_batch}; use snafu::{OptionExt, ResultExt, Snafu}; use std::fmt::{Display, Formatter}; use std::{ @@ -11,7 +10,6 @@ use std::{ path::{Path, PathBuf}, }; -use self::normalization::normalize_results; use self::queries::Query; #[derive(Debug, Snafu)] @@ -98,19 +96,7 @@ pub async fn run( for q in queries.iter() { output.push(format!("-- {}: {}", language, q.text())); - if q.sorted_compare() { - output.push("-- Results After Sorting".into()) - } - if q.normalized_uuids() { - output.push("-- Results After Normalizing UUIDs".into()) - } - if q.normalized_metrics() { - output.push("-- Results After Normalizing Metrics".into()) - } - if q.normalized_filters() { - output.push("-- Results After Normalizing Filters".into()) - } - + q.add_description(&mut output); let results = run_query(cluster, q, language).await?; output.extend(results); } @@ -233,7 +219,7 @@ async fn run_query( ) -> Result> { let query_text = query.text(); - let mut results = match language { + let results = match language { Language::Sql => { run_sql( query_text, @@ -252,22 +238,5 @@ async fn run_query( } }; - // compare against sorted results, if requested - if query.sorted_compare() && !results.is_empty() { - let schema = results[0].schema(); - let batch = - arrow::compute::concat_batches(&schema, &results).expect("concatenating batches"); - results = vec![sort_record_batch(batch)]; - } - - let current_results = pretty_format_batches(&results) - .unwrap() - .trim() - .lines() - .map(|s| s.to_string()) - .collect::>(); - - let current_results = normalize_results(query, 
current_results); - - Ok(current_results) + Ok(query.normalize_results(results)) } diff --git a/test_helpers_end_to_end/src/snapshot_comparison/normalization.rs b/test_helpers_end_to_end/src/snapshot_comparison/normalization.rs index 39ac696c58..8a01d777ab 100644 --- a/test_helpers_end_to_end/src/snapshot_comparison/normalization.rs +++ b/test_helpers_end_to_end/src/snapshot_comparison/normalization.rs @@ -1,9 +1,28 @@ -use crate::snapshot_comparison::queries::Query; +use arrow::record_batch::RecordBatch; +use arrow_util::{display::pretty_format_batches, test_util::sort_record_batch}; use once_cell::sync::Lazy; use regex::{Captures, Regex}; use std::{borrow::Cow, collections::HashMap}; use uuid::Uuid; +/// Match the parquet UUID +/// +/// For example, given +/// `32/51/216/13452/1d325760-2b20-48de-ab48-2267b034133d.parquet` +/// +/// matches `1d325760-2b20-48de-ab48-2267b034133d` +static REGEX_UUID: Lazy = Lazy::new(|| { + Regex::new("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}").expect("UUID regex") +}); + +/// Match the parquet directory names +/// For example, given +/// `32/51/216/13452/1d325760-2b20-48de-ab48-2267b034133d.parquet` +/// +/// matches `32/51/216/13452` +static REGEX_DIRS: Lazy = + Lazy::new(|| Regex::new(r#"[0-9]+/[0-9]+/[0-9]+/[0-9]+"#).expect("directory regex")); + /// Replace table row separators of flexible width with fixed with. This is required /// because the original timing values may differ in "printed width", so the table /// cells have different widths and hence the separators / borders. E.g.: @@ -22,93 +41,159 @@ static REGEX_LINESEP: Lazy = Lazy::new(|| Regex::new(r#"[+-]{6,}"#).expec /// ` |` -> ` |` static REGEX_COL: Lazy = Lazy::new(|| Regex::new(r#"\s+\|"#).expect("col regex")); +/// Matches line like `metrics=[foo=1, bar=2]` +static REGEX_METRICS: Lazy = + Lazy::new(|| Regex::new(r#"metrics=\[([^\]]*)\]"#).expect("metrics regex")); + +/// Matches things like `1s`, `1.2ms` and `10.2μs` +static REGEX_TIMING: Lazy = + Lazy::new(|| Regex::new(r#"[0-9]+(\.[0-9]+)?.s"#).expect("timing regex")); + +/// Matches things like `FilterExec: time@2 < -9223372036854775808 OR time@2 > 1640995204240217000` +static REGEX_FILTER: Lazy = + Lazy::new(|| Regex::new("FilterExec: .*").expect("filter regex")); + fn normalize_for_variable_width(s: Cow) -> String { let s = REGEX_LINESEP.replace_all(&s, "----------"); REGEX_COL.replace_all(&s, " |").to_string() } -pub(crate) fn normalize_results(query: &Query, mut current_results: Vec) -> Vec { - // normalize UUIDs, if requested - if query.normalized_uuids() { - let regex_uuid = Regex::new("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}") - .expect("UUID regex"); - let regex_dirs = Regex::new(r#"[0-9]+/[0-9]+/[0-9]+/[0-9]+"#).expect("directory regex"); +/// A query to run with optional annotations +#[derive(Debug, PartialEq, Eq, Default)] +pub struct Normalizer { + /// If true, results are sorted first + pub sorted_compare: bool, - let mut seen: HashMap = HashMap::new(); - current_results = current_results - .into_iter() - .map(|s| { - let s = regex_uuid.replace_all(&s, |s: &Captures| { - let next = seen.len() as u128; - Uuid::from_u128( - *seen - .entry(s.get(0).unwrap().as_str().to_owned()) - .or_insert(next), - ) - .to_string() - }); + /// If true, replace UUIDs with static placeholders. 
+ pub normalized_uuids: bool, - let s = normalize_for_variable_width(s); + /// If true, normalize timings in queries by replacing them with + /// static placeholders, for example: + /// + /// `1s` -> `1.234ms` + pub normalized_metrics: bool, - regex_dirs.replace_all(&s, "1/1/1/1").to_string() - }) - .collect(); - } - - // normalize metrics, if requested - if query.normalized_metrics() { - // Parse regex once and apply to all rows. See description around the `replace...` calls on - // why/how the regexes are used. - let regex_metrics = Regex::new(r#"metrics=\[([^\]]*)\]"#).expect("metrics regex"); - let regex_timing = Regex::new(r#"[0-9]+(\.[0-9]+)?.s"#).expect("timing regex"); - - current_results = current_results - .into_iter() - .map(|s| { - // Replace timings with fixed value, e.g.: - // - // `1s` -> `1.234ms` - // `1.2ms` -> `1.234ms` - // `10.2μs` -> `1.234ms` - let s = regex_timing.replace_all(&s, "1.234ms"); - - let s = normalize_for_variable_width(s); - - // Metrics are currently ordered by value (not by key), so different timings may - // reorder them. We "parse" the list and normalize the sorting. E.g.: - // - // `metrics=[]` => `metrics=[]` - // `metrics=[foo=1, bar=2]` => `metrics=[bar=2, foo=1]` - // `metrics=[foo=2, bar=1]` => `metrics=[bar=1, foo=2]` - regex_metrics - .replace_all(&s, |c: &Captures| { - let mut metrics: Vec<_> = c[1].split(", ").collect(); - metrics.sort(); - format!("metrics=[{}]", metrics.join(", ")) - }) - .to_string() - }) - .collect(); - } - - // normalize Filters, if requested - // - // Converts: - // FilterExec: time@2 < -9223372036854775808 OR time@2 > 1640995204240217000 - // - // to - // FilterExec: - if query.normalized_filters() { - let filter_regex = Regex::new("FilterExec: .*").expect("filter regex"); - current_results = current_results - .into_iter() - .map(|s| { - filter_regex - .replace_all(&s, |_: &Captures| "FilterExec: ") - .to_string() - }) - .collect(); - } - - current_results + /// if true, normalize filter predicates for explain plans + /// `FilterExec: ` + pub normalized_filters: bool, +} + +impl Normalizer { + #[cfg(test)] + pub fn new() -> Self { + Default::default() + } + + /// Take the output of running the query and apply the specified normalizations to them + pub fn normalize_results(&self, mut results: Vec) -> Vec { + // compare against sorted results, if requested + if self.sorted_compare && !results.is_empty() { + let schema = results[0].schema(); + let batch = + arrow::compute::concat_batches(&schema, &results).expect("concatenating batches"); + results = vec![sort_record_batch(batch)]; + } + + let mut current_results = pretty_format_batches(&results) + .unwrap() + .trim() + .lines() + .map(|s| s.to_string()) + .collect::>(); + + // normalize UUIDs, if requested + if self.normalized_uuids { + let mut seen: HashMap = HashMap::new(); + current_results = current_results + .into_iter() + .map(|s| { + // Rewrite parquet directory names like + // `51/216/13452/1d325760-2b20-48de-ab48-2267b034133d.parquet` + // + // to: + // 1/1/1/1/00000000-0000-0000-0000-000000000000.parquet + + let s = REGEX_UUID.replace_all(&s, |s: &Captures| { + let next = seen.len() as u128; + Uuid::from_u128( + *seen + .entry(s.get(0).unwrap().as_str().to_owned()) + .or_insert(next), + ) + .to_string() + }); + + let s = normalize_for_variable_width(s); + REGEX_DIRS.replace_all(&s, "1/1/1/1").to_string() + }) + .collect(); + } + + // normalize metrics, if requested + if self.normalized_metrics { + current_results = current_results + .into_iter() + .map(|s| { 
+ // Replace timings with fixed value, e.g.: + // + // `1s` -> `1.234ms` + // `1.2ms` -> `1.234ms` + // `10.2μs` -> `1.234ms` + let s = REGEX_TIMING.replace_all(&s, "1.234ms"); + + let s = normalize_for_variable_width(s); + + // Metrics are currently ordered by value (not by key), so different timings may + // reorder them. We "parse" the list and normalize the sorting. E.g.: + // + // `metrics=[]` => `metrics=[]` + // `metrics=[foo=1, bar=2]` => `metrics=[bar=2, foo=1]` + // `metrics=[foo=2, bar=1]` => `metrics=[bar=1, foo=2]` + REGEX_METRICS + .replace_all(&s, |c: &Captures| { + let mut metrics: Vec<_> = c[1].split(", ").collect(); + metrics.sort(); + format!("metrics=[{}]", metrics.join(", ")) + }) + .to_string() + }) + .collect(); + } + + // normalize Filters, if requested + // + // Converts: + // FilterExec: time@2 < -9223372036854775808 OR time@2 > 1640995204240217000 + // + // to + // FilterExec: + if self.normalized_filters { + current_results = current_results + .into_iter() + .map(|s| { + REGEX_FILTER + .replace_all(&s, |_: &Captures| "FilterExec: ") + .to_string() + }) + .collect(); + } + + current_results + } + + /// Adds information on what normalizations were applied to the input + pub fn add_description(&self, output: &mut Vec) { + if self.sorted_compare { + output.push("-- Results After Sorting".into()) + } + if self.normalized_uuids { + output.push("-- Results After Normalizing UUIDs".into()) + } + if self.normalized_metrics { + output.push("-- Results After Normalizing Metrics".into()) + } + if self.normalized_filters { + output.push("-- Results After Normalizing Filters".into()) + } + } } diff --git a/test_helpers_end_to_end/src/snapshot_comparison/queries.rs b/test_helpers_end_to_end/src/snapshot_comparison/queries.rs index 3c76195aba..70a5f8da4c 100644 --- a/test_helpers_end_to_end/src/snapshot_comparison/queries.rs +++ b/test_helpers_end_to_end/src/snapshot_comparison/queries.rs @@ -1,22 +1,12 @@ +use arrow::record_batch::RecordBatch; + +use super::normalization::Normalizer; + /// A query to run with optional annotations #[derive(Debug, PartialEq, Eq, Default)] pub struct Query { - /// If true, results are sorted first prior to comparison, meaning that differences in the - /// output order compared with expected order do not cause a diff - sorted_compare: bool, - - /// If true, replace UUIDs with static placeholders. - normalized_uuids: bool, - - /// If true, normalize timings in queries by replacing them with - /// static placeholders, for example: - /// - /// `1s` -> `1.234ms` - normalized_metrics: bool, - - /// if true, normalize filter predicates for explain plans - /// `FilterExec: ` - normalized_filters: bool, + /// Describes how query text should be normalized + normalizer: Normalizer, /// The query string text: String, @@ -27,49 +17,49 @@ impl Query { fn new(text: impl Into) -> Self { let text = text.into(); Self { - sorted_compare: false, - normalized_uuids: false, - normalized_metrics: false, - normalized_filters: false, + normalizer: Normalizer::new(), text, } } - #[cfg(test)] - fn with_sorted_compare(mut self) -> Self { - self.sorted_compare = true; + pub fn text(&self) -> &str { + &self.text + } + + pub fn with_sorted_compare(mut self) -> Self { + self.normalizer.sorted_compare = true; self } - /// Get a reference to the query text. - pub fn text(&self) -> &str { - self.text.as_ref() + pub fn with_normalized_uuids(mut self) -> Self { + self.normalizer.normalized_uuids = true; + self } - /// Get the query's sorted compare. 
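Back in `normalization.rs`: the consolidated `REGEX_TIMING`/`REGEX_METRICS` rewrites above can be sanity-checked in isolation with nothing but the `regex` crate. The input line below is hypothetical, but the patterns are the same:

```rust
use regex::{Captures, Regex};

fn main() {
    // Same patterns as REGEX_TIMING and REGEX_METRICS above.
    let timing = Regex::new(r#"[0-9]+(\.[0-9]+)?.s"#).unwrap();
    let metrics = Regex::new(r#"metrics=\[([^\]]*)\]"#).unwrap();

    let line = "operator metrics=[foo=2, bar=1] elapsed_compute=10.2μs";

    // Timings collapse to a fixed placeholder...
    let line = timing.replace_all(line, "1.234ms");

    // ...and metrics are re-sorted by key so value-dependent ordering cannot
    // cause spurious snapshot diffs.
    let line = metrics.replace_all(&line, |c: &Captures| {
        let mut parts: Vec<_> = c[1].split(", ").collect();
        parts.sort();
        format!("metrics=[{}]", parts.join(", "))
    });

    assert_eq!(line, "operator metrics=[bar=1, foo=2] elapsed_compute=1.234ms");
}
```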
- pub fn sorted_compare(&self) -> bool { - self.sorted_compare + pub fn with_normalize_metrics(mut self) -> Self { + self.normalizer.normalized_metrics = true; + self } - /// Get queries normalized UUID - pub fn normalized_uuids(&self) -> bool { - self.normalized_uuids + pub fn with_normalize_filters(mut self) -> Self { + self.normalizer.normalized_filters = true; + self } - /// Use normalized timing values - pub fn normalized_metrics(&self) -> bool { - self.normalized_metrics + /// Take the output of running the query and apply the specified normalizations to them + pub fn normalize_results(&self, results: Vec) -> Vec { + self.normalizer.normalize_results(results) } - /// Use normalized filter plans - pub fn normalized_filters(&self) -> bool { - self.normalized_filters + /// Adds information on what normalizations were applied to the input + pub fn add_description(&self, output: &mut Vec) { + self.normalizer.add_description(output) } } #[derive(Debug, Default)] struct QueryBuilder { - query: Query, + pub query: Query, } impl QueryBuilder { @@ -85,22 +75,6 @@ impl QueryBuilder { self.query.text.push(c) } - fn sorted_compare(&mut self) { - self.query.sorted_compare = true; - } - - fn normalized_uuids(&mut self) { - self.query.normalized_uuids = true; - } - - fn normalize_metrics(&mut self) { - self.query.normalized_metrics = true; - } - - fn normalize_filters(&mut self) { - self.query.normalized_filters = true; - } - fn is_empty(&self) -> bool { self.query.text.is_empty() } @@ -125,54 +99,57 @@ impl TestQueries { S: AsRef, { let mut queries = vec![]; - let mut builder = QueryBuilder::new(); - lines.into_iter().for_each(|line| { - let line = line.as_ref().trim(); - const COMPARE_STR: &str = "-- IOX_COMPARE: "; - if line.starts_with(COMPARE_STR) { - let (_, options) = line.split_at(COMPARE_STR.len()); - for option in options.split(',') { - let option = option.trim(); - match option { - "sorted" => { - builder.sorted_compare(); + let mut builder = lines + .into_iter() + .fold(QueryBuilder::new(), |mut builder, line| { + let line = line.as_ref().trim(); + const COMPARE_STR: &str = "-- IOX_COMPARE: "; + if line.starts_with(COMPARE_STR) { + let (_, options) = line.split_at(COMPARE_STR.len()); + for option in options.split(',') { + let option = option.trim(); + match option { + "sorted" => { + builder.query = builder.query.with_sorted_compare(); + } + "uuid" => { + builder.query = builder.query.with_normalized_uuids(); + } + "metrics" => { + builder.query = builder.query.with_normalize_metrics(); + } + "filters" => { + builder.query = builder.query.with_normalize_filters(); + } + _ => {} } - "uuid" => { - builder.normalized_uuids(); - } - "metrics" => { - builder.normalize_metrics(); - } - "filters" => { - builder.normalize_filters(); - } - _ => {} } } - } - if line.starts_with("--") { - return; - } - if line.is_empty() { - return; - } - - // replace newlines - if !builder.is_empty() { - builder.push(' '); - } - builder.push_str(line); - - // declare queries when we see a semicolon at the end of the line - if line.ends_with(';') { - if let Some(q) = builder.build_and_reset() { - queries.push(q); + if line.starts_with("--") { + return builder; + } + if line.is_empty() { + return builder; } - } - }); + // replace newlines + if !builder.is_empty() { + builder.push(' '); + } + builder.push_str(line); + + // declare queries when we see a semicolon at the end of the line + if line.ends_with(';') { + if let Some(q) = builder.build_and_reset() { + queries.push(q); + } + } + builder + }); + + // get last 
one, if any if let Some(q) = builder.build_and_reset() { queries.push(q); } diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index ad6f645019..20c32270f1 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -37,6 +37,7 @@ flatbuffers = { version = "23", features = ["std"] } flate2 = { version = "1", features = ["miniz_oxide", "rust_backend"] } futures-channel = { version = "0.3", features = ["alloc", "futures-sink", "sink", "std"] } futures-core = { version = "0.3", features = ["alloc", "std"] } +futures-executor = { version = "0.3", features = ["std"] } futures-io = { version = "0.3", features = ["std"] } futures-sink = { version = "0.3", features = ["alloc", "std"] } futures-task = { version = "0.3", default-features = false, features = ["alloc", "std"] } @@ -74,8 +75,8 @@ serde_json = { version = "1", features = ["raw_value", "std"] } sha2 = { version = "0.10", features = ["std"] } similar = { version = "2", features = ["inline", "text"] } smallvec = { version = "1", default-features = false, features = ["union"] } -sqlx = { version = "0.6", features = ["_rt-tokio", "json", "macros", "migrate", "postgres", "runtime-tokio-rustls", "sqlx-macros", "tls", "uuid"] } -sqlx-core = { version = "0.6", default-features = false, features = ["_rt-tokio", "_tls-rustls", "any", "base64", "crc", "dirs", "hkdf", "hmac", "json", "md-5", "migrate", "postgres", "rand", "runtime-tokio-rustls", "rustls", "rustls-pemfile", "serde", "serde_json", "sha1", "sha2", "tokio-stream", "uuid", "webpki-roots", "whoami"] } +sqlx = { version = "0.6", features = ["_rt-tokio", "json", "macros", "migrate", "postgres", "runtime-tokio-rustls", "sqlite", "sqlx-macros", "tls", "uuid"] } +sqlx-core = { version = "0.6", default-features = false, features = ["_rt-tokio", "_tls-rustls", "any", "base64", "crc", "dirs", "flume", "futures-executor", "hkdf", "hmac", "json", "libsqlite3-sys", "md-5", "migrate", "postgres", "rand", "runtime-tokio-rustls", "rustls", "rustls-pemfile", "serde", "serde_json", "sha1", "sha2", "sqlite", "tokio-stream", "uuid", "webpki-roots", "whoami"] } thrift = { version = "0.17", features = ["log", "server", "threadpool"] } tokio = { version = "1", features = ["bytes", "fs", "io-std", "io-util", "libc", "macros", "memchr", "mio", "net", "num_cpus", "parking_lot", "rt", "rt-multi-thread", "signal", "signal-hook-registry", "socket2", "sync", "time", "tokio-macros", "tracing"] } tokio-stream = { version = "0.1", features = ["fs", "net", "time"] } @@ -107,6 +108,7 @@ either = { version = "1", features = ["use_std"] } fixedbitset = { version = "0.4", features = ["std"] } futures-channel = { version = "0.3", features = ["alloc", "futures-sink", "sink", "std"] } futures-core = { version = "0.3", features = ["alloc", "std"] } +futures-executor = { version = "0.3", features = ["std"] } futures-io = { version = "0.3", features = ["std"] } futures-sink = { version = "0.3", features = ["alloc", "std"] } futures-task = { version = "0.3", default-features = false, features = ["alloc", "std"] } @@ -137,8 +139,8 @@ serde = { version = "1", features = ["derive", "rc", "serde_derive", "std"] } serde_json = { version = "1", features = ["raw_value", "std"] } sha2 = { version = "0.10", features = ["std"] } smallvec = { version = "1", default-features = false, features = ["union"] } -sqlx-core = { version = "0.6", default-features = false, features = ["_rt-tokio", "_tls-rustls", "any", "base64", "crc", "dirs", "hkdf", "hmac", "json", "md-5", "migrate", "postgres", "rand", 
"runtime-tokio-rustls", "rustls", "rustls-pemfile", "serde", "serde_json", "sha1", "sha2", "tokio-stream", "uuid", "webpki-roots", "whoami"] } -sqlx-macros = { version = "0.6", default-features = false, features = ["_rt-tokio", "json", "migrate", "postgres", "runtime-tokio-rustls", "serde_json", "sha2", "uuid"] } +sqlx-core = { version = "0.6", default-features = false, features = ["_rt-tokio", "_tls-rustls", "any", "base64", "crc", "dirs", "flume", "futures-executor", "hkdf", "hmac", "json", "libsqlite3-sys", "md-5", "migrate", "postgres", "rand", "runtime-tokio-rustls", "rustls", "rustls-pemfile", "serde", "serde_json", "sha1", "sha2", "sqlite", "tokio-stream", "uuid", "webpki-roots", "whoami"] } +sqlx-macros = { version = "0.6", default-features = false, features = ["_rt-tokio", "json", "migrate", "postgres", "runtime-tokio-rustls", "serde_json", "sha2", "sqlite", "uuid"] } syn = { version = "1", features = ["clone-impls", "derive", "extra-traits", "full", "parsing", "printing", "proc-macro", "quote", "visit", "visit-mut"] } tokio = { version = "1", features = ["bytes", "fs", "io-std", "io-util", "libc", "macros", "memchr", "mio", "net", "num_cpus", "parking_lot", "rt", "rt-multi-thread", "signal", "signal-hook-registry", "socket2", "sync", "time", "tokio-macros", "tracing"] } tokio-stream = { version = "0.1", features = ["fs", "net", "time"] }