From b3ee1032b363cbe323f3229e6cf784d656842fb5 Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Mon, 17 Jan 2022 14:09:10 -0500 Subject: [PATCH 01/32] feat: add memory based catalog Adds a memory based catalog, useful for testing purposes. Separates getting the namespace schema from the namespace and moves the schema code out interface out of postgres. --- iox_catalog/src/interface.rs | 82 ++++++++++-- iox_catalog/src/lib.rs | 1 + iox_catalog/src/mem.rs | 248 +++++++++++++++++++++++++++++++++++ iox_catalog/src/postgres.rs | 65 ++------- 4 files changed, 333 insertions(+), 63 deletions(-) create mode 100644 iox_catalog/src/mem.rs diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index e3e122fe0f..7da8ecce29 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -2,7 +2,7 @@ use async_trait::async_trait; use influxdb_line_protocol::FieldValue; -use snafu::Snafu; +use snafu::{OptionExt, Snafu}; use std::collections::BTreeMap; use std::convert::TryFrom; use std::fmt::Formatter; @@ -33,6 +33,9 @@ pub enum Error { name ))] UnknownColumnType { data_type: i16, name: String }, + + #[snafu(display("namespace {} not found", name))] + NamespaceNotFound { name: String }, } /// A specialized `Error` for Catalog errors @@ -72,18 +75,18 @@ pub trait QueryPoolRepo { /// Functions for working with namespaces in the catalog #[async_trait] pub trait NamespaceRepo { - /// Creates the namespace in the catalog, or get the existing record by name. Then - /// constructs a namespace schema with all tables and columns under the namespace. + /// Creates the namespace in the catalog. If one by the same name already exists, an + /// error is returned. async fn create( &self, name: &str, retention_duration: &str, kafka_topic_id: i32, query_pool_id: i16, - ) -> Result; + ) -> Result; - /// Gets the namespace schema including all tables and columns. - async fn get_by_name(&self, name: &str) -> Result>; + /// Gets the namespace by its unique name. + async fn get_by_name(&self, name: &str) -> Result>; } /// Functions for working with tables in the catalog @@ -124,7 +127,7 @@ pub trait SequencerRepo { } /// Data object for a kafka topic -#[derive(Debug, Eq, PartialEq, sqlx::FromRow)] +#[derive(Debug, Clone, Eq, PartialEq, sqlx::FromRow)] pub struct KafkaTopic { /// The id of the topic pub id: i32, @@ -133,7 +136,7 @@ pub struct KafkaTopic { } /// Data object for a query pool -#[derive(Debug, Eq, PartialEq, sqlx::FromRow)] +#[derive(Debug, Clone, Eq, PartialEq, sqlx::FromRow)] pub struct QueryPool { /// The id of the pool pub id: i16, @@ -142,7 +145,7 @@ pub struct QueryPool { } /// Data object for a namespace -#[derive(Debug, sqlx::FromRow)] +#[derive(Debug, Clone, sqlx::FromRow)] pub struct Namespace { /// The id of the namespace pub id: i32, @@ -181,6 +184,63 @@ impl NamespaceSchema { } } + /// Gets the namespace schema including all tables and columns. + pub async fn get_by_name( + name: &str, + repo: &T, + ) -> Result> { + let namespace_repo = repo.namespace(); + let table_repo = repo.table(); + let column_repo = repo.column(); + + let namespace = namespace_repo + .get_by_name(name) + .await? + .context(NamespaceNotFoundSnafu { name })?; + + // get the columns first just in case someone else is creating schema while we're doing this. 
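        // Reading columns before tables means every column fetched here will have its table
        // present in the table listing that follows; with the reverse order, a table and its
        // columns created concurrently could surface a column whose table is missing from the
        // map built below, and the `unwrap()` on that lookup would panic.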
+ let columns = column_repo.list_by_namespace_id(namespace.id).await?; + let tables = table_repo.list_by_namespace_id(namespace.id).await?; + + let mut namespace = Self::new( + namespace.id, + namespace.kafka_topic_id, + namespace.query_pool_id, + ); + + let mut table_id_to_schema = BTreeMap::new(); + for t in tables { + table_id_to_schema.insert(t.id, (t.name, TableSchema::new(t.id))); + } + + for c in columns { + let (_, t) = table_id_to_schema.get_mut(&c.table_id).unwrap(); + match ColumnType::try_from(c.column_type) { + Ok(column_type) => { + t.columns.insert( + c.name, + ColumnSchema { + id: c.id, + column_type, + }, + ); + } + _ => { + return Err(Error::UnknownColumnType { + data_type: c.column_type, + name: c.name.to_string(), + }); + } + } + } + + for (_, (table_name, schema)) in table_id_to_schema { + namespace.tables.insert(table_name, schema); + } + + Ok(Some(namespace)) + } + /// Adds tables and columns to the `NamespaceSchema`. These are created /// incrementally while validating the schema for a write and this helper /// method takes them in to add them to the schema. @@ -215,7 +275,7 @@ impl NamespaceSchema { } /// Data object for a table -#[derive(Debug, sqlx::FromRow, Eq, PartialEq)] +#[derive(Debug, Clone, sqlx::FromRow, Eq, PartialEq)] pub struct Table { /// The id of the table pub id: i32, @@ -252,7 +312,7 @@ impl TableSchema { } /// Data object for a column -#[derive(Debug, sqlx::FromRow, Eq, PartialEq)] +#[derive(Debug, Clone, sqlx::FromRow, Eq, PartialEq)] pub struct Column { /// the column id pub id: i32, diff --git a/iox_catalog/src/lib.rs b/iox_catalog/src/lib.rs index 1c9c6e0654..b2dead2df3 100644 --- a/iox_catalog/src/lib.rs +++ b/iox_catalog/src/lib.rs @@ -25,6 +25,7 @@ const SHARED_QUERY_POOL: &str = SHARED_KAFKA_TOPIC; const TIME_COLUMN: &str = "time"; pub mod interface; +pub mod mem; pub mod postgres; /// Given the lines of a write request and an in memory schema, this will validate the write diff --git a/iox_catalog/src/mem.rs b/iox_catalog/src/mem.rs new file mode 100644 index 0000000000..ed2821c426 --- /dev/null +++ b/iox_catalog/src/mem.rs @@ -0,0 +1,248 @@ +//! This module implements an in-memory implementation of the iox_catalog interface. It can be +//! used for testing or for an IOx designed to run without catalog persistence. 
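A rough usage sketch of this in-memory catalog, driven through the repo traits it implements (illustrative only: `MemCatalog::new()` is added by the follow-up commit that makes the type public, and the calls assume an async test context):

    let catalog = Arc::new(MemCatalog::new());
    let topic = catalog.kafka_topic().create_or_get("foo").await.unwrap();
    let pool = catalog.query_pool().create_or_get("foo").await.unwrap();
    let namespace = catalog
        .namespace()
        .create("test_namespace", "inf", topic.id, pool.id)
        .await
        .unwrap();
    assert_eq!(namespace.kafka_topic_id, topic.id);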
+ +use crate::interface::{ + Column, ColumnRepo, ColumnType, Error, KafkaTopic, KafkaTopicRepo, Namespace, NamespaceRepo, + QueryPool, QueryPoolRepo, RepoCollection, Result, Sequencer, SequencerRepo, Table, TableRepo, +}; +use async_trait::async_trait; +use std::convert::TryFrom; +use std::sync::{Arc, Mutex}; + +struct MemCatalog { + collections: Mutex, +} + +struct MemCollections { + kafka_topics: Vec, + query_pools: Vec, + namespaces: Vec, + tables: Vec, + columns: Vec, + sequencers: Vec, +} + +impl RepoCollection for Arc { + fn kafka_topic(&self) -> Arc { + Self::clone(self) as Arc + } + + fn query_pool(&self) -> Arc { + Self::clone(self) as Arc + } + + fn namespace(&self) -> Arc { + Self::clone(self) as Arc + } + + fn table(&self) -> Arc { + Self::clone(self) as Arc + } + + fn column(&self) -> Arc { + Self::clone(self) as Arc + } + + fn sequencer(&self) -> Arc { + Self::clone(self) as Arc + } +} + +#[async_trait] +impl KafkaTopicRepo for MemCatalog { + async fn create_or_get(&self, name: &str) -> Result { + let mut collections = self.collections.lock().expect("mutex poisoned"); + + let topic = match collections.kafka_topics.iter().find(|t| t.name == name) { + Some(t) => t, + None => { + let topic = KafkaTopic { + id: collections.kafka_topics.len() as i32 + 1, + name: name.to_string(), + }; + collections.kafka_topics.push(topic); + collections.kafka_topics.last().unwrap() + } + }; + + Ok(topic.clone()) + } +} + +#[async_trait] +impl QueryPoolRepo for MemCatalog { + async fn create_or_get(&self, name: &str) -> Result { + let mut collections = self.collections.lock().expect("mutex poisoned"); + + let pool = match collections.query_pools.iter().find(|t| t.name == name) { + Some(t) => t, + None => { + let pool = QueryPool { + id: collections.query_pools.len() as i16 + 1, + name: name.to_string(), + }; + collections.query_pools.push(pool); + collections.query_pools.last().unwrap() + } + }; + + Ok(pool.clone()) + } +} + +#[async_trait] +impl NamespaceRepo for MemCatalog { + async fn create( + &self, + name: &str, + retention_duration: &str, + kafka_topic_id: i32, + query_pool_id: i16, + ) -> Result { + let mut collections = self.collections.lock().expect("mutex poisoned"); + if collections.namespaces.iter().any(|n| n.name == name) { + return Err(Error::NameExists { + name: name.to_string(), + }); + } + + let namespace = Namespace { + id: collections.namespaces.len() as i32 + 1, + name: name.to_string(), + kafka_topic_id, + query_pool_id, + retention_duration: Some(retention_duration.to_string()), + }; + collections.namespaces.push(namespace); + Ok(collections.namespaces.last().unwrap().clone()) + } + + async fn get_by_name(&self, name: &str) -> Result> { + let collections = self.collections.lock().expect("mutex poisoned"); + Ok(collections + .namespaces + .iter() + .find(|n| n.name == name) + .cloned()) + } +} + +#[async_trait] +impl TableRepo for MemCatalog { + async fn create_or_get(&self, name: &str, namespace_id: i32) -> Result
{ + let mut collections = self.collections.lock().expect("mutex poisoned"); + + let table = match collections.tables.iter().find(|t| t.name == name) { + Some(t) => t, + None => { + let table = Table { + id: collections.tables.len() as i32 + 1, + namespace_id, + name: name.to_string(), + }; + collections.tables.push(table); + collections.tables.last().unwrap() + } + }; + + Ok(table.clone()) + } + + async fn list_by_namespace_id(&self, namespace_id: i32) -> Result> { + let collections = self.collections.lock().expect("mutex poisoned"); + let tables: Vec<_> = collections + .tables + .iter() + .filter(|t| t.namespace_id == namespace_id) + .cloned() + .collect(); + Ok(tables) + } +} + +#[async_trait] +impl ColumnRepo for MemCatalog { + async fn create_or_get( + &self, + name: &str, + table_id: i32, + column_type: ColumnType, + ) -> Result { + let mut collections = self.collections.lock().expect("mutex poisoned"); + + let column = match collections.columns.iter().find(|t| t.name == name) { + Some(c) => { + if column_type as i16 != c.column_type { + return Err(Error::ColumnTypeMismatch { + name: name.to_string(), + existing: ColumnType::try_from(c.column_type).unwrap().to_string(), + new: column_type.to_string(), + }); + } + + c + } + None => { + let column = Column { + id: collections.columns.len() as i32 + 1, + table_id, + name: name.to_string(), + column_type: column_type as i16, + }; + collections.columns.push(column); + collections.columns.last().unwrap() + } + }; + + Ok(column.clone()) + } + + async fn list_by_namespace_id(&self, namespace_id: i32) -> Result> { + let mut columns = vec![]; + + let collections = self.collections.lock().expect("mutex poisoned"); + for t in collections + .tables + .iter() + .filter(|t| t.namespace_id == namespace_id) + { + for c in collections.columns.iter().filter(|c| c.table_id == t.id) { + columns.push(c.clone()); + } + } + + Ok(columns) + } +} + +#[async_trait] +impl SequencerRepo for MemCatalog { + async fn create_or_get(&self, topic: &KafkaTopic, partition: i32) -> Result { + let mut collections = self.collections.lock().expect("mutex poisoned"); + + let sequencer = match collections + .sequencers + .iter() + .find(|s| s.kafka_topic_id == topic.id && s.kafka_partition == partition) + { + Some(t) => t, + None => { + let sequencer = Sequencer { + id: collections.sequencers.len() as i16 + 1, + kafka_topic_id: topic.id, + kafka_partition: partition, + min_unpersisted_sequence_number: 0, + }; + collections.sequencers.push(sequencer); + collections.sequencers.last().unwrap() + } + }; + + Ok(*sequencer) + } + + async fn list(&self) -> Result> { + let collections = self.collections.lock().expect("mutex poisoned"); + Ok(collections.sequencers.clone()) + } +} diff --git a/iox_catalog/src/postgres.rs b/iox_catalog/src/postgres.rs index 1bb43ca80b..dd49e27996 100644 --- a/iox_catalog/src/postgres.rs +++ b/iox_catalog/src/postgres.rs @@ -1,15 +1,12 @@ //! 
A Postgres backed implementation of the Catalog use crate::interface::{ - Column, ColumnRepo, ColumnSchema, ColumnType, Error, KafkaTopic, KafkaTopicRepo, Namespace, - NamespaceRepo, NamespaceSchema, QueryPool, QueryPoolRepo, RepoCollection, Result, Sequencer, - SequencerRepo, Table, TableRepo, TableSchema, + Column, ColumnRepo, ColumnType, Error, KafkaTopic, KafkaTopicRepo, Namespace, NamespaceRepo, + QueryPool, QueryPoolRepo, RepoCollection, Result, Sequencer, SequencerRepo, Table, TableRepo, }; use async_trait::async_trait; use observability_deps::tracing::info; use sqlx::{postgres::PgPoolOptions, Executor, Pool, Postgres}; -use std::collections::BTreeMap; -use std::convert::TryFrom; use std::sync::Arc; use std::time::Duration; @@ -130,7 +127,7 @@ impl NamespaceRepo for PostgresCatalog { retention_duration: &str, kafka_topic_id: i32, query_pool_id: i16, - ) -> Result { + ) -> Result { let rec = sqlx::query_as::<_, Namespace>( r#" INSERT INTO namespace ( name, retention_duration, kafka_topic_id, query_pool_id ) @@ -156,10 +153,10 @@ RETURNING * } })?; - Ok(NamespaceSchema::new(rec.id, kafka_topic_id, query_pool_id)) + Ok(rec) } - async fn get_by_name(&self, name: &str) -> Result> { + async fn get_by_name(&self, name: &str) -> Result> { // TODO: maybe get all the data in a single call to Postgres? let rec = sqlx::query_as::<_, Namespace>( r#" @@ -175,47 +172,8 @@ SELECT * FROM namespace WHERE name = $1; } let namespace = rec.map_err(|e| Error::SqlxError { source: e })?; - // get the columns first just in case someone else is creating schema while we're doing this. - let columns = ColumnRepo::list_by_namespace_id(self, namespace.id).await?; - let tables = TableRepo::list_by_namespace_id(self, namespace.id).await?; - let mut namespace = NamespaceSchema::new( - namespace.id, - namespace.kafka_topic_id, - namespace.query_pool_id, - ); - - let mut table_id_to_schema = BTreeMap::new(); - for t in tables { - table_id_to_schema.insert(t.id, (t.name, TableSchema::new(t.id))); - } - - for c in columns { - let (_, t) = table_id_to_schema.get_mut(&c.table_id).unwrap(); - match ColumnType::try_from(c.column_type) { - Ok(column_type) => { - t.columns.insert( - c.name, - ColumnSchema { - id: c.id, - column_type, - }, - ); - } - _ => { - return Err(Error::UnknownColumnType { - data_type: c.column_type, - name: c.name.to_string(), - }); - } - } - } - - for (_, (table_name, schema)) in table_id_to_schema { - namespace.tables.insert(table_name, schema); - } - - return Ok(Some(namespace)); + Ok(Some(namespace)) } } @@ -390,9 +348,12 @@ fn is_fk_violation(e: &sqlx::Error) -> bool { #[cfg(test)] mod tests { use super::*; - use crate::{create_or_get_default_records, validate_or_insert_schema}; + use crate::{ + create_or_get_default_records, interface::NamespaceSchema, validate_or_insert_schema, + }; use futures::{stream::FuturesOrdered, StreamExt}; use influxdb_line_protocol::parse_lines; + use std::collections::BTreeMap; use std::env; // Helper macro to skip tests if TEST_INTEGRATION and the AWS environment variables are not set. 
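    // For the catalog tests below, the variables in question are TEST_INTEGRATION and
    // DATABASE_URL; `setup_db` reads DATABASE_URL to reach the Postgres instance that has had
    // the sqlx migrations applied (see this crate's README).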
@@ -535,7 +496,7 @@ m2,t3=b f1=true 1 let new_schema = new_schema.unwrap(); // ensure new schema is in the db - let schema_from_db = NamespaceRepo::get_by_name(postgres.as_ref(), "asdf") + let schema_from_db = NamespaceSchema::get_by_name("asdf", &postgres) .await .unwrap() .unwrap(); @@ -560,7 +521,7 @@ new_measurement,t9=a f10=true 1 ColumnType::Tag, new_table.columns.get("t9").unwrap().column_type ); - let schema = NamespaceRepo::get_by_name(postgres.as_ref(), "asdf") + let schema = NamespaceSchema::get_by_name("asdf", &postgres) .await .unwrap() .unwrap(); @@ -585,7 +546,7 @@ m1,new_tag=c new_field=1i 2 ColumnType::Tag, table.columns.get("new_tag").unwrap().column_type ); - let schema = NamespaceRepo::get_by_name(postgres.as_ref(), "asdf") + let schema = NamespaceSchema::get_by_name("asdf", &postgres) .await .unwrap() .unwrap(); From dfe95e1a564a2e2e72b92a66a5559e379a047e51 Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Mon, 17 Jan 2022 14:46:10 -0500 Subject: [PATCH 02/32] refactor: make postgres and mem catalog implementations public --- iox_catalog/src/mem.rs | 21 +++++++++- iox_catalog/src/postgres.rs | 81 ++++++++++++++++++++----------------- 2 files changed, 64 insertions(+), 38 deletions(-) diff --git a/iox_catalog/src/mem.rs b/iox_catalog/src/mem.rs index ed2821c426..398a16210d 100644 --- a/iox_catalog/src/mem.rs +++ b/iox_catalog/src/mem.rs @@ -7,12 +7,31 @@ use crate::interface::{ }; use async_trait::async_trait; use std::convert::TryFrom; +use std::fmt::Formatter; use std::sync::{Arc, Mutex}; -struct MemCatalog { +/// In-memory catalog that implements the `RepoCollection` and individual repo traits fromt +/// the catalog interface. +#[derive(Default)] +pub struct MemCatalog { collections: Mutex, } +impl MemCatalog { + /// return new initialized `MemCatalog` + pub fn new() -> Self { + Self::default() + } +} + +impl std::fmt::Debug for MemCatalog { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let c = self.collections.lock().expect("mutex poisoned"); + write!(f, "MemCatalog[ {:?} ]", c) + } +} + +#[derive(Default, Debug)] struct MemCollections { kafka_topics: Vec, query_pools: Vec, diff --git a/iox_catalog/src/postgres.rs b/iox_catalog/src/postgres.rs index dd49e27996..e5bcd20f4e 100644 --- a/iox_catalog/src/postgres.rs +++ b/iox_catalog/src/postgres.rs @@ -16,41 +16,46 @@ const IDLE_TIMEOUT: Duration = Duration::from_secs(500); #[allow(dead_code)] const SCHEMA_NAME: &str = "iox_catalog"; -/// Connect to the catalog store. -pub async fn connect_catalog_store( - app_name: &'static str, - schema_name: &'static str, - dsn: &str, -) -> Result, sqlx::Error> { - let pool = PgPoolOptions::new() - .min_connections(1) - .max_connections(MAX_CONNECTIONS) - .connect_timeout(CONNECT_TIMEOUT) - .idle_timeout(IDLE_TIMEOUT) - .test_before_acquire(true) - .after_connect(move |c| { - Box::pin(async move { - // Tag the connection with the provided application name. - c.execute(sqlx::query("SET application_name = '$1';").bind(app_name)) - .await?; - let search_path_query = format!("SET search_path TO {}", schema_name); - c.execute(sqlx::query(&search_path_query)).await?; - - Ok(()) - }) - }) - .connect(dsn) - .await?; - - // Log a connection was successfully established and include the application - // name for cross-correlation between Conductor logs & database connections. - info!(application_name=%app_name, "connected to catalog store"); - - Ok(pool) +/// In-memory catalog that implements the `RepoCollection` and individual repo traits. 
+#[derive(Debug)] +pub struct PostgresCatalog { + pool: Pool, } -struct PostgresCatalog { - pool: Pool, +impl PostgresCatalog { + /// Connect to the catalog store. + pub async fn connect( + app_name: &'static str, + schema_name: &'static str, + dsn: &str, + ) -> Result { + let pool = PgPoolOptions::new() + .min_connections(1) + .max_connections(MAX_CONNECTIONS) + .connect_timeout(CONNECT_TIMEOUT) + .idle_timeout(IDLE_TIMEOUT) + .test_before_acquire(true) + .after_connect(move |c| { + Box::pin(async move { + // Tag the connection with the provided application name. + c.execute(sqlx::query("SET application_name = '$1';").bind(app_name)) + .await?; + let search_path_query = format!("SET search_path TO {}", schema_name); + c.execute(sqlx::query(&search_path_query)).await?; + + Ok(()) + }) + }) + .connect(dsn) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + // Log a connection was successfully established and include the application + // name for cross-correlation between Conductor logs & database connections. + info!(application_name=%app_name, "connected to catalog store"); + + Ok(Self { pool }) + } } impl RepoCollection for Arc { @@ -348,6 +353,7 @@ fn is_fk_violation(e: &sqlx::Error) -> bool { #[cfg(test)] mod tests { use super::*; + use crate::postgres::PostgresCatalog; use crate::{ create_or_get_default_records, interface::NamespaceSchema, validate_or_insert_schema, }; @@ -395,10 +401,11 @@ mod tests { async fn setup_db() -> (Arc, KafkaTopic, QueryPool) { let dsn = std::env::var("DATABASE_URL").unwrap(); - let pool = connect_catalog_store("test", SCHEMA_NAME, &dsn) - .await - .unwrap(); - let postgres_catalog = Arc::new(PostgresCatalog { pool }); + let postgres_catalog = Arc::new( + PostgresCatalog::connect("test", SCHEMA_NAME, &dsn) + .await + .unwrap(), + ); let (kafka_topic, query_pool, _) = create_or_get_default_records(2, &postgres_catalog) .await From ef336b46592f4ca5ae36104e6d804e00dafcb2ad Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Mon, 17 Jan 2022 15:38:03 -0500 Subject: [PATCH 03/32] feat: add ingester crate and a few basic data structures for its data lifecycle --- Cargo.lock | 7 ++ Cargo.toml | 1 + ingester/Cargo.toml | 8 +++ ingester/src/data.rs | 149 +++++++++++++++++++++++++++++++++++++++++++ ingester/src/lib.rs | 5 ++ 5 files changed, 170 insertions(+) create mode 100644 ingester/Cargo.toml create mode 100644 ingester/src/data.rs create mode 100644 ingester/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 02e85ba6b5..be33e2e8c7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1853,6 +1853,13 @@ dependencies = [ "workspace-hack", ] +[[package]] +name = "ingester" +version = "0.1.0" +dependencies = [ + "mutable_batch", +] + [[package]] name = "instant" version = "0.1.12" diff --git a/Cargo.toml b/Cargo.toml index 8612b307da..4c6cafda3a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ members = [ "influxdb_storage_client", "influxdb_tsm", "influxdb2_client", + "ingester", "internal_types", "iox_catalog", "iox_data_generator", diff --git a/ingester/Cargo.toml b/ingester/Cargo.toml new file mode 100644 index 0000000000..1412feb6a3 --- /dev/null +++ b/ingester/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "ingester" +version = "0.1.0" +authors = ["Nga Tran "] +edition = "2021" + +[dependencies] +mutable_batch = { path = "../mutable_batch" } \ No newline at end of file diff --git a/ingester/src/data.rs b/ingester/src/data.rs new file mode 100644 index 0000000000..4b5f1c1185 --- /dev/null +++ b/ingester/src/data.rs @@ -0,0 +1,149 @@ + + +//! 
Data for the lifecycle of the ingeter +//! + +use std::{sync::Arc, collections::BTreeMap}; + +use mutable_batch::MutableBatch; + +// ┌──────────────┐ +// │Ingester Data │ +// │ (in memory) │ +// └──────────────┘ +// │ +// ┌──────────────┼───────────────┐ +// ▼ ▼ ▼ +// ┌───────────┐ ┌────────────┐ +// │Sequencer 1│ ... │Sequencer m │ Sequencers +// └───────────┘ └────────────┘ a map of sequencer_id to Namespaces +// │ │ +// ┌──────────────┼─────────────┐ │ +// ▼ ▼ ▼ ▼ +// ┌────────────┐ ┌───────────┐ Namespaces +// │Namespace 1 │ ... │Namespace n│ ... a map of namespace_name to Tables +// └────────────┘ └───────────┘ +// │ │ +// ┌──────────────┼──────────────┐ │ +// ▼ ▼ ▼ ▼ +// ┌────────────┐ ┌────────────┐ Tables +// │ Table 1 │ ... │ Table p │ ... a map of table_name to Partitions +// └────────────┘ └────────────┘ +// │ │ +// │ ┌──────────────┼──────────────┐ +// ▼ ▼ ▼ ▼ +// ┌────────────┐ ┌────────────┐ Partitions +// ... │Partition 1 │ ... │Partition q │ a map of partition_key to PartitionData +// │(2021-12-10)│ │(2021-12-20)│ +// └────────────┘ └──────┬─────┘ +// │ │ +// ┌───────────┬────────▼────┬─────────────┐ │ +// │ │ │ │ ▼ +// ▼ ▼ ▼ ▼ +// ┌──────────┐┌───────────┐ ┌───────────┐ ┌───────────┐ ... +// │ Writing ││ Snaphot │ │Persisting │ │ Persisted │ PartitionData: a struct of 4 items +// │Partition ││ Partition │ │ Partition │ │ Partition │ . A `Writing Partition Batch` +// │ Batch ││ Batch 1 │ │ Batch 1 │ │ Batch 1 │ . A vector of `Snapshot Partition Batches` +// └──────────┘├───────────┤ ├───────────┤ ├───────────┤ . A vector of `Persisting Partition Batches` +// │ ... │ │ ... │ │ ... │ . A vector of `Persisted Partition batches` +// │ │ │ │ │ │ +// ├───────────┤ ├───────────┤ ├───────────┤ 1:1 map between `Snapshot` +// │ Snapshot │ │Persisting │ │ Persisted │ and `Persisting` Partition Batches +// │ Partition │ │ Partition │ │ Partition │ +// │ Batch k │ │ Batch k │ │ Batch i │ +// └───────────┘ └───────────┘ └───────────┘ + +// All sequencers aiisgned to this Ingester +#[derive(Debug, Clone)] +pub struct Sequencers { + // A map between a sequencer id to its corresponding Namespaces. + // A sequencer id is a `kafka_partittion`, a i32 defined in iox_catalog's Sequencer and + // represents a shard of data of a Table of a Namesapce. Namespace is equivalent to + // a customer db (aka an org's bucket). Depending on the comfiguration of sharding a table, + // either full data or set of rows of data of the table are included in a shard. + sequencers : BTreeMap>, +} + +// A Namespace and all of its tables of a sequencer +#[derive(Debug, Clone)] +pub struct Namespace { + // Name of the namespace which is unique and represents a customer db. + name: String, + + // Tables of this namesapce + tables : Vec
, +} + +// A Table and all of its partittion +#[derive(Debug, Clone)] +pub struct Table { + // table name + name: String, + + // A map of partittion_key to its corresponding partition + partitions : BTreeMap, +} + +// A Partittion and all of its in-memory data batches +// +// Stages of a batch of a partition: +// . A partition has only one `Writing Batch`. When is it big or +// old enough, defined by IngesterPersistenceSettings, it will +// be put to `Snaphot Batch` and also copied to `Pesisting Batch`. +// The new and empty Wrtiting Batch will be created for accpeting new writes +// . Snapshot and Persisting batches are 1:1 mapped at all times. Snapshot ones are +// immutable and used for querying. Persisting ones are modified to sort, +// dedupilcate, and apply tombstone and then persited to parquet files. +// While many batches can be persisted at the same time, a batch is only marked +// in the catalog to be persisted after the batches before +// its in the queue are marked persisted. +// . After the batch are marked persisted in the catalog, its will be removed +// from Sanpshot and Persisting and put in Persisted. The Persisted ones +// will get evicted based on IngesterPersistenceSettings. +// ┌───────────────────┐ +// │ Persisting │ +// │ │ +// │ ┌───────────────┐ │ +// ┌────────────┐ │ │ Snapshot │ │ ┌────────────┐ +// │ Writing │───────▶│ └───────────────┘ │───────▶│ Persisted │ +// └────────────┘ │ ┌───────────────┐ │ └────────────┘ +// │ │ Persiting │ │ +// │ └───────────────┘ │ +// └───────────────────┘ +// +#[derive(Debug, Clone)] +pub struct Partition { + partition_key: String, + + // Writing batch that accepts writes to this partition + writing_batch: PartitionBatch, + + // Queue of batches that are immutable and used for querying only. + // The batches are queue contiguously in thier data arrival time + snapshot_batches: Vec, // todo: is Vec good enough for hanlding queue? + + // Queue of persisting batches which is a one on one mapping with the snapshot_batches. + // Data of these batches will be modified to sort, dedupilcate, and apply tombstone and then + // persited to parquet files. While many batches can be persisted at the same time, + // a batch is only marked in the catalog to be persisted after the batches before + // its in the queue are marked persisted + pesisting_batched: Vec, + + // Persisted batches that are not yet evicted from the in-memory. + // These are batches moved from persiting_batches after they are fully persisted and marked + // so in the catalog + pesisted_batched: Vec, + +} + +// A PartitionBatch of contiguous in arrival time of writes +// todo & question: do we want to call this Chunk instead? +#[derive(Debug, Clone)] +pub struct PartitionBatch { + // To keep the PartitionBtach in order of their + // arrived data, we may need this auto created batch id + batch_id: i32, + + // Data of this partition batch + data: Arc, +} diff --git a/ingester/src/lib.rs b/ingester/src/lib.rs new file mode 100644 index 0000000000..99c92b1c6f --- /dev/null +++ b/ingester/src/lib.rs @@ -0,0 +1,5 @@ +//! IOx ingester implementation. +//! Design doc: https://docs.google.com/document/d/14NlzBiWwn0H37QxnE0k3ybTU58SKyUZmdgYpVw6az0Q/edit# +//! 
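The diagram and comments in `data.rs` above describe a nesting of maps (sequencer id → namespace → table → partition) and a writing batch that is rolled into an immutable snapshot once it is big or old enough. A self-contained sketch of that shape, using simplified stand-in types rather than the structs in `data.rs`:

    use std::collections::BTreeMap;

    #[derive(Default, Debug)]
    struct PartitionSketch {
        writing: Vec<String>,        // rows accumulating in the current writing batch
        snapshots: Vec<Vec<String>>, // immutable batches queued for persistence
    }

    #[derive(Default, Debug)]
    struct IngesterSketch {
        // sequencer id -> namespace name -> table name -> partition key -> partition data
        data: BTreeMap<i32, BTreeMap<String, BTreeMap<String, BTreeMap<String, PartitionSketch>>>>,
    }

    impl IngesterSketch {
        fn write(&mut self, seq: i32, ns: &str, table: &str, part: &str, row: &str) {
            self.data
                .entry(seq)
                .or_default()
                .entry(ns.to_string())
                .or_default()
                .entry(table.to_string())
                .or_default()
                .entry(part.to_string())
                .or_default()
                .writing
                .push(row.to_string());
        }

        // Close the writing batch: move it onto the snapshot queue and start a fresh one.
        fn snapshot(&mut self, seq: i32, ns: &str, table: &str, part: &str) {
            if let Some(p) = self
                .data
                .get_mut(&seq)
                .and_then(|n| n.get_mut(ns))
                .and_then(|t| t.get_mut(table))
                .and_then(|t| t.get_mut(part))
            {
                let closed = std::mem::take(&mut p.writing);
                if !closed.is_empty() {
                    p.snapshots.push(closed);
                }
            }
        }
    }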
+ +pub mod data; From 4f876000819ca2fd8c949bdc225fd8afae5d974a Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Mon, 17 Jan 2022 17:20:11 -0500 Subject: [PATCH 04/32] chore: make iox_catalog tests generic for any backend implementation --- iox_catalog/src/interface.rs | 188 ++++++++++++++++++++++++++++++++- iox_catalog/src/lib.rs | 96 +++++++++++++++++ iox_catalog/src/mem.rs | 29 +++++- iox_catalog/src/postgres.rs | 197 +++-------------------------------- 4 files changed, 326 insertions(+), 184 deletions(-) diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index 7da8ecce29..d7a030ffba 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -124,6 +124,9 @@ pub trait SequencerRepo { /// list all sequencers async fn list(&self) -> Result>; + + /// list all sequencers for a given kafka topic + async fn list_by_kafka_topic(&self, topic: &KafkaTopic) -> Result>; } /// Data object for a kafka topic @@ -145,7 +148,7 @@ pub struct QueryPool { } /// Data object for a namespace -#[derive(Debug, Clone, sqlx::FromRow)] +#[derive(Debug, Clone, Eq, PartialEq, sqlx::FromRow)] pub struct Namespace { /// The id of the namespace pub id: i32, @@ -450,3 +453,186 @@ pub struct Sequencer { /// lower than this must have been persisted to Parquet. pub min_unpersisted_sequence_number: i64, } + +#[cfg(test)] +pub(crate) mod test_helpers { + use super::*; + use futures::{stream::FuturesOrdered, StreamExt}; + + pub(crate) async fn test_repo(new_repo: F) + where + T: RepoCollection + Send + Sync, + F: Fn() -> T + Send + Sync, + { + test_kafka_topic(&new_repo()).await; + test_query_pool(&new_repo()).await; + test_namespace(&new_repo()).await; + test_table(&new_repo()).await; + test_column(&new_repo()).await; + test_sequencer(&new_repo()).await; + } + + async fn test_kafka_topic(repo: &T) { + let kafka_repo = repo.kafka_topic(); + let k = kafka_repo.create_or_get("foo").await.unwrap(); + assert!(k.id > 0); + assert_eq!(k.name, "foo"); + let k2 = kafka_repo.create_or_get("foo").await.unwrap(); + assert_eq!(k, k2); + } + + async fn test_query_pool(repo: &T) { + let query_repo = repo.query_pool(); + let q = query_repo.create_or_get("foo").await.unwrap(); + assert!(q.id > 0); + assert_eq!(q.name, "foo"); + let q2 = query_repo.create_or_get("foo").await.unwrap(); + assert_eq!(q, q2); + } + + async fn test_namespace(repo: &T) { + let namespace_repo = repo.namespace(); + let kafka = repo.kafka_topic().create_or_get("foo").await.unwrap(); + let pool = repo.query_pool().create_or_get("foo").await.unwrap(); + + let namespace_name = "test_namespace"; + let namespace = namespace_repo + .create(namespace_name, "inf", kafka.id, pool.id) + .await + .unwrap(); + assert!(namespace.id > 0); + assert_eq!(namespace.name, namespace_name); + + let conflict = namespace_repo + .create(namespace_name, "inf", kafka.id, pool.id) + .await; + assert!(matches!( + conflict.unwrap_err(), + Error::NameExists { name: _ } + )); + + let found = namespace_repo + .get_by_name(namespace_name) + .await + .unwrap() + .expect("namespace should be there"); + assert_eq!(namespace, found); + } + + async fn test_table(repo: &T) { + let kafka = repo.kafka_topic().create_or_get("foo").await.unwrap(); + let pool = repo.query_pool().create_or_get("foo").await.unwrap(); + let namespace = repo + .namespace() + .create("namespace_table_test", "inf", kafka.id, pool.id) + .await + .unwrap(); + + // test we can create or get a table + let table_repo = repo.table(); + let t = table_repo + .create_or_get("test_table", namespace.id) + 
.await + .unwrap(); + let tt = table_repo + .create_or_get("test_table", namespace.id) + .await + .unwrap(); + assert!(t.id > 0); + assert_eq!(t, tt); + + let tables = table_repo.list_by_namespace_id(namespace.id).await.unwrap(); + assert_eq!(vec![t], tables); + } + + async fn test_column(repo: &T) { + let kafka = repo.kafka_topic().create_or_get("foo").await.unwrap(); + let pool = repo.query_pool().create_or_get("foo").await.unwrap(); + let namespace = repo + .namespace() + .create("namespace_column_test", "inf", kafka.id, pool.id) + .await + .unwrap(); + let table = repo + .table() + .create_or_get("test_table", namespace.id) + .await + .unwrap(); + + // test we can create or get a column + let column_repo = repo.column(); + let c = column_repo + .create_or_get("column_test", table.id, ColumnType::Tag) + .await + .unwrap(); + let cc = column_repo + .create_or_get("column_test", table.id, ColumnType::Tag) + .await + .unwrap(); + assert!(c.id > 0); + assert_eq!(c, cc); + + // test that attempting to create an already defined column of a different type returns error + let err = column_repo + .create_or_get("column_test", table.id, ColumnType::U64) + .await + .expect_err("should error with wrong column type"); + assert!(matches!( + err, + Error::ColumnTypeMismatch { + name: _, + existing: _, + new: _ + } + )); + + // test that we can create a column of the same name under a different table + let table2 = repo + .table() + .create_or_get("test_table_2", namespace.id) + .await + .unwrap(); + let ccc = column_repo + .create_or_get("column_test", table2.id, ColumnType::U64) + .await + .unwrap(); + assert_ne!(c, ccc); + + let columns = column_repo + .list_by_namespace_id(namespace.id) + .await + .unwrap(); + assert_eq!(vec![c, ccc], columns); + } + + async fn test_sequencer(repo: &T) { + let kafka = repo + .kafka_topic() + .create_or_get("sequencer_test") + .await + .unwrap(); + let sequencer_repo = repo.sequencer(); + + // Create 10 sequencers + let created = (1..=10) + .map(|partition| sequencer_repo.create_or_get(&kafka, partition)) + .collect::>() + .map(|v| { + let v = v.expect("failed to create sequencer"); + (v.id, v) + }) + .collect::>() + .await; + + // List them and assert they match + let listed = sequencer_repo + .list_by_kafka_topic(&kafka) + .await + .expect("failed to list sequencers") + .into_iter() + .map(|v| (v.id, v)) + .collect::>(); + + assert_eq!(created, listed); + } +} diff --git a/iox_catalog/src/lib.rs b/iox_catalog/src/lib.rs index b2dead2df3..fedae0197e 100644 --- a/iox_catalog/src/lib.rs +++ b/iox_catalog/src/lib.rs @@ -196,3 +196,99 @@ pub async fn create_or_get_default_records( Ok((kafka_topic, query_pool, sequencers)) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::mem::MemCatalog; + use influxdb_line_protocol::parse_lines; + use std::sync::Arc; + + #[tokio::test] + async fn test_validate_or_insert_schema() { + let repo = Arc::new(MemCatalog::new()); + let (kafka_topic, query_pool, _) = create_or_get_default_records(2, &repo).await.unwrap(); + + let namespace_name = "validate_schema"; + // now test with a new namespace + let namespace = repo + .namespace() + .create(namespace_name, "inf", kafka_topic.id, query_pool.id) + .await + .unwrap(); + let data = r#" +m1,t1=a,t2=b f1=2i,f2=2.0 1 +m1,t1=a f1=3i 2 +m2,t3=b f1=true 1 + "#; + + // test that new schema gets returned + let lines: Vec<_> = parse_lines(data).map(|l| l.unwrap()).collect(); + let schema = Arc::new(NamespaceSchema::new( + namespace.id, + namespace.kafka_topic_id, + namespace.query_pool_id, 
+ )); + let new_schema = validate_or_insert_schema(lines, &schema, &repo) + .await + .unwrap(); + let new_schema = new_schema.unwrap(); + + // ensure new schema is in the db + let schema_from_db = NamespaceSchema::get_by_name(namespace_name, &repo) + .await + .unwrap() + .unwrap(); + assert_eq!(new_schema, schema_from_db); + + // test that a new table will be created + let data = r#" +m1,t1=c f1=1i 2 +new_measurement,t9=a f10=true 1 + "#; + let lines: Vec<_> = parse_lines(data).map(|l| l.unwrap()).collect(); + let new_schema = validate_or_insert_schema(lines, &schema_from_db, &repo) + .await + .unwrap() + .unwrap(); + let new_table = new_schema.tables.get("new_measurement").unwrap(); + assert_eq!( + ColumnType::Bool, + new_table.columns.get("f10").unwrap().column_type + ); + assert_eq!( + ColumnType::Tag, + new_table.columns.get("t9").unwrap().column_type + ); + let schema = NamespaceSchema::get_by_name(namespace_name, &repo) + .await + .unwrap() + .unwrap(); + assert_eq!(new_schema, schema); + + // test that a new column for an existing table will be created + // test that a new table will be created + let data = r#" +m1,new_tag=c new_field=1i 2 + "#; + let lines: Vec<_> = parse_lines(data).map(|l| l.unwrap()).collect(); + let new_schema = validate_or_insert_schema(lines, &schema, &repo) + .await + .unwrap() + .unwrap(); + let table = new_schema.tables.get("m1").unwrap(); + assert_eq!( + ColumnType::I64, + table.columns.get("new_field").unwrap().column_type + ); + assert_eq!( + ColumnType::Tag, + table.columns.get("new_tag").unwrap().column_type + ); + let schema = NamespaceSchema::get_by_name(namespace_name, &repo) + .await + .unwrap() + .unwrap(); + assert_eq!(new_schema, schema); + } +} diff --git a/iox_catalog/src/mem.rs b/iox_catalog/src/mem.rs index 398a16210d..a9d27afad2 100644 --- a/iox_catalog/src/mem.rs +++ b/iox_catalog/src/mem.rs @@ -189,7 +189,11 @@ impl ColumnRepo for MemCatalog { ) -> Result { let mut collections = self.collections.lock().expect("mutex poisoned"); - let column = match collections.columns.iter().find(|t| t.name == name) { + let column = match collections + .columns + .iter() + .find(|t| t.name == name && t.table_id == table_id) + { Some(c) => { if column_type as i16 != c.column_type { return Err(Error::ColumnTypeMismatch { @@ -264,4 +268,27 @@ impl SequencerRepo for MemCatalog { let collections = self.collections.lock().expect("mutex poisoned"); Ok(collections.sequencers.clone()) } + + async fn list_by_kafka_topic(&self, topic: &KafkaTopic) -> Result> { + let collections = self.collections.lock().expect("mutex poisoned"); + let sequencers: Vec<_> = collections + .sequencers + .iter() + .filter(|s| s.kafka_topic_id == topic.id) + .cloned() + .collect(); + Ok(sequencers) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_mem_repo() { + let f = || Arc::new(MemCatalog::new()); + + crate::interface::test_helpers::test_repo(f).await; + } } diff --git a/iox_catalog/src/postgres.rs b/iox_catalog/src/postgres.rs index e5bcd20f4e..7efdcdc7d8 100644 --- a/iox_catalog/src/postgres.rs +++ b/iox_catalog/src/postgres.rs @@ -315,6 +315,14 @@ impl SequencerRepo for PostgresCatalog { .await .map_err(|e| Error::SqlxError { source: e }) } + + async fn list_by_kafka_topic(&self, topic: &KafkaTopic) -> Result> { + sqlx::query_as::<_, Sequencer>(r#"SELECT * FROM sequencer WHERE kafka_topic_id = $1;"#) + .bind(&topic.id) // $1 + .fetch_all(&self.pool) + .await + .map_err(|e| Error::SqlxError { source: e }) + } } /// The error code returned by 
Postgres for a unique constraint violation. @@ -353,13 +361,6 @@ fn is_fk_violation(e: &sqlx::Error) -> bool { #[cfg(test)] mod tests { use super::*; - use crate::postgres::PostgresCatalog; - use crate::{ - create_or_get_default_records, interface::NamespaceSchema, validate_or_insert_schema, - }; - use futures::{stream::FuturesOrdered, StreamExt}; - use influxdb_line_protocol::parse_lines; - use std::collections::BTreeMap; use std::env; // Helper macro to skip tests if TEST_INTEGRATION and the AWS environment variables are not set. @@ -399,196 +400,28 @@ mod tests { }}; } - async fn setup_db() -> (Arc, KafkaTopic, QueryPool) { + async fn setup_db() -> Arc { let dsn = std::env::var("DATABASE_URL").unwrap(); - let postgres_catalog = Arc::new( + Arc::new( PostgresCatalog::connect("test", SCHEMA_NAME, &dsn) .await .unwrap(), - ); - - let (kafka_topic, query_pool, _) = create_or_get_default_records(2, &postgres_catalog) - .await - .unwrap(); - (postgres_catalog, kafka_topic, query_pool) + ) } #[tokio::test] - async fn test_catalog() { + async fn test_repo() { // If running an integration test on your laptop, this requires that you have Postgres // running and that you've done the sqlx migrations. See the README in this crate for // info to set it up. maybe_skip_integration!(); - let (postgres, kafka_topic, query_pool) = setup_db().await; + let postgres = setup_db().await; clear_schema(&postgres.pool).await; - let namespace = NamespaceRepo::create(postgres.as_ref(), "foo", "inf", 0, 0).await; - assert!(matches!( - namespace.unwrap_err(), - Error::ForeignKeyViolation { source: _ } - )); - let namespace = NamespaceRepo::create( - postgres.as_ref(), - "foo", - "inf", - kafka_topic.id, - query_pool.id, - ) - .await - .unwrap(); - assert!(namespace.id > 0); - assert_eq!(namespace.kafka_topic_id, kafka_topic.id); - assert_eq!(namespace.query_pool_id, query_pool.id); + let f = || Arc::clone(&postgres); - // test that we can create or get a table - let t = TableRepo::create_or_get(postgres.as_ref(), "foo", namespace.id) - .await - .unwrap(); - let tt = TableRepo::create_or_get(postgres.as_ref(), "foo", namespace.id) - .await - .unwrap(); - assert!(t.id > 0); - assert_eq!(t, tt); - - // test that we can craete or get a column - let c = ColumnRepo::create_or_get(postgres.as_ref(), "foo", t.id, ColumnType::I64) - .await - .unwrap(); - let cc = ColumnRepo::create_or_get(postgres.as_ref(), "foo", t.id, ColumnType::I64) - .await - .unwrap(); - assert!(c.id > 0); - assert_eq!(c, cc); - - // test that attempting to create an already defined column of a different type returns error - let err = ColumnRepo::create_or_get(postgres.as_ref(), "foo", t.id, ColumnType::F64) - .await - .expect_err("should error with wrong column type"); - assert!(matches!( - err, - Error::ColumnTypeMismatch { - name: _, - existing: _, - new: _ - } - )); - - // now test with a new namespace - let namespace = NamespaceRepo::create( - postgres.as_ref(), - "asdf", - "inf", - kafka_topic.id, - query_pool.id, - ) - .await - .unwrap(); - let data = r#" -m1,t1=a,t2=b f1=2i,f2=2.0 1 -m1,t1=a f1=3i 2 -m2,t3=b f1=true 1 - "#; - - // test that new schema gets returned - let lines: Vec<_> = parse_lines(data).map(|l| l.unwrap()).collect(); - let schema = Arc::new(NamespaceSchema::new( - namespace.id, - namespace.kafka_topic_id, - namespace.query_pool_id, - )); - let new_schema = validate_or_insert_schema(lines, &schema, &postgres) - .await - .unwrap(); - let new_schema = new_schema.unwrap(); - - // ensure new schema is in the db - let schema_from_db 
= NamespaceSchema::get_by_name("asdf", &postgres) - .await - .unwrap() - .unwrap(); - assert_eq!(new_schema, schema_from_db); - - // test that a new table will be created - let data = r#" -m1,t1=c f1=1i 2 -new_measurement,t9=a f10=true 1 - "#; - let lines: Vec<_> = parse_lines(data).map(|l| l.unwrap()).collect(); - let new_schema = validate_or_insert_schema(lines, &schema_from_db, &postgres) - .await - .unwrap() - .unwrap(); - let new_table = new_schema.tables.get("new_measurement").unwrap(); - assert_eq!( - ColumnType::Bool, - new_table.columns.get("f10").unwrap().column_type - ); - assert_eq!( - ColumnType::Tag, - new_table.columns.get("t9").unwrap().column_type - ); - let schema = NamespaceSchema::get_by_name("asdf", &postgres) - .await - .unwrap() - .unwrap(); - assert_eq!(new_schema, schema); - - // test that a new column for an existing table will be created - // test that a new table will be created - let data = r#" -m1,new_tag=c new_field=1i 2 - "#; - let lines: Vec<_> = parse_lines(data).map(|l| l.unwrap()).collect(); - let new_schema = validate_or_insert_schema(lines, &schema, &postgres) - .await - .unwrap() - .unwrap(); - let table = new_schema.tables.get("m1").unwrap(); - assert_eq!( - ColumnType::I64, - table.columns.get("new_field").unwrap().column_type - ); - assert_eq!( - ColumnType::Tag, - table.columns.get("new_tag").unwrap().column_type - ); - let schema = NamespaceSchema::get_by_name("asdf", &postgres) - .await - .unwrap() - .unwrap(); - assert_eq!(new_schema, schema); - } - - #[tokio::test] - async fn test_sequencers() { - maybe_skip_integration!(); - - let (postgres, kafka_topic, _query_pool) = setup_db().await; - clear_schema(&postgres.pool).await; - - // Create 10 sequencers - let created = (1..=10) - .map(|partition| { - SequencerRepo::create_or_get(postgres.as_ref(), &kafka_topic, partition) - }) - .collect::>() - .map(|v| { - let v = v.expect("failed to create sequencer"); - (v.id, v) - }) - .collect::>() - .await; - - // List them and assert they match - let listed = SequencerRepo::list(postgres.as_ref()) - .await - .expect("failed to list sequencers") - .into_iter() - .map(|v| (v.id, v)) - .collect::>(); - - assert_eq!(created, listed); + crate::interface::test_helpers::test_repo(f).await; } async fn clear_schema(pool: &Pool) { From 5e464727d141dfcfbaa3ecd4b821bdd4d3e0c6cd Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Tue, 18 Jan 2022 10:09:16 -0500 Subject: [PATCH 05/32] refactor: make get_schema_by_name bare function --- iox_catalog/src/interface.rs | 117 ++++++++++++++++++----------------- iox_catalog/src/lib.rs | 7 ++- iox_catalog/src/mem.rs | 2 +- 3 files changed, 64 insertions(+), 62 deletions(-) diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index d7a030ffba..828c18a525 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -163,7 +163,8 @@ pub struct Namespace { pub query_pool_id: i16, } -/// Schema collection for a namespace +/// Schema collection for a namespace. This is an in-memory object useful for a schema +/// cache. #[derive(Debug, Clone, Eq, PartialEq)] pub struct NamespaceSchema { /// the namespace id @@ -187,63 +188,6 @@ impl NamespaceSchema { } } - /// Gets the namespace schema including all tables and columns. - pub async fn get_by_name( - name: &str, - repo: &T, - ) -> Result> { - let namespace_repo = repo.namespace(); - let table_repo = repo.table(); - let column_repo = repo.column(); - - let namespace = namespace_repo - .get_by_name(name) - .await? 
- .context(NamespaceNotFoundSnafu { name })?; - - // get the columns first just in case someone else is creating schema while we're doing this. - let columns = column_repo.list_by_namespace_id(namespace.id).await?; - let tables = table_repo.list_by_namespace_id(namespace.id).await?; - - let mut namespace = Self::new( - namespace.id, - namespace.kafka_topic_id, - namespace.query_pool_id, - ); - - let mut table_id_to_schema = BTreeMap::new(); - for t in tables { - table_id_to_schema.insert(t.id, (t.name, TableSchema::new(t.id))); - } - - for c in columns { - let (_, t) = table_id_to_schema.get_mut(&c.table_id).unwrap(); - match ColumnType::try_from(c.column_type) { - Ok(column_type) => { - t.columns.insert( - c.name, - ColumnSchema { - id: c.id, - column_type, - }, - ); - } - _ => { - return Err(Error::UnknownColumnType { - data_type: c.column_type, - name: c.name.to_string(), - }); - } - } - } - - for (_, (table_name, schema)) in table_id_to_schema { - namespace.tables.insert(table_name, schema); - } - - Ok(Some(namespace)) - } - /// Adds tables and columns to the `NamespaceSchema`. These are created /// incrementally while validating the schema for a write and this helper /// method takes them in to add them to the schema. @@ -277,6 +221,63 @@ impl NamespaceSchema { } } +/// Gets the namespace schema including all tables and columns. +pub async fn get_schema_by_name( + name: &str, + repo: &T, +) -> Result> { + let namespace_repo = repo.namespace(); + let table_repo = repo.table(); + let column_repo = repo.column(); + + let namespace = namespace_repo + .get_by_name(name) + .await? + .context(NamespaceNotFoundSnafu { name })?; + + // get the columns first just in case someone else is creating schema while we're doing this. + let columns = column_repo.list_by_namespace_id(namespace.id).await?; + let tables = table_repo.list_by_namespace_id(namespace.id).await?; + + let mut namespace = NamespaceSchema::new( + namespace.id, + namespace.kafka_topic_id, + namespace.query_pool_id, + ); + + let mut table_id_to_schema = BTreeMap::new(); + for t in tables { + table_id_to_schema.insert(t.id, (t.name, TableSchema::new(t.id))); + } + + for c in columns { + let (_, t) = table_id_to_schema.get_mut(&c.table_id).unwrap(); + match ColumnType::try_from(c.column_type) { + Ok(column_type) => { + t.columns.insert( + c.name, + ColumnSchema { + id: c.id, + column_type, + }, + ); + } + _ => { + return Err(Error::UnknownColumnType { + data_type: c.column_type, + name: c.name.to_string(), + }); + } + } + } + + for (_, (table_name, schema)) in table_id_to_schema { + namespace.tables.insert(table_name, schema); + } + + Ok(Some(namespace)) +} + /// Data object for a table #[derive(Debug, Clone, sqlx::FromRow, Eq, PartialEq)] pub struct Table { diff --git a/iox_catalog/src/lib.rs b/iox_catalog/src/lib.rs index fedae0197e..96ad9e8b8d 100644 --- a/iox_catalog/src/lib.rs +++ b/iox_catalog/src/lib.rs @@ -200,6 +200,7 @@ pub async fn create_or_get_default_records( #[cfg(test)] mod tests { use super::*; + use crate::interface::get_schema_by_name; use crate::mem::MemCatalog; use influxdb_line_protocol::parse_lines; use std::sync::Arc; @@ -235,7 +236,7 @@ m2,t3=b f1=true 1 let new_schema = new_schema.unwrap(); // ensure new schema is in the db - let schema_from_db = NamespaceSchema::get_by_name(namespace_name, &repo) + let schema_from_db = get_schema_by_name(namespace_name, &repo) .await .unwrap() .unwrap(); @@ -260,7 +261,7 @@ new_measurement,t9=a f10=true 1 ColumnType::Tag, new_table.columns.get("t9").unwrap().column_type ); - 
let schema = NamespaceSchema::get_by_name(namespace_name, &repo) + let schema = get_schema_by_name(namespace_name, &repo) .await .unwrap() .unwrap(); @@ -285,7 +286,7 @@ m1,new_tag=c new_field=1i 2 ColumnType::Tag, table.columns.get("new_tag").unwrap().column_type ); - let schema = NamespaceSchema::get_by_name(namespace_name, &repo) + let schema = get_schema_by_name(namespace_name, &repo) .await .unwrap() .unwrap(); diff --git a/iox_catalog/src/mem.rs b/iox_catalog/src/mem.rs index a9d27afad2..1dfaa687da 100644 --- a/iox_catalog/src/mem.rs +++ b/iox_catalog/src/mem.rs @@ -10,7 +10,7 @@ use std::convert::TryFrom; use std::fmt::Formatter; use std::sync::{Arc, Mutex}; -/// In-memory catalog that implements the `RepoCollection` and individual repo traits fromt +/// In-memory catalog that implements the `RepoCollection` and individual repo traits from /// the catalog interface. #[derive(Default)] pub struct MemCatalog { From 40cac21e21e7c1c4a528450abfb89e9c9e783809 Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Tue, 18 Jan 2022 12:42:10 -0500 Subject: [PATCH 06/32] refactor: change all ids in catalog to their own types --- iox_catalog/src/interface.rs | 154 +++++++++++++++++++++++++++-------- iox_catalog/src/lib.rs | 8 +- iox_catalog/src/mem.rs | 29 +++---- iox_catalog/src/postgres.rs | 17 ++-- 4 files changed, 150 insertions(+), 58 deletions(-) diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index 828c18a525..f693bd238e 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -41,6 +41,96 @@ pub enum Error { /// A specialized `Error` for Catalog errors pub type Result = std::result::Result; +/// Unique ID for a `Namespace` +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub struct NamespaceId(i32); + +#[allow(missing_docs)] +impl NamespaceId { + pub fn new(v: i32) -> Self { + Self(v) + } + pub fn get(&self) -> i32 { + self.0 + } +} + +/// Unique ID for a `KafkaTopic` +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub struct KafkaTopicId(i32); + +#[allow(missing_docs)] +impl KafkaTopicId { + pub fn new(v: i32) -> Self { + Self(v) + } + pub fn get(&self) -> i32 { + self.0 + } +} + +/// Unique ID for a `QueryPool` +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub struct QueryPoolId(i16); + +#[allow(missing_docs)] +impl QueryPoolId { + pub fn new(v: i16) -> Self { + Self(v) + } + pub fn get(&self) -> i16 { + self.0 + } +} + +/// Unique ID for a `Table` +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub struct TableId(i32); + +#[allow(missing_docs)] +impl TableId { + pub fn new(v: i32) -> Self { + Self(v) + } + pub fn get(&self) -> i32 { + self.0 + } +} + +/// Unique ID for a `Column` +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub struct ColumnId(i32); + +#[allow(missing_docs)] +impl ColumnId { + pub fn new(v: i32) -> Self { + Self(v) + } + pub fn get(&self) -> i32 { + self.0 + } +} + +/// Unique ID for a `Sequencer` +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub struct SequencerId(i16); + +#[allow(missing_docs)] +impl SequencerId { + pub fn new(v: i16) -> Self { + Self(v) + } + pub fn get(&self) -> i16 { + self.0 + } +} + /// Container that can return repos for each of the catalog data types. 
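A minimal illustration of what these id newtypes buy (the helper below is hypothetical, not part of the patch): mixing up two kinds of id becomes a compile error, while the raw value stays reachable through `.get()` where it is genuinely needed, such as SQL binds.

    fn count_for_namespace(namespace_id: NamespaceId) -> i32 {
        // the wrapped value is still available for e.g. query binds
        namespace_id.get()
    }

    fn typed_id_demo() {
        let ns = NamespaceId::new(42);
        let table = TableId::new(7);
        assert_eq!(count_for_namespace(ns), 42);
        // count_for_namespace(table) would not compile: expected `NamespaceId`, found `TableId`
        let _ = table;
    }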
#[async_trait] pub trait RepoCollection { @@ -81,8 +171,8 @@ pub trait NamespaceRepo { &self, name: &str, retention_duration: &str, - kafka_topic_id: i32, - query_pool_id: i16, + kafka_topic_id: KafkaTopicId, + query_pool_id: QueryPoolId, ) -> Result; /// Gets the namespace by its unique name. @@ -93,10 +183,10 @@ pub trait NamespaceRepo { #[async_trait] pub trait TableRepo { /// Creates the table in the catalog or get the existing record by name. - async fn create_or_get(&self, name: &str, namespace_id: i32) -> Result
; + async fn create_or_get(&self, name: &str, namespace_id: NamespaceId) -> Result
; /// Lists all tables in the catalog for the given namespace id. - async fn list_by_namespace_id(&self, namespace_id: i32) -> Result>; + async fn list_by_namespace_id(&self, namespace_id: NamespaceId) -> Result>; } /// Functions for working with columns in the catalog @@ -108,12 +198,12 @@ pub trait ColumnRepo { async fn create_or_get( &self, name: &str, - table_id: i32, + table_id: TableId, column_type: ColumnType, ) -> Result; /// Lists all columns in the passed in namespace id. - async fn list_by_namespace_id(&self, namespace_id: i32) -> Result>; + async fn list_by_namespace_id(&self, namespace_id: NamespaceId) -> Result>; } /// Functions for working with sequencers in the catalog @@ -133,7 +223,7 @@ pub trait SequencerRepo { #[derive(Debug, Clone, Eq, PartialEq, sqlx::FromRow)] pub struct KafkaTopic { /// The id of the topic - pub id: i32, + pub id: KafkaTopicId, /// The unique name of the topic pub name: String, } @@ -142,7 +232,7 @@ pub struct KafkaTopic { #[derive(Debug, Clone, Eq, PartialEq, sqlx::FromRow)] pub struct QueryPool { /// The id of the pool - pub id: i16, + pub id: QueryPoolId, /// The unique name of the pool pub name: String, } @@ -151,16 +241,16 @@ pub struct QueryPool { #[derive(Debug, Clone, Eq, PartialEq, sqlx::FromRow)] pub struct Namespace { /// The id of the namespace - pub id: i32, + pub id: NamespaceId, /// The unique name of the namespace pub name: String, /// The retention duration as a string. 'inf' or not present represents infinite duration (i.e. never drop data). #[sqlx(default)] pub retention_duration: Option, /// The kafka topic that writes to this namespace will land in - pub kafka_topic_id: i32, + pub kafka_topic_id: KafkaTopicId, /// The query pool assigned to answer queries for this namespace - pub query_pool_id: i16, + pub query_pool_id: QueryPoolId, } /// Schema collection for a namespace. This is an in-memory object useful for a schema @@ -168,18 +258,18 @@ pub struct Namespace { #[derive(Debug, Clone, Eq, PartialEq)] pub struct NamespaceSchema { /// the namespace id - pub id: i32, + pub id: NamespaceId, /// the kafka topic this namespace gets data written to - pub kafka_topic_id: i32, + pub kafka_topic_id: KafkaTopicId, /// the query pool assigned to answer queries for this namespace - pub query_pool_id: i16, + pub query_pool_id: QueryPoolId, /// the tables in the namespace by name pub tables: BTreeMap, } impl NamespaceSchema { /// Create a new `NamespaceSchema` - pub fn new(id: i32, kafka_topic_id: i32, query_pool_id: i16) -> Self { + pub fn new(id: NamespaceId, kafka_topic_id: KafkaTopicId, query_pool_id: QueryPoolId) -> Self { Self { id, tables: BTreeMap::new(), @@ -193,8 +283,8 @@ impl NamespaceSchema { /// method takes them in to add them to the schema. 
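A rough sketch of how the write-validation path is expected to hand its incremental results to this method, using the typed collections from this patch (table and column names, ids, and the wrapper function are made up for illustration):

    fn merge_new_schema_example(schema: &mut NamespaceSchema) {
        let mut new_tables = BTreeMap::new();
        new_tables.insert("cpu".to_string(), TableId::new(10));

        let mut cpu_columns = BTreeMap::new();
        cpu_columns.insert(
            "region".to_string(),
            ColumnSchema {
                id: ColumnId::new(100),
                column_type: ColumnType::Tag,
            },
        );
        let mut new_columns = BTreeMap::new();
        new_columns.insert(TableId::new(10), cpu_columns);

        schema.add_tables_and_columns(new_tables, new_columns);
        assert!(schema.tables.contains_key("cpu"));
    }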
pub fn add_tables_and_columns( &mut self, - new_tables: BTreeMap, - new_columns: BTreeMap>, + new_tables: BTreeMap, + new_columns: BTreeMap>, ) { for (table_name, table_id) in new_tables { self.tables @@ -210,7 +300,7 @@ impl NamespaceSchema { } } - fn get_table_mut(&mut self, table_id: i32) -> Option<&mut TableSchema> { + fn get_table_mut(&mut self, table_id: TableId) -> Option<&mut TableSchema> { for table in self.tables.values_mut() { if table.id == table_id { return Some(table); @@ -282,9 +372,9 @@ pub async fn get_schema_by_name( #[derive(Debug, Clone, sqlx::FromRow, Eq, PartialEq)] pub struct Table { /// The id of the table - pub id: i32, + pub id: TableId, /// The namespace id that the table is in - pub namespace_id: i32, + pub namespace_id: NamespaceId, /// The name of the table, which is unique within the associated namespace pub name: String, } @@ -293,14 +383,14 @@ pub struct Table { #[derive(Debug, Clone, Eq, PartialEq)] pub struct TableSchema { /// the table id - pub id: i32, + pub id: TableId, /// the table's columns by their name pub columns: BTreeMap, } impl TableSchema { /// Initialize new `TableSchema` - pub fn new(id: i32) -> Self { + pub fn new(id: TableId) -> Self { Self { id, columns: BTreeMap::new(), @@ -319,9 +409,9 @@ impl TableSchema { #[derive(Debug, Clone, sqlx::FromRow, Eq, PartialEq)] pub struct Column { /// the column id - pub id: i32, + pub id: ColumnId, /// the table id the column is in - pub table_id: i32, + pub table_id: TableId, /// the name of the column, which is unique in the table pub name: String, /// the logical type of the column @@ -350,7 +440,7 @@ impl Column { #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub struct ColumnSchema { /// the column id - pub id: i32, + pub id: ColumnId, /// the column type pub column_type: ColumnType, } @@ -443,9 +533,9 @@ pub fn column_type_from_field(field_value: &FieldValue) -> ColumnType { #[derive(Debug, Copy, Clone, PartialEq, sqlx::FromRow)] pub struct Sequencer { /// the id of the sequencer - pub id: i16, + pub id: SequencerId, /// the topic the sequencer is reading from - pub kafka_topic_id: i32, + pub kafka_topic_id: KafkaTopicId, /// the kafka partition the sequencer is reading from pub kafka_partition: i32, /// The minimum unpersisted sequence number. 
Because different tables @@ -476,7 +566,7 @@ pub(crate) mod test_helpers { async fn test_kafka_topic(repo: &T) { let kafka_repo = repo.kafka_topic(); let k = kafka_repo.create_or_get("foo").await.unwrap(); - assert!(k.id > 0); + assert!(k.id > KafkaTopicId::new(0)); assert_eq!(k.name, "foo"); let k2 = kafka_repo.create_or_get("foo").await.unwrap(); assert_eq!(k, k2); @@ -485,7 +575,7 @@ pub(crate) mod test_helpers { async fn test_query_pool(repo: &T) { let query_repo = repo.query_pool(); let q = query_repo.create_or_get("foo").await.unwrap(); - assert!(q.id > 0); + assert!(q.id > QueryPoolId::new(0)); assert_eq!(q.name, "foo"); let q2 = query_repo.create_or_get("foo").await.unwrap(); assert_eq!(q, q2); @@ -501,7 +591,7 @@ pub(crate) mod test_helpers { .create(namespace_name, "inf", kafka.id, pool.id) .await .unwrap(); - assert!(namespace.id > 0); + assert!(namespace.id > NamespaceId::new(0)); assert_eq!(namespace.name, namespace_name); let conflict = namespace_repo @@ -539,7 +629,7 @@ pub(crate) mod test_helpers { .create_or_get("test_table", namespace.id) .await .unwrap(); - assert!(t.id > 0); + assert!(t.id > TableId::new(0)); assert_eq!(t, tt); let tables = table_repo.list_by_namespace_id(namespace.id).await.unwrap(); @@ -570,7 +660,7 @@ pub(crate) mod test_helpers { .create_or_get("column_test", table.id, ColumnType::Tag) .await .unwrap(); - assert!(c.id > 0); + assert!(c.id > ColumnId::new(0)); assert_eq!(c, cc); // test that attempting to create an already defined column of a different type returns error diff --git a/iox_catalog/src/lib.rs b/iox_catalog/src/lib.rs index 96ad9e8b8d..8698e8f482 100644 --- a/iox_catalog/src/lib.rs +++ b/iox_catalog/src/lib.rs @@ -13,7 +13,7 @@ use crate::interface::{ column_type_from_field, ColumnSchema, ColumnType, Error, KafkaTopic, NamespaceSchema, - QueryPool, RepoCollection, Result, Sequencer, + QueryPool, RepoCollection, Result, Sequencer, SequencerId, TableId, }; use futures::{stream::FuturesOrdered, StreamExt}; use influxdb_line_protocol::ParsedLine; @@ -42,9 +42,9 @@ pub async fn validate_or_insert_schema( repo: &T, ) -> Result> { // table name to table_id - let mut new_tables: BTreeMap = BTreeMap::new(); + let mut new_tables: BTreeMap = BTreeMap::new(); // table_id to map of column name to column - let mut new_columns: BTreeMap> = BTreeMap::new(); + let mut new_columns: BTreeMap> = BTreeMap::new(); for line in &lines { let table_name = line.series.measurement.as_str(); @@ -176,7 +176,7 @@ pub async fn validate_or_insert_schema( pub async fn create_or_get_default_records( kafka_partition_count: i32, repo: &T, -) -> Result<(KafkaTopic, QueryPool, BTreeMap)> { +) -> Result<(KafkaTopic, QueryPool, BTreeMap)> { let kafka_repo = repo.kafka_topic(); let query_repo = repo.query_pool(); let sequencer_repo = repo.sequencer(); diff --git a/iox_catalog/src/mem.rs b/iox_catalog/src/mem.rs index 1dfaa687da..19c7226e74 100644 --- a/iox_catalog/src/mem.rs +++ b/iox_catalog/src/mem.rs @@ -2,8 +2,9 @@ //! used for testing or for an IOx designed to run without catalog persistence. 
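// Both the Postgres catalog and this in-memory catalog implement the same repo traits,
// so callers can stay generic over RepoCollection. A sketch of that usage; the
// Send + Sync bounds and the literal names are illustrative assumptions, not taken
// from this patch:
async fn create_test_namespace<T: RepoCollection + Send + Sync>(repo: &T) -> Result<Namespace> {
    // Topic and query pool rows are created on demand if they do not already exist.
    let topic = repo.kafka_topic().create_or_get("iox_shared").await?;
    let pool = repo.query_pool().create_or_get("iox_shared").await?;
    // Namespace creation returns an error if a namespace with this name already exists.
    repo.namespace()
        .create("org_bucket", "inf", topic.id, pool.id)
        .await
}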
use crate::interface::{ - Column, ColumnRepo, ColumnType, Error, KafkaTopic, KafkaTopicRepo, Namespace, NamespaceRepo, - QueryPool, QueryPoolRepo, RepoCollection, Result, Sequencer, SequencerRepo, Table, TableRepo, + Column, ColumnId, ColumnRepo, ColumnType, Error, KafkaTopic, KafkaTopicId, KafkaTopicRepo, + Namespace, NamespaceId, NamespaceRepo, QueryPool, QueryPoolId, QueryPoolRepo, RepoCollection, + Result, Sequencer, SequencerId, SequencerRepo, Table, TableId, TableRepo, }; use async_trait::async_trait; use std::convert::TryFrom; @@ -76,7 +77,7 @@ impl KafkaTopicRepo for MemCatalog { Some(t) => t, None => { let topic = KafkaTopic { - id: collections.kafka_topics.len() as i32 + 1, + id: KafkaTopicId::new(collections.kafka_topics.len() as i32 + 1), name: name.to_string(), }; collections.kafka_topics.push(topic); @@ -97,7 +98,7 @@ impl QueryPoolRepo for MemCatalog { Some(t) => t, None => { let pool = QueryPool { - id: collections.query_pools.len() as i16 + 1, + id: QueryPoolId::new(collections.query_pools.len() as i16 + 1), name: name.to_string(), }; collections.query_pools.push(pool); @@ -115,8 +116,8 @@ impl NamespaceRepo for MemCatalog { &self, name: &str, retention_duration: &str, - kafka_topic_id: i32, - query_pool_id: i16, + kafka_topic_id: KafkaTopicId, + query_pool_id: QueryPoolId, ) -> Result { let mut collections = self.collections.lock().expect("mutex poisoned"); if collections.namespaces.iter().any(|n| n.name == name) { @@ -126,7 +127,7 @@ impl NamespaceRepo for MemCatalog { } let namespace = Namespace { - id: collections.namespaces.len() as i32 + 1, + id: NamespaceId::new(collections.namespaces.len() as i32 + 1), name: name.to_string(), kafka_topic_id, query_pool_id, @@ -148,14 +149,14 @@ impl NamespaceRepo for MemCatalog { #[async_trait] impl TableRepo for MemCatalog { - async fn create_or_get(&self, name: &str, namespace_id: i32) -> Result
<Table> {
+ async fn create_or_get(&self, name: &str, namespace_id: NamespaceId) -> Result<Table>
{ let mut collections = self.collections.lock().expect("mutex poisoned"); let table = match collections.tables.iter().find(|t| t.name == name) { Some(t) => t, None => { let table = Table { - id: collections.tables.len() as i32 + 1, + id: TableId::new(collections.tables.len() as i32 + 1), namespace_id, name: name.to_string(), }; @@ -167,7 +168,7 @@ impl TableRepo for MemCatalog { Ok(table.clone()) } - async fn list_by_namespace_id(&self, namespace_id: i32) -> Result> { + async fn list_by_namespace_id(&self, namespace_id: NamespaceId) -> Result> { let collections = self.collections.lock().expect("mutex poisoned"); let tables: Vec<_> = collections .tables @@ -184,7 +185,7 @@ impl ColumnRepo for MemCatalog { async fn create_or_get( &self, name: &str, - table_id: i32, + table_id: TableId, column_type: ColumnType, ) -> Result { let mut collections = self.collections.lock().expect("mutex poisoned"); @@ -207,7 +208,7 @@ impl ColumnRepo for MemCatalog { } None => { let column = Column { - id: collections.columns.len() as i32 + 1, + id: ColumnId::new(collections.columns.len() as i32 + 1), table_id, name: name.to_string(), column_type: column_type as i16, @@ -220,7 +221,7 @@ impl ColumnRepo for MemCatalog { Ok(column.clone()) } - async fn list_by_namespace_id(&self, namespace_id: i32) -> Result> { + async fn list_by_namespace_id(&self, namespace_id: NamespaceId) -> Result> { let mut columns = vec![]; let collections = self.collections.lock().expect("mutex poisoned"); @@ -251,7 +252,7 @@ impl SequencerRepo for MemCatalog { Some(t) => t, None => { let sequencer = Sequencer { - id: collections.sequencers.len() as i16 + 1, + id: SequencerId::new(collections.sequencers.len() as i16 + 1), kafka_topic_id: topic.id, kafka_partition: partition, min_unpersisted_sequence_number: 0, diff --git a/iox_catalog/src/postgres.rs b/iox_catalog/src/postgres.rs index 7efdcdc7d8..2d3f3ae1e0 100644 --- a/iox_catalog/src/postgres.rs +++ b/iox_catalog/src/postgres.rs @@ -1,8 +1,9 @@ //! A Postgres backed implementation of the Catalog use crate::interface::{ - Column, ColumnRepo, ColumnType, Error, KafkaTopic, KafkaTopicRepo, Namespace, NamespaceRepo, - QueryPool, QueryPoolRepo, RepoCollection, Result, Sequencer, SequencerRepo, Table, TableRepo, + Column, ColumnRepo, ColumnType, Error, KafkaTopic, KafkaTopicId, KafkaTopicRepo, Namespace, + NamespaceId, NamespaceRepo, QueryPool, QueryPoolId, QueryPoolRepo, RepoCollection, Result, + Sequencer, SequencerRepo, Table, TableId, TableRepo, }; use async_trait::async_trait; use observability_deps::tracing::info; @@ -130,8 +131,8 @@ impl NamespaceRepo for PostgresCatalog { &self, name: &str, retention_duration: &str, - kafka_topic_id: i32, - query_pool_id: i16, + kafka_topic_id: KafkaTopicId, + query_pool_id: QueryPoolId, ) -> Result { let rec = sqlx::query_as::<_, Namespace>( r#" @@ -184,7 +185,7 @@ SELECT * FROM namespace WHERE name = $1; #[async_trait] impl TableRepo for PostgresCatalog { - async fn create_or_get(&self, name: &str, namespace_id: i32) -> Result
<Table> {
+ async fn create_or_get(&self, name: &str, namespace_id: NamespaceId) -> Result<Table>
{ let rec = sqlx::query_as::<_, Table>( r#" INSERT INTO table_name ( name, namespace_id ) @@ -208,7 +209,7 @@ DO UPDATE SET name = table_name.name RETURNING *; Ok(rec) } - async fn list_by_namespace_id(&self, namespace_id: i32) -> Result> { + async fn list_by_namespace_id(&self, namespace_id: NamespaceId) -> Result> { let rec = sqlx::query_as::<_, Table>( r#" SELECT * FROM table_name @@ -229,7 +230,7 @@ impl ColumnRepo for PostgresCatalog { async fn create_or_get( &self, name: &str, - table_id: i32, + table_id: TableId, column_type: ColumnType, ) -> Result { let ct = column_type as i16; @@ -266,7 +267,7 @@ DO UPDATE SET name = column_name.name RETURNING *; Ok(rec) } - async fn list_by_namespace_id(&self, namespace_id: i32) -> Result> { + async fn list_by_namespace_id(&self, namespace_id: NamespaceId) -> Result> { let rec = sqlx::query_as::<_, Column>( r#" SELECT column_name.* FROM table_name From 23290fd2ff14f466aa15b2a843879e6e758925d2 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Tue, 18 Jan 2022 14:04:07 -0500 Subject: [PATCH 07/32] fix: new data structures suggested by reviewers --- Cargo.lock | 3 +- ingester/Cargo.toml | 4 +- ingester/src/data.rs | 238 +++++++++++++++++++------------------------ ingester/src/lib.rs | 11 ++ 4 files changed, 122 insertions(+), 134 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index be33e2e8c7..ceb7577155 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1857,7 +1857,8 @@ dependencies = [ name = "ingester" version = "0.1.0" dependencies = [ - "mutable_batch", + "arrow", + "parking_lot", ] [[package]] diff --git a/ingester/Cargo.toml b/ingester/Cargo.toml index 1412feb6a3..0683821a04 100644 --- a/ingester/Cargo.toml +++ b/ingester/Cargo.toml @@ -5,4 +5,6 @@ authors = ["Nga Tran "] edition = "2021" [dependencies] -mutable_batch = { path = "../mutable_batch" } \ No newline at end of file +arrow = { version = "7.0", features = ["prettyprint"] } +# mutable_batch = { path = "../mutable_batch" } +parking_lot = "0.11.2" \ No newline at end of file diff --git a/ingester/src/data.rs b/ingester/src/data.rs index 4b5f1c1185..9f3e31ba26 100644 --- a/ingester/src/data.rs +++ b/ingester/src/data.rs @@ -1,149 +1,123 @@ - - -//! Data for the lifecycle of the ingeter +//! Data for the lifecycle of the Ingeter //! use std::{sync::Arc, collections::BTreeMap}; -use mutable_batch::MutableBatch; +use parking_lot::RwLock; +use arrow::datatypes::DataType; -// ┌──────────────┐ -// │Ingester Data │ -// │ (in memory) │ -// └──────────────┘ -// │ -// ┌──────────────┼───────────────┐ -// ▼ ▼ ▼ -// ┌───────────┐ ┌────────────┐ -// │Sequencer 1│ ... │Sequencer m │ Sequencers -// └───────────┘ └────────────┘ a map of sequencer_id to Namespaces -// │ │ -// ┌──────────────┼─────────────┐ │ -// ▼ ▼ ▼ ▼ -// ┌────────────┐ ┌───────────┐ Namespaces -// │Namespace 1 │ ... │Namespace n│ ... a map of namespace_name to Tables -// └────────────┘ └───────────┘ -// │ │ -// ┌──────────────┼──────────────┐ │ -// ▼ ▼ ▼ ▼ -// ┌────────────┐ ┌────────────┐ Tables -// │ Table 1 │ ... │ Table p │ ... a map of table_name to Partitions -// └────────────┘ └────────────┘ -// │ │ -// │ ┌──────────────┼──────────────┐ -// ▼ ▼ ▼ ▼ -// ┌────────────┐ ┌────────────┐ Partitions -// ... │Partition 1 │ ... │Partition q │ a map of partition_key to PartitionData -// │(2021-12-10)│ │(2021-12-20)│ -// └────────────┘ └──────┬─────┘ -// │ │ -// ┌───────────┬────────▼────┬─────────────┐ │ -// │ │ │ │ ▼ -// ▼ ▼ ▼ ▼ -// ┌──────────┐┌───────────┐ ┌───────────┐ ┌───────────┐ ... 
-// │ Writing ││ Snaphot │ │Persisting │ │ Persisted │ PartitionData: a struct of 4 items -// │Partition ││ Partition │ │ Partition │ │ Partition │ . A `Writing Partition Batch` -// │ Batch ││ Batch 1 │ │ Batch 1 │ │ Batch 1 │ . A vector of `Snapshot Partition Batches` -// └──────────┘├───────────┤ ├───────────┤ ├───────────┤ . A vector of `Persisting Partition Batches` -// │ ... │ │ ... │ │ ... │ . A vector of `Persisted Partition batches` -// │ │ │ │ │ │ -// ├───────────┤ ├───────────┤ ├───────────┤ 1:1 map between `Snapshot` -// │ Snapshot │ │Persisting │ │ Persisted │ and `Persisting` Partition Batches -// │ Partition │ │ Partition │ │ Partition │ -// │ Batch k │ │ Batch k │ │ Batch i │ -// └───────────┘ └───────────┘ └───────────┘ - -// All sequencers aiisgned to this Ingester -#[derive(Debug, Clone)] -pub struct Sequencers { - // A map between a sequencer id to its corresponding Namespaces. - // A sequencer id is a `kafka_partittion`, a i32 defined in iox_catalog's Sequencer and - // represents a shard of data of a Table of a Namesapce. Namespace is equivalent to - // a customer db (aka an org's bucket). Depending on the comfiguration of sharding a table, - // either full data or set of rows of data of the table are included in a shard. - sequencers : BTreeMap>, +// Ingetser's setup: place to keep its Kafka Topic & Sequencer IDs +struct IngesterProfile { + // kafka_topic: + // sequencer_ids: } -// A Namespace and all of its tables of a sequencer -#[derive(Debug, Clone)] -pub struct Namespace { - // Name of the namespace which is unique and represents a customer db. - name: String, +/// Ingester Data: a Mapp of Shard ID to its Data +struct Sequencers { + // This map gets set up on initialization of the ingester so it won't ever be modified. + // The content of each SequenceData will get changed when more namespaces and tables + // get ingested. + data: BTreeMap>, + } - // Tables of this namesapce - tables : Vec
, +impl Sequencers { + /// One time initilize Sequencers of this Ingester + pub fn initialize() -> Self { + } } + + /// Data of a Shard + struct SequencerData { + // New namespaces can come in at any time so we need to be able to add new ones + namespaces: RwLock>>, + } -// A Table and all of its partittion -#[derive(Debug, Clone)] -pub struct Table { - // table name - name: String, + impl SequencerData { + pub fn new(seq_id: i32) -> Self { - // A map of partittion_key to its corresponding partition - partitions : BTreeMap, -} + } + } + + /// Data of a Namespace that belongs to a given Shard + struct NamespaceData { + tables: RwLock>>, + } + -// A Partittion and all of its in-memory data batches -// -// Stages of a batch of a partition: -// . A partition has only one `Writing Batch`. When is it big or -// old enough, defined by IngesterPersistenceSettings, it will -// be put to `Snaphot Batch` and also copied to `Pesisting Batch`. -// The new and empty Wrtiting Batch will be created for accpeting new writes -// . Snapshot and Persisting batches are 1:1 mapped at all times. Snapshot ones are -// immutable and used for querying. Persisting ones are modified to sort, -// dedupilcate, and apply tombstone and then persited to parquet files. -// While many batches can be persisted at the same time, a batch is only marked -// in the catalog to be persisted after the batches before -// its in the queue are marked persisted. -// . After the batch are marked persisted in the catalog, its will be removed -// from Sanpshot and Persisting and put in Persisted. The Persisted ones -// will get evicted based on IngesterPersistenceSettings. -// ┌───────────────────┐ -// │ Persisting │ -// │ │ -// │ ┌───────────────┐ │ -// ┌────────────┐ │ │ Snapshot │ │ ┌────────────┐ -// │ Writing │───────▶│ └───────────────┘ │───────▶│ Persisted │ -// └────────────┘ │ ┌───────────────┐ │ └────────────┘ -// │ │ Persiting │ │ -// │ └───────────────┘ │ -// └───────────────────┘ -// -#[derive(Debug, Clone)] -pub struct Partition { + /// Data of a Table in a given Namesapce that belongs to a given Shard + struct TableData { + partitions: RwLock>>, + } + + /// Data of an IOx Partition of a given Table of a Namesapce that belongs to a given Shard + struct PartitionData { + /// Key of this partition partition_key: String, + /// Data + inner: RwLock, + } + +/// Data of an IOx partition split into batches +// ┌────────────────────────┐ ┌────────────────────────┐ +// │ Snapshots │ │ Persisting │ +// │ │ │ │ +// │ ┌───────────────┐ │ │ ┌───────────────┐ │ +// │ ┌┴──────────────┐│ │ │ │ Persisting │ │ +// │ ┌┴──────────────┐├┴───┼────────────┼──▶│ Data │ │ +// │ │ Snapshot ├┘ │ │ └───────────────┘ │ +// │ └───────────────┘ │ │ │ +// ┌────────────┐ │ │ │ ... │ +// │ Buffer │───────▶│ ... │ │ │ +// └────────────┘ │ │ │ │ +// │ ┌───────────────┐ │ │ ┌───────────────┐ │ +// │ ┌┴──────────────┐│ │ │ │ Persisting │ │ +// │ ┌┴──────────────┐├┴────┼────────────┼───▶│ Data │ │ +// │ │ Snapshot ├┘ │ │ └───────────────┘ │ +// │ └───────────────┘ │ │ │ +// │ │ │ │ +// └────────────────────────┘ └────────────────────────┘ + struct DataBuffer { - // Writing batch that accepts writes to this partition - writing_batch: PartitionBatch, + /// Buffer of ingesting data + buffer: Vec, - // Queue of batches that are immutable and used for querying only. - // The batches are queue contiguously in thier data arrival time - snapshot_batches: Vec, // todo: is Vec good enough for hanlding queue? 
+ /// Data in `buffer` will be moved to a `snapshot` when one of these happens: + /// . A background persist is called + /// . A read request from Querier + /// The `buffer` will be empty when this happens. + snapshots: Vec>, - // Queue of persisting batches which is a one on one mapping with the snapshot_batches. - // Data of these batches will be modified to sort, dedupilcate, and apply tombstone and then - // persited to parquet files. While many batches can be persisted at the same time, - // a batch is only marked in the catalog to be persisted after the batches before - // its in the queue are marked persisted - pesisting_batched: Vec, + /// When a persist is called, data in `buffer` will be moved to a `snapshot` + /// and then all `snapshots` will be moved to a `persisting`. + /// Both `buffer` and 'snaphots` will be empty when this happens. + persisting: Vec, - // Persisted batches that are not yet evicted from the in-memory. - // These are batches moved from persiting_batches after they are fully persisted and marked - // so in the catalog - pesisted_batched: Vec, - -} - -// A PartitionBatch of contiguous in arrival time of writes -// todo & question: do we want to call this Chunk instead? -#[derive(Debug, Clone)] -pub struct PartitionBatch { - // To keep the PartitionBtach in order of their - // arrived data, we may need this auto created batch id - batch_id: i32, - - // Data of this partition batch - data: Arc, -} + // Extra Notes: + // . Multiple perssiting operations may be happenning concurrently but + // their persisted info must be added into the Catalog in thier data + // ingesting order. + // . When a read request comes from a Querier, all data from `snaphots` + // and `persisting` must be sent to the Querier. + // . After the `persiting` data is persisted and successfully added + // into the Catalog, it will be removed from this Data Buffer. + // This data might be added into an extra cache to serve up to + // Queriers that may not have loaded the parquet files from object + // storage yet. But this will be decided after MVP. + } + + struct PersistingData { + batches: Vec> + } + + struct DataBatch { + // a map of the unique column name to its data. Every column + // must have the same number of values. + column_data: BTreeMap>, + } + + struct ColumnData { + // it might be better to have the raw values and null markers, + // but this will probably be easier and faster to get going. + values: Option + } + \ No newline at end of file diff --git a/ingester/src/lib.rs b/ingester/src/lib.rs index 99c92b1c6f..261a622cf8 100644 --- a/ingester/src/lib.rs +++ b/ingester/src/lib.rs @@ -2,4 +2,15 @@ //! Design doc: https://docs.google.com/document/d/14NlzBiWwn0H37QxnE0k3ybTU58SKyUZmdgYpVw6az0Q/edit# //! 
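// A sketch of the buffer -> snapshot -> persisting transitions described above. The
// generic parameters of the DataBuffer fields were lost in this rendering and are
// assumed to be Vec<DataBatch>, Vec<Arc<DataBatch>> and Vec<PersistingData>; these
// methods are illustrative, not part of the patch:
impl DataBuffer {
    /// Freeze everything currently in `buffer` into an immutable snapshot that can
    /// serve queries; `buffer` is left empty.
    fn snapshot(&mut self) {
        let frozen: Vec<Arc<DataBatch>> = self.buffer.drain(..).map(Arc::new).collect();
        self.snapshots.extend(frozen);
    }

    /// Start a persist job: fold `buffer` into the snapshots, then move all snapshots
    /// into a new PersistingData entry. Both `buffer` and `snapshots` are empty afterwards.
    fn begin_persist(&mut self) {
        self.snapshot();
        let batches = std::mem::take(&mut self.snapshots);
        self.persisting.push(PersistingData { batches });
    }
}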
+#![deny(rustdoc::broken_intra_doc_links, rustdoc::bare_urls, rust_2018_idioms)] +#![warn( + missing_copy_implementations, + missing_debug_implementations, + missing_docs, + clippy::explicit_iter_loop, + clippy::future_not_send, + clippy::use_self, + clippy::clone_on_ref_ptr +)] + pub mod data; From e395ef7066b20922f55a44e90cf802a24228623b Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Tue, 18 Jan 2022 14:41:02 -0500 Subject: [PATCH 08/32] feat: add Partition scaffolding to iox_catalog --- iox_catalog/src/interface.rs | 92 ++++++++++++++++++++++++++++++++++++ iox_catalog/src/mem.rs | 50 +++++++++++++++++++- iox_catalog/src/postgres.rs | 53 ++++++++++++++++++++- 3 files changed, 191 insertions(+), 4 deletions(-) diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index f693bd238e..ca5f1a0930 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -131,6 +131,21 @@ impl SequencerId { } } +/// Unique ID for a `Sequencer` +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub struct PartitionId(i64); + +#[allow(missing_docs)] +impl PartitionId { + pub fn new(v: i64) -> Self { + Self(v) + } + pub fn get(&self) -> i64 { + self.0 + } +} + /// Container that can return repos for each of the catalog data types. #[async_trait] pub trait RepoCollection { @@ -146,6 +161,8 @@ pub trait RepoCollection { fn column(&self) -> Arc; /// repo for sequencers fn sequencer(&self) -> Arc; + /// repo for partitions + fn partition(&self) -> Arc; } /// Functions for working with Kafka topics in the catalog. @@ -219,6 +236,22 @@ pub trait SequencerRepo { async fn list_by_kafka_topic(&self, topic: &KafkaTopic) -> Result>; } +/// Functions for working with IOx partitions in the catalog. Note that these are how +/// IOx splits up data within a database, which is differenet than Kafka partitions. +#[async_trait] +pub trait PartitionRepo { + /// create or get a partition record for the given partition key, sequencer and table + async fn create_or_get( + &self, + key: &str, + sequencer_id: SequencerId, + table_id: TableId, + ) -> Result; + + /// return partitions for a given sequencer + async fn list_by_sequencer(&self, sequencer_id: SequencerId) -> Result>; +} + /// Data object for a kafka topic #[derive(Debug, Clone, Eq, PartialEq, sqlx::FromRow)] pub struct KafkaTopic { @@ -545,6 +578,19 @@ pub struct Sequencer { pub min_unpersisted_sequence_number: i64, } +/// Data object for a partition. The combination of sequencer, table and key are unique (i.e. 
only one record can exist for each combo) +#[derive(Debug, Clone, PartialEq, sqlx::FromRow)] +pub struct Partition { + /// the id of the partition + pub id: PartitionId, + /// the sequencer the data in the partition arrived from + pub sequencer_id: SequencerId, + /// the table the partition is under + pub table_id: TableId, + /// the string key of the partition + pub partition_key: String, +} + #[cfg(test)] pub(crate) mod test_helpers { use super::*; @@ -561,6 +607,7 @@ pub(crate) mod test_helpers { test_table(&new_repo()).await; test_column(&new_repo()).await; test_sequencer(&new_repo()).await; + test_partition(&new_repo()).await; } async fn test_kafka_topic(repo: &T) { @@ -726,4 +773,49 @@ pub(crate) mod test_helpers { assert_eq!(created, listed); } + + async fn test_partition(repo: &T) { + let kafka = repo.kafka_topic().create_or_get("foo").await.unwrap(); + let pool = repo.query_pool().create_or_get("foo").await.unwrap(); + let namespace = repo + .namespace() + .create("namespace_partition_test", "inf", kafka.id, pool.id) + .await + .unwrap(); + let table = repo + .table() + .create_or_get("test_table", namespace.id) + .await + .unwrap(); + let sequencer = repo.sequencer().create_or_get(&kafka, 1).await.unwrap(); + let other_sequencer = repo.sequencer().create_or_get(&kafka, 2).await.unwrap(); + + let partition_repo = repo.partition(); + + let created = ["foo", "bar"] + .iter() + .map(|key| partition_repo.create_or_get(key, sequencer.id, table.id)) + .collect::>() + .map(|v| { + let v = v.expect("failed to create partition"); + (v.id, v) + }) + .collect::>() + .await; + let _ = partition_repo + .create_or_get("asdf", other_sequencer.id, table.id) + .await + .unwrap(); + + // List them and assert they match + let listed = partition_repo + .list_by_sequencer(sequencer.id) + .await + .expect("failed to list partitions") + .into_iter() + .map(|v| (v.id, v)) + .collect::>(); + + assert_eq!(created, listed); + } } diff --git a/iox_catalog/src/mem.rs b/iox_catalog/src/mem.rs index 19c7226e74..1af293bbe5 100644 --- a/iox_catalog/src/mem.rs +++ b/iox_catalog/src/mem.rs @@ -3,8 +3,9 @@ use crate::interface::{ Column, ColumnId, ColumnRepo, ColumnType, Error, KafkaTopic, KafkaTopicId, KafkaTopicRepo, - Namespace, NamespaceId, NamespaceRepo, QueryPool, QueryPoolId, QueryPoolRepo, RepoCollection, - Result, Sequencer, SequencerId, SequencerRepo, Table, TableId, TableRepo, + Namespace, NamespaceId, NamespaceRepo, Partition, PartitionId, PartitionRepo, QueryPool, + QueryPoolId, QueryPoolRepo, RepoCollection, Result, Sequencer, SequencerId, SequencerRepo, + Table, TableId, TableRepo, }; use async_trait::async_trait; use std::convert::TryFrom; @@ -40,6 +41,7 @@ struct MemCollections { tables: Vec
, columns: Vec, sequencers: Vec, + partitions: Vec, } impl RepoCollection for Arc { @@ -66,6 +68,10 @@ impl RepoCollection for Arc { fn sequencer(&self) -> Arc { Self::clone(self) as Arc } + + fn partition(&self) -> Arc { + Self::clone(self) as Arc + } } #[async_trait] @@ -282,6 +288,46 @@ impl SequencerRepo for MemCatalog { } } +#[async_trait] +impl PartitionRepo for MemCatalog { + async fn create_or_get( + &self, + key: &str, + sequencer_id: SequencerId, + table_id: TableId, + ) -> Result { + let mut collections = self.collections.lock().expect("mutex poisoned"); + let partition = match collections.partitions.iter().find(|p| { + p.partition_key == key && p.sequencer_id == sequencer_id && p.table_id == table_id + }) { + Some(p) => p, + None => { + let p = Partition { + id: PartitionId::new(collections.partitions.len() as i64 + 1), + sequencer_id, + table_id, + partition_key: key.to_string(), + }; + collections.partitions.push(p); + collections.partitions.last().unwrap() + } + }; + + Ok(partition.clone()) + } + + async fn list_by_sequencer(&self, sequencer_id: SequencerId) -> Result> { + let collections = self.collections.lock().expect("mutex poisoned"); + let partitions: Vec<_> = collections + .partitions + .iter() + .filter(|p| p.sequencer_id == sequencer_id) + .cloned() + .collect(); + Ok(partitions) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/iox_catalog/src/postgres.rs b/iox_catalog/src/postgres.rs index 2d3f3ae1e0..14dafc6a4e 100644 --- a/iox_catalog/src/postgres.rs +++ b/iox_catalog/src/postgres.rs @@ -2,8 +2,8 @@ use crate::interface::{ Column, ColumnRepo, ColumnType, Error, KafkaTopic, KafkaTopicId, KafkaTopicRepo, Namespace, - NamespaceId, NamespaceRepo, QueryPool, QueryPoolId, QueryPoolRepo, RepoCollection, Result, - Sequencer, SequencerRepo, Table, TableId, TableRepo, + NamespaceId, NamespaceRepo, Partition, PartitionRepo, QueryPool, QueryPoolId, QueryPoolRepo, + RepoCollection, Result, Sequencer, SequencerId, SequencerRepo, Table, TableId, TableRepo, }; use async_trait::async_trait; use observability_deps::tracing::info; @@ -83,6 +83,10 @@ impl RepoCollection for Arc { fn sequencer(&self) -> Arc { Self::clone(self) as Arc } + + fn partition(&self) -> Arc { + Self::clone(self) as Arc + } } #[async_trait] @@ -326,6 +330,47 @@ impl SequencerRepo for PostgresCatalog { } } +#[async_trait] +impl PartitionRepo for PostgresCatalog { + async fn create_or_get( + &self, + key: &str, + sequencer_id: SequencerId, + table_id: TableId, + ) -> Result { + sqlx::query_as::<_, Partition>( + r#" + INSERT INTO partition + ( partition_key, sequencer_id, table_id ) + VALUES + ( $1, $2, $3 ) + ON CONFLICT ON CONSTRAINT partition_key_unique + DO UPDATE SET partition_key = partition.partition_key RETURNING *; + "#, + ) + .bind(key) // $1 + .bind(&sequencer_id) // $2 + .bind(&table_id) // $3 + .fetch_one(&self.pool) + .await + .map_err(|e| { + if is_fk_violation(&e) { + Error::ForeignKeyViolation { source: e } + } else { + Error::SqlxError { source: e } + } + }) + } + + async fn list_by_sequencer(&self, sequencer_id: SequencerId) -> Result> { + sqlx::query_as::<_, Partition>(r#"SELECT * FROM partition WHERE sequencer_id = $1;"#) + .bind(&sequencer_id) // $1 + .fetch_all(&self.pool) + .await + .map_err(|e| Error::SqlxError { source: e }) + } +} + /// The error code returned by Postgres for a unique constraint violation. 
/// /// See @@ -430,6 +475,10 @@ mod tests { .execute(pool) .await .unwrap(); + sqlx::query("delete from partition;") + .execute(pool) + .await + .unwrap(); sqlx::query("delete from table_name;") .execute(pool) .await From b1510675ae8417294e5c9f64ed804ce58a5ea665 Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Tue, 18 Jan 2022 14:49:31 -0500 Subject: [PATCH 09/32] refactor: add new type for Kafka Partition in Catalog --- iox_catalog/src/interface.rs | 37 +++++++++++++++++++++++++++++++----- iox_catalog/src/lib.rs | 6 +++--- iox_catalog/src/mem.rs | 14 +++++++++----- iox_catalog/src/postgres.rs | 13 +++++++++---- 4 files changed, 53 insertions(+), 17 deletions(-) diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index ca5f1a0930..2b31c4c964 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -131,6 +131,21 @@ impl SequencerId { } } +/// The kafka partition identifier. This is in the actual Kafka cluster. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub struct KafkaPartition(i32); + +#[allow(missing_docs)] +impl KafkaPartition { + pub fn new(v: i32) -> Self { + Self(v) + } + pub fn get(&self) -> i32 { + self.0 + } +} + /// Unique ID for a `Sequencer` #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] #[sqlx(transparent)] @@ -227,7 +242,11 @@ pub trait ColumnRepo { #[async_trait] pub trait SequencerRepo { /// create a sequencer record for the kafka topic and partition or return the existing record - async fn create_or_get(&self, topic: &KafkaTopic, partition: i32) -> Result; + async fn create_or_get( + &self, + topic: &KafkaTopic, + partition: KafkaPartition, + ) -> Result; /// list all sequencers async fn list(&self) -> Result>; @@ -570,7 +589,7 @@ pub struct Sequencer { /// the topic the sequencer is reading from pub kafka_topic_id: KafkaTopicId, /// the kafka partition the sequencer is reading from - pub kafka_partition: i32, + pub kafka_partition: KafkaPartition, /// The minimum unpersisted sequence number. Because different tables /// can be persisted at different times, it is possible some data has been persisted /// with a higher sequence number than this. 
However, all data with a sequence number @@ -753,7 +772,7 @@ pub(crate) mod test_helpers { // Create 10 sequencers let created = (1..=10) - .map(|partition| sequencer_repo.create_or_get(&kafka, partition)) + .map(|partition| sequencer_repo.create_or_get(&kafka, KafkaPartition::new(partition))) .collect::>() .map(|v| { let v = v.expect("failed to create sequencer"); @@ -787,8 +806,16 @@ pub(crate) mod test_helpers { .create_or_get("test_table", namespace.id) .await .unwrap(); - let sequencer = repo.sequencer().create_or_get(&kafka, 1).await.unwrap(); - let other_sequencer = repo.sequencer().create_or_get(&kafka, 2).await.unwrap(); + let sequencer = repo + .sequencer() + .create_or_get(&kafka, KafkaPartition::new(1)) + .await + .unwrap(); + let other_sequencer = repo + .sequencer() + .create_or_get(&kafka, KafkaPartition::new(2)) + .await + .unwrap(); let partition_repo = repo.partition(); diff --git a/iox_catalog/src/lib.rs b/iox_catalog/src/lib.rs index 8698e8f482..a23de38af4 100644 --- a/iox_catalog/src/lib.rs +++ b/iox_catalog/src/lib.rs @@ -12,8 +12,8 @@ )] use crate::interface::{ - column_type_from_field, ColumnSchema, ColumnType, Error, KafkaTopic, NamespaceSchema, - QueryPool, RepoCollection, Result, Sequencer, SequencerId, TableId, + column_type_from_field, ColumnSchema, ColumnType, Error, KafkaPartition, KafkaTopic, + NamespaceSchema, QueryPool, RepoCollection, Result, Sequencer, SequencerId, TableId, }; use futures::{stream::FuturesOrdered, StreamExt}; use influxdb_line_protocol::ParsedLine; @@ -185,7 +185,7 @@ pub async fn create_or_get_default_records( let query_pool = query_repo.create_or_get(SHARED_QUERY_POOL).await?; let sequencers = (1..=kafka_partition_count) - .map(|partition| sequencer_repo.create_or_get(&kafka_topic, partition)) + .map(|partition| sequencer_repo.create_or_get(&kafka_topic, KafkaPartition::new(partition))) .collect::>() .map(|v| { let v = v.expect("failed to create sequencer"); diff --git a/iox_catalog/src/mem.rs b/iox_catalog/src/mem.rs index 1af293bbe5..88b00f3e1b 100644 --- a/iox_catalog/src/mem.rs +++ b/iox_catalog/src/mem.rs @@ -2,10 +2,10 @@ //! used for testing or for an IOx designed to run without catalog persistence. use crate::interface::{ - Column, ColumnId, ColumnRepo, ColumnType, Error, KafkaTopic, KafkaTopicId, KafkaTopicRepo, - Namespace, NamespaceId, NamespaceRepo, Partition, PartitionId, PartitionRepo, QueryPool, - QueryPoolId, QueryPoolRepo, RepoCollection, Result, Sequencer, SequencerId, SequencerRepo, - Table, TableId, TableRepo, + Column, ColumnId, ColumnRepo, ColumnType, Error, KafkaPartition, KafkaTopic, KafkaTopicId, + KafkaTopicRepo, Namespace, NamespaceId, NamespaceRepo, Partition, PartitionId, PartitionRepo, + QueryPool, QueryPoolId, QueryPoolRepo, RepoCollection, Result, Sequencer, SequencerId, + SequencerRepo, Table, TableId, TableRepo, }; use async_trait::async_trait; use std::convert::TryFrom; @@ -247,7 +247,11 @@ impl ColumnRepo for MemCatalog { #[async_trait] impl SequencerRepo for MemCatalog { - async fn create_or_get(&self, topic: &KafkaTopic, partition: i32) -> Result { + async fn create_or_get( + &self, + topic: &KafkaTopic, + partition: KafkaPartition, + ) -> Result { let mut collections = self.collections.lock().expect("mutex poisoned"); let sequencer = match collections diff --git a/iox_catalog/src/postgres.rs b/iox_catalog/src/postgres.rs index 14dafc6a4e..230ae3617a 100644 --- a/iox_catalog/src/postgres.rs +++ b/iox_catalog/src/postgres.rs @@ -1,9 +1,10 @@ //! 
A Postgres backed implementation of the Catalog use crate::interface::{ - Column, ColumnRepo, ColumnType, Error, KafkaTopic, KafkaTopicId, KafkaTopicRepo, Namespace, - NamespaceId, NamespaceRepo, Partition, PartitionRepo, QueryPool, QueryPoolId, QueryPoolRepo, - RepoCollection, Result, Sequencer, SequencerId, SequencerRepo, Table, TableId, TableRepo, + Column, ColumnRepo, ColumnType, Error, KafkaPartition, KafkaTopic, KafkaTopicId, + KafkaTopicRepo, Namespace, NamespaceId, NamespaceRepo, Partition, PartitionRepo, QueryPool, + QueryPoolId, QueryPoolRepo, RepoCollection, Result, Sequencer, SequencerId, SequencerRepo, + Table, TableId, TableRepo, }; use async_trait::async_trait; use observability_deps::tracing::info; @@ -290,7 +291,11 @@ WHERE table_name.namespace_id = $1; #[async_trait] impl SequencerRepo for PostgresCatalog { - async fn create_or_get(&self, topic: &KafkaTopic, partition: i32) -> Result { + async fn create_or_get( + &self, + topic: &KafkaTopic, + partition: KafkaPartition, + ) -> Result { sqlx::query_as::<_, Sequencer>( r#" INSERT INTO sequencer From 125285ae9ad347455d7e2a1b5b6d4d494d1ac06e Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Tue, 18 Jan 2022 16:11:25 -0500 Subject: [PATCH 10/32] feat: commit in order to pull and merge new commit from main --- Cargo.lock | 2 ++ ingester/Cargo.toml | 3 ++- ingester/src/data.rs | 45 ++++++++++++++++++++++++++++++++++-------- ingester/src/lib.rs | 1 + ingester/src/server.rs | 22 +++++++++++++++++++++ 5 files changed, 64 insertions(+), 9 deletions(-) create mode 100644 ingester/src/server.rs diff --git a/Cargo.lock b/Cargo.lock index ceb7577155..b61d595d2c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1858,7 +1858,9 @@ name = "ingester" version = "0.1.0" dependencies = [ "arrow", + "iox_catalog", "parking_lot", + "snafu", ] [[package]] diff --git a/ingester/Cargo.toml b/ingester/Cargo.toml index 0683821a04..d3633a82e1 100644 --- a/ingester/Cargo.toml +++ b/ingester/Cargo.toml @@ -6,5 +6,6 @@ edition = "2021" [dependencies] arrow = { version = "7.0", features = ["prettyprint"] } -# mutable_batch = { path = "../mutable_batch" } +snafu = "0.7" +iox_catalog = { path = "../iox_catalog" } parking_lot = "0.11.2" \ No newline at end of file diff --git a/ingester/src/data.rs b/ingester/src/data.rs index 9f3e31ba26..97bdec2ccf 100644 --- a/ingester/src/data.rs +++ b/ingester/src/data.rs @@ -3,33 +3,62 @@ use std::{sync::Arc, collections::BTreeMap}; +use crate::server::IngesterServer; +use snafu::{OptionExt, Snafu}; +use iox_catalog::interface::{KafkaTopicId, SequencerId, RepoCollection, KafkaTopic, NamespaceId}; use parking_lot::RwLock; use arrow::datatypes::DataType; -// Ingetser's setup: place to keep its Kafka Topic & Sequencer IDs -struct IngesterProfile { - // kafka_topic: - // sequencer_ids: +#[derive(Debug, Snafu)] +//#[allow(missing_copy_implementations, missing_docs)] +pub enum Error { + #[snafu(display("Topic {} not found", name))] + TopicNotFound { name: String }, } +/// A specialized `Error` for Ingester Data errors +pub type Result = std::result::Result; + + /// Ingester Data: a Mapp of Shard ID to its Data struct Sequencers { // This map gets set up on initialization of the ingester so it won't ever be modified. // The content of each SequenceData will get changed when more namespaces and tables // get ingested. 
- data: BTreeMap>, + data: BTreeMap>, } impl Sequencers { - /// One time initilize Sequencers of this Ingester - pub fn initialize() -> Self { + /// One time initialize Sequencers of this Ingester + pub async fn initialize(ingester: &IngesterServer) -> Result { + // Get kafka topic + let kafka_topic_repro = ingester.iox_catalog.kafka_topic(); + let topic = kafka_topic_repro.create_or_get(ingester.kafka_topic_name).await?; + + // Get all namespaces of this topic + let namespace_repo = ingester.iox_catalog.namespace(); + let x = namespace_repro. + + + // Get Sequencers + let sequencer_repro = ingester.iox_catalog.sequencer(); + let sequencers = BTreeMap::default(); + for shard in ingester.kafka_partitions { + let sequencer = sequencer_repro.create_or_get(&topic, shard).await?; + + sequencers.insert(sequencer.id, ) + } + + Ok(Self { + data: BTreeMap::default(), + }) } } /// Data of a Shard struct SequencerData { // New namespaces can come in at any time so we need to be able to add new ones - namespaces: RwLock>>, + namespaces: RwLock>>, } impl SequencerData { diff --git a/ingester/src/lib.rs b/ingester/src/lib.rs index 261a622cf8..1dc60b5a46 100644 --- a/ingester/src/lib.rs +++ b/ingester/src/lib.rs @@ -14,3 +14,4 @@ )] pub mod data; +pub mod server; diff --git a/ingester/src/server.rs b/ingester/src/server.rs new file mode 100644 index 0000000000..8599b711b2 --- /dev/null +++ b/ingester/src/server.rs @@ -0,0 +1,22 @@ +use std::sync::Arc; + +use iox_catalog::mem::MemCatalog; + +/// The [`IngesterServer`] manages the lifecycle and contains all state for +/// an `ingester` server instance. +#[derive(Debug)] +struct IngesterServer<'a> { + pub kafka_topic_name: String, + pub kafka_partitions: Vec, // todo: use KafkaPartitionId when available + pub iox_catalog: &'a Arc +} + +impl<'a> IngesterServer<'a>{ + pub fn new(topic_name: String, shard_ids: Vec, catalog: &'a Arc) -> Self { + Self { + kafka_topic_name: topic_name, + kafka_partitions: shard_ids, + iox_catalog: catalog, + } + } +} \ No newline at end of file From 8067316c334cfa59e82967c7e8daff5882e38f20 Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Tue, 18 Jan 2022 17:33:05 -0500 Subject: [PATCH 11/32] fix: typo in partitionid description --- iox_catalog/src/interface.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index 2b31c4c964..3012cb0339 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -146,7 +146,7 @@ impl KafkaPartition { } } -/// Unique ID for a `Sequencer` +/// Unique ID for a `Partition` #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] #[sqlx(transparent)] pub struct PartitionId(i64); From b20d1757d0c6219f403910e020a15352479161b6 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Tue, 18 Jan 2022 17:43:03 -0500 Subject: [PATCH 12/32] feat: initialize ingester data --- ingester/src/data.rs | 142 +++++++++++++++++++++-------------------- ingester/src/lib.rs | 20 +++--- ingester/src/server.rs | 25 +++++--- 3 files changed, 101 insertions(+), 86 deletions(-) diff --git a/ingester/src/data.rs b/ingester/src/data.rs index 97bdec2ccf..0dc4bbb609 100644 --- a/ingester/src/data.rs +++ b/ingester/src/data.rs @@ -1,91 +1,100 @@ //! Data for the lifecycle of the Ingeter -//! +//! 
-use std::{sync::Arc, collections::BTreeMap}; +use std::{collections::BTreeMap, sync::Arc}; use crate::server::IngesterServer; -use snafu::{OptionExt, Snafu}; -use iox_catalog::interface::{KafkaTopicId, SequencerId, RepoCollection, KafkaTopic, NamespaceId}; -use parking_lot::RwLock; use arrow::datatypes::DataType; +use iox_catalog::interface::{NamespaceId, SequencerId, RepoCollection, KafkaPartition}; +use parking_lot::RwLock; +use snafu::{Snafu, ResultExt}; #[derive(Debug, Snafu)] -//#[allow(missing_copy_implementations, missing_docs)] +#[allow(missing_copy_implementations, missing_docs)] pub enum Error { #[snafu(display("Topic {} not found", name))] - TopicNotFound { name: String }, + TopicNotFound { + source: iox_catalog::interface::Error, + name: String}, + + #[snafu(display("Sequencer id {} not found", id.get()))] + SequencerNotFound { + source: iox_catalog::interface::Error, + id: KafkaPartition}, + } /// A specialized `Error` for Ingester Data errors pub type Result = std::result::Result; - /// Ingester Data: a Mapp of Shard ID to its Data struct Sequencers { // This map gets set up on initialization of the ingester so it won't ever be modified. - // The content of each SequenceData will get changed when more namespaces and tables + // The content of each SequenceData will get changed when more namespaces and tables // get ingested. data: BTreeMap>, - } +} impl Sequencers { /// One time initialize Sequencers of this Ingester - pub async fn initialize(ingester: &IngesterServer) -> Result { - // Get kafka topic + pub async fn initialize(ingester: &IngesterServer<'_>) -> Result { + // Get kafka topic from the catalog + let topic_name = ingester.get_topic(); let kafka_topic_repro = ingester.iox_catalog.kafka_topic(); - let topic = kafka_topic_repro.create_or_get(ingester.kafka_topic_name).await?; + let topic = kafka_topic_repro + .create_or_get(topic_name.as_str()) + .await + .context(TopicNotFoundSnafu{name: topic_name})?; - // Get all namespaces of this topic - let namespace_repo = ingester.iox_catalog.namespace(); - let x = namespace_repro. 
- - - // Get Sequencers + // Get sequencer ids from the catalog let sequencer_repro = ingester.iox_catalog.sequencer(); - let sequencers = BTreeMap::default(); - for shard in ingester.kafka_partitions { - let sequencer = sequencer_repro.create_or_get(&topic, shard).await?; - - sequencers.insert(sequencer.id, ) + let mut sequencers = BTreeMap::default(); + for shard in ingester.get_kafka_partitions() { + let sequencer = sequencer_repro + .create_or_get(&topic, shard) + .await + .context(SequencerNotFoundSnafu{id: shard})?; + // Create empty buffer for each sequencer + sequencers.insert(sequencer.id, Arc::new(SequencerData::new())); } - Ok(Self { - data: BTreeMap::default(), - }) + Ok(Self { data: sequencers }) } } - - /// Data of a Shard - struct SequencerData { + +/// Data of a Shard +struct SequencerData { // New namespaces can come in at any time so we need to be able to add new ones namespaces: RwLock>>, - } +} - impl SequencerData { - pub fn new(seq_id: i32) -> Self { +impl SequencerData { + /// Create an empty SequenceData + pub fn new() -> Self { + Self { + namespaces: RwLock::new(BTreeMap::default()), + } + } +} - } - } - - /// Data of a Namespace that belongs to a given Shard - struct NamespaceData { +/// Data of a Namespace that belongs to a given Shard +struct NamespaceData { tables: RwLock>>, - } - +} - /// Data of a Table in a given Namesapce that belongs to a given Shard - struct TableData { +/// Data of a Table in a given Namesapce that belongs to a given Shard +struct TableData { partitions: RwLock>>, - } +} - /// Data of an IOx Partition of a given Table of a Namesapce that belongs to a given Shard - struct PartitionData { +/// Data of an IOx Partition of a given Table of a Namesapce that belongs to a given Shard +struct PartitionData { /// Key of this partition partition_key: String, /// Data inner: RwLock, - } - +} + /// Data of an IOx partition split into batches // ┌────────────────────────┐ ┌────────────────────────┐ // │ Snapshots │ │ Persisting │ @@ -105,8 +114,7 @@ impl Sequencers { // │ └───────────────┘ │ │ │ // │ │ │ │ // └────────────────────────┘ └────────────────────────┘ - struct DataBuffer { - +struct DataBuffer { /// Buffer of ingesting data buffer: Vec, @@ -120,33 +128,31 @@ impl Sequencers { /// and then all `snapshots` will be moved to a `persisting`. /// Both `buffer` and 'snaphots` will be empty when this happens. persisting: Vec, - // Extra Notes: // . Multiple perssiting operations may be happenning concurrently but // their persisted info must be added into the Catalog in thier data // ingesting order. - // . When a read request comes from a Querier, all data from `snaphots` + // . When a read request comes from a Querier, all data from `snaphots` // and `persisting` must be sent to the Querier. - // . After the `persiting` data is persisted and successfully added + // . After the `persiting` data is persisted and successfully added // into the Catalog, it will be removed from this Data Buffer. - // This data might be added into an extra cache to serve up to - // Queriers that may not have loaded the parquet files from object + // This data might be added into an extra cache to serve up to + // Queriers that may not have loaded the parquet files from object // storage yet. But this will be decided after MVP. - } - - struct PersistingData { - batches: Vec> - } - - struct DataBatch { +} + +struct PersistingData { + batches: Vec>, +} + +struct DataBatch { // a map of the unique column name to its data. 
Every column // must have the same number of values. column_data: BTreeMap>, - } - - struct ColumnData { - // it might be better to have the raw values and null markers, +} + +struct ColumnData { + // it might be better to have the raw values and null markers, // but this will probably be easier and faster to get going. - values: Option - } - \ No newline at end of file + values: Option, +} diff --git a/ingester/src/lib.rs b/ingester/src/lib.rs index 1dc60b5a46..05266c1d77 100644 --- a/ingester/src/lib.rs +++ b/ingester/src/lib.rs @@ -2,16 +2,16 @@ //! Design doc: https://docs.google.com/document/d/14NlzBiWwn0H37QxnE0k3ybTU58SKyUZmdgYpVw6az0Q/edit# //! -#![deny(rustdoc::broken_intra_doc_links, rustdoc::bare_urls, rust_2018_idioms)] -#![warn( - missing_copy_implementations, - missing_debug_implementations, - missing_docs, - clippy::explicit_iter_loop, - clippy::future_not_send, - clippy::use_self, - clippy::clone_on_ref_ptr -)] +#![deny(rustdoc::broken_intra_doc_links, rustdoc::bare_urls, rust_2018_idioms)] +#![warn( + missing_copy_implementations, + missing_debug_implementations, + missing_docs, + clippy::explicit_iter_loop, + clippy::future_not_send, + clippy::use_self, + clippy::clone_on_ref_ptr +)] pub mod data; pub mod server; diff --git a/ingester/src/server.rs b/ingester/src/server.rs index 8599b711b2..1e4cb21a49 100644 --- a/ingester/src/server.rs +++ b/ingester/src/server.rs @@ -1,22 +1,31 @@ use std::sync::Arc; -use iox_catalog::mem::MemCatalog; +use iox_catalog::{mem::MemCatalog, interface::KafkaPartition}; -/// The [`IngesterServer`] manages the lifecycle and contains all state for +/// The [`IngesterServer`] manages the lifecycle and contains all state for /// an `ingester` server instance. #[derive(Debug)] -struct IngesterServer<'a> { +pub struct IngesterServer<'a> { pub kafka_topic_name: String, - pub kafka_partitions: Vec, // todo: use KafkaPartitionId when available - pub iox_catalog: &'a Arc + pub kafka_partitions: Vec, // todo: use KafkaPartitionId when available + pub iox_catalog: &'a Arc, } -impl<'a> IngesterServer<'a>{ - pub fn new(topic_name: String, shard_ids: Vec, catalog: &'a Arc) -> Self { +impl<'a> IngesterServer<'a> { + pub fn new(topic_name: String, shard_ids: Vec, catalog: &'a Arc) -> Self { Self { kafka_topic_name: topic_name, kafka_partitions: shard_ids, iox_catalog: catalog, } } -} \ No newline at end of file + + pub fn get_topic(&self) -> String { + self.kafka_topic_name.clone() + } + + + pub fn get_kafka_partitions(&self) -> Vec { + self.kafka_partitions.clone() + } +} From 667ec5bfc5a663362ee2d578132450e686060280 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Tue, 18 Jan 2022 18:01:06 -0500 Subject: [PATCH 13/32] fix: the code is now compile without warnings --- ingester/src/lib.rs | 4 ++++ ingester/src/server.rs | 14 +++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/ingester/src/lib.rs b/ingester/src/lib.rs index 05266c1d77..9fd1048da9 100644 --- a/ingester/src/lib.rs +++ b/ingester/src/lib.rs @@ -13,5 +13,9 @@ clippy::clone_on_ref_ptr )] +#[allow( + dead_code +)] + pub mod data; pub mod server; diff --git a/ingester/src/server.rs b/ingester/src/server.rs index 1e4cb21a49..86897b86a9 100644 --- a/ingester/src/server.rs +++ b/ingester/src/server.rs @@ -1,3 +1,6 @@ +//! Ingester Server +//! + use std::sync::Arc; use iox_catalog::{mem::MemCatalog, interface::KafkaPartition}; @@ -6,12 +9,16 @@ use iox_catalog::{mem::MemCatalog, interface::KafkaPartition}; /// an `ingester` server instance. 
#[derive(Debug)] pub struct IngesterServer<'a> { - pub kafka_topic_name: String, - pub kafka_partitions: Vec, // todo: use KafkaPartitionId when available + // Kafka Topic assigned to this ingester + kafka_topic_name: String, + // Kafka Partitions (Shards) assigned to this INgester + kafka_partitions: Vec, + /// Catalog of this ingester pub iox_catalog: &'a Arc, } impl<'a> IngesterServer<'a> { + /// Initialize the Ingester pub fn new(topic_name: String, shard_ids: Vec, catalog: &'a Arc) -> Self { Self { kafka_topic_name: topic_name, @@ -20,11 +27,12 @@ impl<'a> IngesterServer<'a> { } } + /// Return a kafka topic name pub fn get_topic(&self) -> String { self.kafka_topic_name.clone() } - + /// Return Kafka Partitions pub fn get_kafka_partitions(&self) -> Vec { self.kafka_partitions.clone() } From 1c970a2064eaf5faf0080fc8e5a899aa50566b3f Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Tue, 18 Jan 2022 18:01:47 -0500 Subject: [PATCH 14/32] fix: format --- ingester/src/data.rs | 19 ++++++++++--------- ingester/src/lib.rs | 5 +---- ingester/src/server.rs | 10 +++++++--- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/ingester/src/data.rs b/ingester/src/data.rs index 0dc4bbb609..746156503d 100644 --- a/ingester/src/data.rs +++ b/ingester/src/data.rs @@ -5,23 +5,24 @@ use std::{collections::BTreeMap, sync::Arc}; use crate::server::IngesterServer; use arrow::datatypes::DataType; -use iox_catalog::interface::{NamespaceId, SequencerId, RepoCollection, KafkaPartition}; +use iox_catalog::interface::{KafkaPartition, NamespaceId, RepoCollection, SequencerId}; use parking_lot::RwLock; -use snafu::{Snafu, ResultExt}; +use snafu::{ResultExt, Snafu}; #[derive(Debug, Snafu)] #[allow(missing_copy_implementations, missing_docs)] pub enum Error { #[snafu(display("Topic {} not found", name))] - TopicNotFound { + TopicNotFound { source: iox_catalog::interface::Error, - name: String}, + name: String, + }, #[snafu(display("Sequencer id {} not found", id.get()))] - SequencerNotFound { + SequencerNotFound { source: iox_catalog::interface::Error, - id: KafkaPartition}, - + id: KafkaPartition, + }, } /// A specialized `Error` for Ingester Data errors @@ -44,7 +45,7 @@ impl Sequencers { let topic = kafka_topic_repro .create_or_get(topic_name.as_str()) .await - .context(TopicNotFoundSnafu{name: topic_name})?; + .context(TopicNotFoundSnafu { name: topic_name })?; // Get sequencer ids from the catalog let sequencer_repro = ingester.iox_catalog.sequencer(); @@ -53,7 +54,7 @@ impl Sequencers { let sequencer = sequencer_repro .create_or_get(&topic, shard) .await - .context(SequencerNotFoundSnafu{id: shard})?; + .context(SequencerNotFoundSnafu { id: shard })?; // Create empty buffer for each sequencer sequencers.insert(sequencer.id, Arc::new(SequencerData::new())); } diff --git a/ingester/src/lib.rs b/ingester/src/lib.rs index 9fd1048da9..309f26fe61 100644 --- a/ingester/src/lib.rs +++ b/ingester/src/lib.rs @@ -13,9 +13,6 @@ clippy::clone_on_ref_ptr )] -#[allow( - dead_code -)] - +#[allow(dead_code)] pub mod data; pub mod server; diff --git a/ingester/src/server.rs b/ingester/src/server.rs index 86897b86a9..18561e3be0 100644 --- a/ingester/src/server.rs +++ b/ingester/src/server.rs @@ -3,7 +3,7 @@ use std::sync::Arc; -use iox_catalog::{mem::MemCatalog, interface::KafkaPartition}; +use iox_catalog::{interface::KafkaPartition, mem::MemCatalog}; /// The [`IngesterServer`] manages the lifecycle and contains all state for /// an `ingester` server instance. 
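// A sketch of how the pieces in this series fit together at startup. The startup
// function itself, the topic name, and the partition numbers are placeholders assumed
// for illustration, not part of the patch:
async fn startup(catalog: &Arc<MemCatalog>) -> Result<Sequencers> {
    let ingester = IngesterServer::new(
        "iox_shared".to_string(),
        vec![KafkaPartition::new(0), KafkaPartition::new(1)],
        catalog,
    );
    // Creates the kafka topic and sequencer records in the catalog if needed and builds
    // one empty SequencerData buffer per assigned Kafka partition.
    Sequencers::initialize(&ingester).await
}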
@@ -13,13 +13,17 @@ pub struct IngesterServer<'a> { kafka_topic_name: String, // Kafka Partitions (Shards) assigned to this INgester kafka_partitions: Vec, - /// Catalog of this ingester + /// Catalog of this ingester pub iox_catalog: &'a Arc, } impl<'a> IngesterServer<'a> { /// Initialize the Ingester - pub fn new(topic_name: String, shard_ids: Vec, catalog: &'a Arc) -> Self { + pub fn new( + topic_name: String, + shard_ids: Vec, + catalog: &'a Arc, + ) -> Self { Self { kafka_topic_name: topic_name, kafka_partitions: shard_ids, From 367a9fb812ec84f3985237b32c7ed1663a630b22 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Tue, 18 Jan 2022 18:10:42 -0500 Subject: [PATCH 15/32] fix: add workspace-hack --- Cargo.lock | 1 + ingester/Cargo.toml | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index b61d595d2c..ae11c9380a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1861,6 +1861,7 @@ dependencies = [ "iox_catalog", "parking_lot", "snafu", + "workspace-hack", ] [[package]] diff --git a/ingester/Cargo.toml b/ingester/Cargo.toml index d3633a82e1..85b0497cdc 100644 --- a/ingester/Cargo.toml +++ b/ingester/Cargo.toml @@ -8,4 +8,5 @@ edition = "2021" arrow = { version = "7.0", features = ["prettyprint"] } snafu = "0.7" iox_catalog = { path = "../iox_catalog" } -parking_lot = "0.11.2" \ No newline at end of file +parking_lot = "0.11.2" +workspace-hack = { path = "../workspace-hack"} From f36d66deb730164d1e30247a89d0ae0b4fd497b0 Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Tue, 18 Jan 2022 18:17:21 -0500 Subject: [PATCH 16/32] feat: Add Tombstone to Catalog * Adds TombstoneId and Tombstone to the iox_catalog with associated interfaces * Adds SequenceNumber new type for use with Tombstone * Adds Timestamp new type for use with Tombstone * Adds constraint to the Postgres schema to enforce tombstone uniqueness by table_id, sequencer_id, and sequence_number --- .../20211229171744_initial_schema.sql | 3 +- iox_catalog/src/interface.rs | 165 ++++++++++++++++++ iox_catalog/src/mem.rs | 62 ++++++- iox_catalog/src/postgres.rs | 64 ++++++- 4 files changed, 289 insertions(+), 5 deletions(-) diff --git a/iox_catalog/migrations/20211229171744_initial_schema.sql b/iox_catalog/migrations/20211229171744_initial_schema.sql index 16fe51b09f..1ce222b18f 100644 --- a/iox_catalog/migrations/20211229171744_initial_schema.sql +++ b/iox_catalog/migrations/20211229171744_initial_schema.sql @@ -104,7 +104,8 @@ CREATE TABLE IF NOT EXISTS iox_catalog.tombstone min_time BIGINT NOT NULL, max_time BIGINT NOT NULL, serialized_predicate TEXT NOT NULL, - PRIMARY KEY (id) + PRIMARY KEY (id), + CONSTRAINT tombstone_unique UNIQUE (table_id, sequencer_id, sequence_number) ); CREATE TABLE IF NOT EXISTS iox_catalog.processed_tombstone diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index 3012cb0339..eb319b8be5 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -161,6 +161,51 @@ impl PartitionId { } } +/// Unique ID for a `Tombstone` +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub struct TombstoneId(i64); + +#[allow(missing_docs)] +impl TombstoneId { + pub fn new(v: i64) -> Self { + Self(v) + } + pub fn get(&self) -> i64 { + self.0 + } +} + +/// A sequence number from a `Sequencer` (kafka partition) +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub struct SequenceNumber(i64); + +#[allow(missing_docs)] +impl SequenceNumber { + pub fn 
new(v: i64) -> Self { + Self(v) + } + pub fn get(&self) -> i64 { + self.0 + } +} + +/// A time in nanoseconds from epoch +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub struct Timestamp(i64); + +#[allow(missing_docs)] +impl Timestamp { + pub fn new(v: i64) -> Self { + Self(v) + } + pub fn get(&self) -> i64 { + self.0 + } +} + /// Container that can return repos for each of the catalog data types. #[async_trait] pub trait RepoCollection { @@ -178,6 +223,8 @@ pub trait RepoCollection { fn sequencer(&self) -> Arc; /// repo for partitions fn partition(&self) -> Arc; + /// repo for tombstones + fn tombstone(&self) -> Arc; } /// Functions for working with Kafka topics in the catalog. @@ -271,6 +318,30 @@ pub trait PartitionRepo { async fn list_by_sequencer(&self, sequencer_id: SequencerId) -> Result>; } +/// Functions for working with tombstones in the catalog +#[async_trait] +pub trait TombstoneRepo { + /// create or get a tombstone + async fn create_or_get( + &self, + table_id: TableId, + sequencer_id: SequencerId, + sequence_number: SequenceNumber, + min_time: Timestamp, + max_time: Timestamp, + predicate: &str, + ) -> Result; + + /// return all tombstones for the sequencer with a sequence number greater than that + /// passed in. This will be used by the ingester on startup to see what tombstones + /// might have to be applied to data that is read from the write buffer. + async fn list_tombstones_by_sequencer_greater_than( + &self, + sequencer_id: SequencerId, + sequence_number: SequenceNumber, + ) -> Result>; +} + /// Data object for a kafka topic #[derive(Debug, Clone, Eq, PartialEq, sqlx::FromRow)] pub struct KafkaTopic { @@ -610,6 +681,25 @@ pub struct Partition { pub partition_key: String, } +/// Data object for a tombstone. 
+#[derive(Debug, Clone, PartialEq, sqlx::FromRow)] +pub struct Tombstone { + /// the id of the tombstone + pub id: TombstoneId, + /// the table the tombstone is associated with + pub table_id: TableId, + /// the sequencer the tombstone was sent through + pub sequencer_id: SequencerId, + /// the sequence nubmer assigned to the tombstone from the sequencer + pub sequence_number: SequenceNumber, + /// the min time (inclusive) that the delete applies to + pub min_time: Timestamp, + /// the max time (exclusive) that the delete applies to + pub max_time: Timestamp, + /// the full delete predicate + pub serialized_predicate: String, +} + #[cfg(test)] pub(crate) mod test_helpers { use super::*; @@ -627,6 +717,7 @@ pub(crate) mod test_helpers { test_column(&new_repo()).await; test_sequencer(&new_repo()).await; test_partition(&new_repo()).await; + test_tombstone(&new_repo()).await; } async fn test_kafka_topic(repo: &T) { @@ -845,4 +936,78 @@ pub(crate) mod test_helpers { assert_eq!(created, listed); } + + async fn test_tombstone(repo: &T) { + let kafka = repo.kafka_topic().create_or_get("foo").await.unwrap(); + let pool = repo.query_pool().create_or_get("foo").await.unwrap(); + let namespace = repo + .namespace() + .create("namespace_tombstone_test", "inf", kafka.id, pool.id) + .await + .unwrap(); + let table = repo + .table() + .create_or_get("test_table", namespace.id) + .await + .unwrap(); + let other_table = repo + .table() + .create_or_get("other", namespace.id) + .await + .unwrap(); + let sequencer = repo + .sequencer() + .create_or_get(&kafka, KafkaPartition::new(1)) + .await + .unwrap(); + + let tombstone_repo = repo.tombstone(); + let min_time = Timestamp::new(1); + let max_time = Timestamp::new(10); + let t1 = tombstone_repo + .create_or_get( + table.id, + sequencer.id, + SequenceNumber::new(1), + min_time, + max_time, + "whatevs", + ) + .await + .unwrap(); + assert!(t1.id > TombstoneId::new(0)); + assert_eq!(t1.sequencer_id, sequencer.id); + assert_eq!(t1.sequence_number, SequenceNumber::new(1)); + assert_eq!(t1.min_time, min_time); + assert_eq!(t1.max_time, max_time); + assert_eq!(t1.serialized_predicate, "whatevs"); + let t2 = tombstone_repo + .create_or_get( + other_table.id, + sequencer.id, + SequenceNumber::new(2), + min_time, + max_time, + "bleh", + ) + .await + .unwrap(); + let t3 = tombstone_repo + .create_or_get( + table.id, + sequencer.id, + SequenceNumber::new(3), + min_time, + max_time, + "sdf", + ) + .await + .unwrap(); + + let listed = tombstone_repo + .list_tombstones_by_sequencer_greater_than(sequencer.id, SequenceNumber::new(1)) + .await + .unwrap(); + assert_eq!(vec![t2, t3], listed); + } } diff --git a/iox_catalog/src/mem.rs b/iox_catalog/src/mem.rs index 88b00f3e1b..999d7e7175 100644 --- a/iox_catalog/src/mem.rs +++ b/iox_catalog/src/mem.rs @@ -4,8 +4,9 @@ use crate::interface::{ Column, ColumnId, ColumnRepo, ColumnType, Error, KafkaPartition, KafkaTopic, KafkaTopicId, KafkaTopicRepo, Namespace, NamespaceId, NamespaceRepo, Partition, PartitionId, PartitionRepo, - QueryPool, QueryPoolId, QueryPoolRepo, RepoCollection, Result, Sequencer, SequencerId, - SequencerRepo, Table, TableId, TableRepo, + QueryPool, QueryPoolId, QueryPoolRepo, RepoCollection, Result, SequenceNumber, Sequencer, + SequencerId, SequencerRepo, Table, TableId, TableRepo, Timestamp, Tombstone, TombstoneId, + TombstoneRepo, }; use async_trait::async_trait; use std::convert::TryFrom; @@ -42,6 +43,7 @@ struct MemCollections { columns: Vec, sequencers: Vec, partitions: Vec, + tombstones: Vec, } impl 
RepoCollection for Arc { @@ -72,6 +74,10 @@ impl RepoCollection for Arc { fn partition(&self) -> Arc { Self::clone(self) as Arc } + + fn tombstone(&self) -> Arc { + Self::clone(self) as Arc + } } #[async_trait] @@ -332,6 +338,58 @@ impl PartitionRepo for MemCatalog { } } +#[async_trait] +impl TombstoneRepo for MemCatalog { + async fn create_or_get( + &self, + table_id: TableId, + sequencer_id: SequencerId, + sequence_number: SequenceNumber, + min_time: Timestamp, + max_time: Timestamp, + predicate: &str, + ) -> Result { + let mut collections = self.collections.lock().expect("mutex poisoned"); + let tombstone = match collections.tombstones.iter().find(|t| { + t.table_id == table_id + && t.sequencer_id == sequencer_id + && t.sequence_number == sequence_number + }) { + Some(t) => t, + None => { + let t = Tombstone { + id: TombstoneId::new(collections.tombstones.len() as i64 + 1), + table_id, + sequencer_id, + sequence_number, + min_time, + max_time, + serialized_predicate: predicate.to_string(), + }; + collections.tombstones.push(t); + collections.tombstones.last().unwrap() + } + }; + + Ok(tombstone.clone()) + } + + async fn list_tombstones_by_sequencer_greater_than( + &self, + sequencer_id: SequencerId, + sequence_number: SequenceNumber, + ) -> Result> { + let collections = self.collections.lock().expect("mutex poisoned"); + let tombstones: Vec<_> = collections + .tombstones + .iter() + .filter(|t| t.sequencer_id == sequencer_id && t.sequence_number > sequence_number) + .cloned() + .collect(); + Ok(tombstones) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/iox_catalog/src/postgres.rs b/iox_catalog/src/postgres.rs index 230ae3617a..08c8fc43cf 100644 --- a/iox_catalog/src/postgres.rs +++ b/iox_catalog/src/postgres.rs @@ -3,8 +3,8 @@ use crate::interface::{ Column, ColumnRepo, ColumnType, Error, KafkaPartition, KafkaTopic, KafkaTopicId, KafkaTopicRepo, Namespace, NamespaceId, NamespaceRepo, Partition, PartitionRepo, QueryPool, - QueryPoolId, QueryPoolRepo, RepoCollection, Result, Sequencer, SequencerId, SequencerRepo, - Table, TableId, TableRepo, + QueryPoolId, QueryPoolRepo, RepoCollection, Result, SequenceNumber, Sequencer, SequencerId, + SequencerRepo, Table, TableId, TableRepo, Timestamp, Tombstone, TombstoneRepo, }; use async_trait::async_trait; use observability_deps::tracing::info; @@ -88,6 +88,10 @@ impl RepoCollection for Arc { fn partition(&self) -> Arc { Self::clone(self) as Arc } + + fn tombstone(&self) -> Arc { + Self::clone(self) as Arc + } } #[async_trait] @@ -376,6 +380,58 @@ impl PartitionRepo for PostgresCatalog { } } +#[async_trait] +impl TombstoneRepo for PostgresCatalog { + async fn create_or_get( + &self, + table_id: TableId, + sequencer_id: SequencerId, + sequence_number: SequenceNumber, + min_time: Timestamp, + max_time: Timestamp, + predicate: &str, + ) -> Result { + sqlx::query_as::<_, Tombstone>( + r#" + INSERT INTO tombstone + ( table_id, sequencer_id, sequence_number, min_time, max_time, serialized_predicate ) + VALUES + ( $1, $2, $3, $4, $5, $6 ) + ON CONFLICT ON CONSTRAINT tombstone_unique + DO UPDATE SET table_id = tombstone.table_id RETURNING *; + "#, + ) + .bind(&table_id) // $1 + .bind(&sequencer_id) // $2 + .bind(&sequence_number) // $3 + .bind(&min_time) // $4 + .bind(&max_time) // $5 + .bind(predicate) // $6 + .fetch_one(&self.pool) + .await + .map_err(|e| { + if is_fk_violation(&e) { + Error::ForeignKeyViolation { source: e } + } else { + Error::SqlxError { source: e } + } + }) + } + + async fn list_tombstones_by_sequencer_greater_than( + 
&self, + sequencer_id: SequencerId, + sequence_number: SequenceNumber, + ) -> Result> { + sqlx::query_as::<_, Tombstone>(r#"SELECT * FROM tombstone WHERE sequencer_id = $1 AND sequence_number > $2 ORDER BY id;"#) + .bind(&sequencer_id) // $1 + .bind(&sequence_number) // $2 + .fetch_all(&self.pool) + .await + .map_err(|e| Error::SqlxError { source: e }) + } +} + /// The error code returned by Postgres for a unique constraint violation. /// /// See @@ -476,6 +532,10 @@ mod tests { } async fn clear_schema(pool: &Pool) { + sqlx::query("delete from tombstone;") + .execute(pool) + .await + .unwrap(); sqlx::query("delete from column_name;") .execute(pool) .await From b57f027e3514e56fe7d4588975274ce956fdc751 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Tue, 18 Jan 2022 20:57:13 -0500 Subject: [PATCH 17/32] refactor: address review comments --- Cargo.lock | 1 + ingester/Cargo.toml | 3 +- ingester/src/data.rs | 63 +++++++++++++++--------------------- ingester/src/lib.rs | 2 +- iox_catalog/src/interface.rs | 2 +- 5 files changed, 31 insertions(+), 40 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ae11c9380a..7c2b3c7018 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1859,6 +1859,7 @@ version = "0.1.0" dependencies = [ "arrow", "iox_catalog", + "mutable_batch", "parking_lot", "snafu", "workspace-hack", diff --git a/ingester/Cargo.toml b/ingester/Cargo.toml index 85b0497cdc..8d8017e904 100644 --- a/ingester/Cargo.toml +++ b/ingester/Cargo.toml @@ -6,7 +6,8 @@ edition = "2021" [dependencies] arrow = { version = "7.0", features = ["prettyprint"] } -snafu = "0.7" iox_catalog = { path = "../iox_catalog" } +mutable_batch = { path = "../mutable_batch"} parking_lot = "0.11.2" +snafu = "0.7" workspace-hack = { path = "../workspace-hack"} diff --git a/ingester/src/data.rs b/ingester/src/data.rs index 746156503d..b353a3cd46 100644 --- a/ingester/src/data.rs +++ b/ingester/src/data.rs @@ -4,22 +4,24 @@ use std::{collections::BTreeMap, sync::Arc}; use crate::server::IngesterServer; -use arrow::datatypes::DataType; -use iox_catalog::interface::{KafkaPartition, NamespaceId, RepoCollection, SequencerId}; +use iox_catalog::interface::{ + KafkaPartition, NamespaceId, PartitionId, RepoCollection, SequencerId, TableId, +}; +use mutable_batch::MutableBatch; use parking_lot::RwLock; use snafu::{ResultExt, Snafu}; #[derive(Debug, Snafu)] #[allow(missing_copy_implementations, missing_docs)] pub enum Error { - #[snafu(display("Topic {} not found", name))] - TopicNotFound { + #[snafu(display("Error while reading Topic {}", name))] + ReadTopic { source: iox_catalog::interface::Error, name: String, }, - #[snafu(display("Sequencer id {} not found", id.get()))] - SequencerNotFound { + #[snafu(display("Error while reading Kafka Partition id {}", id.get()))] + ReadSequencer { source: iox_catalog::interface::Error, id: KafkaPartition, }, @@ -29,6 +31,7 @@ pub enum Error { pub type Result = std::result::Result; /// Ingester Data: a Mapp of Shard ID to its Data +#[derive(Default)] struct Sequencers { // This map gets set up on initialization of the ingester so it won't ever be modified. 
// The content of each SequenceData will get changed when more namespaces and tables @@ -45,7 +48,7 @@ impl Sequencers { let topic = kafka_topic_repro .create_or_get(topic_name.as_str()) .await - .context(TopicNotFoundSnafu { name: topic_name })?; + .context(ReadTopicSnafu { name: topic_name })?; // Get sequencer ids from the catalog let sequencer_repro = ingester.iox_catalog.sequencer(); @@ -54,9 +57,9 @@ impl Sequencers { let sequencer = sequencer_repro .create_or_get(&topic, shard) .await - .context(SequencerNotFoundSnafu { id: shard })?; + .context(ReadSequencerSnafu { id: shard })?; // Create empty buffer for each sequencer - sequencers.insert(sequencer.id, Arc::new(SequencerData::new())); + sequencers.insert(sequencer.id, Arc::new(SequencerData::default())); } Ok(Self { data: sequencers }) @@ -64,35 +67,29 @@ impl Sequencers { } /// Data of a Shard +#[derive(Default)] struct SequencerData { // New namespaces can come in at any time so we need to be able to add new ones namespaces: RwLock>>, } -impl SequencerData { - /// Create an empty SequenceData - pub fn new() -> Self { - Self { - namespaces: RwLock::new(BTreeMap::default()), - } - } -} - /// Data of a Namespace that belongs to a given Shard +#[derive(Default)] struct NamespaceData { - tables: RwLock>>, + tables: RwLock>>, } /// Data of a Table in a given Namesapce that belongs to a given Shard +#[derive(Default)] struct TableData { - partitions: RwLock>>, + // Map pf partition key to its data + partition_data: RwLock>>, } /// Data of an IOx Partition of a given Table of a Namesapce that belongs to a given Shard +#[derive(Default)] struct PartitionData { - /// Key of this partition - partition_key: String, - /// Data + id: PartitionId, inner: RwLock, } @@ -115,6 +112,7 @@ struct PartitionData { // │ └───────────────┘ │ │ │ // │ │ │ │ // └────────────────────────┘ └────────────────────────┘ +#[derive(Default)] struct DataBuffer { /// Buffer of ingesting data buffer: Vec, @@ -128,7 +126,7 @@ struct DataBuffer { /// When a persist is called, data in `buffer` will be moved to a `snapshot` /// and then all `snapshots` will be moved to a `persisting`. /// Both `buffer` and 'snaphots` will be empty when this happens. - persisting: Vec, + persisting: Vec, // Extra Notes: // . Multiple perssiting operations may be happenning concurrently but // their persisted info must be added into the Catalog in thier data @@ -142,18 +140,9 @@ struct DataBuffer { // storage yet. But this will be decided after MVP. } -struct PersistingData { - batches: Vec>, -} - struct DataBatch { - // a map of the unique column name to its data. Every column - // must have the same number of values. - column_data: BTreeMap>, -} - -struct ColumnData { - // it might be better to have the raw values and null markers, - // but this will probably be easier and faster to get going. - values: Option, + /// Sequencer number of the ingesting data + pub sequencer_number: u64, + /// Ingesting data + pub inner: MutableBatch, } diff --git a/ingester/src/lib.rs b/ingester/src/lib.rs index 309f26fe61..fea1eaff23 100644 --- a/ingester/src/lib.rs +++ b/ingester/src/lib.rs @@ -2,7 +2,7 @@ //! Design doc: https://docs.google.com/document/d/14NlzBiWwn0H37QxnE0k3ybTU58SKyUZmdgYpVw6az0Q/edit# //! 
-#![deny(rustdoc::broken_intra_doc_links, rustdoc::bare_urls, rust_2018_idioms)] +#![deny(rustdoc::broken_intra_doc_links, rust_2018_idioms)] #![warn( missing_copy_implementations, missing_debug_implementations, diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index 2b31c4c964..ba8c6f15b9 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -147,7 +147,7 @@ impl KafkaPartition { } /// Unique ID for a `Sequencer` -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] #[sqlx(transparent)] pub struct PartitionId(i64); From b89c250ccca8b8e92288f87f41cad30ece1ecce6 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Tue, 18 Jan 2022 21:39:22 -0500 Subject: [PATCH 18/32] refactor: use RepoColection instead of MemCatalog --- ingester/src/data.rs | 4 +++- ingester/src/lib.rs | 1 - ingester/src/server.rs | 21 +++++++++++---------- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/ingester/src/data.rs b/ingester/src/data.rs index b353a3cd46..3e03a9189b 100644 --- a/ingester/src/data.rs +++ b/ingester/src/data.rs @@ -41,7 +41,9 @@ struct Sequencers { impl Sequencers { /// One time initialize Sequencers of this Ingester - pub async fn initialize(ingester: &IngesterServer<'_>) -> Result { + pub async fn initialize( + ingester: &IngesterServer<'_, T>, + ) -> Result { // Get kafka topic from the catalog let topic_name = ingester.get_topic(); let kafka_topic_repro = ingester.iox_catalog.kafka_topic(); diff --git a/ingester/src/lib.rs b/ingester/src/lib.rs index fea1eaff23..31bc719a49 100644 --- a/ingester/src/lib.rs +++ b/ingester/src/lib.rs @@ -5,7 +5,6 @@ #![deny(rustdoc::broken_intra_doc_links, rust_2018_idioms)] #![warn( missing_copy_implementations, - missing_debug_implementations, missing_docs, clippy::explicit_iter_loop, clippy::future_not_send, diff --git a/ingester/src/server.rs b/ingester/src/server.rs index 18561e3be0..6019b77262 100644 --- a/ingester/src/server.rs +++ b/ingester/src/server.rs @@ -3,27 +3,28 @@ use std::sync::Arc; -use iox_catalog::{interface::KafkaPartition, mem::MemCatalog}; +use iox_catalog::interface::{KafkaPartition, RepoCollection}; /// The [`IngesterServer`] manages the lifecycle and contains all state for /// an `ingester` server instance. 
-#[derive(Debug)] -pub struct IngesterServer<'a> { +pub struct IngesterServer<'a, T> +where + T: RepoCollection + Send + Sync, +{ // Kafka Topic assigned to this ingester kafka_topic_name: String, // Kafka Partitions (Shards) assigned to this INgester kafka_partitions: Vec, /// Catalog of this ingester - pub iox_catalog: &'a Arc, + pub iox_catalog: &'a Arc, } -impl<'a> IngesterServer<'a> { +impl<'a, T> IngesterServer<'a, T> +where + T: RepoCollection + Send + Sync, +{ /// Initialize the Ingester - pub fn new( - topic_name: String, - shard_ids: Vec, - catalog: &'a Arc, - ) -> Self { + pub fn new(topic_name: String, shard_ids: Vec, catalog: &'a Arc) -> Self { Self { kafka_topic_name: topic_name, kafka_partitions: shard_ids, From fe9a41ee9acc8696611b5672b90cc54e12acba0f Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Tue, 18 Jan 2022 21:45:20 -0500 Subject: [PATCH 19/32] chore: remove non-longer needed dependency --- ingester/Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/ingester/Cargo.toml b/ingester/Cargo.toml index 8d8017e904..1d4324b0c6 100644 --- a/ingester/Cargo.toml +++ b/ingester/Cargo.toml @@ -5,7 +5,6 @@ authors = ["Nga Tran "] edition = "2021" [dependencies] -arrow = { version = "7.0", features = ["prettyprint"] } iox_catalog = { path = "../iox_catalog" } mutable_batch = { path = "../mutable_batch"} parking_lot = "0.11.2" From e8294d21ec4cfb1c0155274198ad8e7d62200e77 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Tue, 18 Jan 2022 22:02:27 -0500 Subject: [PATCH 20/32] fix: add .lock --- Cargo.lock | 1 - 1 file changed, 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 7c2b3c7018..fb0c35c9cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1857,7 +1857,6 @@ dependencies = [ name = "ingester" version = "0.1.0" dependencies = [ - "arrow", "iox_catalog", "mutable_batch", "parking_lot", From 8a17e1c132a8fc807ffae699f6531385edf59cb2 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Wed, 19 Jan 2022 11:20:20 -0500 Subject: [PATCH 21/32] refactor: address review comments --- Cargo.lock | 1 + ingester/Cargo.toml | 1 + ingester/src/data.rs | 66 ++++++++++++++++++++++-------------- iox_catalog/src/interface.rs | 2 +- 4 files changed, 43 insertions(+), 27 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fb0c35c9cd..7c2b3c7018 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1857,6 +1857,7 @@ dependencies = [ name = "ingester" version = "0.1.0" dependencies = [ + "arrow", "iox_catalog", "mutable_batch", "parking_lot", diff --git a/ingester/Cargo.toml b/ingester/Cargo.toml index 1d4324b0c6..8d8017e904 100644 --- a/ingester/Cargo.toml +++ b/ingester/Cargo.toml @@ -5,6 +5,7 @@ authors = ["Nga Tran "] edition = "2021" [dependencies] +arrow = { version = "7.0", features = ["prettyprint"] } iox_catalog = { path = "../iox_catalog" } mutable_batch = { path = "../mutable_batch"} parking_lot = "0.11.2" diff --git a/ingester/src/data.rs b/ingester/src/data.rs index 3e03a9189b..8909032de9 100644 --- a/ingester/src/data.rs +++ b/ingester/src/data.rs @@ -1,6 +1,7 @@ //! Data for the lifecycle of the Ingeter //! 
+use arrow::record_batch::RecordBatch; use std::{collections::BTreeMap, sync::Arc}; use crate::server::IngesterServer; @@ -48,7 +49,7 @@ impl Sequencers { let topic_name = ingester.get_topic(); let kafka_topic_repro = ingester.iox_catalog.kafka_topic(); let topic = kafka_topic_repro - .create_or_get(topic_name.as_str()) + .create_or_get(topic_name.as_str()) //todo: use `get` instead .await .context(ReadTopicSnafu { name: topic_name })?; @@ -57,7 +58,7 @@ impl Sequencers { let mut sequencers = BTreeMap::default(); for shard in ingester.get_kafka_partitions() { let sequencer = sequencer_repro - .create_or_get(&topic, shard) + .create_or_get(&topic, shard) //todo: use `get` instead .await .context(ReadSequencerSnafu { id: shard })?; // Create empty buffer for each sequencer @@ -89,46 +90,48 @@ struct TableData { } /// Data of an IOx Partition of a given Table of a Namesapce that belongs to a given Shard -#[derive(Default)] struct PartitionData { id: PartitionId, inner: RwLock, } /// Data of an IOx partition split into batches -// ┌────────────────────────┐ ┌────────────────────────┐ -// │ Snapshots │ │ Persisting │ -// │ │ │ │ -// │ ┌───────────────┐ │ │ ┌───────────────┐ │ -// │ ┌┴──────────────┐│ │ │ │ Persisting │ │ -// │ ┌┴──────────────┐├┴───┼────────────┼──▶│ Data │ │ -// │ │ Snapshot ├┘ │ │ └───────────────┘ │ -// │ └───────────────┘ │ │ │ -// ┌────────────┐ │ │ │ ... │ -// │ Buffer │───────▶│ ... │ │ │ -// └────────────┘ │ │ │ │ -// │ ┌───────────────┐ │ │ ┌───────────────┐ │ -// │ ┌┴──────────────┐│ │ │ │ Persisting │ │ -// │ ┌┴──────────────┐├┴────┼────────────┼───▶│ Data │ │ -// │ │ Snapshot ├┘ │ │ └───────────────┘ │ -// │ └───────────────┘ │ │ │ -// │ │ │ │ -// └────────────────────────┘ └────────────────────────┘ +/// ┌────────────────────────┐ ┌────────────────────────┐ ┌─────────────────────────┐ +/// │ Buffer │ │ Snapshots │ │ Persisting │ +/// │ ┌───────────────────┐ │ │ │ │ │ +/// │ │ ┌───────────────┐│ │ │ ┌───────────────────┐ │ │ ┌───────────────────┐ │ +/// │ │ ┌┴──────────────┐│├─┼────────┼─┼─▶┌───────────────┐│ │ │ │ ┌───────────────┐│ │ +/// │ │┌┴──────────────┐├┘│ │ │ │ ┌┴──────────────┐││ │ │ │ ┌┴──────────────┐││ │ +/// │ ││ BufferBatch ├┘ │ │ │ │┌┴──────────────┐├┘│──┼──────┼─▶│┌┴──────────────┐├┘│ │ +/// │ │└───────────────┘ │ │ ┌───┼─▶│ SnapshotBatch ├┘ │ │ │ ││ SnapshotBatch ├┘ │ │ +/// │ └───────────────────┘ │ │ │ │└───────────────┘ │ │ │ │└───────────────┘ │ │ +/// │ ... │ │ │ └───────────────────┘ │ │ └───────────────────┘ │ +/// │ ┌───────────────────┐ │ │ │ │ │ │ +/// │ │ ┌───────────────┐│ │ │ │ ... │ │ ... │ +/// │ │ ┌┴──────────────┐││ │ │ │ │ │ │ +/// │ │┌┴──────────────┐├┘│─┼────┘ │ ┌───────────────────┐ │ │ ┌───────────────────┐ │ +/// │ ││ BufferBatch ├┘ │ │ │ │ ┌───────────────┐│ │ │ │ ┌───────────────┐│ │ +/// │ │└───────────────┘ │ │ │ │ ┌┴──────────────┐││ │ │ │ ┌┴──────────────┐││ │ +/// │ └───────────────────┘ │ │ │┌┴──────────────┐├┘│──┼──────┼─▶│┌┴──────────────┐├┘│ │ +/// │ │ │ ││ SnapshotBatch ├┘ │ │ │ ││ SnapshotBatch ├┘ │ │ +/// │ ... │ │ │└───────────────┘ │ │ │ │└───────────────┘ │ │ +/// │ │ │ └───────────────────┘ │ │ └───────────────────┘ │ +/// └────────────────────────┘ └────────────────────────┘ └─────────────────────────┘ #[derive(Default)] struct DataBuffer { /// Buffer of ingesting data - buffer: Vec, + buffer: Vec, /// Data in `buffer` will be moved to a `snapshot` when one of these happens: /// . A background persist is called /// . A read request from Querier /// The `buffer` will be empty when this happens. 
- snapshots: Vec>, + snapshots: Vec>, /// When a persist is called, data in `buffer` will be moved to a `snapshot` /// and then all `snapshots` will be moved to a `persisting`. /// Both `buffer` and 'snaphots` will be empty when this happens. - persisting: Vec, + persisting: Vec, // Extra Notes: // . Multiple perssiting operations may be happenning concurrently but // their persisted info must be added into the Catalog in thier data @@ -141,10 +144,21 @@ struct DataBuffer { // Queriers that may not have loaded the parquet files from object // storage yet. But this will be decided after MVP. } - -struct DataBatch { +/// BufferBatch is a MutauableBatch with its ingesting order, sequencer_number, that +/// helps the ingester keep the batches of data in thier ingesting order +struct BufferBatch { /// Sequencer number of the ingesting data pub sequencer_number: u64, /// Ingesting data pub inner: MutableBatch, } + +/// SnapshotBatch contains data of many contiguous BufferBatches +struct SnapshotBatch { + /// Min sequencer number of its comebined BufferBatches + pub min_sequencer_number: u64, + /// Max sequencer number of its comebined BufferBatches + pub max_sequencer_number: u64, + /// Data of its comebined BufferBatches kept in one RecordBatch + pub inner: RecordBatch, +} diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index 85994d47bc..3012cb0339 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -147,7 +147,7 @@ impl KafkaPartition { } /// Unique ID for a `Partition` -#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] #[sqlx(transparent)] pub struct PartitionId(i64); From edb97f51cf18f8b403158e81c2feded711c2804d Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Wed, 19 Jan 2022 12:36:18 -0500 Subject: [PATCH 22/32] refactor: add persisting struct --- Cargo.lock | 1 + ingester/Cargo.toml | 1 + ingester/src/data.rs | 20 ++++++++++++++++---- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7c2b3c7018..32dcd82657 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1862,6 +1862,7 @@ dependencies = [ "mutable_batch", "parking_lot", "snafu", + "uuid", "workspace-hack", ] diff --git a/ingester/Cargo.toml b/ingester/Cargo.toml index 8d8017e904..7e1d8f1719 100644 --- a/ingester/Cargo.toml +++ b/ingester/Cargo.toml @@ -10,4 +10,5 @@ iox_catalog = { path = "../iox_catalog" } mutable_batch = { path = "../mutable_batch"} parking_lot = "0.11.2" snafu = "0.7" +uuid = { version = "0.8", features = ["v4"] } workspace-hack = { path = "../workspace-hack"} diff --git a/ingester/src/data.rs b/ingester/src/data.rs index 8909032de9..d53a96b0a3 100644 --- a/ingester/src/data.rs +++ b/ingester/src/data.rs @@ -3,6 +3,7 @@ use arrow::record_batch::RecordBatch; use std::{collections::BTreeMap, sync::Arc}; +use uuid::Uuid; use crate::server::IngesterServer; use iox_catalog::interface::{ @@ -131,9 +132,10 @@ struct DataBuffer { /// When a persist is called, data in `buffer` will be moved to a `snapshot` /// and then all `snapshots` will be moved to a `persisting`. /// Both `buffer` and 'snaphots` will be empty when this happens. - persisting: Vec, + persisting: Option>, // Extra Notes: - // . Multiple perssiting operations may be happenning concurrently but + // . In MVP, we will only persist a set of sanpshots at a time. 
+ // In later version, multiple perssiting operations may be happenning concurrently but // their persisted info must be added into the Catalog in thier data // ingesting order. // . When a read request comes from a Querier, all data from `snaphots` @@ -150,7 +152,7 @@ struct BufferBatch { /// Sequencer number of the ingesting data pub sequencer_number: u64, /// Ingesting data - pub inner: MutableBatch, + pub data: MutableBatch, } /// SnapshotBatch contains data of many contiguous BufferBatches @@ -160,5 +162,15 @@ struct SnapshotBatch { /// Max sequencer number of its comebined BufferBatches pub max_sequencer_number: u64, /// Data of its comebined BufferBatches kept in one RecordBatch - pub inner: RecordBatch, + pub data: RecordBatch, +} + +/// PersistingBatch contains all needed info and data for creating +/// a parquet file for given set of SnapshotBatches +struct PersistingBatch { + sequencer_id: SequencerId, + table_id: TableId, + partition_id: PartitionId, + object_store_id: Uuid, + data: Vec, } From 9977f174b720c1e431695da7068c5e582eff4d67 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Wed, 19 Jan 2022 12:51:04 -0500 Subject: [PATCH 23/32] refactor: use wrapper ID --- ingester/src/data.rs | 17 +++++------------ ingester/src/server.rs | 26 ++++++++++++++++++-------- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/ingester/src/data.rs b/ingester/src/data.rs index d53a96b0a3..91454c8eba 100644 --- a/ingester/src/data.rs +++ b/ingester/src/data.rs @@ -7,7 +7,7 @@ use uuid::Uuid; use crate::server::IngesterServer; use iox_catalog::interface::{ - KafkaPartition, NamespaceId, PartitionId, RepoCollection, SequencerId, TableId, + KafkaPartition, NamespaceId, PartitionId, RepoCollection, SequenceNumber, SequencerId, TableId, }; use mutable_batch::MutableBatch; use parking_lot::RwLock; @@ -46,17 +46,10 @@ impl Sequencers { pub async fn initialize( ingester: &IngesterServer<'_, T>, ) -> Result { - // Get kafka topic from the catalog - let topic_name = ingester.get_topic(); - let kafka_topic_repro = ingester.iox_catalog.kafka_topic(); - let topic = kafka_topic_repro - .create_or_get(topic_name.as_str()) //todo: use `get` instead - .await - .context(ReadTopicSnafu { name: topic_name })?; - // Get sequencer ids from the catalog let sequencer_repro = ingester.iox_catalog.sequencer(); let mut sequencers = BTreeMap::default(); + let topic = ingester.get_topic(); for shard in ingester.get_kafka_partitions() { let sequencer = sequencer_repro .create_or_get(&topic, shard) //todo: use `get` instead @@ -150,7 +143,7 @@ struct DataBuffer { /// helps the ingester keep the batches of data in thier ingesting order struct BufferBatch { /// Sequencer number of the ingesting data - pub sequencer_number: u64, + pub sequencer_number: SequenceNumber, /// Ingesting data pub data: MutableBatch, } @@ -158,9 +151,9 @@ struct BufferBatch { /// SnapshotBatch contains data of many contiguous BufferBatches struct SnapshotBatch { /// Min sequencer number of its comebined BufferBatches - pub min_sequencer_number: u64, + pub min_sequencer_number: SequenceNumber, /// Max sequencer number of its comebined BufferBatches - pub max_sequencer_number: u64, + pub max_sequencer_number: SequenceNumber, /// Data of its comebined BufferBatches kept in one RecordBatch pub data: RecordBatch, } diff --git a/ingester/src/server.rs b/ingester/src/server.rs index 6019b77262..11ce6dc553 100644 --- a/ingester/src/server.rs +++ b/ingester/src/server.rs @@ -3,7 +3,7 @@ use std::sync::Arc; -use 
iox_catalog::interface::{KafkaPartition, RepoCollection}; +use iox_catalog::interface::{KafkaPartition, KafkaTopic, KafkaTopicId, RepoCollection}; /// The [`IngesterServer`] manages the lifecycle and contains all state for /// an `ingester` server instance. @@ -11,9 +11,9 @@ pub struct IngesterServer<'a, T> where T: RepoCollection + Send + Sync, { - // Kafka Topic assigned to this ingester - kafka_topic_name: String, - // Kafka Partitions (Shards) assigned to this INgester + /// Kafka Topic assigned to this ingester + kafka_topic: KafkaTopic, + /// Kafka Partitions (Shards) assigned to this INgester kafka_partitions: Vec, /// Catalog of this ingester pub iox_catalog: &'a Arc, @@ -24,17 +24,27 @@ where T: RepoCollection + Send + Sync, { /// Initialize the Ingester - pub fn new(topic_name: String, shard_ids: Vec, catalog: &'a Arc) -> Self { + pub fn new(topic: KafkaTopic, shard_ids: Vec, catalog: &'a Arc) -> Self { Self { - kafka_topic_name: topic_name, + kafka_topic: topic, kafka_partitions: shard_ids, iox_catalog: catalog, } } + /// Return a kafka topic + pub fn get_topic(&self) -> KafkaTopic { + self.kafka_topic.clone() + } + + /// Return a kafka topic id + pub fn get_topic_id(&self) -> KafkaTopicId { + self.kafka_topic.id + } + /// Return a kafka topic name - pub fn get_topic(&self) -> String { - self.kafka_topic_name.clone() + pub fn get_topic_name(&self) -> String { + self.kafka_topic.name.clone() } /// Return Kafka Partitions From be3e52331253b3eff5365ebe08186a399cabe268 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Wed, 19 Jan 2022 13:25:03 -0500 Subject: [PATCH 24/32] fix: use PersistingBatch --- ingester/src/data.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ingester/src/data.rs b/ingester/src/data.rs index 91454c8eba..f21bf15364 100644 --- a/ingester/src/data.rs +++ b/ingester/src/data.rs @@ -125,7 +125,7 @@ struct DataBuffer { /// When a persist is called, data in `buffer` will be moved to a `snapshot` /// and then all `snapshots` will be moved to a `persisting`. /// Both `buffer` and 'snaphots` will be empty when this happens. - persisting: Option>, + persisting: Option>, // Extra Notes: // . In MVP, we will only persist a set of sanpshots at a time. 
// In later version, multiple perssiting operations may be happenning concurrently but From 41038721e100ce478b4244d248ed1092aeaeb2f8 Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Wed, 19 Jan 2022 14:13:02 -0500 Subject: [PATCH 25/32] feat: Add parquet file records to iox_catalog * Adds ParquetFile and scaffolding to IOx catalog * Changed the file_location in parquet_file to object_store_id which is a uuid --- Cargo.lock | 2 + iox_catalog/Cargo.toml | 3 +- .../20211229171744_initial_schema.sql | 6 +- iox_catalog/src/interface.rs | 191 ++++++++++++++++++ iox_catalog/src/mem.rs | 79 +++++++- iox_catalog/src/postgres.rs | 81 +++++++- 6 files changed, 351 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 32dcd82657..28cd1db4b0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1911,6 +1911,7 @@ dependencies = [ "snafu", "sqlx", "tokio", + "uuid", "workspace-hack", ] @@ -4351,6 +4352,7 @@ dependencies = [ "thiserror", "tokio-stream", "url", + "uuid", "whoami", ] diff --git a/iox_catalog/Cargo.toml b/iox_catalog/Cargo.toml index be3c2ea82f..50d63d10f3 100644 --- a/iox_catalog/Cargo.toml +++ b/iox_catalog/Cargo.toml @@ -10,10 +10,11 @@ chrono = { version = "0.4", default-features = false, features = ["clock"] } futures = "0.3" observability_deps = { path = "../observability_deps" } snafu = "0.7" -sqlx = { version = "0.5", features = [ "runtime-tokio-native-tls" , "postgres" ] } +sqlx = { version = "0.5", features = [ "runtime-tokio-native-tls" , "postgres", "uuid" ] } tokio = { version = "1.13", features = ["full", "io-util", "macros", "parking_lot", "rt-multi-thread", "time"] } influxdb_line_protocol = { path = "../influxdb_line_protocol" } workspace-hack = { path = "../workspace-hack"} +uuid = { version = "0.8", features = ["v4"] } [dev-dependencies] # In alphabetical order dotenv = "0.15.0" diff --git a/iox_catalog/migrations/20211229171744_initial_schema.sql b/iox_catalog/migrations/20211229171744_initial_schema.sql index 1ce222b18f..6c8606ec73 100644 --- a/iox_catalog/migrations/20211229171744_initial_schema.sql +++ b/iox_catalog/migrations/20211229171744_initial_schema.sql @@ -84,15 +84,15 @@ CREATE TABLE IF NOT EXISTS iox_catalog.parquet_file id BIGINT GENERATED ALWAYS AS IDENTITY, sequencer_id SMALLINT NOT NULL, table_id INT NOT NULL, - partition_id INT NOT NULL, - file_location VARCHAR NOT NULL, + partition_id BIGINT NOT NULL, + object_store_id uuid NOT NULL, min_sequence_number BIGINT, max_sequence_number BIGINT, min_time BIGINT, max_time BIGINT, to_delete BOOLEAN, PRIMARY KEY (id), - CONSTRAINT parquet_location_unique UNIQUE (file_location) + CONSTRAINT parquet_location_unique UNIQUE (object_store_id) ); CREATE TABLE IF NOT EXISTS iox_catalog.tombstone diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index eb319b8be5..0f62638709 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -7,6 +7,7 @@ use std::collections::BTreeMap; use std::convert::TryFrom; use std::fmt::Formatter; use std::sync::Arc; +use uuid::Uuid; #[derive(Debug, Snafu)] #[allow(missing_copy_implementations, missing_docs)] @@ -36,6 +37,12 @@ pub enum Error { #[snafu(display("namespace {} not found", name))] NamespaceNotFound { name: String }, + + #[snafu(display("parquet file with object_store_id {} already exists", object_store_id))] + FileExists { object_store_id: Uuid }, + + #[snafu(display("parquet_file record {} not found", id))] + ParquetRecordNotFound { id: ParquetFileId }, } /// A specialized `Error` for Catalog errors @@ -206,6 +213,28 @@ impl 
Timestamp { } } +/// Unique ID for a `ParquetFile` +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub struct ParquetFileId(i64); + +#[allow(missing_docs)] +impl ParquetFileId { + pub fn new(v: i64) -> Self { + Self(v) + } + pub fn get(&self) -> i64 { + self.0 + } +} + +impl std::fmt::Display for ParquetFileId { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + // Use `self.number` to refer to each positional data point. + write!(f, "{}", self.0) + } +} + /// Container that can return repos for each of the catalog data types. #[async_trait] pub trait RepoCollection { @@ -225,6 +254,8 @@ pub trait RepoCollection { fn partition(&self) -> Arc; /// repo for tombstones fn tombstone(&self) -> Arc; + /// repo for parquet_files + fn parquet_file(&self) -> Arc; } /// Functions for working with Kafka topics in the catalog. @@ -342,6 +373,37 @@ pub trait TombstoneRepo { ) -> Result>; } +/// Functions for working with parquet file pointers in the catalog +#[async_trait] +pub trait ParquetFileRepo { + /// create the parquet file + #[allow(clippy::too_many_arguments)] + async fn create( + &self, + sequencer_id: SequencerId, + table_id: TableId, + partition_id: PartitionId, + object_store_id: Uuid, + min_sequence_number: SequenceNumber, + max_sequence_number: SequenceNumber, + min_time: Timestamp, + max_time: Timestamp, + ) -> Result; + + /// Flag the parquet file for deletion + async fn flag_for_delete(&self, id: ParquetFileId) -> Result<()>; + + /// Get all parquet files for a sequencer with a max_sequence_number greater than the + /// one passed in. The ingester will use this on startup to see which files were persisted + /// that are greater than its min_unpersisted_number so that it can discard any data in + /// these partitions on replay. + async fn list_by_sequencer_greater_than( + &self, + sequencer_id: SequencerId, + sequence_number: SequenceNumber, + ) -> Result>; +} + /// Data object for a kafka topic #[derive(Debug, Clone, Eq, PartialEq, sqlx::FromRow)] pub struct KafkaTopic { @@ -700,6 +762,31 @@ pub struct Tombstone { pub serialized_predicate: String, } +/// Data for a parquet file reference in the catalog. 
+#[derive(Debug, Copy, Clone, PartialEq, sqlx::FromRow)] +pub struct ParquetFile { + /// the id of the file in the catalog + pub id: ParquetFileId, + /// the sequencer that sequenced writes that went into this file + pub sequencer_id: SequencerId, + /// the table + pub table_id: TableId, + /// the partition + pub partition_id: PartitionId, + /// the uuid used in the object store path for this file + pub object_store_id: Uuid, + /// the minimum sequence number from a record in this file + pub min_sequence_number: SequenceNumber, + /// the maximum sequence number from a record in this file + pub max_sequence_number: SequenceNumber, + /// the min timestamp of data in this file + pub min_time: Timestamp, + /// the max timestamp of data in this file + pub max_time: Timestamp, + /// flag to mark that this file should be deleted from object storage + pub to_delete: bool, +} + #[cfg(test)] pub(crate) mod test_helpers { use super::*; @@ -718,6 +805,7 @@ pub(crate) mod test_helpers { test_sequencer(&new_repo()).await; test_partition(&new_repo()).await; test_tombstone(&new_repo()).await; + test_parquet_file(&new_repo()).await; } async fn test_kafka_topic(repo: &T) { @@ -1010,4 +1098,107 @@ pub(crate) mod test_helpers { .unwrap(); assert_eq!(vec![t2, t3], listed); } + + async fn test_parquet_file(repo: &T) { + let kafka = repo.kafka_topic().create_or_get("foo").await.unwrap(); + let pool = repo.query_pool().create_or_get("foo").await.unwrap(); + let namespace = repo + .namespace() + .create("namespace_parquet_file_test", "inf", kafka.id, pool.id) + .await + .unwrap(); + let table = repo + .table() + .create_or_get("test_table", namespace.id) + .await + .unwrap(); + let other_table = repo + .table() + .create_or_get("other", namespace.id) + .await + .unwrap(); + let sequencer = repo + .sequencer() + .create_or_get(&kafka, KafkaPartition::new(1)) + .await + .unwrap(); + let partition = repo + .partition() + .create_or_get("one", sequencer.id, table.id) + .await + .unwrap(); + let other_partition = repo + .partition() + .create_or_get("one", sequencer.id, other_table.id) + .await + .unwrap(); + + let min_time = Timestamp::new(1); + let max_time = Timestamp::new(10); + + let parquet_repo = repo.parquet_file(); + let parquet_file = parquet_repo + .create( + sequencer.id, + partition.table_id, + partition.id, + Uuid::new_v4(), + SequenceNumber::new(10), + SequenceNumber::new(140), + min_time, + max_time, + ) + .await + .unwrap(); + + // verify that trying to create a file with the same UUID throws an error + let err = parquet_repo + .create( + sequencer.id, + partition.table_id, + partition.id, + parquet_file.object_store_id, + SequenceNumber::new(10), + SequenceNumber::new(140), + min_time, + max_time, + ) + .await + .unwrap_err(); + assert!(matches!(err, Error::FileExists { object_store_id: _ })); + + let other_file = parquet_repo + .create( + sequencer.id, + other_partition.table_id, + other_partition.id, + Uuid::new_v4(), + SequenceNumber::new(45), + SequenceNumber::new(200), + min_time, + max_time, + ) + .await + .unwrap(); + + let files = parquet_repo + .list_by_sequencer_greater_than(sequencer.id, SequenceNumber::new(1)) + .await + .unwrap(); + assert_eq!(vec![parquet_file, other_file], files); + let files = parquet_repo + .list_by_sequencer_greater_than(sequencer.id, SequenceNumber::new(150)) + .await + .unwrap(); + assert_eq!(vec![other_file], files); + + // verify that to_delete is initially set to false and that it can be updated to true + assert!(!parquet_file.to_delete); + 
parquet_repo.flag_for_delete(parquet_file.id).await.unwrap(); + let files = parquet_repo + .list_by_sequencer_greater_than(sequencer.id, SequenceNumber::new(1)) + .await + .unwrap(); + assert!(files.first().unwrap().to_delete); + } } diff --git a/iox_catalog/src/mem.rs b/iox_catalog/src/mem.rs index 999d7e7175..f2eeda8579 100644 --- a/iox_catalog/src/mem.rs +++ b/iox_catalog/src/mem.rs @@ -3,15 +3,16 @@ use crate::interface::{ Column, ColumnId, ColumnRepo, ColumnType, Error, KafkaPartition, KafkaTopic, KafkaTopicId, - KafkaTopicRepo, Namespace, NamespaceId, NamespaceRepo, Partition, PartitionId, PartitionRepo, - QueryPool, QueryPoolId, QueryPoolRepo, RepoCollection, Result, SequenceNumber, Sequencer, - SequencerId, SequencerRepo, Table, TableId, TableRepo, Timestamp, Tombstone, TombstoneId, - TombstoneRepo, + KafkaTopicRepo, Namespace, NamespaceId, NamespaceRepo, ParquetFile, ParquetFileId, + ParquetFileRepo, Partition, PartitionId, PartitionRepo, QueryPool, QueryPoolId, QueryPoolRepo, + RepoCollection, Result, SequenceNumber, Sequencer, SequencerId, SequencerRepo, Table, TableId, + TableRepo, Timestamp, Tombstone, TombstoneId, TombstoneRepo, }; use async_trait::async_trait; use std::convert::TryFrom; use std::fmt::Formatter; use std::sync::{Arc, Mutex}; +use uuid::Uuid; /// In-memory catalog that implements the `RepoCollection` and individual repo traits from /// the catalog interface. @@ -44,6 +45,7 @@ struct MemCollections { sequencers: Vec, partitions: Vec, tombstones: Vec, + parquet_files: Vec, } impl RepoCollection for Arc { @@ -78,6 +80,10 @@ impl RepoCollection for Arc { fn tombstone(&self) -> Arc { Self::clone(self) as Arc } + + fn parquet_file(&self) -> Arc { + Self::clone(self) as Arc + } } #[async_trait] @@ -390,6 +396,71 @@ impl TombstoneRepo for MemCatalog { } } +#[async_trait] +impl ParquetFileRepo for MemCatalog { + async fn create( + &self, + sequencer_id: SequencerId, + table_id: TableId, + partition_id: PartitionId, + object_store_id: Uuid, + min_sequence_number: SequenceNumber, + max_sequence_number: SequenceNumber, + min_time: Timestamp, + max_time: Timestamp, + ) -> Result { + let mut collections = self.collections.lock().expect("mutex poisoned"); + if collections + .parquet_files + .iter() + .any(|f| f.object_store_id == object_store_id) + { + return Err(Error::FileExists { object_store_id }); + } + + let parquet_file = ParquetFile { + id: ParquetFileId::new(collections.parquet_files.len() as i64 + 1), + sequencer_id, + table_id, + partition_id, + object_store_id, + min_sequence_number, + max_sequence_number, + min_time, + max_time, + to_delete: false, + }; + collections.parquet_files.push(parquet_file); + Ok(*collections.parquet_files.last().unwrap()) + } + + async fn flag_for_delete(&self, id: ParquetFileId) -> Result<()> { + let mut collections = self.collections.lock().expect("mutex poisoned"); + + match collections.parquet_files.iter_mut().find(|p| p.id == id) { + Some(f) => f.to_delete = true, + None => return Err(Error::ParquetRecordNotFound { id }), + } + + Ok(()) + } + + async fn list_by_sequencer_greater_than( + &self, + sequencer_id: SequencerId, + sequence_number: SequenceNumber, + ) -> Result> { + let collections = self.collections.lock().expect("mutex poisoned"); + let files: Vec<_> = collections + .parquet_files + .iter() + .filter(|f| f.sequencer_id == sequencer_id && f.max_sequence_number > sequence_number) + .cloned() + .collect(); + Ok(files) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/iox_catalog/src/postgres.rs 
b/iox_catalog/src/postgres.rs index 08c8fc43cf..0e8300555b 100644 --- a/iox_catalog/src/postgres.rs +++ b/iox_catalog/src/postgres.rs @@ -2,15 +2,17 @@ use crate::interface::{ Column, ColumnRepo, ColumnType, Error, KafkaPartition, KafkaTopic, KafkaTopicId, - KafkaTopicRepo, Namespace, NamespaceId, NamespaceRepo, Partition, PartitionRepo, QueryPool, - QueryPoolId, QueryPoolRepo, RepoCollection, Result, SequenceNumber, Sequencer, SequencerId, - SequencerRepo, Table, TableId, TableRepo, Timestamp, Tombstone, TombstoneRepo, + KafkaTopicRepo, Namespace, NamespaceId, NamespaceRepo, ParquetFile, ParquetFileId, + ParquetFileRepo, Partition, PartitionId, PartitionRepo, QueryPool, QueryPoolId, QueryPoolRepo, + RepoCollection, Result, SequenceNumber, Sequencer, SequencerId, SequencerRepo, Table, TableId, + TableRepo, Timestamp, Tombstone, TombstoneRepo, }; use async_trait::async_trait; use observability_deps::tracing::info; use sqlx::{postgres::PgPoolOptions, Executor, Pool, Postgres}; use std::sync::Arc; use std::time::Duration; +use uuid::Uuid; const MAX_CONNECTIONS: u32 = 5; const CONNECT_TIMEOUT: Duration = Duration::from_secs(2); @@ -92,6 +94,10 @@ impl RepoCollection for Arc { fn tombstone(&self) -> Arc { Self::clone(self) as Arc } + + fn parquet_file(&self) -> Arc { + Self::clone(self) as Arc + } } #[async_trait] @@ -432,6 +438,75 @@ impl TombstoneRepo for PostgresCatalog { } } +#[async_trait] +impl ParquetFileRepo for PostgresCatalog { + async fn create( + &self, + sequencer_id: SequencerId, + table_id: TableId, + partition_id: PartitionId, + object_store_id: Uuid, + min_sequence_number: SequenceNumber, + max_sequence_number: SequenceNumber, + min_time: Timestamp, + max_time: Timestamp, + ) -> Result { + let rec = sqlx::query_as::<_, ParquetFile>( + r#" +INSERT INTO parquet_file ( sequencer_id, table_id, partition_id, object_store_id, min_sequence_number, max_sequence_number, min_time, max_time, to_delete ) +VALUES ( $1, $2, $3, $4, $5, $6, $7, $8, false ) +RETURNING * + "#, + ) + .bind(sequencer_id) // $1 + .bind(table_id) // $2 + .bind(partition_id) // $3 + .bind(object_store_id) // $4 + .bind(min_sequence_number) // $5 + .bind(max_sequence_number) // $6 + .bind(min_time) // $7 + .bind(max_time) // $8 + .fetch_one(&self.pool) + .await + .map_err(|e| { + if is_unique_violation(&e) { + Error::FileExists { + object_store_id, + } + } else if is_fk_violation(&e) { + Error::ForeignKeyViolation { source: e } + } else { + Error::SqlxError { source: e } + } + })?; + + Ok(rec) + } + + async fn flag_for_delete(&self, id: ParquetFileId) -> Result<()> { + let _ = sqlx::query(r#"UPDATE parquet_file SET to_delete = true WHERE id = $1;"#) + .bind(&id) // $1 + .execute(&self.pool) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + Ok(()) + } + + async fn list_by_sequencer_greater_than( + &self, + sequencer_id: SequencerId, + sequence_number: SequenceNumber, + ) -> Result> { + sqlx::query_as::<_, ParquetFile>(r#"SELECT * FROM parquet_file WHERE sequencer_id = $1 AND max_sequence_number > $2 ORDER BY id;"#) + .bind(&sequencer_id) // $1 + .bind(&sequence_number) // $2 + .fetch_all(&self.pool) + .await + .map_err(|e| Error::SqlxError { source: e }) + } +} + /// The error code returned by Postgres for a unique constraint violation. 
/// /// See From d825dab8e2b12d182a71e8a9aa9a378db8424068 Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Wed, 19 Jan 2022 14:48:00 -0500 Subject: [PATCH 26/32] fix: hakari workspace hack --- Cargo.lock | 1 + workspace-hack/Cargo.toml | 2 ++ 2 files changed, 3 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 28cd1db4b0..04a96ff666 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5423,6 +5423,7 @@ dependencies = [ "tracing", "tracing-core", "tracing-subscriber", + "uuid", ] [[package]] diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index aa265f90c0..d93dbcb42c 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -53,6 +53,7 @@ tower = { version = "0.4", features = ["balance", "buffer", "discover", "futures tracing = { version = "0.1", features = ["attributes", "log", "max_level_trace", "release_max_level_debug", "std", "tracing-attributes"] } tracing-core = { version = "0.1", features = ["lazy_static", "std"] } tracing-subscriber = { version = "0.3", features = ["alloc", "ansi", "ansi_term", "env-filter", "fmt", "lazy_static", "matchers", "regex", "registry", "sharded-slab", "smallvec", "std", "thread_local", "tracing", "tracing-log"] } +uuid = { version = "0.8", features = ["getrandom", "std", "v4"] } [build-dependencies] ahash = { version = "0.7", features = ["std"] } @@ -86,5 +87,6 @@ smallvec = { version = "1", default-features = false, features = ["union"] } syn = { version = "1", features = ["clone-impls", "derive", "extra-traits", "full", "parsing", "printing", "proc-macro", "quote", "visit", "visit-mut"] } tokio = { version = "1", features = ["bytes", "fs", "full", "io-std", "io-util", "libc", "macros", "memchr", "mio", "net", "num_cpus", "once_cell", "parking_lot", "process", "rt", "rt-multi-thread", "signal", "signal-hook-registry", "sync", "time", "tokio-macros", "winapi"] } tokio-stream = { version = "0.1", features = ["fs", "net", "time"] } +uuid = { version = "0.8", features = ["getrandom", "std", "v4"] } ### END HAKARI SECTION From 28db06297ffba2ec4932eaa899b88f76abc48bc5 Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Wed, 19 Jan 2022 16:30:54 -0500 Subject: [PATCH 27/32] fix: clear postgres schema in test wasn't deleting parquet_file --- iox_catalog/src/postgres.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/iox_catalog/src/postgres.rs b/iox_catalog/src/postgres.rs index 0e8300555b..82235e9268 100644 --- a/iox_catalog/src/postgres.rs +++ b/iox_catalog/src/postgres.rs @@ -611,6 +611,10 @@ mod tests { .execute(pool) .await .unwrap(); + sqlx::query("delete from parquet_file;") + .execute(pool) + .await + .unwrap(); sqlx::query("delete from column_name;") .execute(pool) .await From 172d75c6d76ef27bfe1125d8b56ebba1dda540db Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Wed, 19 Jan 2022 16:45:06 -0500 Subject: [PATCH 28/32] feat: add sequencer get_by_kafka_topic_id_and_partition to catalog --- iox_catalog/src/interface.rs | 23 +++++++++++++++++++++++ iox_catalog/src/mem.rs | 14 ++++++++++++++ iox_catalog/src/postgres.rs | 24 ++++++++++++++++++++++++ 3 files changed, 61 insertions(+) diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index 0f62638709..626c032836 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -326,6 +326,13 @@ pub trait SequencerRepo { partition: KafkaPartition, ) -> Result; + /// get the sequencer record by `KafkaTopicId` and `KafkaPartition` + async fn get_by_topic_id_and_partition( + &self, + topic_id: KafkaTopicId, + partition: KafkaPartition, + ) -> 
Result>; + /// list all sequencers async fn list(&self) -> Result>; @@ -970,6 +977,22 @@ pub(crate) mod test_helpers { .collect::>(); assert_eq!(created, listed); + + // get by the sequencer id and partition + let kafka_partition = KafkaPartition::new(1); + let sequencer = sequencer_repo + .get_by_topic_id_and_partition(kafka.id, kafka_partition) + .await + .unwrap() + .unwrap(); + assert_eq!(kafka.id, sequencer.kafka_topic_id); + assert_eq!(kafka_partition, sequencer.kafka_partition); + + let sequencer = sequencer_repo + .get_by_topic_id_and_partition(kafka.id, KafkaPartition::new(523)) + .await + .unwrap(); + assert!(sequencer.is_none()); } async fn test_partition(repo: &T) { diff --git a/iox_catalog/src/mem.rs b/iox_catalog/src/mem.rs index f2eeda8579..419e4ea624 100644 --- a/iox_catalog/src/mem.rs +++ b/iox_catalog/src/mem.rs @@ -287,6 +287,20 @@ impl SequencerRepo for MemCatalog { Ok(*sequencer) } + async fn get_by_topic_id_and_partition( + &self, + topic_id: KafkaTopicId, + partition: KafkaPartition, + ) -> Result> { + let collections = self.collections.lock().expect("mutex poisoned"); + let sequencer = collections + .sequencers + .iter() + .find(|s| s.kafka_topic_id == topic_id && s.kafka_partition == partition) + .cloned(); + Ok(sequencer) + } + async fn list(&self) -> Result> { let collections = self.collections.lock().expect("mutex poisoned"); Ok(collections.sequencers.clone()) diff --git a/iox_catalog/src/postgres.rs b/iox_catalog/src/postgres.rs index 82235e9268..3f573f1fb0 100644 --- a/iox_catalog/src/postgres.rs +++ b/iox_catalog/src/postgres.rs @@ -329,6 +329,30 @@ impl SequencerRepo for PostgresCatalog { }) } + async fn get_by_topic_id_and_partition( + &self, + topic_id: KafkaTopicId, + partition: KafkaPartition, + ) -> Result> { + let rec = sqlx::query_as::<_, Sequencer>( + r#" +SELECT * FROM sequencer WHERE kafka_topic_id = $1 AND kafka_partition = $2; + "#, + ) + .bind(topic_id) // $1 + .bind(partition) // $2 + .fetch_one(&self.pool) + .await; + + if let Err(sqlx::Error::RowNotFound) = rec { + return Ok(None); + } + + let sequencer = rec.map_err(|e| Error::SqlxError { source: e })?; + + Ok(Some(sequencer)) + } + async fn list(&self) -> Result> { sqlx::query_as::<_, Sequencer>(r#"SELECT * FROM sequencer;"#) .fetch_all(&self.pool) From 860e5a30ca9da1a57c243e899bf1caca28444daa Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Wed, 19 Jan 2022 17:15:10 -0500 Subject: [PATCH 29/32] refactor: update ingester to get sequencer record and not attempt to create --- ingester/src/data.rs | 24 +++++++++++++++++++----- iox_catalog/src/interface.rs | 12 ++++++++++++ 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/ingester/src/data.rs b/ingester/src/data.rs index f21bf15364..4b1bca735d 100644 --- a/ingester/src/data.rs +++ b/ingester/src/data.rs @@ -7,11 +7,12 @@ use uuid::Uuid; use crate::server::IngesterServer; use iox_catalog::interface::{ - KafkaPartition, NamespaceId, PartitionId, RepoCollection, SequenceNumber, SequencerId, TableId, + KafkaPartition, KafkaTopicId, NamespaceId, PartitionId, RepoCollection, SequenceNumber, + SequencerId, TableId, }; use mutable_batch::MutableBatch; use parking_lot::RwLock; -use snafu::{ResultExt, Snafu}; +use snafu::{OptionExt, ResultExt, Snafu}; #[derive(Debug, Snafu)] #[allow(missing_copy_implementations, missing_docs)] @@ -27,6 +28,16 @@ pub enum Error { source: iox_catalog::interface::Error, id: KafkaPartition, }, + + #[snafu(display( + "Sequencer record not found for kafka_topic_id {} and kafka_partition {}", + kafka_topic_id, + 
kafka_partition + ))] + SequencerNotFound { + kafka_topic_id: KafkaTopicId, + kafka_partition: KafkaPartition, + }, } /// A specialized `Error` for Ingester Data errors @@ -52,9 +63,13 @@ impl Sequencers { let topic = ingester.get_topic(); for shard in ingester.get_kafka_partitions() { let sequencer = sequencer_repro - .create_or_get(&topic, shard) //todo: use `get` instead + .get_by_topic_id_and_partition(topic.id, shard) .await - .context(ReadSequencerSnafu { id: shard })?; + .context(ReadSequencerSnafu { id: shard })? + .context(SequencerNotFoundSnafu { + kafka_topic_id: topic.id, + kafka_partition: shard, + })?; // Create empty buffer for each sequencer sequencers.insert(sequencer.id, Arc::new(SequencerData::default())); } @@ -121,7 +136,6 @@ struct DataBuffer { /// . A read request from Querier /// The `buffer` will be empty when this happens. snapshots: Vec>, - /// When a persist is called, data in `buffer` will be moved to a `snapshot` /// and then all `snapshots` will be moved to a `persisting`. /// Both `buffer` and 'snaphots` will be empty when this happens. diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index 626c032836..ea0714d3f7 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -78,6 +78,12 @@ impl KafkaTopicId { } } +impl std::fmt::Display for KafkaTopicId { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + /// Unique ID for a `QueryPool` #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] #[sqlx(transparent)] @@ -153,6 +159,12 @@ impl KafkaPartition { } } +impl std::fmt::Display for KafkaPartition { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + /// Unique ID for a `Partition` #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] #[sqlx(transparent)] From bfc085c20df0a7db331facfd9fbda0944a85de1a Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Wed, 19 Jan 2022 17:32:23 -0500 Subject: [PATCH 30/32] feat: add get kafka_topic by name to catalog --- iox_catalog/src/interface.rs | 7 +++++++ iox_catalog/src/mem.rs | 10 ++++++++++ iox_catalog/src/postgres.rs | 20 +++++++++++++++++++- 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index ea0714d3f7..d72e91a4ee 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -275,6 +275,9 @@ pub trait RepoCollection { pub trait KafkaTopicRepo { /// Creates the kafka topic in the catalog or gets the existing record by name. async fn create_or_get(&self, name: &str) -> Result; + + /// Gets the kafka topic by its unique name + async fn get_by_name(&self, name: &str) -> Result>; } /// Functions for working with query pools in the catalog. 
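A note on how these additions compose: patch 28's `SequencerRepo::get_by_topic_id_and_partition` and this patch's `KafkaTopicRepo::get_by_name` let the ingester resolve its topology without creating catalog records as a side effect. The sketch below is illustrative only; the helper name, topic name parameter, and partition number are assumptions, not part of these patches:

use iox_catalog::interface::{KafkaPartition, RepoCollection, Result, Sequencer};

/// Resolve the sequencer for an already-provisioned topic and partition,
/// returning Ok(None) if either record is missing instead of creating it.
async fn lookup_sequencer<T>(
    repo: &T,
    topic_name: &str,
    partition: i32,
) -> Result<Option<Sequencer>>
where
    T: RepoCollection + Send + Sync,
{
    // Resolve the topic by its unique name; absence is not an error here.
    let topic = match repo.kafka_topic().get_by_name(topic_name).await? {
        Some(t) => t,
        None => return Ok(None),
    };

    // Resolve the sequencer by (topic id, kafka partition) without create_or_get.
    repo.sequencer()
        .get_by_topic_id_and_partition(topic.id, KafkaPartition::new(partition))
        .await
}

This is the same get-only pattern patch 29 applies in `Sequencers::initialize`, which surfaces a missing record as `SequencerNotFound` rather than silently creating one.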
@@ -834,6 +837,10 @@ pub(crate) mod test_helpers { assert_eq!(k.name, "foo"); let k2 = kafka_repo.create_or_get("foo").await.unwrap(); assert_eq!(k, k2); + let k3 = kafka_repo.get_by_name("foo").await.unwrap().unwrap(); + assert_eq!(k3, k); + let k3 = kafka_repo.get_by_name("asdf").await.unwrap(); + assert!(k3.is_none()); } async fn test_query_pool(repo: &T) { diff --git a/iox_catalog/src/mem.rs b/iox_catalog/src/mem.rs index 419e4ea624..c4cf0333b1 100644 --- a/iox_catalog/src/mem.rs +++ b/iox_catalog/src/mem.rs @@ -105,6 +105,16 @@ impl KafkaTopicRepo for MemCatalog { Ok(topic.clone()) } + + async fn get_by_name(&self, name: &str) -> Result> { + let collections = self.collections.lock().expect("mutex poisoned"); + let kafka_topic = collections + .kafka_topics + .iter() + .find(|t| t.name == name) + .cloned(); + Ok(kafka_topic) + } } #[async_trait] diff --git a/iox_catalog/src/postgres.rs b/iox_catalog/src/postgres.rs index 3f573f1fb0..2b052a9738 100644 --- a/iox_catalog/src/postgres.rs +++ b/iox_catalog/src/postgres.rs @@ -118,6 +118,25 @@ DO UPDATE SET name = kafka_topic.name RETURNING *; Ok(rec) } + + async fn get_by_name(&self, name: &str) -> Result> { + let rec = sqlx::query_as::<_, KafkaTopic>( + r#" +SELECT * FROM kafka_topic WHERE name = $1; + "#, + ) + .bind(&name) // $1 + .fetch_one(&self.pool) + .await; + + if let Err(sqlx::Error::RowNotFound) = rec { + return Ok(None); + } + + let kafka_topic = rec.map_err(|e| Error::SqlxError { source: e })?; + + Ok(Some(kafka_topic)) + } } #[async_trait] @@ -178,7 +197,6 @@ RETURNING * } async fn get_by_name(&self, name: &str) -> Result> { - // TODO: maybe get all the data in a single call to Postgres? let rec = sqlx::query_as::<_, Namespace>( r#" SELECT * FROM namespace WHERE name = $1; From 4ede10b3a06cae9ea68b295cc19bfa0bb641de65 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Wed, 19 Jan 2022 17:53:58 -0500 Subject: [PATCH 31/32] refactor: add new fields and comments in ingest data buffer --- ingester/src/data.rs | 52 ++++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/ingester/src/data.rs b/ingester/src/data.rs index f21bf15364..d5ec8c4dd8 100644 --- a/ingester/src/data.rs +++ b/ingester/src/data.rs @@ -8,6 +8,7 @@ use uuid::Uuid; use crate::server::IngesterServer; use iox_catalog::interface::{ KafkaPartition, NamespaceId, PartitionId, RepoCollection, SequenceNumber, SequencerId, TableId, + Tombstone, }; use mutable_batch::MutableBatch; use parking_lot::RwLock; @@ -34,7 +35,7 @@ pub type Result = std::result::Result; /// Ingester Data: a Mapp of Shard ID to its Data #[derive(Default)] -struct Sequencers { +pub struct Sequencers { // This map gets set up on initialization of the ingester so it won't ever be modified. // The content of each SequenceData will get changed when more namespaces and tables // get ingested. 
@@ -65,26 +66,26 @@ impl Sequencers {
 /// Data of a Shard
 #[derive(Default)]
-struct SequencerData {
+pub struct SequencerData {
     // New namespaces can come in at any time so we need to be able to add new ones
     namespaces: RwLock>>,
 }
 
 /// Data of a Namespace that belongs to a given Shard
 #[derive(Default)]
-struct NamespaceData {
+pub struct NamespaceData {
     tables: RwLock>>,
 }
 
 /// Data of a Table in a given Namespace that belongs to a given Shard
 #[derive(Default)]
-struct TableData {
+pub struct TableData {
     // Map of partition key to its data
     partition_data: RwLock>>,
 }
 
 /// Data of an IOx Partition of a given Table of a Namespace that belongs to a given Shard
-struct PartitionData {
+pub struct PartitionData {
     id: PartitionId,
     inner: RwLock,
 }
 
@@ -112,9 +113,15 @@ struct PartitionData {
 /// │ │ │ └───────────────────┘ │ │ └───────────────────┘ │
 /// └────────────────────────┘ └────────────────────────┘ └─────────────────────────┘
 #[derive(Default)]
-struct DataBuffer {
-    /// Buffer of ingesting data
-    buffer: Vec,
+pub struct DataBuffer {
+    /// Buffer of incoming writes
+    pub buffer: Vec,
+
+    /// Buffer of tombstones whose time range overlaps with this partition.
+    /// These tombstones will first be written into the Catalog and then here.
+    /// When a persist is called, these tombstones will be moved into the
+    /// PersistingBatch to be applied to that data.
+    pub deletes: Vec,
 
     /// Data in `buffer` will be moved to a `snapshot` when one of these happens:
     /// . A background persist is called
@@ -141,7 +148,7 @@ struct DataBuffer {
 }
 /// BufferBatch is a MutableBatch with its ingesting order, sequencer_number, that
 /// helps the ingester keep the batches of data in their ingesting order
-struct BufferBatch {
+pub struct BufferBatch {
     /// Sequencer number of the ingesting data
     pub sequencer_number: SequenceNumber,
     /// Ingesting data
@@ -149,7 +156,7 @@ struct BufferBatch {
 }
 
 /// SnapshotBatch contains data of many contiguous BufferBatches
-struct SnapshotBatch {
+pub struct SnapshotBatch {
     /// Min sequencer number of its combined BufferBatches
     pub min_sequencer_number: SequenceNumber,
     /// Max sequencer number of its combined BufferBatches
@@ -160,10 +167,23 @@ struct SnapshotBatch {
 
 /// PersistingBatch contains all needed info and data for creating
 /// a parquet file for a given set of SnapshotBatches
-struct PersistingBatch {
-    sequencer_id: SequencerId,
-    table_id: TableId,
-    partition_id: PartitionId,
-    object_store_id: Uuid,
-    data: Vec,
+pub struct PersistingBatch {
+    /// Sequencer id of the data
+    pub sequencer_id: SequencerId,
+
+    /// Table id of the data
+    pub table_id: TableId,
+
+    /// Partition Id of the data
+    pub partition_id: PartitionId,
+
+    /// Id of the to-be-created parquet file of this data
+    pub object_store_id: Uuid,
+
+    /// Data to be persisted
+    pub data: Vec,
+
+    /// Delete predicates to be applied to the data
+    /// before persisting
+    pub deletes: Vec,
 }

From 029f4bb41e32354e60a72b8312cccbad9cd1fd3e Mon Sep 17 00:00:00 2001
From: NGA-TRAN
Date: Wed, 19 Jan 2022 18:11:00 -0500
Subject: [PATCH 32/32] fix: comment

---
 ingester/src/data.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ingester/src/data.rs b/ingester/src/data.rs
index 4e31ded524..b838faca54 100644
--- a/ingester/src/data.rs
+++ b/ingester/src/data.rs
@@ -131,7 +131,7 @@ pub struct DataBuffer {
     /// Buffer of incoming writes
     pub buffer: Vec,
 
-    /// Buffer of tombstones whose time range overlaps with this partition.
+    /// Buffer of tombstones whose time range may overlap with this partition.
     /// These tombstones will first be written into the Catalog and then here.
     /// When a persist is called, these tombstones will be moved into the
     /// PersistingBatch to be applied to that data.
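
A minimal sketch (not taken from these patches) of how a caller might exercise the lookups added above: resolve a kafka topic by name, then fetch the sequencer record for a topic/partition pair instead of creating one on demand, matching the ingester's new behavior. It assumes a `MemCatalog::new()` constructor, a hypothetical topic name "example_topic", and the elided generics on the catalog's Result types; fully qualified trait calls are used because more than one repo trait exposes a `get_by_name` method.

use iox_catalog::interface::{KafkaPartition, KafkaTopicRepo, SequencerRepo};
use iox_catalog::mem::MemCatalog;

async fn lookup_sequencer() -> Result<(), Box<dyn std::error::Error>> {
    // In-memory catalog; `MemCatalog::new()` is assumed for this sketch.
    let catalog = MemCatalog::new();

    // Topics are looked up, not created: get_by_name returns Ok(None) when missing.
    let topic = KafkaTopicRepo::get_by_name(&catalog, "example_topic")
        .await?
        .expect("kafka topic should already exist");

    // The ingester now surfaces a SequencerNotFound error instead of creating the record.
    match SequencerRepo::get_by_topic_id_and_partition(&catalog, topic.id, KafkaPartition::new(0))
        .await?
    {
        Some(sequencer) => println!("found sequencer {:?}", sequencer.id),
        None => println!("no sequencer for this topic/partition"),
    }

    Ok(())
}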