From b3ee1032b363cbe323f3229e6cf784d656842fb5 Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Mon, 17 Jan 2022 14:09:10 -0500 Subject: [PATCH 01/32] feat: add memory based catalog Adds a memory based catalog, useful for testing purposes. Separates getting the namespace schema from the namespace and moves the schema code out interface out of postgres. --- iox_catalog/src/interface.rs | 82 ++++++++++-- iox_catalog/src/lib.rs | 1 + iox_catalog/src/mem.rs | 248 +++++++++++++++++++++++++++++++++++ iox_catalog/src/postgres.rs | 65 ++------- 4 files changed, 333 insertions(+), 63 deletions(-) create mode 100644 iox_catalog/src/mem.rs diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index e3e122fe0f..7da8ecce29 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -2,7 +2,7 @@ use async_trait::async_trait; use influxdb_line_protocol::FieldValue; -use snafu::Snafu; +use snafu::{OptionExt, Snafu}; use std::collections::BTreeMap; use std::convert::TryFrom; use std::fmt::Formatter; @@ -33,6 +33,9 @@ pub enum Error { name ))] UnknownColumnType { data_type: i16, name: String }, + + #[snafu(display("namespace {} not found", name))] + NamespaceNotFound { name: String }, } /// A specialized `Error` for Catalog errors @@ -72,18 +75,18 @@ pub trait QueryPoolRepo { /// Functions for working with namespaces in the catalog #[async_trait] pub trait NamespaceRepo { - /// Creates the namespace in the catalog, or get the existing record by name. Then - /// constructs a namespace schema with all tables and columns under the namespace. + /// Creates the namespace in the catalog. If one by the same name already exists, an + /// error is returned. async fn create( &self, name: &str, retention_duration: &str, kafka_topic_id: i32, query_pool_id: i16, - ) -> Result; + ) -> Result; - /// Gets the namespace schema including all tables and columns. - async fn get_by_name(&self, name: &str) -> Result>; + /// Gets the namespace by its unique name. + async fn get_by_name(&self, name: &str) -> Result>; } /// Functions for working with tables in the catalog @@ -124,7 +127,7 @@ pub trait SequencerRepo { } /// Data object for a kafka topic -#[derive(Debug, Eq, PartialEq, sqlx::FromRow)] +#[derive(Debug, Clone, Eq, PartialEq, sqlx::FromRow)] pub struct KafkaTopic { /// The id of the topic pub id: i32, @@ -133,7 +136,7 @@ pub struct KafkaTopic { } /// Data object for a query pool -#[derive(Debug, Eq, PartialEq, sqlx::FromRow)] +#[derive(Debug, Clone, Eq, PartialEq, sqlx::FromRow)] pub struct QueryPool { /// The id of the pool pub id: i16, @@ -142,7 +145,7 @@ pub struct QueryPool { } /// Data object for a namespace -#[derive(Debug, sqlx::FromRow)] +#[derive(Debug, Clone, sqlx::FromRow)] pub struct Namespace { /// The id of the namespace pub id: i32, @@ -181,6 +184,63 @@ impl NamespaceSchema { } } + /// Gets the namespace schema including all tables and columns. + pub async fn get_by_name( + name: &str, + repo: &T, + ) -> Result> { + let namespace_repo = repo.namespace(); + let table_repo = repo.table(); + let column_repo = repo.column(); + + let namespace = namespace_repo + .get_by_name(name) + .await? + .context(NamespaceNotFoundSnafu { name })?; + + // get the columns first just in case someone else is creating schema while we're doing this. 
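        // Reading columns before tables means every column fetched here will have its table
        // present in the table listing that follows; with the reverse order, a table and its
        // columns created concurrently could surface a column whose table is missing from the
        // map built below, and the `unwrap()` on that lookup would panic.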
+ let columns = column_repo.list_by_namespace_id(namespace.id).await?; + let tables = table_repo.list_by_namespace_id(namespace.id).await?; + + let mut namespace = Self::new( + namespace.id, + namespace.kafka_topic_id, + namespace.query_pool_id, + ); + + let mut table_id_to_schema = BTreeMap::new(); + for t in tables { + table_id_to_schema.insert(t.id, (t.name, TableSchema::new(t.id))); + } + + for c in columns { + let (_, t) = table_id_to_schema.get_mut(&c.table_id).unwrap(); + match ColumnType::try_from(c.column_type) { + Ok(column_type) => { + t.columns.insert( + c.name, + ColumnSchema { + id: c.id, + column_type, + }, + ); + } + _ => { + return Err(Error::UnknownColumnType { + data_type: c.column_type, + name: c.name.to_string(), + }); + } + } + } + + for (_, (table_name, schema)) in table_id_to_schema { + namespace.tables.insert(table_name, schema); + } + + Ok(Some(namespace)) + } + /// Adds tables and columns to the `NamespaceSchema`. These are created /// incrementally while validating the schema for a write and this helper /// method takes them in to add them to the schema. @@ -215,7 +275,7 @@ impl NamespaceSchema { } /// Data object for a table -#[derive(Debug, sqlx::FromRow, Eq, PartialEq)] +#[derive(Debug, Clone, sqlx::FromRow, Eq, PartialEq)] pub struct Table { /// The id of the table pub id: i32, @@ -252,7 +312,7 @@ impl TableSchema { } /// Data object for a column -#[derive(Debug, sqlx::FromRow, Eq, PartialEq)] +#[derive(Debug, Clone, sqlx::FromRow, Eq, PartialEq)] pub struct Column { /// the column id pub id: i32, diff --git a/iox_catalog/src/lib.rs b/iox_catalog/src/lib.rs index 1c9c6e0654..b2dead2df3 100644 --- a/iox_catalog/src/lib.rs +++ b/iox_catalog/src/lib.rs @@ -25,6 +25,7 @@ const SHARED_QUERY_POOL: &str = SHARED_KAFKA_TOPIC; const TIME_COLUMN: &str = "time"; pub mod interface; +pub mod mem; pub mod postgres; /// Given the lines of a write request and an in memory schema, this will validate the write diff --git a/iox_catalog/src/mem.rs b/iox_catalog/src/mem.rs new file mode 100644 index 0000000000..ed2821c426 --- /dev/null +++ b/iox_catalog/src/mem.rs @@ -0,0 +1,248 @@ +//! This module implements an in-memory implementation of the iox_catalog interface. It can be +//! used for testing or for an IOx designed to run without catalog persistence. 
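A rough usage sketch of this in-memory catalog, driven through the repo traits it implements (illustrative only: `MemCatalog::new()` is added by the follow-up commit that makes the type public, and the calls assume an async test context):

    let catalog = Arc::new(MemCatalog::new());
    let topic = catalog.kafka_topic().create_or_get("foo").await.unwrap();
    let pool = catalog.query_pool().create_or_get("foo").await.unwrap();
    let namespace = catalog
        .namespace()
        .create("test_namespace", "inf", topic.id, pool.id)
        .await
        .unwrap();
    assert_eq!(namespace.kafka_topic_id, topic.id);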
+ +use crate::interface::{ + Column, ColumnRepo, ColumnType, Error, KafkaTopic, KafkaTopicRepo, Namespace, NamespaceRepo, + QueryPool, QueryPoolRepo, RepoCollection, Result, Sequencer, SequencerRepo, Table, TableRepo, +}; +use async_trait::async_trait; +use std::convert::TryFrom; +use std::sync::{Arc, Mutex}; + +struct MemCatalog { + collections: Mutex, +} + +struct MemCollections { + kafka_topics: Vec, + query_pools: Vec, + namespaces: Vec, + tables: Vec, + columns: Vec, + sequencers: Vec, +} + +impl RepoCollection for Arc { + fn kafka_topic(&self) -> Arc { + Self::clone(self) as Arc + } + + fn query_pool(&self) -> Arc { + Self::clone(self) as Arc + } + + fn namespace(&self) -> Arc { + Self::clone(self) as Arc + } + + fn table(&self) -> Arc { + Self::clone(self) as Arc + } + + fn column(&self) -> Arc { + Self::clone(self) as Arc + } + + fn sequencer(&self) -> Arc { + Self::clone(self) as Arc + } +} + +#[async_trait] +impl KafkaTopicRepo for MemCatalog { + async fn create_or_get(&self, name: &str) -> Result { + let mut collections = self.collections.lock().expect("mutex poisoned"); + + let topic = match collections.kafka_topics.iter().find(|t| t.name == name) { + Some(t) => t, + None => { + let topic = KafkaTopic { + id: collections.kafka_topics.len() as i32 + 1, + name: name.to_string(), + }; + collections.kafka_topics.push(topic); + collections.kafka_topics.last().unwrap() + } + }; + + Ok(topic.clone()) + } +} + +#[async_trait] +impl QueryPoolRepo for MemCatalog { + async fn create_or_get(&self, name: &str) -> Result { + let mut collections = self.collections.lock().expect("mutex poisoned"); + + let pool = match collections.query_pools.iter().find(|t| t.name == name) { + Some(t) => t, + None => { + let pool = QueryPool { + id: collections.query_pools.len() as i16 + 1, + name: name.to_string(), + }; + collections.query_pools.push(pool); + collections.query_pools.last().unwrap() + } + }; + + Ok(pool.clone()) + } +} + +#[async_trait] +impl NamespaceRepo for MemCatalog { + async fn create( + &self, + name: &str, + retention_duration: &str, + kafka_topic_id: i32, + query_pool_id: i16, + ) -> Result { + let mut collections = self.collections.lock().expect("mutex poisoned"); + if collections.namespaces.iter().any(|n| n.name == name) { + return Err(Error::NameExists { + name: name.to_string(), + }); + } + + let namespace = Namespace { + id: collections.namespaces.len() as i32 + 1, + name: name.to_string(), + kafka_topic_id, + query_pool_id, + retention_duration: Some(retention_duration.to_string()), + }; + collections.namespaces.push(namespace); + Ok(collections.namespaces.last().unwrap().clone()) + } + + async fn get_by_name(&self, name: &str) -> Result> { + let collections = self.collections.lock().expect("mutex poisoned"); + Ok(collections + .namespaces + .iter() + .find(|n| n.name == name) + .cloned()) + } +} + +#[async_trait] +impl TableRepo for MemCatalog { + async fn create_or_get(&self, name: &str, namespace_id: i32) -> Result
{ + let mut collections = self.collections.lock().expect("mutex poisoned"); + + let table = match collections.tables.iter().find(|t| t.name == name) { + Some(t) => t, + None => { + let table = Table { + id: collections.tables.len() as i32 + 1, + namespace_id, + name: name.to_string(), + }; + collections.tables.push(table); + collections.tables.last().unwrap() + } + }; + + Ok(table.clone()) + } + + async fn list_by_namespace_id(&self, namespace_id: i32) -> Result> { + let collections = self.collections.lock().expect("mutex poisoned"); + let tables: Vec<_> = collections + .tables + .iter() + .filter(|t| t.namespace_id == namespace_id) + .cloned() + .collect(); + Ok(tables) + } +} + +#[async_trait] +impl ColumnRepo for MemCatalog { + async fn create_or_get( + &self, + name: &str, + table_id: i32, + column_type: ColumnType, + ) -> Result { + let mut collections = self.collections.lock().expect("mutex poisoned"); + + let column = match collections.columns.iter().find(|t| t.name == name) { + Some(c) => { + if column_type as i16 != c.column_type { + return Err(Error::ColumnTypeMismatch { + name: name.to_string(), + existing: ColumnType::try_from(c.column_type).unwrap().to_string(), + new: column_type.to_string(), + }); + } + + c + } + None => { + let column = Column { + id: collections.columns.len() as i32 + 1, + table_id, + name: name.to_string(), + column_type: column_type as i16, + }; + collections.columns.push(column); + collections.columns.last().unwrap() + } + }; + + Ok(column.clone()) + } + + async fn list_by_namespace_id(&self, namespace_id: i32) -> Result> { + let mut columns = vec![]; + + let collections = self.collections.lock().expect("mutex poisoned"); + for t in collections + .tables + .iter() + .filter(|t| t.namespace_id == namespace_id) + { + for c in collections.columns.iter().filter(|c| c.table_id == t.id) { + columns.push(c.clone()); + } + } + + Ok(columns) + } +} + +#[async_trait] +impl SequencerRepo for MemCatalog { + async fn create_or_get(&self, topic: &KafkaTopic, partition: i32) -> Result { + let mut collections = self.collections.lock().expect("mutex poisoned"); + + let sequencer = match collections + .sequencers + .iter() + .find(|s| s.kafka_topic_id == topic.id && s.kafka_partition == partition) + { + Some(t) => t, + None => { + let sequencer = Sequencer { + id: collections.sequencers.len() as i16 + 1, + kafka_topic_id: topic.id, + kafka_partition: partition, + min_unpersisted_sequence_number: 0, + }; + collections.sequencers.push(sequencer); + collections.sequencers.last().unwrap() + } + }; + + Ok(*sequencer) + } + + async fn list(&self) -> Result> { + let collections = self.collections.lock().expect("mutex poisoned"); + Ok(collections.sequencers.clone()) + } +} diff --git a/iox_catalog/src/postgres.rs b/iox_catalog/src/postgres.rs index 1bb43ca80b..dd49e27996 100644 --- a/iox_catalog/src/postgres.rs +++ b/iox_catalog/src/postgres.rs @@ -1,15 +1,12 @@ //! 
A Postgres backed implementation of the Catalog use crate::interface::{ - Column, ColumnRepo, ColumnSchema, ColumnType, Error, KafkaTopic, KafkaTopicRepo, Namespace, - NamespaceRepo, NamespaceSchema, QueryPool, QueryPoolRepo, RepoCollection, Result, Sequencer, - SequencerRepo, Table, TableRepo, TableSchema, + Column, ColumnRepo, ColumnType, Error, KafkaTopic, KafkaTopicRepo, Namespace, NamespaceRepo, + QueryPool, QueryPoolRepo, RepoCollection, Result, Sequencer, SequencerRepo, Table, TableRepo, }; use async_trait::async_trait; use observability_deps::tracing::info; use sqlx::{postgres::PgPoolOptions, Executor, Pool, Postgres}; -use std::collections::BTreeMap; -use std::convert::TryFrom; use std::sync::Arc; use std::time::Duration; @@ -130,7 +127,7 @@ impl NamespaceRepo for PostgresCatalog { retention_duration: &str, kafka_topic_id: i32, query_pool_id: i16, - ) -> Result { + ) -> Result { let rec = sqlx::query_as::<_, Namespace>( r#" INSERT INTO namespace ( name, retention_duration, kafka_topic_id, query_pool_id ) @@ -156,10 +153,10 @@ RETURNING * } })?; - Ok(NamespaceSchema::new(rec.id, kafka_topic_id, query_pool_id)) + Ok(rec) } - async fn get_by_name(&self, name: &str) -> Result> { + async fn get_by_name(&self, name: &str) -> Result> { // TODO: maybe get all the data in a single call to Postgres? let rec = sqlx::query_as::<_, Namespace>( r#" @@ -175,47 +172,8 @@ SELECT * FROM namespace WHERE name = $1; } let namespace = rec.map_err(|e| Error::SqlxError { source: e })?; - // get the columns first just in case someone else is creating schema while we're doing this. - let columns = ColumnRepo::list_by_namespace_id(self, namespace.id).await?; - let tables = TableRepo::list_by_namespace_id(self, namespace.id).await?; - let mut namespace = NamespaceSchema::new( - namespace.id, - namespace.kafka_topic_id, - namespace.query_pool_id, - ); - - let mut table_id_to_schema = BTreeMap::new(); - for t in tables { - table_id_to_schema.insert(t.id, (t.name, TableSchema::new(t.id))); - } - - for c in columns { - let (_, t) = table_id_to_schema.get_mut(&c.table_id).unwrap(); - match ColumnType::try_from(c.column_type) { - Ok(column_type) => { - t.columns.insert( - c.name, - ColumnSchema { - id: c.id, - column_type, - }, - ); - } - _ => { - return Err(Error::UnknownColumnType { - data_type: c.column_type, - name: c.name.to_string(), - }); - } - } - } - - for (_, (table_name, schema)) in table_id_to_schema { - namespace.tables.insert(table_name, schema); - } - - return Ok(Some(namespace)); + Ok(Some(namespace)) } } @@ -390,9 +348,12 @@ fn is_fk_violation(e: &sqlx::Error) -> bool { #[cfg(test)] mod tests { use super::*; - use crate::{create_or_get_default_records, validate_or_insert_schema}; + use crate::{ + create_or_get_default_records, interface::NamespaceSchema, validate_or_insert_schema, + }; use futures::{stream::FuturesOrdered, StreamExt}; use influxdb_line_protocol::parse_lines; + use std::collections::BTreeMap; use std::env; // Helper macro to skip tests if TEST_INTEGRATION and the AWS environment variables are not set. 
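    // For the catalog tests below, the variables in question are TEST_INTEGRATION and
    // DATABASE_URL; `setup_db` reads DATABASE_URL to reach the Postgres instance that has had
    // the sqlx migrations applied (see this crate's README).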
@@ -535,7 +496,7 @@ m2,t3=b f1=true 1 let new_schema = new_schema.unwrap(); // ensure new schema is in the db - let schema_from_db = NamespaceRepo::get_by_name(postgres.as_ref(), "asdf") + let schema_from_db = NamespaceSchema::get_by_name("asdf", &postgres) .await .unwrap() .unwrap(); @@ -560,7 +521,7 @@ new_measurement,t9=a f10=true 1 ColumnType::Tag, new_table.columns.get("t9").unwrap().column_type ); - let schema = NamespaceRepo::get_by_name(postgres.as_ref(), "asdf") + let schema = NamespaceSchema::get_by_name("asdf", &postgres) .await .unwrap() .unwrap(); @@ -585,7 +546,7 @@ m1,new_tag=c new_field=1i 2 ColumnType::Tag, table.columns.get("new_tag").unwrap().column_type ); - let schema = NamespaceRepo::get_by_name(postgres.as_ref(), "asdf") + let schema = NamespaceSchema::get_by_name("asdf", &postgres) .await .unwrap() .unwrap(); From dfe95e1a564a2e2e72b92a66a5559e379a047e51 Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Mon, 17 Jan 2022 14:46:10 -0500 Subject: [PATCH 02/32] refactor: make postgres and mem catalog implementations public --- iox_catalog/src/mem.rs | 21 +++++++++- iox_catalog/src/postgres.rs | 81 ++++++++++++++++++++----------------- 2 files changed, 64 insertions(+), 38 deletions(-) diff --git a/iox_catalog/src/mem.rs b/iox_catalog/src/mem.rs index ed2821c426..398a16210d 100644 --- a/iox_catalog/src/mem.rs +++ b/iox_catalog/src/mem.rs @@ -7,12 +7,31 @@ use crate::interface::{ }; use async_trait::async_trait; use std::convert::TryFrom; +use std::fmt::Formatter; use std::sync::{Arc, Mutex}; -struct MemCatalog { +/// In-memory catalog that implements the `RepoCollection` and individual repo traits fromt +/// the catalog interface. +#[derive(Default)] +pub struct MemCatalog { collections: Mutex, } +impl MemCatalog { + /// return new initialized `MemCatalog` + pub fn new() -> Self { + Self::default() + } +} + +impl std::fmt::Debug for MemCatalog { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let c = self.collections.lock().expect("mutex poisoned"); + write!(f, "MemCatalog[ {:?} ]", c) + } +} + +#[derive(Default, Debug)] struct MemCollections { kafka_topics: Vec, query_pools: Vec, diff --git a/iox_catalog/src/postgres.rs b/iox_catalog/src/postgres.rs index dd49e27996..e5bcd20f4e 100644 --- a/iox_catalog/src/postgres.rs +++ b/iox_catalog/src/postgres.rs @@ -16,41 +16,46 @@ const IDLE_TIMEOUT: Duration = Duration::from_secs(500); #[allow(dead_code)] const SCHEMA_NAME: &str = "iox_catalog"; -/// Connect to the catalog store. -pub async fn connect_catalog_store( - app_name: &'static str, - schema_name: &'static str, - dsn: &str, -) -> Result, sqlx::Error> { - let pool = PgPoolOptions::new() - .min_connections(1) - .max_connections(MAX_CONNECTIONS) - .connect_timeout(CONNECT_TIMEOUT) - .idle_timeout(IDLE_TIMEOUT) - .test_before_acquire(true) - .after_connect(move |c| { - Box::pin(async move { - // Tag the connection with the provided application name. - c.execute(sqlx::query("SET application_name = '$1';").bind(app_name)) - .await?; - let search_path_query = format!("SET search_path TO {}", schema_name); - c.execute(sqlx::query(&search_path_query)).await?; - - Ok(()) - }) - }) - .connect(dsn) - .await?; - - // Log a connection was successfully established and include the application - // name for cross-correlation between Conductor logs & database connections. - info!(application_name=%app_name, "connected to catalog store"); - - Ok(pool) +/// In-memory catalog that implements the `RepoCollection` and individual repo traits. 
+#[derive(Debug)] +pub struct PostgresCatalog { + pool: Pool, } -struct PostgresCatalog { - pool: Pool, +impl PostgresCatalog { + /// Connect to the catalog store. + pub async fn connect( + app_name: &'static str, + schema_name: &'static str, + dsn: &str, + ) -> Result { + let pool = PgPoolOptions::new() + .min_connections(1) + .max_connections(MAX_CONNECTIONS) + .connect_timeout(CONNECT_TIMEOUT) + .idle_timeout(IDLE_TIMEOUT) + .test_before_acquire(true) + .after_connect(move |c| { + Box::pin(async move { + // Tag the connection with the provided application name. + c.execute(sqlx::query("SET application_name = '$1';").bind(app_name)) + .await?; + let search_path_query = format!("SET search_path TO {}", schema_name); + c.execute(sqlx::query(&search_path_query)).await?; + + Ok(()) + }) + }) + .connect(dsn) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + // Log a connection was successfully established and include the application + // name for cross-correlation between Conductor logs & database connections. + info!(application_name=%app_name, "connected to catalog store"); + + Ok(Self { pool }) + } } impl RepoCollection for Arc { @@ -348,6 +353,7 @@ fn is_fk_violation(e: &sqlx::Error) -> bool { #[cfg(test)] mod tests { use super::*; + use crate::postgres::PostgresCatalog; use crate::{ create_or_get_default_records, interface::NamespaceSchema, validate_or_insert_schema, }; @@ -395,10 +401,11 @@ mod tests { async fn setup_db() -> (Arc, KafkaTopic, QueryPool) { let dsn = std::env::var("DATABASE_URL").unwrap(); - let pool = connect_catalog_store("test", SCHEMA_NAME, &dsn) - .await - .unwrap(); - let postgres_catalog = Arc::new(PostgresCatalog { pool }); + let postgres_catalog = Arc::new( + PostgresCatalog::connect("test", SCHEMA_NAME, &dsn) + .await + .unwrap(), + ); let (kafka_topic, query_pool, _) = create_or_get_default_records(2, &postgres_catalog) .await From ef336b46592f4ca5ae36104e6d804e00dafcb2ad Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Mon, 17 Jan 2022 15:38:03 -0500 Subject: [PATCH 03/32] feat: add ingester crate and a few basic data structures for its data lifecycle --- Cargo.lock | 7 ++ Cargo.toml | 1 + ingester/Cargo.toml | 8 +++ ingester/src/data.rs | 149 +++++++++++++++++++++++++++++++++++++++++++ ingester/src/lib.rs | 5 ++ 5 files changed, 170 insertions(+) create mode 100644 ingester/Cargo.toml create mode 100644 ingester/src/data.rs create mode 100644 ingester/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 02e85ba6b5..be33e2e8c7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1853,6 +1853,13 @@ dependencies = [ "workspace-hack", ] +[[package]] +name = "ingester" +version = "0.1.0" +dependencies = [ + "mutable_batch", +] + [[package]] name = "instant" version = "0.1.12" diff --git a/Cargo.toml b/Cargo.toml index 8612b307da..4c6cafda3a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ members = [ "influxdb_storage_client", "influxdb_tsm", "influxdb2_client", + "ingester", "internal_types", "iox_catalog", "iox_data_generator", diff --git a/ingester/Cargo.toml b/ingester/Cargo.toml new file mode 100644 index 0000000000..1412feb6a3 --- /dev/null +++ b/ingester/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "ingester" +version = "0.1.0" +authors = ["Nga Tran "] +edition = "2021" + +[dependencies] +mutable_batch = { path = "../mutable_batch" } \ No newline at end of file diff --git a/ingester/src/data.rs b/ingester/src/data.rs new file mode 100644 index 0000000000..4b5f1c1185 --- /dev/null +++ b/ingester/src/data.rs @@ -0,0 +1,149 @@ + + +//! 
Data for the lifecycle of the ingeter +//! + +use std::{sync::Arc, collections::BTreeMap}; + +use mutable_batch::MutableBatch; + +// ┌──────────────┐ +// │Ingester Data │ +// │ (in memory) │ +// └──────────────┘ +// │ +// ┌──────────────┼───────────────┐ +// ▼ ▼ ▼ +// ┌───────────┐ ┌────────────┐ +// │Sequencer 1│ ... │Sequencer m │ Sequencers +// └───────────┘ └────────────┘ a map of sequencer_id to Namespaces +// │ │ +// ┌──────────────┼─────────────┐ │ +// ▼ ▼ ▼ ▼ +// ┌────────────┐ ┌───────────┐ Namespaces +// │Namespace 1 │ ... │Namespace n│ ... a map of namespace_name to Tables +// └────────────┘ └───────────┘ +// │ │ +// ┌──────────────┼──────────────┐ │ +// ▼ ▼ ▼ ▼ +// ┌────────────┐ ┌────────────┐ Tables +// │ Table 1 │ ... │ Table p │ ... a map of table_name to Partitions +// └────────────┘ └────────────┘ +// │ │ +// │ ┌──────────────┼──────────────┐ +// ▼ ▼ ▼ ▼ +// ┌────────────┐ ┌────────────┐ Partitions +// ... │Partition 1 │ ... │Partition q │ a map of partition_key to PartitionData +// │(2021-12-10)│ │(2021-12-20)│ +// └────────────┘ └──────┬─────┘ +// │ │ +// ┌───────────┬────────▼────┬─────────────┐ │ +// │ │ │ │ ▼ +// ▼ ▼ ▼ ▼ +// ┌──────────┐┌───────────┐ ┌───────────┐ ┌───────────┐ ... +// │ Writing ││ Snaphot │ │Persisting │ │ Persisted │ PartitionData: a struct of 4 items +// │Partition ││ Partition │ │ Partition │ │ Partition │ . A `Writing Partition Batch` +// │ Batch ││ Batch 1 │ │ Batch 1 │ │ Batch 1 │ . A vector of `Snapshot Partition Batches` +// └──────────┘├───────────┤ ├───────────┤ ├───────────┤ . A vector of `Persisting Partition Batches` +// │ ... │ │ ... │ │ ... │ . A vector of `Persisted Partition batches` +// │ │ │ │ │ │ +// ├───────────┤ ├───────────┤ ├───────────┤ 1:1 map between `Snapshot` +// │ Snapshot │ │Persisting │ │ Persisted │ and `Persisting` Partition Batches +// │ Partition │ │ Partition │ │ Partition │ +// │ Batch k │ │ Batch k │ │ Batch i │ +// └───────────┘ └───────────┘ └───────────┘ + +// All sequencers aiisgned to this Ingester +#[derive(Debug, Clone)] +pub struct Sequencers { + // A map between a sequencer id to its corresponding Namespaces. + // A sequencer id is a `kafka_partittion`, a i32 defined in iox_catalog's Sequencer and + // represents a shard of data of a Table of a Namesapce. Namespace is equivalent to + // a customer db (aka an org's bucket). Depending on the comfiguration of sharding a table, + // either full data or set of rows of data of the table are included in a shard. + sequencers : BTreeMap>, +} + +// A Namespace and all of its tables of a sequencer +#[derive(Debug, Clone)] +pub struct Namespace { + // Name of the namespace which is unique and represents a customer db. + name: String, + + // Tables of this namesapce + tables : Vec
, +} + +// A Table and all of its partittion +#[derive(Debug, Clone)] +pub struct Table { + // table name + name: String, + + // A map of partittion_key to its corresponding partition + partitions : BTreeMap, +} + +// A Partittion and all of its in-memory data batches +// +// Stages of a batch of a partition: +// . A partition has only one `Writing Batch`. When is it big or +// old enough, defined by IngesterPersistenceSettings, it will +// be put to `Snaphot Batch` and also copied to `Pesisting Batch`. +// The new and empty Wrtiting Batch will be created for accpeting new writes +// . Snapshot and Persisting batches are 1:1 mapped at all times. Snapshot ones are +// immutable and used for querying. Persisting ones are modified to sort, +// dedupilcate, and apply tombstone and then persited to parquet files. +// While many batches can be persisted at the same time, a batch is only marked +// in the catalog to be persisted after the batches before +// its in the queue are marked persisted. +// . After the batch are marked persisted in the catalog, its will be removed +// from Sanpshot and Persisting and put in Persisted. The Persisted ones +// will get evicted based on IngesterPersistenceSettings. +// ┌───────────────────┐ +// │ Persisting │ +// │ │ +// │ ┌───────────────┐ │ +// ┌────────────┐ │ │ Snapshot │ │ ┌────────────┐ +// │ Writing │───────▶│ └───────────────┘ │───────▶│ Persisted │ +// └────────────┘ │ ┌───────────────┐ │ └────────────┘ +// │ │ Persiting │ │ +// │ └───────────────┘ │ +// └───────────────────┘ +// +#[derive(Debug, Clone)] +pub struct Partition { + partition_key: String, + + // Writing batch that accepts writes to this partition + writing_batch: PartitionBatch, + + // Queue of batches that are immutable and used for querying only. + // The batches are queue contiguously in thier data arrival time + snapshot_batches: Vec, // todo: is Vec good enough for hanlding queue? + + // Queue of persisting batches which is a one on one mapping with the snapshot_batches. + // Data of these batches will be modified to sort, dedupilcate, and apply tombstone and then + // persited to parquet files. While many batches can be persisted at the same time, + // a batch is only marked in the catalog to be persisted after the batches before + // its in the queue are marked persisted + pesisting_batched: Vec, + + // Persisted batches that are not yet evicted from the in-memory. + // These are batches moved from persiting_batches after they are fully persisted and marked + // so in the catalog + pesisted_batched: Vec, + +} + +// A PartitionBatch of contiguous in arrival time of writes +// todo & question: do we want to call this Chunk instead? +#[derive(Debug, Clone)] +pub struct PartitionBatch { + // To keep the PartitionBtach in order of their + // arrived data, we may need this auto created batch id + batch_id: i32, + + // Data of this partition batch + data: Arc, +} diff --git a/ingester/src/lib.rs b/ingester/src/lib.rs new file mode 100644 index 0000000000..99c92b1c6f --- /dev/null +++ b/ingester/src/lib.rs @@ -0,0 +1,5 @@ +//! IOx ingester implementation. +//! Design doc: https://docs.google.com/document/d/14NlzBiWwn0H37QxnE0k3ybTU58SKyUZmdgYpVw6az0Q/edit# +//! 
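The diagram and comments in `data.rs` above describe a nesting of maps (sequencer id → namespace → table → partition) and a writing batch that is rolled into an immutable snapshot once it is big or old enough. A self-contained sketch of that shape, using simplified stand-in types rather than the structs in `data.rs`:

    use std::collections::BTreeMap;

    #[derive(Default, Debug)]
    struct PartitionSketch {
        writing: Vec<String>,        // rows accumulating in the current writing batch
        snapshots: Vec<Vec<String>>, // immutable batches queued for persistence
    }

    #[derive(Default, Debug)]
    struct IngesterSketch {
        // sequencer id -> namespace name -> table name -> partition key -> partition data
        data: BTreeMap<i32, BTreeMap<String, BTreeMap<String, BTreeMap<String, PartitionSketch>>>>,
    }

    impl IngesterSketch {
        fn write(&mut self, seq: i32, ns: &str, table: &str, part: &str, row: &str) {
            self.data
                .entry(seq)
                .or_default()
                .entry(ns.to_string())
                .or_default()
                .entry(table.to_string())
                .or_default()
                .entry(part.to_string())
                .or_default()
                .writing
                .push(row.to_string());
        }

        // Close the writing batch: move it onto the snapshot queue and start a fresh one.
        fn snapshot(&mut self, seq: i32, ns: &str, table: &str, part: &str) {
            if let Some(p) = self
                .data
                .get_mut(&seq)
                .and_then(|n| n.get_mut(ns))
                .and_then(|t| t.get_mut(table))
                .and_then(|t| t.get_mut(part))
            {
                let closed = std::mem::take(&mut p.writing);
                if !closed.is_empty() {
                    p.snapshots.push(closed);
                }
            }
        }
    }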
+ +pub mod data; From 4f876000819ca2fd8c949bdc225fd8afae5d974a Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Mon, 17 Jan 2022 17:20:11 -0500 Subject: [PATCH 04/32] chore: make iox_catalog tests generic for any backend implementation --- iox_catalog/src/interface.rs | 188 ++++++++++++++++++++++++++++++++- iox_catalog/src/lib.rs | 96 +++++++++++++++++ iox_catalog/src/mem.rs | 29 +++++- iox_catalog/src/postgres.rs | 197 +++-------------------------------- 4 files changed, 326 insertions(+), 184 deletions(-) diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index 7da8ecce29..d7a030ffba 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -124,6 +124,9 @@ pub trait SequencerRepo { /// list all sequencers async fn list(&self) -> Result>; + + /// list all sequencers for a given kafka topic + async fn list_by_kafka_topic(&self, topic: &KafkaTopic) -> Result>; } /// Data object for a kafka topic @@ -145,7 +148,7 @@ pub struct QueryPool { } /// Data object for a namespace -#[derive(Debug, Clone, sqlx::FromRow)] +#[derive(Debug, Clone, Eq, PartialEq, sqlx::FromRow)] pub struct Namespace { /// The id of the namespace pub id: i32, @@ -450,3 +453,186 @@ pub struct Sequencer { /// lower than this must have been persisted to Parquet. pub min_unpersisted_sequence_number: i64, } + +#[cfg(test)] +pub(crate) mod test_helpers { + use super::*; + use futures::{stream::FuturesOrdered, StreamExt}; + + pub(crate) async fn test_repo(new_repo: F) + where + T: RepoCollection + Send + Sync, + F: Fn() -> T + Send + Sync, + { + test_kafka_topic(&new_repo()).await; + test_query_pool(&new_repo()).await; + test_namespace(&new_repo()).await; + test_table(&new_repo()).await; + test_column(&new_repo()).await; + test_sequencer(&new_repo()).await; + } + + async fn test_kafka_topic(repo: &T) { + let kafka_repo = repo.kafka_topic(); + let k = kafka_repo.create_or_get("foo").await.unwrap(); + assert!(k.id > 0); + assert_eq!(k.name, "foo"); + let k2 = kafka_repo.create_or_get("foo").await.unwrap(); + assert_eq!(k, k2); + } + + async fn test_query_pool(repo: &T) { + let query_repo = repo.query_pool(); + let q = query_repo.create_or_get("foo").await.unwrap(); + assert!(q.id > 0); + assert_eq!(q.name, "foo"); + let q2 = query_repo.create_or_get("foo").await.unwrap(); + assert_eq!(q, q2); + } + + async fn test_namespace(repo: &T) { + let namespace_repo = repo.namespace(); + let kafka = repo.kafka_topic().create_or_get("foo").await.unwrap(); + let pool = repo.query_pool().create_or_get("foo").await.unwrap(); + + let namespace_name = "test_namespace"; + let namespace = namespace_repo + .create(namespace_name, "inf", kafka.id, pool.id) + .await + .unwrap(); + assert!(namespace.id > 0); + assert_eq!(namespace.name, namespace_name); + + let conflict = namespace_repo + .create(namespace_name, "inf", kafka.id, pool.id) + .await; + assert!(matches!( + conflict.unwrap_err(), + Error::NameExists { name: _ } + )); + + let found = namespace_repo + .get_by_name(namespace_name) + .await + .unwrap() + .expect("namespace should be there"); + assert_eq!(namespace, found); + } + + async fn test_table(repo: &T) { + let kafka = repo.kafka_topic().create_or_get("foo").await.unwrap(); + let pool = repo.query_pool().create_or_get("foo").await.unwrap(); + let namespace = repo + .namespace() + .create("namespace_table_test", "inf", kafka.id, pool.id) + .await + .unwrap(); + + // test we can create or get a table + let table_repo = repo.table(); + let t = table_repo + .create_or_get("test_table", namespace.id) + 
.await + .unwrap(); + let tt = table_repo + .create_or_get("test_table", namespace.id) + .await + .unwrap(); + assert!(t.id > 0); + assert_eq!(t, tt); + + let tables = table_repo.list_by_namespace_id(namespace.id).await.unwrap(); + assert_eq!(vec![t], tables); + } + + async fn test_column(repo: &T) { + let kafka = repo.kafka_topic().create_or_get("foo").await.unwrap(); + let pool = repo.query_pool().create_or_get("foo").await.unwrap(); + let namespace = repo + .namespace() + .create("namespace_column_test", "inf", kafka.id, pool.id) + .await + .unwrap(); + let table = repo + .table() + .create_or_get("test_table", namespace.id) + .await + .unwrap(); + + // test we can create or get a column + let column_repo = repo.column(); + let c = column_repo + .create_or_get("column_test", table.id, ColumnType::Tag) + .await + .unwrap(); + let cc = column_repo + .create_or_get("column_test", table.id, ColumnType::Tag) + .await + .unwrap(); + assert!(c.id > 0); + assert_eq!(c, cc); + + // test that attempting to create an already defined column of a different type returns error + let err = column_repo + .create_or_get("column_test", table.id, ColumnType::U64) + .await + .expect_err("should error with wrong column type"); + assert!(matches!( + err, + Error::ColumnTypeMismatch { + name: _, + existing: _, + new: _ + } + )); + + // test that we can create a column of the same name under a different table + let table2 = repo + .table() + .create_or_get("test_table_2", namespace.id) + .await + .unwrap(); + let ccc = column_repo + .create_or_get("column_test", table2.id, ColumnType::U64) + .await + .unwrap(); + assert_ne!(c, ccc); + + let columns = column_repo + .list_by_namespace_id(namespace.id) + .await + .unwrap(); + assert_eq!(vec![c, ccc], columns); + } + + async fn test_sequencer(repo: &T) { + let kafka = repo + .kafka_topic() + .create_or_get("sequencer_test") + .await + .unwrap(); + let sequencer_repo = repo.sequencer(); + + // Create 10 sequencers + let created = (1..=10) + .map(|partition| sequencer_repo.create_or_get(&kafka, partition)) + .collect::>() + .map(|v| { + let v = v.expect("failed to create sequencer"); + (v.id, v) + }) + .collect::>() + .await; + + // List them and assert they match + let listed = sequencer_repo + .list_by_kafka_topic(&kafka) + .await + .expect("failed to list sequencers") + .into_iter() + .map(|v| (v.id, v)) + .collect::>(); + + assert_eq!(created, listed); + } +} diff --git a/iox_catalog/src/lib.rs b/iox_catalog/src/lib.rs index b2dead2df3..fedae0197e 100644 --- a/iox_catalog/src/lib.rs +++ b/iox_catalog/src/lib.rs @@ -196,3 +196,99 @@ pub async fn create_or_get_default_records( Ok((kafka_topic, query_pool, sequencers)) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::mem::MemCatalog; + use influxdb_line_protocol::parse_lines; + use std::sync::Arc; + + #[tokio::test] + async fn test_validate_or_insert_schema() { + let repo = Arc::new(MemCatalog::new()); + let (kafka_topic, query_pool, _) = create_or_get_default_records(2, &repo).await.unwrap(); + + let namespace_name = "validate_schema"; + // now test with a new namespace + let namespace = repo + .namespace() + .create(namespace_name, "inf", kafka_topic.id, query_pool.id) + .await + .unwrap(); + let data = r#" +m1,t1=a,t2=b f1=2i,f2=2.0 1 +m1,t1=a f1=3i 2 +m2,t3=b f1=true 1 + "#; + + // test that new schema gets returned + let lines: Vec<_> = parse_lines(data).map(|l| l.unwrap()).collect(); + let schema = Arc::new(NamespaceSchema::new( + namespace.id, + namespace.kafka_topic_id, + namespace.query_pool_id, 
+ )); + let new_schema = validate_or_insert_schema(lines, &schema, &repo) + .await + .unwrap(); + let new_schema = new_schema.unwrap(); + + // ensure new schema is in the db + let schema_from_db = NamespaceSchema::get_by_name(namespace_name, &repo) + .await + .unwrap() + .unwrap(); + assert_eq!(new_schema, schema_from_db); + + // test that a new table will be created + let data = r#" +m1,t1=c f1=1i 2 +new_measurement,t9=a f10=true 1 + "#; + let lines: Vec<_> = parse_lines(data).map(|l| l.unwrap()).collect(); + let new_schema = validate_or_insert_schema(lines, &schema_from_db, &repo) + .await + .unwrap() + .unwrap(); + let new_table = new_schema.tables.get("new_measurement").unwrap(); + assert_eq!( + ColumnType::Bool, + new_table.columns.get("f10").unwrap().column_type + ); + assert_eq!( + ColumnType::Tag, + new_table.columns.get("t9").unwrap().column_type + ); + let schema = NamespaceSchema::get_by_name(namespace_name, &repo) + .await + .unwrap() + .unwrap(); + assert_eq!(new_schema, schema); + + // test that a new column for an existing table will be created + // test that a new table will be created + let data = r#" +m1,new_tag=c new_field=1i 2 + "#; + let lines: Vec<_> = parse_lines(data).map(|l| l.unwrap()).collect(); + let new_schema = validate_or_insert_schema(lines, &schema, &repo) + .await + .unwrap() + .unwrap(); + let table = new_schema.tables.get("m1").unwrap(); + assert_eq!( + ColumnType::I64, + table.columns.get("new_field").unwrap().column_type + ); + assert_eq!( + ColumnType::Tag, + table.columns.get("new_tag").unwrap().column_type + ); + let schema = NamespaceSchema::get_by_name(namespace_name, &repo) + .await + .unwrap() + .unwrap(); + assert_eq!(new_schema, schema); + } +} diff --git a/iox_catalog/src/mem.rs b/iox_catalog/src/mem.rs index 398a16210d..a9d27afad2 100644 --- a/iox_catalog/src/mem.rs +++ b/iox_catalog/src/mem.rs @@ -189,7 +189,11 @@ impl ColumnRepo for MemCatalog { ) -> Result { let mut collections = self.collections.lock().expect("mutex poisoned"); - let column = match collections.columns.iter().find(|t| t.name == name) { + let column = match collections + .columns + .iter() + .find(|t| t.name == name && t.table_id == table_id) + { Some(c) => { if column_type as i16 != c.column_type { return Err(Error::ColumnTypeMismatch { @@ -264,4 +268,27 @@ impl SequencerRepo for MemCatalog { let collections = self.collections.lock().expect("mutex poisoned"); Ok(collections.sequencers.clone()) } + + async fn list_by_kafka_topic(&self, topic: &KafkaTopic) -> Result> { + let collections = self.collections.lock().expect("mutex poisoned"); + let sequencers: Vec<_> = collections + .sequencers + .iter() + .filter(|s| s.kafka_topic_id == topic.id) + .cloned() + .collect(); + Ok(sequencers) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_mem_repo() { + let f = || Arc::new(MemCatalog::new()); + + crate::interface::test_helpers::test_repo(f).await; + } } diff --git a/iox_catalog/src/postgres.rs b/iox_catalog/src/postgres.rs index e5bcd20f4e..7efdcdc7d8 100644 --- a/iox_catalog/src/postgres.rs +++ b/iox_catalog/src/postgres.rs @@ -315,6 +315,14 @@ impl SequencerRepo for PostgresCatalog { .await .map_err(|e| Error::SqlxError { source: e }) } + + async fn list_by_kafka_topic(&self, topic: &KafkaTopic) -> Result> { + sqlx::query_as::<_, Sequencer>(r#"SELECT * FROM sequencer WHERE kafka_topic_id = $1;"#) + .bind(&topic.id) // $1 + .fetch_all(&self.pool) + .await + .map_err(|e| Error::SqlxError { source: e }) + } } /// The error code returned by 
Postgres for a unique constraint violation. @@ -353,13 +361,6 @@ fn is_fk_violation(e: &sqlx::Error) -> bool { #[cfg(test)] mod tests { use super::*; - use crate::postgres::PostgresCatalog; - use crate::{ - create_or_get_default_records, interface::NamespaceSchema, validate_or_insert_schema, - }; - use futures::{stream::FuturesOrdered, StreamExt}; - use influxdb_line_protocol::parse_lines; - use std::collections::BTreeMap; use std::env; // Helper macro to skip tests if TEST_INTEGRATION and the AWS environment variables are not set. @@ -399,196 +400,28 @@ mod tests { }}; } - async fn setup_db() -> (Arc, KafkaTopic, QueryPool) { + async fn setup_db() -> Arc { let dsn = std::env::var("DATABASE_URL").unwrap(); - let postgres_catalog = Arc::new( + Arc::new( PostgresCatalog::connect("test", SCHEMA_NAME, &dsn) .await .unwrap(), - ); - - let (kafka_topic, query_pool, _) = create_or_get_default_records(2, &postgres_catalog) - .await - .unwrap(); - (postgres_catalog, kafka_topic, query_pool) + ) } #[tokio::test] - async fn test_catalog() { + async fn test_repo() { // If running an integration test on your laptop, this requires that you have Postgres // running and that you've done the sqlx migrations. See the README in this crate for // info to set it up. maybe_skip_integration!(); - let (postgres, kafka_topic, query_pool) = setup_db().await; + let postgres = setup_db().await; clear_schema(&postgres.pool).await; - let namespace = NamespaceRepo::create(postgres.as_ref(), "foo", "inf", 0, 0).await; - assert!(matches!( - namespace.unwrap_err(), - Error::ForeignKeyViolation { source: _ } - )); - let namespace = NamespaceRepo::create( - postgres.as_ref(), - "foo", - "inf", - kafka_topic.id, - query_pool.id, - ) - .await - .unwrap(); - assert!(namespace.id > 0); - assert_eq!(namespace.kafka_topic_id, kafka_topic.id); - assert_eq!(namespace.query_pool_id, query_pool.id); + let f = || Arc::clone(&postgres); - // test that we can create or get a table - let t = TableRepo::create_or_get(postgres.as_ref(), "foo", namespace.id) - .await - .unwrap(); - let tt = TableRepo::create_or_get(postgres.as_ref(), "foo", namespace.id) - .await - .unwrap(); - assert!(t.id > 0); - assert_eq!(t, tt); - - // test that we can craete or get a column - let c = ColumnRepo::create_or_get(postgres.as_ref(), "foo", t.id, ColumnType::I64) - .await - .unwrap(); - let cc = ColumnRepo::create_or_get(postgres.as_ref(), "foo", t.id, ColumnType::I64) - .await - .unwrap(); - assert!(c.id > 0); - assert_eq!(c, cc); - - // test that attempting to create an already defined column of a different type returns error - let err = ColumnRepo::create_or_get(postgres.as_ref(), "foo", t.id, ColumnType::F64) - .await - .expect_err("should error with wrong column type"); - assert!(matches!( - err, - Error::ColumnTypeMismatch { - name: _, - existing: _, - new: _ - } - )); - - // now test with a new namespace - let namespace = NamespaceRepo::create( - postgres.as_ref(), - "asdf", - "inf", - kafka_topic.id, - query_pool.id, - ) - .await - .unwrap(); - let data = r#" -m1,t1=a,t2=b f1=2i,f2=2.0 1 -m1,t1=a f1=3i 2 -m2,t3=b f1=true 1 - "#; - - // test that new schema gets returned - let lines: Vec<_> = parse_lines(data).map(|l| l.unwrap()).collect(); - let schema = Arc::new(NamespaceSchema::new( - namespace.id, - namespace.kafka_topic_id, - namespace.query_pool_id, - )); - let new_schema = validate_or_insert_schema(lines, &schema, &postgres) - .await - .unwrap(); - let new_schema = new_schema.unwrap(); - - // ensure new schema is in the db - let schema_from_db 
= NamespaceSchema::get_by_name("asdf", &postgres) - .await - .unwrap() - .unwrap(); - assert_eq!(new_schema, schema_from_db); - - // test that a new table will be created - let data = r#" -m1,t1=c f1=1i 2 -new_measurement,t9=a f10=true 1 - "#; - let lines: Vec<_> = parse_lines(data).map(|l| l.unwrap()).collect(); - let new_schema = validate_or_insert_schema(lines, &schema_from_db, &postgres) - .await - .unwrap() - .unwrap(); - let new_table = new_schema.tables.get("new_measurement").unwrap(); - assert_eq!( - ColumnType::Bool, - new_table.columns.get("f10").unwrap().column_type - ); - assert_eq!( - ColumnType::Tag, - new_table.columns.get("t9").unwrap().column_type - ); - let schema = NamespaceSchema::get_by_name("asdf", &postgres) - .await - .unwrap() - .unwrap(); - assert_eq!(new_schema, schema); - - // test that a new column for an existing table will be created - // test that a new table will be created - let data = r#" -m1,new_tag=c new_field=1i 2 - "#; - let lines: Vec<_> = parse_lines(data).map(|l| l.unwrap()).collect(); - let new_schema = validate_or_insert_schema(lines, &schema, &postgres) - .await - .unwrap() - .unwrap(); - let table = new_schema.tables.get("m1").unwrap(); - assert_eq!( - ColumnType::I64, - table.columns.get("new_field").unwrap().column_type - ); - assert_eq!( - ColumnType::Tag, - table.columns.get("new_tag").unwrap().column_type - ); - let schema = NamespaceSchema::get_by_name("asdf", &postgres) - .await - .unwrap() - .unwrap(); - assert_eq!(new_schema, schema); - } - - #[tokio::test] - async fn test_sequencers() { - maybe_skip_integration!(); - - let (postgres, kafka_topic, _query_pool) = setup_db().await; - clear_schema(&postgres.pool).await; - - // Create 10 sequencers - let created = (1..=10) - .map(|partition| { - SequencerRepo::create_or_get(postgres.as_ref(), &kafka_topic, partition) - }) - .collect::>() - .map(|v| { - let v = v.expect("failed to create sequencer"); - (v.id, v) - }) - .collect::>() - .await; - - // List them and assert they match - let listed = SequencerRepo::list(postgres.as_ref()) - .await - .expect("failed to list sequencers") - .into_iter() - .map(|v| (v.id, v)) - .collect::>(); - - assert_eq!(created, listed); + crate::interface::test_helpers::test_repo(f).await; } async fn clear_schema(pool: &Pool) { From 5e464727d141dfcfbaa3ecd4b821bdd4d3e0c6cd Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Tue, 18 Jan 2022 10:09:16 -0500 Subject: [PATCH 05/32] refactor: make get_schema_by_name bare function --- iox_catalog/src/interface.rs | 117 ++++++++++++++++++----------------- iox_catalog/src/lib.rs | 7 ++- iox_catalog/src/mem.rs | 2 +- 3 files changed, 64 insertions(+), 62 deletions(-) diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index d7a030ffba..828c18a525 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -163,7 +163,8 @@ pub struct Namespace { pub query_pool_id: i16, } -/// Schema collection for a namespace +/// Schema collection for a namespace. This is an in-memory object useful for a schema +/// cache. #[derive(Debug, Clone, Eq, PartialEq)] pub struct NamespaceSchema { /// the namespace id @@ -187,63 +188,6 @@ impl NamespaceSchema { } } - /// Gets the namespace schema including all tables and columns. - pub async fn get_by_name( - name: &str, - repo: &T, - ) -> Result> { - let namespace_repo = repo.namespace(); - let table_repo = repo.table(); - let column_repo = repo.column(); - - let namespace = namespace_repo - .get_by_name(name) - .await? 
- .context(NamespaceNotFoundSnafu { name })?; - - // get the columns first just in case someone else is creating schema while we're doing this. - let columns = column_repo.list_by_namespace_id(namespace.id).await?; - let tables = table_repo.list_by_namespace_id(namespace.id).await?; - - let mut namespace = Self::new( - namespace.id, - namespace.kafka_topic_id, - namespace.query_pool_id, - ); - - let mut table_id_to_schema = BTreeMap::new(); - for t in tables { - table_id_to_schema.insert(t.id, (t.name, TableSchema::new(t.id))); - } - - for c in columns { - let (_, t) = table_id_to_schema.get_mut(&c.table_id).unwrap(); - match ColumnType::try_from(c.column_type) { - Ok(column_type) => { - t.columns.insert( - c.name, - ColumnSchema { - id: c.id, - column_type, - }, - ); - } - _ => { - return Err(Error::UnknownColumnType { - data_type: c.column_type, - name: c.name.to_string(), - }); - } - } - } - - for (_, (table_name, schema)) in table_id_to_schema { - namespace.tables.insert(table_name, schema); - } - - Ok(Some(namespace)) - } - /// Adds tables and columns to the `NamespaceSchema`. These are created /// incrementally while validating the schema for a write and this helper /// method takes them in to add them to the schema. @@ -277,6 +221,63 @@ impl NamespaceSchema { } } +/// Gets the namespace schema including all tables and columns. +pub async fn get_schema_by_name( + name: &str, + repo: &T, +) -> Result> { + let namespace_repo = repo.namespace(); + let table_repo = repo.table(); + let column_repo = repo.column(); + + let namespace = namespace_repo + .get_by_name(name) + .await? + .context(NamespaceNotFoundSnafu { name })?; + + // get the columns first just in case someone else is creating schema while we're doing this. + let columns = column_repo.list_by_namespace_id(namespace.id).await?; + let tables = table_repo.list_by_namespace_id(namespace.id).await?; + + let mut namespace = NamespaceSchema::new( + namespace.id, + namespace.kafka_topic_id, + namespace.query_pool_id, + ); + + let mut table_id_to_schema = BTreeMap::new(); + for t in tables { + table_id_to_schema.insert(t.id, (t.name, TableSchema::new(t.id))); + } + + for c in columns { + let (_, t) = table_id_to_schema.get_mut(&c.table_id).unwrap(); + match ColumnType::try_from(c.column_type) { + Ok(column_type) => { + t.columns.insert( + c.name, + ColumnSchema { + id: c.id, + column_type, + }, + ); + } + _ => { + return Err(Error::UnknownColumnType { + data_type: c.column_type, + name: c.name.to_string(), + }); + } + } + } + + for (_, (table_name, schema)) in table_id_to_schema { + namespace.tables.insert(table_name, schema); + } + + Ok(Some(namespace)) +} + /// Data object for a table #[derive(Debug, Clone, sqlx::FromRow, Eq, PartialEq)] pub struct Table { diff --git a/iox_catalog/src/lib.rs b/iox_catalog/src/lib.rs index fedae0197e..96ad9e8b8d 100644 --- a/iox_catalog/src/lib.rs +++ b/iox_catalog/src/lib.rs @@ -200,6 +200,7 @@ pub async fn create_or_get_default_records( #[cfg(test)] mod tests { use super::*; + use crate::interface::get_schema_by_name; use crate::mem::MemCatalog; use influxdb_line_protocol::parse_lines; use std::sync::Arc; @@ -235,7 +236,7 @@ m2,t3=b f1=true 1 let new_schema = new_schema.unwrap(); // ensure new schema is in the db - let schema_from_db = NamespaceSchema::get_by_name(namespace_name, &repo) + let schema_from_db = get_schema_by_name(namespace_name, &repo) .await .unwrap() .unwrap(); @@ -260,7 +261,7 @@ new_measurement,t9=a f10=true 1 ColumnType::Tag, new_table.columns.get("t9").unwrap().column_type ); - 
let schema = NamespaceSchema::get_by_name(namespace_name, &repo) + let schema = get_schema_by_name(namespace_name, &repo) .await .unwrap() .unwrap(); @@ -285,7 +286,7 @@ m1,new_tag=c new_field=1i 2 ColumnType::Tag, table.columns.get("new_tag").unwrap().column_type ); - let schema = NamespaceSchema::get_by_name(namespace_name, &repo) + let schema = get_schema_by_name(namespace_name, &repo) .await .unwrap() .unwrap(); diff --git a/iox_catalog/src/mem.rs b/iox_catalog/src/mem.rs index a9d27afad2..1dfaa687da 100644 --- a/iox_catalog/src/mem.rs +++ b/iox_catalog/src/mem.rs @@ -10,7 +10,7 @@ use std::convert::TryFrom; use std::fmt::Formatter; use std::sync::{Arc, Mutex}; -/// In-memory catalog that implements the `RepoCollection` and individual repo traits fromt +/// In-memory catalog that implements the `RepoCollection` and individual repo traits from /// the catalog interface. #[derive(Default)] pub struct MemCatalog { From 40cac21e21e7c1c4a528450abfb89e9c9e783809 Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Tue, 18 Jan 2022 12:42:10 -0500 Subject: [PATCH 06/32] refactor: change all ids in catalog to their own types --- iox_catalog/src/interface.rs | 154 +++++++++++++++++++++++++++-------- iox_catalog/src/lib.rs | 8 +- iox_catalog/src/mem.rs | 29 +++---- iox_catalog/src/postgres.rs | 17 ++-- 4 files changed, 150 insertions(+), 58 deletions(-) diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index 828c18a525..f693bd238e 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -41,6 +41,96 @@ pub enum Error { /// A specialized `Error` for Catalog errors pub type Result = std::result::Result; +/// Unique ID for a `Namespace` +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub struct NamespaceId(i32); + +#[allow(missing_docs)] +impl NamespaceId { + pub fn new(v: i32) -> Self { + Self(v) + } + pub fn get(&self) -> i32 { + self.0 + } +} + +/// Unique ID for a `KafkaTopic` +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub struct KafkaTopicId(i32); + +#[allow(missing_docs)] +impl KafkaTopicId { + pub fn new(v: i32) -> Self { + Self(v) + } + pub fn get(&self) -> i32 { + self.0 + } +} + +/// Unique ID for a `QueryPool` +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub struct QueryPoolId(i16); + +#[allow(missing_docs)] +impl QueryPoolId { + pub fn new(v: i16) -> Self { + Self(v) + } + pub fn get(&self) -> i16 { + self.0 + } +} + +/// Unique ID for a `Table` +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub struct TableId(i32); + +#[allow(missing_docs)] +impl TableId { + pub fn new(v: i32) -> Self { + Self(v) + } + pub fn get(&self) -> i32 { + self.0 + } +} + +/// Unique ID for a `Column` +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub struct ColumnId(i32); + +#[allow(missing_docs)] +impl ColumnId { + pub fn new(v: i32) -> Self { + Self(v) + } + pub fn get(&self) -> i32 { + self.0 + } +} + +/// Unique ID for a `Sequencer` +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub struct SequencerId(i16); + +#[allow(missing_docs)] +impl SequencerId { + pub fn new(v: i16) -> Self { + Self(v) + } + pub fn get(&self) -> i16 { + self.0 + } +} + /// Container that can return repos for each of the catalog data types. 
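A minimal illustration of what these id newtypes buy (the helper below is hypothetical, not part of the patch): mixing up two kinds of id becomes a compile error, while the raw value stays reachable through `.get()` where it is genuinely needed, such as SQL binds.

    fn count_for_namespace(namespace_id: NamespaceId) -> i32 {
        // the wrapped value is still available for e.g. query binds
        namespace_id.get()
    }

    fn typed_id_demo() {
        let ns = NamespaceId::new(42);
        let table = TableId::new(7);
        assert_eq!(count_for_namespace(ns), 42);
        // count_for_namespace(table) would not compile: expected `NamespaceId`, found `TableId`
        let _ = table;
    }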
#[async_trait] pub trait RepoCollection { @@ -81,8 +171,8 @@ pub trait NamespaceRepo { &self, name: &str, retention_duration: &str, - kafka_topic_id: i32, - query_pool_id: i16, + kafka_topic_id: KafkaTopicId, + query_pool_id: QueryPoolId, ) -> Result; /// Gets the namespace by its unique name. @@ -93,10 +183,10 @@ pub trait NamespaceRepo { #[async_trait] pub trait TableRepo { /// Creates the table in the catalog or get the existing record by name. - async fn create_or_get(&self, name: &str, namespace_id: i32) -> Result
; + async fn create_or_get(&self, name: &str, namespace_id: NamespaceId) -> Result
; /// Lists all tables in the catalog for the given namespace id. - async fn list_by_namespace_id(&self, namespace_id: i32) -> Result>; + async fn list_by_namespace_id(&self, namespace_id: NamespaceId) -> Result>; } /// Functions for working with columns in the catalog @@ -108,12 +198,12 @@ pub trait ColumnRepo { async fn create_or_get( &self, name: &str, - table_id: i32, + table_id: TableId, column_type: ColumnType, ) -> Result; /// Lists all columns in the passed in namespace id. - async fn list_by_namespace_id(&self, namespace_id: i32) -> Result>; + async fn list_by_namespace_id(&self, namespace_id: NamespaceId) -> Result>; } /// Functions for working with sequencers in the catalog @@ -133,7 +223,7 @@ pub trait SequencerRepo { #[derive(Debug, Clone, Eq, PartialEq, sqlx::FromRow)] pub struct KafkaTopic { /// The id of the topic - pub id: i32, + pub id: KafkaTopicId, /// The unique name of the topic pub name: String, } @@ -142,7 +232,7 @@ pub struct KafkaTopic { #[derive(Debug, Clone, Eq, PartialEq, sqlx::FromRow)] pub struct QueryPool { /// The id of the pool - pub id: i16, + pub id: QueryPoolId, /// The unique name of the pool pub name: String, } @@ -151,16 +241,16 @@ pub struct QueryPool { #[derive(Debug, Clone, Eq, PartialEq, sqlx::FromRow)] pub struct Namespace { /// The id of the namespace - pub id: i32, + pub id: NamespaceId, /// The unique name of the namespace pub name: String, /// The retention duration as a string. 'inf' or not present represents infinite duration (i.e. never drop data). #[sqlx(default)] pub retention_duration: Option, /// The kafka topic that writes to this namespace will land in - pub kafka_topic_id: i32, + pub kafka_topic_id: KafkaTopicId, /// The query pool assigned to answer queries for this namespace - pub query_pool_id: i16, + pub query_pool_id: QueryPoolId, } /// Schema collection for a namespace. This is an in-memory object useful for a schema @@ -168,18 +258,18 @@ pub struct Namespace { #[derive(Debug, Clone, Eq, PartialEq)] pub struct NamespaceSchema { /// the namespace id - pub id: i32, + pub id: NamespaceId, /// the kafka topic this namespace gets data written to - pub kafka_topic_id: i32, + pub kafka_topic_id: KafkaTopicId, /// the query pool assigned to answer queries for this namespace - pub query_pool_id: i16, + pub query_pool_id: QueryPoolId, /// the tables in the namespace by name pub tables: BTreeMap, } impl NamespaceSchema { /// Create a new `NamespaceSchema` - pub fn new(id: i32, kafka_topic_id: i32, query_pool_id: i16) -> Self { + pub fn new(id: NamespaceId, kafka_topic_id: KafkaTopicId, query_pool_id: QueryPoolId) -> Self { Self { id, tables: BTreeMap::new(), @@ -193,8 +283,8 @@ impl NamespaceSchema { /// method takes them in to add them to the schema. 
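A rough sketch of how the write-validation path is expected to hand its incremental results to this method, using the typed collections from this patch (table and column names, ids, and the wrapper function are made up for illustration):

    fn merge_new_schema_example(schema: &mut NamespaceSchema) {
        let mut new_tables = BTreeMap::new();
        new_tables.insert("cpu".to_string(), TableId::new(10));

        let mut cpu_columns = BTreeMap::new();
        cpu_columns.insert(
            "region".to_string(),
            ColumnSchema {
                id: ColumnId::new(100),
                column_type: ColumnType::Tag,
            },
        );
        let mut new_columns = BTreeMap::new();
        new_columns.insert(TableId::new(10), cpu_columns);

        schema.add_tables_and_columns(new_tables, new_columns);
        assert!(schema.tables.contains_key("cpu"));
    }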
pub fn add_tables_and_columns( &mut self, - new_tables: BTreeMap, - new_columns: BTreeMap>, + new_tables: BTreeMap, + new_columns: BTreeMap>, ) { for (table_name, table_id) in new_tables { self.tables @@ -210,7 +300,7 @@ impl NamespaceSchema { } } - fn get_table_mut(&mut self, table_id: i32) -> Option<&mut TableSchema> { + fn get_table_mut(&mut self, table_id: TableId) -> Option<&mut TableSchema> { for table in self.tables.values_mut() { if table.id == table_id { return Some(table); @@ -282,9 +372,9 @@ pub async fn get_schema_by_name( #[derive(Debug, Clone, sqlx::FromRow, Eq, PartialEq)] pub struct Table { /// The id of the table - pub id: i32, + pub id: TableId, /// The namespace id that the table is in - pub namespace_id: i32, + pub namespace_id: NamespaceId, /// The name of the table, which is unique within the associated namespace pub name: String, } @@ -293,14 +383,14 @@ pub struct Table { #[derive(Debug, Clone, Eq, PartialEq)] pub struct TableSchema { /// the table id - pub id: i32, + pub id: TableId, /// the table's columns by their name pub columns: BTreeMap, } impl TableSchema { /// Initialize new `TableSchema` - pub fn new(id: i32) -> Self { + pub fn new(id: TableId) -> Self { Self { id, columns: BTreeMap::new(), @@ -319,9 +409,9 @@ impl TableSchema { #[derive(Debug, Clone, sqlx::FromRow, Eq, PartialEq)] pub struct Column { /// the column id - pub id: i32, + pub id: ColumnId, /// the table id the column is in - pub table_id: i32, + pub table_id: TableId, /// the name of the column, which is unique in the table pub name: String, /// the logical type of the column @@ -350,7 +440,7 @@ impl Column { #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub struct ColumnSchema { /// the column id - pub id: i32, + pub id: ColumnId, /// the column type pub column_type: ColumnType, } @@ -443,9 +533,9 @@ pub fn column_type_from_field(field_value: &FieldValue) -> ColumnType { #[derive(Debug, Copy, Clone, PartialEq, sqlx::FromRow)] pub struct Sequencer { /// the id of the sequencer - pub id: i16, + pub id: SequencerId, /// the topic the sequencer is reading from - pub kafka_topic_id: i32, + pub kafka_topic_id: KafkaTopicId, /// the kafka partition the sequencer is reading from pub kafka_partition: i32, /// The minimum unpersisted sequence number. 
Because different tables @@ -476,7 +566,7 @@ pub(crate) mod test_helpers { async fn test_kafka_topic(repo: &T) { let kafka_repo = repo.kafka_topic(); let k = kafka_repo.create_or_get("foo").await.unwrap(); - assert!(k.id > 0); + assert!(k.id > KafkaTopicId::new(0)); assert_eq!(k.name, "foo"); let k2 = kafka_repo.create_or_get("foo").await.unwrap(); assert_eq!(k, k2); @@ -485,7 +575,7 @@ pub(crate) mod test_helpers { async fn test_query_pool(repo: &T) { let query_repo = repo.query_pool(); let q = query_repo.create_or_get("foo").await.unwrap(); - assert!(q.id > 0); + assert!(q.id > QueryPoolId::new(0)); assert_eq!(q.name, "foo"); let q2 = query_repo.create_or_get("foo").await.unwrap(); assert_eq!(q, q2); @@ -501,7 +591,7 @@ pub(crate) mod test_helpers { .create(namespace_name, "inf", kafka.id, pool.id) .await .unwrap(); - assert!(namespace.id > 0); + assert!(namespace.id > NamespaceId::new(0)); assert_eq!(namespace.name, namespace_name); let conflict = namespace_repo @@ -539,7 +629,7 @@ pub(crate) mod test_helpers { .create_or_get("test_table", namespace.id) .await .unwrap(); - assert!(t.id > 0); + assert!(t.id > TableId::new(0)); assert_eq!(t, tt); let tables = table_repo.list_by_namespace_id(namespace.id).await.unwrap(); @@ -570,7 +660,7 @@ pub(crate) mod test_helpers { .create_or_get("column_test", table.id, ColumnType::Tag) .await .unwrap(); - assert!(c.id > 0); + assert!(c.id > ColumnId::new(0)); assert_eq!(c, cc); // test that attempting to create an already defined column of a different type returns error diff --git a/iox_catalog/src/lib.rs b/iox_catalog/src/lib.rs index 96ad9e8b8d..8698e8f482 100644 --- a/iox_catalog/src/lib.rs +++ b/iox_catalog/src/lib.rs @@ -13,7 +13,7 @@ use crate::interface::{ column_type_from_field, ColumnSchema, ColumnType, Error, KafkaTopic, NamespaceSchema, - QueryPool, RepoCollection, Result, Sequencer, + QueryPool, RepoCollection, Result, Sequencer, SequencerId, TableId, }; use futures::{stream::FuturesOrdered, StreamExt}; use influxdb_line_protocol::ParsedLine; @@ -42,9 +42,9 @@ pub async fn validate_or_insert_schema( repo: &T, ) -> Result> { // table name to table_id - let mut new_tables: BTreeMap = BTreeMap::new(); + let mut new_tables: BTreeMap = BTreeMap::new(); // table_id to map of column name to column - let mut new_columns: BTreeMap> = BTreeMap::new(); + let mut new_columns: BTreeMap> = BTreeMap::new(); for line in &lines { let table_name = line.series.measurement.as_str(); @@ -176,7 +176,7 @@ pub async fn validate_or_insert_schema( pub async fn create_or_get_default_records( kafka_partition_count: i32, repo: &T, -) -> Result<(KafkaTopic, QueryPool, BTreeMap)> { +) -> Result<(KafkaTopic, QueryPool, BTreeMap)> { let kafka_repo = repo.kafka_topic(); let query_repo = repo.query_pool(); let sequencer_repo = repo.sequencer(); diff --git a/iox_catalog/src/mem.rs b/iox_catalog/src/mem.rs index 1dfaa687da..19c7226e74 100644 --- a/iox_catalog/src/mem.rs +++ b/iox_catalog/src/mem.rs @@ -2,8 +2,9 @@ //! used for testing or for an IOx designed to run without catalog persistence. 
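// Both the Postgres catalog and this in-memory catalog implement the same repo traits,
// so callers can stay generic over RepoCollection. A sketch of that usage; the
// Send + Sync bounds and the literal names are illustrative assumptions, not taken
// from this patch:
async fn create_test_namespace<T: RepoCollection + Send + Sync>(repo: &T) -> Result<Namespace> {
    // Topic and query pool rows are created on demand if they do not already exist.
    let topic = repo.kafka_topic().create_or_get("iox_shared").await?;
    let pool = repo.query_pool().create_or_get("iox_shared").await?;
    // Namespace creation returns an error if a namespace with this name already exists.
    repo.namespace()
        .create("org_bucket", "inf", topic.id, pool.id)
        .await
}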
use crate::interface::{ - Column, ColumnRepo, ColumnType, Error, KafkaTopic, KafkaTopicRepo, Namespace, NamespaceRepo, - QueryPool, QueryPoolRepo, RepoCollection, Result, Sequencer, SequencerRepo, Table, TableRepo, + Column, ColumnId, ColumnRepo, ColumnType, Error, KafkaTopic, KafkaTopicId, KafkaTopicRepo, + Namespace, NamespaceId, NamespaceRepo, QueryPool, QueryPoolId, QueryPoolRepo, RepoCollection, + Result, Sequencer, SequencerId, SequencerRepo, Table, TableId, TableRepo, }; use async_trait::async_trait; use std::convert::TryFrom; @@ -76,7 +77,7 @@ impl KafkaTopicRepo for MemCatalog { Some(t) => t, None => { let topic = KafkaTopic { - id: collections.kafka_topics.len() as i32 + 1, + id: KafkaTopicId::new(collections.kafka_topics.len() as i32 + 1), name: name.to_string(), }; collections.kafka_topics.push(topic); @@ -97,7 +98,7 @@ impl QueryPoolRepo for MemCatalog { Some(t) => t, None => { let pool = QueryPool { - id: collections.query_pools.len() as i16 + 1, + id: QueryPoolId::new(collections.query_pools.len() as i16 + 1), name: name.to_string(), }; collections.query_pools.push(pool); @@ -115,8 +116,8 @@ impl NamespaceRepo for MemCatalog { &self, name: &str, retention_duration: &str, - kafka_topic_id: i32, - query_pool_id: i16, + kafka_topic_id: KafkaTopicId, + query_pool_id: QueryPoolId, ) -> Result { let mut collections = self.collections.lock().expect("mutex poisoned"); if collections.namespaces.iter().any(|n| n.name == name) { @@ -126,7 +127,7 @@ impl NamespaceRepo for MemCatalog { } let namespace = Namespace { - id: collections.namespaces.len() as i32 + 1, + id: NamespaceId::new(collections.namespaces.len() as i32 + 1), name: name.to_string(), kafka_topic_id, query_pool_id, @@ -148,14 +149,14 @@ impl NamespaceRepo for MemCatalog { #[async_trait] impl TableRepo for MemCatalog { - async fn create_or_get(&self, name: &str, namespace_id: i32) -> Result
<Table> {
+ async fn create_or_get(&self, name: &str, namespace_id: NamespaceId) -> Result<Table>
{ let mut collections = self.collections.lock().expect("mutex poisoned"); let table = match collections.tables.iter().find(|t| t.name == name) { Some(t) => t, None => { let table = Table { - id: collections.tables.len() as i32 + 1, + id: TableId::new(collections.tables.len() as i32 + 1), namespace_id, name: name.to_string(), }; @@ -167,7 +168,7 @@ impl TableRepo for MemCatalog { Ok(table.clone()) } - async fn list_by_namespace_id(&self, namespace_id: i32) -> Result> { + async fn list_by_namespace_id(&self, namespace_id: NamespaceId) -> Result> { let collections = self.collections.lock().expect("mutex poisoned"); let tables: Vec<_> = collections .tables @@ -184,7 +185,7 @@ impl ColumnRepo for MemCatalog { async fn create_or_get( &self, name: &str, - table_id: i32, + table_id: TableId, column_type: ColumnType, ) -> Result { let mut collections = self.collections.lock().expect("mutex poisoned"); @@ -207,7 +208,7 @@ impl ColumnRepo for MemCatalog { } None => { let column = Column { - id: collections.columns.len() as i32 + 1, + id: ColumnId::new(collections.columns.len() as i32 + 1), table_id, name: name.to_string(), column_type: column_type as i16, @@ -220,7 +221,7 @@ impl ColumnRepo for MemCatalog { Ok(column.clone()) } - async fn list_by_namespace_id(&self, namespace_id: i32) -> Result> { + async fn list_by_namespace_id(&self, namespace_id: NamespaceId) -> Result> { let mut columns = vec![]; let collections = self.collections.lock().expect("mutex poisoned"); @@ -251,7 +252,7 @@ impl SequencerRepo for MemCatalog { Some(t) => t, None => { let sequencer = Sequencer { - id: collections.sequencers.len() as i16 + 1, + id: SequencerId::new(collections.sequencers.len() as i16 + 1), kafka_topic_id: topic.id, kafka_partition: partition, min_unpersisted_sequence_number: 0, diff --git a/iox_catalog/src/postgres.rs b/iox_catalog/src/postgres.rs index 7efdcdc7d8..2d3f3ae1e0 100644 --- a/iox_catalog/src/postgres.rs +++ b/iox_catalog/src/postgres.rs @@ -1,8 +1,9 @@ //! A Postgres backed implementation of the Catalog use crate::interface::{ - Column, ColumnRepo, ColumnType, Error, KafkaTopic, KafkaTopicRepo, Namespace, NamespaceRepo, - QueryPool, QueryPoolRepo, RepoCollection, Result, Sequencer, SequencerRepo, Table, TableRepo, + Column, ColumnRepo, ColumnType, Error, KafkaTopic, KafkaTopicId, KafkaTopicRepo, Namespace, + NamespaceId, NamespaceRepo, QueryPool, QueryPoolId, QueryPoolRepo, RepoCollection, Result, + Sequencer, SequencerRepo, Table, TableId, TableRepo, }; use async_trait::async_trait; use observability_deps::tracing::info; @@ -130,8 +131,8 @@ impl NamespaceRepo for PostgresCatalog { &self, name: &str, retention_duration: &str, - kafka_topic_id: i32, - query_pool_id: i16, + kafka_topic_id: KafkaTopicId, + query_pool_id: QueryPoolId, ) -> Result { let rec = sqlx::query_as::<_, Namespace>( r#" @@ -184,7 +185,7 @@ SELECT * FROM namespace WHERE name = $1; #[async_trait] impl TableRepo for PostgresCatalog { - async fn create_or_get(&self, name: &str, namespace_id: i32) -> Result
<Table> {
+ async fn create_or_get(&self, name: &str, namespace_id: NamespaceId) -> Result<Table>
{ let rec = sqlx::query_as::<_, Table>( r#" INSERT INTO table_name ( name, namespace_id ) @@ -208,7 +209,7 @@ DO UPDATE SET name = table_name.name RETURNING *; Ok(rec) } - async fn list_by_namespace_id(&self, namespace_id: i32) -> Result> { + async fn list_by_namespace_id(&self, namespace_id: NamespaceId) -> Result> { let rec = sqlx::query_as::<_, Table>( r#" SELECT * FROM table_name @@ -229,7 +230,7 @@ impl ColumnRepo for PostgresCatalog { async fn create_or_get( &self, name: &str, - table_id: i32, + table_id: TableId, column_type: ColumnType, ) -> Result { let ct = column_type as i16; @@ -266,7 +267,7 @@ DO UPDATE SET name = column_name.name RETURNING *; Ok(rec) } - async fn list_by_namespace_id(&self, namespace_id: i32) -> Result> { + async fn list_by_namespace_id(&self, namespace_id: NamespaceId) -> Result> { let rec = sqlx::query_as::<_, Column>( r#" SELECT column_name.* FROM table_name From 23290fd2ff14f466aa15b2a843879e6e758925d2 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Tue, 18 Jan 2022 14:04:07 -0500 Subject: [PATCH 07/32] fix: new data structures suggested by reviewers --- Cargo.lock | 3 +- ingester/Cargo.toml | 4 +- ingester/src/data.rs | 238 +++++++++++++++++++------------------------ ingester/src/lib.rs | 11 ++ 4 files changed, 122 insertions(+), 134 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index be33e2e8c7..ceb7577155 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1857,7 +1857,8 @@ dependencies = [ name = "ingester" version = "0.1.0" dependencies = [ - "mutable_batch", + "arrow", + "parking_lot", ] [[package]] diff --git a/ingester/Cargo.toml b/ingester/Cargo.toml index 1412feb6a3..0683821a04 100644 --- a/ingester/Cargo.toml +++ b/ingester/Cargo.toml @@ -5,4 +5,6 @@ authors = ["Nga Tran "] edition = "2021" [dependencies] -mutable_batch = { path = "../mutable_batch" } \ No newline at end of file +arrow = { version = "7.0", features = ["prettyprint"] } +# mutable_batch = { path = "../mutable_batch" } +parking_lot = "0.11.2" \ No newline at end of file diff --git a/ingester/src/data.rs b/ingester/src/data.rs index 4b5f1c1185..9f3e31ba26 100644 --- a/ingester/src/data.rs +++ b/ingester/src/data.rs @@ -1,149 +1,123 @@ - - -//! Data for the lifecycle of the ingeter +//! Data for the lifecycle of the Ingeter //! use std::{sync::Arc, collections::BTreeMap}; -use mutable_batch::MutableBatch; +use parking_lot::RwLock; +use arrow::datatypes::DataType; -// ┌──────────────┐ -// │Ingester Data │ -// │ (in memory) │ -// └──────────────┘ -// │ -// ┌──────────────┼───────────────┐ -// ▼ ▼ ▼ -// ┌───────────┐ ┌────────────┐ -// │Sequencer 1│ ... │Sequencer m │ Sequencers -// └───────────┘ └────────────┘ a map of sequencer_id to Namespaces -// │ │ -// ┌──────────────┼─────────────┐ │ -// ▼ ▼ ▼ ▼ -// ┌────────────┐ ┌───────────┐ Namespaces -// │Namespace 1 │ ... │Namespace n│ ... a map of namespace_name to Tables -// └────────────┘ └───────────┘ -// │ │ -// ┌──────────────┼──────────────┐ │ -// ▼ ▼ ▼ ▼ -// ┌────────────┐ ┌────────────┐ Tables -// │ Table 1 │ ... │ Table p │ ... a map of table_name to Partitions -// └────────────┘ └────────────┘ -// │ │ -// │ ┌──────────────┼──────────────┐ -// ▼ ▼ ▼ ▼ -// ┌────────────┐ ┌────────────┐ Partitions -// ... │Partition 1 │ ... │Partition q │ a map of partition_key to PartitionData -// │(2021-12-10)│ │(2021-12-20)│ -// └────────────┘ └──────┬─────┘ -// │ │ -// ┌───────────┬────────▼────┬─────────────┐ │ -// │ │ │ │ ▼ -// ▼ ▼ ▼ ▼ -// ┌──────────┐┌───────────┐ ┌───────────┐ ┌───────────┐ ... 
-// │ Writing ││ Snaphot │ │Persisting │ │ Persisted │ PartitionData: a struct of 4 items -// │Partition ││ Partition │ │ Partition │ │ Partition │ . A `Writing Partition Batch` -// │ Batch ││ Batch 1 │ │ Batch 1 │ │ Batch 1 │ . A vector of `Snapshot Partition Batches` -// └──────────┘├───────────┤ ├───────────┤ ├───────────┤ . A vector of `Persisting Partition Batches` -// │ ... │ │ ... │ │ ... │ . A vector of `Persisted Partition batches` -// │ │ │ │ │ │ -// ├───────────┤ ├───────────┤ ├───────────┤ 1:1 map between `Snapshot` -// │ Snapshot │ │Persisting │ │ Persisted │ and `Persisting` Partition Batches -// │ Partition │ │ Partition │ │ Partition │ -// │ Batch k │ │ Batch k │ │ Batch i │ -// └───────────┘ └───────────┘ └───────────┘ - -// All sequencers aiisgned to this Ingester -#[derive(Debug, Clone)] -pub struct Sequencers { - // A map between a sequencer id to its corresponding Namespaces. - // A sequencer id is a `kafka_partittion`, a i32 defined in iox_catalog's Sequencer and - // represents a shard of data of a Table of a Namesapce. Namespace is equivalent to - // a customer db (aka an org's bucket). Depending on the comfiguration of sharding a table, - // either full data or set of rows of data of the table are included in a shard. - sequencers : BTreeMap>, +// Ingetser's setup: place to keep its Kafka Topic & Sequencer IDs +struct IngesterProfile { + // kafka_topic: + // sequencer_ids: } -// A Namespace and all of its tables of a sequencer -#[derive(Debug, Clone)] -pub struct Namespace { - // Name of the namespace which is unique and represents a customer db. - name: String, +/// Ingester Data: a Mapp of Shard ID to its Data +struct Sequencers { + // This map gets set up on initialization of the ingester so it won't ever be modified. + // The content of each SequenceData will get changed when more namespaces and tables + // get ingested. + data: BTreeMap>, + } - // Tables of this namesapce - tables : Vec
, +impl Sequencers { + /// One time initilize Sequencers of this Ingester + pub fn initialize() -> Self { + } } + + /// Data of a Shard + struct SequencerData { + // New namespaces can come in at any time so we need to be able to add new ones + namespaces: RwLock>>, + } -// A Table and all of its partittion -#[derive(Debug, Clone)] -pub struct Table { - // table name - name: String, + impl SequencerData { + pub fn new(seq_id: i32) -> Self { - // A map of partittion_key to its corresponding partition - partitions : BTreeMap, -} + } + } + + /// Data of a Namespace that belongs to a given Shard + struct NamespaceData { + tables: RwLock>>, + } + -// A Partittion and all of its in-memory data batches -// -// Stages of a batch of a partition: -// . A partition has only one `Writing Batch`. When is it big or -// old enough, defined by IngesterPersistenceSettings, it will -// be put to `Snaphot Batch` and also copied to `Pesisting Batch`. -// The new and empty Wrtiting Batch will be created for accpeting new writes -// . Snapshot and Persisting batches are 1:1 mapped at all times. Snapshot ones are -// immutable and used for querying. Persisting ones are modified to sort, -// dedupilcate, and apply tombstone and then persited to parquet files. -// While many batches can be persisted at the same time, a batch is only marked -// in the catalog to be persisted after the batches before -// its in the queue are marked persisted. -// . After the batch are marked persisted in the catalog, its will be removed -// from Sanpshot and Persisting and put in Persisted. The Persisted ones -// will get evicted based on IngesterPersistenceSettings. -// ┌───────────────────┐ -// │ Persisting │ -// │ │ -// │ ┌───────────────┐ │ -// ┌────────────┐ │ │ Snapshot │ │ ┌────────────┐ -// │ Writing │───────▶│ └───────────────┘ │───────▶│ Persisted │ -// └────────────┘ │ ┌───────────────┐ │ └────────────┘ -// │ │ Persiting │ │ -// │ └───────────────┘ │ -// └───────────────────┘ -// -#[derive(Debug, Clone)] -pub struct Partition { + /// Data of a Table in a given Namesapce that belongs to a given Shard + struct TableData { + partitions: RwLock>>, + } + + /// Data of an IOx Partition of a given Table of a Namesapce that belongs to a given Shard + struct PartitionData { + /// Key of this partition partition_key: String, + /// Data + inner: RwLock, + } + +/// Data of an IOx partition split into batches +// ┌────────────────────────┐ ┌────────────────────────┐ +// │ Snapshots │ │ Persisting │ +// │ │ │ │ +// │ ┌───────────────┐ │ │ ┌───────────────┐ │ +// │ ┌┴──────────────┐│ │ │ │ Persisting │ │ +// │ ┌┴──────────────┐├┴───┼────────────┼──▶│ Data │ │ +// │ │ Snapshot ├┘ │ │ └───────────────┘ │ +// │ └───────────────┘ │ │ │ +// ┌────────────┐ │ │ │ ... │ +// │ Buffer │───────▶│ ... │ │ │ +// └────────────┘ │ │ │ │ +// │ ┌───────────────┐ │ │ ┌───────────────┐ │ +// │ ┌┴──────────────┐│ │ │ │ Persisting │ │ +// │ ┌┴──────────────┐├┴────┼────────────┼───▶│ Data │ │ +// │ │ Snapshot ├┘ │ │ └───────────────┘ │ +// │ └───────────────┘ │ │ │ +// │ │ │ │ +// └────────────────────────┘ └────────────────────────┘ + struct DataBuffer { - // Writing batch that accepts writes to this partition - writing_batch: PartitionBatch, + /// Buffer of ingesting data + buffer: Vec, - // Queue of batches that are immutable and used for querying only. - // The batches are queue contiguously in thier data arrival time - snapshot_batches: Vec, // todo: is Vec good enough for hanlding queue? 
+ /// Data in `buffer` will be moved to a `snapshot` when one of these happens: + /// . A background persist is called + /// . A read request from Querier + /// The `buffer` will be empty when this happens. + snapshots: Vec>, - // Queue of persisting batches which is a one on one mapping with the snapshot_batches. - // Data of these batches will be modified to sort, dedupilcate, and apply tombstone and then - // persited to parquet files. While many batches can be persisted at the same time, - // a batch is only marked in the catalog to be persisted after the batches before - // its in the queue are marked persisted - pesisting_batched: Vec, + /// When a persist is called, data in `buffer` will be moved to a `snapshot` + /// and then all `snapshots` will be moved to a `persisting`. + /// Both `buffer` and 'snaphots` will be empty when this happens. + persisting: Vec, - // Persisted batches that are not yet evicted from the in-memory. - // These are batches moved from persiting_batches after they are fully persisted and marked - // so in the catalog - pesisted_batched: Vec, - -} - -// A PartitionBatch of contiguous in arrival time of writes -// todo & question: do we want to call this Chunk instead? -#[derive(Debug, Clone)] -pub struct PartitionBatch { - // To keep the PartitionBtach in order of their - // arrived data, we may need this auto created batch id - batch_id: i32, - - // Data of this partition batch - data: Arc, -} + // Extra Notes: + // . Multiple perssiting operations may be happenning concurrently but + // their persisted info must be added into the Catalog in thier data + // ingesting order. + // . When a read request comes from a Querier, all data from `snaphots` + // and `persisting` must be sent to the Querier. + // . After the `persiting` data is persisted and successfully added + // into the Catalog, it will be removed from this Data Buffer. + // This data might be added into an extra cache to serve up to + // Queriers that may not have loaded the parquet files from object + // storage yet. But this will be decided after MVP. + } + + struct PersistingData { + batches: Vec> + } + + struct DataBatch { + // a map of the unique column name to its data. Every column + // must have the same number of values. + column_data: BTreeMap>, + } + + struct ColumnData { + // it might be better to have the raw values and null markers, + // but this will probably be easier and faster to get going. + values: Option + } + \ No newline at end of file diff --git a/ingester/src/lib.rs b/ingester/src/lib.rs index 99c92b1c6f..261a622cf8 100644 --- a/ingester/src/lib.rs +++ b/ingester/src/lib.rs @@ -2,4 +2,15 @@ //! Design doc: https://docs.google.com/document/d/14NlzBiWwn0H37QxnE0k3ybTU58SKyUZmdgYpVw6az0Q/edit# //! 
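// A sketch of the buffer -> snapshot -> persisting transitions described above. The
// generic parameters of the DataBuffer fields were lost in this rendering and are
// assumed to be Vec<DataBatch>, Vec<Arc<DataBatch>> and Vec<PersistingData>; these
// methods are illustrative, not part of the patch:
impl DataBuffer {
    /// Freeze everything currently in `buffer` into an immutable snapshot that can
    /// serve queries; `buffer` is left empty.
    fn snapshot(&mut self) {
        let frozen: Vec<Arc<DataBatch>> = self.buffer.drain(..).map(Arc::new).collect();
        self.snapshots.extend(frozen);
    }

    /// Start a persist job: fold `buffer` into the snapshots, then move all snapshots
    /// into a new PersistingData entry. Both `buffer` and `snapshots` are empty afterwards.
    fn begin_persist(&mut self) {
        self.snapshot();
        let batches = std::mem::take(&mut self.snapshots);
        self.persisting.push(PersistingData { batches });
    }
}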
+#![deny(rustdoc::broken_intra_doc_links, rustdoc::bare_urls, rust_2018_idioms)] +#![warn( + missing_copy_implementations, + missing_debug_implementations, + missing_docs, + clippy::explicit_iter_loop, + clippy::future_not_send, + clippy::use_self, + clippy::clone_on_ref_ptr +)] + pub mod data; From e395ef7066b20922f55a44e90cf802a24228623b Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Tue, 18 Jan 2022 14:41:02 -0500 Subject: [PATCH 08/32] feat: add Partition scaffolding to iox_catalog --- iox_catalog/src/interface.rs | 92 ++++++++++++++++++++++++++++++++++++ iox_catalog/src/mem.rs | 50 +++++++++++++++++++- iox_catalog/src/postgres.rs | 53 ++++++++++++++++++++- 3 files changed, 191 insertions(+), 4 deletions(-) diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index f693bd238e..ca5f1a0930 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -131,6 +131,21 @@ impl SequencerId { } } +/// Unique ID for a `Sequencer` +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub struct PartitionId(i64); + +#[allow(missing_docs)] +impl PartitionId { + pub fn new(v: i64) -> Self { + Self(v) + } + pub fn get(&self) -> i64 { + self.0 + } +} + /// Container that can return repos for each of the catalog data types. #[async_trait] pub trait RepoCollection { @@ -146,6 +161,8 @@ pub trait RepoCollection { fn column(&self) -> Arc; /// repo for sequencers fn sequencer(&self) -> Arc; + /// repo for partitions + fn partition(&self) -> Arc; } /// Functions for working with Kafka topics in the catalog. @@ -219,6 +236,22 @@ pub trait SequencerRepo { async fn list_by_kafka_topic(&self, topic: &KafkaTopic) -> Result>; } +/// Functions for working with IOx partitions in the catalog. Note that these are how +/// IOx splits up data within a database, which is differenet than Kafka partitions. +#[async_trait] +pub trait PartitionRepo { + /// create or get a partition record for the given partition key, sequencer and table + async fn create_or_get( + &self, + key: &str, + sequencer_id: SequencerId, + table_id: TableId, + ) -> Result; + + /// return partitions for a given sequencer + async fn list_by_sequencer(&self, sequencer_id: SequencerId) -> Result>; +} + /// Data object for a kafka topic #[derive(Debug, Clone, Eq, PartialEq, sqlx::FromRow)] pub struct KafkaTopic { @@ -545,6 +578,19 @@ pub struct Sequencer { pub min_unpersisted_sequence_number: i64, } +/// Data object for a partition. The combination of sequencer, table and key are unique (i.e. 
only one record can exist for each combo) +#[derive(Debug, Clone, PartialEq, sqlx::FromRow)] +pub struct Partition { + /// the id of the partition + pub id: PartitionId, + /// the sequencer the data in the partition arrived from + pub sequencer_id: SequencerId, + /// the table the partition is under + pub table_id: TableId, + /// the string key of the partition + pub partition_key: String, +} + #[cfg(test)] pub(crate) mod test_helpers { use super::*; @@ -561,6 +607,7 @@ pub(crate) mod test_helpers { test_table(&new_repo()).await; test_column(&new_repo()).await; test_sequencer(&new_repo()).await; + test_partition(&new_repo()).await; } async fn test_kafka_topic(repo: &T) { @@ -726,4 +773,49 @@ pub(crate) mod test_helpers { assert_eq!(created, listed); } + + async fn test_partition(repo: &T) { + let kafka = repo.kafka_topic().create_or_get("foo").await.unwrap(); + let pool = repo.query_pool().create_or_get("foo").await.unwrap(); + let namespace = repo + .namespace() + .create("namespace_partition_test", "inf", kafka.id, pool.id) + .await + .unwrap(); + let table = repo + .table() + .create_or_get("test_table", namespace.id) + .await + .unwrap(); + let sequencer = repo.sequencer().create_or_get(&kafka, 1).await.unwrap(); + let other_sequencer = repo.sequencer().create_or_get(&kafka, 2).await.unwrap(); + + let partition_repo = repo.partition(); + + let created = ["foo", "bar"] + .iter() + .map(|key| partition_repo.create_or_get(key, sequencer.id, table.id)) + .collect::>() + .map(|v| { + let v = v.expect("failed to create partition"); + (v.id, v) + }) + .collect::>() + .await; + let _ = partition_repo + .create_or_get("asdf", other_sequencer.id, table.id) + .await + .unwrap(); + + // List them and assert they match + let listed = partition_repo + .list_by_sequencer(sequencer.id) + .await + .expect("failed to list partitions") + .into_iter() + .map(|v| (v.id, v)) + .collect::>(); + + assert_eq!(created, listed); + } } diff --git a/iox_catalog/src/mem.rs b/iox_catalog/src/mem.rs index 19c7226e74..1af293bbe5 100644 --- a/iox_catalog/src/mem.rs +++ b/iox_catalog/src/mem.rs @@ -3,8 +3,9 @@ use crate::interface::{ Column, ColumnId, ColumnRepo, ColumnType, Error, KafkaTopic, KafkaTopicId, KafkaTopicRepo, - Namespace, NamespaceId, NamespaceRepo, QueryPool, QueryPoolId, QueryPoolRepo, RepoCollection, - Result, Sequencer, SequencerId, SequencerRepo, Table, TableId, TableRepo, + Namespace, NamespaceId, NamespaceRepo, Partition, PartitionId, PartitionRepo, QueryPool, + QueryPoolId, QueryPoolRepo, RepoCollection, Result, Sequencer, SequencerId, SequencerRepo, + Table, TableId, TableRepo, }; use async_trait::async_trait; use std::convert::TryFrom; @@ -40,6 +41,7 @@ struct MemCollections { tables: Vec
, columns: Vec, sequencers: Vec, + partitions: Vec, } impl RepoCollection for Arc { @@ -66,6 +68,10 @@ impl RepoCollection for Arc { fn sequencer(&self) -> Arc { Self::clone(self) as Arc } + + fn partition(&self) -> Arc { + Self::clone(self) as Arc + } } #[async_trait] @@ -282,6 +288,46 @@ impl SequencerRepo for MemCatalog { } } +#[async_trait] +impl PartitionRepo for MemCatalog { + async fn create_or_get( + &self, + key: &str, + sequencer_id: SequencerId, + table_id: TableId, + ) -> Result { + let mut collections = self.collections.lock().expect("mutex poisoned"); + let partition = match collections.partitions.iter().find(|p| { + p.partition_key == key && p.sequencer_id == sequencer_id && p.table_id == table_id + }) { + Some(p) => p, + None => { + let p = Partition { + id: PartitionId::new(collections.partitions.len() as i64 + 1), + sequencer_id, + table_id, + partition_key: key.to_string(), + }; + collections.partitions.push(p); + collections.partitions.last().unwrap() + } + }; + + Ok(partition.clone()) + } + + async fn list_by_sequencer(&self, sequencer_id: SequencerId) -> Result> { + let collections = self.collections.lock().expect("mutex poisoned"); + let partitions: Vec<_> = collections + .partitions + .iter() + .filter(|p| p.sequencer_id == sequencer_id) + .cloned() + .collect(); + Ok(partitions) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/iox_catalog/src/postgres.rs b/iox_catalog/src/postgres.rs index 2d3f3ae1e0..14dafc6a4e 100644 --- a/iox_catalog/src/postgres.rs +++ b/iox_catalog/src/postgres.rs @@ -2,8 +2,8 @@ use crate::interface::{ Column, ColumnRepo, ColumnType, Error, KafkaTopic, KafkaTopicId, KafkaTopicRepo, Namespace, - NamespaceId, NamespaceRepo, QueryPool, QueryPoolId, QueryPoolRepo, RepoCollection, Result, - Sequencer, SequencerRepo, Table, TableId, TableRepo, + NamespaceId, NamespaceRepo, Partition, PartitionRepo, QueryPool, QueryPoolId, QueryPoolRepo, + RepoCollection, Result, Sequencer, SequencerId, SequencerRepo, Table, TableId, TableRepo, }; use async_trait::async_trait; use observability_deps::tracing::info; @@ -83,6 +83,10 @@ impl RepoCollection for Arc { fn sequencer(&self) -> Arc { Self::clone(self) as Arc } + + fn partition(&self) -> Arc { + Self::clone(self) as Arc + } } #[async_trait] @@ -326,6 +330,47 @@ impl SequencerRepo for PostgresCatalog { } } +#[async_trait] +impl PartitionRepo for PostgresCatalog { + async fn create_or_get( + &self, + key: &str, + sequencer_id: SequencerId, + table_id: TableId, + ) -> Result { + sqlx::query_as::<_, Partition>( + r#" + INSERT INTO partition + ( partition_key, sequencer_id, table_id ) + VALUES + ( $1, $2, $3 ) + ON CONFLICT ON CONSTRAINT partition_key_unique + DO UPDATE SET partition_key = partition.partition_key RETURNING *; + "#, + ) + .bind(key) // $1 + .bind(&sequencer_id) // $2 + .bind(&table_id) // $3 + .fetch_one(&self.pool) + .await + .map_err(|e| { + if is_fk_violation(&e) { + Error::ForeignKeyViolation { source: e } + } else { + Error::SqlxError { source: e } + } + }) + } + + async fn list_by_sequencer(&self, sequencer_id: SequencerId) -> Result> { + sqlx::query_as::<_, Partition>(r#"SELECT * FROM partition WHERE sequencer_id = $1;"#) + .bind(&sequencer_id) // $1 + .fetch_all(&self.pool) + .await + .map_err(|e| Error::SqlxError { source: e }) + } +} + /// The error code returned by Postgres for a unique constraint violation. 
/// /// See @@ -430,6 +475,10 @@ mod tests { .execute(pool) .await .unwrap(); + sqlx::query("delete from partition;") + .execute(pool) + .await + .unwrap(); sqlx::query("delete from table_name;") .execute(pool) .await From b1510675ae8417294e5c9f64ed804ce58a5ea665 Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Tue, 18 Jan 2022 14:49:31 -0500 Subject: [PATCH 09/32] refactor: add new type for Kafka Partition in Catalog --- iox_catalog/src/interface.rs | 37 +++++++++++++++++++++++++++++++----- iox_catalog/src/lib.rs | 6 +++--- iox_catalog/src/mem.rs | 14 +++++++++----- iox_catalog/src/postgres.rs | 13 +++++++++---- 4 files changed, 53 insertions(+), 17 deletions(-) diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index ca5f1a0930..2b31c4c964 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -131,6 +131,21 @@ impl SequencerId { } } +/// The kafka partition identifier. This is in the actual Kafka cluster. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub struct KafkaPartition(i32); + +#[allow(missing_docs)] +impl KafkaPartition { + pub fn new(v: i32) -> Self { + Self(v) + } + pub fn get(&self) -> i32 { + self.0 + } +} + /// Unique ID for a `Sequencer` #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] #[sqlx(transparent)] @@ -227,7 +242,11 @@ pub trait ColumnRepo { #[async_trait] pub trait SequencerRepo { /// create a sequencer record for the kafka topic and partition or return the existing record - async fn create_or_get(&self, topic: &KafkaTopic, partition: i32) -> Result; + async fn create_or_get( + &self, + topic: &KafkaTopic, + partition: KafkaPartition, + ) -> Result; /// list all sequencers async fn list(&self) -> Result>; @@ -570,7 +589,7 @@ pub struct Sequencer { /// the topic the sequencer is reading from pub kafka_topic_id: KafkaTopicId, /// the kafka partition the sequencer is reading from - pub kafka_partition: i32, + pub kafka_partition: KafkaPartition, /// The minimum unpersisted sequence number. Because different tables /// can be persisted at different times, it is possible some data has been persisted /// with a higher sequence number than this. 
However, all data with a sequence number @@ -753,7 +772,7 @@ pub(crate) mod test_helpers { // Create 10 sequencers let created = (1..=10) - .map(|partition| sequencer_repo.create_or_get(&kafka, partition)) + .map(|partition| sequencer_repo.create_or_get(&kafka, KafkaPartition::new(partition))) .collect::>() .map(|v| { let v = v.expect("failed to create sequencer"); @@ -787,8 +806,16 @@ pub(crate) mod test_helpers { .create_or_get("test_table", namespace.id) .await .unwrap(); - let sequencer = repo.sequencer().create_or_get(&kafka, 1).await.unwrap(); - let other_sequencer = repo.sequencer().create_or_get(&kafka, 2).await.unwrap(); + let sequencer = repo + .sequencer() + .create_or_get(&kafka, KafkaPartition::new(1)) + .await + .unwrap(); + let other_sequencer = repo + .sequencer() + .create_or_get(&kafka, KafkaPartition::new(2)) + .await + .unwrap(); let partition_repo = repo.partition(); diff --git a/iox_catalog/src/lib.rs b/iox_catalog/src/lib.rs index 8698e8f482..a23de38af4 100644 --- a/iox_catalog/src/lib.rs +++ b/iox_catalog/src/lib.rs @@ -12,8 +12,8 @@ )] use crate::interface::{ - column_type_from_field, ColumnSchema, ColumnType, Error, KafkaTopic, NamespaceSchema, - QueryPool, RepoCollection, Result, Sequencer, SequencerId, TableId, + column_type_from_field, ColumnSchema, ColumnType, Error, KafkaPartition, KafkaTopic, + NamespaceSchema, QueryPool, RepoCollection, Result, Sequencer, SequencerId, TableId, }; use futures::{stream::FuturesOrdered, StreamExt}; use influxdb_line_protocol::ParsedLine; @@ -185,7 +185,7 @@ pub async fn create_or_get_default_records( let query_pool = query_repo.create_or_get(SHARED_QUERY_POOL).await?; let sequencers = (1..=kafka_partition_count) - .map(|partition| sequencer_repo.create_or_get(&kafka_topic, partition)) + .map(|partition| sequencer_repo.create_or_get(&kafka_topic, KafkaPartition::new(partition))) .collect::>() .map(|v| { let v = v.expect("failed to create sequencer"); diff --git a/iox_catalog/src/mem.rs b/iox_catalog/src/mem.rs index 1af293bbe5..88b00f3e1b 100644 --- a/iox_catalog/src/mem.rs +++ b/iox_catalog/src/mem.rs @@ -2,10 +2,10 @@ //! used for testing or for an IOx designed to run without catalog persistence. use crate::interface::{ - Column, ColumnId, ColumnRepo, ColumnType, Error, KafkaTopic, KafkaTopicId, KafkaTopicRepo, - Namespace, NamespaceId, NamespaceRepo, Partition, PartitionId, PartitionRepo, QueryPool, - QueryPoolId, QueryPoolRepo, RepoCollection, Result, Sequencer, SequencerId, SequencerRepo, - Table, TableId, TableRepo, + Column, ColumnId, ColumnRepo, ColumnType, Error, KafkaPartition, KafkaTopic, KafkaTopicId, + KafkaTopicRepo, Namespace, NamespaceId, NamespaceRepo, Partition, PartitionId, PartitionRepo, + QueryPool, QueryPoolId, QueryPoolRepo, RepoCollection, Result, Sequencer, SequencerId, + SequencerRepo, Table, TableId, TableRepo, }; use async_trait::async_trait; use std::convert::TryFrom; @@ -247,7 +247,11 @@ impl ColumnRepo for MemCatalog { #[async_trait] impl SequencerRepo for MemCatalog { - async fn create_or_get(&self, topic: &KafkaTopic, partition: i32) -> Result { + async fn create_or_get( + &self, + topic: &KafkaTopic, + partition: KafkaPartition, + ) -> Result { let mut collections = self.collections.lock().expect("mutex poisoned"); let sequencer = match collections diff --git a/iox_catalog/src/postgres.rs b/iox_catalog/src/postgres.rs index 14dafc6a4e..230ae3617a 100644 --- a/iox_catalog/src/postgres.rs +++ b/iox_catalog/src/postgres.rs @@ -1,9 +1,10 @@ //! 
A Postgres backed implementation of the Catalog use crate::interface::{ - Column, ColumnRepo, ColumnType, Error, KafkaTopic, KafkaTopicId, KafkaTopicRepo, Namespace, - NamespaceId, NamespaceRepo, Partition, PartitionRepo, QueryPool, QueryPoolId, QueryPoolRepo, - RepoCollection, Result, Sequencer, SequencerId, SequencerRepo, Table, TableId, TableRepo, + Column, ColumnRepo, ColumnType, Error, KafkaPartition, KafkaTopic, KafkaTopicId, + KafkaTopicRepo, Namespace, NamespaceId, NamespaceRepo, Partition, PartitionRepo, QueryPool, + QueryPoolId, QueryPoolRepo, RepoCollection, Result, Sequencer, SequencerId, SequencerRepo, + Table, TableId, TableRepo, }; use async_trait::async_trait; use observability_deps::tracing::info; @@ -290,7 +291,11 @@ WHERE table_name.namespace_id = $1; #[async_trait] impl SequencerRepo for PostgresCatalog { - async fn create_or_get(&self, topic: &KafkaTopic, partition: i32) -> Result { + async fn create_or_get( + &self, + topic: &KafkaTopic, + partition: KafkaPartition, + ) -> Result { sqlx::query_as::<_, Sequencer>( r#" INSERT INTO sequencer From 125285ae9ad347455d7e2a1b5b6d4d494d1ac06e Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Tue, 18 Jan 2022 16:11:25 -0500 Subject: [PATCH 10/32] feat: commit in order to pull and merge new commit from main --- Cargo.lock | 2 ++ ingester/Cargo.toml | 3 ++- ingester/src/data.rs | 45 ++++++++++++++++++++++++++++++++++-------- ingester/src/lib.rs | 1 + ingester/src/server.rs | 22 +++++++++++++++++++++ 5 files changed, 64 insertions(+), 9 deletions(-) create mode 100644 ingester/src/server.rs diff --git a/Cargo.lock b/Cargo.lock index ceb7577155..b61d595d2c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1858,7 +1858,9 @@ name = "ingester" version = "0.1.0" dependencies = [ "arrow", + "iox_catalog", "parking_lot", + "snafu", ] [[package]] diff --git a/ingester/Cargo.toml b/ingester/Cargo.toml index 0683821a04..d3633a82e1 100644 --- a/ingester/Cargo.toml +++ b/ingester/Cargo.toml @@ -6,5 +6,6 @@ edition = "2021" [dependencies] arrow = { version = "7.0", features = ["prettyprint"] } -# mutable_batch = { path = "../mutable_batch" } +snafu = "0.7" +iox_catalog = { path = "../iox_catalog" } parking_lot = "0.11.2" \ No newline at end of file diff --git a/ingester/src/data.rs b/ingester/src/data.rs index 9f3e31ba26..97bdec2ccf 100644 --- a/ingester/src/data.rs +++ b/ingester/src/data.rs @@ -3,33 +3,62 @@ use std::{sync::Arc, collections::BTreeMap}; +use crate::server::IngesterServer; +use snafu::{OptionExt, Snafu}; +use iox_catalog::interface::{KafkaTopicId, SequencerId, RepoCollection, KafkaTopic, NamespaceId}; use parking_lot::RwLock; use arrow::datatypes::DataType; -// Ingetser's setup: place to keep its Kafka Topic & Sequencer IDs -struct IngesterProfile { - // kafka_topic: - // sequencer_ids: +#[derive(Debug, Snafu)] +//#[allow(missing_copy_implementations, missing_docs)] +pub enum Error { + #[snafu(display("Topic {} not found", name))] + TopicNotFound { name: String }, } +/// A specialized `Error` for Ingester Data errors +pub type Result = std::result::Result; + + /// Ingester Data: a Mapp of Shard ID to its Data struct Sequencers { // This map gets set up on initialization of the ingester so it won't ever be modified. // The content of each SequenceData will get changed when more namespaces and tables // get ingested. 
- data: BTreeMap>, + data: BTreeMap>, } impl Sequencers { - /// One time initilize Sequencers of this Ingester - pub fn initialize() -> Self { + /// One time initialize Sequencers of this Ingester + pub async fn initialize(ingester: &IngesterServer) -> Result { + // Get kafka topic + let kafka_topic_repro = ingester.iox_catalog.kafka_topic(); + let topic = kafka_topic_repro.create_or_get(ingester.kafka_topic_name).await?; + + // Get all namespaces of this topic + let namespace_repo = ingester.iox_catalog.namespace(); + let x = namespace_repro. + + + // Get Sequencers + let sequencer_repro = ingester.iox_catalog.sequencer(); + let sequencers = BTreeMap::default(); + for shard in ingester.kafka_partitions { + let sequencer = sequencer_repro.create_or_get(&topic, shard).await?; + + sequencers.insert(sequencer.id, ) + } + + Ok(Self { + data: BTreeMap::default(), + }) } } /// Data of a Shard struct SequencerData { // New namespaces can come in at any time so we need to be able to add new ones - namespaces: RwLock>>, + namespaces: RwLock>>, } impl SequencerData { diff --git a/ingester/src/lib.rs b/ingester/src/lib.rs index 261a622cf8..1dc60b5a46 100644 --- a/ingester/src/lib.rs +++ b/ingester/src/lib.rs @@ -14,3 +14,4 @@ )] pub mod data; +pub mod server; diff --git a/ingester/src/server.rs b/ingester/src/server.rs new file mode 100644 index 0000000000..8599b711b2 --- /dev/null +++ b/ingester/src/server.rs @@ -0,0 +1,22 @@ +use std::sync::Arc; + +use iox_catalog::mem::MemCatalog; + +/// The [`IngesterServer`] manages the lifecycle and contains all state for +/// an `ingester` server instance. +#[derive(Debug)] +struct IngesterServer<'a> { + pub kafka_topic_name: String, + pub kafka_partitions: Vec, // todo: use KafkaPartitionId when available + pub iox_catalog: &'a Arc +} + +impl<'a> IngesterServer<'a>{ + pub fn new(topic_name: String, shard_ids: Vec, catalog: &'a Arc) -> Self { + Self { + kafka_topic_name: topic_name, + kafka_partitions: shard_ids, + iox_catalog: catalog, + } + } +} \ No newline at end of file From 8067316c334cfa59e82967c7e8daff5882e38f20 Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Tue, 18 Jan 2022 17:33:05 -0500 Subject: [PATCH 11/32] fix: typo in partitionid description --- iox_catalog/src/interface.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index 2b31c4c964..3012cb0339 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -146,7 +146,7 @@ impl KafkaPartition { } } -/// Unique ID for a `Sequencer` +/// Unique ID for a `Partition` #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] #[sqlx(transparent)] pub struct PartitionId(i64); From b20d1757d0c6219f403910e020a15352479161b6 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Tue, 18 Jan 2022 17:43:03 -0500 Subject: [PATCH 12/32] feat: initialize ingester data --- ingester/src/data.rs | 142 +++++++++++++++++++++-------------------- ingester/src/lib.rs | 20 +++--- ingester/src/server.rs | 25 +++++--- 3 files changed, 101 insertions(+), 86 deletions(-) diff --git a/ingester/src/data.rs b/ingester/src/data.rs index 97bdec2ccf..0dc4bbb609 100644 --- a/ingester/src/data.rs +++ b/ingester/src/data.rs @@ -1,91 +1,100 @@ //! Data for the lifecycle of the Ingeter -//! +//! 
-use std::{sync::Arc, collections::BTreeMap}; +use std::{collections::BTreeMap, sync::Arc}; use crate::server::IngesterServer; -use snafu::{OptionExt, Snafu}; -use iox_catalog::interface::{KafkaTopicId, SequencerId, RepoCollection, KafkaTopic, NamespaceId}; -use parking_lot::RwLock; use arrow::datatypes::DataType; +use iox_catalog::interface::{NamespaceId, SequencerId, RepoCollection, KafkaPartition}; +use parking_lot::RwLock; +use snafu::{Snafu, ResultExt}; #[derive(Debug, Snafu)] -//#[allow(missing_copy_implementations, missing_docs)] +#[allow(missing_copy_implementations, missing_docs)] pub enum Error { #[snafu(display("Topic {} not found", name))] - TopicNotFound { name: String }, + TopicNotFound { + source: iox_catalog::interface::Error, + name: String}, + + #[snafu(display("Sequencer id {} not found", id.get()))] + SequencerNotFound { + source: iox_catalog::interface::Error, + id: KafkaPartition}, + } /// A specialized `Error` for Ingester Data errors pub type Result = std::result::Result; - /// Ingester Data: a Mapp of Shard ID to its Data struct Sequencers { // This map gets set up on initialization of the ingester so it won't ever be modified. - // The content of each SequenceData will get changed when more namespaces and tables + // The content of each SequenceData will get changed when more namespaces and tables // get ingested. data: BTreeMap>, - } +} impl Sequencers { /// One time initialize Sequencers of this Ingester - pub async fn initialize(ingester: &IngesterServer) -> Result { - // Get kafka topic + pub async fn initialize(ingester: &IngesterServer<'_>) -> Result { + // Get kafka topic from the catalog + let topic_name = ingester.get_topic(); let kafka_topic_repro = ingester.iox_catalog.kafka_topic(); - let topic = kafka_topic_repro.create_or_get(ingester.kafka_topic_name).await?; + let topic = kafka_topic_repro + .create_or_get(topic_name.as_str()) + .await + .context(TopicNotFoundSnafu{name: topic_name})?; - // Get all namespaces of this topic - let namespace_repo = ingester.iox_catalog.namespace(); - let x = namespace_repro. 
- - - // Get Sequencers + // Get sequencer ids from the catalog let sequencer_repro = ingester.iox_catalog.sequencer(); - let sequencers = BTreeMap::default(); - for shard in ingester.kafka_partitions { - let sequencer = sequencer_repro.create_or_get(&topic, shard).await?; - - sequencers.insert(sequencer.id, ) + let mut sequencers = BTreeMap::default(); + for shard in ingester.get_kafka_partitions() { + let sequencer = sequencer_repro + .create_or_get(&topic, shard) + .await + .context(SequencerNotFoundSnafu{id: shard})?; + // Create empty buffer for each sequencer + sequencers.insert(sequencer.id, Arc::new(SequencerData::new())); } - Ok(Self { - data: BTreeMap::default(), - }) + Ok(Self { data: sequencers }) } } - - /// Data of a Shard - struct SequencerData { + +/// Data of a Shard +struct SequencerData { // New namespaces can come in at any time so we need to be able to add new ones namespaces: RwLock>>, - } +} - impl SequencerData { - pub fn new(seq_id: i32) -> Self { +impl SequencerData { + /// Create an empty SequenceData + pub fn new() -> Self { + Self { + namespaces: RwLock::new(BTreeMap::default()), + } + } +} - } - } - - /// Data of a Namespace that belongs to a given Shard - struct NamespaceData { +/// Data of a Namespace that belongs to a given Shard +struct NamespaceData { tables: RwLock>>, - } - +} - /// Data of a Table in a given Namesapce that belongs to a given Shard - struct TableData { +/// Data of a Table in a given Namesapce that belongs to a given Shard +struct TableData { partitions: RwLock>>, - } +} - /// Data of an IOx Partition of a given Table of a Namesapce that belongs to a given Shard - struct PartitionData { +/// Data of an IOx Partition of a given Table of a Namesapce that belongs to a given Shard +struct PartitionData { /// Key of this partition partition_key: String, /// Data inner: RwLock, - } - +} + /// Data of an IOx partition split into batches // ┌────────────────────────┐ ┌────────────────────────┐ // │ Snapshots │ │ Persisting │ @@ -105,8 +114,7 @@ impl Sequencers { // │ └───────────────┘ │ │ │ // │ │ │ │ // └────────────────────────┘ └────────────────────────┘ - struct DataBuffer { - +struct DataBuffer { /// Buffer of ingesting data buffer: Vec, @@ -120,33 +128,31 @@ impl Sequencers { /// and then all `snapshots` will be moved to a `persisting`. /// Both `buffer` and 'snaphots` will be empty when this happens. persisting: Vec, - // Extra Notes: // . Multiple perssiting operations may be happenning concurrently but // their persisted info must be added into the Catalog in thier data // ingesting order. - // . When a read request comes from a Querier, all data from `snaphots` + // . When a read request comes from a Querier, all data from `snaphots` // and `persisting` must be sent to the Querier. - // . After the `persiting` data is persisted and successfully added + // . After the `persiting` data is persisted and successfully added // into the Catalog, it will be removed from this Data Buffer. - // This data might be added into an extra cache to serve up to - // Queriers that may not have loaded the parquet files from object + // This data might be added into an extra cache to serve up to + // Queriers that may not have loaded the parquet files from object // storage yet. But this will be decided after MVP. - } - - struct PersistingData { - batches: Vec> - } - - struct DataBatch { +} + +struct PersistingData { + batches: Vec>, +} + +struct DataBatch { // a map of the unique column name to its data. 
Every column // must have the same number of values. column_data: BTreeMap>, - } - - struct ColumnData { - // it might be better to have the raw values and null markers, +} + +struct ColumnData { + // it might be better to have the raw values and null markers, // but this will probably be easier and faster to get going. - values: Option - } - \ No newline at end of file + values: Option, +} diff --git a/ingester/src/lib.rs b/ingester/src/lib.rs index 1dc60b5a46..05266c1d77 100644 --- a/ingester/src/lib.rs +++ b/ingester/src/lib.rs @@ -2,16 +2,16 @@ //! Design doc: https://docs.google.com/document/d/14NlzBiWwn0H37QxnE0k3ybTU58SKyUZmdgYpVw6az0Q/edit# //! -#![deny(rustdoc::broken_intra_doc_links, rustdoc::bare_urls, rust_2018_idioms)] -#![warn( - missing_copy_implementations, - missing_debug_implementations, - missing_docs, - clippy::explicit_iter_loop, - clippy::future_not_send, - clippy::use_self, - clippy::clone_on_ref_ptr -)] +#![deny(rustdoc::broken_intra_doc_links, rustdoc::bare_urls, rust_2018_idioms)] +#![warn( + missing_copy_implementations, + missing_debug_implementations, + missing_docs, + clippy::explicit_iter_loop, + clippy::future_not_send, + clippy::use_self, + clippy::clone_on_ref_ptr +)] pub mod data; pub mod server; diff --git a/ingester/src/server.rs b/ingester/src/server.rs index 8599b711b2..1e4cb21a49 100644 --- a/ingester/src/server.rs +++ b/ingester/src/server.rs @@ -1,22 +1,31 @@ use std::sync::Arc; -use iox_catalog::mem::MemCatalog; +use iox_catalog::{mem::MemCatalog, interface::KafkaPartition}; -/// The [`IngesterServer`] manages the lifecycle and contains all state for +/// The [`IngesterServer`] manages the lifecycle and contains all state for /// an `ingester` server instance. #[derive(Debug)] -struct IngesterServer<'a> { +pub struct IngesterServer<'a> { pub kafka_topic_name: String, - pub kafka_partitions: Vec, // todo: use KafkaPartitionId when available - pub iox_catalog: &'a Arc + pub kafka_partitions: Vec, // todo: use KafkaPartitionId when available + pub iox_catalog: &'a Arc, } -impl<'a> IngesterServer<'a>{ - pub fn new(topic_name: String, shard_ids: Vec, catalog: &'a Arc) -> Self { +impl<'a> IngesterServer<'a> { + pub fn new(topic_name: String, shard_ids: Vec, catalog: &'a Arc) -> Self { Self { kafka_topic_name: topic_name, kafka_partitions: shard_ids, iox_catalog: catalog, } } -} \ No newline at end of file + + pub fn get_topic(&self) -> String { + self.kafka_topic_name.clone() + } + + + pub fn get_kafka_partitions(&self) -> Vec { + self.kafka_partitions.clone() + } +} From 667ec5bfc5a663362ee2d578132450e686060280 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Tue, 18 Jan 2022 18:01:06 -0500 Subject: [PATCH 13/32] fix: the code is now compile without warnings --- ingester/src/lib.rs | 4 ++++ ingester/src/server.rs | 14 +++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/ingester/src/lib.rs b/ingester/src/lib.rs index 05266c1d77..9fd1048da9 100644 --- a/ingester/src/lib.rs +++ b/ingester/src/lib.rs @@ -13,5 +13,9 @@ clippy::clone_on_ref_ptr )] +#[allow( + dead_code +)] + pub mod data; pub mod server; diff --git a/ingester/src/server.rs b/ingester/src/server.rs index 1e4cb21a49..86897b86a9 100644 --- a/ingester/src/server.rs +++ b/ingester/src/server.rs @@ -1,3 +1,6 @@ +//! Ingester Server +//! + use std::sync::Arc; use iox_catalog::{mem::MemCatalog, interface::KafkaPartition}; @@ -6,12 +9,16 @@ use iox_catalog::{mem::MemCatalog, interface::KafkaPartition}; /// an `ingester` server instance. 
#[derive(Debug)] pub struct IngesterServer<'a> { - pub kafka_topic_name: String, - pub kafka_partitions: Vec, // todo: use KafkaPartitionId when available + // Kafka Topic assigned to this ingester + kafka_topic_name: String, + // Kafka Partitions (Shards) assigned to this INgester + kafka_partitions: Vec, + /// Catalog of this ingester pub iox_catalog: &'a Arc, } impl<'a> IngesterServer<'a> { + /// Initialize the Ingester pub fn new(topic_name: String, shard_ids: Vec, catalog: &'a Arc) -> Self { Self { kafka_topic_name: topic_name, @@ -20,11 +27,12 @@ impl<'a> IngesterServer<'a> { } } + /// Return a kafka topic name pub fn get_topic(&self) -> String { self.kafka_topic_name.clone() } - + /// Return Kafka Partitions pub fn get_kafka_partitions(&self) -> Vec { self.kafka_partitions.clone() } From 1c970a2064eaf5faf0080fc8e5a899aa50566b3f Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Tue, 18 Jan 2022 18:01:47 -0500 Subject: [PATCH 14/32] fix: format --- ingester/src/data.rs | 19 ++++++++++--------- ingester/src/lib.rs | 5 +---- ingester/src/server.rs | 10 +++++++--- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/ingester/src/data.rs b/ingester/src/data.rs index 0dc4bbb609..746156503d 100644 --- a/ingester/src/data.rs +++ b/ingester/src/data.rs @@ -5,23 +5,24 @@ use std::{collections::BTreeMap, sync::Arc}; use crate::server::IngesterServer; use arrow::datatypes::DataType; -use iox_catalog::interface::{NamespaceId, SequencerId, RepoCollection, KafkaPartition}; +use iox_catalog::interface::{KafkaPartition, NamespaceId, RepoCollection, SequencerId}; use parking_lot::RwLock; -use snafu::{Snafu, ResultExt}; +use snafu::{ResultExt, Snafu}; #[derive(Debug, Snafu)] #[allow(missing_copy_implementations, missing_docs)] pub enum Error { #[snafu(display("Topic {} not found", name))] - TopicNotFound { + TopicNotFound { source: iox_catalog::interface::Error, - name: String}, + name: String, + }, #[snafu(display("Sequencer id {} not found", id.get()))] - SequencerNotFound { + SequencerNotFound { source: iox_catalog::interface::Error, - id: KafkaPartition}, - + id: KafkaPartition, + }, } /// A specialized `Error` for Ingester Data errors @@ -44,7 +45,7 @@ impl Sequencers { let topic = kafka_topic_repro .create_or_get(topic_name.as_str()) .await - .context(TopicNotFoundSnafu{name: topic_name})?; + .context(TopicNotFoundSnafu { name: topic_name })?; // Get sequencer ids from the catalog let sequencer_repro = ingester.iox_catalog.sequencer(); @@ -53,7 +54,7 @@ impl Sequencers { let sequencer = sequencer_repro .create_or_get(&topic, shard) .await - .context(SequencerNotFoundSnafu{id: shard})?; + .context(SequencerNotFoundSnafu { id: shard })?; // Create empty buffer for each sequencer sequencers.insert(sequencer.id, Arc::new(SequencerData::new())); } diff --git a/ingester/src/lib.rs b/ingester/src/lib.rs index 9fd1048da9..309f26fe61 100644 --- a/ingester/src/lib.rs +++ b/ingester/src/lib.rs @@ -13,9 +13,6 @@ clippy::clone_on_ref_ptr )] -#[allow( - dead_code -)] - +#[allow(dead_code)] pub mod data; pub mod server; diff --git a/ingester/src/server.rs b/ingester/src/server.rs index 86897b86a9..18561e3be0 100644 --- a/ingester/src/server.rs +++ b/ingester/src/server.rs @@ -3,7 +3,7 @@ use std::sync::Arc; -use iox_catalog::{mem::MemCatalog, interface::KafkaPartition}; +use iox_catalog::{interface::KafkaPartition, mem::MemCatalog}; /// The [`IngesterServer`] manages the lifecycle and contains all state for /// an `ingester` server instance. 
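// A sketch of how the pieces in this series fit together at startup. The startup
// function itself, the topic name, and the partition numbers are placeholders assumed
// for illustration, not part of the patch:
async fn startup(catalog: &Arc<MemCatalog>) -> Result<Sequencers> {
    let ingester = IngesterServer::new(
        "iox_shared".to_string(),
        vec![KafkaPartition::new(0), KafkaPartition::new(1)],
        catalog,
    );
    // Creates the kafka topic and sequencer records in the catalog if needed and builds
    // one empty SequencerData buffer per assigned Kafka partition.
    Sequencers::initialize(&ingester).await
}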
@@ -13,13 +13,17 @@ pub struct IngesterServer<'a> { kafka_topic_name: String, // Kafka Partitions (Shards) assigned to this INgester kafka_partitions: Vec, - /// Catalog of this ingester + /// Catalog of this ingester pub iox_catalog: &'a Arc, } impl<'a> IngesterServer<'a> { /// Initialize the Ingester - pub fn new(topic_name: String, shard_ids: Vec, catalog: &'a Arc) -> Self { + pub fn new( + topic_name: String, + shard_ids: Vec, + catalog: &'a Arc, + ) -> Self { Self { kafka_topic_name: topic_name, kafka_partitions: shard_ids, From 367a9fb812ec84f3985237b32c7ed1663a630b22 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Tue, 18 Jan 2022 18:10:42 -0500 Subject: [PATCH 15/32] fix: add workspace-hack --- Cargo.lock | 1 + ingester/Cargo.toml | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index b61d595d2c..ae11c9380a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1861,6 +1861,7 @@ dependencies = [ "iox_catalog", "parking_lot", "snafu", + "workspace-hack", ] [[package]] diff --git a/ingester/Cargo.toml b/ingester/Cargo.toml index d3633a82e1..85b0497cdc 100644 --- a/ingester/Cargo.toml +++ b/ingester/Cargo.toml @@ -8,4 +8,5 @@ edition = "2021" arrow = { version = "7.0", features = ["prettyprint"] } snafu = "0.7" iox_catalog = { path = "../iox_catalog" } -parking_lot = "0.11.2" \ No newline at end of file +parking_lot = "0.11.2" +workspace-hack = { path = "../workspace-hack"} From f36d66deb730164d1e30247a89d0ae0b4fd497b0 Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Tue, 18 Jan 2022 18:17:21 -0500 Subject: [PATCH 16/32] feat: Add Tombstone to Catalog * Adds TombstoneId and Tombstone to the iox_catalog with associated interfaces * Adds SequenceNumber new type for use with Tombstone * Adds Timestamp new type for use with Tombstone * Adds constraint to the Postgres schema to enforce tombstone uniqueness by table_id, sequencer_id, and sequence_number --- .../20211229171744_initial_schema.sql | 3 +- iox_catalog/src/interface.rs | 165 ++++++++++++++++++ iox_catalog/src/mem.rs | 62 ++++++- iox_catalog/src/postgres.rs | 64 ++++++- 4 files changed, 289 insertions(+), 5 deletions(-) diff --git a/iox_catalog/migrations/20211229171744_initial_schema.sql b/iox_catalog/migrations/20211229171744_initial_schema.sql index 16fe51b09f..1ce222b18f 100644 --- a/iox_catalog/migrations/20211229171744_initial_schema.sql +++ b/iox_catalog/migrations/20211229171744_initial_schema.sql @@ -104,7 +104,8 @@ CREATE TABLE IF NOT EXISTS iox_catalog.tombstone min_time BIGINT NOT NULL, max_time BIGINT NOT NULL, serialized_predicate TEXT NOT NULL, - PRIMARY KEY (id) + PRIMARY KEY (id), + CONSTRAINT tombstone_unique UNIQUE (table_id, sequencer_id, sequence_number) ); CREATE TABLE IF NOT EXISTS iox_catalog.processed_tombstone diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index 3012cb0339..eb319b8be5 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -161,6 +161,51 @@ impl PartitionId { } } +/// Unique ID for a `Tombstone` +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub struct TombstoneId(i64); + +#[allow(missing_docs)] +impl TombstoneId { + pub fn new(v: i64) -> Self { + Self(v) + } + pub fn get(&self) -> i64 { + self.0 + } +} + +/// A sequence number from a `Sequencer` (kafka partition) +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub struct SequenceNumber(i64); + +#[allow(missing_docs)] +impl SequenceNumber { + pub fn 
new(v: i64) -> Self { + Self(v) + } + pub fn get(&self) -> i64 { + self.0 + } +} + +/// A time in nanoseconds from epoch +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub struct Timestamp(i64); + +#[allow(missing_docs)] +impl Timestamp { + pub fn new(v: i64) -> Self { + Self(v) + } + pub fn get(&self) -> i64 { + self.0 + } +} + /// Container that can return repos for each of the catalog data types. #[async_trait] pub trait RepoCollection { @@ -178,6 +223,8 @@ pub trait RepoCollection { fn sequencer(&self) -> Arc; /// repo for partitions fn partition(&self) -> Arc; + /// repo for tombstones + fn tombstone(&self) -> Arc; } /// Functions for working with Kafka topics in the catalog. @@ -271,6 +318,30 @@ pub trait PartitionRepo { async fn list_by_sequencer(&self, sequencer_id: SequencerId) -> Result>; } +/// Functions for working with tombstones in the catalog +#[async_trait] +pub trait TombstoneRepo { + /// create or get a tombstone + async fn create_or_get( + &self, + table_id: TableId, + sequencer_id: SequencerId, + sequence_number: SequenceNumber, + min_time: Timestamp, + max_time: Timestamp, + predicate: &str, + ) -> Result; + + /// return all tombstones for the sequencer with a sequence number greater than that + /// passed in. This will be used by the ingester on startup to see what tombstones + /// might have to be applied to data that is read from the write buffer. + async fn list_tombstones_by_sequencer_greater_than( + &self, + sequencer_id: SequencerId, + sequence_number: SequenceNumber, + ) -> Result>; +} + /// Data object for a kafka topic #[derive(Debug, Clone, Eq, PartialEq, sqlx::FromRow)] pub struct KafkaTopic { @@ -610,6 +681,25 @@ pub struct Partition { pub partition_key: String, } +/// Data object for a tombstone. 
+#[derive(Debug, Clone, PartialEq, sqlx::FromRow)] +pub struct Tombstone { + /// the id of the tombstone + pub id: TombstoneId, + /// the table the tombstone is associated with + pub table_id: TableId, + /// the sequencer the tombstone was sent through + pub sequencer_id: SequencerId, + /// the sequence nubmer assigned to the tombstone from the sequencer + pub sequence_number: SequenceNumber, + /// the min time (inclusive) that the delete applies to + pub min_time: Timestamp, + /// the max time (exclusive) that the delete applies to + pub max_time: Timestamp, + /// the full delete predicate + pub serialized_predicate: String, +} + #[cfg(test)] pub(crate) mod test_helpers { use super::*; @@ -627,6 +717,7 @@ pub(crate) mod test_helpers { test_column(&new_repo()).await; test_sequencer(&new_repo()).await; test_partition(&new_repo()).await; + test_tombstone(&new_repo()).await; } async fn test_kafka_topic(repo: &T) { @@ -845,4 +936,78 @@ pub(crate) mod test_helpers { assert_eq!(created, listed); } + + async fn test_tombstone(repo: &T) { + let kafka = repo.kafka_topic().create_or_get("foo").await.unwrap(); + let pool = repo.query_pool().create_or_get("foo").await.unwrap(); + let namespace = repo + .namespace() + .create("namespace_tombstone_test", "inf", kafka.id, pool.id) + .await + .unwrap(); + let table = repo + .table() + .create_or_get("test_table", namespace.id) + .await + .unwrap(); + let other_table = repo + .table() + .create_or_get("other", namespace.id) + .await + .unwrap(); + let sequencer = repo + .sequencer() + .create_or_get(&kafka, KafkaPartition::new(1)) + .await + .unwrap(); + + let tombstone_repo = repo.tombstone(); + let min_time = Timestamp::new(1); + let max_time = Timestamp::new(10); + let t1 = tombstone_repo + .create_or_get( + table.id, + sequencer.id, + SequenceNumber::new(1), + min_time, + max_time, + "whatevs", + ) + .await + .unwrap(); + assert!(t1.id > TombstoneId::new(0)); + assert_eq!(t1.sequencer_id, sequencer.id); + assert_eq!(t1.sequence_number, SequenceNumber::new(1)); + assert_eq!(t1.min_time, min_time); + assert_eq!(t1.max_time, max_time); + assert_eq!(t1.serialized_predicate, "whatevs"); + let t2 = tombstone_repo + .create_or_get( + other_table.id, + sequencer.id, + SequenceNumber::new(2), + min_time, + max_time, + "bleh", + ) + .await + .unwrap(); + let t3 = tombstone_repo + .create_or_get( + table.id, + sequencer.id, + SequenceNumber::new(3), + min_time, + max_time, + "sdf", + ) + .await + .unwrap(); + + let listed = tombstone_repo + .list_tombstones_by_sequencer_greater_than(sequencer.id, SequenceNumber::new(1)) + .await + .unwrap(); + assert_eq!(vec![t2, t3], listed); + } } diff --git a/iox_catalog/src/mem.rs b/iox_catalog/src/mem.rs index 88b00f3e1b..999d7e7175 100644 --- a/iox_catalog/src/mem.rs +++ b/iox_catalog/src/mem.rs @@ -4,8 +4,9 @@ use crate::interface::{ Column, ColumnId, ColumnRepo, ColumnType, Error, KafkaPartition, KafkaTopic, KafkaTopicId, KafkaTopicRepo, Namespace, NamespaceId, NamespaceRepo, Partition, PartitionId, PartitionRepo, - QueryPool, QueryPoolId, QueryPoolRepo, RepoCollection, Result, Sequencer, SequencerId, - SequencerRepo, Table, TableId, TableRepo, + QueryPool, QueryPoolId, QueryPoolRepo, RepoCollection, Result, SequenceNumber, Sequencer, + SequencerId, SequencerRepo, Table, TableId, TableRepo, Timestamp, Tombstone, TombstoneId, + TombstoneRepo, }; use async_trait::async_trait; use std::convert::TryFrom; @@ -42,6 +43,7 @@ struct MemCollections { columns: Vec, sequencers: Vec, partitions: Vec, + tombstones: Vec, } impl 
RepoCollection for Arc { @@ -72,6 +74,10 @@ impl RepoCollection for Arc { fn partition(&self) -> Arc { Self::clone(self) as Arc } + + fn tombstone(&self) -> Arc { + Self::clone(self) as Arc + } } #[async_trait] @@ -332,6 +338,58 @@ impl PartitionRepo for MemCatalog { } } +#[async_trait] +impl TombstoneRepo for MemCatalog { + async fn create_or_get( + &self, + table_id: TableId, + sequencer_id: SequencerId, + sequence_number: SequenceNumber, + min_time: Timestamp, + max_time: Timestamp, + predicate: &str, + ) -> Result { + let mut collections = self.collections.lock().expect("mutex poisoned"); + let tombstone = match collections.tombstones.iter().find(|t| { + t.table_id == table_id + && t.sequencer_id == sequencer_id + && t.sequence_number == sequence_number + }) { + Some(t) => t, + None => { + let t = Tombstone { + id: TombstoneId::new(collections.tombstones.len() as i64 + 1), + table_id, + sequencer_id, + sequence_number, + min_time, + max_time, + serialized_predicate: predicate.to_string(), + }; + collections.tombstones.push(t); + collections.tombstones.last().unwrap() + } + }; + + Ok(tombstone.clone()) + } + + async fn list_tombstones_by_sequencer_greater_than( + &self, + sequencer_id: SequencerId, + sequence_number: SequenceNumber, + ) -> Result> { + let collections = self.collections.lock().expect("mutex poisoned"); + let tombstones: Vec<_> = collections + .tombstones + .iter() + .filter(|t| t.sequencer_id == sequencer_id && t.sequence_number > sequence_number) + .cloned() + .collect(); + Ok(tombstones) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/iox_catalog/src/postgres.rs b/iox_catalog/src/postgres.rs index 230ae3617a..08c8fc43cf 100644 --- a/iox_catalog/src/postgres.rs +++ b/iox_catalog/src/postgres.rs @@ -3,8 +3,8 @@ use crate::interface::{ Column, ColumnRepo, ColumnType, Error, KafkaPartition, KafkaTopic, KafkaTopicId, KafkaTopicRepo, Namespace, NamespaceId, NamespaceRepo, Partition, PartitionRepo, QueryPool, - QueryPoolId, QueryPoolRepo, RepoCollection, Result, Sequencer, SequencerId, SequencerRepo, - Table, TableId, TableRepo, + QueryPoolId, QueryPoolRepo, RepoCollection, Result, SequenceNumber, Sequencer, SequencerId, + SequencerRepo, Table, TableId, TableRepo, Timestamp, Tombstone, TombstoneRepo, }; use async_trait::async_trait; use observability_deps::tracing::info; @@ -88,6 +88,10 @@ impl RepoCollection for Arc { fn partition(&self) -> Arc { Self::clone(self) as Arc } + + fn tombstone(&self) -> Arc { + Self::clone(self) as Arc + } } #[async_trait] @@ -376,6 +380,58 @@ impl PartitionRepo for PostgresCatalog { } } +#[async_trait] +impl TombstoneRepo for PostgresCatalog { + async fn create_or_get( + &self, + table_id: TableId, + sequencer_id: SequencerId, + sequence_number: SequenceNumber, + min_time: Timestamp, + max_time: Timestamp, + predicate: &str, + ) -> Result { + sqlx::query_as::<_, Tombstone>( + r#" + INSERT INTO tombstone + ( table_id, sequencer_id, sequence_number, min_time, max_time, serialized_predicate ) + VALUES + ( $1, $2, $3, $4, $5, $6 ) + ON CONFLICT ON CONSTRAINT tombstone_unique + DO UPDATE SET table_id = tombstone.table_id RETURNING *; + "#, + ) + .bind(&table_id) // $1 + .bind(&sequencer_id) // $2 + .bind(&sequence_number) // $3 + .bind(&min_time) // $4 + .bind(&max_time) // $5 + .bind(predicate) // $6 + .fetch_one(&self.pool) + .await + .map_err(|e| { + if is_fk_violation(&e) { + Error::ForeignKeyViolation { source: e } + } else { + Error::SqlxError { source: e } + } + }) + } + + async fn list_tombstones_by_sequencer_greater_than( + 
&self, + sequencer_id: SequencerId, + sequence_number: SequenceNumber, + ) -> Result> { + sqlx::query_as::<_, Tombstone>(r#"SELECT * FROM tombstone WHERE sequencer_id = $1 AND sequence_number > $2 ORDER BY id;"#) + .bind(&sequencer_id) // $1 + .bind(&sequence_number) // $2 + .fetch_all(&self.pool) + .await + .map_err(|e| Error::SqlxError { source: e }) + } +} + /// The error code returned by Postgres for a unique constraint violation. /// /// See @@ -476,6 +532,10 @@ mod tests { } async fn clear_schema(pool: &Pool) { + sqlx::query("delete from tombstone;") + .execute(pool) + .await + .unwrap(); sqlx::query("delete from column_name;") .execute(pool) .await From b57f027e3514e56fe7d4588975274ce956fdc751 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Tue, 18 Jan 2022 20:57:13 -0500 Subject: [PATCH 17/32] refactor: address review comments --- Cargo.lock | 1 + ingester/Cargo.toml | 3 +- ingester/src/data.rs | 63 +++++++++++++++--------------------- ingester/src/lib.rs | 2 +- iox_catalog/src/interface.rs | 2 +- 5 files changed, 31 insertions(+), 40 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ae11c9380a..7c2b3c7018 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1859,6 +1859,7 @@ version = "0.1.0" dependencies = [ "arrow", "iox_catalog", + "mutable_batch", "parking_lot", "snafu", "workspace-hack", diff --git a/ingester/Cargo.toml b/ingester/Cargo.toml index 85b0497cdc..8d8017e904 100644 --- a/ingester/Cargo.toml +++ b/ingester/Cargo.toml @@ -6,7 +6,8 @@ edition = "2021" [dependencies] arrow = { version = "7.0", features = ["prettyprint"] } -snafu = "0.7" iox_catalog = { path = "../iox_catalog" } +mutable_batch = { path = "../mutable_batch"} parking_lot = "0.11.2" +snafu = "0.7" workspace-hack = { path = "../workspace-hack"} diff --git a/ingester/src/data.rs b/ingester/src/data.rs index 746156503d..b353a3cd46 100644 --- a/ingester/src/data.rs +++ b/ingester/src/data.rs @@ -4,22 +4,24 @@ use std::{collections::BTreeMap, sync::Arc}; use crate::server::IngesterServer; -use arrow::datatypes::DataType; -use iox_catalog::interface::{KafkaPartition, NamespaceId, RepoCollection, SequencerId}; +use iox_catalog::interface::{ + KafkaPartition, NamespaceId, PartitionId, RepoCollection, SequencerId, TableId, +}; +use mutable_batch::MutableBatch; use parking_lot::RwLock; use snafu::{ResultExt, Snafu}; #[derive(Debug, Snafu)] #[allow(missing_copy_implementations, missing_docs)] pub enum Error { - #[snafu(display("Topic {} not found", name))] - TopicNotFound { + #[snafu(display("Error while reading Topic {}", name))] + ReadTopic { source: iox_catalog::interface::Error, name: String, }, - #[snafu(display("Sequencer id {} not found", id.get()))] - SequencerNotFound { + #[snafu(display("Error while reading Kafka Partition id {}", id.get()))] + ReadSequencer { source: iox_catalog::interface::Error, id: KafkaPartition, }, @@ -29,6 +31,7 @@ pub enum Error { pub type Result = std::result::Result; /// Ingester Data: a Mapp of Shard ID to its Data +#[derive(Default)] struct Sequencers { // This map gets set up on initialization of the ingester so it won't ever be modified. 
// The content of each SequenceData will get changed when more namespaces and tables @@ -45,7 +48,7 @@ impl Sequencers { let topic = kafka_topic_repro .create_or_get(topic_name.as_str()) .await - .context(TopicNotFoundSnafu { name: topic_name })?; + .context(ReadTopicSnafu { name: topic_name })?; // Get sequencer ids from the catalog let sequencer_repro = ingester.iox_catalog.sequencer(); @@ -54,9 +57,9 @@ impl Sequencers { let sequencer = sequencer_repro .create_or_get(&topic, shard) .await - .context(SequencerNotFoundSnafu { id: shard })?; + .context(ReadSequencerSnafu { id: shard })?; // Create empty buffer for each sequencer - sequencers.insert(sequencer.id, Arc::new(SequencerData::new())); + sequencers.insert(sequencer.id, Arc::new(SequencerData::default())); } Ok(Self { data: sequencers }) @@ -64,35 +67,29 @@ impl Sequencers { } /// Data of a Shard +#[derive(Default)] struct SequencerData { // New namespaces can come in at any time so we need to be able to add new ones namespaces: RwLock>>, } -impl SequencerData { - /// Create an empty SequenceData - pub fn new() -> Self { - Self { - namespaces: RwLock::new(BTreeMap::default()), - } - } -} - /// Data of a Namespace that belongs to a given Shard +#[derive(Default)] struct NamespaceData { - tables: RwLock>>, + tables: RwLock>>, } /// Data of a Table in a given Namesapce that belongs to a given Shard +#[derive(Default)] struct TableData { - partitions: RwLock>>, + // Map pf partition key to its data + partition_data: RwLock>>, } /// Data of an IOx Partition of a given Table of a Namesapce that belongs to a given Shard +#[derive(Default)] struct PartitionData { - /// Key of this partition - partition_key: String, - /// Data + id: PartitionId, inner: RwLock, } @@ -115,6 +112,7 @@ struct PartitionData { // │ └───────────────┘ │ │ │ // │ │ │ │ // └────────────────────────┘ └────────────────────────┘ +#[derive(Default)] struct DataBuffer { /// Buffer of ingesting data buffer: Vec, @@ -128,7 +126,7 @@ struct DataBuffer { /// When a persist is called, data in `buffer` will be moved to a `snapshot` /// and then all `snapshots` will be moved to a `persisting`. /// Both `buffer` and 'snaphots` will be empty when this happens. - persisting: Vec, + persisting: Vec, // Extra Notes: // . Multiple perssiting operations may be happenning concurrently but // their persisted info must be added into the Catalog in thier data @@ -142,18 +140,9 @@ struct DataBuffer { // storage yet. But this will be decided after MVP. } -struct PersistingData { - batches: Vec>, -} - struct DataBatch { - // a map of the unique column name to its data. Every column - // must have the same number of values. - column_data: BTreeMap>, -} - -struct ColumnData { - // it might be better to have the raw values and null markers, - // but this will probably be easier and faster to get going. - values: Option, + /// Sequencer number of the ingesting data + pub sequencer_number: u64, + /// Ingesting data + pub inner: MutableBatch, } diff --git a/ingester/src/lib.rs b/ingester/src/lib.rs index 309f26fe61..fea1eaff23 100644 --- a/ingester/src/lib.rs +++ b/ingester/src/lib.rs @@ -2,7 +2,7 @@ //! Design doc: https://docs.google.com/document/d/14NlzBiWwn0H37QxnE0k3ybTU58SKyUZmdgYpVw6az0Q/edit# //! 
-#![deny(rustdoc::broken_intra_doc_links, rustdoc::bare_urls, rust_2018_idioms)] +#![deny(rustdoc::broken_intra_doc_links, rust_2018_idioms)] #![warn( missing_copy_implementations, missing_debug_implementations, diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index 2b31c4c964..ba8c6f15b9 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -147,7 +147,7 @@ impl KafkaPartition { } /// Unique ID for a `Sequencer` -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] #[sqlx(transparent)] pub struct PartitionId(i64); From b89c250ccca8b8e92288f87f41cad30ece1ecce6 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Tue, 18 Jan 2022 21:39:22 -0500 Subject: [PATCH 18/32] refactor: use RepoColection instead of MemCatalog --- ingester/src/data.rs | 4 +++- ingester/src/lib.rs | 1 - ingester/src/server.rs | 21 +++++++++++---------- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/ingester/src/data.rs b/ingester/src/data.rs index b353a3cd46..3e03a9189b 100644 --- a/ingester/src/data.rs +++ b/ingester/src/data.rs @@ -41,7 +41,9 @@ struct Sequencers { impl Sequencers { /// One time initialize Sequencers of this Ingester - pub async fn initialize(ingester: &IngesterServer<'_>) -> Result { + pub async fn initialize( + ingester: &IngesterServer<'_, T>, + ) -> Result { // Get kafka topic from the catalog let topic_name = ingester.get_topic(); let kafka_topic_repro = ingester.iox_catalog.kafka_topic(); diff --git a/ingester/src/lib.rs b/ingester/src/lib.rs index fea1eaff23..31bc719a49 100644 --- a/ingester/src/lib.rs +++ b/ingester/src/lib.rs @@ -5,7 +5,6 @@ #![deny(rustdoc::broken_intra_doc_links, rust_2018_idioms)] #![warn( missing_copy_implementations, - missing_debug_implementations, missing_docs, clippy::explicit_iter_loop, clippy::future_not_send, diff --git a/ingester/src/server.rs b/ingester/src/server.rs index 18561e3be0..6019b77262 100644 --- a/ingester/src/server.rs +++ b/ingester/src/server.rs @@ -3,27 +3,28 @@ use std::sync::Arc; -use iox_catalog::{interface::KafkaPartition, mem::MemCatalog}; +use iox_catalog::interface::{KafkaPartition, RepoCollection}; /// The [`IngesterServer`] manages the lifecycle and contains all state for /// an `ingester` server instance. 
-#[derive(Debug)] -pub struct IngesterServer<'a> { +pub struct IngesterServer<'a, T> +where + T: RepoCollection + Send + Sync, +{ // Kafka Topic assigned to this ingester kafka_topic_name: String, // Kafka Partitions (Shards) assigned to this INgester kafka_partitions: Vec, /// Catalog of this ingester - pub iox_catalog: &'a Arc, + pub iox_catalog: &'a Arc, } -impl<'a> IngesterServer<'a> { +impl<'a, T> IngesterServer<'a, T> +where + T: RepoCollection + Send + Sync, +{ /// Initialize the Ingester - pub fn new( - topic_name: String, - shard_ids: Vec, - catalog: &'a Arc, - ) -> Self { + pub fn new(topic_name: String, shard_ids: Vec, catalog: &'a Arc) -> Self { Self { kafka_topic_name: topic_name, kafka_partitions: shard_ids, From fe9a41ee9acc8696611b5672b90cc54e12acba0f Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Tue, 18 Jan 2022 21:45:20 -0500 Subject: [PATCH 19/32] chore: remove non-longer needed dependency --- ingester/Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/ingester/Cargo.toml b/ingester/Cargo.toml index 8d8017e904..1d4324b0c6 100644 --- a/ingester/Cargo.toml +++ b/ingester/Cargo.toml @@ -5,7 +5,6 @@ authors = ["Nga Tran "] edition = "2021" [dependencies] -arrow = { version = "7.0", features = ["prettyprint"] } iox_catalog = { path = "../iox_catalog" } mutable_batch = { path = "../mutable_batch"} parking_lot = "0.11.2" From e8294d21ec4cfb1c0155274198ad8e7d62200e77 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Tue, 18 Jan 2022 22:02:27 -0500 Subject: [PATCH 20/32] fix: add .lock --- Cargo.lock | 1 - 1 file changed, 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 7c2b3c7018..fb0c35c9cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1857,7 +1857,6 @@ dependencies = [ name = "ingester" version = "0.1.0" dependencies = [ - "arrow", "iox_catalog", "mutable_batch", "parking_lot", From 8a17e1c132a8fc807ffae699f6531385edf59cb2 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Wed, 19 Jan 2022 11:20:20 -0500 Subject: [PATCH 21/32] refactor: address review comments --- Cargo.lock | 1 + ingester/Cargo.toml | 1 + ingester/src/data.rs | 66 ++++++++++++++++++++++-------------- iox_catalog/src/interface.rs | 2 +- 4 files changed, 43 insertions(+), 27 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fb0c35c9cd..7c2b3c7018 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1857,6 +1857,7 @@ dependencies = [ name = "ingester" version = "0.1.0" dependencies = [ + "arrow", "iox_catalog", "mutable_batch", "parking_lot", diff --git a/ingester/Cargo.toml b/ingester/Cargo.toml index 1d4324b0c6..8d8017e904 100644 --- a/ingester/Cargo.toml +++ b/ingester/Cargo.toml @@ -5,6 +5,7 @@ authors = ["Nga Tran "] edition = "2021" [dependencies] +arrow = { version = "7.0", features = ["prettyprint"] } iox_catalog = { path = "../iox_catalog" } mutable_batch = { path = "../mutable_batch"} parking_lot = "0.11.2" diff --git a/ingester/src/data.rs b/ingester/src/data.rs index 3e03a9189b..8909032de9 100644 --- a/ingester/src/data.rs +++ b/ingester/src/data.rs @@ -1,6 +1,7 @@ //! Data for the lifecycle of the Ingeter //! 
+use arrow::record_batch::RecordBatch; use std::{collections::BTreeMap, sync::Arc}; use crate::server::IngesterServer; @@ -48,7 +49,7 @@ impl Sequencers { let topic_name = ingester.get_topic(); let kafka_topic_repro = ingester.iox_catalog.kafka_topic(); let topic = kafka_topic_repro - .create_or_get(topic_name.as_str()) + .create_or_get(topic_name.as_str()) //todo: use `get` instead .await .context(ReadTopicSnafu { name: topic_name })?; @@ -57,7 +58,7 @@ impl Sequencers { let mut sequencers = BTreeMap::default(); for shard in ingester.get_kafka_partitions() { let sequencer = sequencer_repro - .create_or_get(&topic, shard) + .create_or_get(&topic, shard) //todo: use `get` instead .await .context(ReadSequencerSnafu { id: shard })?; // Create empty buffer for each sequencer @@ -89,46 +90,48 @@ struct TableData { } /// Data of an IOx Partition of a given Table of a Namesapce that belongs to a given Shard -#[derive(Default)] struct PartitionData { id: PartitionId, inner: RwLock, } /// Data of an IOx partition split into batches -// ┌────────────────────────┐ ┌────────────────────────┐ -// │ Snapshots │ │ Persisting │ -// │ │ │ │ -// │ ┌───────────────┐ │ │ ┌───────────────┐ │ -// │ ┌┴──────────────┐│ │ │ │ Persisting │ │ -// │ ┌┴──────────────┐├┴───┼────────────┼──▶│ Data │ │ -// │ │ Snapshot ├┘ │ │ └───────────────┘ │ -// │ └───────────────┘ │ │ │ -// ┌────────────┐ │ │ │ ... │ -// │ Buffer │───────▶│ ... │ │ │ -// └────────────┘ │ │ │ │ -// │ ┌───────────────┐ │ │ ┌───────────────┐ │ -// │ ┌┴──────────────┐│ │ │ │ Persisting │ │ -// │ ┌┴──────────────┐├┴────┼────────────┼───▶│ Data │ │ -// │ │ Snapshot ├┘ │ │ └───────────────┘ │ -// │ └───────────────┘ │ │ │ -// │ │ │ │ -// └────────────────────────┘ └────────────────────────┘ +/// ┌────────────────────────┐ ┌────────────────────────┐ ┌─────────────────────────┐ +/// │ Buffer │ │ Snapshots │ │ Persisting │ +/// │ ┌───────────────────┐ │ │ │ │ │ +/// │ │ ┌───────────────┐│ │ │ ┌───────────────────┐ │ │ ┌───────────────────┐ │ +/// │ │ ┌┴──────────────┐│├─┼────────┼─┼─▶┌───────────────┐│ │ │ │ ┌───────────────┐│ │ +/// │ │┌┴──────────────┐├┘│ │ │ │ ┌┴──────────────┐││ │ │ │ ┌┴──────────────┐││ │ +/// │ ││ BufferBatch ├┘ │ │ │ │┌┴──────────────┐├┘│──┼──────┼─▶│┌┴──────────────┐├┘│ │ +/// │ │└───────────────┘ │ │ ┌───┼─▶│ SnapshotBatch ├┘ │ │ │ ││ SnapshotBatch ├┘ │ │ +/// │ └───────────────────┘ │ │ │ │└───────────────┘ │ │ │ │└───────────────┘ │ │ +/// │ ... │ │ │ └───────────────────┘ │ │ └───────────────────┘ │ +/// │ ┌───────────────────┐ │ │ │ │ │ │ +/// │ │ ┌───────────────┐│ │ │ │ ... │ │ ... │ +/// │ │ ┌┴──────────────┐││ │ │ │ │ │ │ +/// │ │┌┴──────────────┐├┘│─┼────┘ │ ┌───────────────────┐ │ │ ┌───────────────────┐ │ +/// │ ││ BufferBatch ├┘ │ │ │ │ ┌───────────────┐│ │ │ │ ┌───────────────┐│ │ +/// │ │└───────────────┘ │ │ │ │ ┌┴──────────────┐││ │ │ │ ┌┴──────────────┐││ │ +/// │ └───────────────────┘ │ │ │┌┴──────────────┐├┘│──┼──────┼─▶│┌┴──────────────┐├┘│ │ +/// │ │ │ ││ SnapshotBatch ├┘ │ │ │ ││ SnapshotBatch ├┘ │ │ +/// │ ... │ │ │└───────────────┘ │ │ │ │└───────────────┘ │ │ +/// │ │ │ └───────────────────┘ │ │ └───────────────────┘ │ +/// └────────────────────────┘ └────────────────────────┘ └─────────────────────────┘ #[derive(Default)] struct DataBuffer { /// Buffer of ingesting data - buffer: Vec, + buffer: Vec, /// Data in `buffer` will be moved to a `snapshot` when one of these happens: /// . A background persist is called /// . A read request from Querier /// The `buffer` will be empty when this happens. 
- snapshots: Vec>, + snapshots: Vec>, /// When a persist is called, data in `buffer` will be moved to a `snapshot` /// and then all `snapshots` will be moved to a `persisting`. /// Both `buffer` and 'snaphots` will be empty when this happens. - persisting: Vec, + persisting: Vec, // Extra Notes: // . Multiple perssiting operations may be happenning concurrently but // their persisted info must be added into the Catalog in thier data @@ -141,10 +144,21 @@ struct DataBuffer { // Queriers that may not have loaded the parquet files from object // storage yet. But this will be decided after MVP. } - -struct DataBatch { +/// BufferBatch is a MutauableBatch with its ingesting order, sequencer_number, that +/// helps the ingester keep the batches of data in thier ingesting order +struct BufferBatch { /// Sequencer number of the ingesting data pub sequencer_number: u64, /// Ingesting data pub inner: MutableBatch, } + +/// SnapshotBatch contains data of many contiguous BufferBatches +struct SnapshotBatch { + /// Min sequencer number of its comebined BufferBatches + pub min_sequencer_number: u64, + /// Max sequencer number of its comebined BufferBatches + pub max_sequencer_number: u64, + /// Data of its comebined BufferBatches kept in one RecordBatch + pub inner: RecordBatch, +} diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index 85994d47bc..3012cb0339 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -147,7 +147,7 @@ impl KafkaPartition { } /// Unique ID for a `Partition` -#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] #[sqlx(transparent)] pub struct PartitionId(i64); From edb97f51cf18f8b403158e81c2feded711c2804d Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Wed, 19 Jan 2022 12:36:18 -0500 Subject: [PATCH 22/32] refactor: add persisting struct --- Cargo.lock | 1 + ingester/Cargo.toml | 1 + ingester/src/data.rs | 20 ++++++++++++++++---- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7c2b3c7018..32dcd82657 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1862,6 +1862,7 @@ dependencies = [ "mutable_batch", "parking_lot", "snafu", + "uuid", "workspace-hack", ] diff --git a/ingester/Cargo.toml b/ingester/Cargo.toml index 8d8017e904..7e1d8f1719 100644 --- a/ingester/Cargo.toml +++ b/ingester/Cargo.toml @@ -10,4 +10,5 @@ iox_catalog = { path = "../iox_catalog" } mutable_batch = { path = "../mutable_batch"} parking_lot = "0.11.2" snafu = "0.7" +uuid = { version = "0.8", features = ["v4"] } workspace-hack = { path = "../workspace-hack"} diff --git a/ingester/src/data.rs b/ingester/src/data.rs index 8909032de9..d53a96b0a3 100644 --- a/ingester/src/data.rs +++ b/ingester/src/data.rs @@ -3,6 +3,7 @@ use arrow::record_batch::RecordBatch; use std::{collections::BTreeMap, sync::Arc}; +use uuid::Uuid; use crate::server::IngesterServer; use iox_catalog::interface::{ @@ -131,9 +132,10 @@ struct DataBuffer { /// When a persist is called, data in `buffer` will be moved to a `snapshot` /// and then all `snapshots` will be moved to a `persisting`. /// Both `buffer` and 'snaphots` will be empty when this happens. - persisting: Vec, + persisting: Option>, // Extra Notes: - // . Multiple perssiting operations may be happenning concurrently but + // . In MVP, we will only persist a set of sanpshots at a time. 
+ // In later version, multiple perssiting operations may be happenning concurrently but // their persisted info must be added into the Catalog in thier data // ingesting order. // . When a read request comes from a Querier, all data from `snaphots` @@ -150,7 +152,7 @@ struct BufferBatch { /// Sequencer number of the ingesting data pub sequencer_number: u64, /// Ingesting data - pub inner: MutableBatch, + pub data: MutableBatch, } /// SnapshotBatch contains data of many contiguous BufferBatches @@ -160,5 +162,15 @@ struct SnapshotBatch { /// Max sequencer number of its comebined BufferBatches pub max_sequencer_number: u64, /// Data of its comebined BufferBatches kept in one RecordBatch - pub inner: RecordBatch, + pub data: RecordBatch, +} + +/// PersistingBatch contains all needed info and data for creating +/// a parquet file for given set of SnapshotBatches +struct PersistingBatch { + sequencer_id: SequencerId, + table_id: TableId, + partition_id: PartitionId, + object_store_id: Uuid, + data: Vec, } From 9977f174b720c1e431695da7068c5e582eff4d67 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Wed, 19 Jan 2022 12:51:04 -0500 Subject: [PATCH 23/32] refactor: use wrapper ID --- ingester/src/data.rs | 17 +++++------------ ingester/src/server.rs | 26 ++++++++++++++++++-------- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/ingester/src/data.rs b/ingester/src/data.rs index d53a96b0a3..91454c8eba 100644 --- a/ingester/src/data.rs +++ b/ingester/src/data.rs @@ -7,7 +7,7 @@ use uuid::Uuid; use crate::server::IngesterServer; use iox_catalog::interface::{ - KafkaPartition, NamespaceId, PartitionId, RepoCollection, SequencerId, TableId, + KafkaPartition, NamespaceId, PartitionId, RepoCollection, SequenceNumber, SequencerId, TableId, }; use mutable_batch::MutableBatch; use parking_lot::RwLock; @@ -46,17 +46,10 @@ impl Sequencers { pub async fn initialize( ingester: &IngesterServer<'_, T>, ) -> Result { - // Get kafka topic from the catalog - let topic_name = ingester.get_topic(); - let kafka_topic_repro = ingester.iox_catalog.kafka_topic(); - let topic = kafka_topic_repro - .create_or_get(topic_name.as_str()) //todo: use `get` instead - .await - .context(ReadTopicSnafu { name: topic_name })?; - // Get sequencer ids from the catalog let sequencer_repro = ingester.iox_catalog.sequencer(); let mut sequencers = BTreeMap::default(); + let topic = ingester.get_topic(); for shard in ingester.get_kafka_partitions() { let sequencer = sequencer_repro .create_or_get(&topic, shard) //todo: use `get` instead @@ -150,7 +143,7 @@ struct DataBuffer { /// helps the ingester keep the batches of data in thier ingesting order struct BufferBatch { /// Sequencer number of the ingesting data - pub sequencer_number: u64, + pub sequencer_number: SequenceNumber, /// Ingesting data pub data: MutableBatch, } @@ -158,9 +151,9 @@ struct BufferBatch { /// SnapshotBatch contains data of many contiguous BufferBatches struct SnapshotBatch { /// Min sequencer number of its comebined BufferBatches - pub min_sequencer_number: u64, + pub min_sequencer_number: SequenceNumber, /// Max sequencer number of its comebined BufferBatches - pub max_sequencer_number: u64, + pub max_sequencer_number: SequenceNumber, /// Data of its comebined BufferBatches kept in one RecordBatch pub data: RecordBatch, } diff --git a/ingester/src/server.rs b/ingester/src/server.rs index 6019b77262..11ce6dc553 100644 --- a/ingester/src/server.rs +++ b/ingester/src/server.rs @@ -3,7 +3,7 @@ use std::sync::Arc; -use 
iox_catalog::interface::{KafkaPartition, RepoCollection}; +use iox_catalog::interface::{KafkaPartition, KafkaTopic, KafkaTopicId, RepoCollection}; /// The [`IngesterServer`] manages the lifecycle and contains all state for /// an `ingester` server instance. @@ -11,9 +11,9 @@ pub struct IngesterServer<'a, T> where T: RepoCollection + Send + Sync, { - // Kafka Topic assigned to this ingester - kafka_topic_name: String, - // Kafka Partitions (Shards) assigned to this INgester + /// Kafka Topic assigned to this ingester + kafka_topic: KafkaTopic, + /// Kafka Partitions (Shards) assigned to this INgester kafka_partitions: Vec, /// Catalog of this ingester pub iox_catalog: &'a Arc, @@ -24,17 +24,27 @@ where T: RepoCollection + Send + Sync, { /// Initialize the Ingester - pub fn new(topic_name: String, shard_ids: Vec, catalog: &'a Arc) -> Self { + pub fn new(topic: KafkaTopic, shard_ids: Vec, catalog: &'a Arc) -> Self { Self { - kafka_topic_name: topic_name, + kafka_topic: topic, kafka_partitions: shard_ids, iox_catalog: catalog, } } + /// Return a kafka topic + pub fn get_topic(&self) -> KafkaTopic { + self.kafka_topic.clone() + } + + /// Return a kafka topic id + pub fn get_topic_id(&self) -> KafkaTopicId { + self.kafka_topic.id + } + /// Return a kafka topic name - pub fn get_topic(&self) -> String { - self.kafka_topic_name.clone() + pub fn get_topic_name(&self) -> String { + self.kafka_topic.name.clone() } /// Return Kafka Partitions From be3e52331253b3eff5365ebe08186a399cabe268 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Wed, 19 Jan 2022 13:25:03 -0500 Subject: [PATCH 24/32] fix: use PersistingBatch --- ingester/src/data.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ingester/src/data.rs b/ingester/src/data.rs index 91454c8eba..f21bf15364 100644 --- a/ingester/src/data.rs +++ b/ingester/src/data.rs @@ -125,7 +125,7 @@ struct DataBuffer { /// When a persist is called, data in `buffer` will be moved to a `snapshot` /// and then all `snapshots` will be moved to a `persisting`. /// Both `buffer` and 'snaphots` will be empty when this happens. - persisting: Option>, + persisting: Option>, // Extra Notes: // . In MVP, we will only persist a set of sanpshots at a time. 
// In later version, multiple perssiting operations may be happenning concurrently but From 41038721e100ce478b4244d248ed1092aeaeb2f8 Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Wed, 19 Jan 2022 14:13:02 -0500 Subject: [PATCH 25/32] feat: Add parquet file records to iox_catalog * Adds ParquetFile and scaffolding to IOx catalog * Changed the file_location in parquet_file to object_store_id which is a uuid --- Cargo.lock | 2 + iox_catalog/Cargo.toml | 3 +- .../20211229171744_initial_schema.sql | 6 +- iox_catalog/src/interface.rs | 191 ++++++++++++++++++ iox_catalog/src/mem.rs | 79 +++++++- iox_catalog/src/postgres.rs | 81 +++++++- 6 files changed, 351 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 32dcd82657..28cd1db4b0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1911,6 +1911,7 @@ dependencies = [ "snafu", "sqlx", "tokio", + "uuid", "workspace-hack", ] @@ -4351,6 +4352,7 @@ dependencies = [ "thiserror", "tokio-stream", "url", + "uuid", "whoami", ] diff --git a/iox_catalog/Cargo.toml b/iox_catalog/Cargo.toml index be3c2ea82f..50d63d10f3 100644 --- a/iox_catalog/Cargo.toml +++ b/iox_catalog/Cargo.toml @@ -10,10 +10,11 @@ chrono = { version = "0.4", default-features = false, features = ["clock"] } futures = "0.3" observability_deps = { path = "../observability_deps" } snafu = "0.7" -sqlx = { version = "0.5", features = [ "runtime-tokio-native-tls" , "postgres" ] } +sqlx = { version = "0.5", features = [ "runtime-tokio-native-tls" , "postgres", "uuid" ] } tokio = { version = "1.13", features = ["full", "io-util", "macros", "parking_lot", "rt-multi-thread", "time"] } influxdb_line_protocol = { path = "../influxdb_line_protocol" } workspace-hack = { path = "../workspace-hack"} +uuid = { version = "0.8", features = ["v4"] } [dev-dependencies] # In alphabetical order dotenv = "0.15.0" diff --git a/iox_catalog/migrations/20211229171744_initial_schema.sql b/iox_catalog/migrations/20211229171744_initial_schema.sql index 1ce222b18f..6c8606ec73 100644 --- a/iox_catalog/migrations/20211229171744_initial_schema.sql +++ b/iox_catalog/migrations/20211229171744_initial_schema.sql @@ -84,15 +84,15 @@ CREATE TABLE IF NOT EXISTS iox_catalog.parquet_file id BIGINT GENERATED ALWAYS AS IDENTITY, sequencer_id SMALLINT NOT NULL, table_id INT NOT NULL, - partition_id INT NOT NULL, - file_location VARCHAR NOT NULL, + partition_id BIGINT NOT NULL, + object_store_id uuid NOT NULL, min_sequence_number BIGINT, max_sequence_number BIGINT, min_time BIGINT, max_time BIGINT, to_delete BOOLEAN, PRIMARY KEY (id), - CONSTRAINT parquet_location_unique UNIQUE (file_location) + CONSTRAINT parquet_location_unique UNIQUE (object_store_id) ); CREATE TABLE IF NOT EXISTS iox_catalog.tombstone diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index eb319b8be5..0f62638709 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -7,6 +7,7 @@ use std::collections::BTreeMap; use std::convert::TryFrom; use std::fmt::Formatter; use std::sync::Arc; +use uuid::Uuid; #[derive(Debug, Snafu)] #[allow(missing_copy_implementations, missing_docs)] @@ -36,6 +37,12 @@ pub enum Error { #[snafu(display("namespace {} not found", name))] NamespaceNotFound { name: String }, + + #[snafu(display("parquet file with object_store_id {} already exists", object_store_id))] + FileExists { object_store_id: Uuid }, + + #[snafu(display("parquet_file record {} not found", id))] + ParquetRecordNotFound { id: ParquetFileId }, } /// A specialized `Error` for Catalog errors @@ -206,6 +213,28 @@ impl 
Timestamp { } } +/// Unique ID for a `ParquetFile` +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[sqlx(transparent)] +pub struct ParquetFileId(i64); + +#[allow(missing_docs)] +impl ParquetFileId { + pub fn new(v: i64) -> Self { + Self(v) + } + pub fn get(&self) -> i64 { + self.0 + } +} + +impl std::fmt::Display for ParquetFileId { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + // Use `self.number` to refer to each positional data point. + write!(f, "{}", self.0) + } +} + /// Container that can return repos for each of the catalog data types. #[async_trait] pub trait RepoCollection { @@ -225,6 +254,8 @@ pub trait RepoCollection { fn partition(&self) -> Arc; /// repo for tombstones fn tombstone(&self) -> Arc; + /// repo for parquet_files + fn parquet_file(&self) -> Arc; } /// Functions for working with Kafka topics in the catalog. @@ -342,6 +373,37 @@ pub trait TombstoneRepo { ) -> Result>; } +/// Functions for working with parquet file pointers in the catalog +#[async_trait] +pub trait ParquetFileRepo { + /// create the parquet file + #[allow(clippy::too_many_arguments)] + async fn create( + &self, + sequencer_id: SequencerId, + table_id: TableId, + partition_id: PartitionId, + object_store_id: Uuid, + min_sequence_number: SequenceNumber, + max_sequence_number: SequenceNumber, + min_time: Timestamp, + max_time: Timestamp, + ) -> Result; + + /// Flag the parquet file for deletion + async fn flag_for_delete(&self, id: ParquetFileId) -> Result<()>; + + /// Get all parquet files for a sequencer with a max_sequence_number greater than the + /// one passed in. The ingester will use this on startup to see which files were persisted + /// that are greater than its min_unpersisted_number so that it can discard any data in + /// these partitions on replay. + async fn list_by_sequencer_greater_than( + &self, + sequencer_id: SequencerId, + sequence_number: SequenceNumber, + ) -> Result>; +} + /// Data object for a kafka topic #[derive(Debug, Clone, Eq, PartialEq, sqlx::FromRow)] pub struct KafkaTopic { @@ -700,6 +762,31 @@ pub struct Tombstone { pub serialized_predicate: String, } +/// Data for a parquet file reference in the catalog. 
+#[derive(Debug, Copy, Clone, PartialEq, sqlx::FromRow)] +pub struct ParquetFile { + /// the id of the file in the catalog + pub id: ParquetFileId, + /// the sequencer that sequenced writes that went into this file + pub sequencer_id: SequencerId, + /// the table + pub table_id: TableId, + /// the partition + pub partition_id: PartitionId, + /// the uuid used in the object store path for this file + pub object_store_id: Uuid, + /// the minimum sequence number from a record in this file + pub min_sequence_number: SequenceNumber, + /// the maximum sequence number from a record in this file + pub max_sequence_number: SequenceNumber, + /// the min timestamp of data in this file + pub min_time: Timestamp, + /// the max timestamp of data in this file + pub max_time: Timestamp, + /// flag to mark that this file should be deleted from object storage + pub to_delete: bool, +} + #[cfg(test)] pub(crate) mod test_helpers { use super::*; @@ -718,6 +805,7 @@ pub(crate) mod test_helpers { test_sequencer(&new_repo()).await; test_partition(&new_repo()).await; test_tombstone(&new_repo()).await; + test_parquet_file(&new_repo()).await; } async fn test_kafka_topic(repo: &T) { @@ -1010,4 +1098,107 @@ pub(crate) mod test_helpers { .unwrap(); assert_eq!(vec![t2, t3], listed); } + + async fn test_parquet_file(repo: &T) { + let kafka = repo.kafka_topic().create_or_get("foo").await.unwrap(); + let pool = repo.query_pool().create_or_get("foo").await.unwrap(); + let namespace = repo + .namespace() + .create("namespace_parquet_file_test", "inf", kafka.id, pool.id) + .await + .unwrap(); + let table = repo + .table() + .create_or_get("test_table", namespace.id) + .await + .unwrap(); + let other_table = repo + .table() + .create_or_get("other", namespace.id) + .await + .unwrap(); + let sequencer = repo + .sequencer() + .create_or_get(&kafka, KafkaPartition::new(1)) + .await + .unwrap(); + let partition = repo + .partition() + .create_or_get("one", sequencer.id, table.id) + .await + .unwrap(); + let other_partition = repo + .partition() + .create_or_get("one", sequencer.id, other_table.id) + .await + .unwrap(); + + let min_time = Timestamp::new(1); + let max_time = Timestamp::new(10); + + let parquet_repo = repo.parquet_file(); + let parquet_file = parquet_repo + .create( + sequencer.id, + partition.table_id, + partition.id, + Uuid::new_v4(), + SequenceNumber::new(10), + SequenceNumber::new(140), + min_time, + max_time, + ) + .await + .unwrap(); + + // verify that trying to create a file with the same UUID throws an error + let err = parquet_repo + .create( + sequencer.id, + partition.table_id, + partition.id, + parquet_file.object_store_id, + SequenceNumber::new(10), + SequenceNumber::new(140), + min_time, + max_time, + ) + .await + .unwrap_err(); + assert!(matches!(err, Error::FileExists { object_store_id: _ })); + + let other_file = parquet_repo + .create( + sequencer.id, + other_partition.table_id, + other_partition.id, + Uuid::new_v4(), + SequenceNumber::new(45), + SequenceNumber::new(200), + min_time, + max_time, + ) + .await + .unwrap(); + + let files = parquet_repo + .list_by_sequencer_greater_than(sequencer.id, SequenceNumber::new(1)) + .await + .unwrap(); + assert_eq!(vec![parquet_file, other_file], files); + let files = parquet_repo + .list_by_sequencer_greater_than(sequencer.id, SequenceNumber::new(150)) + .await + .unwrap(); + assert_eq!(vec![other_file], files); + + // verify that to_delete is initially set to false and that it can be updated to true + assert!(!parquet_file.to_delete); + 
parquet_repo.flag_for_delete(parquet_file.id).await.unwrap(); + let files = parquet_repo + .list_by_sequencer_greater_than(sequencer.id, SequenceNumber::new(1)) + .await + .unwrap(); + assert!(files.first().unwrap().to_delete); + } } diff --git a/iox_catalog/src/mem.rs b/iox_catalog/src/mem.rs index 999d7e7175..f2eeda8579 100644 --- a/iox_catalog/src/mem.rs +++ b/iox_catalog/src/mem.rs @@ -3,15 +3,16 @@ use crate::interface::{ Column, ColumnId, ColumnRepo, ColumnType, Error, KafkaPartition, KafkaTopic, KafkaTopicId, - KafkaTopicRepo, Namespace, NamespaceId, NamespaceRepo, Partition, PartitionId, PartitionRepo, - QueryPool, QueryPoolId, QueryPoolRepo, RepoCollection, Result, SequenceNumber, Sequencer, - SequencerId, SequencerRepo, Table, TableId, TableRepo, Timestamp, Tombstone, TombstoneId, - TombstoneRepo, + KafkaTopicRepo, Namespace, NamespaceId, NamespaceRepo, ParquetFile, ParquetFileId, + ParquetFileRepo, Partition, PartitionId, PartitionRepo, QueryPool, QueryPoolId, QueryPoolRepo, + RepoCollection, Result, SequenceNumber, Sequencer, SequencerId, SequencerRepo, Table, TableId, + TableRepo, Timestamp, Tombstone, TombstoneId, TombstoneRepo, }; use async_trait::async_trait; use std::convert::TryFrom; use std::fmt::Formatter; use std::sync::{Arc, Mutex}; +use uuid::Uuid; /// In-memory catalog that implements the `RepoCollection` and individual repo traits from /// the catalog interface. @@ -44,6 +45,7 @@ struct MemCollections { sequencers: Vec, partitions: Vec, tombstones: Vec, + parquet_files: Vec, } impl RepoCollection for Arc { @@ -78,6 +80,10 @@ impl RepoCollection for Arc { fn tombstone(&self) -> Arc { Self::clone(self) as Arc } + + fn parquet_file(&self) -> Arc { + Self::clone(self) as Arc + } } #[async_trait] @@ -390,6 +396,71 @@ impl TombstoneRepo for MemCatalog { } } +#[async_trait] +impl ParquetFileRepo for MemCatalog { + async fn create( + &self, + sequencer_id: SequencerId, + table_id: TableId, + partition_id: PartitionId, + object_store_id: Uuid, + min_sequence_number: SequenceNumber, + max_sequence_number: SequenceNumber, + min_time: Timestamp, + max_time: Timestamp, + ) -> Result { + let mut collections = self.collections.lock().expect("mutex poisoned"); + if collections + .parquet_files + .iter() + .any(|f| f.object_store_id == object_store_id) + { + return Err(Error::FileExists { object_store_id }); + } + + let parquet_file = ParquetFile { + id: ParquetFileId::new(collections.parquet_files.len() as i64 + 1), + sequencer_id, + table_id, + partition_id, + object_store_id, + min_sequence_number, + max_sequence_number, + min_time, + max_time, + to_delete: false, + }; + collections.parquet_files.push(parquet_file); + Ok(*collections.parquet_files.last().unwrap()) + } + + async fn flag_for_delete(&self, id: ParquetFileId) -> Result<()> { + let mut collections = self.collections.lock().expect("mutex poisoned"); + + match collections.parquet_files.iter_mut().find(|p| p.id == id) { + Some(f) => f.to_delete = true, + None => return Err(Error::ParquetRecordNotFound { id }), + } + + Ok(()) + } + + async fn list_by_sequencer_greater_than( + &self, + sequencer_id: SequencerId, + sequence_number: SequenceNumber, + ) -> Result> { + let collections = self.collections.lock().expect("mutex poisoned"); + let files: Vec<_> = collections + .parquet_files + .iter() + .filter(|f| f.sequencer_id == sequencer_id && f.max_sequence_number > sequence_number) + .cloned() + .collect(); + Ok(files) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/iox_catalog/src/postgres.rs 
b/iox_catalog/src/postgres.rs index 08c8fc43cf..0e8300555b 100644 --- a/iox_catalog/src/postgres.rs +++ b/iox_catalog/src/postgres.rs @@ -2,15 +2,17 @@ use crate::interface::{ Column, ColumnRepo, ColumnType, Error, KafkaPartition, KafkaTopic, KafkaTopicId, - KafkaTopicRepo, Namespace, NamespaceId, NamespaceRepo, Partition, PartitionRepo, QueryPool, - QueryPoolId, QueryPoolRepo, RepoCollection, Result, SequenceNumber, Sequencer, SequencerId, - SequencerRepo, Table, TableId, TableRepo, Timestamp, Tombstone, TombstoneRepo, + KafkaTopicRepo, Namespace, NamespaceId, NamespaceRepo, ParquetFile, ParquetFileId, + ParquetFileRepo, Partition, PartitionId, PartitionRepo, QueryPool, QueryPoolId, QueryPoolRepo, + RepoCollection, Result, SequenceNumber, Sequencer, SequencerId, SequencerRepo, Table, TableId, + TableRepo, Timestamp, Tombstone, TombstoneRepo, }; use async_trait::async_trait; use observability_deps::tracing::info; use sqlx::{postgres::PgPoolOptions, Executor, Pool, Postgres}; use std::sync::Arc; use std::time::Duration; +use uuid::Uuid; const MAX_CONNECTIONS: u32 = 5; const CONNECT_TIMEOUT: Duration = Duration::from_secs(2); @@ -92,6 +94,10 @@ impl RepoCollection for Arc { fn tombstone(&self) -> Arc { Self::clone(self) as Arc } + + fn parquet_file(&self) -> Arc { + Self::clone(self) as Arc + } } #[async_trait] @@ -432,6 +438,75 @@ impl TombstoneRepo for PostgresCatalog { } } +#[async_trait] +impl ParquetFileRepo for PostgresCatalog { + async fn create( + &self, + sequencer_id: SequencerId, + table_id: TableId, + partition_id: PartitionId, + object_store_id: Uuid, + min_sequence_number: SequenceNumber, + max_sequence_number: SequenceNumber, + min_time: Timestamp, + max_time: Timestamp, + ) -> Result { + let rec = sqlx::query_as::<_, ParquetFile>( + r#" +INSERT INTO parquet_file ( sequencer_id, table_id, partition_id, object_store_id, min_sequence_number, max_sequence_number, min_time, max_time, to_delete ) +VALUES ( $1, $2, $3, $4, $5, $6, $7, $8, false ) +RETURNING * + "#, + ) + .bind(sequencer_id) // $1 + .bind(table_id) // $2 + .bind(partition_id) // $3 + .bind(object_store_id) // $4 + .bind(min_sequence_number) // $5 + .bind(max_sequence_number) // $6 + .bind(min_time) // $7 + .bind(max_time) // $8 + .fetch_one(&self.pool) + .await + .map_err(|e| { + if is_unique_violation(&e) { + Error::FileExists { + object_store_id, + } + } else if is_fk_violation(&e) { + Error::ForeignKeyViolation { source: e } + } else { + Error::SqlxError { source: e } + } + })?; + + Ok(rec) + } + + async fn flag_for_delete(&self, id: ParquetFileId) -> Result<()> { + let _ = sqlx::query(r#"UPDATE parquet_file SET to_delete = true WHERE id = $1;"#) + .bind(&id) // $1 + .execute(&self.pool) + .await + .map_err(|e| Error::SqlxError { source: e })?; + + Ok(()) + } + + async fn list_by_sequencer_greater_than( + &self, + sequencer_id: SequencerId, + sequence_number: SequenceNumber, + ) -> Result> { + sqlx::query_as::<_, ParquetFile>(r#"SELECT * FROM parquet_file WHERE sequencer_id = $1 AND max_sequence_number > $2 ORDER BY id;"#) + .bind(&sequencer_id) // $1 + .bind(&sequence_number) // $2 + .fetch_all(&self.pool) + .await + .map_err(|e| Error::SqlxError { source: e }) + } +} + /// The error code returned by Postgres for a unique constraint violation. 
/// /// See From d825dab8e2b12d182a71e8a9aa9a378db8424068 Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Wed, 19 Jan 2022 14:48:00 -0500 Subject: [PATCH 26/32] fix: hakari workspace hack --- Cargo.lock | 1 + workspace-hack/Cargo.toml | 2 ++ 2 files changed, 3 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 28cd1db4b0..04a96ff666 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5423,6 +5423,7 @@ dependencies = [ "tracing", "tracing-core", "tracing-subscriber", + "uuid", ] [[package]] diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index aa265f90c0..d93dbcb42c 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -53,6 +53,7 @@ tower = { version = "0.4", features = ["balance", "buffer", "discover", "futures tracing = { version = "0.1", features = ["attributes", "log", "max_level_trace", "release_max_level_debug", "std", "tracing-attributes"] } tracing-core = { version = "0.1", features = ["lazy_static", "std"] } tracing-subscriber = { version = "0.3", features = ["alloc", "ansi", "ansi_term", "env-filter", "fmt", "lazy_static", "matchers", "regex", "registry", "sharded-slab", "smallvec", "std", "thread_local", "tracing", "tracing-log"] } +uuid = { version = "0.8", features = ["getrandom", "std", "v4"] } [build-dependencies] ahash = { version = "0.7", features = ["std"] } @@ -86,5 +87,6 @@ smallvec = { version = "1", default-features = false, features = ["union"] } syn = { version = "1", features = ["clone-impls", "derive", "extra-traits", "full", "parsing", "printing", "proc-macro", "quote", "visit", "visit-mut"] } tokio = { version = "1", features = ["bytes", "fs", "full", "io-std", "io-util", "libc", "macros", "memchr", "mio", "net", "num_cpus", "once_cell", "parking_lot", "process", "rt", "rt-multi-thread", "signal", "signal-hook-registry", "sync", "time", "tokio-macros", "winapi"] } tokio-stream = { version = "0.1", features = ["fs", "net", "time"] } +uuid = { version = "0.8", features = ["getrandom", "std", "v4"] } ### END HAKARI SECTION From 28db06297ffba2ec4932eaa899b88f76abc48bc5 Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Wed, 19 Jan 2022 16:30:54 -0500 Subject: [PATCH 27/32] fix: clear postgres schema in test wasn't deleting parquet_file --- iox_catalog/src/postgres.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/iox_catalog/src/postgres.rs b/iox_catalog/src/postgres.rs index 0e8300555b..82235e9268 100644 --- a/iox_catalog/src/postgres.rs +++ b/iox_catalog/src/postgres.rs @@ -611,6 +611,10 @@ mod tests { .execute(pool) .await .unwrap(); + sqlx::query("delete from parquet_file;") + .execute(pool) + .await + .unwrap(); sqlx::query("delete from column_name;") .execute(pool) .await From 172d75c6d76ef27bfe1125d8b56ebba1dda540db Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Wed, 19 Jan 2022 16:45:06 -0500 Subject: [PATCH 28/32] feat: add sequencer get_by_kafka_topic_id_and_partition to catalog --- iox_catalog/src/interface.rs | 23 +++++++++++++++++++++++ iox_catalog/src/mem.rs | 14 ++++++++++++++ iox_catalog/src/postgres.rs | 24 ++++++++++++++++++++++++ 3 files changed, 61 insertions(+) diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index 0f62638709..626c032836 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -326,6 +326,13 @@ pub trait SequencerRepo { partition: KafkaPartition, ) -> Result; + /// get the sequencer record by `KafkaTopicId` and `KafkaPartition` + async fn get_by_topic_id_and_partition( + &self, + topic_id: KafkaTopicId, + partition: KafkaPartition, + ) -> 
Result>; + /// list all sequencers async fn list(&self) -> Result>; @@ -970,6 +977,22 @@ pub(crate) mod test_helpers { .collect::>(); assert_eq!(created, listed); + + // get by the sequencer id and partition + let kafka_partition = KafkaPartition::new(1); + let sequencer = sequencer_repo + .get_by_topic_id_and_partition(kafka.id, kafka_partition) + .await + .unwrap() + .unwrap(); + assert_eq!(kafka.id, sequencer.kafka_topic_id); + assert_eq!(kafka_partition, sequencer.kafka_partition); + + let sequencer = sequencer_repo + .get_by_topic_id_and_partition(kafka.id, KafkaPartition::new(523)) + .await + .unwrap(); + assert!(sequencer.is_none()); } async fn test_partition(repo: &T) { diff --git a/iox_catalog/src/mem.rs b/iox_catalog/src/mem.rs index f2eeda8579..419e4ea624 100644 --- a/iox_catalog/src/mem.rs +++ b/iox_catalog/src/mem.rs @@ -287,6 +287,20 @@ impl SequencerRepo for MemCatalog { Ok(*sequencer) } + async fn get_by_topic_id_and_partition( + &self, + topic_id: KafkaTopicId, + partition: KafkaPartition, + ) -> Result> { + let collections = self.collections.lock().expect("mutex poisoned"); + let sequencer = collections + .sequencers + .iter() + .find(|s| s.kafka_topic_id == topic_id && s.kafka_partition == partition) + .cloned(); + Ok(sequencer) + } + async fn list(&self) -> Result> { let collections = self.collections.lock().expect("mutex poisoned"); Ok(collections.sequencers.clone()) diff --git a/iox_catalog/src/postgres.rs b/iox_catalog/src/postgres.rs index 82235e9268..3f573f1fb0 100644 --- a/iox_catalog/src/postgres.rs +++ b/iox_catalog/src/postgres.rs @@ -329,6 +329,30 @@ impl SequencerRepo for PostgresCatalog { }) } + async fn get_by_topic_id_and_partition( + &self, + topic_id: KafkaTopicId, + partition: KafkaPartition, + ) -> Result> { + let rec = sqlx::query_as::<_, Sequencer>( + r#" +SELECT * FROM sequencer WHERE kafka_topic_id = $1 AND kafka_partition = $2; + "#, + ) + .bind(topic_id) // $1 + .bind(partition) // $2 + .fetch_one(&self.pool) + .await; + + if let Err(sqlx::Error::RowNotFound) = rec { + return Ok(None); + } + + let sequencer = rec.map_err(|e| Error::SqlxError { source: e })?; + + Ok(Some(sequencer)) + } + async fn list(&self) -> Result> { sqlx::query_as::<_, Sequencer>(r#"SELECT * FROM sequencer;"#) .fetch_all(&self.pool) From 860e5a30ca9da1a57c243e899bf1caca28444daa Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Wed, 19 Jan 2022 17:15:10 -0500 Subject: [PATCH 29/32] refactor: update ingester to get sequencer record and not attempt to create --- ingester/src/data.rs | 24 +++++++++++++++++++----- iox_catalog/src/interface.rs | 12 ++++++++++++ 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/ingester/src/data.rs b/ingester/src/data.rs index f21bf15364..4b1bca735d 100644 --- a/ingester/src/data.rs +++ b/ingester/src/data.rs @@ -7,11 +7,12 @@ use uuid::Uuid; use crate::server::IngesterServer; use iox_catalog::interface::{ - KafkaPartition, NamespaceId, PartitionId, RepoCollection, SequenceNumber, SequencerId, TableId, + KafkaPartition, KafkaTopicId, NamespaceId, PartitionId, RepoCollection, SequenceNumber, + SequencerId, TableId, }; use mutable_batch::MutableBatch; use parking_lot::RwLock; -use snafu::{ResultExt, Snafu}; +use snafu::{OptionExt, ResultExt, Snafu}; #[derive(Debug, Snafu)] #[allow(missing_copy_implementations, missing_docs)] @@ -27,6 +28,16 @@ pub enum Error { source: iox_catalog::interface::Error, id: KafkaPartition, }, + + #[snafu(display( + "Sequencer record not found for kafka_topic_id {} and kafka_partition {}", + kafka_topic_id, + 
kafka_partition + ))] + SequencerNotFound { + kafka_topic_id: KafkaTopicId, + kafka_partition: KafkaPartition, + }, } /// A specialized `Error` for Ingester Data errors @@ -52,9 +63,13 @@ impl Sequencers { let topic = ingester.get_topic(); for shard in ingester.get_kafka_partitions() { let sequencer = sequencer_repro - .create_or_get(&topic, shard) //todo: use `get` instead + .get_by_topic_id_and_partition(topic.id, shard) .await - .context(ReadSequencerSnafu { id: shard })?; + .context(ReadSequencerSnafu { id: shard })? + .context(SequencerNotFoundSnafu { + kafka_topic_id: topic.id, + kafka_partition: shard, + })?; // Create empty buffer for each sequencer sequencers.insert(sequencer.id, Arc::new(SequencerData::default())); } @@ -121,7 +136,6 @@ struct DataBuffer { /// . A read request from Querier /// The `buffer` will be empty when this happens. snapshots: Vec>, - /// When a persist is called, data in `buffer` will be moved to a `snapshot` /// and then all `snapshots` will be moved to a `persisting`. /// Both `buffer` and 'snaphots` will be empty when this happens. diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index 626c032836..ea0714d3f7 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -78,6 +78,12 @@ impl KafkaTopicId { } } +impl std::fmt::Display for KafkaTopicId { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + /// Unique ID for a `QueryPool` #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] #[sqlx(transparent)] @@ -153,6 +159,12 @@ impl KafkaPartition { } } +impl std::fmt::Display for KafkaPartition { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + /// Unique ID for a `Partition` #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] #[sqlx(transparent)] From bfc085c20df0a7db331facfd9fbda0944a85de1a Mon Sep 17 00:00:00 2001 From: Paul Dix Date: Wed, 19 Jan 2022 17:32:23 -0500 Subject: [PATCH 30/32] feat: add get kafka_topic by name to catalog --- iox_catalog/src/interface.rs | 7 +++++++ iox_catalog/src/mem.rs | 10 ++++++++++ iox_catalog/src/postgres.rs | 20 +++++++++++++++++++- 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index ea0714d3f7..d72e91a4ee 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -275,6 +275,9 @@ pub trait RepoCollection { pub trait KafkaTopicRepo { /// Creates the kafka topic in the catalog or gets the existing record by name. async fn create_or_get(&self, name: &str) -> Result; + + /// Gets the kafka topic by its unique name + async fn get_by_name(&self, name: &str) -> Result>; } /// Functions for working with query pools in the catalog. 
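A note on how these additions compose: patch 28's `SequencerRepo::get_by_topic_id_and_partition` and this patch's `KafkaTopicRepo::get_by_name` let the ingester resolve its topology without creating catalog records as a side effect. The sketch below is illustrative only; the helper name, topic name parameter, and partition number are assumptions, not part of these patches:

use iox_catalog::interface::{KafkaPartition, RepoCollection, Result, Sequencer};

/// Resolve the sequencer for an already-provisioned topic and partition,
/// returning Ok(None) if either record is missing instead of creating it.
async fn lookup_sequencer<T>(
    repo: &T,
    topic_name: &str,
    partition: i32,
) -> Result<Option<Sequencer>>
where
    T: RepoCollection + Send + Sync,
{
    // Resolve the topic by its unique name; absence is not an error here.
    let topic = match repo.kafka_topic().get_by_name(topic_name).await? {
        Some(t) => t,
        None => return Ok(None),
    };

    // Resolve the sequencer by (topic id, kafka partition) without create_or_get.
    repo.sequencer()
        .get_by_topic_id_and_partition(topic.id, KafkaPartition::new(partition))
        .await
}

This is the same get-only pattern patch 29 applies in `Sequencers::initialize`, which surfaces a missing record as `SequencerNotFound` rather than silently creating one.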
@@ -834,6 +837,10 @@ pub(crate) mod test_helpers { assert_eq!(k.name, "foo"); let k2 = kafka_repo.create_or_get("foo").await.unwrap(); assert_eq!(k, k2); + let k3 = kafka_repo.get_by_name("foo").await.unwrap().unwrap(); + assert_eq!(k3, k); + let k3 = kafka_repo.get_by_name("asdf").await.unwrap(); + assert!(k3.is_none()); } async fn test_query_pool(repo: &T) { diff --git a/iox_catalog/src/mem.rs b/iox_catalog/src/mem.rs index 419e4ea624..c4cf0333b1 100644 --- a/iox_catalog/src/mem.rs +++ b/iox_catalog/src/mem.rs @@ -105,6 +105,16 @@ impl KafkaTopicRepo for MemCatalog { Ok(topic.clone()) } + + async fn get_by_name(&self, name: &str) -> Result> { + let collections = self.collections.lock().expect("mutex poisoned"); + let kafka_topic = collections + .kafka_topics + .iter() + .find(|t| t.name == name) + .cloned(); + Ok(kafka_topic) + } } #[async_trait] diff --git a/iox_catalog/src/postgres.rs b/iox_catalog/src/postgres.rs index 3f573f1fb0..2b052a9738 100644 --- a/iox_catalog/src/postgres.rs +++ b/iox_catalog/src/postgres.rs @@ -118,6 +118,25 @@ DO UPDATE SET name = kafka_topic.name RETURNING *; Ok(rec) } + + async fn get_by_name(&self, name: &str) -> Result> { + let rec = sqlx::query_as::<_, KafkaTopic>( + r#" +SELECT * FROM kafka_topic WHERE name = $1; + "#, + ) + .bind(&name) // $1 + .fetch_one(&self.pool) + .await; + + if let Err(sqlx::Error::RowNotFound) = rec { + return Ok(None); + } + + let kafka_topic = rec.map_err(|e| Error::SqlxError { source: e })?; + + Ok(Some(kafka_topic)) + } } #[async_trait] @@ -178,7 +197,6 @@ RETURNING * } async fn get_by_name(&self, name: &str) -> Result> { - // TODO: maybe get all the data in a single call to Postgres? let rec = sqlx::query_as::<_, Namespace>( r#" SELECT * FROM namespace WHERE name = $1; From 4ede10b3a06cae9ea68b295cc19bfa0bb641de65 Mon Sep 17 00:00:00 2001 From: NGA-TRAN Date: Wed, 19 Jan 2022 17:53:58 -0500 Subject: [PATCH 31/32] refactor: add new fields and comments in ingest data buffer --- ingester/src/data.rs | 52 ++++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/ingester/src/data.rs b/ingester/src/data.rs index f21bf15364..d5ec8c4dd8 100644 --- a/ingester/src/data.rs +++ b/ingester/src/data.rs @@ -8,6 +8,7 @@ use uuid::Uuid; use crate::server::IngesterServer; use iox_catalog::interface::{ KafkaPartition, NamespaceId, PartitionId, RepoCollection, SequenceNumber, SequencerId, TableId, + Tombstone, }; use mutable_batch::MutableBatch; use parking_lot::RwLock; @@ -34,7 +35,7 @@ pub type Result = std::result::Result; /// Ingester Data: a Mapp of Shard ID to its Data #[derive(Default)] -struct Sequencers { +pub struct Sequencers { // This map gets set up on initialization of the ingester so it won't ever be modified. // The content of each SequenceData will get changed when more namespaces and tables // get ingested. 
@@ -65,26 +66,26 @@ impl Sequencers {
 /// Data of a Shard
 #[derive(Default)]
-struct SequencerData {
+pub struct SequencerData {
     // New namespaces can come in at any time so we need to be able to add new ones
     namespaces: RwLock>>,
 }
 
 /// Data of a Namespace that belongs to a given Shard
 #[derive(Default)]
-struct NamespaceData {
+pub struct NamespaceData {
     tables: RwLock>>,
 }
 
 /// Data of a Table in a given Namespace that belongs to a given Shard
 #[derive(Default)]
-struct TableData {
+pub struct TableData {
     // Map of partition key to its data
     partition_data: RwLock>>,
 }
 
 /// Data of an IOx Partition of a given Table of a Namespace that belongs to a given Shard
-struct PartitionData {
+pub struct PartitionData {
     id: PartitionId,
     inner: RwLock,
 }
 
@@ -112,9 +113,15 @@ struct PartitionData {
 /// │ │ │ └───────────────────┘ │ │ └───────────────────┘ │
 /// └────────────────────────┘ └────────────────────────┘ └─────────────────────────┘
 #[derive(Default)]
-struct DataBuffer {
-    /// Buffer of ingesting data
-    buffer: Vec,
+pub struct DataBuffer {
+    /// Buffer of incoming writes
+    pub buffer: Vec,
+
+    /// Buffer of tombstones whose time range overlaps with this partition.
+    /// These tombstones will first be written into the Catalog and then here.
+    /// When a persist is called, these tombstones will be moved into the
+    /// PersistingBatch to be applied to that data.
+    pub deletes: Vec,
 
     /// Data in `buffer` will be moved to a `snapshot` when one of these happens:
     /// . A background persist is called
@@ -141,7 +148,7 @@ struct DataBuffer {
 }
 /// BufferBatch is a MutableBatch with its ingesting order, sequencer_number, that
 /// helps the ingester keep the batches of data in their ingesting order
-struct BufferBatch {
+pub struct BufferBatch {
     /// Sequencer number of the ingesting data
     pub sequencer_number: SequenceNumber,
     /// Ingesting data
@@ -149,7 +156,7 @@ struct BufferBatch {
 }
 
 /// SnapshotBatch contains data of many contiguous BufferBatches
-struct SnapshotBatch {
+pub struct SnapshotBatch {
     /// Min sequencer number of its combined BufferBatches
     pub min_sequencer_number: SequenceNumber,
     /// Max sequencer number of its combined BufferBatches
@@ -160,10 +167,23 @@ struct SnapshotBatch {
 
 /// PersistingBatch contains all needed info and data for creating
 /// a parquet file for a given set of SnapshotBatches
-struct PersistingBatch {
-    sequencer_id: SequencerId,
-    table_id: TableId,
-    partition_id: PartitionId,
-    object_store_id: Uuid,
-    data: Vec,
+pub struct PersistingBatch {
+    /// Sequencer id of the data
+    pub sequencer_id: SequencerId,
+
+    /// Table id of the data
+    pub table_id: TableId,
+
+    /// Partition Id of the data
+    pub partition_id: PartitionId,
+
+    /// Id of the to-be-created parquet file of this data
+    pub object_store_id: Uuid,
+
+    /// Data to be persisted
+    pub data: Vec,
+
+    /// Delete predicates to be applied to the data
+    /// before persisting
+    pub deletes: Vec,
 }

From 029f4bb41e32354e60a72b8312cccbad9cd1fd3e Mon Sep 17 00:00:00 2001
From: NGA-TRAN
Date: Wed, 19 Jan 2022 18:11:00 -0500
Subject: [PATCH 32/32] fix: comment

---
 ingester/src/data.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ingester/src/data.rs b/ingester/src/data.rs
index 4e31ded524..b838faca54 100644
--- a/ingester/src/data.rs
+++ b/ingester/src/data.rs
@@ -131,7 +131,7 @@ pub struct DataBuffer {
     /// Buffer of incoming writes
     pub buffer: Vec,
 
-    /// Buffer of tombstones whose time range overlaps with this partition.
+    /// Buffer of tombstones whose time range may overlap with this partition.
     /// These tombstones will first be written into the Catalog and then here.
     /// When a persist is called, these tombstones will be moved into the
     /// PersistingBatch to be applied to that data.
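
A minimal sketch (not taken from these patches) of how a caller might exercise the lookups added above: resolve a kafka topic by name, then fetch the sequencer record for a topic/partition pair instead of creating one on demand, matching the ingester's new behavior. It assumes a `MemCatalog::new()` constructor, a hypothetical topic name "example_topic", and the elided generics on the catalog's Result types; fully qualified trait calls are used because more than one repo trait exposes a `get_by_name` method.

use iox_catalog::interface::{KafkaPartition, KafkaTopicRepo, SequencerRepo};
use iox_catalog::mem::MemCatalog;

async fn lookup_sequencer() -> Result<(), Box<dyn std::error::Error>> {
    // In-memory catalog; `MemCatalog::new()` is assumed for this sketch.
    let catalog = MemCatalog::new();

    // Topics are looked up, not created: get_by_name returns Ok(None) when missing.
    let topic = KafkaTopicRepo::get_by_name(&catalog, "example_topic")
        .await?
        .expect("kafka topic should already exist");

    // The ingester now surfaces a SequencerNotFound error instead of creating the record.
    match SequencerRepo::get_by_topic_id_and_partition(&catalog, topic.id, KafkaPartition::new(0))
        .await?
    {
        Some(sequencer) => println!("found sequencer {:?}", sequencer.id),
        None => println!("no sequencer for this topic/partition"),
    }

    Ok(())
}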