diff --git a/Cargo.lock b/Cargo.lock index 7e00a60ddc..bea3067320 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2645,6 +2645,8 @@ version = "0.1.0" dependencies = [ "arrow", "bimap", + "hashbrown 0.14.5", + "indexmap 2.6.0", "influxdb-line-protocol", "influxdb3_id", "influxdb3_wal", @@ -2680,7 +2682,7 @@ dependencies = [ name = "influxdb3_id" version = "0.1.0" dependencies = [ - "hashbrown 0.14.5", + "indexmap 2.6.0", "serde", "serde_json", ] @@ -2842,6 +2844,7 @@ dependencies = [ "data_types", "futures-util", "hashbrown 0.14.5", + "indexmap 2.6.0", "influxdb-line-protocol", "influxdb3_id", "iox_time", diff --git a/influxdb3/tests/server/configure.rs b/influxdb3/tests/server/configure.rs index 7c09aed513..d59fa2c952 100644 --- a/influxdb3/tests/server/configure.rs +++ b/influxdb3/tests/server/configure.rs @@ -26,6 +26,7 @@ async fn api_v3_configure_last_cache_create() { #[derive(Default)] struct TestCase { // These attributes all map to parameters of the request body: + description: &'static str, db: Option<&'static str>, table: Option<&'static str>, cache_name: Option<&'static str>, @@ -38,58 +39,58 @@ async fn api_v3_configure_last_cache_create() { } let test_cases = [ - // No parameters specified: TestCase { + description: "no parameters specified", expected: StatusCode::BAD_REQUEST, ..Default::default() }, - // Missing database name: TestCase { + description: "missing database name", table: Some(tbl_name), expected: StatusCode::BAD_REQUEST, ..Default::default() }, - // Missing table name: TestCase { + description: "missing table name", db: Some(db_name), expected: StatusCode::BAD_REQUEST, ..Default::default() }, - // Good, will use defaults for everything omitted, and get back a 201: TestCase { + description: "Good, will use defaults for everything omitted, and get back a 201", db: Some(db_name), table: Some(tbl_name), expected: StatusCode::CREATED, ..Default::default() }, - // Same as before, will be successful, but with 204: TestCase { + description: "Same as before, will be successful, but with 204", db: Some(db_name), table: Some(tbl_name), expected: StatusCode::NO_CONTENT, ..Default::default() }, - // Use a specific cache name, will succeed and create new cache: // NOTE: this will only differ from the previous cache in name, should this actually // be an error? 
TestCase { + description: "Use a specific cache name, will succeed and create new cache", db: Some(db_name), table: Some(tbl_name), cache_name: Some("my_cache"), expected: StatusCode::CREATED, ..Default::default() }, - // Same as previous, but will get 204 because it does nothing: TestCase { + description: "Same as previous, but will get 204 because it does nothing", db: Some(db_name), table: Some(tbl_name), cache_name: Some("my_cache"), expected: StatusCode::NO_CONTENT, ..Default::default() }, - // Same as previous, but this time try to use different parameters, this will result in - // a bad request: TestCase { + description: "Same as previous, but this time try to use different parameters, this \ + will result in a bad request", db: Some(db_name), table: Some(tbl_name), cache_name: Some("my_cache"), @@ -98,32 +99,33 @@ async fn api_v3_configure_last_cache_create() { expected: StatusCode::BAD_REQUEST, ..Default::default() }, - // Will create new cache, because key columns are unique, and so will be the name: TestCase { + description: + "Will create new cache, because key columns are unique, and so will be the name", db: Some(db_name), table: Some(tbl_name), key_cols: Some(&["t1", "t2"]), expected: StatusCode::CREATED, ..Default::default() }, - // Same as previous, but will get 204 because nothing happens: TestCase { + description: "Same as previous, but will get 204 because nothing happens", db: Some(db_name), table: Some(tbl_name), key_cols: Some(&["t1", "t2"]), expected: StatusCode::NO_CONTENT, ..Default::default() }, - // Use an invalid key column (by name) is a bad request: TestCase { + description: "Use an invalid key column (by name) is a bad request", db: Some(db_name), table: Some(tbl_name), key_cols: Some(&["not_a_key_column"]), expected: StatusCode::BAD_REQUEST, ..Default::default() }, - // Use an invalid key column (by type) is a bad request: TestCase { + description: "Use an invalid key column (by type) is a bad request", db: Some(db_name), table: Some(tbl_name), // f5 is a float, which is not supported as a key column: @@ -131,16 +133,16 @@ async fn api_v3_configure_last_cache_create() { expected: StatusCode::BAD_REQUEST, ..Default::default() }, - // Use an invalid value column is a bad request: TestCase { + description: "Use an invalid value column is a bad request", db: Some(db_name), table: Some(tbl_name), val_cols: Some(&["not_a_value_column"]), expected: StatusCode::BAD_REQUEST, ..Default::default() }, - // Use an invalid cache size is a bad request: TestCase { + description: "Use an invalid cache size is a bad request", db: Some(db_name), table: Some(tbl_name), count: Some(11), @@ -166,7 +168,12 @@ async fn api_v3_configure_last_cache_create() { .await .expect("send /api/v3/configure/last_cache request"); let status = resp.status(); - assert_eq!(t.expected, status, "test case ({i}) failed"); + assert_eq!( + t.expected, + status, + "test case ({i}) failed, {description}", + description = t.description + ); } } diff --git a/influxdb3/tests/server/system_tables.rs b/influxdb3/tests/server/system_tables.rs index 07c4d9efeb..a19618941f 100644 --- a/influxdb3/tests/server/system_tables.rs +++ b/influxdb3/tests/server/system_tables.rs @@ -185,12 +185,12 @@ async fn last_caches_table() { let batches = collect_stream(resp).await; assert_batches_sorted_eq!( [ - "+-------+---------------------+----------------+---------------+-------+-------+", - "| table | name | key_columns | value_columns | count | ttl |", - 
"+-------+---------------------+----------------+---------------+-------+-------+", - "| cpu | cpu_host_last_cache | [host] | | 1 | 14400 |", - "| mem | mem_last_cache | [host, region] | [time, usage] | 1 | 60 |", - "+-------+---------------------+----------------+---------------+-------+-------+", + "+-------+---------------------+-------------+---------------+-------+-------+", + "| table | name | key_columns | value_columns | count | ttl |", + "+-------+---------------------+-------------+---------------+-------+-------+", + "| cpu | cpu_host_last_cache | [1] | | 1 | 14400 |", + "| mem | mem_last_cache | [6, 5] | [7, 8] | 1 | 60 |", + "+-------+---------------------+-------------+---------------+-------+-------+", ], &batches ); @@ -204,11 +204,11 @@ async fn last_caches_table() { .unwrap(); let batches = collect_stream(resp).await; assert_batches_sorted_eq!([ - "+-------+--------------------------------+---------------------+---------------+-------+-------+", - "| table | name | key_columns | value_columns | count | ttl |", - "+-------+--------------------------------+---------------------+---------------+-------+-------+", - "| cpu | cpu_cpu_host_region_last_cache | [cpu, host, region] | | 5 | 14400 |", - "+-------+--------------------------------+---------------------+---------------+-------+-------+", + "+-------+--------------------------------+-------------+---------------+-------+-------+", + "| table | name | key_columns | value_columns | count | ttl |", + "+-------+--------------------------------+-------------+---------------+-------+-------+", + "| cpu | cpu_cpu_host_region_last_cache | [11, 10, 9] | | 5 | 14400 |", + "+-------+--------------------------------+-------------+---------------+-------+-------+", ], &batches ); @@ -237,11 +237,11 @@ async fn last_caches_table() { let batches = collect_stream(resp).await; assert_batches_sorted_eq!( [ - "+-------+----------------+----------------+---------------+-------+-----+", - "| table | name | key_columns | value_columns | count | ttl |", - "+-------+----------------+----------------+---------------+-------+-----+", - "| mem | mem_last_cache | [host, region] | [time, usage] | 1 | 60 |", - "+-------+----------------+----------------+---------------+-------+-----+", + "+-------+----------------+-------------+---------------+-------+-----+", + "| table | name | key_columns | value_columns | count | ttl |", + "+-------+----------------+-------------+---------------+-------+-----+", + "| mem | mem_last_cache | [6, 5] | [7, 8] | 1 | 60 |", + "+-------+----------------+-------------+---------------+-------+-----+", ], &batches ); @@ -268,11 +268,11 @@ async fn last_caches_table() { .unwrap(); let batches = collect_stream(resp).await; assert_batches_sorted_eq!([ - "+-------+--------------------------------+---------------------+---------------+-------+-------+", - "| table | name | key_columns | value_columns | count | ttl |", - "+-------+--------------------------------+---------------------+---------------+-------+-------+", - "| cpu | cpu_cpu_host_region_last_cache | [cpu, host, region] | | 5 | 14400 |", - "+-------+--------------------------------+---------------------+---------------+-------+-------+", + "+-------+--------------------------------+-------------+---------------+-------+-------+", + "| table | name | key_columns | value_columns | count | ttl |", + "+-------+--------------------------------+-------------+---------------+-------+-------+", + "| cpu | cpu_cpu_host_region_last_cache | [11, 10, 9] | | 5 | 14400 |", + 
"+-------+--------------------------------+-------------+---------------+-------+-------+", ], &batches ); diff --git a/influxdb3_catalog/Cargo.toml b/influxdb3_catalog/Cargo.toml index e24ef603fb..efb40111a2 100644 --- a/influxdb3_catalog/Cargo.toml +++ b/influxdb3_catalog/Cargo.toml @@ -18,6 +18,8 @@ influxdb3_wal = { path = "../influxdb3_wal" } # crates.io dependencies arrow.workspace = true bimap.workspace = true +hashbrown.workspace = true +indexmap.workspace = true parking_lot.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/influxdb3_catalog/src/catalog.rs b/influxdb3_catalog/src/catalog.rs index d3a7123a93..830a421fb3 100644 --- a/influxdb3_catalog/src/catalog.rs +++ b/influxdb3_catalog/src/catalog.rs @@ -1,9 +1,10 @@ //! Implementation of the Catalog that sits entirely in memory. use crate::catalog::Error::TableNotFound; -use arrow::datatypes::SchemaRef; use bimap::BiHashMap; -use influxdb3_id::{ColumnId, DbId, SerdeVecHashMap, TableId}; +use hashbrown::HashMap; +use indexmap::IndexMap; +use influxdb3_id::{ColumnId, DbId, SerdeVecMap, TableId}; use influxdb3_wal::{ CatalogBatch, CatalogOp, FieldAdditions, LastCacheDefinition, LastCacheDelete, }; @@ -40,7 +41,10 @@ pub enum Error { TooManyDbs, #[error("Table {} not in DB schema for {}", table_name, db_name)] - TableNotFound { db_name: String, table_name: String }, + TableNotFound { + db_name: Arc, + table_name: Arc, + }, #[error( "Field type mismatch on table {} column {}. Existing column is {} but attempted to add {}", @@ -57,15 +61,13 @@ pub enum Error { }, #[error( - "Series key mismatch on table {}. Existing table has {} but attempted to add {}", + "Series key mismatch on table {}. Existing table has {}", table_name, - existing, - attempted + existing )] SeriesKeyMismatch { table_name: String, existing: String, - attempted: String, }, } @@ -79,7 +81,7 @@ pub const TIME_COLUMN_NAME: &str = "time"; )] pub struct SequenceNumber(u32); -type SeriesKey = Option>; +type SeriesKey = Option>; impl SequenceNumber { pub fn new(id: u32) -> Self { @@ -218,8 +220,14 @@ impl Catalog { .expect("db should exist") .as_ref() .clone(); - let table = db.tables.get_mut(&table_id).expect("table should exist"); + let mut table = db + .tables + .get(&table_id) + .expect("table should exist") + .as_ref() + .clone(); table.add_last_cache(last_cache); + db.tables.insert(table_id, Arc::new(table)); inner.databases.insert(db_id, Arc::new(db)); inner.sequence = inner.sequence.next(); inner.updated = true; @@ -233,8 +241,14 @@ impl Catalog { .expect("db should exist") .as_ref() .clone(); - let table = db.tables.get_mut(&table_id).expect("table should exist"); + let mut table = db + .tables + .get(&table_id) + .expect("table should exist") + .as_ref() + .clone(); table.remove_last_cache(name); + db.tables.insert(table_id, Arc::new(table)); inner.databases.insert(db_id, Arc::new(db)); inner.sequence = inner.sequence.next(); inner.updated = true; @@ -284,7 +298,7 @@ impl Catalog { #[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Clone, Default)] pub struct InnerCatalog { /// The catalog is a map of databases with their table schemas - databases: SerdeVecHashMap>, + databases: SerdeVecMap>, sequence: SequenceNumber, /// The host_id is the prefix that is passed in when starting up (`host_identifier_prefix`) host_id: Arc, @@ -353,7 +367,7 @@ serde_with::serde_conv!( impl InnerCatalog { pub(crate) fn new(host_id: Arc, instance_id: Arc) -> Self { Self { - databases: SerdeVecHashMap::new(), + databases: SerdeVecMap::new(), 
sequence: SequenceNumber::new(0), host_id, instance_id, @@ -414,14 +428,12 @@ impl InnerCatalog { } } -#[serde_with::serde_as] -#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Clone)] +#[derive(Debug, Eq, PartialEq, Clone)] pub struct DatabaseSchema { pub id: DbId, pub name: Arc, /// The database is a map of tables - pub tables: SerdeVecHashMap, - #[serde_as(as = "TableMapAsArray")] + pub tables: SerdeVecMap>, pub table_map: BiHashMap>, } @@ -439,7 +451,7 @@ impl DatabaseSchema { /// everything is compatible and there are no updates to the existing schema, None will be /// returned, otherwise a new `DatabaseSchema` will be returned with the updates applied. pub fn new_if_updated_from_batch(&self, catalog_batch: &CatalogBatch) -> Result> { - let mut updated_or_new_tables = SerdeVecHashMap::new(); + let mut updated_or_new_tables = SerdeVecMap::new(); for catalog_op in &catalog_batch.ops { match catalog_op { @@ -452,11 +464,11 @@ impl DatabaseSchema { if let Some(new_table) = existing_table.new_if_definition_adds_new_fields(table_definition)? { - updated_or_new_tables.insert(new_table.table_id, new_table); + updated_or_new_tables.insert(new_table.table_id, Arc::new(new_table)); } } else { let new_table = TableDefinition::new_from_op(table_definition); - updated_or_new_tables.insert(new_table.table_id, new_table); + updated_or_new_tables.insert(new_table.table_id, Arc::new(new_table)); } } CatalogOp::AddFields(field_additions) => { @@ -467,13 +479,13 @@ impl DatabaseSchema { if let Some(new_table) = existing_table.new_if_field_additions_add_fields(field_additions)? { - updated_or_new_tables.insert(new_table.table_id, new_table); + updated_or_new_tables.insert(new_table.table_id, Arc::new(new_table)); } } else { let fields = field_additions .field_definitions .iter() - .map(|f| (f.name.to_string(), f.data_type.into())) + .map(|f| (f.id, Arc::clone(&f.name), f.data_type.into())) .collect::>(); let new_table = TableDefinition::new( field_additions.table_id, @@ -481,7 +493,7 @@ impl DatabaseSchema { fields, SeriesKey::None, )?; - updated_or_new_tables.insert(new_table.table_id, new_table); + updated_or_new_tables.insert(new_table.table_id, Arc::new(new_table)); } } CatalogOp::CreateLastCache(last_cache_definition) => { @@ -490,14 +502,14 @@ impl DatabaseSchema { .or_else(|| self.tables.get(&last_cache_definition.table_id)); let table = new_or_existing_table.ok_or(TableNotFound { - db_name: self.name.to_string(), - table_name: last_cache_definition.table.clone(), + db_name: Arc::clone(&self.name), + table_name: Arc::clone(&last_cache_definition.table), })?; if let Some(new_table) = table.new_if_last_cache_definition_is_new(last_cache_definition) { - updated_or_new_tables.insert(new_table.table_id, new_table); + updated_or_new_tables.insert(new_table.table_id, Arc::new(new_table)); } } CatalogOp::DeleteLastCache(last_cache_deletion) => { @@ -506,14 +518,14 @@ impl DatabaseSchema { .or_else(|| self.tables.get(&last_cache_deletion.table_id)); let table = new_or_existing_table.ok_or(TableNotFound { - db_name: self.name.to_string(), - table_name: last_cache_deletion.table_name.clone(), + db_name: Arc::clone(&self.name), + table_name: Arc::clone(&last_cache_deletion.table_name), })?; if let Some(new_table) = table.new_if_last_cache_deletes_existing(last_cache_deletion) { - updated_or_new_tables.insert(new_table.table_id, new_table); + updated_or_new_tables.insert(new_table.table_id, Arc::new(new_table)); } } } @@ -524,7 +536,7 @@ impl DatabaseSchema { } else { for (table_id, table_def) in 
&self.tables { if !updated_or_new_tables.contains_key(table_id) { - updated_or_new_tables.insert(*table_id, table_def.clone()); + updated_or_new_tables.insert(*table_id, Arc::clone(table_def)); } } @@ -554,6 +566,17 @@ impl DatabaseSchema { Ok(new_db) } + /// Insert a [`TableDefinition`] to the `tables` map and also update the `table_map` + pub fn insert_table( + &mut self, + table_id: TableId, + table_def: Arc, + ) -> Option> { + self.table_map + .insert(table_id, Arc::clone(&table_def.table_name)); + self.tables.insert(table_id, table_def) + } + pub fn table_schema(&self, table_name: impl Into>) -> Option { self.table_schema_and_id(table_name) .map(|(_, schema)| schema.clone()) @@ -579,14 +602,27 @@ impl DatabaseSchema { }) } - pub fn table_definition(&self, table_name: impl Into>) -> Option<&TableDefinition> { + pub fn table_definition( + &self, + table_name: impl Into>, + ) -> Option> { self.table_map .get_by_right(&table_name.into()) - .and_then(|table_id| self.tables.get(table_id)) + .and_then(|table_id| self.tables.get(table_id).cloned()) } - pub fn table_definition_by_id(&self, table_id: TableId) -> Option<&TableDefinition> { - self.tables.get(&table_id) + pub fn table_definition_by_id(&self, table_id: TableId) -> Option> { + self.tables.get(&table_id).cloned() + } + + pub fn table_definition_and_id( + &self, + table_name: impl Into>, + ) -> Option<(TableId, Arc)> { + let table_id = self.table_map.get_by_right(&table_name.into())?; + self.tables + .get(table_id) + .map(|table_def| (*table_id, Arc::clone(table_def))) } pub fn table_ids(&self) -> Vec { @@ -604,8 +640,8 @@ impl DatabaseSchema { self.tables.contains_key(&table_id) } - pub fn tables(&self) -> impl Iterator { - self.tables.values() + pub fn tables(&self) -> impl Iterator> + use<'_> { + self.tables.values().map(Arc::clone) } pub fn table_name_to_id(&self, table_name: impl Into>) -> Option { @@ -621,53 +657,77 @@ impl DatabaseSchema { pub struct TableDefinition { pub table_id: TableId, pub table_name: Arc, - pub schema: TableSchema, - pub last_caches: BTreeMap, + pub schema: Schema, + pub columns: IndexMap, + pub column_map: BiHashMap>, + pub series_key: Option>, + pub last_caches: HashMap, LastCacheDefinition>, } impl TableDefinition { /// Create a new [`TableDefinition`] /// /// Ensures the provided columns will be ordered before constructing the schema. 
- pub fn new>( + pub fn new( table_id: TableId, table_name: Arc, - columns: impl AsRef<[(CN, InfluxColumnType)]>, - series_key: Option>>, + columns: Vec<(ColumnId, Arc, InfluxColumnType)>, + series_key: Option>, ) -> Result { // ensure we're under the column limit - if columns.as_ref().len() > Catalog::NUM_COLUMNS_PER_TABLE_LIMIT { + if columns.len() > Catalog::NUM_COLUMNS_PER_TABLE_LIMIT { return Err(Error::TooManyColumns); } // Use a BTree to ensure that the columns are ordered: let mut ordered_columns = BTreeMap::new(); - for (name, column_type) in columns.as_ref() { - ordered_columns.insert(name.as_ref(), column_type); + for (col_id, name, column_type) in &columns { + ordered_columns.insert(name.as_ref(), (col_id, column_type)); } - let mut schema_builder = SchemaBuilder::with_capacity(columns.as_ref().len()); + let mut schema_builder = SchemaBuilder::with_capacity(columns.len()); schema_builder.measurement(table_name.as_ref()); - if let Some(sk) = series_key { - schema_builder.with_series_key(sk); - } - for (name, column_type) in ordered_columns { + let mut columns = IndexMap::with_capacity(ordered_columns.len()); + let mut column_map = BiHashMap::with_capacity(ordered_columns.len()); + for (name, (col_id, column_type)) in ordered_columns { schema_builder.influx_column(name, *column_type); + let not_nullable = matches!(column_type, InfluxColumnType::Timestamp) + || series_key.as_ref().is_some_and(|sk| sk.contains(col_id)); + columns.insert( + *col_id, + ColumnDefinition::new(*col_id, name, *column_type, !not_nullable), + ); + column_map.insert(*col_id, name.into()); } - let schema = TableSchema::new(schema_builder.build().unwrap()); + if let Some(sk) = series_key.clone() { + schema_builder.with_series_key(sk.into_iter().map(|id| { + column_map + .get_by_left(&id) + // NOTE: should this be an error instead of panic? 
+ .expect("invalid column id in series key definition") + })); + } + let schema = schema_builder.build().expect("schema should be valid"); Ok(Self { table_id, table_name, schema, - last_caches: BTreeMap::new(), + columns, + column_map, + series_key, + last_caches: HashMap::new(), }) } /// Create a new table definition from a catalog op pub fn new_from_op(table_definition: &influxdb3_wal::TableDefinition) -> Self { - let mut columns = Vec::new(); + let mut columns = Vec::with_capacity(table_definition.field_definitions.len()); for field_def in &table_definition.field_definitions { - columns.push((field_def.name.as_ref(), field_def.data_type.into())); + columns.push(( + field_def.id, + Arc::clone(&field_def.name), + field_def.data_type.into(), + )); } Self::new( table_definition.table_id, @@ -685,28 +745,17 @@ impl TableDefinition { table_definition: &influxdb3_wal::TableDefinition, ) -> Result> { // validate the series key is the same - let existing_key = self - .schema - .schema() - .series_key() - .map(|k| k.iter().map(|v| v.to_string()).collect()); - - if table_definition.key != existing_key { + if table_definition.key != self.series_key { return Err(Error::SeriesKeyMismatch { table_name: self.table_name.to_string(), - existing: existing_key.unwrap_or_default().join("/"), - attempted: table_definition.key.clone().unwrap_or_default().join("/"), + existing: self.schema.series_key().unwrap_or_default().join("/"), }); } - let mut new_fields: Vec<(String, InfluxColumnType)> = + let mut new_fields: Vec<(ColumnId, Arc, InfluxColumnType)> = Vec::with_capacity(table_definition.field_definitions.len()); for field_def in &table_definition.field_definitions { - if let Some(existing_type) = self - .schema - .schema() - .field_type_by_name(field_def.name.as_ref()) - { + if let Some(existing_type) = self.columns.get(&field_def.id).map(|def| def.data_type) { if existing_type != field_def.data_type.into() { return Err(Error::FieldTypeMismatch { table_name: self.table_name.to_string(), @@ -716,7 +765,11 @@ impl TableDefinition { }); } } else { - new_fields.push((field_def.name.to_string(), field_def.data_type.into())); + new_fields.push(( + field_def.id, + Arc::clone(&field_def.name), + field_def.data_type.into(), + )); } } @@ -736,22 +789,22 @@ impl TableDefinition { field_additions: &FieldAdditions, ) -> Result> { let mut new_fields = Vec::with_capacity(field_additions.field_definitions.len()); - for c in &field_additions.field_definitions { - let field_type = c.data_type.into(); - match self.field_type_by_name(&c.name) { - None => { - new_fields.push((c.name.to_string(), field_type)); - } - Some(existing_field_type) => { - if existing_field_type != field_type { - return Err(Error::FieldTypeMismatch { - table_name: self.table_name.to_string(), - column_name: c.name.to_string(), - existing: existing_field_type, - attempted: field_type, - }); - } + for field_def in &field_additions.field_definitions { + if let Some(existing_type) = self.columns.get(&field_def.id).map(|def| def.data_type) { + if existing_type != field_def.data_type.into() { + return Err(Error::FieldTypeMismatch { + table_name: self.table_name.to_string(), + column_name: field_def.name.to_string(), + existing: existing_type, + attempted: field_def.data_type.into(), + }); } + } else { + new_fields.push(( + field_def.id, + Arc::clone(&field_def.name), + field_def.data_type.into(), + )); } } @@ -790,23 +843,34 @@ impl TableDefinition { } } - /// Check if the column exists in the [`TableDefinition`]s schema - pub fn column_exists(&self, column: 
&str) -> bool { - self.influx_schema().find_index_of(column).is_some() + /// Check if the column exists in the [`TableDefinition`] + pub fn column_exists(&self, column: impl Into>) -> bool { + self.column_map.get_by_right(&column.into()).is_some() } /// Add the columns to this [`TableDefinition`] /// /// This ensures that the resulting schema has its columns ordered - pub fn add_columns(&mut self, columns: Vec<(String, InfluxColumnType)>) -> Result<()> { + pub fn add_columns( + &mut self, + columns: Vec<(ColumnId, Arc, InfluxColumnType)>, + ) -> Result<()> { // Use BTree to insert existing and new columns, and use that to generate the // resulting schema, to ensure column order is consistent: let mut cols = BTreeMap::new(); - for (col_type, field) in self.influx_schema().iter() { - cols.insert(field.name(), col_type); + for (_, col_def) in self.columns.drain(..) { + cols.insert(Arc::clone(&col_def.name), col_def); } - for (name, column_type) in columns.iter() { - cols.insert(name, *column_type); + for (id, name, column_type) in columns { + assert!( + cols.insert( + Arc::clone(&name), + // any new column added by this function must be nullable: + ColumnDefinition::new(id, name, column_type, true) + ) + .is_none(), + "attempted to add existing column" + ); } // ensure we don't go over the column limit @@ -814,50 +878,49 @@ impl TableDefinition { return Err(Error::TooManyColumns); } - let mut schema_builder = SchemaBuilder::with_capacity(columns.len()); + let mut schema_builder = SchemaBuilder::with_capacity(cols.len()); // TODO: may need to capture some schema-level metadata, currently, this causes trouble in // tests, so I am omitting this for now: // schema_builder.measurement(&self.name); - for (name, col_type) in cols { - schema_builder.influx_column(name, col_type); + for (name, col_def) in &cols { + schema_builder.influx_column(name.as_ref(), col_def.data_type); } - let schema = schema_builder.build().unwrap(); + let schema = schema_builder.build().expect("schema should be valid"); + self.schema = schema; - // Now that we have all the columns we know will be added and haven't - // triggered the limit add the ColumnId <-> Name mapping to the schema - for (name, _) in columns.iter() { - self.schema.add_column(name); - } - - self.schema.schema = schema; + self.columns = cols + .into_iter() + .inspect(|(_, def)| { + self.column_map.insert(def.id, Arc::clone(&def.name)); + }) + .map(|(_, def)| (def.id, def)) + .collect(); Ok(()) } - pub fn index_columns(&self) -> Vec<&str> { - self.influx_schema() + pub fn index_column_ids(&self) -> Vec { + self.columns .iter() - .filter_map(|(col_type, field)| match col_type { - InfluxColumnType::Tag => Some(field.name().as_str()), + .filter_map(|(id, def)| match def.data_type { + InfluxColumnType::Tag => Some(*id), InfluxColumnType::Field(_) | InfluxColumnType::Timestamp => None, }) .collect() } - pub fn schema(&self) -> &TableSchema { - &self.schema - } - pub fn influx_schema(&self) -> &Schema { - &self.schema.schema + &self.schema } pub fn num_columns(&self) -> usize { self.influx_schema().len() } - pub fn field_type_by_name(&self, name: &str) -> Option { - self.influx_schema().field_type_by_name(name) + pub fn field_type_by_name(&self, name: impl Into>) -> Option { + self.column_name_to_id(name) + .and_then(|id| self.columns.get(&id)) + .map(|def| def.data_type) } pub fn is_v3(&self) -> bool { @@ -867,7 +930,7 @@ impl TableDefinition { /// Add a new last cache to this table definition pub fn add_last_cache(&mut self, last_cache: LastCacheDefinition) { 
self.last_caches - .insert(last_cache.name.to_string(), last_cache); + .insert(Arc::clone(&last_cache.name), last_cache); } /// Remove a last cache from the table definition @@ -875,102 +938,67 @@ impl TableDefinition { self.last_caches.remove(name); } - pub fn last_caches(&self) -> impl Iterator { - self.last_caches.iter() - } -} - -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct TableSchema { - schema: Schema, - column_map: BiHashMap>, - next_column_id: ColumnId, -} - -impl TableSchema { - /// Creates a new `TableSchema` from scratch and assigns an id based off the - /// index. Any changes to the schema after this point will use the - /// next_column_id. For example if we have a column foo and drop it and then - /// add a new column foo, then it will use a new id, not reuse the old one. - fn new(schema: Schema) -> Self { - let column_map: BiHashMap> = schema - .as_arrow() - .fields() + pub fn last_caches(&self) -> impl Iterator, &LastCacheDefinition)> { + self.last_caches .iter() - .enumerate() - .map(|(idx, field)| (ColumnId::from(idx as u16), field.name().as_str().into())) - .collect(); - Self { - schema, - next_column_id: ColumnId::from(column_map.len() as u16), - column_map, - } + .map(|(name, def)| (Arc::clone(name), def)) } - pub(crate) fn new_with_mapping( - schema: Schema, - column_map: BiHashMap>, - next_column_id: ColumnId, - ) -> Self { - Self { - schema, - column_map, - next_column_id, - } + pub fn column_name_to_id(&self, name: impl Into>) -> Option { + self.column_map.get_by_right(&name.into()).copied() } - pub fn as_arrow(&self) -> SchemaRef { - self.schema.as_arrow() - } - - pub fn schema(&self) -> &Schema { - &self.schema - } - - pub(crate) fn column_map(&self) -> &BiHashMap> { - &self.column_map - } - - pub(crate) fn next_column_id(&self) -> ColumnId { - self.next_column_id - } - - fn add_column(&mut self, column_name: &str) { - let id = self.next_column_id; - self.next_column_id = id.next_id(); - self.column_map.insert(id, column_name.into()); - } - - pub fn series_key(&self) -> Option> { - self.schema.series_key() - } - - pub fn iter(&self) -> schema::SchemaIter<'_> { - self.schema.iter() - } - - pub fn name_to_id(&self, name: Arc) -> Option { - self.column_map.get_by_right(&name).copied() - } - - pub fn id_to_name(&self, id: ColumnId) -> Option> { + pub fn column_id_to_name(&self, id: ColumnId) -> Option> { self.column_map.get_by_left(&id).cloned() } - pub fn name_to_id_unchecked(&self, name: Arc) -> ColumnId { + pub fn column_name_to_id_unchecked(&self, name: Arc) -> ColumnId { *self .column_map .get_by_right(&name) .expect("Column exists in mapping") } - pub fn id_to_name_unchecked(&self, id: ColumnId) -> Arc { + pub fn column_id_to_name_unchecked(&self, id: ColumnId) -> Arc { Arc::clone( self.column_map .get_by_left(&id) .expect("Column exists in mapping"), ) } + + pub fn column_def_and_id( + &self, + name: impl Into>, + ) -> Option<(ColumnId, &ColumnDefinition)> { + self.column_map + .get_by_right(&name.into()) + .and_then(|id| self.columns.get(id).map(|def| (*id, def))) + } +} + +#[derive(Debug, Eq, PartialEq, Clone)] +pub struct ColumnDefinition { + pub id: ColumnId, + pub name: Arc, + pub data_type: InfluxColumnType, + pub nullable: bool, +} + +impl ColumnDefinition { + pub fn new( + id: ColumnId, + name: impl Into>, + data_type: InfluxColumnType, + nullable: bool, + ) -> Self { + Self { + id, + name: name.into(), + data_type, + nullable, + } + } } pub fn influx_column_type_from_field_value(fv: &FieldValue<'_>) -> InfluxColumnType { @@ -990,7 +1018,7 @@ mod 
tests { use super::*; - type SeriesKey = Option>; + type SeriesKey = Option>; #[test] fn catalog_serialization() { @@ -1001,7 +1029,7 @@ mod tests { let mut database = DatabaseSchema { id: DbId::from(0), name: "test_db".into(), - tables: SerdeVecHashMap::new(), + tables: SerdeVecMap::new(), table_map: { let mut map = BiHashMap::new(); map.insert(TableId::from(1), "test_table_1".into()); @@ -1013,43 +1041,47 @@ mod tests { use InfluxFieldType::*; database.tables.insert( TableId::from(1), - TableDefinition::new( - TableId::from(1), - "test_table_1".into(), - [ - ("tag_1", Tag), - ("tag_2", Tag), - ("tag_3", Tag), - ("time", Timestamp), - ("string_field", Field(String)), - ("bool_field", Field(Boolean)), - ("i64_field", Field(Integer)), - ("u64_field", Field(UInteger)), - ("f64_field", Field(Float)), - ], - SeriesKey::None, - ) - .unwrap(), + Arc::new( + TableDefinition::new( + TableId::from(1), + "test_table_1".into(), + vec![ + (ColumnId::new(), "tag_1".into(), Tag), + (ColumnId::new(), "tag_2".into(), Tag), + (ColumnId::new(), "tag_3".into(), Tag), + (ColumnId::new(), "time".into(), Timestamp), + (ColumnId::new(), "string_field".into(), Field(String)), + (ColumnId::new(), "bool_field".into(), Field(Boolean)), + (ColumnId::new(), "i64_field".into(), Field(Integer)), + (ColumnId::new(), "u64_field".into(), Field(UInteger)), + (ColumnId::new(), "f64_field".into(), Field(Float)), + ], + SeriesKey::None, + ) + .unwrap(), + ), ); database.tables.insert( TableId::from(2), - TableDefinition::new( - TableId::from(2), - "test_table_2".into(), - [ - ("tag_1", Tag), - ("tag_2", Tag), - ("tag_3", Tag), - ("time", Timestamp), - ("string_field", Field(String)), - ("bool_field", Field(Boolean)), - ("i64_field", Field(Integer)), - ("u64_field", Field(UInteger)), - ("f64_field", Field(Float)), - ], - SeriesKey::None, - ) - .unwrap(), + Arc::new( + TableDefinition::new( + TableId::from(2), + "test_table_2".into(), + vec![ + (ColumnId::new(), "tag_1".into(), Tag), + (ColumnId::new(), "tag_2".into(), Tag), + (ColumnId::new(), "tag_3".into(), Tag), + (ColumnId::new(), "time".into(), Timestamp), + (ColumnId::new(), "string_field".into(), Field(String)), + (ColumnId::new(), "bool_field".into(), Field(Boolean)), + (ColumnId::new(), "i64_field".into(), Field(Integer)), + (ColumnId::new(), "u64_field".into(), Field(UInteger)), + (ColumnId::new(), "f64_field".into(), Field(Float)), + ], + SeriesKey::None, + ) + .unwrap(), + ), ); catalog .inner @@ -1057,6 +1089,13 @@ mod tests { .databases .insert(database.id, Arc::new(database)); + insta::with_settings!({ + sort_maps => true, + description => "catalog serialization to help catch breaking changes" + }, { + insta::assert_json_snapshot!(catalog); + }); + // Serialize/deserialize to ensure roundtrip to/from JSON let serialized = serde_json::to_string(&catalog).unwrap(); let deserialized_inner: InnerCatalog = serde_json::from_str(&serialized).unwrap(); @@ -1113,8 +1152,7 @@ mod tests { { "table_id": 0, "table_name": "tbl1", - "cols": {}, - "column_map": [], + "cols": [], "next_column_id": 0 } ], @@ -1123,8 +1161,7 @@ mod tests { { "table_id": 0, "table_name": "tbl1", - "cols": {}, - "column_map": [], + "cols": [], "next_column_id": 0 } ] @@ -1155,27 +1192,28 @@ mod tests { { "table_id": 0, "table_name": "tbl1", - "cols": { - "col1": { - "column_id": 0, - "type": "i64", - "influx_type": "field", - "nullable": true - }, - "col1": { - "column_id": 0, - "type": "u64", - "influx_type": "field", - "nullable": true - } - }, - "column_map": [ - { - "column_id": 0, - "name": 
"col1" - } - ], - "next_column_id": 1 + "cols": [ + [ + 0, + { + "id": 0, + "name": "col", + "type": "i64", + "influx_type": "field", + "nullable": true + } + ], + [ + 0, + { + "id": 0, + "name": "col", + "type": "u64", + "influx_type": "field", + "nullable": true + } + ] + ] } ] ] @@ -1184,7 +1222,7 @@ mod tests { ] }"#; let err = serde_json::from_str::(json).unwrap_err(); - assert_contains!(err.to_string(), "found duplicate key"); + assert_contains!(err.to_string(), "duplicate key found"); } } @@ -1193,29 +1231,37 @@ mod tests { let mut database = DatabaseSchema { id: DbId::from(0), name: "test".into(), - tables: SerdeVecHashMap::new(), + tables: SerdeVecMap::new(), table_map: BiHashMap::new(), }; database.tables.insert( TableId::from(0), - TableDefinition::new( - TableId::from(0), - "test".into(), - [( - "test".to_string(), - InfluxColumnType::Field(InfluxFieldType::String), - )], - SeriesKey::None, - ) - .unwrap(), + Arc::new( + TableDefinition::new( + TableId::from(0), + "test".into(), + vec![( + ColumnId::from(0), + "test".into(), + InfluxColumnType::Field(InfluxFieldType::String), + )], + SeriesKey::None, + ) + .unwrap(), + ), ); let table = database.tables.get_mut(&TableId::from(0)).unwrap(); - assert_eq!(table.schema.column_map().len(), 1); - assert_eq!(table.schema.id_to_name_unchecked(0.into()), "test".into()); + println!("table: {table:#?}"); + assert_eq!(table.column_map.len(), 1); + assert_eq!(table.column_id_to_name_unchecked(0.into()), "test".into()); - table - .add_columns(vec![("test2".to_string(), InfluxColumnType::Tag)]) + Arc::make_mut(table) + .add_columns(vec![( + ColumnId::from(1), + "test2".into(), + InfluxColumnType::Tag, + )]) .unwrap(); let schema = table.influx_schema(); assert_eq!( @@ -1224,8 +1270,9 @@ mod tests { ); assert_eq!(schema.field(1).0, InfluxColumnType::Tag); - assert_eq!(table.schema.column_map().len(), 2); - assert_eq!(table.schema.name_to_id_unchecked("test2".into()), 1.into()); + println!("table: {table:#?}"); + assert_eq!(table.column_map.len(), 2); + assert_eq!(table.column_name_to_id_unchecked("test2".into()), 1.into()); } #[test] @@ -1236,7 +1283,7 @@ mod tests { let mut database = DatabaseSchema { id: DbId::from(0), name: "test_db".into(), - tables: SerdeVecHashMap::new(), + tables: SerdeVecMap::new(), table_map: { let mut map = BiHashMap::new(); map.insert(TableId::from(1), "test_table_1".into()); @@ -1247,23 +1294,25 @@ mod tests { use InfluxFieldType::*; database.tables.insert( TableId::from(1), - TableDefinition::new( - TableId::from(1), - "test_table_1".into(), - [ - ("tag_1", Tag), - ("tag_2", Tag), - ("tag_3", Tag), - ("time", Timestamp), - ("field", Field(String)), - ], - SeriesKey::Some(vec![ - "tag_1".to_string(), - "tag_2".to_string(), - "tag_3".to_string(), - ]), - ) - .unwrap(), + Arc::new( + TableDefinition::new( + TableId::from(1), + "test_table_1".into(), + vec![ + (ColumnId::from(0), "tag_1".into(), Tag), + (ColumnId::from(1), "tag_2".into(), Tag), + (ColumnId::from(2), "tag_3".into(), Tag), + (ColumnId::from(3), "time".into(), Timestamp), + (ColumnId::from(4), "field".into(), Field(String)), + ], + SeriesKey::Some(vec![ + ColumnId::from(0), + ColumnId::from(1), + ColumnId::from(2), + ]), + ) + .unwrap(), + ), ); catalog .inner @@ -1271,6 +1320,13 @@ mod tests { .databases .insert(database.id, Arc::new(database)); + insta::with_settings!({ + sort_maps => true, + description => "catalog serialization to help catch breaking changes" + }, { + insta::assert_json_snapshot!(catalog); + }); + let serialized = 
serde_json::to_string(&catalog).unwrap(); let deserialized_inner: InnerCatalog = serde_json::from_str(&serialized).unwrap(); let deserialized = Catalog::from_inner(deserialized_inner); @@ -1285,7 +1341,7 @@ mod tests { let mut database = DatabaseSchema { id: DbId::from(0), name: "test_db".into(), - tables: SerdeVecHashMap::new(), + tables: SerdeVecMap::new(), table_map: { let mut map = BiHashMap::new(); map.insert(TableId::from(0), "test".into()); @@ -1297,12 +1353,12 @@ mod tests { let mut table_def = TableDefinition::new( TableId::from(0), "test".into(), - [ - ("tag_1", Tag), - ("tag_2", Tag), - ("tag_3", Tag), - ("time", Timestamp), - ("field", Field(String)), + vec![ + (ColumnId::from(0), "tag_1".into(), Tag), + (ColumnId::from(1), "tag_2".into(), Tag), + (ColumnId::from(2), "tag_3".into(), Tag), + (ColumnId::from(3), "time".into(), Timestamp), + (ColumnId::from(4), "field".into(), Field(String)), ], SeriesKey::None, ) @@ -1312,20 +1368,29 @@ mod tests { TableId::from(0), "test", "test_table_last_cache", - ["tag_2", "tag_3"], - ["field"], + vec![ColumnId::from(1), ColumnId::from(2)], + vec![ColumnId::from(4)], 1, 600, ) .unwrap(), ); - database.tables.insert(TableId::from(0), table_def); + database + .tables + .insert(TableId::from(0), Arc::new(table_def)); catalog .inner .write() .databases .insert(database.id, Arc::new(database)); + insta::with_settings!({ + sort_maps => true, + description => "catalog serialization to help catch breaking changes" + }, { + insta::assert_json_snapshot!(catalog); + }); + let serialized = serde_json::to_string(&catalog).unwrap(); let deserialized_inner: InnerCatalog = serde_json::from_str(&serialized).unwrap(); let deserialized = Catalog::from_inner(deserialized_inner); diff --git a/influxdb3_catalog/src/serialize.rs b/influxdb3_catalog/src/serialize.rs index d96600c80a..8eefadaeff 100644 --- a/influxdb3_catalog/src/serialize.rs +++ b/influxdb3_catalog/src/serialize.rs @@ -1,15 +1,79 @@ +use crate::catalog::ColumnDefinition; +use crate::catalog::DatabaseSchema; use crate::catalog::TableDefinition; -use crate::catalog::TableSchema; use arrow::datatypes::DataType as ArrowDataType; use bimap::BiHashMap; use influxdb3_id::ColumnId; +use influxdb3_id::DbId; +use influxdb3_id::SerdeVecMap; use influxdb3_id::TableId; use influxdb3_wal::{LastCacheDefinition, LastCacheValueColumnsDef}; -use schema::{InfluxColumnType, SchemaBuilder}; +use schema::InfluxColumnType; +use schema::InfluxFieldType; +use schema::TIME_DATA_TIMEZONE; use serde::{Deserialize, Serialize}; -use std::collections::BTreeMap; use std::sync::Arc; +impl Serialize for DatabaseSchema { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let snapshot = DatabaseSnapshot::from(self); + snapshot.serialize(serializer) + } +} + +impl<'de> Deserialize<'de> for DatabaseSchema { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + DatabaseSnapshot::deserialize(deserializer).map(Into::into) + } +} + +#[derive(Debug, Serialize, Deserialize)] +struct DatabaseSnapshot { + id: DbId, + name: Arc, + tables: SerdeVecMap, +} + +impl From<&DatabaseSchema> for DatabaseSnapshot { + fn from(db: &DatabaseSchema) -> Self { + Self { + id: db.id, + name: Arc::clone(&db.name), + tables: db + .tables + .iter() + .map(|(table_id, table_def)| (*table_id, table_def.as_ref().into())) + .collect(), + } + } +} + +impl From for DatabaseSchema { + fn from(snap: DatabaseSnapshot) -> Self { + let mut table_map = BiHashMap::with_capacity(snap.tables.len()); + let 
tables = snap + .tables + .into_iter() + .map(|(id, table)| { + table_map.insert(id, Arc::clone(&table.table_name)); + (id, Arc::new(table.into())) + }) + .collect(); + Self { + id: snap.id, + name: snap.name, + tables, + table_map, + } + } +} + impl Serialize for TableDefinition { fn serialize(&self, serializer: S) -> Result where @@ -25,7 +89,7 @@ impl<'de> Deserialize<'de> for TableDefinition { where D: serde::Deserializer<'de>, { - TableSnapshot::<'de>::deserialize(deserializer).map(Into::into) + TableSnapshot::deserialize(deserializer).map(Into::into) } } @@ -35,59 +99,25 @@ impl<'de> Deserialize<'de> for TableDefinition { /// This is used over serde's `Serialize`/`Deserialize` implementations on the inner `Schema` type /// due to them being considered unstable. This type intends to mimic the structure of the Arrow /// `Schema`, and will help guard against potential breaking changes to the Arrow Schema types. -#[serde_with::serde_as] #[derive(Debug, Serialize, Deserialize)] -struct TableSnapshot<'a> { +struct TableSnapshot { table_id: TableId, - table_name: &'a str, + table_name: Arc, #[serde(default, skip_serializing_if = "Option::is_none")] - key: Option>, - #[serde_as(as = "serde_with::MapPreventDuplicates<_, _>")] - cols: BTreeMap<&'a str, ColumnDefinition<'a>>, + key: Option>, + cols: SerdeVecMap, #[serde(default, skip_serializing_if = "Vec::is_empty")] - last_caches: Vec>, - #[serde_as(as = "ColumnMapAsArray")] - column_map: BiHashMap>, - next_column_id: ColumnId, -} - -serde_with::serde_conv!( - ColumnMapAsArray, - BiHashMap>, - |map: &BiHashMap>| { - let mut vec = map.iter().fold(Vec::new(), |mut acc, (id, name)| { - acc.push(ColumnMap { - column_id: *id, - name: Arc::clone(&name) - }); - acc - }); - - vec.sort_by_key(|col| col.column_id); - vec - }, - |vec: Vec| -> Result<_, std::convert::Infallible> { - Ok(vec.into_iter().fold(BiHashMap::new(), |mut acc, column| { - acc.insert(column.column_id, column.name); - acc - })) - } -); - -#[derive(Debug, Serialize, Deserialize)] -struct ColumnMap { - column_id: ColumnId, - name: Arc, + last_caches: Vec, } /// Representation of Arrow's `DataType` for table snapshots. /// /// Uses `#[non_exhaustive]` with the assumption that variants will be added as we support /// more Arrow data types. -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Serialize, Deserialize, Clone)] #[serde(rename_all = "lowercase")] #[non_exhaustive] -enum DataType<'a> { +enum DataType { Null, Bool, I8, @@ -107,12 +137,12 @@ enum DataType<'a> { Bin, BigBin, BinView, - Dict(Box>, Box>), - Time(TimeUnit, Option<&'a str>), + Dict(Box, Box), + Time(TimeUnit, Option>), } /// Representation of Arrow's `TimeUnit` for table snapshots. 
-#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Serialize, Deserialize, Clone, Copy)] enum TimeUnit { #[serde(rename = "s")] Second, @@ -154,53 +184,80 @@ impl From for InfluxType { } } +impl From for DataType { + fn from(value: InfluxColumnType) -> Self { + match value { + InfluxColumnType::Tag => Self::Dict(Box::new(Self::I32), Box::new(Self::Str)), + InfluxColumnType::Field(field) => match field { + InfluxFieldType::Float => Self::F64, + InfluxFieldType::Integer => Self::I64, + InfluxFieldType::UInteger => Self::U64, + InfluxFieldType::String => Self::Str, + InfluxFieldType::Boolean => Self::Bool, + }, + InfluxColumnType::Timestamp => Self::Time(TimeUnit::Nanosecond, TIME_DATA_TIMEZONE()), + } + } +} + /// The inner column definition for a [`TableSnapshot`] #[derive(Debug, Serialize, Deserialize)] -struct ColumnDefinition<'a> { +struct ColumnDefinitionSnapshot { + name: Arc, /// The id of the column - column_id: ColumnId, + id: ColumnId, /// The column's data type - #[serde(borrow)] - r#type: DataType<'a>, + r#type: DataType, /// The columns Influx type influx_type: InfluxType, /// Whether the column can hold NULL values nullable: bool, } -impl<'a> From<&'a TableDefinition> for TableSnapshot<'a> { - fn from(def: &'a TableDefinition) -> Self { - let cols = def - .schema() - .iter() - .map(|(col_type, f)| { - ( - f.name().as_str(), - ColumnDefinition { - column_id: def.schema.name_to_id_unchecked(f.name().as_str().into()), - r#type: f.data_type().into(), - influx_type: col_type.into(), - nullable: f.is_nullable(), - }, - ) - }) - .collect(); - let keys = def.schema().series_key(); - let last_caches = def.last_caches.values().map(Into::into).collect(); +impl From for ColumnDefinition { + fn from(snap: ColumnDefinitionSnapshot) -> Self { Self { - table_id: def.table_id, - table_name: def.table_name.as_ref(), - cols, - key: keys, - last_caches, - next_column_id: def.schema.next_column_id(), - column_map: def.schema.column_map().clone(), + id: snap.id, + name: Arc::clone(&snap.name), + data_type: match snap.influx_type { + InfluxType::Tag => InfluxColumnType::Tag, + InfluxType::Field => InfluxColumnType::Field(InfluxFieldType::from(&snap.r#type)), + InfluxType::Time => InfluxColumnType::Timestamp, + }, + nullable: snap.nullable, } } } -impl<'a> From<&'a ArrowDataType> for DataType<'a> { - fn from(arrow_type: &'a ArrowDataType) -> Self { +impl From<&TableDefinition> for TableSnapshot { + fn from(def: &TableDefinition) -> Self { + Self { + table_id: def.table_id, + table_name: Arc::clone(&def.table_name), + key: def.series_key.clone(), + cols: def + .columns + .iter() + .map(|(col_id, col_def)| { + ( + *col_id, + ColumnDefinitionSnapshot { + name: Arc::clone(&col_def.name), + id: *col_id, + r#type: col_def.data_type.into(), + influx_type: col_def.data_type.into(), + nullable: col_def.nullable, + }, + ) + }) + .collect(), + last_caches: def.last_caches.values().map(Into::into).collect(), + } + } +} + +impl From<&ArrowDataType> for DataType { + fn from(arrow_type: &ArrowDataType) -> Self { match arrow_type { ArrowDataType::Null => Self::Null, ArrowDataType::Boolean => Self::Bool, @@ -215,7 +272,7 @@ impl<'a> From<&'a ArrowDataType> for DataType<'a> { ArrowDataType::Float16 => Self::F16, ArrowDataType::Float32 => Self::F32, ArrowDataType::Float64 => Self::F64, - ArrowDataType::Timestamp(unit, tz) => Self::Time((*unit).into(), tz.as_deref()), + ArrowDataType::Timestamp(unit, tz) => Self::Time((*unit).into(), tz.clone()), ArrowDataType::Date32 => unimplemented!(), ArrowDataType::Date64 => 
unimplemented!(), ArrowDataType::Time32(_) => unimplemented!(), @@ -248,45 +305,38 @@ impl<'a> From<&'a ArrowDataType> for DataType<'a> { } } -impl<'a> From> for TableDefinition { - fn from(snap: TableSnapshot<'a>) -> Self { - let table_name = snap.table_name.into(); +impl From for TableDefinition { + fn from(snap: TableSnapshot) -> Self { let table_id = snap.table_id; - let mut b = SchemaBuilder::new(); - b.measurement(snap.table_name.to_string()); - if let Some(keys) = snap.key { - b.with_series_key(keys); - } - for (name, col) in snap.cols { - match col.influx_type { - InfluxType::Tag => { - b.influx_column(name, schema::InfluxColumnType::Tag); - } - InfluxType::Field => { - b.influx_field(name, col.r#type.into()); - } - InfluxType::Time => { - b.timestamp(); - } - } - } - - let schema = TableSchema::new_with_mapping( - b.build().expect("valid schema from snapshot"), - snap.column_map, - snap.next_column_id, - ); - let last_caches = snap - .last_caches - .into_iter() - .map(|lc_snap| (lc_snap.name.to_string(), lc_snap.into())) - .collect(); - - Self { - table_name, + let table_def = Self::new( table_id, - schema, - last_caches, + snap.table_name, + snap.cols + .into_iter() + .map(|(id, def)| { + ( + id, + def.name, + match def.influx_type { + InfluxType::Tag => InfluxColumnType::Tag, + InfluxType::Field => { + InfluxColumnType::Field(InfluxFieldType::from(def.r#type)) + } + InfluxType::Time => InfluxColumnType::Timestamp, + }, + ) + }) + .collect(), + snap.key, + ) + .expect("serialized catalog should be valid"); + Self { + last_caches: snap + .last_caches + .into_iter() + .map(|lc_snap| (Arc::clone(&lc_snap.name), lc_snap.into())) + .collect(), + ..table_def } } } @@ -297,8 +347,21 @@ impl<'a> From> for TableDefinition { // has been defined to mimic the Arrow type. 
// // See -impl<'a> From> for schema::InfluxFieldType { - fn from(data_type: DataType<'a>) -> Self { +impl From for InfluxFieldType { + fn from(data_type: DataType) -> Self { + match data_type { + DataType::Bool => Self::Boolean, + DataType::I64 => Self::Integer, + DataType::U64 => Self::UInteger, + DataType::F64 => Self::Float, + DataType::Str => Self::String, + other => unimplemented!("unsupported data type in catalog {other:?}"), + } + } +} + +impl From<&DataType> for InfluxFieldType { + fn from(data_type: &DataType) -> Self { match data_type { DataType::Bool => Self::Boolean, DataType::I64 => Self::Integer, @@ -311,27 +374,25 @@ impl<'a> From> for schema::InfluxFieldType { } #[derive(Debug, Serialize, Deserialize)] -struct LastCacheSnapshot<'a> { +struct LastCacheSnapshot { table_id: TableId, - table: &'a str, - name: &'a str, - keys: Vec<&'a str>, - vals: Option>, + table: Arc, + name: Arc, + keys: Vec, + vals: Option>, n: usize, ttl: u64, } -impl<'a> From<&'a LastCacheDefinition> for LastCacheSnapshot<'a> { - fn from(lcd: &'a LastCacheDefinition) -> Self { +impl From<&LastCacheDefinition> for LastCacheSnapshot { + fn from(lcd: &LastCacheDefinition) -> Self { Self { table_id: lcd.table_id, - table: &lcd.table, - name: &lcd.name, - keys: lcd.key_columns.iter().map(|v| v.as_str()).collect(), + table: Arc::clone(&lcd.table), + name: Arc::clone(&lcd.name), + keys: lcd.key_columns.to_vec(), vals: match &lcd.value_columns { - LastCacheValueColumnsDef::Explicit { columns } => { - Some(columns.iter().map(|v| v.as_str()).collect()) - } + LastCacheValueColumnsDef::Explicit { columns } => Some(columns.to_vec()), LastCacheValueColumnsDef::AllNonKeyColumns => None, }, n: lcd.count.into(), @@ -340,17 +401,15 @@ impl<'a> From<&'a LastCacheDefinition> for LastCacheSnapshot<'a> { } } -impl<'a> From> for LastCacheDefinition { - fn from(snap: LastCacheSnapshot<'a>) -> Self { +impl From for LastCacheDefinition { + fn from(snap: LastCacheSnapshot) -> Self { Self { table_id: snap.table_id, - table: snap.table.to_string(), - name: snap.name.to_string(), - key_columns: snap.keys.iter().map(|s| s.to_string()).collect(), + table: snap.table, + name: snap.name, + key_columns: snap.keys, value_columns: match snap.vals { - Some(cols) => LastCacheValueColumnsDef::Explicit { - columns: cols.iter().map(|s| s.to_string()).collect(), - }, + Some(columns) => LastCacheValueColumnsDef::Explicit { columns }, None => LastCacheValueColumnsDef::AllNonKeyColumns, }, count: snap diff --git a/influxdb3_catalog/src/snapshots/influxdb3_catalog__catalog__tests__catalog_serialization.snap b/influxdb3_catalog/src/snapshots/influxdb3_catalog__catalog__tests__catalog_serialization.snap new file mode 100644 index 0000000000..e5b78da2b5 --- /dev/null +++ b/influxdb3_catalog/src/snapshots/influxdb3_catalog__catalog__tests__catalog_serialization.snap @@ -0,0 +1,260 @@ +--- +source: influxdb3_catalog/src/catalog.rs +description: catalog serialization to help catch breaking changes +expression: catalog +--- +{ + "databases": [ + [ + 0, + { + "id": 0, + "name": "test_db", + "tables": [ + [ + 1, + { + "table_id": 1, + "table_name": "test_table_1", + "cols": [ + [ + 5, + { + "name": "bool_field", + "id": 5, + "type": "bool", + "influx_type": "field", + "nullable": true + } + ], + [ + 8, + { + "name": "f64_field", + "id": 8, + "type": "f64", + "influx_type": "field", + "nullable": true + } + ], + [ + 6, + { + "name": "i64_field", + "id": 6, + "type": "i64", + "influx_type": "field", + "nullable": true + } + ], + [ + 4, + { + "name": 
"string_field", + "id": 4, + "type": "str", + "influx_type": "field", + "nullable": true + } + ], + [ + 0, + { + "name": "tag_1", + "id": 0, + "type": { + "dict": [ + "i32", + "str" + ] + }, + "influx_type": "tag", + "nullable": true + } + ], + [ + 1, + { + "name": "tag_2", + "id": 1, + "type": { + "dict": [ + "i32", + "str" + ] + }, + "influx_type": "tag", + "nullable": true + } + ], + [ + 2, + { + "name": "tag_3", + "id": 2, + "type": { + "dict": [ + "i32", + "str" + ] + }, + "influx_type": "tag", + "nullable": true + } + ], + [ + 3, + { + "name": "time", + "id": 3, + "type": { + "time": [ + "ns", + null + ] + }, + "influx_type": "time", + "nullable": false + } + ], + [ + 7, + { + "name": "u64_field", + "id": 7, + "type": "u64", + "influx_type": "field", + "nullable": true + } + ] + ] + } + ], + [ + 2, + { + "table_id": 2, + "table_name": "test_table_2", + "cols": [ + [ + 14, + { + "name": "bool_field", + "id": 14, + "type": "bool", + "influx_type": "field", + "nullable": true + } + ], + [ + 17, + { + "name": "f64_field", + "id": 17, + "type": "f64", + "influx_type": "field", + "nullable": true + } + ], + [ + 15, + { + "name": "i64_field", + "id": 15, + "type": "i64", + "influx_type": "field", + "nullable": true + } + ], + [ + 13, + { + "name": "string_field", + "id": 13, + "type": "str", + "influx_type": "field", + "nullable": true + } + ], + [ + 9, + { + "name": "tag_1", + "id": 9, + "type": { + "dict": [ + "i32", + "str" + ] + }, + "influx_type": "tag", + "nullable": true + } + ], + [ + 10, + { + "name": "tag_2", + "id": 10, + "type": { + "dict": [ + "i32", + "str" + ] + }, + "influx_type": "tag", + "nullable": true + } + ], + [ + 11, + { + "name": "tag_3", + "id": 11, + "type": { + "dict": [ + "i32", + "str" + ] + }, + "influx_type": "tag", + "nullable": true + } + ], + [ + 12, + { + "name": "time", + "id": 12, + "type": { + "time": [ + "ns", + null + ] + }, + "influx_type": "time", + "nullable": false + } + ], + [ + 16, + { + "name": "u64_field", + "id": 16, + "type": "u64", + "influx_type": "field", + "nullable": true + } + ] + ] + } + ] + ] + } + ] + ], + "sequence": 0, + "host_id": "sample-host-id", + "instance_id": "instance-id", + "db_map": [] +} diff --git a/influxdb3_catalog/src/snapshots/influxdb3_catalog__catalog__tests__serialize_last_cache.snap b/influxdb3_catalog/src/snapshots/influxdb3_catalog__catalog__tests__serialize_last_cache.snap new file mode 100644 index 0000000000..cd109deeeb --- /dev/null +++ b/influxdb3_catalog/src/snapshots/influxdb3_catalog__catalog__tests__serialize_last_cache.snap @@ -0,0 +1,117 @@ +--- +source: influxdb3_catalog/src/catalog.rs +description: catalog serialization to help catch breaking changes +expression: catalog +--- +{ + "databases": [ + [ + 0, + { + "id": 0, + "name": "test_db", + "tables": [ + [ + 0, + { + "table_id": 0, + "table_name": "test", + "cols": [ + [ + 4, + { + "name": "field", + "id": 4, + "type": "str", + "influx_type": "field", + "nullable": true + } + ], + [ + 0, + { + "name": "tag_1", + "id": 0, + "type": { + "dict": [ + "i32", + "str" + ] + }, + "influx_type": "tag", + "nullable": true + } + ], + [ + 1, + { + "name": "tag_2", + "id": 1, + "type": { + "dict": [ + "i32", + "str" + ] + }, + "influx_type": "tag", + "nullable": true + } + ], + [ + 2, + { + "name": "tag_3", + "id": 2, + "type": { + "dict": [ + "i32", + "str" + ] + }, + "influx_type": "tag", + "nullable": true + } + ], + [ + 3, + { + "name": "time", + "id": 3, + "type": { + "time": [ + "ns", + null + ] + }, + "influx_type": "time", + "nullable": false + } + 
] + ], + "last_caches": [ + { + "table_id": 0, + "table": "test", + "name": "test_table_last_cache", + "keys": [ + 1, + 2 + ], + "vals": [ + 4 + ], + "n": 1, + "ttl": 600 + } + ] + } + ] + ] + } + ] + ], + "sequence": 0, + "host_id": "sample-host-id", + "instance_id": "instance-id", + "db_map": [] +} diff --git a/influxdb3_catalog/src/snapshots/influxdb3_catalog__catalog__tests__serialize_series_keys.snap b/influxdb3_catalog/src/snapshots/influxdb3_catalog__catalog__tests__serialize_series_keys.snap new file mode 100644 index 0000000000..5190be04ed --- /dev/null +++ b/influxdb3_catalog/src/snapshots/influxdb3_catalog__catalog__tests__serialize_series_keys.snap @@ -0,0 +1,106 @@ +--- +source: influxdb3_catalog/src/catalog.rs +description: catalog serialization to help catch breaking changes +expression: catalog +--- +{ + "databases": [ + [ + 0, + { + "id": 0, + "name": "test_db", + "tables": [ + [ + 1, + { + "table_id": 1, + "table_name": "test_table_1", + "key": [ + 0, + 1, + 2 + ], + "cols": [ + [ + 4, + { + "name": "field", + "id": 4, + "type": "str", + "influx_type": "field", + "nullable": true + } + ], + [ + 0, + { + "name": "tag_1", + "id": 0, + "type": { + "dict": [ + "i32", + "str" + ] + }, + "influx_type": "tag", + "nullable": false + } + ], + [ + 1, + { + "name": "tag_2", + "id": 1, + "type": { + "dict": [ + "i32", + "str" + ] + }, + "influx_type": "tag", + "nullable": false + } + ], + [ + 2, + { + "name": "tag_3", + "id": 2, + "type": { + "dict": [ + "i32", + "str" + ] + }, + "influx_type": "tag", + "nullable": false + } + ], + [ + 3, + { + "name": "time", + "id": 3, + "type": { + "time": [ + "ns", + null + ] + }, + "influx_type": "time", + "nullable": false + } + ] + ] + } + ] + ] + } + ] + ], + "sequence": 0, + "host_id": "sample-host-id", + "instance_id": "instance-id", + "db_map": [] +} diff --git a/influxdb3_client/src/lib.rs b/influxdb3_client/src/lib.rs index 5e59382095..61e0fe61c6 100644 --- a/influxdb3_client/src/lib.rs +++ b/influxdb3_client/src/lib.rs @@ -711,7 +711,7 @@ pub struct LastCacheCreatedResponse { /// Given name of the cache pub name: String, /// Columns intended to be used as predicates in the cache - pub key_columns: Vec, + pub key_columns: Vec, /// Columns that store values in the cache pub value_columns: LastCacheValueColumnsDef, /// The number of last values to hold in the cache @@ -726,7 +726,7 @@ pub struct LastCacheCreatedResponse { #[serde(tag = "type", rename_all = "snake_case")] pub enum LastCacheValueColumnsDef { /// Explicit list of column names - Explicit { columns: Vec }, + Explicit { columns: Vec }, /// Stores all non-key columns AllNonKeyColumns, } @@ -999,10 +999,10 @@ mod tests { r#"{ "table": "table", "name": "cache_name", - "key_columns": ["col1", "col2"], + "key_columns": [0, 1], "value_columns": { "type": "explicit", - "columns": ["col3", "col4"] + "columns": [2, 3] }, "ttl": 120, "count": 5 diff --git a/influxdb3_id/Cargo.toml b/influxdb3_id/Cargo.toml index 4cf2c9cf2f..1c7675c1fd 100644 --- a/influxdb3_id/Cargo.toml +++ b/influxdb3_id/Cargo.toml @@ -6,7 +6,7 @@ edition.workspace = true license.workspace = true [dependencies] -hashbrown.workspace = true +indexmap.workspace = true serde.workspace = true [dev-dependencies] diff --git a/influxdb3_id/src/lib.rs b/influxdb3_id/src/lib.rs index bc9defd816..6791122183 100644 --- a/influxdb3_id/src/lib.rs +++ b/influxdb3_id/src/lib.rs @@ -6,7 +6,7 @@ use std::sync::atomic::AtomicU64; use std::sync::atomic::Ordering; mod serialize; -pub use serialize::SerdeVecHashMap; +pub use 
serialize::SerdeVecMap; #[derive(Debug, Copy, Clone, Eq, PartialOrd, Ord, PartialEq, Serialize, Deserialize, Hash)] pub struct DbId(u32); @@ -90,23 +90,29 @@ impl Display for TableId { } #[derive(Debug, Copy, Clone, Eq, PartialOrd, Ord, PartialEq, Serialize, Deserialize, Hash)] -pub struct ColumnId(u16); +pub struct ColumnId(u32); + +static NEXT_COLUMN_ID: AtomicU32 = AtomicU32::new(0); impl ColumnId { - pub fn new(id: u16) -> Self { - Self(id) + pub fn new() -> Self { + Self(NEXT_COLUMN_ID.fetch_add(1, Ordering::SeqCst)) } - pub fn next_id(&self) -> Self { - Self(self.0 + 1) + pub fn next_id() -> Self { + Self(NEXT_COLUMN_ID.load(Ordering::SeqCst)) } - pub fn as_u16(&self) -> u16 { + pub fn set_next_id(&self) { + NEXT_COLUMN_ID.store(self.0, Ordering::SeqCst) + } + + pub fn as_u32(&self) -> u32 { self.0 } } -impl From for ColumnId { - fn from(value: u16) -> Self { +impl From for ColumnId { + fn from(value: u32) -> Self { Self(value) } } @@ -117,6 +123,12 @@ impl Display for ColumnId { } } +impl Default for ColumnId { + fn default() -> Self { + Self::new() + } +} + /// The next file id to be used when persisting `ParquetFile`s pub static NEXT_FILE_ID: AtomicU64 = AtomicU64::new(0); diff --git a/influxdb3_id/src/serialize.rs b/influxdb3_id/src/serialize.rs index bdaf522f2b..89fbe68e8d 100644 --- a/influxdb3_id/src/serialize.rs +++ b/influxdb3_id/src/serialize.rs @@ -3,9 +3,9 @@ use std::{ ops::{Deref, DerefMut}, }; -use hashbrown::{ - hash_map::{IntoIter, Iter, IterMut}, - HashMap, +use indexmap::{ + map::{IntoIter, Iter, IterMut}, + IndexMap, }; use serde::{ de::{self, SeqAccess, Visitor}, @@ -13,28 +13,38 @@ use serde::{ Deserialize, Deserializer, Serialize, Serializer, }; -/// A new-type around a `HashMap` that provides special serialization and deserialization behaviour. +/// A new-type around a [`IndexMap`] that provides special serialization and deserialization behaviour. /// /// Specifically, it will be serialized as a vector of tuples, each tuple containing a key-value /// pair from the map. Deserialization assumes said serialization, and deserializes from the vector /// of tuples back into the map. Traits like `Deref`, `From`, etc. are implemented on this type such -/// that it can be used as a `HashMap`. +/// that it can be used as a `IndexMap`. /// /// During deserialization, there are no duplicate keys allowed. If duplicates are found, an error /// will be thrown. +/// +/// The `IndexMap` type is used to preserve insertion, and thereby iteration order. This ensures +/// consistent ordering of entities when this map is iterated over, for e.g., column ordering in +/// queries, or entity ordering during serialization. Since `IndexMap` stores key/value pairs in a +/// contiguous vector, iterating over its members is faster than a `HashMap`. This is beneficial for +/// WAL serialization. 
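A minimal sketch (not the crate's implementation) of the wire format described in the doc comment above: the map is serialized as a vector of key/value tuples, and IndexMap keeps those tuples in insertion order. Only the indexmap and serde_json crates are assumed.

    use indexmap::IndexMap;

    fn main() {
        let mut map: IndexMap<u32, &str> = IndexMap::new();
        map.insert(0, "foo");
        map.insert(1, "bar");
        map.insert(2, "baz");

        // IndexMap iterates in insertion order, so collecting into (key, value)
        // tuples yields a deterministic sequence for serialization.
        let pairs: Vec<(u32, &str)> = map.iter().map(|(k, v)| (*k, *v)).collect();
        let json = serde_json::to_string(&pairs).unwrap();
        assert_eq!(json, r#"[[0,"foo"],[1,"bar"],[2,"baz"]]"#);
    }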
#[derive(Debug, Clone, PartialEq, Eq)] -pub struct SerdeVecHashMap(HashMap); +pub struct SerdeVecMap(IndexMap); -impl SerdeVecHashMap +impl SerdeVecMap where K: Eq + std::hash::Hash, { pub fn new() -> Self { Self::default() } + + pub fn with_capacity(size: usize) -> Self { + Self(IndexMap::with_capacity(size)) + } } -impl Default for SerdeVecHashMap +impl Default for SerdeVecMap where K: Eq + std::hash::Hash, { @@ -43,17 +53,17 @@ where } } -impl From for SerdeVecHashMap +impl From for SerdeVecMap where K: Eq + std::hash::Hash, - T: Into>, + T: Into>, { fn from(value: T) -> Self { Self(value.into()) } } -impl IntoIterator for SerdeVecHashMap +impl IntoIterator for SerdeVecMap where K: Eq + std::hash::Hash, { @@ -66,7 +76,7 @@ where } } -impl<'a, K, V> IntoIterator for &'a SerdeVecHashMap +impl<'a, K, V> IntoIterator for &'a SerdeVecMap where K: Eq + std::hash::Hash, { @@ -79,7 +89,7 @@ where } } -impl<'a, K, V> IntoIterator for &'a mut SerdeVecHashMap +impl<'a, K, V> IntoIterator for &'a mut SerdeVecMap where K: Eq + std::hash::Hash, { @@ -92,18 +102,27 @@ where } } -impl Deref for SerdeVecHashMap +impl FromIterator<(K, V)> for SerdeVecMap where K: Eq + std::hash::Hash, { - type Target = HashMap; + fn from_iter>(iter: T) -> Self { + Self(iter.into_iter().collect()) + } +} + +impl Deref for SerdeVecMap +where + K: Eq + std::hash::Hash, +{ + type Target = IndexMap; fn deref(&self) -> &Self::Target { &self.0 } } -impl DerefMut for SerdeVecHashMap +impl DerefMut for SerdeVecMap where K: Eq + std::hash::Hash, { @@ -112,7 +131,7 @@ where } } -impl Serialize for SerdeVecHashMap +impl Serialize for SerdeVecMap where K: Eq + std::hash::Hash + Serialize, V: Serialize, @@ -129,7 +148,7 @@ where } } -impl<'de, K, V> Deserialize<'de> for SerdeVecHashMap +impl<'de, K, V> Deserialize<'de> for SerdeVecMap where K: Eq + std::hash::Hash + Deserialize<'de>, V: Deserialize<'de>, @@ -139,7 +158,7 @@ where D: Deserializer<'de>, { let v = deserializer.deserialize_seq(VecVisitor::new())?; - let mut map = HashMap::with_capacity(v.len()); + let mut map = IndexMap::with_capacity(v.len()); for (k, v) in v.into_iter() { if map.insert(k, v).is_some() { return Err(de::Error::custom("duplicate key found")); @@ -188,20 +207,18 @@ where #[cfg(test)] mod tests { - use hashbrown::HashMap; + use indexmap::IndexMap; - use super::SerdeVecHashMap; + use super::SerdeVecMap; #[test] fn serde_vec_map_with_json() { - let map = HashMap::::from_iter([(0, "foo"), (1, "bar"), (2, "baz")]); - let serde_vec_map = SerdeVecHashMap::from(map); + let map = IndexMap::::from_iter([(0, "foo"), (1, "bar"), (2, "baz")]); + let serde_vec_map = SerdeVecMap::from(map); // test round-trip to JSON: let s = serde_json::to_string(&serde_vec_map).unwrap(); - // with using a hashmap the order changes so asserting on the JSON itself is flaky, so if - // you want to see it working use --nocapture on the test... 
- println!("{s}"); - let d: SerdeVecHashMap = serde_json::from_str(&s).unwrap(); + assert_eq!(r#"[[0,"foo"],[1,"bar"],[2,"baz"]]"#, s); + let d: SerdeVecMap = serde_json::from_str(&s).unwrap(); assert_eq!(d, serde_vec_map); } } diff --git a/influxdb3_server/src/http.rs b/influxdb3_server/src/http.rs index e4c1843a61..504d2f2185 100644 --- a/influxdb3_server/src/http.rs +++ b/influxdb3_server/src/http.rs @@ -221,6 +221,7 @@ struct ErrorMessage { impl Error { /// Convert this error into an HTTP [`Response`] fn into_response(self) -> Response { + debug!(error = ?self, "API error"); match self { Self::WriteBuffer(WriteBufferError::CatalogUpdateError( err @ (CatalogError::TooManyDbs @@ -250,10 +251,25 @@ impl Error { .body(body) .unwrap() } + Self::WriteBuffer(err @ WriteBufferError::ColumnDoesNotExist(_)) => { + let err: ErrorMessage<()> = ErrorMessage { + error: err.to_string(), + data: None, + }; + let serialized = serde_json::to_string(&err).unwrap(); + let body = Body::from(serialized); + Response::builder() + .status(StatusCode::BAD_REQUEST) + .body(body) + .unwrap() + } Self::WriteBuffer(WriteBufferError::LastCacheError(ref lc_err)) => match lc_err { last_cache::Error::InvalidCacheSize | last_cache::Error::CacheAlreadyExists { .. } + | last_cache::Error::ColumnDoesNotExistByName { .. } + | last_cache::Error::ColumnDoesNotExistById { .. } | last_cache::Error::KeyColumnDoesNotExist { .. } + | last_cache::Error::KeyColumnDoesNotExistByName { .. } | last_cache::Error::InvalidKeyColumn | last_cache::Error::ValueColumnDoesNotExist { .. } => Response::builder() .status(StatusCode::BAD_REQUEST) @@ -697,9 +713,35 @@ where .catalog() .db_schema_and_id(&db) .ok_or_else(|| WriteBufferError::DbDoesNotExist)?; - let table_id = db_schema - .table_name_to_id(table.as_str()) + let (table_id, table_def) = db_schema + .table_definition_and_id(table.as_str()) .ok_or_else(|| WriteBufferError::TableDoesNotExist)?; + let key_columns = key_columns + .map(|names| { + names + .into_iter() + .map(|name| { + table_def + .column_def_and_id(name.as_str()) + .map(|(id, def)| (id, Arc::clone(&def.name))) + .ok_or_else(|| WriteBufferError::ColumnDoesNotExist(name)) + }) + .collect::, WriteBufferError>>() + }) + .transpose()?; + let value_columns = value_columns + .map(|names| { + names + .into_iter() + .map(|name| { + table_def + .column_def_and_id(name.as_str()) + .map(|(id, def)| (id, Arc::clone(&def.name))) + .ok_or_else(|| WriteBufferError::ColumnDoesNotExist(name)) + }) + .collect::, WriteBufferError>>() + }) + .transpose()?; match self .write_buffer diff --git a/influxdb3_server/src/system_tables/last_caches.rs b/influxdb3_server/src/system_tables/last_caches.rs index fb23c7643d..9b1b6dfac1 100644 --- a/influxdb3_server/src/system_tables/last_caches.rs +++ b/influxdb3_server/src/system_tables/last_caches.rs @@ -1,6 +1,6 @@ use std::sync::Arc; -use arrow::array::{GenericListBuilder, StringBuilder}; +use arrow::array::{GenericListBuilder, UInt32Builder}; use arrow_array::{ArrayRef, RecordBatch, StringArray, UInt64Array}; use arrow_schema::{DataType, Field, Schema, SchemaRef}; use datafusion::{error::DataFusionError, logical_expr::Expr}; @@ -31,12 +31,12 @@ fn last_caches_schema() -> SchemaRef { Field::new("name", DataType::Utf8, false), Field::new( "key_columns", - DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))), + DataType::List(Arc::new(Field::new("item", DataType::UInt32, true))), false, ), Field::new( "value_columns", - DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))), + 
DataType::List(Arc::new(Field::new("item", DataType::UInt32, true))), true, ), Field::new("count", DataType::UInt64, false), @@ -83,20 +83,20 @@ fn from_last_cache_definitions( )); // Key Columns columns.push({ - let values_builder = StringBuilder::new(); + let values_builder = UInt32Builder::new(); let mut builder = GenericListBuilder::::new(values_builder); for c in caches { c.key_columns .iter() - .for_each(|k| builder.values().append_value(k)); + .for_each(|k| builder.values().append_value(k.as_u32())); builder.append(true); } Arc::new(builder.finish()) }); // Value Columns columns.push({ - let values_builder = StringBuilder::new(); + let values_builder = UInt32Builder::new(); let mut builder = GenericListBuilder::::new(values_builder); for c in caches { @@ -104,7 +104,7 @@ fn from_last_cache_definitions( LastCacheValueColumnsDef::Explicit { columns } => { columns .iter() - .for_each(|v| builder.values().append_value(v)); + .for_each(|v| builder.values().append_value(v.as_u32())); builder.append(true); } LastCacheValueColumnsDef::AllNonKeyColumns => { diff --git a/influxdb3_wal/Cargo.toml b/influxdb3_wal/Cargo.toml index 8b82d6b350..29e18905c1 100644 --- a/influxdb3_wal/Cargo.toml +++ b/influxdb3_wal/Cargo.toml @@ -23,6 +23,7 @@ byteorder.workspace = true crc32fast.workspace = true futures-util.workspace = true hashbrown.workspace = true +indexmap.workspace = true object_store.workspace = true parking_lot.workspace = true serde.workspace = true diff --git a/influxdb3_wal/src/lib.rs b/influxdb3_wal/src/lib.rs index ed4599e2bc..00bb73bd6f 100644 --- a/influxdb3_wal/src/lib.rs +++ b/influxdb3_wal/src/lib.rs @@ -11,7 +11,8 @@ use crate::snapshot_tracker::SnapshotInfo; use async_trait::async_trait; use data_types::Timestamp; use hashbrown::HashMap; -use influxdb3_id::{DbId, SerdeVecHashMap, TableId}; +use indexmap::IndexMap; +use influxdb3_id::{ColumnId, DbId, SerdeVecMap, TableId}; use influxdb_line_protocol::v3::SeriesValue; use influxdb_line_protocol::FieldValue; use iox_time::Time; @@ -241,7 +242,7 @@ pub struct TableDefinition { pub table_name: Arc, pub table_id: TableId, pub field_definitions: Vec, - pub key: Option>, + pub key: Option>, } #[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] @@ -256,9 +257,24 @@ pub struct FieldAdditions { #[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] pub struct FieldDefinition { pub name: Arc, + pub id: ColumnId, pub data_type: FieldDataType, } +impl FieldDefinition { + pub fn new( + id: ColumnId, + name: impl Into>, + data_type: impl Into, + ) -> Self { + Self { + id, + name: name.into(), + data_type: data_type.into(), + } + } +} + #[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)] pub enum FieldDataType { String, @@ -307,11 +323,11 @@ pub struct LastCacheDefinition { /// The table id the cache is associated with pub table_id: TableId, /// The table name the cache is associated with - pub table: String, + pub table: Arc, /// Given name of the cache - pub name: String, + pub name: Arc, /// Columns intended to be used as predicates in the cache - pub key_columns: Vec, + pub key_columns: Vec, /// Columns that store values in the cache pub value_columns: LastCacheValueColumnsDef, /// The number of last values to hold in the cache @@ -322,12 +338,15 @@ pub struct LastCacheDefinition { impl LastCacheDefinition { /// Create a new [`LastCacheDefinition`] with explicit value columns + /// + /// This is intended for tests and expects that the column id for the time + /// column is included in the value columns 
argument. pub fn new_with_explicit_value_columns( table_id: TableId, - table: impl Into, - name: impl Into, - key_columns: impl IntoIterator>, - value_columns: impl IntoIterator>, + table: impl Into>, + name: impl Into>, + key_columns: Vec, + value_columns: Vec, count: usize, ttl: u64, ) -> Result { @@ -335,9 +354,9 @@ impl LastCacheDefinition { table_id, table: table.into(), name: name.into(), - key_columns: key_columns.into_iter().map(Into::into).collect(), + key_columns, value_columns: LastCacheValueColumnsDef::Explicit { - columns: value_columns.into_iter().map(Into::into).collect(), + columns: value_columns, }, count: count.try_into()?, ttl, @@ -345,11 +364,13 @@ impl LastCacheDefinition { } /// Create a new [`LastCacheDefinition`] with explicit value columns + /// + /// This is intended for tests. pub fn new_all_non_key_value_columns( table_id: TableId, - table: impl Into, - name: impl Into, - key_columns: impl IntoIterator>, + table: impl Into>, + name: impl Into>, + key_columns: Vec, count: usize, ttl: u64, ) -> Result { @@ -357,7 +378,7 @@ impl LastCacheDefinition { table_id, table: table.into(), name: name.into(), - key_columns: key_columns.into_iter().map(Into::into).collect(), + key_columns, value_columns: LastCacheValueColumnsDef::AllNonKeyColumns, count: count.try_into()?, ttl, @@ -371,7 +392,7 @@ impl LastCacheDefinition { #[serde(tag = "type", rename_all = "snake_case")] pub enum LastCacheValueColumnsDef { /// Explicit list of column names - Explicit { columns: Vec }, + Explicit { columns: Vec }, /// Stores all non-key columns AllNonKeyColumns, } @@ -432,9 +453,9 @@ impl PartialEq for usize { #[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] pub struct LastCacheDelete { - pub table_name: String, + pub table_name: Arc, pub table_id: TableId, - pub name: String, + pub name: Arc, } #[serde_as] @@ -442,7 +463,7 @@ pub struct LastCacheDelete { pub struct WriteBatch { pub database_id: DbId, pub database_name: Arc, - pub table_chunks: SerdeVecHashMap, + pub table_chunks: SerdeVecMap, pub min_time_ns: i64, pub max_time_ns: i64, } @@ -451,7 +472,7 @@ impl WriteBatch { pub fn new( database_id: DbId, database_name: Arc, - table_chunks: HashMap, + table_chunks: IndexMap, ) -> Self { // find the min and max times across the table chunks let (min_time_ns, max_time_ns) = table_chunks.values().fold( @@ -475,7 +496,7 @@ impl WriteBatch { pub fn add_write_batch( &mut self, - new_table_chunks: SerdeVecHashMap, + new_table_chunks: SerdeVecMap, min_time_ns: i64, max_time_ns: i64, ) { @@ -536,10 +557,19 @@ pub struct TableChunk { #[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] pub struct Field { - pub name: Arc, + pub id: ColumnId, pub value: FieldData, } +impl Field { + pub fn new(id: ColumnId, value: impl Into) -> Self { + Self { + id, + value: value.into(), + } + } +} + #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] pub struct Row { pub time: i64, @@ -596,6 +626,18 @@ impl<'a> From> for FieldData { } } +impl<'a> From<&FieldValue<'a>> for FieldData { + fn from(value: &FieldValue<'a>) -> Self { + match value { + FieldValue::I64(v) => Self::Integer(*v), + FieldValue::U64(v) => Self::UInteger(*v), + FieldValue::F64(v) => Self::Float(*v), + FieldValue::String(v) => Self::String(v.to_string()), + FieldValue::Boolean(v) => Self::Boolean(*v), + } + } +} + #[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] pub struct WalContents { /// The min timestamp from any writes in the WAL file diff --git a/influxdb3_wal/src/object_store.rs 
b/influxdb3_wal/src/object_store.rs index 14dbe4c02f..aad0ebde58 100644 --- a/influxdb3_wal/src/object_store.rs +++ b/influxdb3_wal/src/object_store.rs @@ -613,7 +613,8 @@ mod tests { Field, FieldData, Gen1Duration, Row, SnapshotSequenceNumber, TableChunk, TableChunks, }; use async_trait::async_trait; - use influxdb3_id::{DbId, TableId}; + use indexmap::IndexMap; + use influxdb3_id::{ColumnId, DbId, TableId}; use object_store::memory::InMemory; use std::any::Any; use tokio::sync::oneshot::Receiver; @@ -642,7 +643,7 @@ mod tests { let op1 = WalOp::Write(WriteBatch { database_id: DbId::from(0), database_name: Arc::clone(&db_name), - table_chunks: HashMap::from([( + table_chunks: IndexMap::from([( TableId::from(0), TableChunks { min_time: 1, @@ -655,11 +656,11 @@ mod tests { time: 1, fields: vec![ Field { - name: "f1".into(), + id: ColumnId::from(0), value: FieldData::Integer(1), }, Field { - name: "time".into(), + id: ColumnId::from(1), value: FieldData::Timestamp(1), }, ], @@ -668,11 +669,11 @@ mod tests { time: 3, fields: vec![ Field { - name: "f1".into(), + id: ColumnId::from(0), value: FieldData::Integer(2), }, Field { - name: "time".into(), + id: ColumnId::from(1), value: FieldData::Timestamp(3), }, ], @@ -691,7 +692,7 @@ mod tests { let op2 = WalOp::Write(WriteBatch { database_id: DbId::from(0), database_name: Arc::clone(&db_name), - table_chunks: HashMap::from([( + table_chunks: IndexMap::from([( TableId::from(0), TableChunks { min_time: 12, @@ -703,11 +704,11 @@ mod tests { time: 12, fields: vec![ Field { - name: "f1".into(), + id: ColumnId::from(0), value: FieldData::Integer(3), }, Field { - name: "time".into(), + id: ColumnId::from(1), value: FieldData::Timestamp(62_000000000), }, ], @@ -732,7 +733,7 @@ mod tests { ops: vec![WalOp::Write(WriteBatch { database_id: DbId::from(0), database_name: "db1".into(), - table_chunks: HashMap::from([( + table_chunks: IndexMap::from([( TableId::from(0), TableChunks { min_time: 1, @@ -745,11 +746,11 @@ mod tests { time: 1, fields: vec![ Field { - name: "f1".into(), + id: ColumnId::from(0), value: FieldData::Integer(1), }, Field { - name: "time".into(), + id: ColumnId::from(1), value: FieldData::Timestamp(1), }, ], @@ -758,11 +759,11 @@ mod tests { time: 3, fields: vec![ Field { - name: "f1".into(), + id: ColumnId::from(0), value: FieldData::Integer(2), }, Field { - name: "time".into(), + id: ColumnId::from(1), value: FieldData::Timestamp(3), }, ], @@ -771,11 +772,11 @@ mod tests { time: 12, fields: vec![ Field { - name: "f1".into(), + id: ColumnId::from(0), value: FieldData::Integer(3), }, Field { - name: "time".into(), + id: ColumnId::from(1), value: FieldData::Timestamp(62_000000000), }, ], @@ -803,7 +804,7 @@ mod tests { ops: vec![WalOp::Write(WriteBatch { database_id: DbId::from(0), database_name: "db1".into(), - table_chunks: HashMap::from([( + table_chunks: IndexMap::from([( TableId::from(0), TableChunks { min_time: 12, @@ -815,11 +816,11 @@ mod tests { time: 12, fields: vec![ Field { - name: "f1".into(), + id: ColumnId::from(0), value: FieldData::Integer(3), }, Field { - name: "time".into(), + id: ColumnId::from(1), value: FieldData::Timestamp(62_000000000), }, ], @@ -876,7 +877,7 @@ mod tests { let op3 = WalOp::Write(WriteBatch { database_id: DbId::from(0), database_name: Arc::clone(&db_name), - table_chunks: HashMap::from([( + table_chunks: IndexMap::from([( TableId::from(0), TableChunks { min_time: 26, @@ -888,11 +889,11 @@ mod tests { time: 26, fields: vec![ Field { - name: "f1".into(), + id: ColumnId::from(0), value: 
FieldData::Integer(3), }, Field { - name: "time".into(), + id: ColumnId::from(1), value: FieldData::Timestamp(128_000000000), }, ], @@ -937,7 +938,7 @@ mod tests { ops: vec![WalOp::Write(WriteBatch { database_id: DbId::from(0), database_name: "db1".into(), - table_chunks: HashMap::from([( + table_chunks: IndexMap::from([( TableId::from(0), TableChunks { min_time: 26, @@ -949,11 +950,11 @@ mod tests { time: 26, fields: vec![ Field { - name: "f1".into(), + id: ColumnId::from(0), value: FieldData::Integer(3), }, Field { - name: "time".into(), + id: ColumnId::from(1), value: FieldData::Timestamp(128_000000000), }, ], diff --git a/influxdb3_wal/src/serialize.rs b/influxdb3_wal/src/serialize.rs index 1b61439383..9b3a603f11 100644 --- a/influxdb3_wal/src/serialize.rs +++ b/influxdb3_wal/src/serialize.rs @@ -91,7 +91,7 @@ mod tests { use crate::{ Field, FieldData, Row, TableChunk, TableChunks, WalFileSequenceNumber, WalOp, WriteBatch, }; - use influxdb3_id::{DbId, SerdeVecHashMap, TableId}; + use influxdb3_id::{ColumnId, DbId, SerdeVecMap, TableId}; #[test] fn test_serialize_deserialize() { @@ -100,11 +100,11 @@ mod tests { time: 1, fields: vec![ Field { - name: "f1".into(), + id: ColumnId::from(0), value: FieldData::Integer(10), }, Field { - name: "baz".into(), + id: ColumnId::from(1), value: FieldData::Timestamp(1), }, ], @@ -116,7 +116,7 @@ mod tests { chunk_time_to_chunk: [(1, chunk)].iter().cloned().collect(), }; let table_id = TableId::from(2); - let mut table_chunks = SerdeVecHashMap::new(); + let mut table_chunks = SerdeVecMap::new(); table_chunks.insert(table_id, chunks); let contents = WalContents { diff --git a/influxdb3_write/src/last_cache/mod.rs b/influxdb3_write/src/last_cache/mod.rs index b60c6c2583..2c08a61341 100644 --- a/influxdb3_write/src/last_cache/mod.rs +++ b/influxdb3_write/src/last_cache/mod.rs @@ -4,16 +4,16 @@ use std::{ time::{Duration, Instant}, }; -use arrow::datatypes::SchemaRef; +use arrow::array::new_null_array; use arrow::{ array::{ - new_null_array, ArrayRef, BooleanBuilder, Float64Builder, GenericByteDictionaryBuilder, - Int64Builder, RecordBatch, StringBuilder, StringDictionaryBuilder, - TimestampNanosecondBuilder, UInt64Builder, + ArrayRef, BooleanBuilder, Float64Builder, GenericByteDictionaryBuilder, Int64Builder, + RecordBatch, StringBuilder, StringDictionaryBuilder, TimestampNanosecondBuilder, + UInt64Builder, }, datatypes::{ - DataType, Field as ArrowField, FieldRef, GenericStringType, Int32Type, - SchemaBuilder as ArrowSchemaBuilder, SchemaRef as ArrowSchemaRef, TimeUnit, + DataType, Field as ArrowField, GenericStringType, Int32Type, + SchemaBuilder as ArrowSchemaBuilder, SchemaRef as ArrowSchemaRef, }, error::ArrowError, }; @@ -23,16 +23,17 @@ use datafusion::{ }; use hashbrown::{HashMap, HashSet}; use indexmap::{IndexMap, IndexSet}; -use influxdb3_catalog::catalog::Catalog; -use influxdb3_id::DbId; +use influxdb3_catalog::catalog::{Catalog, TableDefinition}; use influxdb3_id::TableId; +use influxdb3_id::{ColumnId, DbId}; use influxdb3_wal::{ Field, FieldData, LastCacheDefinition, LastCacheSize, LastCacheValueColumnsDef, Row, WalContents, WalOp, }; use iox_time::Time; +use observability_deps::tracing::debug; use parking_lot::RwLock; -use schema::{InfluxColumnType, InfluxFieldType, Schema, TIME_COLUMN_NAME}; +use schema::{InfluxColumnType, InfluxFieldType, TIME_COLUMN_NAME}; mod table_function; pub use table_function::LastCacheFunction; @@ -43,12 +44,18 @@ pub enum Error { InvalidCacheSize, #[error("last cache already exists for database and table, but 
it was configured differently: {reason}")] CacheAlreadyExists { reason: String }, - #[error("specified key column ({column_name}) does not exist in the table schema")] - KeyColumnDoesNotExist { column_name: String }, + #[error("specified column (name: {column_name}) does not exist in the table definition")] + ColumnDoesNotExistByName { column_name: String }, + #[error("specified column (id: {column_id}) does not exist in the table definition")] + ColumnDoesNotExistById { column_id: ColumnId }, + #[error("specified key column (id: {column_id}) does not exist in the table schema")] + KeyColumnDoesNotExist { column_id: ColumnId }, + #[error("specified key column (name: {column_name}) does not exist in the table schema")] + KeyColumnDoesNotExistByName { column_name: String }, #[error("key column must be string, int, uint, or bool types")] InvalidKeyColumn, - #[error("specified value column ({column_name}) does not exist in the table schema")] - ValueColumnDoesNotExist { column_name: String }, + #[error("specified value column ({column_id}) does not exist in the table schema")] + ValueColumnDoesNotExist { column_id: ColumnId }, #[error("requested last cache does not exist")] CacheDoesNotExist, } @@ -62,7 +69,8 @@ impl Error { } /// A three level hashmap storing DbId -> TableId -> Cache Name -> LastCache -type CacheMap = RwLock>>>; +// TODO: last caches could get a similar ID, e.g., `LastCacheId` +type CacheMap = RwLock, LastCache>>>>; /// Provides all last-N-value caches for the entire database pub struct LastCacheProvider { @@ -83,18 +91,12 @@ pub const DEFAULT_CACHE_TTL: Duration = Duration::from_secs(60 * 60 * 4); pub struct CreateCacheArguments { /// The id of the database to create the cache for pub db_id: DbId, - /// The name of the database to create the cache for - pub db_name: String, - /// The id of the table in the database to create the cache for - pub table_id: TableId, - /// The name of the table in the database to create the cache for - pub table_name: String, - /// The Influx Schema of the table - pub schema: Schema, + /// The definition of the table for which the cache is being created + pub table_def: Arc, /// An optional name for the cache /// /// The cache name will default to `__last_cache` - pub cache_name: Option, + pub cache_name: Option>, /// The number of values to hold in the created cache /// /// This will default to 1. @@ -108,11 +110,11 @@ pub struct CreateCacheArguments { /// This will default to: /// - the series key columns for a v3 table /// - the lexicographically ordered tag set for a v1 table - pub key_columns: Option>, + pub key_columns: Option)>>, /// The value columns to use in the cache /// /// This will default to all non-key columns. The `time` column is always included. 
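A hedged sketch of the value-column defaulting rules described above; the integer column ids and the helper name resolve_value_columns are illustrative stand-ins, not the crate's API. An explicit list always gains the time column if it was omitted, while no list at all means every non-key column is used.

    fn resolve_value_columns(
        explicit: Option<Vec<u32>>,
        all_columns: &[u32],
        key_columns: &[u32],
        time_col: u32,
    ) -> Vec<u32> {
        match explicit {
            // Explicit list: keep it, but always include the time column.
            Some(mut cols) => {
                if !cols.contains(&time_col) {
                    cols.push(time_col);
                }
                cols
            }
            // No list given: default to every non-key column (time is never a key).
            None => all_columns
                .iter()
                .copied()
                .filter(|c| !key_columns.contains(c))
                .collect(),
        }
    }

    fn main() {
        let all = [0, 1, 2, 3, 4];
        let keys = [0, 1];
        assert_eq!(resolve_value_columns(Some(vec![4]), &all, &keys, 3), vec![4, 3]);
        assert_eq!(resolve_value_columns(None, &all, &keys, 3), vec![2, 3, 4]);
    }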
- pub value_columns: Option>, + pub value_columns: Option)>>, } impl LastCacheProvider { @@ -125,23 +127,41 @@ impl LastCacheProvider { for db_schema in catalog.list_db_schema() { for table_def in db_schema.tables() { for (cache_name, cache_def) in table_def.last_caches() { + debug!(%cache_name, ?cache_def, "adding last cache from catalog"); + let key_columns = cache_def + .key_columns + .iter() + .map(|id| { + table_def + .column_id_to_name(*id) + .map(|name| (*id, name)) + .ok_or(Error::KeyColumnDoesNotExist { column_id: *id }) + }) + .collect::, Error>>()?; + let value_columns = match cache_def.value_columns { + LastCacheValueColumnsDef::Explicit { ref columns } => Some( + columns + .iter() + .map(|id| { + table_def + .column_id_to_name(*id) + .map(|name| (*id, name)) + .ok_or(Error::ValueColumnDoesNotExist { column_id: *id }) + }) + .collect::, Error>>()?, + ), + LastCacheValueColumnsDef::AllNonKeyColumns => None, + }; assert!( provider .create_cache(CreateCacheArguments { db_id: db_schema.id, - db_name: db_schema.name.to_string(), - table_id: table_def.table_id, - table_name: table_def.table_name.to_string(), - schema: table_def.influx_schema().clone(), - cache_name: Some(cache_name.to_owned()), + table_def: Arc::clone(&table_def), + cache_name: Some(Arc::clone(&cache_name)), count: Some(cache_def.count.into()), ttl: Some(Duration::from_secs(cache_def.ttl)), - key_columns: Some(cache_def.key_columns.clone()), - value_columns: match &cache_def.value_columns { - LastCacheValueColumnsDef::Explicit { columns } => - Some(columns.clone()), - LastCacheValueColumnsDef::AllNonKeyColumns => None, - }, + key_columns: Some(key_columns), + value_columns, })? .is_some(), "catalog should not contain duplicate last cache definitions" @@ -161,20 +181,20 @@ impl LastCacheProvider { db_id: DbId, table_id: TableId, cache_name: Option<&str>, - ) -> Option<(String, ArrowSchemaRef)> { + ) -> Option<(Arc, ArrowSchemaRef)> { self.cache_map .read() .get(&db_id) .and_then(|db| db.get(&table_id)) .and_then(|table| { - if let Some(name) = cache_name { + if let Some(cache_name) = cache_name { table - .get(name) - .map(|lc| (name.to_string(), Arc::clone(&lc.schema))) + .get_key_value(cache_name) + .map(|(name, lc)| (Arc::clone(name), lc.arrow_schema())) } else if table.len() == 1 { table .iter() - .map(|(name, lc)| (name.to_string(), Arc::clone(&lc.schema))) + .map(|(name, lc)| (Arc::clone(name), lc.arrow_schema())) .next() } else { None @@ -197,7 +217,7 @@ impl LastCacheProvider { .table_id_to_name(*table_id) .expect("table exists"); table_map.iter().map(move |(lc_name, lc)| { - lc.to_definition(*table_id, table_name.as_ref(), lc_name) + lc.to_definition(*table_id, table_name.as_ref(), Arc::clone(lc_name)) }) }) .collect() @@ -214,34 +234,27 @@ impl LastCacheProvider { &self, CreateCacheArguments { db_id, - table_id, - table_name, - schema, + table_def, cache_name, count, ttl, key_columns, value_columns, - .. }: CreateCacheArguments, ) -> Result, Error> { let key_columns = if let Some(keys) = key_columns { // validate the key columns specified to ensure correct type (string, int, unit, or bool) // and that they exist in the table's schema. 
- for key in keys.iter() { + for (col_id, col_name) in keys.iter() { use InfluxColumnType::*; use InfluxFieldType::*; - match schema.field_by_name(key) { + match table_def.schema.field_by_name(col_name) { Some(( Tag | Field(Integer) | Field(UInteger) | Field(String) | Field(Boolean), _, )) => (), Some((_, _)) => return Err(Error::InvalidKeyColumn), - None => { - return Err(Error::KeyColumnDoesNotExist { - column_name: key.into(), - }) - } + None => return Err(Error::KeyColumnDoesNotExist { column_id: *col_id }), } } keys @@ -249,34 +262,80 @@ impl LastCacheProvider { // use primary key, which defaults to series key if present, then lexicographically // ordered tags otherwise, there is no user-defined sort order in the schema, so if that // is introduced, we will need to make sure that is accommodated here. - let mut keys = schema.primary_key(); + let mut keys = table_def.schema.primary_key(); + debug!( + ?cache_name, + ?keys, + "using primary key for key cols in cache" + ); if let Some(&TIME_COLUMN_NAME) = keys.last() { keys.pop(); } - keys.iter().map(|s| s.to_string()).collect() + keys.iter() + .map(|s| { + table_def + .column_name_to_id(Arc::::from(*s)) + .map(|id| (id, Arc::::from(*s))) + .ok_or_else(|| Error::KeyColumnDoesNotExistByName { + column_name: s.to_string(), + }) + }) + .collect::, Error>>()? }; // Generate the cache name if it was not provided let cache_name = cache_name.unwrap_or_else(|| { format!( "{table_name}_{keys}_last_cache", - keys = key_columns.join("_") + table_name = table_def.table_name, + keys = key_columns + .iter() + .map(|(_, name)| Arc::clone(name)) + .collect::>() + .join("_") ) + .into() }); - let accept_new_fields = value_columns.is_none(); - let last_cache_value_columns_def = match &value_columns { - None => LastCacheValueColumnsDef::AllNonKeyColumns, - Some(cols) => LastCacheValueColumnsDef::Explicit { - columns: cols.clone(), - }, - }; + let (value_columns, schema) = + match value_columns { + Some(cols) => { + let mut has_time = false; + let mut ids = cols + .into_iter() + .map(|(id, name)| { + has_time = has_time || name.as_ref() == TIME_COLUMN_NAME; + id + }) + .collect::>(); + // Check that the `time` column is included, and add it if not: + if !has_time { + ids.push(table_def.column_name_to_id(TIME_COLUMN_NAME).ok_or_else( + || Error::ColumnDoesNotExistByName { + column_name: TIME_COLUMN_NAME.to_string(), + }, + )?); + } + let (schema, _) = last_cache_schema_from_table_def( + Arc::clone(&table_def), + key_columns.iter().map(|(id, _)| *id).collect(), + Some(ids.as_slice()), + ); + (ValueColumnType::Explicit { columns: ids }, schema) + } + None => { + let (schema, seen) = last_cache_schema_from_table_def( + Arc::clone(&table_def), + key_columns.iter().map(|(id, _)| *id).collect(), + None, + ); + (ValueColumnType::AcceptNew { seen }, schema) + } + }; - let cache_schema = self.last_cache_schema_from_schema(&schema, &key_columns, value_columns); + let last_cache_value_columns_def = LastCacheValueColumnsDef::from(&value_columns); - let series_key = schema - .series_key() - .map(|keys| keys.into_iter().map(|s| s.to_string()).collect()); + let series_key = table_def.series_key.as_deref(); // create the actual last cache: let count = count @@ -288,9 +347,9 @@ impl LastCacheProvider { count, ttl, key_columns.clone(), - cache_schema, + value_columns, + schema, series_key, - accept_new_fields, ); // Check to see if there is already a cache for the same database/table/cache name, and with @@ -301,7 +360,7 @@ impl LastCacheProvider { let mut lock = 
self.cache_map.write(); if let Some(lc) = lock .get(&db_id) - .and_then(|db| db.get(&table_id)) + .and_then(|db| db.get(&table_def.table_id)) .and_then(|table| table.get(&cache_name)) { return lc.compare_config(&last_cache).map(|_| None); @@ -309,89 +368,69 @@ impl LastCacheProvider { lock.entry(db_id) .or_default() - .entry(table_id) + .entry(table_def.table_id) .or_default() .insert(cache_name.clone(), last_cache); Ok(Some(LastCacheDefinition { - table_id, - table: table_name, + table_id: table_def.table_id, + table: Arc::clone(&table_def.table_name), name: cache_name, - key_columns, + key_columns: key_columns.into_iter().map(|(id, _)| id).collect(), value_columns: last_cache_value_columns_def, count, ttl: ttl.as_secs(), })) } - fn last_cache_schema_from_schema( - &self, - schema: &Schema, - key_columns: &[String], - value_columns: Option>, - ) -> SchemaRef { - let mut schema_builder = ArrowSchemaBuilder::new(); - // Add key columns first: - for (t, field) in schema - .iter() - .filter(|&(_, f)| key_columns.contains(f.name())) - { - if let InfluxColumnType::Tag = t { - // override tags with string type in the schema, because the KeyValue type stores - // them as strings, and produces them as StringArray when creating RecordBatches: - schema_builder.push(ArrowField::new(field.name(), DataType::Utf8, false)) - } else { - schema_builder.push(field.clone()); - }; - } - // Add value columns second: - match value_columns { - Some(cols) => { - for (_, field) in schema - .iter() - .filter(|&(_, f)| cols.contains(f.name()) || f.name() == TIME_COLUMN_NAME) - { - schema_builder.push(field.clone()); - } - } - None => { - for (_, field) in schema - .iter() - .filter(|&(_, f)| !key_columns.contains(f.name())) - { - schema_builder.push(field.clone()); - } - } - } - - Arc::new(schema_builder.finish()) - } - pub fn create_cache_from_definition( &self, db_id: DbId, - schema: &Schema, + table_def: Arc, definition: &LastCacheDefinition, ) { - let value_columns = match &definition.value_columns { - LastCacheValueColumnsDef::AllNonKeyColumns => None, - LastCacheValueColumnsDef::Explicit { columns } => Some(columns.clone()), + let key_columns = definition + .key_columns + .iter() + .map(|id| { + table_def + .column_id_to_name(*id) + .map(|name| (*id, name)) + .expect("a valid column id for key column") + }) + .collect::)>>(); + let (value_columns, schema) = match &definition.value_columns { + LastCacheValueColumnsDef::AllNonKeyColumns => { + let (schema, seen) = last_cache_schema_from_table_def( + Arc::clone(&table_def), + key_columns.iter().map(|(id, _)| *id).collect(), + None, + ); + (ValueColumnType::AcceptNew { seen }, schema) + } + LastCacheValueColumnsDef::Explicit { columns } => { + let (schema, _) = last_cache_schema_from_table_def( + Arc::clone(&table_def), + key_columns.iter().map(|(id, _)| *id).collect(), + Some(columns.as_slice()), + ); + ( + ValueColumnType::Explicit { + columns: columns.to_vec(), + }, + schema, + ) + } }; - let accept_new_fields = value_columns.is_none(); - let series_key = schema - .series_key() - .map(|keys| keys.into_iter().map(|s| s.to_string()).collect()); - - let schema = - self.last_cache_schema_from_schema(schema, &definition.key_columns, value_columns); + let series_key = table_def.series_key.as_deref(); let last_cache = LastCache::new( definition.count, Duration::from_secs(definition.ttl), - definition.key_columns.clone(), + key_columns, + value_columns, schema, series_key, - accept_new_fields, ); let mut lock = self.cache_map.write(); @@ -452,12 +491,20 @@ impl 
LastCacheProvider { if db_cache.is_empty() { continue; } + let Some(db_schema) = self.catalog.db_schema_by_id(batch.database_id) + else { + continue; + }; for (table_id, table_chunks) in &batch.table_chunks { if let Some(table_cache) = db_cache.get_mut(table_id) { + let Some(table_def) = db_schema.table_definition_by_id(*table_id) + else { + continue; + }; for (_, last_cache) in table_cache.iter_mut() { for chunk in table_chunks.chunk_time_to_chunk.values() { for row in &chunk.rows { - last_cache.push(row); + last_cache.push(row, Arc::clone(&table_def)); } } } @@ -489,6 +536,12 @@ impl LastCacheProvider { cache_name: Option<&str>, predicates: &[Predicate], ) -> Option, ArrowError>> { + let table_def = self + .catalog + .db_schema_by_id(db_id) + .and_then(|db| db.table_definition_by_id(table_id)) + .expect("valid db and table ids to get table definition"); + self.cache_map .read() .get(&db_id) @@ -502,7 +555,7 @@ impl LastCacheProvider { None } }) - .map(|lc| lc.to_record_batches(predicates)) + .map(|lc| lc.to_record_batches(table_def, predicates)) } /// Returns the total number of caches contained in the provider @@ -516,6 +569,60 @@ impl LastCacheProvider { } } +fn last_cache_schema_from_table_def( + table_def: Arc, + key_columns: Vec, + value_columns: Option<&[ColumnId]>, +) -> (ArrowSchemaRef, HashSet) { + let mut seen = HashSet::new(); + let mut schema_builder = ArrowSchemaBuilder::new(); + // Add key columns first, because of how the cache produces records, they should appear first + // in the schema, it is important that they go in order: + for id in &key_columns { + let def = table_def.columns.get(id).expect("valid key column"); + seen.insert(*id); + if let InfluxColumnType::Tag = def.data_type { + // override tags with string type in the schema, because the KeyValue type stores + // them as strings, and produces them as StringArray when creating RecordBatches: + schema_builder.push(ArrowField::new(def.name.as_ref(), DataType::Utf8, false)) + } else { + schema_builder.push(ArrowField::new( + def.name.as_ref(), + DataType::from(&def.data_type), + false, + )); + }; + } + // Add value columns second: + if let Some(value_columns) = value_columns { + for (id, def) in table_def.columns.iter().filter(|(id, def)| { + value_columns.contains(id) || matches!(def.data_type, InfluxColumnType::Timestamp) + }) { + seen.insert(*id); + schema_builder.push(ArrowField::new( + def.name.as_ref(), + DataType::from(&def.data_type), + true, + )); + } + } else { + for (id, def) in table_def + .columns + .iter() + .filter(|(id, _)| !key_columns.contains(id)) + { + seen.insert(*id); + schema_builder.push(ArrowField::new( + def.name.as_ref(), + DataType::from(&def.data_type), + true, + )); + } + } + + (Arc::new(schema_builder.finish()), seen) +} + /// A Last-N-Values Cache /// /// A hierarchical cache whose structure is determined by a set of `key_columns`, each of which @@ -532,41 +639,69 @@ pub(crate) struct LastCache { /// Once values have lived in the cache beyond this [`Duration`], they can be evicted using /// the [`remove_expired`][LastCache::remove_expired] method. pub(crate) ttl: Duration, - /// The key columns for this cache + /// The key columns for this cache, by their IDs /// - /// Uses an [`IndexSet`] for both fast iteration and fast lookup. - pub(crate) key_columns: Arc>, + /// Uses an [`IndexSet`] for both fast iteration and fast lookup and more importantly, this + /// map preserves the order of the elements, thereby maintaining the order of the keys in + /// the cache. 
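A small sketch of why an IndexSet fits the key-column lookup described above: it preserves insertion order, so the configured key-column order survives iteration, while membership checks stay hash-based. Only the indexmap crate is assumed; plain u32 values stand in for ColumnId.

    use indexmap::IndexSet;

    fn main() {
        let mut key_column_ids: IndexSet<u32> = IndexSet::new();
        key_column_ids.insert(9);
        key_column_ids.insert(2);
        key_column_ids.insert(5);

        // Iteration follows insertion order, not hash order:
        let ordered: Vec<u32> = key_column_ids.iter().copied().collect();
        assert_eq!(ordered, vec![9, 2, 5]);

        // ...and membership checks are still constant time on average:
        assert!(key_column_ids.contains(&2));
        assert!(!key_column_ids.contains(&7));
    }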
+ pub(crate) key_column_ids: Arc>, + /// The key columns for this cache, by their names + pub(crate) key_column_name_to_ids: Arc, ColumnId>>, + /// The value columns for this cache + pub(crate) value_columns: ValueColumnType, /// The Arrow Schema for the table that this cache is associated with pub(crate) schema: ArrowSchemaRef, - /// Whether or not this cache accepts newly written fields - pub(crate) accept_new_fields: bool, /// Optionally store the series key for tables that use it for ensuring non-nullability in the /// column buffer for series key columns /// /// We only use this to check for columns that are part of the series key, so we don't care /// about the order, and a HashSet is sufficient. - series_key: Option>, + series_key: Option>, /// The internal state of the cache state: LastCacheState, } +#[derive(Debug, PartialEq, Eq)] +pub(crate) enum ValueColumnType { + AcceptNew { seen: HashSet }, + Explicit { columns: Vec }, +} + +impl From<&ValueColumnType> for LastCacheValueColumnsDef { + fn from(t: &ValueColumnType) -> Self { + match t { + ValueColumnType::AcceptNew { .. } => Self::AllNonKeyColumns, + ValueColumnType::Explicit { columns } => Self::Explicit { + columns: columns.to_vec(), + }, + } + } +} + impl LastCache { /// Create a new [`LastCache`] fn new( count: LastCacheSize, ttl: Duration, - key_columns: Vec, + key_columns: Vec<(ColumnId, Arc)>, + value_columns: ValueColumnType, schema: ArrowSchemaRef, - series_key: Option>, - accept_new_fields: bool, + series_key: Option<&[ColumnId]>, ) -> Self { + let mut key_column_ids = IndexSet::new(); + let mut key_column_name_to_ids = HashMap::new(); + for (id, name) in key_columns { + key_column_ids.insert(id); + key_column_name_to_ids.insert(name, id); + } Self { count, ttl, - key_columns: Arc::new(key_columns.into_iter().collect()), - series_key, - accept_new_fields, + key_column_ids: Arc::new(key_column_ids), + key_column_name_to_ids: Arc::new(key_column_name_to_ids), + value_columns, schema, + series_key: series_key.map(|sk| sk.iter().copied().collect()), state: LastCacheState::Init, } } @@ -581,19 +716,12 @@ impl LastCache { if self.ttl != other.ttl { return Err(Error::cache_already_exists("different ttl specified")); } - if self.key_columns != other.key_columns { + if self.key_column_ids != other.key_column_ids { return Err(Error::cache_already_exists("key columns are not the same")); } - if self.accept_new_fields != other.accept_new_fields { - return Err(Error::cache_already_exists(if self.accept_new_fields { - "new configuration does not accept new fields" - } else { - "new configuration accepts new fields while the existing does not" - })); - } - if !self.schema.contains(&other.schema) { + if self.value_columns != other.value_columns { return Err(Error::cache_already_exists( - "the schema from specified value columns do not align", + "provided value columns are not the same", )); } if self.series_key != other.series_key { @@ -604,6 +732,21 @@ impl LastCache { Ok(()) } + fn arrow_schema(&self) -> ArrowSchemaRef { + Arc::clone(&self.schema) + } + + fn accept_new_fields(&self) -> bool { + matches!(self.value_columns, ValueColumnType::AcceptNew { .. }) + } + + fn should_update_schema_from_row(&self, row: &Row) -> bool { + match &self.value_columns { + ValueColumnType::AcceptNew { seen } => row.fields.iter().any(|f| !seen.contains(&f.id)), + ValueColumnType::Explicit { .. } => false, + } + } + /// Push a [`Row`] from the write buffer into the cache /// /// If a key column is not present in the row, the row will be ignored. 
@@ -612,21 +755,21 @@ impl LastCache { /// /// This will panic if the internal cache state's keys are out-of-order with respect to the /// order of the `key_columns` on this [`LastCache`] - pub(crate) fn push(&mut self, row: &Row) { - let schema = Arc::clone(&self.schema); + pub(crate) fn push(&mut self, row: &Row, table_def: Arc) { + let accept_new_fields = self.accept_new_fields(); let mut target = &mut self.state; - let mut key_iter = self.key_columns.iter().peekable(); - while let (Some(key), peek) = (key_iter.next(), key_iter.peek()) { + let mut key_iter = self.key_column_ids.iter().peekable(); + while let (Some(col_id), peek) = (key_iter.next(), key_iter.peek()) { if target.is_init() { *target = LastCacheState::Key(LastCacheKey { - column_name: key.to_string(), + column_id: *col_id, value_map: Default::default(), }); } let Some(value) = row .fields .iter() - .find(|f| f.name.as_ref() == *key) + .find(|f| f.id == *col_id) .map(|f| KeyValue::from(&f.value)) else { // ignore the row if it does not contain all key columns @@ -634,22 +777,23 @@ impl LastCache { }; let cache_key = target.as_key_mut().unwrap(); assert_eq!( - &cache_key.column_name, key, + &cache_key.column_id, col_id, "key columns must match cache key order" ); target = cache_key.value_map.entry(value).or_insert_with(|| { - if let Some(next_key) = peek { + if let Some(next_col_id) = peek { LastCacheState::Key(LastCacheKey { - column_name: next_key.to_string(), + column_id: **next_col_id, value_map: Default::default(), }) } else { LastCacheState::Store(LastCacheStore::new( self.count.into(), self.ttl, - Arc::clone(&schema), - Arc::clone(&self.key_columns), + Arc::clone(&table_def), + Arc::clone(&self.key_column_ids), self.series_key.as_ref(), + accept_new_fields, )) } }); @@ -659,44 +803,44 @@ impl LastCache { *target = LastCacheState::Store(LastCacheStore::new( self.count.into(), self.ttl, - Arc::clone(&schema), - Arc::clone(&self.key_columns), + Arc::clone(&table_def), + Arc::clone(&self.key_column_ids), self.series_key.as_ref(), + accept_new_fields, )); } let store = target.as_store_mut().expect( "cache target should be the actual store after iterating through all key columns", ); - let Some(new_columns) = store.push(row, self.accept_new_fields) else { - // Unless new columns were added, and we need to update the schema, we are done. 
- return; - }; - - let mut sb = ArrowSchemaBuilder::new(); - for f in self.schema.fields().iter() { - sb.push(Arc::clone(f)); + store.push(row); + if self.should_update_schema_from_row(row) { + let (schema, seen) = last_cache_schema_from_table_def( + table_def, + self.key_column_ids.iter().copied().collect(), + None, + ); + self.schema = schema; + self.value_columns = ValueColumnType::AcceptNew { seen }; } - for (name, data_type) in new_columns { - let field = Arc::new(ArrowField::new(name, data_type, true)); - sb.try_merge(&field).expect("buffer should have validated incoming writes to prevent against data type conflicts"); - } - let new_schema = Arc::new(sb.finish()); - self.schema = new_schema; } /// Produce a set of [`RecordBatch`]es from the cache, using the given set of [`Predicate`]s - fn to_record_batches(&self, predicates: &[Predicate]) -> Result, ArrowError> { + fn to_record_batches( + &self, + table_def: Arc, + predicates: &[Predicate], + ) -> Result, ArrowError> { // map the provided predicates on to the key columns // there may not be predicates provided for each key column, hence the Option let predicates: Vec> = self - .key_columns + .key_column_ids .iter() - .map(|key| predicates.iter().find(|p| p.key == *key)) + .map(|col_id| predicates.iter().find(|p| p.column_id == *col_id)) .collect(); let mut caches = vec![ExtendedLastCacheState { state: &self.state, - additional_columns: vec![], + key_column_values: vec![], }]; for predicate in predicates { @@ -711,20 +855,20 @@ impl LastCache { if let Some(pred) = predicate { let next_states = cache_key.evaluate_predicate(pred); new_caches.extend(next_states.into_iter().map(|(state, value)| { - let mut additional_columns = c.additional_columns.clone(); - additional_columns.push((&cache_key.column_name, value)); + let mut additional_columns = c.key_column_values.clone(); + additional_columns.push(value); ExtendedLastCacheState { state, - additional_columns, + key_column_values: additional_columns, } })); } else { new_caches.extend(cache_key.value_map.iter().map(|(v, state)| { - let mut additional_columns = c.additional_columns.clone(); - additional_columns.push((&cache_key.column_name, v)); + let mut additional_columns = c.key_column_values.clone(); + additional_columns.push(v); ExtendedLastCacheState { state, - additional_columns, + key_column_values: additional_columns, } })); } @@ -734,7 +878,7 @@ impl LastCache { caches .into_iter() - .map(|c| c.to_record_batch(&self.schema)) + .map(|c| c.to_record_batch(Arc::clone(&table_def), Arc::clone(&self.schema))) .collect() } @@ -748,11 +892,8 @@ impl LastCache { .filter_map(|expr| { match expr { Expr::BinaryExpr(BinaryExpr { left, op, right }) => { - let key = if let Expr::Column(c) = left.as_ref() { - if !self.key_columns.contains(c.name()) { - return None; - } - c.name.to_string() + let col_id = if let Expr::Column(c) = left.as_ref() { + self.key_column_name_to_ids.get(c.name()).copied()? 
} else { return None; }; @@ -767,8 +908,8 @@ impl LastCache { _ => return None, }; match op { - Operator::Eq => Some(Predicate::new_eq(key, value)), - Operator::NotEq => Some(Predicate::new_not_eq(key, value)), + Operator::Eq => Some(Predicate::new_eq(col_id, value)), + Operator::NotEq => Some(Predicate::new_not_eq(col_id, value)), _ => None, } } @@ -777,11 +918,8 @@ impl LastCache { list, negated, }) => { - let key = if let Expr::Column(c) = expr.as_ref() { - if !self.key_columns.contains(c.name()) { - return None; - } - c.name.to_string() + let col_id = if let Expr::Column(c) = expr.as_ref() { + self.key_column_name_to_ids.get(c.name()).copied()? } else { return None; }; @@ -805,9 +943,9 @@ impl LastCache { }) .collect(); if *negated { - Some(Predicate::new_not_in(key, values)) + Some(Predicate::new_not_in(col_id, values)) } else { - Some(Predicate::new_in(key, values)) + Some(Predicate::new_in(col_id, values)) } } _ => None, @@ -825,26 +963,19 @@ impl LastCache { fn to_definition( &self, table_id: TableId, - table: impl Into, - name: impl Into, + table: impl Into>, + name: impl Into>, ) -> LastCacheDefinition { LastCacheDefinition { table_id, table: table.into(), name: name.into(), - key_columns: self.key_columns.iter().cloned().collect(), - value_columns: if self.accept_new_fields { - LastCacheValueColumnsDef::AllNonKeyColumns - } else { - LastCacheValueColumnsDef::Explicit { - columns: self - .schema - .fields() - .iter() - .filter(|f| !self.key_columns.contains(f.name())) - .map(|f| f.name().to_owned()) - .collect(), - } + key_columns: self.key_column_ids.iter().copied().collect(), + value_columns: match &self.value_columns { + ValueColumnType::AcceptNew { .. } => LastCacheValueColumnsDef::AllNonKeyColumns, + ValueColumnType::Explicit { columns, .. } => LastCacheValueColumnsDef::Explicit { + columns: columns.to_vec(), + }, }, count: self.count, ttl: self.ttl.as_secs(), @@ -859,7 +990,7 @@ impl LastCache { #[derive(Debug)] struct ExtendedLastCacheState<'a> { state: &'a LastCacheState, - additional_columns: Vec<(&'a String, &'a KeyValue)>, + key_column_values: Vec<&'a KeyValue>, } impl<'a> ExtendedLastCacheState<'a> { @@ -870,94 +1001,94 @@ impl<'a> ExtendedLastCacheState<'a> { /// /// # Panics /// - /// This assumes taht the `state` is a [`LastCacheStore`] and will panic otherwise. - fn to_record_batch(&self, output_schema: &ArrowSchemaRef) -> Result { + /// This assumes that the `state` is a [`LastCacheStore`] and will panic otherwise. 
+ fn to_record_batch( + &self, + table_def: Arc, + schema: ArrowSchemaRef, + ) -> Result { let store = self .state .as_store() .expect("should only be calling to_record_batch when using a store"); let n = store.len(); - let extended: Option<(Vec, Vec)> = if self.additional_columns.is_empty() - { + let extended: Option> = if self.key_column_values.is_empty() { None } else { Some( - self.additional_columns + self.key_column_values .iter() - .map(|(name, value)| { - let field = Arc::new(value.as_arrow_field(*name)); - match value { - KeyValue::String(v) => { - let mut builder = StringBuilder::new(); - for _ in 0..n { - builder.append_value(v); - } - (field, Arc::new(builder.finish()) as ArrayRef) + .map(|value| match value { + KeyValue::String(v) => { + let mut builder = StringBuilder::new(); + for _ in 0..n { + builder.append_value(v); } - KeyValue::Int(v) => { - let mut builder = Int64Builder::new(); - for _ in 0..n { - builder.append_value(*v); - } - (field, Arc::new(builder.finish()) as ArrayRef) + Arc::new(builder.finish()) as ArrayRef + } + KeyValue::Int(v) => { + let mut builder = Int64Builder::new(); + for _ in 0..n { + builder.append_value(*v); } - KeyValue::UInt(v) => { - let mut builder = UInt64Builder::new(); - for _ in 0..n { - builder.append_value(*v); - } - (field, Arc::new(builder.finish()) as ArrayRef) + Arc::new(builder.finish()) as ArrayRef + } + KeyValue::UInt(v) => { + let mut builder = UInt64Builder::new(); + for _ in 0..n { + builder.append_value(*v); } - KeyValue::Bool(v) => { - let mut builder = BooleanBuilder::new(); - for _ in 0..n { - builder.append_value(*v); - } - (field, Arc::new(builder.finish()) as ArrayRef) + Arc::new(builder.finish()) as ArrayRef + } + KeyValue::Bool(v) => { + let mut builder = BooleanBuilder::new(); + for _ in 0..n { + builder.append_value(*v); } + Arc::new(builder.finish()) as ArrayRef } }) .collect(), ) }; - store.to_record_batch(output_schema, extended) + store.to_record_batch(table_def, schema, extended) } } /// A predicate used for evaluating key column values in the cache on query #[derive(Debug, Clone)] pub(crate) struct Predicate { - /// The left-hand-side of the predicate - key: String, + /// The left-hand-side of the predicate as a valid `ColumnId` + column_id: ColumnId, /// The right-hand-side of the predicate kind: PredicateKind, } impl Predicate { - fn new_eq(key: impl Into, value: KeyValue) -> Self { + fn new_eq(column_id: ColumnId, value: KeyValue) -> Self { Self { - key: key.into(), + column_id, kind: PredicateKind::Eq(value), } } - fn new_not_eq(key: impl Into, value: KeyValue) -> Self { + fn new_not_eq(column_id: ColumnId, value: KeyValue) -> Self { Self { - key: key.into(), + column_id, kind: PredicateKind::NotEq(value), } } - fn new_in(key: impl Into, values: Vec) -> Self { + fn new_in(column_id: ColumnId, values: Vec) -> Self { Self { - key: key.into(), + column_id, kind: PredicateKind::In(values), } } - fn new_not_in(key: impl Into, values: Vec) -> Self { + fn new_not_in(column_id: ColumnId, values: Vec) -> Self { Self { - key: key.into(), + column_id, kind: PredicateKind::NotIn(values), } } @@ -1028,8 +1159,8 @@ impl LastCacheState { /// Holds a node within a [`LastCache`] for a given key column #[derive(Debug)] struct LastCacheKey { - /// The name of the key column - column_name: String, + /// The column's ID + column_id: ColumnId, /// A map of key column value to nested [`LastCacheState`] /// /// All values should point at either another key or a [`LastCacheStore`] @@ -1048,10 +1179,10 @@ impl LastCacheKey { &'a 
self, predicate: &'b Predicate, ) -> Vec<(&'a LastCacheState, &'b KeyValue)> { - if predicate.key != self.column_name { + if predicate.column_id != self.column_id { panic!( - "attempted to evaluate unexpected predicate with key {} for column named {}", - predicate.key, self.column_name + "attempted to evaluate unexpected predicate with key {} for column with id {}", + predicate.column_id, self.column_id ); } match &predicate.kind { @@ -1105,18 +1236,6 @@ impl KeyValue { } } -impl KeyValue { - /// Get the corresponding arrow field definition for this [`KeyValue`] - fn as_arrow_field(&self, name: impl Into) -> ArrowField { - match self { - KeyValue::String(_) => ArrowField::new(name, DataType::Utf8, false), - KeyValue::Int(_) => ArrowField::new(name, DataType::Int64, false), - KeyValue::UInt(_) => ArrowField::new(name, DataType::UInt64, false), - KeyValue::Bool(_) => ArrowField::new(name, DataType::Boolean, false), - } - } -} - impl From<&FieldData> for KeyValue { fn from(field: &FieldData) -> Self { match field { @@ -1142,9 +1261,12 @@ struct LastCacheStore { /// as fast lookup (see [here][perf]). /// /// [perf]: https://github.com/indexmap-rs/indexmap?tab=readme-ov-file#performance - cache: IndexMap, - /// A reference to the set of key columns for the cache - key_columns: Arc>, + cache: IndexMap, + /// A reference to the key column id lookup for the cache. This is within an `Arc` because it is + /// shared with the parent `LastCache`. + key_column_ids: Arc>, + /// Whether or not this store accepts new fields when they are added to the cached table + accept_new_fields: bool, /// A ring buffer holding the instants at which entries in the cache were inserted /// /// This is used to evict cache values that outlive the `ttl` @@ -1164,32 +1286,34 @@ impl LastCacheStore { fn new( count: usize, ttl: Duration, - schema: ArrowSchemaRef, - key_columns: Arc>, - series_keys: Option<&HashSet>, + table_def: Arc, + key_column_ids: Arc>, + series_keys: Option<&HashSet>, + accept_new_fields: bool, ) -> Self { - let cache = schema - .fields() + let cache = table_def + .columns .iter() - .filter(|f| !key_columns.contains(f.name())) - .map(|f| { + .filter(|&(col_id, _)| (!key_column_ids.contains(col_id))) + .map(|(col_id, col_def)| { ( - f.name().to_string(), + *col_id, CacheColumn::new( - f.data_type(), + col_def.data_type, count, - series_keys.is_some_and(|sk| sk.contains(f.name().as_str())), + series_keys.is_some_and(|sk| sk.contains(col_id)), ), ) }) .collect(); Self { cache, - key_columns, + key_column_ids, instants: VecDeque::with_capacity(count), count, ttl, last_time: Time::from_timestamp_nanos(0), + accept_new_fields, } } @@ -1207,58 +1331,47 @@ impl LastCacheStore { /// /// If new fields were added to the [`LastCacheStore`] by this push, the return will be a /// list of those field's name and arrow [`DataType`], and `None` otherwise. 
- fn push<'a>( - &mut self, - row: &'a Row, - accept_new_fields: bool, - ) -> Option> { + fn push(&mut self, row: &Row) { if row.time <= self.last_time.timestamp_nanos() { - return None; + return; } - let mut result = None; - let mut seen = HashSet::<&str>::new(); - if accept_new_fields { + let mut seen = HashSet::::new(); + if self.accept_new_fields { // Check the length before any rows are added to ensure that the correct amount // of nulls are back-filled when new fields/columns are added: let starting_cache_size = self.len(); for field in row.fields.iter() { - seen.insert(field.name.as_ref()); - if let Some(col) = self.cache.get_mut(field.name.as_ref()) { + seen.insert(field.id); + if let Some(col) = self.cache.get_mut(&field.id) { // In this case, the field already has an entry in the cache, so just push: col.push(&field.value); - } else if !self.key_columns.contains(field.name.as_ref()) { + } else if !self.key_column_ids.contains(&field.id) { // In this case, there is not an entry for the field in the cache, so if the // value is not one of the key columns, then it is a new field being added. - let data_type = data_type_from_buffer_field(field); - let col = self - .cache - .entry(field.name.to_string()) - .or_insert_with(|| CacheColumn::new(&data_type, self.count, false)); + let col = self.cache.entry(field.id).or_insert_with(|| { + CacheColumn::new(data_type_from_buffer_field(field), self.count, false) + }); // Back-fill the new cache entry with nulls, then push the new value: for _ in 0..starting_cache_size { col.push_null(); } col.push(&field.value); - // Add the new field to the list of new columns returned: - result - .get_or_insert_with(Vec::new) - .push((field.name.as_ref(), data_type)); } // There is no else block, because the only alternative would be that this is a // key column, which we ignore. } } else { for field in row.fields.iter() { - seen.insert(field.name.as_ref()); - if let Some(c) = self.cache.get_mut(field.name.as_ref()) { + seen.insert(field.id); + if let Some(c) = self.cache.get_mut(&field.id) { c.push(&field.value); } } } // Need to check for columns not seen in the buffered row data, to push nulls into // those respective cache entries. - for (name, column) in self.cache.iter_mut() { - if !seen.contains(name.as_str()) { + for (id, column) in self.cache.iter_mut() { + if !seen.contains(id) { column.push_null(); } } @@ -1267,7 +1380,6 @@ impl LastCacheStore { } self.instants.push_front(Instant::now()); self.last_time = Time::from_timestamp_nanos(row.time); - result } /// Convert the contents of this cache into a arrow [`RecordBatch`] @@ -1278,35 +1390,32 @@ impl LastCacheStore { /// for the cache. 
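The reworked `push` above no longer reports newly added fields back to the caller. When the store accepts new fields, a previously unseen field id gets a fresh column that is back-filled with nulls for every row already in the store, and any existing column missing from the incoming row receives a null. A toy model of just that back-filling behaviour, assuming plain `u32` ids and `i64` values and omitting the key-column check, the instant ring buffer, and TTL handling:

```rust
use std::collections::{BTreeMap, BTreeSet};

/// Toy model of the null back-filling done when `accept_new_fields` is true.
#[derive(Default)]
struct Store {
    // column id -> column values (None represents a null)
    cache: BTreeMap<u32, Vec<Option<i64>>>,
}

impl Store {
    fn len(&self) -> usize {
        self.cache.values().next().map_or(0, |col| col.len())
    }

    fn push(&mut self, row: &[(u32, i64)]) {
        // Capture the size *before* this row is added, so a brand-new column
        // can be padded with exactly that many nulls.
        let starting_cache_size = self.len();
        let mut seen = BTreeSet::new();
        for &(col_id, value) in row {
            seen.insert(col_id);
            let col = self.cache.entry(col_id).or_insert_with(|| {
                // New field: back-fill nulls for all previously pushed rows.
                vec![None; starting_cache_size]
            });
            col.push(Some(value));
        }
        // Columns that existed before but were not in this row get a null.
        for (col_id, col) in self.cache.iter_mut() {
            if !seen.contains(col_id) {
                col.push(None);
            }
        }
    }
}

fn main() {
    let mut store = Store::default();
    store.push(&[(0, 1)]); // only column 0
    store.push(&[(0, 2), (1, 20)]); // column 1 appears for the first time
    assert_eq!(store.cache[&1], vec![None, Some(20)]); // back-filled null
    assert_eq!(store.cache[&0], vec![Some(1), Some(2)]);
}
```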
fn to_record_batch( &self, - output_schema: &ArrowSchemaRef, - extended: Option<(Vec, Vec)>, + table_def: Arc, + schema: ArrowSchemaRef, + extended: Option>, ) -> Result { - let (fields, mut arrays): (Vec, Vec) = output_schema - .fields() - .iter() - .cloned() - .filter_map(|f| { - if let Some(c) = self.cache.get(f.name()) { - Some((f, c.data.as_array())) - } else if self.key_columns.contains(f.name()) { - // We prepend key columns with the extended set provided - None - } else { - Some((Arc::clone(&f), new_null_array(f.data_type(), self.len()))) + let mut arrays = extended.unwrap_or_default(); + if self.accept_new_fields { + for field in schema.fields().iter() { + let id = table_def + .column_name_to_id(field.name().as_str()) + .ok_or_else(|| { + ArrowError::from_external_error(Box::new(Error::ColumnDoesNotExistByName { + column_name: field.name().to_string(), + })) + })?; + if self.key_column_ids.contains(&id) { + continue; } - }) - .collect(); - - let mut sb = ArrowSchemaBuilder::new(); - if let Some((ext_fields, mut ext_arrays)) = extended { - // If there are key column values being outputted, they get prepended to appear before - // all value columns in the query output: - sb.extend(ext_fields); - ext_arrays.append(&mut arrays); - arrays = ext_arrays; + arrays.push(self.cache.get(&id).map_or_else( + || new_null_array(field.data_type(), self.len()), + |c| c.data.as_array(), + )); + } + } else { + arrays.extend(self.cache.iter().map(|(_, col)| col.data.as_array())); } - sb.extend(fields); - RecordBatch::try_new(Arc::new(sb.finish()), arrays) + RecordBatch::try_new(schema, arrays) } /// Remove expired values from the [`LastCacheStore`] @@ -1345,7 +1454,7 @@ struct CacheColumn { impl CacheColumn { /// Create a new [`CacheColumn`] for the given arrow [`DataType`] and size - fn new(data_type: &DataType, size: usize, is_series_key: bool) -> Self { + fn new(data_type: InfluxColumnType, size: usize, is_series_key: bool) -> Self { Self { size, data: CacheColumnData::new(data_type, size, is_series_key), @@ -1388,24 +1497,23 @@ enum CacheColumnData { impl CacheColumnData { /// Create a new [`CacheColumnData`] - fn new(data_type: &DataType, size: usize, is_series_key: bool) -> Self { + fn new(data_type: InfluxColumnType, size: usize, is_series_key: bool) -> Self { match data_type { - DataType::Boolean => Self::Bool(VecDeque::with_capacity(size)), - DataType::Int64 => Self::I64(VecDeque::with_capacity(size)), - DataType::UInt64 => Self::U64(VecDeque::with_capacity(size)), - DataType::Float64 => Self::F64(VecDeque::with_capacity(size)), - DataType::Timestamp(TimeUnit::Nanosecond, _) => { - Self::Time(VecDeque::with_capacity(size)) - } - DataType::Utf8 => Self::String(VecDeque::with_capacity(size)), - DataType::Dictionary(k, v) if **k == DataType::Int32 && **v == DataType::Utf8 => { + InfluxColumnType::Tag => { if is_series_key { Self::Key(VecDeque::with_capacity(size)) } else { Self::Tag(VecDeque::with_capacity(size)) } } - _ => panic!("unsupported data type for last cache: {data_type}"), + InfluxColumnType::Field(field) => match field { + InfluxFieldType::Float => Self::F64(VecDeque::with_capacity(size)), + InfluxFieldType::Integer => Self::I64(VecDeque::with_capacity(size)), + InfluxFieldType::UInteger => Self::U64(VecDeque::with_capacity(size)), + InfluxFieldType::String => Self::String(VecDeque::with_capacity(size)), + InfluxFieldType::Boolean => Self::Bool(VecDeque::with_capacity(size)), + }, + InfluxColumnType::Timestamp => Self::Time(VecDeque::with_capacity(size)), } } @@ -1567,20 +1675,15 @@ 
impl CacheColumnData { } } -fn data_type_from_buffer_field(field: &Field) -> DataType { +fn data_type_from_buffer_field(field: &Field) -> InfluxColumnType { match field.value { - FieldData::Timestamp(_) => DataType::Timestamp(TimeUnit::Nanosecond, None), - FieldData::Key(_) => { - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)) - } - FieldData::Tag(_) => { - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)) - } - FieldData::String(_) => DataType::Utf8, - FieldData::Integer(_) => DataType::Int64, - FieldData::UInteger(_) => DataType::UInt64, - FieldData::Float(_) => DataType::Float64, - FieldData::Boolean(_) => DataType::Boolean, + FieldData::Timestamp(_) => InfluxColumnType::Timestamp, + FieldData::Key(_) | FieldData::Tag(_) => InfluxColumnType::Tag, + FieldData::String(_) => InfluxColumnType::Field(InfluxFieldType::String), + FieldData::Integer(_) => InfluxColumnType::Field(InfluxFieldType::Integer), + FieldData::UInteger(_) => InfluxColumnType::Field(InfluxFieldType::UInteger), + FieldData::Float(_) => InfluxColumnType::Field(InfluxFieldType::Float), + FieldData::Boolean(_) => InfluxColumnType::Field(InfluxFieldType::Boolean), } } @@ -1600,7 +1703,7 @@ mod tests { use bimap::BiHashMap; use data_types::NamespaceName; use influxdb3_catalog::catalog::{Catalog, DatabaseSchema, TableDefinition}; - use influxdb3_id::{DbId, SerdeVecHashMap, TableId}; + use influxdb3_id::{ColumnId, DbId, SerdeVecMap, TableId}; use influxdb3_wal::{LastCacheDefinition, WalConfig}; use insta::assert_json_snapshot; use iox_time::{MockProvider, Time, TimeProvider}; @@ -1628,12 +1731,10 @@ mod tests { .unwrap() } - #[tokio::test] + #[test_log::test(tokio::test)] async fn pick_up_latest_write() { let db_name = "foo"; - let db_id = DbId::from(0); let tbl_name = "cpu"; - let tbl_id = TableId::from(0); let wbuf = setup_write_buffer().await; @@ -1648,6 +1749,10 @@ mod tests { .await .unwrap(); + let (db_id, db_schema) = wbuf.catalog().db_schema_and_id("foo").unwrap(); + let (tbl_id, table_def) = db_schema.table_definition_and_id("cpu").unwrap(); + let col_id = table_def.column_name_to_id("host").unwrap(); + // Create the last cache: wbuf.create_last_cache( db_id, @@ -1655,7 +1760,7 @@ mod tests { Some("cache"), None, None, - Some(vec!["host".to_string()]), + Some(vec![(col_id, "host".into())]), None, ) .await @@ -1672,7 +1777,7 @@ mod tests { .await .unwrap(); - let predicates = &[Predicate::new_eq("host", KeyValue::string("a"))]; + let predicates = &[Predicate::new_eq(col_id, KeyValue::string("a"))]; // Check what is in the last cache: let batch = wbuf @@ -1736,12 +1841,10 @@ mod tests { /// /// We expect that the query result will include a `host` column, to delineate rows associated /// with different host values in the cache. 
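The tests that follow no longer hard-code `DbId::from(0)` and `TableId::from(0)`; they write once to populate the catalog and then look up the database, table, and column ids by name. A pared-down sketch of that lookup pattern, with hypothetical stand-ins for `DatabaseSchema` and `TableDefinition` that expose only the name-to-id accessors used here:

```rust
use std::collections::HashMap;
use std::sync::Arc;

/// Hypothetical, pared-down catalog types; only the lookups used by the
/// updated tests are modelled.
struct TableDef {
    id: u32,
    column_name_to_id: HashMap<&'static str, u32>,
}

struct DbSchema {
    id: u32,
    tables: HashMap<&'static str, Arc<TableDef>>,
}

impl DbSchema {
    fn table_definition_and_id(&self, name: &str) -> Option<(u32, Arc<TableDef>)> {
        self.tables.get(name).map(|t| (t.id, Arc::clone(t)))
    }
}

fn main() {
    let table = Arc::new(TableDef {
        id: 0,
        column_name_to_id: HashMap::from([("host", 0), ("region", 1)]),
    });
    let db = DbSchema {
        id: 0,
        tables: HashMap::from([("cpu", table)]),
    };

    // The pattern used throughout the updated tests: resolve ids after the
    // first write has populated the catalog, instead of assuming they are 0.
    let (tbl_id, table_def) = db.table_definition_and_id("cpu").unwrap();
    let host_col_id = *table_def.column_name_to_id.get("host").unwrap();
    assert_eq!((db.id, tbl_id, host_col_id), (0, 0, 0));
}
```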
- #[tokio::test] + #[test_log::test(tokio::test)] async fn cache_key_column_predicates() { let db_name = "foo"; - let db_id = DbId::from(0); let tbl_name = "cpu"; - let tbl_id = TableId::from(0); let wbuf = setup_write_buffer().await; // Do one write to update the catalog with a db and table: @@ -1755,6 +1858,11 @@ mod tests { .await .unwrap(); + let (db_id, db_schema) = wbuf.catalog().db_schema_and_id("foo").unwrap(); + let (tbl_id, table_def) = db_schema.table_definition_and_id("cpu").unwrap(); + let host_col_id = table_def.column_name_to_id("host").unwrap(); + let region_col_id = table_def.column_name_to_id("region").unwrap(); + // Create the last cache with keys on all tag columns: wbuf.create_last_cache( db_id, @@ -1762,7 +1870,10 @@ mod tests { Some("cache"), None, None, - Some(vec!["region".to_string(), "host".to_string()]), + Some(vec![ + (region_col_id, "region".into()), + (host_col_id, "host".into()), + ]), None, ) .await @@ -1798,8 +1909,8 @@ mod tests { // Predicate including both key columns only produces value columns from the cache TestCase { predicates: &[ - Predicate::new_eq("region", KeyValue::string("us")), - Predicate::new_eq("host", KeyValue::string("c")), + Predicate::new_eq(region_col_id, KeyValue::string("us")), + Predicate::new_eq(host_col_id, KeyValue::string("c")), ], expected: &[ "+--------+------+-----------------------------+-------+", @@ -1812,7 +1923,7 @@ mod tests { // Predicate on only region key column will have host column outputted in addition to // the value columns: TestCase { - predicates: &[Predicate::new_eq("region", KeyValue::string("us"))], + predicates: &[Predicate::new_eq(region_col_id, KeyValue::string("us"))], expected: &[ "+--------+------+-----------------------------+-------+", "| region | host | time | usage |", @@ -1825,7 +1936,7 @@ mod tests { }, // Similar to previous, with a different region predicate: TestCase { - predicates: &[Predicate::new_eq("region", KeyValue::string("ca"))], + predicates: &[Predicate::new_eq(region_col_id, KeyValue::string("ca"))], expected: &[ "+--------+------+-----------------------------+-------+", "| region | host | time | usage |", @@ -1839,7 +1950,7 @@ mod tests { // Predicate on only host key column will have region column outputted in addition to // the value columns: TestCase { - predicates: &[Predicate::new_eq("host", KeyValue::string("a"))], + predicates: &[Predicate::new_eq(host_col_id, KeyValue::string("a"))], expected: &[ "+--------+------+-----------------------------+-------+", "| region | host | time | usage |", @@ -1868,7 +1979,10 @@ mod tests { // Using a non-existent key column as a predicate has no effect: // TODO: should this be an error? 
TestCase { - predicates: &[Predicate::new_eq("container_id", KeyValue::string("12345"))], + predicates: &[Predicate::new_eq( + ColumnId::new(), + KeyValue::string("12345"), + )], expected: &[ "+--------+------+-----------------------------+-------+", "| region | host | time | usage |", @@ -1884,31 +1998,31 @@ mod tests { }, // Using a non existent key column value yields empty result set: TestCase { - predicates: &[Predicate::new_eq("region", KeyValue::string("eu"))], + predicates: &[Predicate::new_eq(region_col_id, KeyValue::string("eu"))], expected: &["++", "++"], }, // Using an invalid combination of key column values yields an empty result set: TestCase { predicates: &[ - Predicate::new_eq("region", KeyValue::string("ca")), - Predicate::new_eq("host", KeyValue::string("a")), + Predicate::new_eq(region_col_id, KeyValue::string("ca")), + Predicate::new_eq(host_col_id, KeyValue::string("a")), ], expected: &["++", "++"], }, // Using a non-existent key column value (for host column) also yields empty result set: TestCase { - predicates: &[Predicate::new_eq("host", KeyValue::string("g"))], + predicates: &[Predicate::new_eq(host_col_id, KeyValue::string("g"))], expected: &["++", "++"], }, // Using an incorrect type for a key column value in predicate also yields empty result // set. TODO: should this be an error? TestCase { - predicates: &[Predicate::new_eq("host", KeyValue::Bool(true))], + predicates: &[Predicate::new_eq(host_col_id, KeyValue::Bool(true))], expected: &["++", "++"], }, // Using a != predicate TestCase { - predicates: &[Predicate::new_not_eq("region", KeyValue::string("us"))], + predicates: &[Predicate::new_not_eq(region_col_id, KeyValue::string("us"))], expected: &[ "+--------+------+-----------------------------+-------+", "| region | host | time | usage |", @@ -1922,7 +2036,7 @@ mod tests { // Using an IN predicate: TestCase { predicates: &[Predicate::new_in( - "host", + host_col_id, vec![KeyValue::string("a"), KeyValue::string("b")], )], expected: &[ @@ -1937,7 +2051,7 @@ mod tests { // Using a NOT IN predicate: TestCase { predicates: &[Predicate::new_not_in( - "host", + host_col_id, vec![KeyValue::string("a"), KeyValue::string("b")], )], expected: &[ @@ -1967,9 +2081,7 @@ mod tests { #[tokio::test] async fn non_default_cache_size() { let db_name = "foo"; - let db_id = DbId::from(0); let tbl_name = "cpu"; - let tbl_id = TableId::from(0); let wbuf = setup_write_buffer().await; // Do one write to update the catalog with a db and table: @@ -1983,6 +2095,11 @@ mod tests { .await .unwrap(); + let (db_id, db_schema) = wbuf.catalog().db_schema_and_id("foo").unwrap(); + let (tbl_id, table_def) = db_schema.table_definition_and_id("cpu").unwrap(); + let host_col_id = table_def.column_name_to_id("host").unwrap(); + let region_col_id = table_def.column_name_to_id("region").unwrap(); + // Create the last cache with keys on all tag columns: wbuf.create_last_cache( db_id, @@ -1990,7 +2107,10 @@ mod tests { Some("cache"), Some(10), None, - Some(vec!["region".to_string(), "host".to_string()]), + Some(vec![ + (region_col_id, "region".into()), + (host_col_id, "host".into()), + ]), None, ) .await @@ -2053,8 +2173,8 @@ mod tests { let test_cases = [ TestCase { predicates: &[ - Predicate::new_eq("region", KeyValue::string("us")), - Predicate::new_eq("host", KeyValue::string("a")), + Predicate::new_eq(region_col_id, KeyValue::string("us")), + Predicate::new_eq(host_col_id, KeyValue::string("a")), ], expected: &[ "+--------+------+--------------------------------+-------+", @@ -2068,7 +2188,7 @@ 
mod tests { ], }, TestCase { - predicates: &[Predicate::new_eq("region", KeyValue::string("us"))], + predicates: &[Predicate::new_eq(region_col_id, KeyValue::string("us"))], expected: &[ "+--------+------+--------------------------------+-------+", "| region | host | time | usage |", @@ -2085,7 +2205,7 @@ mod tests { ], }, TestCase { - predicates: &[Predicate::new_eq("host", KeyValue::string("a"))], + predicates: &[Predicate::new_eq(host_col_id, KeyValue::string("a"))], expected: &[ "+--------+------+--------------------------------+-------+", "| region | host | time | usage |", @@ -2098,7 +2218,7 @@ mod tests { ], }, TestCase { - predicates: &[Predicate::new_eq("host", KeyValue::string("b"))], + predicates: &[Predicate::new_eq(host_col_id, KeyValue::string("b"))], expected: &[ "+--------+------+--------------------------------+-------+", "| region | host | time | usage |", @@ -2143,9 +2263,7 @@ mod tests { #[tokio::test] async fn cache_ttl() { let db_name = "foo"; - let db_id = DbId::from(0); let tbl_name = "cpu"; - let tbl_id = TableId::from(0); let wbuf = setup_write_buffer().await; // Do one write to update the catalog with a db and table: @@ -2159,6 +2277,11 @@ mod tests { .await .unwrap(); + let (db_id, db_schema) = wbuf.catalog().db_schema_and_id("foo").unwrap(); + let (tbl_id, table_def) = db_schema.table_definition_and_id("cpu").unwrap(); + let host_col_id = table_def.column_name_to_id("host").unwrap(); + let region_col_id = table_def.column_name_to_id("region").unwrap(); + // Create the last cache with keys on all tag columns: wbuf.create_last_cache( db_id, @@ -2167,7 +2290,10 @@ mod tests { // use a cache size greater than 1 to ensure the TTL is doing the evicting Some(10), Some(Duration::from_millis(50)), - Some(vec!["region".to_string(), "host".to_string()]), + Some(vec![ + (region_col_id, "region".into()), + (host_col_id, "host".into()), + ]), None, ) .await @@ -2196,8 +2322,8 @@ mod tests { // Check the cache for values: let predicates = &[ - Predicate::new_eq("region", KeyValue::string("us")), - Predicate::new_eq("host", KeyValue::string("a")), + Predicate::new_eq(region_col_id, KeyValue::string("us")), + Predicate::new_eq(host_col_id, KeyValue::string("a")), ]; // Check what is in the last cache: @@ -2266,7 +2392,7 @@ mod tests { .unwrap(); // Check the cache for values: - let predicates = &[Predicate::new_eq("host", KeyValue::string("a"))]; + let predicates = &[Predicate::new_eq(host_col_id, KeyValue::string("a"))]; // Check what is in the last cache: let batches = wbuf @@ -2290,9 +2416,7 @@ mod tests { #[tokio::test] async fn fields_as_key_columns() { let db_name = "cassini_mission"; - let db_id = DbId::from(0); let tbl_name = "temp"; - let tbl_id = TableId::from(0); let wbuf = setup_write_buffer().await; // Do one write to update the catalog with a db and table: @@ -2309,6 +2433,13 @@ mod tests { .await .unwrap(); + let (db_id, db_schema) = wbuf.catalog().db_schema_and_id("cassini_mission").unwrap(); + let (tbl_id, table_def) = db_schema.table_definition_and_id("temp").unwrap(); + let component_id_col_id = table_def.column_name_to_id("component_id").unwrap(); + let active_col_id = table_def.column_name_to_id("active").unwrap(); + let type_col_id = table_def.column_name_to_id("type").unwrap(); + let loc_col_id = table_def.column_name_to_id("loc").unwrap(); + // Create the last cache with keys on some field columns: wbuf.create_last_cache( db_id, @@ -2317,10 +2448,10 @@ mod tests { None, Some(Duration::from_millis(50)), Some(vec![ - "component_id".to_string(), - 
"active".to_string(), - "type".to_string(), - "loc".to_string(), + (component_id_col_id, "component_id".into()), + (active_col_id, "active".into()), + (type_col_id, "type".into()), + (loc_col_id, "loc".into()), ]), None, ) @@ -2372,7 +2503,7 @@ mod tests { }, // Predicates on tag key column work as expected: TestCase { - predicates: &[Predicate::new_eq("component_id", KeyValue::string("333"))], + predicates: &[Predicate::new_eq(component_id_col_id, KeyValue::string("333"))], expected: &[ "+--------------+--------+--------+------+---------+-----------------------------+", "| component_id | active | type | loc | reading | time |", @@ -2383,7 +2514,7 @@ mod tests { }, // Predicate on a non-string field key: TestCase { - predicates: &[Predicate::new_eq("active", KeyValue::Bool(false))], + predicates: &[Predicate::new_eq(active_col_id, KeyValue::Bool(false))], expected: &[ "+--------------+--------+-------------+---------+---------+-----------------------------+", "| component_id | active | type | loc | reading | time |", @@ -2395,7 +2526,7 @@ mod tests { }, // Predicate on a string field key: TestCase { - predicates: &[Predicate::new_eq("type", KeyValue::string("camera"))], + predicates: &[Predicate::new_eq(type_col_id, KeyValue::string("camera"))], expected: &[ "+--------------+--------+--------+-----------+---------+-----------------------------+", "| component_id | active | type | loc | reading | time |", @@ -2422,9 +2553,7 @@ mod tests { #[tokio::test] async fn series_key_as_default() { let db_name = "windmills"; - let db_id = DbId::from(0); let tbl_name = "wind_speed"; - let tbl_id = TableId::from(0); let wbuf = setup_write_buffer().await; // Do one write to update the catalog with a db and table: @@ -2438,6 +2567,12 @@ mod tests { .await .unwrap(); + let (db_id, db_schema) = wbuf.catalog().db_schema_and_id(db_name).unwrap(); + let (tbl_id, table_def) = db_schema.table_definition_and_id(tbl_name).unwrap(); + let state_col_id = table_def.column_name_to_id("state").unwrap(); + let county_col_id = table_def.column_name_to_id("county").unwrap(); + let farm_col_id = table_def.column_name_to_id("farm").unwrap(); + // Create the last cache with keys on some field columns: wbuf.create_last_cache(db_id, tbl_id, Some("cache"), None, None, None, None) .await @@ -2488,7 +2623,7 @@ mod tests { }, // Predicate on state column, which is part of the series key: TestCase { - predicates: &[Predicate::new_eq("state", KeyValue::string("ca"))], + predicates: &[Predicate::new_eq(state_col_id, KeyValue::string("ca"))], expected: &[ "+-------+--------+-------+-------+-----------------------------+", "| state | county | farm | speed | time |", @@ -2504,7 +2639,7 @@ mod tests { }, // Predicate on county column, which is part of the series key: TestCase { - predicates: &[Predicate::new_eq("county", KeyValue::string("napa"))], + predicates: &[Predicate::new_eq(county_col_id, KeyValue::string("napa"))], expected: &[ "+-------+--------+-------+-------+-----------------------------+", "| state | county | farm | speed | time |", @@ -2516,7 +2651,7 @@ mod tests { }, // Predicate on farm column, which is part of the series key: TestCase { - predicates: &[Predicate::new_eq("farm", KeyValue::string("30-01"))], + predicates: &[Predicate::new_eq(farm_col_id, KeyValue::string("30-01"))], expected: &[ "+-------+--------+-------+-------+-----------------------------+", "| state | county | farm | speed | time |", @@ -2528,9 +2663,9 @@ mod tests { // Predicate on all series key columns: TestCase { predicates: &[ - 
Predicate::new_eq("state", KeyValue::string("ca")), - Predicate::new_eq("county", KeyValue::string("nevada")), - Predicate::new_eq("farm", KeyValue::string("40-01")), + Predicate::new_eq(state_col_id, KeyValue::string("ca")), + Predicate::new_eq(county_col_id, KeyValue::string("nevada")), + Predicate::new_eq(farm_col_id, KeyValue::string("40-01")), ], expected: &[ "+-------+--------+-------+-------+-----------------------------+", @@ -2556,9 +2691,7 @@ mod tests { #[tokio::test] async fn tag_set_as_default() { let db_name = "windmills"; - let db_id = DbId::from(0); let tbl_name = "wind_speed"; - let tbl_id = TableId::from(0); let wbuf = setup_write_buffer().await; // Do one write to update the catalog with a db and table: @@ -2572,6 +2705,12 @@ mod tests { .await .unwrap(); + let (db_id, db_schema) = wbuf.catalog().db_schema_and_id(db_name).unwrap(); + let (tbl_id, table_def) = db_schema.table_definition_and_id(tbl_name).unwrap(); + let state_col_id = table_def.column_name_to_id("state").unwrap(); + let county_col_id = table_def.column_name_to_id("county").unwrap(); + let farm_col_id = table_def.column_name_to_id("farm").unwrap(); + // Create the last cache with keys on some field columns: wbuf.create_last_cache(db_id, tbl_id, Some("cache"), None, None, None, None) .await @@ -2622,7 +2761,7 @@ mod tests { }, // Predicate on state column, which is part of the series key: TestCase { - predicates: &[Predicate::new_eq("state", KeyValue::string("ca"))], + predicates: &[Predicate::new_eq(state_col_id, KeyValue::string("ca"))], expected: &[ "+--------+-------+-------+-------+-----------------------------+", "| county | farm | state | speed | time |", @@ -2638,7 +2777,7 @@ mod tests { }, // Predicate on county column, which is part of the series key: TestCase { - predicates: &[Predicate::new_eq("county", KeyValue::string("napa"))], + predicates: &[Predicate::new_eq(county_col_id, KeyValue::string("napa"))], expected: &[ "+--------+-------+-------+-------+-----------------------------+", "| county | farm | state | speed | time |", @@ -2650,7 +2789,7 @@ mod tests { }, // Predicate on farm column, which is part of the series key: TestCase { - predicates: &[Predicate::new_eq("farm", KeyValue::string("30-01"))], + predicates: &[Predicate::new_eq(farm_col_id, KeyValue::string("30-01"))], expected: &[ "+--------+-------+-------+-------+-----------------------------+", "| county | farm | state | speed | time |", @@ -2662,9 +2801,9 @@ mod tests { // Predicate on all series key columns: TestCase { predicates: &[ - Predicate::new_eq("state", KeyValue::string("ca")), - Predicate::new_eq("county", KeyValue::string("nevada")), - Predicate::new_eq("farm", KeyValue::string("40-01")), + Predicate::new_eq(state_col_id, KeyValue::string("ca")), + Predicate::new_eq(county_col_id, KeyValue::string("nevada")), + Predicate::new_eq(farm_col_id, KeyValue::string("40-01")), ], expected: &[ "+--------+-------+-------+-------+-----------------------------+", @@ -2690,9 +2829,7 @@ mod tests { #[tokio::test] async fn null_values() { let db_name = "weather"; - let db_id = DbId::from(0); let tbl_name = "temp"; - let tbl_id = TableId::from(0); let wbuf = setup_write_buffer().await; // Do a write to update catalog @@ -2707,6 +2844,9 @@ mod tests { .await .unwrap(); + let (db_id, db_schema) = wbuf.catalog().db_schema_and_id(db_name).unwrap(); + let tbl_id = db_schema.table_name_to_id(tbl_name).unwrap(); + // Create the last cache using default tags as keys wbuf.create_last_cache(db_id, tbl_id, None, Some(10), None, None, None) 
.await @@ -2756,12 +2896,10 @@ mod tests { ); } - #[tokio::test] + #[test_log::test(tokio::test)] async fn new_fields_added_to_default_cache() { let db_name = "nhl_stats"; - let db_id = DbId::from(0); let tbl_name = "plays"; - let tbl_id = TableId::from(0); let wbuf = setup_write_buffer().await; // Do a write to setup catalog: @@ -2775,6 +2913,10 @@ mod tests { .await .unwrap(); + let (db_id, db_schema) = wbuf.catalog().db_schema_and_id(db_name).unwrap(); + let (tbl_id, table_def) = db_schema.table_definition_and_id(tbl_name).unwrap(); + let game_id_col_id = table_def.column_name_to_id("game_id").unwrap(); + // Create the last cache using default tags as keys wbuf.create_last_cache(db_id, tbl_id, None, Some(10), None, None, None) .await @@ -2808,7 +2950,7 @@ mod tests { let test_cases = [ // Cache that has values in the zone columns should produce them: TestCase { - predicates: &[Predicate::new_eq("game_id", KeyValue::string("4"))], + predicates: &[Predicate::new_eq(game_id_col_id, KeyValue::string("4"))], expected: &[ "+---------+-----------+-----------------------------+------+------+", "| game_id | player | time | type | zone |", @@ -2819,7 +2961,7 @@ mod tests { }, // Cache that does not have a zone column will produce it with nulls: TestCase { - predicates: &[Predicate::new_eq("game_id", KeyValue::string("1"))], + predicates: &[Predicate::new_eq(game_id_col_id, KeyValue::string("1"))], expected: &[ "+---------+-----------+-----------------------------+------+------+", "| game_id | player | time | type | zone |", @@ -2855,12 +2997,10 @@ mod tests { } } - #[tokio::test] + #[test_log::test(tokio::test)] async fn new_field_ordering() { let db_name = "db"; - let db_id = DbId::from(0); let tbl_name = "tbl"; - let tbl_id = TableId::from(0); let wbuf = setup_write_buffer().await; // Do a write to setup catalog: @@ -2874,6 +3014,10 @@ mod tests { .await .unwrap(); + let (db_id, db_schema) = wbuf.catalog().db_schema_and_id(db_name).unwrap(); + let (tbl_id, table_def) = db_schema.table_definition_and_id(tbl_name).unwrap(); + let t1_col_id = table_def.column_name_to_id("t1").unwrap(); + // Create the last cache using the single `t1` tag column as key // and using the default for fields, so that new fields will get added // to the cache. 
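Throughout these tests the `key_columns` argument to `create_last_cache` changes shape from `Vec<String>` to a vector of `(ColumnId, Arc<str>)` pairs, matching the `LastCacheManager` trait change further down in `influxdb3_write/src/lib.rs`. A small sketch of building that argument, with `ColumnId` replaced by a local stand-in:

```rust
use std::sync::Arc;

/// Stand-in for `influxdb3_id::ColumnId` in this sketch.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ColumnId(u32);

/// Sketch of the reshaped argument: callers now pass id/name pairs rather
/// than bare column names, so the cache definition can be expressed in ids
/// while the display name is still available.
fn key_columns_arg(cols: &[(ColumnId, &str)]) -> Option<Vec<(ColumnId, Arc<str>)>> {
    Some(cols.iter().map(|(id, name)| (*id, Arc::from(*name))).collect())
}

fn main() {
    let t1_col_id = ColumnId(4);
    let arg = key_columns_arg(&[(t1_col_id, "t1")]).unwrap();
    assert_eq!(arg[0].0, t1_col_id);
    assert_eq!(arg[0].1.as_ref(), "t1");
}
```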
@@ -2883,7 +3027,7 @@ mod tests { None, None, // use default cache size of 1 None, - Some(vec!["t1".to_string()]), + Some(vec![(t1_col_id, "t1".into())]), None, ) .await @@ -2935,46 +3079,46 @@ mod tests { let test_cases = [ // Can query on specific key column values: TestCase { - predicates: &[Predicate::new_eq("t1", KeyValue::string("a"))], + predicates: &[Predicate::new_eq(t1_col_id, KeyValue::string("a"))], expected: &[ - "+----+-----+--------------------------------+-----+-----+-----+", - "| t1 | f1 | time | f2 | f3 | f4 |", - "+----+-----+--------------------------------+-----+-----+-----+", - "| a | 1.0 | 1970-01-01T00:00:00.000001500Z | 2.0 | 3.0 | 4.0 |", - "+----+-----+--------------------------------+-----+-----+-----+", + "+----+-----+-----+-----+-----+--------------------------------+", + "| t1 | f1 | f2 | f3 | f4 | time |", + "+----+-----+-----+-----+-----+--------------------------------+", + "| a | 1.0 | 2.0 | 3.0 | 4.0 | 1970-01-01T00:00:00.000001500Z |", + "+----+-----+-----+-----+-----+--------------------------------+", ], }, TestCase { - predicates: &[Predicate::new_eq("t1", KeyValue::string("b"))], + predicates: &[Predicate::new_eq(t1_col_id, KeyValue::string("b"))], expected: &[ - "+----+------+--------------------------------+----+------+------+", - "| t1 | f1 | time | f2 | f3 | f4 |", - "+----+------+--------------------------------+----+------+------+", - "| b | 10.0 | 1970-01-01T00:00:00.000001500Z | | 30.0 | 40.0 |", - "+----+------+--------------------------------+----+------+------+", + "+----+------+----+------+------+--------------------------------+", + "| t1 | f1 | f2 | f3 | f4 | time |", + "+----+------+----+------+------+--------------------------------+", + "| b | 10.0 | | 30.0 | 40.0 | 1970-01-01T00:00:00.000001500Z |", + "+----+------+----+------+------+--------------------------------+", ], }, TestCase { - predicates: &[Predicate::new_eq("t1", KeyValue::string("c"))], + predicates: &[Predicate::new_eq(t1_col_id, KeyValue::string("c"))], expected: &[ - "+----+-------+--------------------------------+-------+-------+----+", - "| t1 | f1 | time | f2 | f3 | f4 |", - "+----+-------+--------------------------------+-------+-------+----+", - "| c | 100.0 | 1970-01-01T00:00:00.000001500Z | 200.0 | 300.0 | |", - "+----+-------+--------------------------------+-------+-------+----+", + "+----+-------+-------+-------+----+--------------------------------+", + "| t1 | f1 | f2 | f3 | f4 | time |", + "+----+-------+-------+-------+----+--------------------------------+", + "| c | 100.0 | 200.0 | 300.0 | | 1970-01-01T00:00:00.000001500Z |", + "+----+-------+-------+-------+----+--------------------------------+", ], }, // Can query accross key column values: TestCase { predicates: &[], expected: &[ - "+----+-------+--------------------------------+-------+-------+------+", - "| t1 | f1 | time | f2 | f3 | f4 |", - "+----+-------+--------------------------------+-------+-------+------+", - "| a | 1.0 | 1970-01-01T00:00:00.000001500Z | 2.0 | 3.0 | 4.0 |", - "| b | 10.0 | 1970-01-01T00:00:00.000001500Z | | 30.0 | 40.0 |", - "| c | 100.0 | 1970-01-01T00:00:00.000001500Z | 200.0 | 300.0 | |", - "+----+-------+--------------------------------+-------+-------+------+", + "+----+-------+-------+-------+------+--------------------------------+", + "| t1 | f1 | f2 | f3 | f4 | time |", + "+----+-------+-------+-------+------+--------------------------------+", + "| a | 1.0 | 2.0 | 3.0 | 4.0 | 1970-01-01T00:00:00.000001500Z |", + "| b | 10.0 | | 30.0 | 40.0 | 
1970-01-01T00:00:00.000001500Z |", + "| c | 100.0 | 200.0 | 300.0 | | 1970-01-01T00:00:00.000001500Z |", + "+----+-------+-------+-------+------+--------------------------------+", ], }, ]; @@ -2993,9 +3137,7 @@ mod tests { #[tokio::test] async fn idempotent_cache_creation() { let db_name = "db"; - let db_id = DbId::from(0); let tbl_name = "tbl"; - let tbl_id = TableId::from(0); let wbuf = setup_write_buffer().await; // Do a write to setup catalog: @@ -3009,6 +3151,13 @@ mod tests { .await .unwrap(); + let (db_id, db_schema) = wbuf.catalog().db_schema_and_id(db_name).unwrap(); + let (tbl_id, table_def) = db_schema.table_definition_and_id(tbl_name).unwrap(); + let t1_col_id = table_def.column_name_to_id("t1").unwrap(); + let t2_col_id = table_def.column_name_to_id("t2").unwrap(); + let f1_col_id = table_def.column_name_to_id("f1").unwrap(); + let f2_col_id = table_def.column_name_to_id("f2").unwrap(); + // Create a last cache using all default settings wbuf.create_last_cache(db_id, tbl_id, None, None, None, None, None) .await @@ -3028,7 +3177,7 @@ mod tests { Some("tbl_t1_t2_last_cache"), Some(1), Some(DEFAULT_CACHE_TTL), - Some(vec!["t1".to_string(), "t2".to_string()]), + Some(vec![(t1_col_id, "t1".into()), (t2_col_id, "t2".into())]), None, ) .await @@ -3044,7 +3193,7 @@ mod tests { None, None, None, - Some(vec!["f1".to_string(), "f2".to_string()]), + Some(vec![(f1_col_id, "f1".into()), (f2_col_id, "f2".into())]), ) .await .expect_err("create last cache should have failed"); @@ -3057,7 +3206,7 @@ mod tests { Some("tbl_t1_t2_last_cache"), None, None, - Some(vec!["t1".to_string()]), + Some(vec![(t1_col_id, "t1".into())]), None, ) .await @@ -3073,7 +3222,7 @@ mod tests { None, None, None, - Some(vec!["t1".to_string()]), + Some(vec![(t1_col_id, "t1".into())]), None, ) .await @@ -3105,16 +3254,16 @@ mod tests { assert_eq!(wbuf.last_cache_provider().size(), 2); } - type SeriesKey = Option>; + type SeriesKey = Option>; - #[test] + #[test_log::test] fn catalog_initialization() { // Set up a database in the catalog: let db_name = "test_db"; let mut database = DatabaseSchema { id: DbId::from(0), name: db_name.into(), - tables: SerdeVecHashMap::new(), + tables: SerdeVecMap::new(), table_map: { let mut map = BiHashMap::new(); map.insert(TableId::from(0), "test_table_1".into()); @@ -3129,13 +3278,13 @@ mod tests { let mut table_def = TableDefinition::new( table_id, "test_table_1".into(), - [ - ("t1", Tag), - ("t2", Tag), - ("t3", Tag), - ("time", Timestamp), - ("f1", Field(String)), - ("f2", Field(Float)), + vec![ + (ColumnId::from(0), "t1".into(), Tag), + (ColumnId::from(1), "t2".into(), Tag), + (ColumnId::from(2), "t3".into(), Tag), + (ColumnId::from(3), "time".into(), Timestamp), + (ColumnId::from(4), "f1".into(), Field(String)), + (ColumnId::from(5), "f2".into(), Field(Float)), ], SeriesKey::None, ) @@ -3146,23 +3295,25 @@ mod tests { table_id, "test_table_1", "test_cache_1", - ["t1", "t2"], + vec![ColumnId::from(0), ColumnId::from(1)], 1, 600, ) .unwrap(), ); - database.tables.insert(table_def.table_id, table_def); + database + .tables + .insert(table_def.table_id, Arc::new(table_def)); // Add another table to it: let table_id = TableId::from(1); let mut table_def = TableDefinition::new( table_id, "test_table_2".into(), - [ - ("t1", Tag), - ("time", Timestamp), - ("f1", Field(String)), - ("f2", Field(Float)), + vec![ + (ColumnId::from(6), "t1".into(), Tag), + (ColumnId::from(7), "time".into(), Timestamp), + (ColumnId::from(8), "f1".into(), Field(String)), + (ColumnId::from(9), "f2".into(), 
Field(Float)), ], SeriesKey::None, ) @@ -3173,8 +3324,8 @@ mod tests { table_id, "test_table_2", "test_cache_2", - ["t1"], - ["f1"], + vec![ColumnId::from(6)], + vec![ColumnId::from(8), ColumnId::from(7)], 5, 60, ) @@ -3186,14 +3337,16 @@ mod tests { table_id, "test_table_2", "test_cache_3", - &[] as &[std::string::String], - ["f2"], + vec![], + vec![ColumnId::from(9), ColumnId::from(7)], 10, 500, ) .unwrap(), ); - database.tables.insert(table_def.table_id, table_def); + database + .tables + .insert(table_def.table_id, Arc::new(table_def)); // Create the catalog and clone its InnerCatalog (which is what the LastCacheProvider is // initialized from): let host_id = Arc::from("sample-host-id"); diff --git a/influxdb3_write/src/last_cache/snapshots/influxdb3_write__last_cache__tests__catalog_initialization.snap b/influxdb3_write/src/last_cache/snapshots/influxdb3_write__last_cache__tests__catalog_initialization.snap index 79e9797b63..453fefc719 100644 --- a/influxdb3_write/src/last_cache/snapshots/influxdb3_write__last_cache__tests__catalog_initialization.snap +++ b/influxdb3_write/src/last_cache/snapshots/influxdb3_write__last_cache__tests__catalog_initialization.snap @@ -8,8 +8,8 @@ expression: caches "table": "test_table_1", "name": "test_cache_1", "key_columns": [ - "t1", - "t2" + 0, + 1 ], "value_columns": { "type": "all_non_key_columns" @@ -22,13 +22,13 @@ expression: caches "table": "test_table_2", "name": "test_cache_2", "key_columns": [ - "t1" + 6 ], "value_columns": { "type": "explicit", "columns": [ - "f1", - "time" + 8, + 7 ] }, "count": 5, @@ -42,8 +42,8 @@ expression: caches "value_columns": { "type": "explicit", "columns": [ - "f2", - "time" + 9, + 7 ] }, "count": 10, diff --git a/influxdb3_write/src/last_cache/table_function.rs b/influxdb3_write/src/last_cache/table_function.rs index 72d5275fc6..37d04a641f 100644 --- a/influxdb3_write/src/last_cache/table_function.rs +++ b/influxdb3_write/src/last_cache/table_function.rs @@ -10,15 +10,15 @@ use datafusion::{ physical_plan::{memory::MemoryExec, ExecutionPlan}, scalar::ScalarValue, }; +use influxdb3_catalog::catalog::TableDefinition; use influxdb3_id::DbId; -use influxdb3_id::TableId; use super::LastCacheProvider; struct LastCacheFunctionProvider { db_id: DbId, - table_id: TableId, - cache_name: String, + table_def: Arc, + cache_name: Arc, schema: SchemaRef, provider: Arc, } @@ -54,11 +54,11 @@ impl TableProvider for LastCacheFunctionProvider { let read = self.provider.cache_map.read(); let batches = if let Some(cache) = read .get(&self.db_id) - .and_then(|db| db.get(&self.table_id)) + .and_then(|db| db.get(&self.table_def.table_id)) .and_then(|tbl| tbl.get(&self.cache_name)) { let predicates = cache.convert_filter_exprs(filters); - cache.to_record_batches(&predicates)? + cache.to_record_batches(Arc::clone(&self.table_def), &predicates)? } else { // If there is no cache, it means that it was removed, in which case, we just return // an empty set of record batches. 
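In the table function, the provider now carries an `Arc<TableDefinition>` instead of a bare `TableId` and cache name, and the `call` implementation resolves the definition up front, turning a missing table or cache into a planner error via let-else. A simplified, self-contained model of that control flow, with `String` errors standing in for DataFusion's `plan_err!` results:

```rust
use std::collections::HashMap;
use std::sync::Arc;

/// Simplified stand-ins; the real code returns `plan_err!(...)` and builds a
/// `LastCacheFunctionProvider` holding the resolved `Arc<TableDefinition>`.
struct TableDef {
    table_id: u32,
}

struct Catalog {
    tables: HashMap<&'static str, Arc<TableDef>>,
}

impl Catalog {
    fn table_definition(&self, name: &str) -> Option<Arc<TableDef>> {
        self.tables.get(name).cloned()
    }
}

/// Mirrors the let-else flow in the table function: resolve the table
/// definition first, then the cache, turning either miss into a plan error.
fn resolve(catalog: &Catalog, table_name: &str, cache_exists: bool) -> Result<Arc<TableDef>, String> {
    let Some(table_def) = catalog.table_definition(table_name) else {
        return Err("provided table name is invalid".to_string());
    };
    if !cache_exists {
        return Err("could not find cache for the given arguments".to_string());
    }
    Ok(table_def)
}

fn main() {
    let catalog = Catalog {
        tables: HashMap::from([("cpu", Arc::new(TableDef { table_id: 0 }))]),
    };
    assert!(resolve(&catalog, "cpu", true).is_ok());
    assert_eq!(
        resolve(&catalog, "mem", true).unwrap_err(),
        "provided table name is invalid"
    );
    assert_eq!(resolve(&catalog, "cpu", true).unwrap().table_id, 0);
}
```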
@@ -97,27 +97,29 @@ impl TableFunctionImpl for LastCacheFunction { } None => None, }; - let table_id = self + let Some(table_def) = self .provider .catalog .db_schema_by_id(self.db_id) .expect("db exists") - .table_name_to_id(table_name.as_str()) - .expect("table exists"); - - match self.provider.get_cache_name_and_schema( + .table_definition(table_name.as_str()) + else { + return plan_err!("provided table name is invalid"); + }; + let Some((cache_name, schema)) = self.provider.get_cache_name_and_schema( self.db_id, - table_id, + table_def.table_id, cache_name.map(|x| x.as_str()), - ) { - Some((cache_name, schema)) => Ok(Arc::new(LastCacheFunctionProvider { - db_id: self.db_id, - table_id, - cache_name, - schema, - provider: Arc::clone(&self.provider), - })), - None => plan_err!("could not find cache for the given arguments"), - } + ) else { + return plan_err!("could not find cache for the given arguments"); + }; + + Ok(Arc::new(LastCacheFunctionProvider { + db_id: self.db_id, + table_def, + cache_name, + schema, + provider: Arc::clone(&self.provider), + })) } } diff --git a/influxdb3_write/src/lib.rs b/influxdb3_write/src/lib.rs index 37a4aff690..e079fdc8de 100644 --- a/influxdb3_write/src/lib.rs +++ b/influxdb3_write/src/lib.rs @@ -18,9 +18,9 @@ use datafusion::error::DataFusionError; use datafusion::prelude::Expr; use influxdb3_catalog::catalog::Catalog; use influxdb3_catalog::catalog::{self, SequenceNumber}; -use influxdb3_id::DbId; use influxdb3_id::ParquetFileId; use influxdb3_id::TableId; +use influxdb3_id::{ColumnId, DbId}; use influxdb3_wal::{LastCacheDefinition, SnapshotSequenceNumber, WalFileSequenceNumber}; use iox_query::QueryChunk; use iox_time::Time; @@ -117,8 +117,8 @@ pub trait LastCacheManager: Debug + Send + Sync + 'static { cache_name: Option<&str>, count: Option, ttl: Option, - key_columns: Option>, - value_columns: Option>, + key_columns: Option)>>, + value_columns: Option)>>, ) -> Result, write_buffer::Error>; /// Delete a last-n-value cache /// @@ -171,6 +171,8 @@ pub struct PersistedSnapshot { pub next_db_id: DbId, /// The next table id to be used for tables when the snapshot is loaded pub next_table_id: TableId, + /// The next column id to be used for columns when the snapshot is loaded + pub next_column_id: ColumnId, /// The snapshot sequence number associated with this snapshot pub snapshot_sequence_number: SnapshotSequenceNumber, /// The wal file sequence number that triggered this snapshot @@ -202,6 +204,7 @@ impl PersistedSnapshot { next_file_id: ParquetFileId::next_id(), next_db_id: DbId::next_id(), next_table_id: TableId::next_id(), + next_column_id: ColumnId::next_id(), snapshot_sequence_number, wal_file_sequence_number, catalog_sequence_number, diff --git a/influxdb3_write/src/persister.rs b/influxdb3_write/src/persister.rs index 6141b303f7..c07fead3d1 100644 --- a/influxdb3_write/src/persister.rs +++ b/influxdb3_write/src/persister.rs @@ -416,7 +416,7 @@ mod tests { use super::*; use crate::ParquetFileId; use influxdb3_catalog::catalog::SequenceNumber; - use influxdb3_id::{DbId, TableId}; + use influxdb3_id::{ColumnId, DbId, TableId}; use influxdb3_wal::SnapshotSequenceNumber; use object_store::memory::InMemory; use observability_deps::tracing::info; @@ -493,6 +493,7 @@ mod tests { next_file_id: ParquetFileId::from(0), next_db_id: DbId::from(1), next_table_id: TableId::from(1), + next_column_id: ColumnId::from(1), snapshot_sequence_number: SnapshotSequenceNumber::new(0), wal_file_sequence_number: WalFileSequenceNumber::new(0), catalog_sequence_number: 
SequenceNumber::new(0), @@ -516,6 +517,7 @@ mod tests { next_file_id: ParquetFileId::from(0), next_db_id: DbId::from(1), next_table_id: TableId::from(1), + next_column_id: ColumnId::from(1), snapshot_sequence_number: SnapshotSequenceNumber::new(0), wal_file_sequence_number: WalFileSequenceNumber::new(0), catalog_sequence_number: SequenceNumber::default(), @@ -530,6 +532,7 @@ mod tests { next_file_id: ParquetFileId::from(1), next_db_id: DbId::from(1), next_table_id: TableId::from(1), + next_column_id: ColumnId::from(1), snapshot_sequence_number: SnapshotSequenceNumber::new(1), wal_file_sequence_number: WalFileSequenceNumber::new(1), catalog_sequence_number: SequenceNumber::default(), @@ -544,6 +547,7 @@ mod tests { next_file_id: ParquetFileId::from(2), next_db_id: DbId::from(1), next_table_id: TableId::from(1), + next_column_id: ColumnId::from(1), snapshot_sequence_number: SnapshotSequenceNumber::new(2), wal_file_sequence_number: WalFileSequenceNumber::new(2), catalog_sequence_number: SequenceNumber::default(), @@ -579,6 +583,7 @@ mod tests { next_file_id: ParquetFileId::from(0), next_db_id: DbId::from(1), next_table_id: TableId::from(1), + next_column_id: ColumnId::from(1), snapshot_sequence_number: SnapshotSequenceNumber::new(0), wal_file_sequence_number: WalFileSequenceNumber::new(0), catalog_sequence_number: SequenceNumber::default(), @@ -607,6 +612,7 @@ mod tests { next_file_id: ParquetFileId::from(id), next_db_id: DbId::from(1), next_table_id: TableId::from(1), + next_column_id: ColumnId::from(1), snapshot_sequence_number: SnapshotSequenceNumber::new(id), wal_file_sequence_number: WalFileSequenceNumber::new(id), catalog_sequence_number: SequenceNumber::new(id as u32), diff --git a/influxdb3_write/src/write_buffer/mod.rs b/influxdb3_write/src/write_buffer/mod.rs index 19869a96fd..175684b18c 100644 --- a/influxdb3_write/src/write_buffer/mod.rs +++ b/influxdb3_write/src/write_buffer/mod.rs @@ -23,7 +23,7 @@ use datafusion::common::DataFusionError; use datafusion::datasource::object_store::ObjectStoreUrl; use datafusion::logical_expr::Expr; use influxdb3_catalog::catalog::Catalog; -use influxdb3_id::{DbId, TableId}; +use influxdb3_id::{ColumnId, DbId, TableId}; use influxdb3_wal::object_store::WalObjectStore; use influxdb3_wal::CatalogOp::CreateLastCache; use influxdb3_wal::{ @@ -79,6 +79,9 @@ pub enum Error { #[error("tried accessing database and table that do not exist")] TableDoesNotExist, + #[error("tried accessing column with name ({0}) that does not exist")] + ColumnDoesNotExist(String), + #[error( "updating catalog on delete of last cache failed, you will need to delete the cache \ again on server restart" @@ -150,6 +153,11 @@ impl WriteBufferImpl { .first() .map(|s| s.next_table_id.set_next_id()) .unwrap_or(()); + // Set the next table id to use when adding a new database + persisted_snapshots + .first() + .map(|s| s.next_column_id.set_next_id()) + .unwrap_or(()); // Set the next file id to use when persisting ParquetFiles persisted_snapshots .first() @@ -452,27 +460,21 @@ impl LastCacheManager for WriteBufferImpl { cache_name: Option<&str>, count: Option, ttl: Option, - key_columns: Option>, - value_columns: Option>, + key_columns: Option)>>, + value_columns: Option)>>, ) -> Result, Error> { let cache_name = cache_name.map(Into::into); let catalog = self.catalog(); let db_schema = catalog .db_schema_by_id(db_id) .ok_or(Error::DbDoesNotExist)?; - let schema = db_schema - .table_schema_by_id(table_id) + let table_def = db_schema + .table_definition_by_id(table_id) 
.ok_or(Error::TableDoesNotExist)?; if let Some(info) = self.last_cache.create_cache(CreateCacheArguments { db_id, - db_name: db_schema.name.to_string(), - table_id, - table_name: db_schema - .table_id_to_name(table_id) - .expect("table exists") - .to_string(), - schema, + table_def, cache_name, count, ttl, @@ -514,10 +516,7 @@ impl LastCacheManager for WriteBufferImpl { database_name: Arc::clone(&db_schema.name), ops: vec![CatalogOp::DeleteLastCache(LastCacheDelete { table_id: tbl_id, - table_name: db_schema - .table_id_to_name(tbl_id) - .expect("table exists") - .to_string(), + table_name: db_schema.table_id_to_name(tbl_id).expect("table exists"), name: cache_name.into(), })], })]) diff --git a/influxdb3_write/src/write_buffer/queryable_buffer.rs b/influxdb3_write/src/write_buffer/queryable_buffer.rs index b7632b08f1..a98b2ed0e0 100644 --- a/influxdb3_write/src/write_buffer/queryable_buffer.rs +++ b/influxdb3_write/src/write_buffer/queryable_buffer.rs @@ -80,11 +80,11 @@ impl QueryableBuffer { _projection: Option<&Vec>, _ctx: &dyn Session, ) -> Result>, DataFusionError> { - let (table_id, table_schema) = db_schema - .table_schema_and_id(table_name) + let (table_id, table_def) = db_schema + .table_definition_and_id(table_name) .ok_or_else(|| DataFusionError::Execution(format!("table {} not found", table_name)))?; - let arrow_schema = table_schema.as_arrow(); + let influx_schema = table_def.influx_schema(); let buffer = self.buffer.read(); @@ -96,20 +96,20 @@ impl QueryableBuffer { }; Ok(table_buffer - .partitioned_record_batches(Arc::clone(&arrow_schema), filters) + .partitioned_record_batches(Arc::clone(&table_def), filters) .map_err(|e| DataFusionError::Execution(format!("error getting batches {}", e)))? .into_iter() .map(|(gen_time, (ts_min_max, batches))| { let row_count = batches.iter().map(|b| b.num_rows()).sum::(); let chunk_stats = create_chunk_statistics( Some(row_count), - &table_schema, + influx_schema, Some(ts_min_max), &NoColumnRanges, ); Arc::new(BufferChunk { batches, - schema: table_schema.clone(), + schema: influx_schema.clone(), stats: Arc::new(chunk_stats), partition_id: TransitionPartitionId::new( data_types::TableId::new(0), @@ -150,7 +150,11 @@ impl QueryableBuffer { for (database_id, table_map) in buffer.db_to_table.iter_mut() { let db_schema = catalog.db_schema_by_id(*database_id).expect("db exists"); for (table_id, table_buffer) in table_map.iter_mut() { - let snapshot_chunks = table_buffer.snapshot(snapshot_details.end_time_marker); + let table_def = db_schema + .table_definition_by_id(*table_id) + .expect("table exists"); + let snapshot_chunks = + table_buffer.snapshot(table_def, snapshot_details.end_time_marker); for chunk in snapshot_chunks { let table_name = @@ -375,12 +379,12 @@ impl BufferState { for op in catalog_batch.ops { match op { CatalogOp::CreateLastCache(definition) => { - let table_schema = db_schema - .table_schema_by_id(definition.table_id) + let table_def = db_schema + .table_definition_by_id(definition.table_id) .expect("table should exist"); last_cache_provider.create_cache_from_definition( db_schema.id, - &table_schema, + table_def, &definition, ); } @@ -411,18 +415,20 @@ impl BufferState { for (table_id, table_chunks) in write_batch.table_chunks { let table_buffer = database_buffer.entry(table_id).or_insert_with(|| { - let table_schema = db_schema + let table_def = db_schema .table_definition_by_id(table_id) .expect("table should exist"); - let sort_key = table_schema + // TODO: can we have the primary key stored on the table definition (we 
already have + // the series key, so that doesn't seem like too much of a stretch). + let sort_key = table_def .influx_schema() .primary_key() .iter() .map(|c| c.to_string()) .collect::>(); - let index_columns = table_schema.index_columns(); + let index_columns = table_def.index_column_ids(); - TableBuffer::new(&index_columns, SortKey::from(sort_key)) + TableBuffer::new(index_columns, SortKey::from(sort_key)) }); for (chunk_time, chunk) in table_chunks.chunk_time_to_chunk { table_buffer.buffer_chunk(chunk_time, chunk.rows); diff --git a/influxdb3_write/src/write_buffer/snapshots/influxdb3_write__write_buffer__tests__catalog-after-last-cache-create-and-new-field.snap b/influxdb3_write/src/write_buffer/snapshots/influxdb3_write__write_buffer__tests__catalog-after-last-cache-create-and-new-field.snap index fcacd3e0fc..28e92be42f 100644 --- a/influxdb3_write/src/write_buffer/snapshots/influxdb3_write__write_buffer__tests__catalog-after-last-cache-create-and-new-field.snap +++ b/influxdb3_write/src/write_buffer/snapshots/influxdb3_write__write_buffer__tests__catalog-after-last-cache-create-and-new-field.snap @@ -9,74 +9,66 @@ expression: catalog_json { "id": 0, "name": "db", - "table_map": [ - { - "name": "table", - "table_id": 0 - } - ], "tables": [ [ 0, { - "cols": { - "f1": { - "column_id": 0, - "influx_type": "field", - "nullable": true, - "type": "bool" - }, - "f2": { - "column_id": 3, - "influx_type": "field", - "nullable": true, - "type": "i64" - }, - "t1": { - "column_id": 1, - "influx_type": "tag", - "nullable": true, - "type": { - "dict": [ - "i32", - "str" - ] + "cols": [ + [ + 1, + { + "id": 1, + "influx_type": "field", + "name": "f1", + "nullable": true, + "type": "bool" } - }, - "time": { - "column_id": 2, - "influx_type": "time", - "nullable": false, - "type": { - "time": [ - "ns", - null - ] + ], + [ + 3, + { + "id": 3, + "influx_type": "field", + "name": "f2", + "nullable": true, + "type": "i64" } - } - }, - "column_map": [ - { - "column_id": 0, - "name": "f1" - }, - { - "column_id": 1, - "name": "t1" - }, - { - "column_id": 2, - "name": "time" - }, - { - "column_id": 3, - "name": "f2" - } + ], + [ + 0, + { + "id": 0, + "influx_type": "tag", + "name": "t1", + "nullable": true, + "type": { + "dict": [ + "i32", + "str" + ] + } + } + ], + [ + 2, + { + "id": 2, + "influx_type": "time", + "name": "time", + "nullable": false, + "type": { + "time": [ + "ns", + null + ] + } + } + ] ], "last_caches": [ { "keys": [ - "t1" + 0 ], "n": 1, "name": "cache", @@ -86,7 +78,6 @@ expression: catalog_json "vals": null } ], - "next_column_id": 4, "table_id": 0, "table_name": "table" } diff --git a/influxdb3_write/src/write_buffer/snapshots/influxdb3_write__write_buffer__tests__catalog-immediately-after-last-cache-create.snap b/influxdb3_write/src/write_buffer/snapshots/influxdb3_write__write_buffer__tests__catalog-immediately-after-last-cache-create.snap index 0512ccd036..acbe22787b 100644 --- a/influxdb3_write/src/write_buffer/snapshots/influxdb3_write__write_buffer__tests__catalog-immediately-after-last-cache-create.snap +++ b/influxdb3_write/src/write_buffer/snapshots/influxdb3_write__write_buffer__tests__catalog-immediately-after-last-cache-create.snap @@ -9,64 +9,56 @@ expression: catalog_json { "id": 0, "name": "db", - "table_map": [ - { - "name": "table", - "table_id": 0 - } - ], "tables": [ [ 0, { - "cols": { - "f1": { - "column_id": 0, - "influx_type": "field", - "nullable": true, - "type": "bool" - }, - "t1": { - "column_id": 1, - "influx_type": "tag", - "nullable": true, - "type": { - 
"dict": [ - "i32", - "str" - ] + "cols": [ + [ + 1, + { + "id": 1, + "influx_type": "field", + "name": "f1", + "nullable": true, + "type": "bool" } - }, - "time": { - "column_id": 2, - "influx_type": "time", - "nullable": false, - "type": { - "time": [ - "ns", - null - ] + ], + [ + 0, + { + "id": 0, + "influx_type": "tag", + "name": "t1", + "nullable": true, + "type": { + "dict": [ + "i32", + "str" + ] + } } - } - }, - "column_map": [ - { - "column_id": 0, - "name": "f1" - }, - { - "column_id": 1, - "name": "t1" - }, - { - "column_id": 2, - "name": "time" - } + ], + [ + 2, + { + "id": 2, + "influx_type": "time", + "name": "time", + "nullable": false, + "type": { + "time": [ + "ns", + null + ] + } + } + ] ], "last_caches": [ { "keys": [ - "t1" + 0 ], "n": 1, "name": "cache", @@ -76,7 +68,6 @@ expression: catalog_json "vals": null } ], - "next_column_id": 3, "table_id": 0, "table_name": "table" } diff --git a/influxdb3_write/src/write_buffer/snapshots/influxdb3_write__write_buffer__tests__catalog-immediately-after-last-cache-delete.snap b/influxdb3_write/src/write_buffer/snapshots/influxdb3_write__write_buffer__tests__catalog-immediately-after-last-cache-delete.snap index 9925b2f06a..57da613592 100644 --- a/influxdb3_write/src/write_buffer/snapshots/influxdb3_write__write_buffer__tests__catalog-immediately-after-last-cache-delete.snap +++ b/influxdb3_write/src/write_buffer/snapshots/influxdb3_write__write_buffer__tests__catalog-immediately-after-last-cache-delete.snap @@ -9,71 +9,62 @@ expression: catalog_json { "id": 0, "name": "db", - "table_map": [ - { - "name": "table", - "table_id": 0 - } - ], "tables": [ [ 0, { - "cols": { - "f1": { - "column_id": 0, - "influx_type": "field", - "nullable": true, - "type": "bool" - }, - "f2": { - "column_id": 3, - "influx_type": "field", - "nullable": true, - "type": "i64" - }, - "t1": { - "column_id": 1, - "influx_type": "tag", - "nullable": true, - "type": { - "dict": [ - "i32", - "str" - ] + "cols": [ + [ + 1, + { + "id": 1, + "influx_type": "field", + "name": "f1", + "nullable": true, + "type": "bool" } - }, - "time": { - "column_id": 2, - "influx_type": "time", - "nullable": false, - "type": { - "time": [ - "ns", - null - ] + ], + [ + 3, + { + "id": 3, + "influx_type": "field", + "name": "f2", + "nullable": true, + "type": "i64" } - } - }, - "column_map": [ - { - "column_id": 0, - "name": "f1" - }, - { - "column_id": 1, - "name": "t1" - }, - { - "column_id": 2, - "name": "time" - }, - { - "column_id": 3, - "name": "f2" - } + ], + [ + 0, + { + "id": 0, + "influx_type": "tag", + "name": "t1", + "nullable": true, + "type": { + "dict": [ + "i32", + "str" + ] + } + } + ], + [ + 2, + { + "id": 2, + "influx_type": "time", + "name": "time", + "nullable": false, + "type": { + "time": [ + "ns", + null + ] + } + } + ] ], - "next_column_id": 4, "table_id": 0, "table_name": "table" } diff --git a/influxdb3_write/src/write_buffer/table_buffer.rs b/influxdb3_write/src/write_buffer/table_buffer.rs index 08eb27efc1..f897c661b1 100644 --- a/influxdb3_write/src/write_buffer/table_buffer.rs +++ b/influxdb3_write/src/write_buffer/table_buffer.rs @@ -5,15 +5,18 @@ use arrow::array::{ Int64Builder, StringArray, StringBuilder, StringDictionaryBuilder, TimestampNanosecondBuilder, UInt64Builder, }; -use arrow::datatypes::{GenericStringType, Int32Type, SchemaRef}; +use arrow::datatypes::{GenericStringType, Int32Type}; use arrow::record_batch::RecordBatch; use data_types::TimestampMinMax; use datafusion::logical_expr::{BinaryExpr, Expr}; use hashbrown::HashMap; +use 
influxdb3_catalog::catalog::TableDefinition; +use influxdb3_id::ColumnId; use influxdb3_wal::{FieldData, Row}; use observability_deps::tracing::{debug, error, info}; use schema::sort::SortKey; use schema::{InfluxColumnType, InfluxFieldType, Schema, SchemaBuilder}; +use std::collections::btree_map::Entry; use std::collections::{BTreeMap, HashSet}; use std::mem::size_of; use std::sync::Arc; @@ -38,7 +41,7 @@ pub struct TableBuffer { } impl TableBuffer { - pub fn new(index_columns: &[&str], sort_key: SortKey) -> Self { + pub fn new(index_columns: Vec, sort_key: SortKey) -> Self { Self { chunk_time_to_chunks: BTreeMap::default(), snapshotting_chunks: vec![], @@ -67,10 +70,11 @@ impl TableBuffer { /// The partitions are stored and returned in a `HashMap`, keyed on the generation time. pub fn partitioned_record_batches( &self, - schema: SchemaRef, + table_def: Arc, filter: &[Expr], ) -> Result)>> { let mut batches = HashMap::new(); + let schema = table_def.schema.as_arrow(); for sc in &self.snapshotting_chunks { let cols: std::result::Result, _> = schema .fields() @@ -97,14 +101,19 @@ impl TableBuffer { .entry(*t) .or_insert_with(|| (ts_min_max, Vec::new())); *ts = ts.union(&ts_min_max); - v.push(c.record_batch(schema.clone(), filter)?); + v.push(c.record_batch(Arc::clone(&table_def), filter)?); } Ok(batches) } - pub fn record_batches(&self, schema: SchemaRef, filter: &[Expr]) -> Result> { + pub fn record_batches( + &self, + table_def: Arc, + filter: &[Expr], + ) -> Result> { let mut batches = Vec::with_capacity(self.snapshotting_chunks.len() + self.chunk_time_to_chunks.len()); + let schema = table_def.schema.as_arrow(); for sc in &self.snapshotting_chunks { let cols: std::result::Result, _> = schema @@ -125,7 +134,7 @@ impl TableBuffer { } for c in self.chunk_time_to_chunks.values() { - batches.push(c.record_batch(schema.clone(), filter)?) + batches.push(c.record_batch(Arc::clone(&table_def), filter)?) 
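// The buffer is now addressed by column id end to end: chunk data and the index beneath it
// are keyed by ColumnId rather than by column name. Below is a minimal standalone sketch of
// that idea, assuming only std; ColumnId here is a stand-in newtype and SketchIndex/rows_for
// are illustrative names, not the crate's API. The real BufferIndex further down in this diff
// additionally resolves a filter's column *name* to an id through the TableDefinition first.
use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq, Eq, Hash)]
struct ColumnId(u32);

#[derive(Default)]
struct SketchIndex {
    // column id -> string value -> row offsets
    columns: HashMap<ColumnId, HashMap<String, Vec<usize>>>,
}

impl SketchIndex {
    fn new(column_ids: Vec<ColumnId>) -> Self {
        Self {
            columns: column_ids.into_iter().map(|id| (id, HashMap::new())).collect(),
        }
    }

    // Record a row offset only for columns that were registered as indexed.
    fn add_row_if_indexed_column(&mut self, row: usize, column_id: ColumnId, value: &str) {
        if let Some(column) = self.columns.get_mut(&column_id) {
            column.entry(value.to_string()).or_default().push(row);
        }
    }

    fn rows_for(&self, column_id: ColumnId, value: &str) -> Option<&Vec<usize>> {
        self.columns.get(&column_id).and_then(|m| m.get(value))
    }
}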
} Ok(batches) @@ -157,8 +166,8 @@ impl TableBuffer { let mut size = size_of::(); for c in self.chunk_time_to_chunks.values() { - for (k, v) in &c.data { - size += k.len() + size_of::() + v.size(); + for biulder in c.data.values() { + size += size_of::() + size_of::() + biulder.size(); } size += c.index.size(); @@ -167,7 +176,11 @@ impl TableBuffer { size } - pub fn snapshot(&mut self, older_than_chunk_time: i64) -> Vec { + pub fn snapshot( + &mut self, + table_def: Arc, + older_than_chunk_time: i64, + ) -> Vec { info!(%older_than_chunk_time, "Snapshotting table buffer"); let keys_to_remove = self .chunk_time_to_chunks @@ -180,7 +193,7 @@ impl TableBuffer { .map(|chunk_time| { let chunk = self.chunk_time_to_chunks.remove(&chunk_time).unwrap(); let timestamp_min_max = chunk.timestamp_min_max(); - let (schema, record_batch) = chunk.into_schema_record_batch(); + let (schema, record_batch) = chunk.into_schema_record_batch(Arc::clone(&table_def)); SnapshotChunk { chunk_time, @@ -232,7 +245,7 @@ impl std::fmt::Debug for TableBuffer { struct MutableTableChunk { timestamp_min: i64, timestamp_max: i64, - data: BTreeMap, Builder>, + data: BTreeMap, row_count: usize, index: BufferIndex, } @@ -245,14 +258,14 @@ impl MutableTableChunk { let mut value_added = HashSet::with_capacity(r.fields.len()); for f in r.fields { - value_added.insert(f.name.clone()); + value_added.insert(f.id); match f.value { FieldData::Timestamp(v) => { self.timestamp_min = self.timestamp_min.min(v); self.timestamp_max = self.timestamp_max.max(v); - let b = self.data.entry(f.name).or_insert_with(|| { + let b = self.data.entry(f.id).or_insert_with(|| { debug!("Creating new timestamp builder"); let mut time_builder = TimestampNanosecondBuilder::new(); // append nulls for all previous rows @@ -269,20 +282,17 @@ impl MutableTableChunk { } } FieldData::Tag(v) => { - if !self.data.contains_key(&f.name) { + if let Entry::Vacant(e) = self.data.entry(f.id) { let mut tag_builder = StringDictionaryBuilder::new(); // append nulls for all previous rows for _ in 0..(row_index + self.row_count) { tag_builder.append_null(); } - self.data.insert(f.name.clone(), Builder::Tag(tag_builder)); + e.insert(Builder::Tag(tag_builder)); } - let b = self - .data - .get_mut(&f.name) - .expect("tag builder should exist"); + let b = self.data.get_mut(&f.id).expect("tag builder should exist"); if let Builder::Tag(b) = b { - self.index.add_row_if_indexed_column(b.len(), &f.name, &v); + self.index.add_row_if_indexed_column(b.len(), f.id, &v); b.append(v) .expect("shouldn't be able to overflow 32 bit dictionary"); } else { @@ -290,25 +300,22 @@ impl MutableTableChunk { } } FieldData::Key(v) => { - if !self.data.contains_key(&f.name) { + if let Entry::Vacant(e) = self.data.entry(f.id) { let key_builder = StringDictionaryBuilder::new(); if self.row_count > 0 { panic!("series key columns must be passed in the very first write for a table"); } - self.data.insert(f.name.clone(), Builder::Key(key_builder)); + e.insert(Builder::Key(key_builder)); } - let b = self - .data - .get_mut(&f.name) - .expect("key builder should exist"); + let b = self.data.get_mut(&f.id).expect("key builder should exist"); let Builder::Key(b) = b else { panic!("unexpected field type"); }; - self.index.add_row_if_indexed_column(b.len(), &f.name, &v); + self.index.add_row_if_indexed_column(b.len(), f.id, &v); b.append_value(v); } FieldData::String(v) => { - let b = self.data.entry(f.name).or_insert_with(|| { + let b = self.data.entry(f.id).or_insert_with(|| { let mut string_builder = 
StringBuilder::new(); // append nulls for all previous rows for _ in 0..(row_index + self.row_count) { @@ -323,7 +330,7 @@ impl MutableTableChunk { } } FieldData::Integer(v) => { - let b = self.data.entry(f.name).or_insert_with(|| { + let b = self.data.entry(f.id).or_insert_with(|| { let mut int_builder = Int64Builder::new(); // append nulls for all previous rows for _ in 0..(row_index + self.row_count) { @@ -338,7 +345,7 @@ impl MutableTableChunk { } } FieldData::UInteger(v) => { - let b = self.data.entry(f.name).or_insert_with(|| { + let b = self.data.entry(f.id).or_insert_with(|| { let mut uint_builder = UInt64Builder::new(); // append nulls for all previous rows for _ in 0..(row_index + self.row_count) { @@ -353,7 +360,7 @@ impl MutableTableChunk { } } FieldData::Float(v) => { - let b = self.data.entry(f.name).or_insert_with(|| { + let b = self.data.entry(f.id).or_insert_with(|| { let mut float_builder = Float64Builder::new(); // append nulls for all previous rows for _ in 0..(row_index + self.row_count) { @@ -368,7 +375,7 @@ impl MutableTableChunk { } } FieldData::Boolean(v) => { - let b = self.data.entry(f.name).or_insert_with(|| { + let b = self.data.entry(f.id).or_insert_with(|| { let mut bool_builder = BooleanBuilder::new(); // append nulls for all previous rows for _ in 0..(row_index + self.row_count) { @@ -410,25 +417,32 @@ impl MutableTableChunk { TimestampMinMax::new(self.timestamp_min, self.timestamp_max) } - fn record_batch(&self, schema: SchemaRef, filter: &[Expr]) -> Result { - let row_ids = self.index.get_rows_from_index_for_filter(filter); + fn record_batch( + &self, + table_def: Arc, + filter: &[Expr], + ) -> Result { + let row_ids = self + .index + .get_rows_from_index_for_filter(Arc::clone(&table_def), filter); + let schema = table_def.schema.as_arrow(); let mut cols = Vec::with_capacity(schema.fields().len()); for f in schema.fields() { match row_ids { Some(row_ids) => { - let b = self - .data - .get(f.name().as_str()) + let b = table_def + .column_name_to_id(f.name().as_str()) + .and_then(|id| self.data.get(&id)) .ok_or_else(|| Error::FieldNotFound(f.name().to_string()))? .get_rows(row_ids); cols.push(b); } None => { - let b = self - .data - .get(f.name().as_str()) + let b = table_def + .column_name_to_id(f.name().as_str()) + .and_then(|id| self.data.get(&id)) .ok_or_else(|| Error::FieldNotFound(f.name().to_string()))? .as_arrow(); cols.push(b); @@ -439,12 +453,18 @@ impl MutableTableChunk { Ok(RecordBatch::try_new(schema, cols)?) 
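// Because the chunk's builders are keyed by column id, record_batch now walks the Arrow
// schema (which fixes the output column order) and resolves each field name to an id via
// the table definition before fetching its data. A rough sketch of that indirection, under
// the assumption of toy lookup maps: sketch_record_batch, name_to_id, and data_by_id are
// illustrative stand-ins for TableDefinition::column_name_to_id and the id-keyed builder map.
use std::collections::HashMap;
use std::sync::Arc;
use arrow::array::{ArrayRef, Int64Array};
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;

fn sketch_record_batch() -> Result<RecordBatch, arrow::error::ArrowError> {
    let schema = Arc::new(Schema::new(vec![
        Field::new("value", DataType::Int64, true),
        Field::new("other", DataType::Int64, true),
    ]));
    let name_to_id: HashMap<&str, u32> = HashMap::from([("value", 1), ("other", 3)]);
    let data_by_id: HashMap<u32, Vec<i64>> =
        HashMap::from([(1, vec![10, 20]), (3, vec![30, 40])]);

    // Schema order drives the output; each column's data is pulled by its resolved id.
    let cols: Vec<ArrayRef> = schema
        .fields()
        .iter()
        .map(|f| {
            let id = name_to_id[f.name().as_str()];
            Arc::new(Int64Array::from(data_by_id[&id].clone())) as ArrayRef
        })
        .collect();
    RecordBatch::try_new(schema, cols)
}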
} - fn into_schema_record_batch(self) -> (Schema, RecordBatch) { + fn into_schema_record_batch(self, table_def: Arc) -> (Schema, RecordBatch) { let mut cols = Vec::with_capacity(self.data.len()); let mut schema_builder = SchemaBuilder::new(); - for (col_name, builder) in self.data.into_iter() { + for (col_id, builder) in self.data.into_iter() { let (col_type, col) = builder.into_influxcol_and_arrow(); - schema_builder.influx_column(col_name.as_ref(), col_type); + schema_builder.influx_column( + table_def + .column_id_to_name(col_id) + .expect("valid column id") + .as_ref(), + col_type, + ); cols.push(col); } let schema = schema_builder @@ -471,25 +491,25 @@ impl std::fmt::Debug for MutableTableChunk { } } -#[derive(Debug, Default, Clone)] +#[derive(Debug, Clone)] struct BufferIndex { - // column name -> string value -> row indexes - columns: HashMap, HashMap>>, + // column id -> string value -> row indexes + columns: HashMap>>, } impl BufferIndex { - fn new(column_names: &[&str]) -> Self { + fn new(column_ids: Vec) -> Self { let mut columns = HashMap::new(); - for c in column_names { - columns.insert(c.to_string().into(), HashMap::new()); + for id in column_ids { + columns.insert(id, HashMap::new()); } Self { columns } } - fn add_row_if_indexed_column(&mut self, row_index: usize, column_name: &str, value: &str) { - if let Some(column) = self.columns.get_mut(column_name) { + fn add_row_if_indexed_column(&mut self, row_index: usize, column_id: ColumnId, value: &str) { + if let Some(column) = self.columns.get_mut(&column_id) { column .entry_ref(value) .and_modify(|c| c.push(row_index)) @@ -497,7 +517,11 @@ impl BufferIndex { } } - fn get_rows_from_index_for_filter(&self, filter: &[Expr]) -> Option<&Vec> { + fn get_rows_from_index_for_filter( + &self, + table_def: Arc, + filter: &[Expr], + ) -> Option<&Vec> { for expr in filter { if let Expr::BinaryExpr(BinaryExpr { left, op, right }) = expr { if *op == datafusion::logical_expr::Operator::Eq { @@ -505,9 +529,9 @@ impl BufferIndex { if let Expr::Literal(datafusion::scalar::ScalarValue::Utf8(Some(v))) = right.as_ref() { - return self - .columns - .get(c.name.as_str()) + return table_def + .column_name_to_id(c.name()) + .and_then(|id| self.columns.get(&id)) .and_then(|m| m.get(v.as_str())); } } @@ -521,8 +545,10 @@ impl BufferIndex { #[allow(dead_code)] fn size(&self) -> usize { let mut size = size_of::(); - for (k, v) in &self.columns { - size += k.len() + size_of::() + size_of::>>(); + for (_, v) in &self.columns { + size += size_of::() + + size_of::() + + size_of::>>(); for (k, v) in v { size += k.len() + size_of::() + size_of::>(); size += v.len() * size_of::(); @@ -690,18 +716,34 @@ mod tests { use super::*; use arrow_util::{assert_batches_eq, assert_batches_sorted_eq}; use datafusion::common::Column; + use influxdb3_id::TableId; use influxdb3_wal::Field; - use schema::{InfluxFieldType, SchemaBuilder}; + use schema::InfluxFieldType; #[test] fn partitioned_table_buffer_batches() { - let mut table_buffer = TableBuffer::new(&["tag"], SortKey::empty()); - let schema = SchemaBuilder::with_capacity(3) - .tag("tag") - .influx_field("val", InfluxFieldType::String) - .timestamp() - .build() - .unwrap(); + let table_def = Arc::new( + TableDefinition::new( + TableId::new(), + "test_table".into(), + vec![ + (ColumnId::from(0), "tag".into(), InfluxColumnType::Tag), + ( + ColumnId::from(1), + "val".into(), + InfluxColumnType::Field(InfluxFieldType::String), + ), + ( + ColumnId::from(2), + "time".into(), + InfluxColumnType::Timestamp, + ), + ], + None, + ) 
+ .unwrap(), + ); + let mut table_buffer = TableBuffer::new(vec![ColumnId::from(0)], SortKey::empty()); for t in 0..10 { let offset = t * 10; @@ -710,15 +752,15 @@ mod tests { time: offset + 1, fields: vec![ Field { - name: "tag".into(), + id: ColumnId::from(0), value: FieldData::Tag("a".to_string()), }, Field { - name: "val".into(), + id: ColumnId::from(1), value: FieldData::String(format!("thing {t}-1")), }, Field { - name: "time".into(), + id: ColumnId::from(2), value: FieldData::Timestamp(offset + 1), }, ], @@ -727,15 +769,15 @@ mod tests { time: offset + 2, fields: vec![ Field { - name: "tag".into(), + id: ColumnId::from(0), value: FieldData::Tag("b".to_string()), }, Field { - name: "val".into(), + id: ColumnId::from(1), value: FieldData::String(format!("thing {t}-2")), }, Field { - name: "time".into(), + id: ColumnId::from(2), value: FieldData::Timestamp(offset + 2), }, ], @@ -746,7 +788,7 @@ mod tests { } let partitioned_batches = table_buffer - .partitioned_record_batches(schema.as_arrow(), &[]) + .partitioned_record_batches(Arc::clone(&table_def), &[]) .unwrap(); println!("{partitioned_batches:#?}"); @@ -759,20 +801,20 @@ mod tests { assert_eq!(TimestampMinMax::new(offset + 1, offset + 2), *ts_min_max); assert_batches_sorted_eq!( [ - "+-----+-----------+--------------------------------+", - "| tag | val | time |", - "+-----+-----------+--------------------------------+", + "+-----+--------------------------------+-----------+", + "| tag | time | val |", + "+-----+--------------------------------+-----------+", format!( - "| a | thing {t}-1 | 1970-01-01T00:00:00.{:0>9}Z |", + "| a | 1970-01-01T00:00:00.{:0>9}Z | thing {t}-1 |", offset + 1 ) .as_str(), format!( - "| b | thing {t}-2 | 1970-01-01T00:00:00.{:0>9}Z |", + "| b | 1970-01-01T00:00:00.{:0>9}Z | thing {t}-2 |", offset + 2 ) .as_str(), - "+-----+-----------+--------------------------------+", + "+-----+--------------------------------+-----------+", ], batches ); @@ -781,28 +823,43 @@ mod tests { #[test] fn tag_row_index() { - let mut table_buffer = TableBuffer::new(&["tag"], SortKey::empty()); - let schema = SchemaBuilder::with_capacity(3) - .tag("tag") - .influx_field("value", InfluxFieldType::Integer) - .timestamp() - .build() - .unwrap(); + let table_def = Arc::new( + TableDefinition::new( + TableId::new(), + "test_table".into(), + vec![ + (ColumnId::from(0), "tag".into(), InfluxColumnType::Tag), + ( + ColumnId::from(1), + "value".into(), + InfluxColumnType::Field(InfluxFieldType::Integer), + ), + ( + ColumnId::from(2), + "time".into(), + InfluxColumnType::Timestamp, + ), + ], + None, + ) + .unwrap(), + ); + let mut table_buffer = TableBuffer::new(vec![ColumnId::from(0)], SortKey::empty()); let rows = vec![ Row { time: 1, fields: vec![ Field { - name: "tag".into(), + id: ColumnId::from(0), value: FieldData::Tag("a".to_string()), }, Field { - name: "value".into(), + id: ColumnId::from(1), value: FieldData::Integer(1), }, Field { - name: "time".into(), + id: ColumnId::from(2), value: FieldData::Timestamp(1), }, ], @@ -811,15 +868,15 @@ mod tests { time: 2, fields: vec![ Field { - name: "tag".into(), + id: ColumnId::from(0), value: FieldData::Tag("b".to_string()), }, Field { - name: "value".into(), + id: ColumnId::from(1), value: FieldData::Integer(2), }, Field { - name: "time".into(), + id: ColumnId::from(2), value: FieldData::Timestamp(2), }, ], @@ -828,15 +885,15 @@ mod tests { time: 3, fields: vec![ Field { - name: "tag".into(), + id: ColumnId::from(0), value: FieldData::Tag("a".to_string()), }, Field { - name: 
"value".into(), + id: ColumnId::from(1), value: FieldData::Integer(3), }, Field { - name: "time".into(), + id: ColumnId::from(2), value: FieldData::Timestamp(3), }, ], @@ -860,20 +917,20 @@ mod tests { .get(&0) .unwrap() .index - .get_rows_from_index_for_filter(filter) + .get_rows_from_index_for_filter(Arc::clone(&table_def), filter) .unwrap(); assert_eq!(a_rows, &[0, 2]); let a = table_buffer - .record_batches(schema.as_arrow(), filter) + .record_batches(Arc::clone(&table_def), filter) .unwrap(); let expected_a = vec![ - "+-----+-------+--------------------------------+", - "| tag | value | time |", - "+-----+-------+--------------------------------+", - "| a | 1 | 1970-01-01T00:00:00.000000001Z |", - "| a | 3 | 1970-01-01T00:00:00.000000003Z |", - "+-----+-------+--------------------------------+", + "+-----+--------------------------------+-------+", + "| tag | time | value |", + "+-----+--------------------------------+-------+", + "| a | 1970-01-01T00:00:00.000000001Z | 1 |", + "| a | 1970-01-01T00:00:00.000000003Z | 3 |", + "+-----+--------------------------------+-------+", ]; assert_batches_eq!(&expected_a, &a); @@ -893,41 +950,41 @@ mod tests { .get(&0) .unwrap() .index - .get_rows_from_index_for_filter(filter) + .get_rows_from_index_for_filter(Arc::clone(&table_def), filter) .unwrap(); assert_eq!(b_rows, &[1]); let b = table_buffer - .record_batches(schema.as_arrow(), filter) + .record_batches(Arc::clone(&table_def), filter) .unwrap(); let expected_b = vec![ - "+-----+-------+--------------------------------+", - "| tag | value | time |", - "+-----+-------+--------------------------------+", - "| b | 2 | 1970-01-01T00:00:00.000000002Z |", - "+-----+-------+--------------------------------+", + "+-----+--------------------------------+-------+", + "| tag | time | value |", + "+-----+--------------------------------+-------+", + "| b | 1970-01-01T00:00:00.000000002Z | 2 |", + "+-----+--------------------------------+-------+", ]; assert_batches_eq!(&expected_b, &b); } #[test] fn computed_size_of_buffer() { - let mut table_buffer = TableBuffer::new(&["tag"], SortKey::empty()); + let mut table_buffer = TableBuffer::new(vec![ColumnId::from(0)], SortKey::empty()); let rows = vec![ Row { time: 1, fields: vec![ Field { - name: "tag".into(), + id: ColumnId::from(0), value: FieldData::Tag("a".to_string()), }, Field { - name: "value".into(), + id: ColumnId::from(1), value: FieldData::Integer(1), }, Field { - name: "time".into(), + id: ColumnId::from(2), value: FieldData::Timestamp(1), }, ], @@ -936,15 +993,15 @@ mod tests { time: 2, fields: vec![ Field { - name: "tag".into(), + id: ColumnId::from(0), value: FieldData::Tag("b".to_string()), }, Field { - name: "value".into(), + id: ColumnId::from(1), value: FieldData::Integer(2), }, Field { - name: "time".into(), + id: ColumnId::from(2), value: FieldData::Timestamp(2), }, ], @@ -953,15 +1010,15 @@ mod tests { time: 3, fields: vec![ Field { - name: "tag".into(), + id: ColumnId::from(0), value: FieldData::Tag("this is a long tag value to store".to_string()), }, Field { - name: "value".into(), + id: ColumnId::from(1), value: FieldData::Integer(3), }, Field { - name: "time".into(), + id: ColumnId::from(2), value: FieldData::Timestamp(3), }, ], @@ -971,12 +1028,12 @@ mod tests { table_buffer.buffer_chunk(0, rows); let size = table_buffer.computed_size(); - assert_eq!(size, 18094); + assert_eq!(size, 18095); } #[test] fn timestamp_min_max_works_when_empty() { - let table_buffer = TableBuffer::new(&["tag"], SortKey::empty()); + let table_buffer = 
TableBuffer::new(vec![ColumnId::from(0)], SortKey::empty()); let timestamp_min_max = table_buffer.timestamp_min_max(); assert_eq!(timestamp_min_max.min, 0); assert_eq!(timestamp_min_max.max, 0); diff --git a/influxdb3_write/src/write_buffer/validator.rs b/influxdb3_write/src/write_buffer/validator.rs index 524c8e4e40..e2d480f0fa 100644 --- a/influxdb3_write/src/write_buffer/validator.rs +++ b/influxdb3_write/src/write_buffer/validator.rs @@ -2,17 +2,17 @@ use std::{borrow::Cow, sync::Arc}; use crate::{write_buffer::Result, Precision, WriteLineError}; use data_types::{NamespaceName, Timestamp}; -use hashbrown::HashMap; +use indexmap::IndexMap; use influxdb3_catalog::catalog::{ influx_column_type_from_field_value, Catalog, DatabaseSchema, TableDefinition, }; -use influxdb3_id::TableId; +use influxdb3_id::{ColumnId, TableId}; use influxdb3_wal::{ - CatalogBatch, CatalogOp, Field, FieldAdditions, FieldData, FieldDataType, FieldDefinition, - Gen1Duration, Row, TableChunks, WriteBatch, + CatalogBatch, CatalogOp, Field, FieldAdditions, FieldData, FieldDefinition, Gen1Duration, Row, + TableChunks, WriteBatch, }; -use influxdb_line_protocol::{parse_lines, v3, FieldValue, ParsedLine}; +use influxdb_line_protocol::{parse_lines, v3, ParsedLine}; use iox_time::Time; use schema::{InfluxColumnType, TIME_COLUMN_NAME}; @@ -28,10 +28,9 @@ pub(crate) struct WithCatalog { /// Type state for the [`WriteValidator`] after it has parsed v1 or v3 /// line protocol. -pub(crate) struct LinesParsed<'raw, PL> { +pub(crate) struct LinesParsed { catalog: WithCatalog, - db_schema: Arc, - lines: Vec<(PL, &'raw str)>, + lines: Vec, catalog_batch: Option, errors: Vec, } @@ -74,28 +73,34 @@ impl WriteValidator { self, lp: &str, accept_partial: bool, - ) -> Result>>> { + ) -> Result> { let mut errors = vec![]; - let mut lp_lines = lp.lines().peekable(); + let mut lp_lines = lp.lines(); let mut lines = vec![]; let mut catalog_updates = vec![]; let mut schema = Cow::Borrowed(self.state.db_schema.as_ref()); for (line_idx, maybe_line) in v3::parse_lines(lp).enumerate() { - let (line, catalog_op) = match maybe_line + let (qualified_line, catalog_op) = match maybe_line .map_err(|e| WriteLineError { original_line: lp_lines.next().unwrap().to_string(), line_number: line_idx + 1, error_message: e.to_string(), }) - .and_then(|l| validate_v3_line(&mut schema, line_idx, l, lp_lines.peek().unwrap())) - { - Ok(line) => line, - Err(e) => { - if !accept_partial { - return Err(Error::ParseError(e)); + .and_then(|line| { + validate_and_qualify_v3_line( + &mut schema, + line_idx, + line, + lp_lines.next().unwrap(), + ) + }) { + Ok((qualified_line, catalog_ops)) => (qualified_line, catalog_ops), + Err(error) => { + if accept_partial { + errors.push(error); } else { - errors.push(e); + return Err(Error::ParseError(error)); } continue; } @@ -105,7 +110,7 @@ impl WriteValidator { catalog_updates.push(op); } - lines.push((line, lp_lines.next().unwrap())); + lines.push(qualified_line); } let catalog_batch = if catalog_updates.is_empty() { @@ -121,18 +126,9 @@ impl WriteValidator { Some(catalog_batch) }; - // if the schema has changed then the Cow will be owned so - // Arc it and pass that forward, otherwise just reuse the - // existing one - let db_schema = match schema { - Cow::Borrowed(_) => Arc::clone(&self.state.db_schema), - Cow::Owned(s) => Arc::new(s), - }; - Ok(WriteValidator { state: LinesParsed { catalog: self.state, - db_schema, lines, catalog_batch, errors, @@ -154,7 +150,7 @@ impl WriteValidator { self, lp: &str, accept_partial: bool, - 
) -> Result>>> { + ) -> Result> { let mut errors = vec![]; let mut lp_lines = lp.lines(); let mut lines = vec![]; @@ -162,7 +158,7 @@ impl WriteValidator { let mut schema = Cow::Borrowed(self.state.db_schema.as_ref()); for (line_idx, maybe_line) in parse_lines(lp).enumerate() { - let (line, catalog_op) = match maybe_line + let (qualified_line, catalog_op) = match maybe_line .map_err(|e| WriteLineError { // This unwrap is fine because we're moving line by line // alongside the output from parse_lines @@ -170,9 +166,10 @@ impl WriteValidator { line_number: line_idx + 1, error_message: e.to_string(), }) - .and_then(|l| validate_v1_line(&mut schema, line_idx, l)) - { - Ok(line) => line, + .and_then(|l| { + validate_and_qualify_v1_line(&mut schema, line_idx, l, lp_lines.next().unwrap()) + }) { + Ok((qualified_line, catalog_op)) => (qualified_line, catalog_op), Err(e) => { if !accept_partial { return Err(Error::ParseError(e)); @@ -187,7 +184,7 @@ impl WriteValidator { } // This unwrap is fine because we're moving line by line // alongside the output from parse_lines - lines.push((line, lp_lines.next().unwrap())); + lines.push(qualified_line); } // All lines are parsed and validated, so all steps after this @@ -206,18 +203,9 @@ impl WriteValidator { Some(catalog_batch) }; - // if the schema has changed then the Cow will be owned so - // Arc it and pass that forward, otherwise just reuse the - // existing one - let db_schema = match schema { - Cow::Borrowed(_) => Arc::clone(&self.state.db_schema), - Cow::Owned(s) => Arc::new(s), - }; - Ok(WriteValidator { state: LinesParsed { catalog: self.state, - db_schema, lines, errors, catalog_batch, @@ -226,6 +214,9 @@ impl WriteValidator { } } +/// Type alias for storing new columns added by a write +type ColumnTracker = Vec<(ColumnId, Arc, InfluxColumnType)>; + /// Validate an individual line of v3 line protocol and update the database /// schema /// @@ -235,15 +226,15 @@ impl WriteValidator { /// /// This errors if the write is being performed against a v1 table, i.e., one that does not have /// a series key. 
-fn validate_v3_line<'a>( +fn validate_and_qualify_v3_line( db_schema: &mut Cow<'_, DatabaseSchema>, line_number: usize, - line: v3::ParsedLine<'a>, + line: v3::ParsedLine, raw_line: &str, -) -> Result<(v3::ParsedLine<'a>, Option), WriteLineError> { +) -> Result<(QualifiedLine, Option), WriteLineError> { let mut catalog_op = None; let table_name = line.series.measurement.as_str(); - if let Some(table_def) = db_schema.table_definition(table_name) { + let qualified = if let Some(table_def) = db_schema.table_definition(table_name) { let table_id = table_def.table_id; if !table_def.is_v3() { return Err(WriteLineError { @@ -253,8 +244,11 @@ fn validate_v3_line<'a>( .to_string(), }); } - let mut columns = Vec::with_capacity(line.column_count() + 1); - match (table_def.schema().series_key(), &line.series.series_key) { + // TODO: may be faster to compare using table def/column IDs than comparing with schema: + match ( + table_def.influx_schema().series_key(), + &line.series.series_key, + ) { (Some(s), Some(l)) => { let l = l.iter().map(|sk| sk.0.as_str()).collect::>(); if s != l { @@ -287,17 +281,38 @@ fn validate_v3_line<'a>( } (None, _) => unreachable!(), } - if let Some(series_key) = &line.series.series_key { - for (sk, _) in series_key.iter() { - if !table_def.column_exists(sk) { - columns.push((sk.to_string(), InfluxColumnType::Tag)); - } + + let mut columns = ColumnTracker::with_capacity(line.column_count() + 1); + + // qualify the series key members: + let tag_set = if let Some(sk) = &line.series.series_key { + let mut ts = Vec::with_capacity(sk.len()); + for (key, val) in sk.iter() { + let col_id = + table_def + .column_name_to_id(key.as_str()) + .ok_or_else(|| WriteLineError { + original_line: raw_line.to_string(), + line_number, + error_message: format!( + "write contained invalid series key column ({key})\ + that does not exist in the catalog table definition" + ), + })?; + ts.push(Field::new(col_id, val)); } - } + Some(ts) + } else { + None + }; + + // qualify the fields: + let mut field_set = Vec::with_capacity(line.field_set.len()); for (field_name, field_val) in line.field_set.iter() { - if let Some(schema_col_type) = table_def.field_type_by_name(field_name) { + if let Some((col_id, col_def)) = table_def.column_def_and_id(field_name.as_str()) { let field_col_type = influx_column_type_from_field_value(field_val); - if field_col_type != schema_col_type { + let existing_col_type = col_def.data_type; + if field_col_type != existing_col_type { let field_name = field_name.to_string(); return Err(WriteLineError { original_line: raw_line.to_string(), @@ -305,19 +320,29 @@ fn validate_v3_line<'a>( error_message: format!( "invalid field value in line protocol for field '{field_name}' on line \ {line_number}: expected type {expected}, but got {got}", - expected = schema_col_type, + expected = existing_col_type, got = field_col_type, ), }); } + field_set.push(Field::new(col_id, field_val)); } else { + let col_id = ColumnId::new(); columns.push(( - field_name.to_string(), + col_id, + Arc::from(field_name.as_str()), influx_column_type_from_field_value(field_val), )); + field_set.push(Field::new(col_id, field_val)); } } + // qualify the timestamp: + let time_col_id = table_def + .column_name_to_id(TIME_COLUMN_NAME) + .unwrap_or_else(ColumnId::new); + let timestamp = (time_col_id, line.timestamp); + // if we have new columns defined, add them to the db_schema table so that subsequent lines // won't try to add the same definitions. 
Collect these additions into a catalog op, which // will be applied to the catalog with any other ops after all lines in the write request @@ -325,56 +350,81 @@ fn validate_v3_line<'a>( if !columns.is_empty() { let database_name = Arc::clone(&db_schema.name); let database_id = db_schema.id; - let t = db_schema.to_mut().tables.get_mut(&table_id).unwrap(); + let db_schema = db_schema.to_mut(); + let mut new_table_def = db_schema + .tables + .get_mut(&table_id) + .unwrap() + .as_ref() + .clone(); let mut fields = Vec::with_capacity(columns.len()); - for (name, influx_type) in &columns { - fields.push(FieldDefinition { - name: name.as_str().into(), - data_type: FieldDataType::from(influx_type), - }); + for (id, name, influx_type) in columns.iter() { + fields.push(FieldDefinition::new(*id, Arc::clone(name), influx_type)); } catalog_op = Some(CatalogOp::AddFields(FieldAdditions { database_id, database_name, - table_id: t.table_id, - table_name: Arc::clone(&t.table_name), + table_id: new_table_def.table_id, + table_name: Arc::clone(&new_table_def.table_name), field_definitions: fields, })); - t.add_columns(columns).map_err(|e| WriteLineError { - original_line: raw_line.to_string(), - line_number: line_number + 1, - error_message: e.to_string(), - })?; + new_table_def + .add_columns(columns) + .map_err(|e| WriteLineError { + original_line: raw_line.to_string(), + line_number: line_number + 1, + error_message: e.to_string(), + })?; + db_schema.insert_table(table_id, Arc::new(new_table_def)); + } + QualifiedLine { + table_id, + tag_set, + field_set, + timestamp, } } else { let table_id = TableId::new(); let mut columns = Vec::new(); let mut key = Vec::new(); - if let Some(series_key) = &line.series.series_key { - for (sk, _) in series_key.iter() { - key.push(sk.to_string()); - columns.push((sk.to_string(), InfluxColumnType::Tag)); + let tag_set = if let Some(series_key) = &line.series.series_key { + let mut ts = Vec::with_capacity(series_key.len()); + for (sk, sv) in series_key.iter() { + let col_id = ColumnId::new(); + key.push(col_id); + columns.push((col_id, Arc::from(sk.as_str()), InfluxColumnType::Tag)); + ts.push(Field::new(col_id, sv)); } - } + Some(ts) + } else { + None + }; + let mut field_set = Vec::with_capacity(line.field_set.len()); for (field_name, field_val) in line.field_set.iter() { + let col_id = ColumnId::new(); columns.push(( - field_name.to_string(), + col_id, + Arc::from(field_name.as_str()), influx_column_type_from_field_value(field_val), )); + field_set.push(Field::new(col_id, field_val)); } // Always add time last on new table: - columns.push((TIME_COLUMN_NAME.to_string(), InfluxColumnType::Timestamp)); + let time_col_id = ColumnId::new(); + columns.push(( + time_col_id, + Arc::from(TIME_COLUMN_NAME), + InfluxColumnType::Timestamp, + )); + let timestamp = (time_col_id, line.timestamp); let table_name = table_name.into(); let mut fields = Vec::with_capacity(columns.len()); - for (name, influx_type) in &columns { - fields.push(FieldDefinition { - name: name.as_str().into(), - data_type: FieldDataType::from(influx_type), - }); + for (id, name, influx_type) in &columns { + fields.push(FieldDefinition::new(*id, Arc::clone(name), influx_type)); } let table = TableDefinition::new( @@ -399,14 +449,20 @@ fn validate_v3_line<'a>( }); catalog_op = Some(table_definition_op); + let db_schema = db_schema.to_mut(); assert!( - db_schema.to_mut().tables.insert(table_id, table).is_none(), + db_schema.insert_table(table_id, Arc::new(table)).is_none(), "attempted to overwrite existing table" ); 
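// The schema updates in this validator follow a copy-on-write pattern: the validator holds a
// Cow<'_, DatabaseSchema>, clones it only on first mutation via to_mut(), and replaces a table
// by cloning the Arc'd definition, mutating the clone, and re-inserting it as a fresh Arc so
// later lines in the same write see the new columns. A rough standalone sketch of that shape,
// using toy DbSchema/TableDef types rather than the influxdb3_catalog ones:
use std::borrow::Cow;
use std::collections::BTreeMap;
use std::sync::Arc;

#[derive(Clone)]
struct TableDef {
    columns: Vec<String>,
}

#[derive(Clone)]
struct DbSchema {
    tables: BTreeMap<u32, Arc<TableDef>>,
}

fn add_column(schema: &mut Cow<'_, DbSchema>, table_id: u32, col: &str) {
    // to_mut() clones the borrowed schema only the first time it is mutated
    let schema = schema.to_mut();
    let mut new_def = schema
        .tables
        .get(&table_id)
        .expect("table exists")
        .as_ref()
        .clone();
    new_def.columns.push(col.to_string());
    // re-insert under the same id; readers holding the old Arc are unaffected
    schema.tables.insert(table_id, Arc::new(new_def));
}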
- db_schema.to_mut().table_map.insert(table_id, table_name); - } + QualifiedLine { + table_id, + tag_set, + field_set, + timestamp, + } + }; - Ok((line, catalog_op)) + Ok((qualified, catalog_op)) } /// Validate a line of line protocol against the given schema definition @@ -416,14 +472,15 @@ fn validate_v3_line<'a>( /// /// An error will also be produced if the write, which is for the v1 data model, is targetting /// a v3 table. -fn validate_v1_line<'a>( +fn validate_and_qualify_v1_line( db_schema: &mut Cow<'_, DatabaseSchema>, line_number: usize, - line: ParsedLine<'a>, -) -> Result<(ParsedLine<'a>, Option), WriteLineError> { + line: ParsedLine, + _raw_line: &str, +) -> Result<(QualifiedLine, Option), WriteLineError> { let mut catalog_op = None; let table_name = line.series.measurement.as_str(); - if let Some(table_def) = db_schema.table_definition(table_name) { + let qualified = if let Some(table_def) = db_schema.table_definition(table_name) { if table_def.is_v3() { return Err(WriteLineError { original_line: line.to_string(), @@ -433,39 +490,58 @@ fn validate_v1_line<'a>( }); } // This table already exists, so update with any new columns if present: - let mut columns = Vec::with_capacity(line.column_count() + 1); - if let Some(tag_set) = &line.series.tag_set { - for (tag_key, _) in tag_set { - if !table_def.column_exists(tag_key) { - columns.push((tag_key.to_string(), InfluxColumnType::Tag)); + let mut columns = ColumnTracker::with_capacity(line.column_count() + 1); + let tag_set = if let Some(tag_set) = &line.series.tag_set { + let mut ts = Vec::with_capacity(tag_set.len()); + for (tag_key, tag_val) in tag_set { + if let Some(col_id) = table_def.column_name_to_id(tag_key.as_str()) { + ts.push(Field::new(col_id, FieldData::Tag(tag_val.to_string()))); + } else { + let col_id = ColumnId::new(); + columns.push((col_id, Arc::from(tag_key.as_str()), InfluxColumnType::Tag)); + ts.push(Field::new(col_id, FieldData::Tag(tag_val.to_string()))); } } - } + Some(ts) + } else { + None + }; + let mut field_set = Vec::with_capacity(line.field_set.len()); for (field_name, field_val) in line.field_set.iter() { // This field already exists, so check the incoming type matches existing type: - if let Some(schema_col_type) = table_def.field_type_by_name(field_name) { + if let Some((col_id, col_def)) = table_def.column_def_and_id(field_name.as_str()) { let field_col_type = influx_column_type_from_field_value(field_val); - if field_col_type != schema_col_type { + let existing_col_type = col_def.data_type; + if field_col_type != existing_col_type { let field_name = field_name.to_string(); return Err(WriteLineError { original_line: line.to_string(), line_number: line_number + 1, error_message: format!( - "invalid field value in line protocol for field '{field_name}' on line \ - {line_number}: expected type {expected}, but got {got}", - expected = schema_col_type, - got = field_col_type, - ), + "invalid field value in line protocol for field '{field_name}' on line \ + {line_number}: expected type {expected}, but got {got}", + expected = existing_col_type, + got = field_col_type, + ), }); } + field_set.push(Field::new(col_id, field_val)); } else { + let col_id = ColumnId::new(); columns.push(( - field_name.to_string(), + col_id, + Arc::from(field_name.as_str()), influx_column_type_from_field_value(field_val), )); + field_set.push(Field::new(col_id, field_val)); } } + let time_col_id = table_def + .column_name_to_id(TIME_COLUMN_NAME) + .unwrap_or_default(); + let timestamp = (time_col_id, line.timestamp); + // if 
we have new columns defined, add them to the db_schema table so that subsequent lines // won't try to add the same definitions. Collect these additions into a catalog op, which // will be applied to the catalog with any other ops after all lines in the write request @@ -477,20 +553,26 @@ fn validate_v1_line<'a>( let table_id = table_def.table_id; let mut fields = Vec::with_capacity(columns.len()); - for (name, influx_type) in &columns { - fields.push(FieldDefinition { - name: name.as_str().into(), - data_type: FieldDataType::from(influx_type), - }); + for (id, name, influx_type) in &columns { + fields.push(FieldDefinition::new(*id, Arc::clone(name), influx_type)); } - // unwrap is safe due to the surrounding if let condition: - let t = db_schema.to_mut().tables.get_mut(&table_id).unwrap(); - t.add_columns(columns).map_err(|e| WriteLineError { - original_line: line.to_string(), - line_number: line_number + 1, - error_message: e.to_string(), - })?; + let db_schema = db_schema.to_mut(); + let mut new_table_def = db_schema + .tables + .get_mut(&table_id) + // unwrap is safe due to the surrounding if let condition: + .unwrap() + .as_ref() + .clone(); + new_table_def + .add_columns(columns) + .map_err(|e| WriteLineError { + original_line: line.to_string(), + line_number: line_number + 1, + error_message: e.to_string(), + })?; + db_schema.insert_table(table_id, Arc::new(new_table_def)); catalog_op = Some(CatalogOp::AddFields(FieldAdditions { database_name, @@ -500,32 +582,51 @@ fn validate_v1_line<'a>( field_definitions: fields, })); } + QualifiedLine { + table_id: table_def.table_id, + tag_set, + field_set, + timestamp, + } } else { let table_id = TableId::new(); // This is a new table, so build up its columns: let mut columns = Vec::new(); - if let Some(tag_set) = &line.series.tag_set { - for (tag_key, _) in tag_set { - columns.push((tag_key.to_string(), InfluxColumnType::Tag)); + let tag_set = if let Some(tag_set) = &line.series.tag_set { + let mut ts = Vec::with_capacity(tag_set.len()); + for (tag_key, tag_val) in tag_set { + let col_id = ColumnId::new(); + ts.push(Field::new(col_id, FieldData::Tag(tag_val.to_string()))); + columns.push((col_id, Arc::from(tag_key.as_str()), InfluxColumnType::Tag)); } - } + Some(ts) + } else { + None + }; + let mut field_set = Vec::with_capacity(line.field_set.len()); for (field_name, field_val) in &line.field_set { + let col_id = ColumnId::new(); columns.push(( - field_name.to_string(), + col_id, + Arc::from(field_name.as_str()), influx_column_type_from_field_value(field_val), )); + field_set.push(Field::new(col_id, field_val)); } // Always add time last on new table: - columns.push((TIME_COLUMN_NAME.to_string(), InfluxColumnType::Timestamp)); + let time_col_id = ColumnId::new(); + columns.push(( + time_col_id, + Arc::from(TIME_COLUMN_NAME), + InfluxColumnType::Timestamp, + )); + let timestamp = (time_col_id, line.timestamp); let table_name = table_name.into(); let mut fields = Vec::with_capacity(columns.len()); - for (name, influx_type) in &columns { - fields.push(FieldDefinition { - name: name.as_str().into(), - data_type: FieldDataType::from(influx_type), - }); + for (id, name, influx_type) in &columns { + fields.push(FieldDefinition::new(*id, Arc::clone(name), influx_type)); } catalog_op = Some(CatalogOp::CreateTable(influxdb3_wal::TableDefinition { table_id, @@ -536,22 +637,22 @@ fn validate_v1_line<'a>( key: None, })); - let table = TableDefinition::new( - table_id, - Arc::clone(&table_name), - columns, - Option::>::None, - ) - .unwrap(); + let table = 
TableDefinition::new(table_id, Arc::clone(&table_name), columns, None).unwrap(); + let db_schema = db_schema.to_mut(); assert!( - db_schema.to_mut().tables.insert(table_id, table).is_none(), + db_schema.insert_table(table_id, Arc::new(table)).is_none(), "attempted to overwrite existing table" ); - db_schema.to_mut().table_map.insert(table_id, table_name); - } + QualifiedLine { + table_id, + tag_set, + field_set, + timestamp, + } + }; - Ok((line, catalog_op)) + Ok((qualified, catalog_op)) } /// Result of conversion from line protocol to valid chunked data @@ -572,7 +673,7 @@ pub(crate) struct ValidatedLines { pub(crate) catalog_updates: Option, } -impl<'lp> WriteValidator>> { +impl WriteValidator { /// Convert a set of valid parsed `v3` lines to a [`ValidatedLines`] which will /// be buffered and written to the WAL, if configured. /// @@ -585,22 +686,16 @@ impl<'lp> WriteValidator>> { gen1_duration: Gen1Duration, precision: Precision, ) -> ValidatedLines { - let mut table_chunks = HashMap::new(); + let mut table_chunks = IndexMap::new(); let line_count = self.state.lines.len(); let mut field_count = 0; - let mut series_key_count = 0; + let mut index_count = 0; - for (line, _raw_line) in self.state.lines.into_iter() { + for line in self.state.lines.into_iter() { field_count += line.field_set.len(); - series_key_count += line - .series - .series_key - .as_ref() - .map(|sk| sk.len()) - .unwrap_or(0); + index_count += line.tag_set.as_ref().map(|tags| tags.len()).unwrap_or(0); - convert_v3_parsed_line( - Arc::clone(&self.state.db_schema), + convert_qualified_line( line, &mut table_chunks, ingest_time, @@ -618,7 +713,7 @@ impl<'lp> WriteValidator>> { ValidatedLines { line_count, field_count, - index_count: series_key_count, + index_count, errors: self.state.errors, valid_data: write_batch, catalog_updates: self.state.catalog_batch, @@ -626,10 +721,9 @@ impl<'lp> WriteValidator>> { } } -fn convert_v3_parsed_line( - db_schema: Arc, - line: v3::ParsedLine<'_>, - table_chunk_map: &mut HashMap, +fn convert_qualified_line( + line: QualifiedLine, + table_chunk_map: &mut IndexMap, ingest_time: Time, gen1_duration: Gen1Duration, precision: Precision, @@ -638,41 +732,27 @@ fn convert_v3_parsed_line( let mut fields = Vec::with_capacity(line.column_count() + 1); // Add series key columns: - if let Some(series_key) = line.series.series_key { - for (sk, sv) in series_key.iter() { - fields.push(Field { - name: sk.to_string().into(), - value: sv.into(), - }); - } + if let Some(tag_set) = line.tag_set { + fields.extend(tag_set); } // Add fields columns: - for (name, val) in line.field_set { - fields.push(Field { - name: name.to_string().into(), - value: val.into(), - }); - } + fields.extend(line.field_set); // Add time column: - // TODO: change the default time resolution to microseconds in v3 let time_value_nanos = line .timestamp + .1 .map(|ts| apply_precision_to_timestamp(precision, ts)) .unwrap_or(ingest_time.timestamp_nanos()); - fields.push(Field { - name: TIME_COLUMN_NAME.to_string().into(), - value: FieldData::Timestamp(time_value_nanos), - }); + fields.push(Field::new( + line.timestamp.0, + FieldData::Timestamp(time_value_nanos), + )); // Add the row into the correct chunk in the table let chunk_time = gen1_duration.chunk_time_for_timestamp(Timestamp::new(time_value_nanos)); - let table_name = line.series.measurement.as_str(); - let table_id = db_schema - .table_name_to_id(table_name) - .expect("table should exist by this point"); - let table_chunks = table_chunk_map.entry(table_id).or_default(); + let 
table_chunks = table_chunk_map.entry(line.table_id).or_default(); table_chunks.push_row( chunk_time, Row { @@ -682,119 +762,17 @@ fn convert_v3_parsed_line( ); } -impl<'lp> WriteValidator>> { - /// Convert a set of valid parsed lines to a [`ValidatedLines`] which will - /// be buffered and written to the WAL, if configured. - /// - /// This involves splitting out the writes into different batches for each chunk, which will - /// map to the `Gen1Duration`. This function should be infallible, because - /// the schema for incoming writes has been fully validated. - pub(crate) fn convert_lines_to_buffer( - self, - ingest_time: Time, - gen1_duration: Gen1Duration, - precision: Precision, - ) -> ValidatedLines { - let mut table_chunks = HashMap::new(); - let line_count = self.state.lines.len(); - let mut field_count = 0; - let mut tag_count = 0; - - for (line, _raw_line) in self.state.lines.into_iter() { - field_count += line.field_set.len(); - tag_count += line.series.tag_set.as_ref().map(|t| t.len()).unwrap_or(0); - - convert_v1_parsed_line( - Arc::clone(&self.state.db_schema), - line, - &mut table_chunks, - ingest_time, - gen1_duration, - precision, - ); - } - - let write_batch = WriteBatch::new( - self.state.catalog.db_schema.id, - Arc::clone(&self.state.catalog.db_schema.name), - table_chunks, - ); - - ValidatedLines { - line_count, - field_count, - index_count: tag_count, - errors: self.state.errors, - valid_data: write_batch, - catalog_updates: self.state.catalog_batch, - } - } +struct QualifiedLine { + table_id: TableId, + tag_set: Option>, + field_set: Vec, + timestamp: (ColumnId, Option), } -fn convert_v1_parsed_line( - db_schema: Arc, - line: ParsedLine<'_>, - table_chunk_map: &mut HashMap, - ingest_time: Time, - gen1_duration: Gen1Duration, - precision: Precision, -) { - // now that we've ensured all columns exist in the schema, construct the actual row and values - // while validating the column types match. 
- let mut values = Vec::with_capacity(line.column_count() + 1); - - // validate tags, collecting any new ones that must be inserted, or adding the values - if let Some(tag_set) = line.series.tag_set { - for (tag_key, value) in tag_set { - let value = Field { - name: tag_key.to_string().into(), - value: FieldData::Tag(value.to_string()), - }; - values.push(value); - } +impl QualifiedLine { + fn column_count(&self) -> usize { + self.tag_set.as_ref().map(|ts| ts.len()).unwrap_or(0) + self.field_set.len() + 1 } - - // validate fields, collecting any new ones that must be inserted, or adding values - for (field_name, value) in line.field_set { - let field_data = match value { - FieldValue::I64(v) => FieldData::Integer(v), - FieldValue::F64(v) => FieldData::Float(v), - FieldValue::U64(v) => FieldData::UInteger(v), - FieldValue::Boolean(v) => FieldData::Boolean(v), - FieldValue::String(v) => FieldData::String(v.to_string()), - }; - let value = Field { - name: field_name.to_string().into(), - value: field_data, - }; - values.push(value); - } - - // set the time value - let time_value_nanos = line - .timestamp - .map(|ts| apply_precision_to_timestamp(precision, ts)) - .unwrap_or(ingest_time.timestamp_nanos()); - - let chunk_time = gen1_duration.chunk_time_for_timestamp(Timestamp::new(time_value_nanos)); - - values.push(Field { - name: TIME_COLUMN_NAME.to_string().into(), - value: FieldData::Timestamp(time_value_nanos), - }); - - let table_name: Arc = line.series.measurement.to_string().into(); - let table_id = db_schema - .table_name_to_id(table_name) - .expect("table should exist by this point"); - let table_chunks = table_chunk_map.entry(table_id).or_default(); - table_chunks.push_row( - chunk_time, - Row { - time: time_value_nanos, - fields: values, - }, - ); } fn apply_precision_to_timestamp(precision: Precision, ts: i64) -> i64 {
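// convert_qualified_line above flattens a QualifiedLine into a row: tag fields first, then
// value fields, then the time column, whose id travels with the line and whose value falls
// back to the ingest time when the line carried no timestamp. A rough sketch of that ordering
// with stand-in tuple types (sketch_row is illustrative; the real code builds
// influxdb3_wal::{Field, FieldData, Row} values and applies the write precision first):
fn sketch_row(
    tag_set: Option<Vec<(u32, String)>>,
    field_set: Vec<(u32, i64)>,
    timestamp: (u32, Option<i64>),
    ingest_time_nanos: i64,
) -> Vec<(u32, String)> {
    let mut fields = Vec::with_capacity(
        tag_set.as_ref().map(|t| t.len()).unwrap_or(0) + field_set.len() + 1,
    );
    if let Some(tags) = tag_set {
        fields.extend(tags);
    }
    fields.extend(field_set.into_iter().map(|(id, v)| (id, v.to_string())));
    // the time column needs no name lookup because its ColumnId was qualified during parsing
    let time = timestamp.1.unwrap_or(ingest_time_nanos);
    fields.push((timestamp.0, time.to_string()));
    fields
}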