feat: Enforce table column limits from the schema cache (#5819)
* fix: Avoid some allocations by collecting instead of inserting into a vec
* refactor: Encode that adding columns is for one table at a time
* test: Add another test of column limits
* test: Add below/above limit tests for create_or_get_many
* fix: Explicitly DO NOT check column limits when inserting many columns
* feat: Cache the max_columns_per_table on the NamespaceSchema
* feat: Add a function to validate column limits in-memory
* fix: Provide more useful information when over column limits
* fix: Swap types to remove intermediate allocation
* docs: Explain the interactions of the cache and the column limits
* test: Actually set up test that showcases column limit race condition
* fix: Allow writing to existing columns even if table is over column limit

Co-authored-by: Dom <dom@itsallbroken.com>
parent 81722dc19b
commit efb964c390
@@ -4447,6 +4447,7 @@ dependencies = [
  "hashbrown",
  "hyper",
  "iox_catalog",
+ "iox_tests",
  "iox_time",
  "metric",
  "mutable_batch",
@@ -25,7 +25,7 @@ use snafu::{ResultExt, Snafu};
 use sqlx::postgres::PgHasArrayType;
 use std::{
     borrow::{Borrow, Cow},
-    collections::{BTreeMap, HashMap},
+    collections::{BTreeMap, BTreeSet, HashMap},
     convert::TryFrom,
     fmt::{Display, Write},
     mem::{self, size_of_val},
@@ -464,16 +464,24 @@ pub struct NamespaceSchema {
     pub query_pool_id: QueryPoolId,
     /// the tables in the namespace by name
     pub tables: BTreeMap<String, TableSchema>,
+    /// the number of columns per table this namespace allows
+    pub max_columns_per_table: usize,
 }

 impl NamespaceSchema {
     /// Create a new `NamespaceSchema`
-    pub fn new(id: NamespaceId, topic_id: TopicId, query_pool_id: QueryPoolId) -> Self {
+    pub fn new(
+        id: NamespaceId,
+        topic_id: TopicId,
+        query_pool_id: QueryPoolId,
+        max_columns_per_table: i32,
+    ) -> Self {
         Self {
             id,
             tables: BTreeMap::new(),
             topic_id,
             query_pool_id,
+            max_columns_per_table: max_columns_per_table as usize,
         }
     }

@@ -547,6 +555,12 @@ impl TableSchema {
             .map(|(name, c)| (c.id, name.as_str()))
             .collect()
     }
+
+    /// Return the set of column names for this table. Used in combination with a write operation's
+    /// column names to determine whether a write would exceed the max allowed columns.
+    pub fn column_names(&self) -> BTreeSet<&str> {
+        self.columns.keys().map(|name| name.as_str()).collect()
+    }
 }

 /// Data object for a column
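The new `column_names()` accessor (and the matching one added to `MutableBatch` later in this diff) lets the limit check run as cheap set arithmetic before any catalog call. A minimal standalone sketch of that arithmetic, using plain `std` types rather than the IOx ones (illustrative values only):

use std::collections::BTreeSet;

fn main() {
    // Columns already present in the cached table schema.
    let existing: BTreeSet<&str> = BTreeSet::from(["tag1", "val", "time"]);
    // Columns carried by an incoming write.
    let mut incoming: BTreeSet<&str> = BTreeSet::from(["tag1", "tag2", "time"]);

    let existing_count = existing.len();
    let mut merged = existing;
    // `append` moves `incoming`'s elements over, deduplicating shared names.
    merged.append(&mut incoming);

    // Three existing columns plus one genuinely new one: 3 -> 4.
    assert_eq!(existing_count, 3);
    assert_eq!(merged.len(), 4);
}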
@@ -3367,12 +3381,14 @@ mod tests {
             topic_id: TopicId::new(2),
             query_pool_id: QueryPoolId::new(3),
             tables: BTreeMap::from([]),
+            max_columns_per_table: 4,
         };
         let schema2 = NamespaceSchema {
             id: NamespaceId::new(1),
             topic_id: TopicId::new(2),
             query_pool_id: QueryPoolId::new(3),
             tables: BTreeMap::from([(String::from("foo"), TableSchema::new(TableId::new(1)))]),
+            max_columns_per_table: 4,
         };
         assert!(schema1.size() < schema2.size());
     }
@@ -220,7 +220,6 @@ where
                 // column doesn't exist; add it
                 column_batch.push(ColumnUpsertRequest {
                     name: tag.name.as_str(),
-                    table_id: table.id,
                     column_type: ColumnType::Tag,
                 });
             }
@@ -257,7 +256,6 @@ where
                 // column doesn't exist; add it
                 column_batch.push(ColumnUpsertRequest {
                     name: field.name.as_str(),
-                    table_id: table.id,
                     column_type: ColumnType::from(influx_column_type),
                 });
             }
@@ -270,7 +268,10 @@ where
             // that with short-lived loop variables.
             // since this is a CLI tool rather than something called a lot on the write path, i
             // figure it's okay.
-            repos.columns().create_or_get_many(&column_batch).await?;
+            repos
+                .columns()
+                .create_or_get_many_unchecked(table.id, &column_batch)
+                .await?;
         }
         // create a partition for every day in the date range.
         // N.B. this will need updating if we someday support partitioning by inputs other than
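With `table_id` gone from `ColumnUpsertRequest`, a batch now targets exactly one table by construction. A hedged sketch of the resulting call shape (values illustrative; `repos` and `table` as in the surrounding code):

let requests = vec![
    ColumnUpsertRequest { name: "tag_a", column_type: ColumnType::Tag },
    ColumnUpsertRequest { name: "field_b", column_type: ColumnType::I64 },
];
// The table is named once, on the call itself, so a single batch can no
// longer accidentally span multiple tables the way the old per-request
// `table_id` field allowed.
let created = repos
    .columns()
    .create_or_get_many_unchecked(table.id, &requests)
    .await?;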
@@ -599,7 +599,7 @@ mod tests {
             Arc::clone(&metrics),
         ));

-        let schema = NamespaceSchema::new(namespace.id, topic.id, query_pool.id);
+        let schema = NamespaceSchema::new(namespace.id, topic.id, query_pool.id, 100);

         let ignored_ts = Time::from_timestamp_millis(42);

@@ -681,7 +681,7 @@ mod tests {
             Arc::clone(&metrics),
         ));

-        let schema = NamespaceSchema::new(namespace.id, topic.id, query_pool.id);
+        let schema = NamespaceSchema::new(namespace.id, topic.id, query_pool.id, 100);

         let w1 = DmlWrite::new(
             "foo",
@@ -788,7 +788,7 @@ mod tests {
             Arc::clone(&metrics),
         ));

-        let schema = NamespaceSchema::new(namespace.id, topic.id, query_pool.id);
+        let schema = NamespaceSchema::new(namespace.id, topic.id, query_pool.id, 100);

         let ignored_ts = Time::from_timestamp_millis(42);

@@ -1058,7 +1058,7 @@ mod tests {
             Arc::clone(&metrics),
         ));

-        let schema = NamespaceSchema::new(namespace.id, topic.id, query_pool.id);
+        let schema = NamespaceSchema::new(namespace.id, topic.id, query_pool.id, 100);

         let ignored_ts = Time::from_timestamp_millis(42);

@@ -1174,7 +1174,7 @@ mod tests {
             .await
             .unwrap();

-        let schema = NamespaceSchema::new(namespace.id, topic.id, query_pool.id);
+        let schema = NamespaceSchema::new(namespace.id, topic.id, query_pool.id, 100);

         let ignored_ts = Time::from_timestamp_millis(42);

@@ -1357,7 +1357,7 @@ mod tests {
             Arc::clone(&metrics),
         ));

-        let schema = NamespaceSchema::new(namespace.id, topic.id, query_pool.id);
+        let schema = NamespaceSchema::new(namespace.id, topic.id, query_pool.id, 100);

         let ignored_ts = Time::from_timestamp_millis(42);

@@ -456,6 +456,7 @@ mod tests {
             ingester.namespace.id,
             ingester.topic.id,
             ingester.query_pool.id,
+            100,
         );
         let mut txn = ingester.catalog.start_transaction().await.unwrap();
         let ingest_ts1 = Time::from_timestamp_millis(42);
@@ -708,7 +709,12 @@ mod tests {
         let write_buffer_state =
             MockBufferSharedState::empty_with_n_shards(NonZeroU32::try_from(1).unwrap());

-        let schema = NamespaceSchema::new(namespace.id, topic.id, query_pool.id);
+        let schema = NamespaceSchema::new(
+            namespace.id,
+            topic.id,
+            query_pool.id,
+            namespace.max_columns_per_table,
+        );
         for write_operation in write_operations {
             validate_or_insert_schema(write_operation.tables(), &schema, txn.deref_mut())
                 .await
@@ -332,13 +332,11 @@ pub trait TableRepo: Send + Sync {
 }

 /// Parameters necessary to perform a batch insert of
-/// [`ColumnRepo::create_or_get()`].
+/// [`ColumnRepo::create_or_get()`] for one table (specified separately).
 #[derive(Debug)]
 pub struct ColumnUpsertRequest<'a> {
     /// The name of the column.
     pub name: &'a str,
-    /// The table ID to which it belongs.
-    pub table_id: TableId,
     /// The data type of the column.
     pub column_type: ColumnType,
 }
@@ -361,8 +359,13 @@ pub trait ColumnRepo: Send + Sync {
     /// Implementations make no guarantees as to the ordering or atomicity of
     /// the batch of column upsert operations - a batch upsert may partially
     /// commit, in which case an error MUST be returned by the implementation.
-    async fn create_or_get_many(
+    ///
+    /// Per-namespace limits on the number of columns allowed per table are explicitly NOT checked
+    /// by this function, hence the name containing `unchecked`. It is expected that the caller
+    /// will check this first, and yes, this is racy.
+    async fn create_or_get_many_unchecked(
         &mut self,
+        table_id: TableId,
         columns: &[ColumnUpsertRequest<'_>],
     ) -> Result<Vec<Column>>;
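The doc comment makes the division of labour explicit: the repository only upserts, and limit enforcement is a check-then-act step the caller performs against its cached schema, which is what makes it racy. A sketch of the intended calling pattern (names illustrative; `validate_column_limits` is the router-side check added later in this diff):

// Check the cached schema first...
validate_column_limits(&batches, &cached_schema)?;
// ...then upsert. Another writer may add columns between these two steps;
// that window is the documented race, accepted as a best-effort limit.
let columns = repos
    .columns()
    .create_or_get_many_unchecked(table_id, &requests)
    .await?;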
@@ -714,8 +717,12 @@ where
     let columns = repos.columns().list_by_namespace_id(namespace.id).await?;
     let tables = repos.tables().list_by_namespace_id(namespace.id).await?;

-    let mut namespace =
-        NamespaceSchema::new(namespace.id, namespace.topic_id, namespace.query_pool_id);
+    let mut namespace = NamespaceSchema::new(
+        namespace.id,
+        namespace.topic_id,
+        namespace.query_pool_id,
+        namespace.max_columns_per_table,
+    );

     let mut table_id_to_schema = BTreeMap::new();
     for t in tables {
@@ -846,7 +853,8 @@ pub async fn list_schemas(
         // was created, or have no tables/columns (and therefore have no entry
         // in "joined").
         .filter_map(move |v| {
-            let mut ns = NamespaceSchema::new(v.id, v.topic_id, v.query_pool_id);
+            let mut ns =
+                NamespaceSchema::new(v.id, v.topic_id, v.query_pool_id, v.max_columns_per_table);
             ns.tables = joined.remove(&v.id)?;
             Some((v, ns))
         });
@@ -1179,14 +1187,7 @@ pub(crate) mod test_helpers {
             .create_or_get("column_test", table.id, ColumnType::U64)
             .await
             .expect_err("should error with wrong column type");
-        assert!(matches!(
-            err,
-            Error::ColumnTypeMismatch {
-                name: _,
-                existing: _,
-                new: _
-            }
-        ));
+        assert!(matches!(err, Error::ColumnTypeMismatch { .. }));

         // test that we can create a column of the same name under a different table
         let table2 = repos
@@ -1201,23 +1202,6 @@ pub(crate) mod test_helpers {
             .unwrap();
         assert_ne!(c, ccc);

-        let cols3 = repos
-            .columns()
-            .create_or_get_many(&[
-                ColumnUpsertRequest {
-                    name: "a",
-                    table_id: table2.id,
-                    column_type: ColumnType::U64,
-                },
-                ColumnUpsertRequest {
-                    name: "a",
-                    table_id: table.id,
-                    column_type: ColumnType::U64,
-                },
-            ])
-            .await
-            .unwrap();
-
         let columns = repos
             .columns()
             .list_by_namespace_id(namespace.id)
@@ -1225,12 +1209,11 @@ pub(crate) mod test_helpers {
             .unwrap();

         let mut want = vec![c.clone(), ccc];
-        want.extend(cols3.clone());
         assert_eq!(want, columns);

         let columns = repos.columns().list_by_table_id(table.id).await.unwrap();

-        let want2 = vec![c, cols3[1].clone()];
+        let want2 = vec![c];
         assert_eq!(want2, columns);

         // Add another tag column into table2
@@ -1252,7 +1235,7 @@ pub(crate) mod test_helpers {
             },
             ColumnTypeCount {
                 col_type: ColumnType::U64,
-                count: 2,
+                count: 1,
             },
         ];
         expect.sort_by_key(|c| c.col_type);
@@ -1264,6 +1247,28 @@ pub(crate) mod test_helpers {
         want.extend([c3]);
         assert_eq!(list, want);

+        // test create_or_get_many_unchecked, below column limit
+        let table1_columns = repos
+            .columns()
+            .create_or_get_many_unchecked(
+                table.id,
+                &[
+                    ColumnUpsertRequest {
+                        name: "column_test",
+                        column_type: ColumnType::Tag,
+                    },
+                    ColumnUpsertRequest {
+                        name: "new_column",
+                        column_type: ColumnType::Tag,
+                    },
+                ],
+            )
+            .await
+            .unwrap();
+        let mut table1_column_names: Vec<_> = table1_columns.iter().map(|c| &c.name).collect();
+        table1_column_names.sort();
+        assert_eq!(table1_column_names, vec!["column_test", "new_column"]);
+
         // test per-namespace column limits
         repos
             .namespaces()
@@ -1282,6 +1287,33 @@ pub(crate) mod test_helpers {
                 table_id: _,
             }
         ));

+        // test per-namespace column limits are NOT enforced with create_or_get_many_unchecked
+        let table3 = repos
+            .tables()
+            .create_or_get("test_table_3", namespace.id)
+            .await
+            .unwrap();
+        let table3_columns = repos
+            .columns()
+            .create_or_get_many_unchecked(
+                table3.id,
+                &[
+                    ColumnUpsertRequest {
+                        name: "apples",
+                        column_type: ColumnType::Tag,
+                    },
+                    ColumnUpsertRequest {
+                        name: "oranges",
+                        column_type: ColumnType::Tag,
+                    },
+                ],
+            )
+            .await
+            .unwrap();
+        let mut table3_column_names: Vec<_> = table3_columns.iter().map(|c| &c.name).collect();
+        table3_column_names.sort();
+        assert_eq!(table3_column_names, vec!["apples", "oranges"]);
     }

     async fn test_shards(catalog: Arc<dyn Catalog>) {
@@ -3989,7 +4021,7 @@ pub(crate) mod test_helpers {

         let batches = mutable_batch_lp::lines_to_batches(lines, 42).unwrap();
         let batches = batches.iter().map(|(table, batch)| (table.as_str(), batch));
-        let ns = NamespaceSchema::new(namespace.id, topic.id, pool.id);
+        let ns = NamespaceSchema::new(namespace.id, topic.id, pool.id, 1000);

         let schema = validate_or_insert_schema(batches, &ns, repos)
             .await
@@ -144,43 +144,47 @@ where
         // If the table itself needs to be updated during column validation it
         // becomes a Cow::owned() copy and the modified copy should be inserted into
         // the schema before returning.
-        let mut column_batch = Vec::default();
-        for (name, col) in mb.columns() {
-            // Check if the column exists in the cached schema.
-            //
-            // If it does, validate it. If it does not exist, create it and insert
-            // it into the cached schema.
-            match table.columns.get(name.as_str()) {
-                Some(existing) if existing.matches_type(col.influx_type()) => {
-                    // No action is needed as the column matches the existing column
-                    // schema.
-                }
-                Some(existing) => {
-                    // The column schema, and the column in the mutable batch are of
-                    // different types.
-                    return ColumnTypeMismatchSnafu {
-                        name,
-                        existing: existing.column_type,
-                        new: col.influx_type(),
-                    }
-                    .fail();
-                }
-                None => {
-                    // The column does not exist in the cache, add it to the column
-                    // batch to be bulk inserted later.
-                    column_batch.push(ColumnUpsertRequest {
-                        name: name.as_str(),
-                        table_id: table.id,
-                        column_type: ColumnType::from(col.influx_type()),
-                    });
-                }
-            };
-        }
+        let column_batch: Vec<_> = mb
+            .columns()
+            .filter_map(|(name, col)| {
+                // Check if the column exists in the cached schema.
+                //
+                // If it does, validate it. If it does not exist, create it and insert
+                // it into the cached schema.
+                match table.columns.get(name.as_str()) {
+                    Some(existing) if existing.matches_type(col.influx_type()) => {
+                        // No action is needed as the column matches the existing column
+                        // schema.
+                        None
+                    }
+                    Some(existing) => {
+                        // The column schema, and the column in the mutable batch are of
+                        // different types.
+                        Some(
+                            ColumnTypeMismatchSnafu {
+                                name,
+                                existing: existing.column_type,
+                                new: col.influx_type(),
+                            }
+                            .fail(),
+                        )
+                    }
+                    None => {
+                        // The column does not exist in the cache, add it to the column
+                        // batch to be bulk inserted later.
+                        Some(Ok(ColumnUpsertRequest {
+                            name: name.as_str(),
+                            column_type: ColumnType::from(col.influx_type()),
+                        }))
+                    }
+                }
+            })
+            .collect::<Result<Vec<_>>>()?;

         if !column_batch.is_empty() {
             repos
                 .columns()
-                .create_or_get_many(&column_batch)
+                .create_or_get_many_unchecked(table.id, &column_batch)
                 .await?
                 .into_iter()
                 .for_each(|c| table.to_mut().add_column(&c));
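The rewrite above is the "collecting instead of inserting into a vec" fix from the commit message: each column maps to an `Option<Result<_>>`, and collecting an iterator of `Result` items into `Result<Vec<_>, _>` short-circuits on the first error. A self-contained sketch of that standard idiom:

fn parse_all(inputs: &[&str]) -> Result<Vec<i32>, std::num::ParseIntError> {
    inputs
        .iter()
        // Each element becomes a Result; collect() stops at the first Err.
        .map(|s| s.parse::<i32>())
        .collect()
}

fn main() {
    assert_eq!(parse_all(&["1", "2"]), Ok(vec![1, 2]));
    assert!(parse_all(&["1", "x"]).is_err());
}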
@@ -270,6 +274,7 @@ mod tests {
             namespace.id,
             namespace.topic_id,
             namespace.query_pool_id,
+            namespace.max_columns_per_table,
         );

         // Apply all the lp literals as individual writes, feeding
@@ -522,17 +522,51 @@ impl ColumnRepo for MemTxn {

         Ok(column.clone())
     }
-    async fn create_or_get_many(
+
+    async fn create_or_get_many_unchecked(
         &mut self,
+        table_id: TableId,
         columns: &[ColumnUpsertRequest<'_>],
     ) -> Result<Vec<Column>> {
-        let mut out = Vec::new();
-        for column in columns {
-            out.push(
-                ColumnRepo::create_or_get(self, column.name, column.table_id, column.column_type)
-                    .await?,
-            );
-        }
+        // Explicitly NOT using `create_or_get` in this function: the Postgres catalog doesn't
+        // check column limits when inserting many columns because it's complicated and expensive,
+        // and for testing purposes the in-memory catalog needs to match its functionality.
+        let stage = self.stage();
+
+        let out: Vec<_> = columns
+            .iter()
+            .map(|column| {
+                match stage
+                    .columns
+                    .iter()
+                    .find(|t| t.name == column.name && t.table_id == table_id)
+                {
+                    Some(c) => {
+                        ensure!(
+                            column.column_type == c.column_type,
+                            ColumnTypeMismatchSnafu {
+                                name: column.name,
+                                existing: c.column_type,
+                                new: column.column_type
+                            }
+                        );
+                        Ok(c.clone())
+                    }
+                    None => {
+                        let new_column = Column {
+                            id: ColumnId::new(stage.columns.len() as i64 + 1),
+                            table_id,
+                            name: column.name.to_string(),
+                            column_type: column.column_type,
+                        };
+                        stage.columns.push(new_column);
+                        Ok(stage.columns.last().unwrap().clone())
+                    }
+                }
+            })
+            .collect::<Result<Vec<Column>>>()?;

         Ok(out)
     }
@@ -219,7 +219,7 @@ decorate!(
        "column_create_or_get" = create_or_get(&mut self, name: &str, table_id: TableId, column_type: ColumnType) -> Result<Column>;
        "column_list_by_namespace_id" = list_by_namespace_id(&mut self, namespace_id: NamespaceId) -> Result<Vec<Column>>;
        "column_list_by_table_id" = list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Column>>;
-        "column_create_or_get_many" = create_or_get_many(&mut self, columns: &[ColumnUpsertRequest<'_>]) -> Result<Vec<Column>>;
+        "column_create_or_get_many_unchecked" = create_or_get_many_unchecked(&mut self, table_id: TableId, columns: &[ColumnUpsertRequest<'_>]) -> Result<Vec<Column>>;
        "column_list" = list(&mut self) -> Result<Vec<Column>>;
        "column_list_type_count_by_table_id" = list_type_count_by_table_id(&mut self, table_id: TableId) -> Result<Vec<ColumnTypeCount>>;
    ]
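The quoted string on the left of each `=` is the metric name the decorator records for that method, so renaming the method means renaming the metric as well; the Postgres test further down asserts a hit on `column_create_or_get_many_unchecked` for exactly this reason.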
@@ -939,31 +939,30 @@ WHERE table_id = $1;
         Ok(rec)
     }

-    async fn create_or_get_many(
+    async fn create_or_get_many_unchecked(
         &mut self,
+        table_id: TableId,
         columns: &[ColumnUpsertRequest<'_>],
     ) -> Result<Vec<Column>> {
         let mut v_name = Vec::new();
-        let mut v_table_id = Vec::new();
         let mut v_column_type = Vec::new();
         for c in columns {
             v_name.push(c.name.to_string());
-            v_table_id.push(c.table_id.get());
             v_column_type.push(c.column_type as i16);
         }

         let out = sqlx::query_as::<_, Column>(
             r#"
 INSERT INTO column_name ( name, table_id, column_type )
-SELECT name, table_id, column_type FROM UNNEST($1, $2, $3) as a(name, table_id, column_type)
+SELECT name, $1, column_type FROM UNNEST($2, $3) as a(name, column_type)
 ON CONFLICT ON CONSTRAINT column_name_unique
 DO UPDATE SET name = column_name.name
 RETURNING *;
         "#,
         )
-        .bind(&v_name)
-        .bind(&v_table_id)
-        .bind(&v_column_type)
+        .bind(&table_id) // $1
+        .bind(&v_name) // $2
+        .bind(&v_column_type) // $3
         .fetch_all(&mut self.inner)
         .await
         .map_err(|e| {
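Two details of this query are easy to miss. `UNNEST($2, $3)` expands the two parallel arrays into one row per column while the scalar `$1` is repeated onto every produced row, so a whole single-table batch lands in one round trip; for example (hypothetical values), `SELECT name, 42, column_type FROM UNNEST(ARRAY['tag1','val'], ARRAY[7, 1]) AS a(name, column_type)` yields the rows `('tag1', 42, 7)` and `('val', 42, 1)`. And the no-op `DO UPDATE SET name = column_name.name` turns conflicting rows into updates so that `RETURNING *` reports pre-existing columns as well as newly created ones, which is what gives the call its upsert semantics.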
@@ -2622,7 +2621,7 @@ mod tests {
         assert_eq!(application_name, TEST_APPLICATION_NAME_NEW);
     }

-    macro_rules! test_column_create_or_get_many {
+    macro_rules! test_column_create_or_get_many_unchecked {
         (
             $name:ident,
             calls = {$([$($col_name:literal => $col_type:expr),+ $(,)?]),+},
@@ -2630,7 +2629,7 @@ mod tests {
         ) => {
             paste::paste! {
                 #[tokio::test]
-                async fn [<test_column_create_or_get_many_ $name>]() {
+                async fn [<test_column_create_or_get_many_unchecked_ $name>]() {
                     // If running an integration test on your laptop, this requires that you have
                     // Postgres running and that you've done the sqlx migrations. See the README in
                     // this crate for info to set it up.
@@ -2668,7 +2667,6 @@ mod tests {
                         $(
                             ColumnUpsertRequest {
                                 name: $col_name,
-                                table_id,
                                 column_type: $col_type,
                             },
                         )+
@@ -2677,7 +2675,7 @@ mod tests {
                         .repositories()
                         .await
                         .columns()
-                        .create_or_get_many(&insert)
+                        .create_or_get_many_unchecked(table_id, &insert)
                         .await;

                     // The returned columns MUST always match the requested
@@ -2686,13 +2684,13 @@ mod tests {
                     assert_eq!(insert.len(), got.len());
                     insert.iter().zip(got).for_each(|(req, got)| {
                         assert_eq!(req.name, got.name);
-                        assert_eq!(req.table_id, got.table_id);
+                        assert_eq!(table_id, got.table_id);
                         assert_eq!(
                             req.column_type,
                             ColumnType::try_from(got.column_type).expect("invalid column type")
                         );
                     });
-                    assert_metric_hit(&metrics, "column_create_or_get_many");
+                    assert_metric_hit(&metrics, "column_create_or_get_many_unchecked");
                 }
             )+

@@ -2704,7 +2702,7 @@ mod tests {

     // Issue a few calls to create_or_get_many that contain distinct columns and
     // covers the full set of column types.
-    test_column_create_or_get_many!(
+    test_column_create_or_get_many_unchecked!(
         insert,
         calls = {
             [
@@ -2726,7 +2724,7 @@ mod tests {

     // Issue two calls with overlapping columns - request should succeed (upsert
     // semantics).
-    test_column_create_or_get_many!(
+    test_column_create_or_get_many_unchecked!(
         partial_upsert,
         calls = {
             [
@@ -2750,7 +2748,7 @@ mod tests {
     );

     // Issue two calls with the same columns and types.
-    test_column_create_or_get_many!(
+    test_column_create_or_get_many_unchecked!(
         full_upsert,
         calls = {
             [
@@ -2771,7 +2769,7 @@ mod tests {

     // Issue two calls with overlapping columns with conflicting types and
     // observe a correctly populated ColumnTypeMismatch error.
-    test_column_create_or_get_many!(
+    test_column_create_or_get_many_unchecked!(
         partial_type_conflict,
         calls = {
             [
@@ -2802,7 +2800,7 @@ mod tests {

     // Issue one call containing a column specified twice, with differing types
     // and observe an error different from the above test case.
-    test_column_create_or_get_many!(
+    test_column_create_or_get_many_unchecked!(
         intra_request_type_conflict,
         calls = {
             [
@@ -265,6 +265,16 @@ impl TestNamespace {
             .await
             .unwrap()
     }
+
+    /// Set the number of columns per table allowed in this namespace.
+    pub async fn update_column_limit(&self, new_max: i32) {
+        let mut repos = self.catalog.catalog.repositories().await;
+        repos
+            .namespaces()
+            .update_column_limit(&self.namespace.name, new_max)
+            .await
+            .unwrap();
+    }
 }

 /// A test shard with its namespace in the catalog
@@ -25,7 +25,7 @@ use iox_time::Time;
 use schema::selection::Selection;
 use schema::{builder::SchemaBuilder, Schema, TIME_COLUMN_NAME};
 use snafu::{OptionExt, ResultExt, Snafu};
-use std::ops::Range;
+use std::{collections::BTreeSet, ops::Range};

 pub mod column;
 pub mod payload;
@@ -137,6 +137,12 @@ impl MutableBatch {
             .map(move |(name, idx)| (name, &self.columns[*idx]))
     }

+    /// Return the set of column names for this table. Used in combination with a write operation's
+    /// column names to determine whether a write would exceed the max allowed columns.
+    pub fn column_names(&self) -> BTreeSet<&str> {
+        self.column_names.keys().map(|name| name.as_str()).collect()
+    }
+
     /// Return the number of rows in this chunk
     pub fn rows(&self) -> usize {
         self.row_count
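One subtlety worth noting: a line protocol write always carries the implicit `time` column, and it is included in this name set, which is why the router tests later in this diff count a two-tag, one-field write as four columns. A small sketch (assuming the `mutable_batch_lp` crate used elsewhere in this diff):

use mutable_batch_lp::lines_to_batches;

fn main() {
    let batches = lines_to_batches("m,tag1=A,tag2=B val=42i 123456", 42).unwrap();
    // tag1, tag2, val, plus the implicit time column.
    assert_eq!(batches["m"].column_names().len(), 4);
}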
@@ -43,6 +43,7 @@ write_summary = { path = "../write_summary" }
 [dev-dependencies]
 assert_matches = "1.5"
 criterion = { version = "0.4", default-features = false, features = ["async_tokio", "rayon"]}
+iox_tests = { path = "../iox_tests" }
 once_cell = "1"
 paste = "1.0.9"
 pretty_assertions = "1.3.0"
@@ -149,6 +149,7 @@ mod tests {
             topic_id: TopicId::new(2),
             query_pool_id: QueryPoolId::new(3),
             tables: Default::default(),
+            max_columns_per_table: 4,
         },
     );

@@ -1,7 +1,7 @@
 use super::DmlHandler;
 use crate::namespace_cache::{metrics::InstrumentedCache, MemoryNamespaceCache, NamespaceCache};
 use async_trait::async_trait;
-use data_types::{DatabaseName, DeletePredicate};
+use data_types::{DatabaseName, DeletePredicate, NamespaceSchema};
 use hashbrown::HashMap;
 use iox_catalog::{
     interface::{get_schema_by_name, Catalog, Error as CatalogError},
@@ -23,7 +23,7 @@ pub enum SchemaError {

     /// The user has hit their column/table limit.
     #[error("service limit reached: {0}")]
-    ServiceLimit(iox_catalog::interface::Error),
+    ServiceLimit(Box<dyn std::error::Error + Send + Sync + 'static>),

     /// The request schema conflicts with the existing namespace schema.
     #[error("schema conflict: {0}")]
@@ -67,6 +67,22 @@ pub enum SchemaError {
 /// relatively rare - it results in additional requests being made to the
 /// catalog until the cached schema converges to match the catalog schema.
 ///
+/// Note that the namespace-wide limit of the number of columns allowed per table
+/// is also cached, which has two implications:
+///
+/// 1. If the namespace's column limit is updated in the catalog, the new limit
+///    will not be enforced until the whole namespace is recached, likely only
+///    on startup. In other words, updating the namespace's column limit requires
+///    both a catalog update and a service restart.
+/// 2. There's a race condition that can result in a table ending up with more
+///    columns than the namespace limit should allow. When multiple concurrent
+///    writes come in to different service instances that each have their own
+///    cache, and each of those writes adds a disjoint set of new columns, the
+///    requests will all succeed because, considered separately, none of them
+///    exceeds the number of columns in its own cache. Once all the writes have
+///    completed, the total set of columns in the table can exceed the limit.
+///
 /// # Correctness
 ///
 /// The correct functioning of this schema validator relies on the catalog
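To make the second point concrete: with a limit of 100, if several routers each hold a cache showing 95 columns for a table and each accepts a write adding five distinct new columns, every instance's check passes (95 + 5 does not exceed 100 in any one cache), yet once all the writes land the table holds 95 + 5n columns for n routers.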
@@ -178,6 +194,12 @@ where
             }
         };

+        validate_column_limits(&batches, &schema).map_err(|e| {
+            warn!(%namespace, error=%e, "service protection limit reached");
+            self.service_limit_hit.inc(1);
+            SchemaError::ServiceLimit(Box::new(e))
+        })?;
+
         let maybe_new_schema = validate_or_insert_schema(
             batches.iter().map(|(k, v)| (k.as_str(), v)),
             &schema,
@@ -208,7 +230,7 @@ where
             | CatalogError::TableCreateLimitError { .. } => {
                 warn!(%namespace, error=%e, "service protection limit reached");
                 self.service_limit_hit.inc(1);
-                SchemaError::ServiceLimit(e.into_err())
+                SchemaError::ServiceLimit(Box::new(e.into_err()))
             }
             _ => {
                 error!(%namespace, error=%e, "schema validation failed");
@@ -253,17 +275,221 @@ where
         }
     }
 }

+#[derive(Debug, Error)]
+#[error(
+    "couldn't create columns in table `{table_name}`; table contains \
+    {existing_column_count} existing columns, applying this write would result \
+    in {merged_column_count} columns, limit is {max_columns_per_table}"
+)]
+struct OverColumnLimit {
+    table_name: String,
+    // Number of columns already in the table.
+    existing_column_count: usize,
+    // Number of resultant columns after merging the write with existing columns.
+    merged_column_count: usize,
+    // The configured limit.
+    max_columns_per_table: usize,
+}
+
+fn validate_column_limits(
+    batches: &HashMap<String, MutableBatch>,
+    schema: &NamespaceSchema,
+) -> Result<(), OverColumnLimit> {
+    for (table_name, batch) in batches {
+        let mut existing_columns = schema
+            .tables
+            .get(table_name)
+            .map(|t| t.column_names())
+            .unwrap_or_default();
+        let existing_column_count = existing_columns.len();
+
+        let merged_column_count = {
+            existing_columns.append(&mut batch.column_names());
+            existing_columns.len()
+        };
+
+        // If the table is currently over the column limit but this write only includes existing
+        // columns and doesn't exceed the limit more, this is allowed.
+        let columns_were_added_in_this_batch = merged_column_count > existing_column_count;
+        let column_limit_exceeded = merged_column_count > schema.max_columns_per_table;
+
+        if columns_were_added_in_this_batch && column_limit_exceeded {
+            return Err(OverColumnLimit {
+                table_name: table_name.into(),
+                merged_column_count,
+                existing_column_count,
+                max_columns_per_table: schema.max_columns_per_table,
+            });
+        }
+    }
+
+    Ok(())
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
     use assert_matches::assert_matches;
-    use data_types::{ColumnType, QueryPoolId, TimestampRange, TopicId};
-    use iox_catalog::mem::MemCatalog;
+    use data_types::{ColumnType, TimestampRange};
+    use iox_tests::util::{TestCatalog, TestNamespace};
     use once_cell::sync::Lazy;
     use std::sync::Arc;

     static NAMESPACE: Lazy<DatabaseName<'static>> = Lazy::new(|| "bananas".try_into().unwrap());

+    #[tokio::test]
+    async fn validate_limits() {
+        let (catalog, namespace) = test_setup().await;
+
+        namespace.update_column_limit(3).await;
+
+        // Table not found in schema,
+        {
+            let schema = namespace.schema().await;
+            // Columns under the limit is ok
+            let batches = lp_to_writes("nonexistent val=42i 123456");
+            assert!(validate_column_limits(&batches, &schema).is_ok());
+            // Columns over the limit is an error
+            let batches = lp_to_writes("nonexistent,tag1=A,tag2=B val=42i 123456");
+            assert_matches!(
+                validate_column_limits(&batches, &schema),
+                Err(OverColumnLimit {
+                    table_name: _,
+                    existing_column_count: 0,
+                    merged_column_count: 4,
+                    max_columns_per_table: 3,
+                })
+            );
+        }
+
+        // Table exists but no columns in schema,
+        {
+            namespace.create_table("no_columns_in_schema").await;
+            let schema = namespace.schema().await;
+            // Columns under the limit is ok
+            let batches = lp_to_writes("no_columns_in_schema val=42i 123456");
+            assert!(validate_column_limits(&batches, &schema).is_ok());
+            // Columns over the limit is an error
+            let batches = lp_to_writes("no_columns_in_schema,tag1=A,tag2=B val=42i 123456");
+            assert_matches!(
+                validate_column_limits(&batches, &schema),
+                Err(OverColumnLimit {
+                    table_name: _,
+                    existing_column_count: 0,
+                    merged_column_count: 4,
+                    max_columns_per_table: 3,
+                })
+            );
+        }
+
+        // Table exists with a column in the schema,
+        {
+            let table = namespace.create_table("i_got_columns").await;
+            table.create_column("i_got_music", ColumnType::I64).await;
+            let schema = namespace.schema().await;
+            // Columns already existing is ok
+            let batches = lp_to_writes("i_got_columns i_got_music=42i 123456");
+            assert!(validate_column_limits(&batches, &schema).is_ok());
+            // Adding columns under the limit is ok
+            let batches = lp_to_writes("i_got_columns,tag1=A i_got_music=42i 123456");
+            assert!(validate_column_limits(&batches, &schema).is_ok());
+            // Adding columns over the limit is an error
+            let batches = lp_to_writes("i_got_columns,tag1=A,tag2=B i_got_music=42i 123456");
+            assert_matches!(
+                validate_column_limits(&batches, &schema),
+                Err(OverColumnLimit {
+                    table_name: _,
+                    existing_column_count: 1,
+                    merged_column_count: 4,
+                    max_columns_per_table: 3,
+                })
+            );
+        }
+
+        // Table exists and is at the column limit,
+        {
+            let table = namespace.create_table("bananas").await;
+            table.create_column("greatness", ColumnType::I64).await;
+            table.create_column("tastiness", ColumnType::I64).await;
+            table
+                .create_column(schema::TIME_COLUMN_NAME, ColumnType::Time)
+                .await;
+            let schema = namespace.schema().await;
+            // Columns already existing is allowed
+            let batches = lp_to_writes("bananas greatness=42i 123456");
+            assert!(validate_column_limits(&batches, &schema).is_ok());
+            // Adding columns over the limit is an error
+            let batches = lp_to_writes("bananas i_got_music=42i 123456");
+            assert_matches!(
+                validate_column_limits(&batches, &schema),
+                Err(OverColumnLimit {
+                    table_name: _,
+                    existing_column_count: 3,
+                    merged_column_count: 4,
+                    max_columns_per_table: 3,
+                })
+            );
+        }
+
+        // Table exists and is over the column limit because of the race condition,
+        {
+            // Make two schema validator instances each with their own cache
+            let handler1 = SchemaValidator::new(
+                catalog.catalog(),
+                Arc::new(MemoryNamespaceCache::default()),
+                &catalog.metric_registry,
+            );
+            let handler2 = SchemaValidator::new(
+                catalog.catalog(),
+                Arc::new(MemoryNamespaceCache::default()),
+                &catalog.metric_registry,
+            );
+
+            // Make a valid write with one column + timestamp through each validator so the
+            // namespace schema gets cached
+            let writes1_valid = lp_to_writes("dragonfruit val=42i 123456");
+            handler1
+                .write(&*NAMESPACE, writes1_valid, None)
+                .await
+                .expect("request should succeed");
+            let writes2_valid = lp_to_writes("dragonfruit val=43i 123457");
+            handler2
+                .write(&*NAMESPACE, writes2_valid, None)
+                .await
+                .expect("request should succeed");
+
+            // Make "valid" writes through each validator that each add a different column, thus
+            // putting the table over the limit
+            let writes1_add_column = lp_to_writes("dragonfruit,tag1=A val=42i 123456");
+            handler1
+                .write(&*NAMESPACE, writes1_add_column, None)
+                .await
+                .expect("request should succeed");
+            let writes2_add_column = lp_to_writes("dragonfruit,tag2=B val=43i 123457");
+            handler2
+                .write(&*NAMESPACE, writes2_add_column, None)
+                .await
+                .expect("request should succeed");
+
+            let schema = namespace.schema().await;
+
+            // Columns already existing is allowed
+            let batches = lp_to_writes("dragonfruit val=42i 123456");
+            assert!(validate_column_limits(&batches, &schema).is_ok());
+            // Adding more columns over the limit is an error
+            let batches = lp_to_writes("dragonfruit i_got_music=42i 123456");
+            assert_matches!(
+                validate_column_limits(&batches, &schema),
+                Err(OverColumnLimit {
+                    table_name: _,
+                    existing_column_count: 4,
+                    merged_column_count: 5,
+                    max_columns_per_table: 3,
+                })
+            );
+        }
+    }

     // Parse `lp` into a table-keyed MutableBatch map.
     fn lp_to_writes(lp: &str) -> HashMap<String, MutableBatch> {
         let (writes, _) = mutable_batch_lp::lines_to_batches_stats(lp, 42)
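The last two test blocks pin down an important behavior: the guard only rejects writes that would add columns while pushing the merged total over the limit, so a table that is already over the limit (here four columns against a limit of three, courtesy of the race) keeps accepting writes to its existing columns, and only a write introducing a fifth column is refused. This is the "allow writing to existing columns even if table is over column limit" fix from the commit message.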
@@ -273,23 +499,11 @@ mod tests {

     /// Initialise an in-memory [`MemCatalog`] and create a single namespace
     /// named [`NAMESPACE`].
-    async fn create_catalog() -> Arc<dyn Catalog> {
-        let metrics = Arc::new(metric::Registry::new());
-        let catalog: Arc<dyn Catalog> = Arc::new(MemCatalog::new(metrics));
-
-        let mut repos = catalog.repositories().await;
-        repos
-            .namespaces()
-            .create(
-                NAMESPACE.as_str(),
-                "inf",
-                TopicId::new(42),
-                QueryPoolId::new(24),
-            )
-            .await
-            .expect("failed to create test namespace");
-
-        catalog
+    async fn test_setup() -> (Arc<TestCatalog>, Arc<TestNamespace>) {
+        let catalog = TestCatalog::new();
+        let namespace = catalog.create_namespace(&NAMESPACE).await;
+
+        (catalog, namespace)
     }

     fn assert_cache<C>(handler: &SchemaValidator<C>, table: &str, col: &str, want: ColumnType)
@@ -314,10 +528,10 @@ mod tests {

     #[tokio::test]
     async fn test_write_ok() {
-        let catalog = create_catalog().await;
+        let (catalog, _namespace) = test_setup().await;
         let metrics = Arc::new(metric::Registry::default());
         let handler = SchemaValidator::new(
-            catalog,
+            catalog.catalog(),
             Arc::new(MemoryNamespaceCache::default()),
             &*metrics,
         );
@@ -337,10 +551,10 @@ mod tests {

     #[tokio::test]
     async fn test_write_schema_not_found() {
-        let catalog = create_catalog().await;
+        let (catalog, _namespace) = test_setup().await;
         let metrics = Arc::new(metric::Registry::default());
         let handler = SchemaValidator::new(
-            catalog,
+            catalog.catalog(),
             Arc::new(MemoryNamespaceCache::default()),
             &*metrics,
         );
@@ -361,10 +575,10 @@ mod tests {

     #[tokio::test]
     async fn test_write_validation_failure() {
-        let catalog = create_catalog().await;
+        let (catalog, _namespace) = test_setup().await;
         let metrics = Arc::new(metric::Registry::default());
         let handler = SchemaValidator::new(
-            catalog,
+            catalog.catalog(),
             Arc::new(MemoryNamespaceCache::default()),
             &*metrics,
         );
@@ -399,10 +613,10 @@ mod tests {

     #[tokio::test]
     async fn test_write_table_service_limit() {
-        let catalog = create_catalog().await;
+        let (catalog, _namespace) = test_setup().await;
         let metrics = Arc::new(metric::Registry::default());
         let handler = SchemaValidator::new(
-            Arc::clone(&catalog),
+            catalog.catalog(),
             Arc::new(MemoryNamespaceCache::default()),
             &*metrics,
         );
@@ -417,6 +631,7 @@ mod tests {

         // Configure the service limit to be hit next request
         catalog
+            .catalog()
             .repositories()
             .await
             .namespaces()
@@ -437,10 +652,10 @@ mod tests {

     #[tokio::test]
     async fn test_write_column_service_limit() {
-        let catalog = create_catalog().await;
+        let (catalog, namespace) = test_setup().await;
         let metrics = Arc::new(metric::Registry::default());
         let handler = SchemaValidator::new(
-            Arc::clone(&catalog),
+            catalog.catalog(),
             Arc::new(MemoryNamespaceCache::default()),
             &*metrics,
         );
@@ -453,16 +668,46 @@ mod tests {
             .expect("request should succeed");
         assert_eq!(writes.len(), got.len());

+        // Configure the service limit to be hit next request
+        namespace.update_column_limit(1).await;
+        let handler = SchemaValidator::new(
+            catalog.catalog(),
+            Arc::new(MemoryNamespaceCache::default()),
+            &*metrics,
+        );
+
+        // Second write attempts to violate limits, causing an error
+        let writes = lp_to_writes("bananas,tag1=A,tag2=B val=42i,val2=42i 123456");
+        let err = handler
+            .write(&*NAMESPACE, writes, None)
+            .await
+            .expect_err("request should fail");
+
+        assert_matches!(err, SchemaError::ServiceLimit(_));
+        assert_eq!(1, handler.service_limit_hit.fetch());
+    }
+
+    #[tokio::test]
+    async fn test_first_write_many_columns_service_limit() {
+        let (catalog, _namespace) = test_setup().await;
+        let metrics = Arc::new(metric::Registry::default());
+        let handler = SchemaValidator::new(
+            catalog.catalog(),
+            Arc::new(MemoryNamespaceCache::default()),
+            &*metrics,
+        );
+
         // Configure the service limit to be hit next request
         catalog
+            .catalog()
             .repositories()
             .await
             .namespaces()
-            .update_column_limit(NAMESPACE.as_str(), 1)
+            .update_column_limit(NAMESPACE.as_str(), 3)
             .await
             .expect("failed to set column limit");

-        // Second write attempts to violate limits, causing an error
+        // First write attempts to add columns over the limit, causing an error
         let writes = lp_to_writes("bananas,tag1=A,tag2=B val=42i,val2=42i 123456");
         let err = handler
             .write(&*NAMESPACE, writes, None)
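One detail worth calling out in test_write_column_service_limit: after update_column_limit(1), a second SchemaValidator with a fresh MemoryNamespaceCache is built deliberately, because the limit lives on the cached NamespaceSchema and the original handler would keep enforcing the stale value, exactly as the caching note on the validator's docs describes.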
@@ -478,10 +723,10 @@ mod tests {
         const NAMESPACE: &str = "NAMESPACE_IS_NOT_VALIDATED";
         const TABLE: &str = "bananas";

-        let catalog = create_catalog().await;
+        let (catalog, _namespace) = test_setup().await;
         let metrics = Arc::new(metric::Registry::default());
         let handler = SchemaValidator::new(
-            catalog,
+            catalog.catalog(),
             Arc::new(MemoryNamespaceCache::default()),
             &*metrics,
         );
@@ -42,6 +42,7 @@ mod tests {
             topic_id: TopicId::new(24),
             query_pool_id: QueryPoolId::new(1234),
             tables: Default::default(),
+            max_columns_per_table: 50,
         };
         assert!(cache.put_schema(ns.clone(), schema1.clone()).is_none());
         assert_eq!(*cache.get_schema(&ns).expect("lookup failure"), schema1);
@@ -51,6 +52,7 @@ mod tests {
             topic_id: TopicId::new(2),
             query_pool_id: QueryPoolId::new(2),
             tables: Default::default(),
+            max_columns_per_table: 10,
         };

         assert_eq!(
@@ -194,6 +194,7 @@ mod tests {
             topic_id: TopicId::new(24),
             query_pool_id: QueryPoolId::new(1234),
             tables,
+            max_columns_per_table: 100,
         }
     }

@@ -60,6 +60,7 @@ mod tests {
             topic_id: TopicId::new(1),
             query_pool_id: QueryPoolId::new(1),
             tables: Default::default(),
+            max_columns_per_table: 7,
         }
     }

@@ -330,16 +330,13 @@ async fn test_schema_limit() {
         &err,
         router::server::http::Error::DmlHandler(
             DmlError::Schema(
-                SchemaError::ServiceLimit(
-                    iox_catalog::interface::Error::TableCreateLimitError {
-                        table_name,
-                        namespace_id,
-                    }
-                )
+                SchemaError::ServiceLimit(e)
             )
         ) => {
-            assert_eq!(table_name, "platanos2");
-            assert_eq!(namespace_id.to_string(), "1");
+            assert_eq!(
+                e.to_string(),
+                "couldn't create table platanos2; limit reached on namespace 1"
+            );
         }
     );
     assert_eq!(err.as_status_code(), StatusCode::TOO_MANY_REQUESTS);