3097 lines
104 KiB
Rust
3097 lines
104 KiB
Rust
//! This module contains the traits and data objects for the Catalog API.
|
|
|
|
use async_trait::async_trait;
|
|
use data_types::{
|
|
Column, ColumnSchema, ColumnType, KafkaPartition, KafkaTopic, KafkaTopicId, Namespace,
|
|
NamespaceId, NamespaceSchema, ParquetFile, ParquetFileId, ParquetFileParams,
|
|
ParquetFileWithMetadata, Partition, PartitionId, PartitionInfo, PartitionKey,
|
|
ProcessedTombstone, QueryPool, QueryPoolId, SequenceNumber, Sequencer, SequencerId, Table,
|
|
TableId, TablePartition, TableSchema, Timestamp, Tombstone, TombstoneId,
|
|
};
|
|
use iox_time::TimeProvider;
|
|
use snafu::{OptionExt, Snafu};
|
|
use std::{
|
|
collections::{BTreeMap, HashMap, HashSet},
|
|
convert::TryFrom,
|
|
fmt::Debug,
|
|
sync::Arc,
|
|
};
|
|
use uuid::Uuid;
|
|
|
|
/// Error states that any catalog operation may return.
#[derive(Debug, Snafu)]
#[allow(missing_copy_implementations, missing_docs)]
pub enum Error {
    #[snafu(display("name {} already exists", name))]
    NameExists { name: String },

    #[snafu(display("unhandled sqlx error: {}", source))]
    SqlxError { source: sqlx::Error },

    #[snafu(display("foreign key violation: {}", source))]
    ForeignKeyViolation { source: sqlx::Error },

    #[snafu(display("column {} is type {} but write has type {}", name, existing, new))]
    ColumnTypeMismatch {
        name: String,
        existing: String,
        new: String,
    },

    #[snafu(display(
        "column type {} is in the db for column {}, which is unknown",
        data_type,
        name
    ))]
    UnknownColumnType { data_type: i16, name: String },

    #[snafu(display("namespace {} not found", name))]
    NamespaceNotFound { name: String },

    #[snafu(display("table {} not found", id))]
    TableNotFound { id: TableId },

    #[snafu(display("partition {} not found", id))]
    PartitionNotFound { id: PartitionId },

    #[snafu(display(
        "couldn't create column {} in table {}; limit reached on namespace",
        column_name,
        table_id,
    ))]
    ColumnCreateLimitError {
        column_name: String,
        table_id: TableId,
    },

    #[snafu(display(
        "couldn't create table {}; limit reached on namespace {}",
        table_name,
        namespace_id
    ))]
    TableCreateLimitError {
        table_name: String,
        namespace_id: NamespaceId,
    },

    #[snafu(display("parquet file with object_store_id {} already exists", object_store_id))]
    FileExists { object_store_id: Uuid },

    #[snafu(display("parquet file with id {} does not exist. Foreign key violation", id))]
    FileNotFound { id: i64 },

    #[snafu(display("tombstone with id {} does not exist. Foreign key violation", id))]
    TombstoneNotFound { id: i64 },

    #[snafu(display("parquet_file record {} not found", id))]
    ParquetRecordNotFound { id: ParquetFileId },

    #[snafu(display("cannot derive valid column schema from column {}: {}", name, source))]
    InvalidColumn {
        source: Box<dyn std::error::Error + Send + Sync>,
        name: String,
    },

    #[snafu(display("cannot start a transaction: {}", source))]
    StartTransaction { source: sqlx::Error },

    #[snafu(display("no transaction provided"))]
    NoTransaction,

    #[snafu(display(
        "the tombstone {} already processed for parquet file {}",
        tombstone_id,
        parquet_file_id
    ))]
    ProcessTombstoneExists {
        tombstone_id: i64,
        parquet_file_id: i64,
    },

    #[snafu(display("error while converting usize {} to i64", value))]
    InvalidValue { value: usize },

    #[snafu(display("database setup error: {}", source))]
    Setup { source: sqlx::Error },
}
|
|
|
|
/// A specialized `Result` for Catalog errors, defaulting the error type to [`Error`].
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
|
|
|
/// Trait that contains methods for working with the catalog
#[async_trait]
pub trait Catalog: Send + Sync + Debug {
    /// Setup catalog for usage and apply possible migrations.
    ///
    /// Must be idempotent: calling it on an already set-up catalog is a no-op.
    async fn setup(&self) -> Result<(), Error>;

    /// Create a new transaction.
    ///
    /// Creating transactions is potentially expensive. Holding one consumes resources. The number of parallel active
    /// transactions might be limited per catalog, so you MUST NOT rely on the ability to create multiple transactions in
    /// parallel for correctness but only for scaling.
    async fn start_transaction(&self) -> Result<Box<dyn Transaction>, Error>;

    /// Access the repositories w/o a transaction scope.
    async fn repositories(&self) -> Box<dyn RepoCollection>;

    /// Get metric registry associated w/ this catalog.
    fn metrics(&self) -> Arc<metric::Registry>;

    /// Get the time provider associated w/ this catalog.
    fn time_provider(&self) -> Arc<dyn TimeProvider>;
}
|
|
|
|
/// Secret module for [sealed traits].
///
/// [sealed traits]: https://rust-lang.github.io/api-guidelines/future-proofing.html#sealed-traits-protect-against-downstream-implementations-c-sealed
pub(crate) mod sealed {
    use super::*;

    /// Helper trait to implement commit and abort of a transaction.
    ///
    /// The problem is that both methods cannot take `self` directly, otherwise the [`Transaction`] would not be object
    /// safe. Therefore we can only take a reference. To avoid that a user uses a transaction after calling one of the
    /// finalizers, we use a tiny trick and take `Box<dyn Transaction>` in our public interface and use a sealed trait
    /// for the actual implementation.
    #[async_trait]
    pub trait TransactionFinalize: Send + Sync + Debug {
        async fn commit_inplace(&mut self) -> Result<(), Error>;
        async fn abort_inplace(&mut self) -> Result<(), Error>;
    }
}
|
|
|
|
/// Transaction of a [`Catalog`].
///
/// A transaction provides a consistent view on data and stages writes (this normally maps to a database transaction).
/// Repositories can cheaply be borrowed from it. To finalize a transaction, call [commit](Self::commit) or [abort](Self::abort).
///
/// Note that after any method in this transaction (including all repositories derived from it) returned an error, the
/// transaction MIGHT be poisoned and will return errors for all operations, depending on the backend.
///
///
/// # Drop
/// Dropping a transaction without calling [`commit`](Self::commit) or [`abort`](Self::abort) will abort the
/// transaction. However resources might not be released immediately, so it is advised to always call
/// [`abort`](Self::abort) when you want to enforce that. Dropping w/o committing/aborting will also log a warning.
#[async_trait]
pub trait Transaction: Send + Sync + Debug + sealed::TransactionFinalize + RepoCollection {
    /// Commit transaction.
    ///
    /// # Error Handling
    /// If successful, all changes will be visible to other transactions.
    ///
    /// If an error is returned, the transaction may or may not be committed. This might be due to IO errors after the
    /// transaction was finished. However in either case, the transaction is atomic and can only succeed or fail
    /// entirely.
    async fn commit(mut self: Box<Self>) -> Result<(), Error> {
        self.commit_inplace().await
    }

    /// Abort transaction, throwing away all changes.
    async fn abort(mut self: Box<Self>) -> Result<(), Error> {
        self.abort_inplace().await
    }
}
|
|
|
|
// Blanket implementation: any type that can finalize a transaction (sealed) and expose the
// repositories automatically implements the public `Transaction` trait, which only adds the
// `commit`/`abort` convenience wrappers.
impl<T> Transaction for T where T: Send + Sync + Debug + sealed::TransactionFinalize + RepoCollection
{}
|
|
|
|
/// Collection of the different repositories that the catalog offers.
///
/// The methods (e.g. "get or create") for handling entities (e.g. namespaces, tombstones, ...) are grouped into
/// *repositories* with one *repository* per entity. A repository can be thought of a collection of a single entity.
/// Getting repositories from the transaction is cheap.
///
/// Note that a repository might internally map to a wide range of different storage abstractions, ranging from one or
/// more SQL tables over key-value key spaces to simple in-memory vectors. The user should and must not care how these
/// are implemented.
#[async_trait]
pub trait RepoCollection: Send + Sync + Debug {
    /// repo for kafka topics
    fn kafka_topics(&mut self) -> &mut dyn KafkaTopicRepo;

    /// repo for query pools
    fn query_pools(&mut self) -> &mut dyn QueryPoolRepo;

    /// repo for namespaces
    fn namespaces(&mut self) -> &mut dyn NamespaceRepo;

    /// repo for tables
    fn tables(&mut self) -> &mut dyn TableRepo;

    /// repo for columns
    fn columns(&mut self) -> &mut dyn ColumnRepo;

    /// repo for sequencers
    fn sequencers(&mut self) -> &mut dyn SequencerRepo;

    /// repo for partitions
    fn partitions(&mut self) -> &mut dyn PartitionRepo;

    /// repo for tombstones
    fn tombstones(&mut self) -> &mut dyn TombstoneRepo;

    /// repo for parquet_files
    fn parquet_files(&mut self) -> &mut dyn ParquetFileRepo;

    /// repo for processed_tombstones
    fn processed_tombstones(&mut self) -> &mut dyn ProcessedTombstoneRepo;
}
|
|
|
|
/// Functions for working with Kafka topics in the catalog.
#[async_trait]
pub trait KafkaTopicRepo: Send + Sync {
    /// Creates the kafka topic in the catalog or gets the existing record by name.
    async fn create_or_get(&mut self, name: &str) -> Result<KafkaTopic>;

    /// Gets the kafka topic by its unique name. Returns `None` if no topic with that name exists.
    async fn get_by_name(&mut self, name: &str) -> Result<Option<KafkaTopic>>;
}
|
|
|
|
/// Functions for working with query pools in the catalog.
#[async_trait]
pub trait QueryPoolRepo: Send + Sync {
    /// Creates the query pool in the catalog or gets the existing record by name.
    async fn create_or_get(&mut self, name: &str) -> Result<QueryPool>;
}
|
|
|
|
/// Functions for working with namespaces in the catalog
#[async_trait]
pub trait NamespaceRepo: Send + Sync {
    /// Creates the namespace in the catalog. If one by the same name already exists, an
    /// error is returned.
    async fn create(
        &mut self,
        name: &str,
        retention_duration: &str,
        kafka_topic_id: KafkaTopicId,
        query_pool_id: QueryPoolId,
    ) -> Result<Namespace>;

    /// List all namespaces.
    async fn list(&mut self) -> Result<Vec<Namespace>>;

    /// Gets the namespace by its ID. Returns `None` if no namespace with that ID exists.
    async fn get_by_id(&mut self, id: NamespaceId) -> Result<Option<Namespace>>;

    /// Gets the namespace by its unique name. Returns `None` if no namespace with that name exists.
    async fn get_by_name(&mut self, name: &str) -> Result<Option<Namespace>>;

    /// Update the limit on the number of tables that can exist per namespace.
    async fn update_table_limit(&mut self, name: &str, new_max: i32) -> Result<Namespace>;

    /// Update the limit on the number of columns that can exist per table in a given namespace.
    async fn update_column_limit(&mut self, name: &str, new_max: i32) -> Result<Namespace>;
}
|
|
|
|
/// Functions for working with tables in the catalog
#[async_trait]
pub trait TableRepo: Send + Sync {
    /// Creates the table in the catalog or get the existing record by name.
    async fn create_or_get(&mut self, name: &str, namespace_id: NamespaceId) -> Result<Table>;

    /// get table by ID
    async fn get_by_id(&mut self, table_id: TableId) -> Result<Option<Table>>;

    /// get table by namespace ID and name
    async fn get_by_namespace_and_name(
        &mut self,
        namespace_id: NamespaceId,
        name: &str,
    ) -> Result<Option<Table>>;

    /// Lists all tables in the catalog for the given namespace id.
    async fn list_by_namespace_id(&mut self, namespace_id: NamespaceId) -> Result<Vec<Table>>;

    /// List all tables.
    async fn list(&mut self) -> Result<Vec<Table>>;

    /// Gets the table persistence info for the given sequencer
    async fn get_table_persist_info(
        &mut self,
        sequencer_id: SequencerId,
        namespace_id: NamespaceId,
        table_name: &str,
    ) -> Result<Option<TablePersistInfo>>;
}
|
|
|
|
/// Information for a table's persistence information for a specific sequencer from the catalog
#[derive(Debug, Copy, Clone, Eq, PartialEq, sqlx::FromRow)]
pub struct TablePersistInfo {
    /// sequencer the sequence numbers are associated with
    pub sequencer_id: SequencerId,
    /// the global identifier for the table
    pub table_id: TableId,
    /// max sequence number from this table's tombstones for this sequencer;
    /// `None` when the table has no tombstones for this sequencer
    pub tombstone_max_sequence_number: Option<SequenceNumber>,
}
|
|
|
|
/// Parameters necessary to perform a batch insert of
/// [`ColumnRepo::create_or_get()`].
#[derive(Debug)]
pub struct ColumnUpsertRequest<'a> {
    /// The name of the column.
    pub name: &'a str,
    /// The table ID to which it belongs.
    pub table_id: TableId,
    /// The data type of the column.
    pub column_type: ColumnType,
}
|
|
|
|
/// Functions for working with columns in the catalog
#[async_trait]
pub trait ColumnRepo: Send + Sync {
    /// Creates the column in the catalog or returns the existing column. Will return a
    /// `Error::ColumnTypeMismatch` if the existing column type doesn't match the type
    /// the caller is attempting to create.
    async fn create_or_get(
        &mut self,
        name: &str,
        table_id: TableId,
        column_type: ColumnType,
    ) -> Result<Column>;

    /// Perform a bulk upsert of columns.
    ///
    /// Implementations make no guarantees as to the ordering or atomicity of
    /// the batch of column upsert operations - a batch upsert may partially
    /// commit, in which case an error MUST be returned by the implementation.
    async fn create_or_get_many(
        &mut self,
        columns: &[ColumnUpsertRequest<'_>],
    ) -> Result<Vec<Column>>;

    /// Lists all columns in the passed in namespace id.
    async fn list_by_namespace_id(&mut self, namespace_id: NamespaceId) -> Result<Vec<Column>>;

    /// List all columns.
    async fn list(&mut self) -> Result<Vec<Column>>;
}
|
|
|
|
/// Functions for working with sequencers in the catalog
#[async_trait]
pub trait SequencerRepo: Send + Sync {
    /// create a sequencer record for the kafka topic and partition or return the existing record
    async fn create_or_get(
        &mut self,
        topic: &KafkaTopic,
        partition: KafkaPartition,
    ) -> Result<Sequencer>;

    /// get the sequencer record by `KafkaTopicId` and `KafkaPartition`
    async fn get_by_topic_id_and_partition(
        &mut self,
        topic_id: KafkaTopicId,
        partition: KafkaPartition,
    ) -> Result<Option<Sequencer>>;

    /// list all sequencers
    async fn list(&mut self) -> Result<Vec<Sequencer>>;

    /// list all sequencers for a given kafka topic
    async fn list_by_kafka_topic(&mut self, topic: &KafkaTopic) -> Result<Vec<Sequencer>>;

    /// updates the `min_unpersisted_sequence_number` for a sequencer
    async fn update_min_unpersisted_sequence_number(
        &mut self,
        sequencer_id: SequencerId,
        sequence_number: SequenceNumber,
    ) -> Result<()>;
}
|
|
|
|
/// Functions for working with IOx partitions in the catalog. Note that these are how IOx splits up
/// data within a database, which is different than Kafka partitions.
#[async_trait]
pub trait PartitionRepo: Send + Sync {
    /// create or get a partition record for the given partition key, sequencer and table
    async fn create_or_get(
        &mut self,
        key: &PartitionKey,
        sequencer_id: SequencerId,
        table_id: TableId,
    ) -> Result<Partition>;

    /// get partition by ID
    async fn get_by_id(&mut self, partition_id: PartitionId) -> Result<Option<Partition>>;

    /// return partitions for a given sequencer
    async fn list_by_sequencer(&mut self, sequencer_id: SequencerId) -> Result<Vec<Partition>>;

    /// return partitions for a given namespace
    async fn list_by_namespace(&mut self, namespace_id: NamespaceId) -> Result<Vec<Partition>>;

    /// return the partitions by table id
    async fn list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>>;

    /// return the partition record, the namespace name it belongs to, and the table name it is
    /// under
    async fn partition_info_by_id(
        &mut self,
        partition_id: PartitionId,
    ) -> Result<Option<PartitionInfo>>;

    /// Update the sort key for the partition
    async fn update_sort_key(
        &mut self,
        partition_id: PartitionId,
        sort_key: &[&str],
    ) -> Result<Partition>;
}
|
|
|
|
/// Functions for working with tombstones in the catalog
#[async_trait]
pub trait TombstoneRepo: Send + Sync {
    /// create or get a tombstone
    async fn create_or_get(
        &mut self,
        table_id: TableId,
        sequencer_id: SequencerId,
        sequence_number: SequenceNumber,
        min_time: Timestamp,
        max_time: Timestamp,
        predicate: &str,
    ) -> Result<Tombstone>;

    /// list all tombstones for a given namespace
    async fn list_by_namespace(&mut self, namespace_id: NamespaceId) -> Result<Vec<Tombstone>>;

    /// list all tombstones for a given table
    async fn list_by_table(&mut self, table_id: TableId) -> Result<Vec<Tombstone>>;

    /// get tombstones of the given id
    async fn get_by_id(&mut self, tombstone_id: TombstoneId) -> Result<Option<Tombstone>>;

    /// return all tombstones for the sequencer with a sequence number greater than that
    /// passed in. This will be used by the ingester on startup to see what tombstones
    /// might have to be applied to data that is read from the write buffer.
    async fn list_tombstones_by_sequencer_greater_than(
        &mut self,
        sequencer_id: SequencerId,
        sequence_number: SequenceNumber,
    ) -> Result<Vec<Tombstone>>;

    /// Remove given tombstones
    async fn remove(&mut self, tombstone_ids: &[TombstoneId]) -> Result<()>;

    /// Return all tombstones that have:
    ///
    /// - the specified sequencer ID and table ID
    /// - a sequence number greater than the specified sequence number
    /// - a time period that overlaps with the specified time period
    ///
    /// Used during compaction.
    async fn list_tombstones_for_time_range(
        &mut self,
        sequencer_id: SequencerId,
        table_id: TableId,
        sequence_number: SequenceNumber,
        min_time: Timestamp,
        max_time: Timestamp,
    ) -> Result<Vec<Tombstone>>;
}
|
|
|
|
/// The starting compaction level for parquet files is zero (freshly persisted, not yet compacted).
pub const INITIAL_COMPACTION_LEVEL: i16 = 0;
|
|
|
|
/// Functions for working with parquet file pointers in the catalog
#[async_trait]
pub trait ParquetFileRepo: Send + Sync {
    /// create the parquet file
    async fn create(&mut self, parquet_file_params: ParquetFileParams) -> Result<ParquetFile>;

    /// Flag the parquet file for deletion
    async fn flag_for_delete(&mut self, id: ParquetFileId) -> Result<()>;

    /// Get all parquet files for a sequencer with a max_sequence_number greater than the
    /// one passed in. The ingester will use this on startup to see which files were persisted
    /// that are greater than its min_unpersisted_number so that it can discard any data in
    /// these partitions on replay.
    async fn list_by_sequencer_greater_than(
        &mut self,
        sequencer_id: SequencerId,
        sequence_number: SequenceNumber,
    ) -> Result<Vec<ParquetFile>>;

    /// List all parquet files within a given namespace that are NOT marked as
    /// [`to_delete`](ParquetFile::to_delete).
    async fn list_by_namespace_not_to_delete(
        &mut self,
        namespace_id: NamespaceId,
    ) -> Result<Vec<ParquetFile>>;

    /// List all parquet files within a given table that are NOT marked as
    /// [`to_delete`](ParquetFile::to_delete).
    async fn list_by_table_not_to_delete(&mut self, table_id: TableId) -> Result<Vec<ParquetFile>>;

    /// List all parquet files and their metadata within a given table that are NOT marked as
    /// [`to_delete`](ParquetFile::to_delete). Fetching metadata can be expensive.
    async fn list_by_table_not_to_delete_with_metadata(
        &mut self,
        table_id: TableId,
    ) -> Result<Vec<ParquetFileWithMetadata>>;

    /// Delete all parquet files that were marked to be deleted earlier than the specified time.
    /// Returns the deleted records.
    async fn delete_old(&mut self, older_than: Timestamp) -> Result<Vec<ParquetFile>>;

    /// List parquet files for a given sequencer with compaction level 0 and other criteria that
    /// define a file as a candidate for compaction
    async fn level_0(&mut self, sequencer_id: SequencerId) -> Result<Vec<ParquetFile>>;

    /// List parquet files for a given table partition, in a given time range, with compaction
    /// level 1, and other criteria that define a file as a candidate for compaction with a level 0
    /// file
    async fn level_1(
        &mut self,
        table_partition: TablePartition,
        min_time: Timestamp,
        max_time: Timestamp,
    ) -> Result<Vec<ParquetFile>>;

    /// List parquet files for a given partition that are NOT marked as
    /// [`to_delete`](ParquetFile::to_delete).
    async fn list_by_partition_not_to_delete(
        &mut self,
        partition_id: PartitionId,
    ) -> Result<Vec<ParquetFile>>;

    /// List parquet files and their metadata for a given partition that are NOT marked as
    /// [`to_delete`](ParquetFile::to_delete). Fetching metadata can be expensive.
    async fn list_by_partition_not_to_delete_with_metadata(
        &mut self,
        partition_id: PartitionId,
    ) -> Result<Vec<ParquetFileWithMetadata>>;

    /// Update the compaction level of the specified parquet files to level 1. Returns the IDs
    /// of the files that were successfully updated.
    async fn update_to_level_1(
        &mut self,
        parquet_file_ids: &[ParquetFileId],
    ) -> Result<Vec<ParquetFileId>>;

    /// Verify if the parquet file exists by selecting its id
    async fn exist(&mut self, id: ParquetFileId) -> Result<bool>;

    /// Fetch the parquet_metadata bytes for the given id. Potentially expensive.
    async fn parquet_metadata(&mut self, id: ParquetFileId) -> Result<Vec<u8>>;

    /// Return count of all parquet files
    async fn count(&mut self) -> Result<i64>;

    /// Return count of files of given table ID and sequencer ID that
    /// overlap with the given min_time and max_time and have a sequence number
    /// smaller than the given one
    async fn count_by_overlaps(
        &mut self,
        table_id: TableId,
        sequencer_id: SequencerId,
        min_time: Timestamp,
        max_time: Timestamp,
        sequence_number: SequenceNumber,
    ) -> Result<i64>;

    /// Return the parquet file with the given object store id
    async fn get_by_object_store_id(
        &mut self,
        object_store_id: Uuid,
    ) -> Result<Option<ParquetFile>>;
}
|
|
|
|
/// Functions for working with processed tombstone pointers in the catalog
#[async_trait]
pub trait ProcessedTombstoneRepo: Send + Sync {
    /// create a processed tombstone
    async fn create(
        &mut self,
        parquet_file_id: ParquetFileId,
        tombstone_id: TombstoneId,
    ) -> Result<ProcessedTombstone>;

    /// Verify if a processed tombstone exists in the catalog
    async fn exist(
        &mut self,
        parquet_file_id: ParquetFileId,
        tombstone_id: TombstoneId,
    ) -> Result<bool>;

    /// Return count of all processed tombstones
    async fn count(&mut self) -> Result<i64>;

    /// Return count for a given tombstone id
    async fn count_by_tombstone_id(&mut self, tombstone_id: TombstoneId) -> Result<i64>;
}
|
|
|
|
/// Gets the namespace schema including all tables and columns.
|
|
pub async fn get_schema_by_name<R>(name: &str, repos: &mut R) -> Result<NamespaceSchema>
|
|
where
|
|
R: RepoCollection + ?Sized,
|
|
{
|
|
let namespace = repos
|
|
.namespaces()
|
|
.get_by_name(name)
|
|
.await?
|
|
.context(NamespaceNotFoundSnafu { name })?;
|
|
|
|
// get the columns first just in case someone else is creating schema while we're doing this.
|
|
let columns = repos.columns().list_by_namespace_id(namespace.id).await?;
|
|
let tables = repos.tables().list_by_namespace_id(namespace.id).await?;
|
|
|
|
let mut namespace = NamespaceSchema::new(
|
|
namespace.id,
|
|
namespace.kafka_topic_id,
|
|
namespace.query_pool_id,
|
|
);
|
|
|
|
let mut table_id_to_schema = BTreeMap::new();
|
|
for t in tables {
|
|
table_id_to_schema.insert(t.id, (t.name, TableSchema::new(t.id)));
|
|
}
|
|
|
|
for c in columns {
|
|
let (_, t) = table_id_to_schema.get_mut(&c.table_id).unwrap();
|
|
match ColumnType::try_from(c.column_type) {
|
|
Ok(column_type) => {
|
|
t.columns.insert(
|
|
c.name,
|
|
ColumnSchema {
|
|
id: c.id,
|
|
column_type,
|
|
},
|
|
);
|
|
}
|
|
_ => {
|
|
return Err(Error::UnknownColumnType {
|
|
data_type: c.column_type,
|
|
name: c.name.to_string(),
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
for (_, (table_name, schema)) in table_id_to_schema {
|
|
namespace.tables.insert(table_name, schema);
|
|
}
|
|
|
|
Ok(namespace)
|
|
}
|
|
|
|
/// Fetch all [`NamespaceSchema`] in the catalog.
///
/// This method performs the minimal number of queries needed to build the
/// result set. No table lock is obtained, nor are queries executed within a
/// transaction, but this method does return a point-in-time snapshot of the
/// catalog state.
pub async fn list_schemas(
    catalog: &dyn Catalog,
) -> Result<impl Iterator<Item = (Namespace, NamespaceSchema)>> {
    let mut repos = catalog.repositories().await;

    // In order to obtain a point-in-time snapshot, first fetch the columns,
    // then the tables, and then resolve the namespace IDs to Namespace in order
    // to construct the schemas.
    //
    // The set of columns returned forms the state snapshot, with the subsequent
    // queries resolving only what is needed to construct schemas for the
    // retrieved columns (ignoring any newly added tables/namespaces since the
    // column snapshot was taken).

    // First fetch all the columns - this is the state snapshot of the catalog
    // schemas.
    let columns = repos.columns().list().await?;

    // Construct the set of table IDs these columns belong to.
    let retain_table_ids = columns.iter().map(|c| c.table_id).collect::<HashSet<_>>();

    // Fetch all tables, and filter for those that are needed to construct
    // schemas for "columns" only.
    //
    // Discard any tables that have no columns or have been created since
    // the "columns" snapshot was retrieved, and construct a map of ID->Table.
    let tables = repos
        .tables()
        .list()
        .await?
        .into_iter()
        .filter_map(|t| {
            if !retain_table_ids.contains(&t.id) {
                return None;
            }

            Some((t.id, t))
        })
        .collect::<HashMap<_, _>>();

    // Drop the table ID set as it will not be referenced again.
    drop(retain_table_ids);

    // Do all the I/O to fetch the namespaces in the background, while this
    // thread constructs the NamespaceId->TableSchema map below.
    let namespaces = tokio::spawn(async move { repos.namespaces().list().await });

    // A set of tables within a single namespace.
    type NamespaceTables = BTreeMap<String, TableSchema>;

    let mut joined = HashMap::<NamespaceId, NamespaceTables>::default();
    for column in columns {
        // Resolve the table this column references.
        //
        // Cannot fail: the tables map was built above to contain exactly the
        // table IDs referenced by "columns".
        let table = tables.get(&column.table_id).expect("no table for column");

        let table_schema = joined
            // Find or create a record in the joined <NamespaceId, Tables> map
            // for this namespace ID.
            .entry(table.namespace_id)
            .or_default()
            // Fetch the schema record for this table, or create an empty one.
            .entry(table.name.clone())
            .or_insert_with(|| TableSchema::new(column.table_id));

        table_schema.add_column(&column);
    }

    // The table map is no longer needed - immediately reclaim the memory.
    drop(tables);

    // Convert the Namespace instances into NamespaceSchema instances.
    let iter = namespaces
        .await
        .expect("namespace list task panicked")?
        .into_iter()
        // Ignore any namespaces that did not exist when the "columns" snapshot
        // was created, or have no tables/columns (and therefore have no entry
        // in "joined").
        .filter_map(move |v| {
            let mut ns = NamespaceSchema::new(v.id, v.kafka_topic_id, v.query_pool_id);
            ns.tables = joined.remove(&v.id)?;
            Some((v, ns))
        });

    Ok(iter)
}
|
|
|
|
#[cfg(test)]
|
|
pub(crate) mod test_helpers {
|
|
use crate::validate_or_insert_schema;
|
|
|
|
use super::*;
|
|
use ::test_helpers::{assert_contains, tracing::TracingCapture};
|
|
use data_types::ColumnId;
|
|
use metric::{Attributes, DurationHistogram, Metric};
|
|
use std::{
|
|
ops::{Add, DerefMut},
|
|
sync::Arc,
|
|
time::Duration,
|
|
};
|
|
|
|
    /// Entry point for the shared catalog conformance suite: runs every per-entity
    /// test against the given catalog implementation, then verifies that the
    /// catalog's metric registry recorded the instrumented operations.
    ///
    /// NOTE: the tests run sequentially on purpose - later tests rely on state
    /// (namespaces, tables, ...) created by earlier ones.
    pub(crate) async fn test_catalog(catalog: Arc<dyn Catalog>) {
        test_setup(Arc::clone(&catalog)).await;
        test_kafka_topic(Arc::clone(&catalog)).await;
        test_query_pool(Arc::clone(&catalog)).await;
        test_namespace(Arc::clone(&catalog)).await;
        test_table(Arc::clone(&catalog)).await;
        test_column(Arc::clone(&catalog)).await;
        test_sequencer(Arc::clone(&catalog)).await;
        test_partition(Arc::clone(&catalog)).await;
        test_tombstone(Arc::clone(&catalog)).await;
        test_tombstones_by_parquet_file(Arc::clone(&catalog)).await;
        test_parquet_file(Arc::clone(&catalog)).await;
        test_parquet_file_compaction_level_0(Arc::clone(&catalog)).await;
        test_parquet_file_compaction_level_1(Arc::clone(&catalog)).await;
        test_update_to_compaction_level_1(Arc::clone(&catalog)).await;
        test_processed_tombstones(Arc::clone(&catalog)).await;
        test_list_by_partiton_not_to_delete(Arc::clone(&catalog)).await;
        test_txn_isolation(Arc::clone(&catalog)).await;
        test_txn_drop(Arc::clone(&catalog)).await;
        test_list_schemas(Arc::clone(&catalog)).await;

        // Each instrumented repo operation should have recorded at least one
        // duration sample by now.
        let metrics = catalog.metrics();
        assert_metric_hit(&*metrics, "kafka_create_or_get");
        assert_metric_hit(&*metrics, "query_create_or_get");
        assert_metric_hit(&*metrics, "namespace_create");
        assert_metric_hit(&*metrics, "table_create_or_get");
        assert_metric_hit(&*metrics, "column_create_or_get");
        assert_metric_hit(&*metrics, "sequencer_create_or_get");
        assert_metric_hit(&*metrics, "partition_create_or_get");
        assert_metric_hit(&*metrics, "tombstone_create_or_get");
        assert_metric_hit(&*metrics, "parquet_create");
    }
|
|
|
|
    /// `setup` must be idempotent: running it twice in a row must succeed both times.
    async fn test_setup(catalog: Arc<dyn Catalog>) {
        catalog.setup().await.expect("first catalog setup");
        catalog.setup().await.expect("second catalog setup");
    }
|
|
|
|
async fn test_kafka_topic(catalog: Arc<dyn Catalog>) {
|
|
let mut repos = catalog.repositories().await;
|
|
let kafka_repo = repos.kafka_topics();
|
|
|
|
let k = kafka_repo.create_or_get("foo").await.unwrap();
|
|
assert!(k.id > KafkaTopicId::new(0));
|
|
assert_eq!(k.name, "foo");
|
|
let k2 = kafka_repo.create_or_get("foo").await.unwrap();
|
|
assert_eq!(k, k2);
|
|
let k3 = kafka_repo.get_by_name("foo").await.unwrap().unwrap();
|
|
assert_eq!(k3, k);
|
|
let k3 = kafka_repo.get_by_name("asdf").await.unwrap();
|
|
assert!(k3.is_none());
|
|
}
|
|
|
|
async fn test_query_pool(catalog: Arc<dyn Catalog>) {
|
|
let mut repos = catalog.repositories().await;
|
|
let query_repo = repos.query_pools();
|
|
|
|
let q = query_repo.create_or_get("foo").await.unwrap();
|
|
assert!(q.id > QueryPoolId::new(0));
|
|
assert_eq!(q.name, "foo");
|
|
let q2 = query_repo.create_or_get("foo").await.unwrap();
|
|
assert_eq!(q, q2);
|
|
}
|
|
|
|
    /// Exercises [`NamespaceRepo`]: creation, duplicate-name rejection, lookup by
    /// ID/name (including misses), listing, and updating the table/column limits.
    async fn test_namespace(catalog: Arc<dyn Catalog>) {
        let mut repos = catalog.repositories().await;
        // Namespaces require an existing kafka topic and query pool to reference.
        let kafka = repos.kafka_topics().create_or_get("foo").await.unwrap();
        let pool = repos.query_pools().create_or_get("foo").await.unwrap();

        let namespace_name = "test_namespace";
        let namespace = repos
            .namespaces()
            .create(namespace_name, "inf", kafka.id, pool.id)
            .await
            .unwrap();
        assert!(namespace.id > NamespaceId::new(0));
        assert_eq!(namespace.name, namespace_name);

        // Creating a namespace with a name that already exists must fail.
        let conflict = repos
            .namespaces()
            .create(namespace_name, "inf", kafka.id, pool.id)
            .await;
        assert!(matches!(
            conflict.unwrap_err(),
            Error::NameExists { name: _ }
        ));

        // Lookup by ID finds the record...
        let found = repos
            .namespaces()
            .get_by_id(namespace.id)
            .await
            .unwrap()
            .expect("namespace should be there");
        assert_eq!(namespace, found);

        // ...and an unknown ID yields None rather than an error.
        let not_found = repos
            .namespaces()
            .get_by_id(NamespaceId::new(i64::MAX))
            .await
            .unwrap();
        assert!(not_found.is_none());

        // Same pair of checks for lookup by name.
        let found = repos
            .namespaces()
            .get_by_name(namespace_name)
            .await
            .unwrap()
            .expect("namespace should be there");
        assert_eq!(namespace, found);

        let not_found = repos
            .namespaces()
            .get_by_name("does_not_exist")
            .await
            .unwrap();
        assert!(not_found.is_none());

        // Listing returns every namespace created so far (sorted here by name so
        // the comparison is deterministic regardless of backend ordering).
        let namespace2_name = "test_namespace2";
        let namespace2 = repos
            .namespaces()
            .create(namespace2_name, "inf", kafka.id, pool.id)
            .await
            .unwrap();
        let mut namespaces = repos.namespaces().list().await.unwrap();
        namespaces.sort_by_key(|ns| ns.name.clone());
        assert_eq!(namespaces, vec![namespace, namespace2]);

        // The per-namespace table limit can be raised and is reflected in the
        // returned record.
        const NEW_TABLE_LIMIT: i32 = 15000;
        let modified = repos
            .namespaces()
            .update_table_limit(namespace_name, NEW_TABLE_LIMIT)
            .await
            .expect("namespace should be updateable");
        assert_eq!(NEW_TABLE_LIMIT, modified.max_tables);

        // Likewise for the per-table column limit.
        const NEW_COLUMN_LIMIT: i32 = 1500;
        let modified = repos
            .namespaces()
            .update_column_limit(namespace_name, NEW_COLUMN_LIMIT)
            .await
            .expect("namespace should be updateable");
        assert_eq!(NEW_COLUMN_LIMIT, modified.max_columns_per_table);
    }
|
|
|
|
/// Exercise the table repository: idempotent create-or-get, lookup by ID,
/// listing by namespace and globally, same-name tables in different
/// namespaces, table persist info (with and without tombstones), and the
/// per-namespace table-count limit.
async fn test_table(catalog: Arc<dyn Catalog>) {
    let mut repos = catalog.repositories().await;
    let kafka = repos.kafka_topics().create_or_get("foo").await.unwrap();
    let pool = repos.query_pools().create_or_get("foo").await.unwrap();
    let namespace = repos
        .namespaces()
        .create("namespace_table_test", "inf", kafka.id, pool.id)
        .await
        .unwrap();

    // test we can create or get a table
    let t = repos
        .tables()
        .create_or_get("test_table", namespace.id)
        .await
        .unwrap();
    let tt = repos
        .tables()
        .create_or_get("test_table", namespace.id)
        .await
        .unwrap();
    assert!(t.id > TableId::new(0));
    // create_or_get for an existing (name, namespace) returns the same record.
    assert_eq!(t, tt);

    // get by id
    assert_eq!(t, repos.tables().get_by_id(t.id).await.unwrap().unwrap());
    // A non-existent ID yields None rather than an error.
    assert!(repos
        .tables()
        .get_by_id(TableId::new(i64::MAX))
        .await
        .unwrap()
        .is_none());

    let tables = repos
        .tables()
        .list_by_namespace_id(namespace.id)
        .await
        .unwrap();
    assert_eq!(vec![t.clone()], tables);

    // test we can create a table of the same name in a different namespace
    let namespace2 = repos
        .namespaces()
        .create("two", "inf", kafka.id, pool.id)
        .await
        .unwrap();
    assert_ne!(namespace, namespace2);
    let test_table = repos
        .tables()
        .create_or_get("test_table", namespace2.id)
        .await
        .unwrap();
    // Same name, different namespace => a distinct table record.
    assert_ne!(tt, test_table);
    assert_eq!(test_table.namespace_id, namespace2.id);

    // test get by namespace and name
    let foo_table = repos
        .tables()
        .create_or_get("foo", namespace2.id)
        .await
        .unwrap();
    // Non-existent namespace => None.
    assert_eq!(
        repos
            .tables()
            .get_by_namespace_and_name(NamespaceId::new(i64::MAX), "test_table")
            .await
            .unwrap(),
        None
    );
    // Existing namespace but unknown table name => None.
    assert_eq!(
        repos
            .tables()
            .get_by_namespace_and_name(namespace.id, "not_existing")
            .await
            .unwrap(),
        None
    );
    assert_eq!(
        repos
            .tables()
            .get_by_namespace_and_name(namespace.id, "test_table")
            .await
            .unwrap(),
        Some(t.clone())
    );
    assert_eq!(
        repos
            .tables()
            .get_by_namespace_and_name(namespace2.id, "test_table")
            .await
            .unwrap()
            .as_ref(),
        Some(&test_table)
    );
    assert_eq!(
        repos
            .tables()
            .get_by_namespace_and_name(namespace2.id, "foo")
            .await
            .unwrap()
            .as_ref(),
        Some(&foo_table)
    );

    // All tables should be returned by list(), regardless of namespace
    // NOTE(review): this comparison assumes list() returns tables in creation
    // order — confirm against each backing implementation.
    let list = repos.tables().list().await.unwrap();
    assert_eq!(list.as_slice(), [tt, test_table, foo_table]);

    // test we can get table persistence info with no persistence so far
    let seq = repos
        .sequencers()
        .create_or_get(&kafka, KafkaPartition::new(555))
        .await
        .unwrap();
    let ti = repos
        .tables()
        .get_table_persist_info(seq.id, t.namespace_id, &t.name)
        .await
        .unwrap()
        .unwrap();
    // No tombstones yet, so the max tombstone sequence number is None.
    assert_eq!(
        ti,
        TablePersistInfo {
            sequencer_id: seq.id,
            table_id: t.id,
            tombstone_max_sequence_number: None
        }
    );

    // and now with a tombstone persisted
    let tombstone = repos
        .tombstones()
        .create_or_get(
            t.id,
            seq.id,
            SequenceNumber::new(2001),
            Timestamp::new(1),
            Timestamp::new(10),
            "wahtevs",
        )
        .await
        .unwrap();
    let ti = repos
        .tables()
        .get_table_persist_info(seq.id, t.namespace_id, &t.name)
        .await
        .unwrap()
        .unwrap();
    // The persist info now reflects the tombstone's sequence number.
    assert_eq!(
        ti,
        TablePersistInfo {
            sequencer_id: seq.id,
            table_id: t.id,
            tombstone_max_sequence_number: Some(tombstone.sequence_number),
        }
    );

    // test per-namespace table limits
    // Drop the limit to 1; the namespace already has a table, so the next
    // create must fail with TableCreateLimitError.
    let latest = repos
        .namespaces()
        .update_table_limit("namespace_table_test", 1)
        .await
        .expect("namespace should be updateable");
    let err = repos
        .tables()
        .create_or_get("definitely_unique", latest.id)
        .await
        .expect_err("should error with table create limit error");
    assert!(matches!(
        err,
        Error::TableCreateLimitError {
            table_name: _,
            namespace_id: _
        }
    ));
}
|
|
|
|
/// Exercise the column repository: idempotent create-or-get, type-mismatch
/// rejection, same-name columns across tables, batched upserts, listing by
/// namespace and globally, and the per-namespace column-count limit.
async fn test_column(catalog: Arc<dyn Catalog>) {
    let mut repos = catalog.repositories().await;
    let kafka = repos.kafka_topics().create_or_get("foo").await.unwrap();
    let pool = repos.query_pools().create_or_get("foo").await.unwrap();
    let namespace = repos
        .namespaces()
        .create("namespace_column_test", "inf", kafka.id, pool.id)
        .await
        .unwrap();
    let table = repos
        .tables()
        .create_or_get("test_table", namespace.id)
        .await
        .unwrap();
    assert_eq!(table.namespace_id, namespace.id);

    // test we can create or get a column
    let c = repos
        .columns()
        .create_or_get("column_test", table.id, ColumnType::Tag)
        .await
        .unwrap();
    let cc = repos
        .columns()
        .create_or_get("column_test", table.id, ColumnType::Tag)
        .await
        .unwrap();
    assert!(c.id > ColumnId::new(0));
    // Same (name, table, type) => same record back.
    assert_eq!(c, cc);

    // test that attempting to create an already defined column of a different type returns error
    let err = repos
        .columns()
        .create_or_get("column_test", table.id, ColumnType::U64)
        .await
        .expect_err("should error with wrong column type");
    assert!(matches!(
        err,
        Error::ColumnTypeMismatch {
            name: _,
            existing: _,
            new: _
        }
    ));

    // test that we can create a column of the same name under a different table
    let table2 = repos
        .tables()
        .create_or_get("test_table_2", namespace.id)
        .await
        .unwrap();
    let ccc = repos
        .columns()
        .create_or_get("column_test", table2.id, ColumnType::U64)
        .await
        .unwrap();
    assert_ne!(c, ccc);

    // Batched upsert: the same column name may appear for multiple tables
    // in a single create_or_get_many call.
    let cols3 = repos
        .columns()
        .create_or_get_many(&[
            ColumnUpsertRequest {
                name: "a",
                table_id: table2.id,
                column_type: ColumnType::U64,
            },
            ColumnUpsertRequest {
                name: "a",
                table_id: table.id,
                column_type: ColumnType::U64,
            },
        ])
        .await
        .unwrap();

    let columns = repos
        .columns()
        .list_by_namespace_id(namespace.id)
        .await
        .unwrap();

    // NOTE(review): this comparison assumes a stable, creation-order result
    // from both list_by_namespace_id and create_or_get_many — confirm against
    // each backing implementation.
    let mut want = vec![c, ccc];
    want.extend(cols3);
    assert_eq!(want, columns);

    // Listing columns should return all columns in the catalog
    let list = repos.columns().list().await.unwrap();
    assert_eq!(list, want);

    // test per-namespace column limits
    // Drop the limit to 1; the table already has columns, so the next create
    // must fail with ColumnCreateLimitError.
    repos
        .namespaces()
        .update_column_limit("namespace_column_test", 1)
        .await
        .expect("namespace should be updateable");
    let err = repos
        .columns()
        .create_or_get("definitely unique", table.id, ColumnType::Tag)
        .await
        .expect_err("should error with table create limit error");
    assert!(matches!(
        err,
        Error::ColumnCreateLimitError {
            column_name: _,
            table_id: _,
        }
    ));
}
|
|
|
|
async fn test_sequencer(catalog: Arc<dyn Catalog>) {
|
|
let mut repos = catalog.repositories().await;
|
|
let kafka = repos
|
|
.kafka_topics()
|
|
.create_or_get("sequencer_test")
|
|
.await
|
|
.unwrap();
|
|
|
|
// Create 10 sequencers
|
|
let mut created = BTreeMap::new();
|
|
for partition in 1..=10 {
|
|
let sequencer = repos
|
|
.sequencers()
|
|
.create_or_get(&kafka, KafkaPartition::new(partition))
|
|
.await
|
|
.expect("failed to create sequencer");
|
|
created.insert(sequencer.id, sequencer);
|
|
}
|
|
|
|
// List them and assert they match
|
|
let listed = repos
|
|
.sequencers()
|
|
.list_by_kafka_topic(&kafka)
|
|
.await
|
|
.expect("failed to list sequencers")
|
|
.into_iter()
|
|
.map(|v| (v.id, v))
|
|
.collect::<BTreeMap<_, _>>();
|
|
|
|
assert_eq!(created, listed);
|
|
|
|
// get by the sequencer id and partition
|
|
let kafka_partition = KafkaPartition::new(1);
|
|
let sequencer = repos
|
|
.sequencers()
|
|
.get_by_topic_id_and_partition(kafka.id, kafka_partition)
|
|
.await
|
|
.unwrap()
|
|
.unwrap();
|
|
assert_eq!(kafka.id, sequencer.kafka_topic_id);
|
|
assert_eq!(kafka_partition, sequencer.kafka_partition);
|
|
|
|
// update the number
|
|
repos
|
|
.sequencers()
|
|
.update_min_unpersisted_sequence_number(sequencer.id, SequenceNumber::new(53))
|
|
.await
|
|
.unwrap();
|
|
let updated_sequencer = repos
|
|
.sequencers()
|
|
.create_or_get(&kafka, kafka_partition)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(updated_sequencer.id, sequencer.id);
|
|
assert_eq!(
|
|
updated_sequencer.min_unpersisted_sequence_number,
|
|
SequenceNumber::new(53)
|
|
);
|
|
|
|
let sequencer = repos
|
|
.sequencers()
|
|
.get_by_topic_id_and_partition(kafka.id, KafkaPartition::new(523))
|
|
.await
|
|
.unwrap();
|
|
assert!(sequencer.is_none());
|
|
}
|
|
|
|
/// Exercise the partition repository: create-or-get, lookup by ID, listing
/// by sequencer / table / namespace, partition info, and sort-key updates
/// (initial set and overwrite).
async fn test_partition(catalog: Arc<dyn Catalog>) {
    let mut repos = catalog.repositories().await;
    let kafka = repos.kafka_topics().create_or_get("foo").await.unwrap();
    let pool = repos.query_pools().create_or_get("foo").await.unwrap();
    let namespace = repos
        .namespaces()
        .create("namespace_partition_test", "inf", kafka.id, pool.id)
        .await
        .unwrap();
    let table = repos
        .tables()
        .create_or_get("test_table", namespace.id)
        .await
        .unwrap();
    let sequencer = repos
        .sequencers()
        .create_or_get(&kafka, KafkaPartition::new(1))
        .await
        .unwrap();
    // A second sequencer so we can verify list_by_sequencer filters correctly.
    let other_sequencer = repos
        .sequencers()
        .create_or_get(&kafka, KafkaPartition::new(2))
        .await
        .unwrap();

    // Two partitions on the first sequencer, keyed by ID for comparison.
    let mut created = BTreeMap::new();
    for key in ["foo", "bar"] {
        let partition = repos
            .partitions()
            .create_or_get(&key.into(), sequencer.id, table.id)
            .await
            .expect("failed to create partition");
        created.insert(partition.id, partition);
    }
    // One partition on the other sequencer (same table).
    let other_partition = repos
        .partitions()
        .create_or_get(&"asdf".into(), other_sequencer.id, table.id)
        .await
        .unwrap();

    // partitions can be retrieved easily
    assert_eq!(
        other_partition,
        repos
            .partitions()
            .get_by_id(other_partition.id)
            .await
            .unwrap()
            .unwrap()
    );
    // A non-existent partition ID yields None rather than an error.
    assert!(repos
        .partitions()
        .get_by_id(PartitionId::new(i64::MAX))
        .await
        .unwrap()
        .is_none());

    // List them and assert they match
    // list_by_sequencer must exclude other_partition (different sequencer).
    let listed = repos
        .partitions()
        .list_by_sequencer(sequencer.id)
        .await
        .expect("failed to list partitions")
        .into_iter()
        .map(|v| (v.id, v))
        .collect::<BTreeMap<_, _>>();

    assert_eq!(created, listed);

    // list_by_table_id must include partitions from both sequencers.
    let listed = repos
        .partitions()
        .list_by_table_id(table.id)
        .await
        .expect("failed to list partitions")
        .into_iter()
        .map(|v| (v.id, v))
        .collect::<BTreeMap<_, _>>();

    // Fold other_partition into the expectation before comparing.
    created.insert(other_partition.id, other_partition.clone());
    assert_eq!(created, listed);

    // test get_partition_info_by_id
    let info = repos
        .partitions()
        .partition_info_by_id(other_partition.id)
        .await
        .unwrap()
        .unwrap();
    assert_eq!(info.partition, other_partition);
    assert_eq!(info.table_name, "test_table");
    assert_eq!(info.namespace_name, "namespace_partition_test");

    // test list_by_namespace
    // A partition in a different namespace must NOT be returned.
    let namespace2 = repos
        .namespaces()
        .create("namespace_partition_test2", "inf", kafka.id, pool.id)
        .await
        .unwrap();
    let table2 = repos
        .tables()
        .create_or_get("test_table2", namespace2.id)
        .await
        .unwrap();
    repos
        .partitions()
        .create_or_get(&"some_key".into(), sequencer.id, table2.id)
        .await
        .expect("failed to create partition");
    let listed = repos
        .partitions()
        .list_by_namespace(namespace.id)
        .await
        .expect("failed to list partitions")
        .into_iter()
        .map(|v| (v.id, v))
        .collect::<BTreeMap<_, _>>();
    // Expected = everything created in `namespace` (the `created` map already
    // contains other_partition; chaining it again is a harmless re-insert of
    // the same key into the BTreeMap).
    let expected: BTreeMap<_, _> = created
        .iter()
        .map(|(k, v)| (*k, v.clone()))
        .chain(std::iter::once((
            other_partition.id,
            other_partition.clone(),
        )))
        .collect();
    assert_eq!(expected, listed);

    // sort_key should be empty on creation
    assert!(other_partition.sort_key.is_empty());

    // test update_sort_key from None to Some
    repos
        .partitions()
        .update_sort_key(other_partition.id, &["tag2", "tag1", "time"])
        .await
        .unwrap();

    // test getting the new sort key
    let updated_other_partition = repos
        .partitions()
        .get_by_id(other_partition.id)
        .await
        .unwrap()
        .unwrap();
    assert_eq!(
        updated_other_partition.sort_key,
        vec!["tag2", "tag1", "time"]
    );

    // test update_sort_key from Some value to Some other value
    // A key containing " , " checks that delimiters inside entries survive a
    // round trip through storage.
    repos
        .partitions()
        .update_sort_key(
            other_partition.id,
            &["tag2", "tag1", "tag3 , with comma", "time"],
        )
        .await
        .unwrap();

    // test getting the new sort key
    let updated_other_partition = repos
        .partitions()
        .get_by_id(other_partition.id)
        .await
        .unwrap()
        .unwrap();
    assert_eq!(
        updated_other_partition.sort_key,
        vec!["tag2", "tag1", "tag3 , with comma", "time"]
    );
}
|
|
|
|
/// Exercise the tombstone repository: creation with full field verification,
/// listing by sequencer / table / namespace, lookup by ID (hit and miss),
/// and bulk removal.
async fn test_tombstone(catalog: Arc<dyn Catalog>) {
    let mut repos = catalog.repositories().await;
    let kafka = repos.kafka_topics().create_or_get("foo").await.unwrap();
    let pool = repos.query_pools().create_or_get("foo").await.unwrap();
    let namespace = repos
        .namespaces()
        .create("namespace_tombstone_test", "inf", kafka.id, pool.id)
        .await
        .unwrap();
    let table = repos
        .tables()
        .create_or_get("test_table", namespace.id)
        .await
        .unwrap();
    // Second table to verify list_by_table filters correctly.
    let other_table = repos
        .tables()
        .create_or_get("other", namespace.id)
        .await
        .unwrap();
    let sequencer = repos
        .sequencers()
        .create_or_get(&kafka, KafkaPartition::new(1))
        .await
        .unwrap();

    // First tombstone: verify every field round-trips through create_or_get.
    let min_time = Timestamp::new(1);
    let max_time = Timestamp::new(10);
    let t1 = repos
        .tombstones()
        .create_or_get(
            table.id,
            sequencer.id,
            SequenceNumber::new(1),
            min_time,
            max_time,
            "whatevs",
        )
        .await
        .unwrap();
    assert!(t1.id > TombstoneId::new(0));
    assert_eq!(t1.sequencer_id, sequencer.id);
    assert_eq!(t1.sequence_number, SequenceNumber::new(1));
    assert_eq!(t1.min_time, min_time);
    assert_eq!(t1.max_time, max_time);
    assert_eq!(t1.serialized_predicate, "whatevs");
    // t2 targets the other table; t3 the first table again, each with a
    // higher sequence number.
    let t2 = repos
        .tombstones()
        .create_or_get(
            other_table.id,
            sequencer.id,
            SequenceNumber::new(2),
            min_time.add(10),
            max_time.add(10),
            "bleh",
        )
        .await
        .unwrap();
    let t3 = repos
        .tombstones()
        .create_or_get(
            table.id,
            sequencer.id,
            SequenceNumber::new(3),
            min_time.add(10),
            max_time.add(10),
            "sdf",
        )
        .await
        .unwrap();

    // Strictly-greater-than filter: t1 (sequence number 1) is excluded.
    let listed = repos
        .tombstones()
        .list_tombstones_by_sequencer_greater_than(sequencer.id, SequenceNumber::new(1))
        .await
        .unwrap();
    assert_eq!(vec![t2.clone(), t3.clone()], listed);

    // test list_by_table
    let listed = repos.tombstones().list_by_table(table.id).await.unwrap();
    assert_eq!(vec![t1.clone(), t3.clone()], listed);
    let listed = repos
        .tombstones()
        .list_by_table(other_table.id)
        .await
        .unwrap();
    assert_eq!(vec![t2.clone()], listed);

    // test list_by_namespace
    let namespace2 = repos
        .namespaces()
        .create("namespace_tombstone_test2", "inf", kafka.id, pool.id)
        .await
        .unwrap();
    let table2 = repos
        .tables()
        .create_or_get("test_table2", namespace2.id)
        .await
        .unwrap();
    let t4 = repos
        .tombstones()
        .create_or_get(
            table2.id,
            sequencer.id,
            SequenceNumber::new(1),
            min_time.add(10),
            max_time.add(10),
            "whatevs",
        )
        .await
        .unwrap();
    let t5 = repos
        .tombstones()
        .create_or_get(
            table2.id,
            sequencer.id,
            SequenceNumber::new(2),
            min_time.add(10),
            max_time.add(10),
            "foo",
        )
        .await
        .unwrap();
    let listed = repos
        .tombstones()
        .list_by_namespace(namespace2.id)
        .await
        .unwrap();
    assert_eq!(vec![t4.clone(), t5.clone()], listed);
    // A namespace with no tombstones yields an empty list, not an error.
    let listed = repos
        .tombstones()
        .list_by_namespace(NamespaceId::new(i64::MAX))
        .await
        .unwrap();
    assert!(listed.is_empty());

    // test get_by_id
    let ts = repos.tombstones().get_by_id(t1.id).await.unwrap().unwrap();
    assert_eq!(ts, t1.clone());
    let ts = repos.tombstones().get_by_id(t2.id).await.unwrap().unwrap();
    assert_eq!(ts, t2.clone());
    // The sum of all existing IDs cannot itself be an existing ID (IDs are
    // positive), so this lookup must miss.
    let ts = repos
        .tombstones()
        .get_by_id(TombstoneId::new(
            t1.id.get() + t2.id.get() + t3.id.get() + t4.id.get() + t5.id.get(),
        )) // not exist id
        .await
        .unwrap();
    assert!(ts.is_none());

    // test remove
    // Bulk-remove t1 and t3; the rest must survive.
    repos.tombstones().remove(&[t1.id, t3.id]).await.unwrap();
    let ts = repos.tombstones().get_by_id(t1.id).await.unwrap();
    assert!(ts.is_none()); // no longer there
    let ts = repos.tombstones().get_by_id(t3.id).await.unwrap();
    assert!(ts.is_none()); // no longer there
    let ts = repos.tombstones().get_by_id(t2.id).await.unwrap().unwrap();
    assert_eq!(ts, t2.clone()); // still there
    let ts = repos.tombstones().get_by_id(t4.id).await.unwrap().unwrap();
    assert_eq!(ts, t4.clone()); // still there
    let ts = repos.tombstones().get_by_id(t5.id).await.unwrap().unwrap();
    assert_eq!(ts, t5.clone()); // still there
}
|
|
|
|
/// Exercise `list_tombstones_for_time_range`: build one parquet file, then a
/// series of tombstones that each miss exactly one matching criterion
/// (sequencer, table, sequence number, or time overlap), plus three that
/// match, and assert only the matching ones are returned.
async fn test_tombstones_by_parquet_file(catalog: Arc<dyn Catalog>) {
    let mut repos = catalog.repositories().await;
    let kafka = repos.kafka_topics().create_or_get("foo").await.unwrap();
    let pool = repos.query_pools().create_or_get("foo").await.unwrap();
    let namespace = repos
        .namespaces()
        .create(
            "namespace_tombstones_by_parquet_file_test",
            "inf",
            kafka.id,
            pool.id,
        )
        .await
        .unwrap();
    let table = repos
        .tables()
        .create_or_get("test_table", namespace.id)
        .await
        .unwrap();
    let other_table = repos
        .tables()
        .create_or_get("other", namespace.id)
        .await
        .unwrap();
    let sequencer = repos
        .sequencers()
        .create_or_get(&kafka, KafkaPartition::new(57))
        .await
        .unwrap();
    let other_sequencer = repos
        .sequencers()
        .create_or_get(&kafka, KafkaPartition::new(58))
        .await
        .unwrap();
    let partition = repos
        .partitions()
        .create_or_get(&"one".into(), sequencer.id, table.id)
        .await
        .unwrap();

    // The reference window: tombstones must overlap [min_time, max_time] and
    // have a sequence number strictly greater than max_sequence_number.
    let min_time = Timestamp::new(10);
    let max_time = Timestamp::new(20);
    let max_sequence_number = SequenceNumber::new(140);

    let parquet_file_params = ParquetFileParams {
        sequencer_id: sequencer.id,
        namespace_id: namespace.id,
        table_id: partition.table_id,
        partition_id: partition.id,
        object_store_id: Uuid::new_v4(),
        min_sequence_number: SequenceNumber::new(10),
        max_sequence_number,
        min_time,
        max_time,
        file_size_bytes: 1337,
        parquet_metadata: b"md1".to_vec(),
        row_count: 0,
        compaction_level: INITIAL_COMPACTION_LEVEL,
        created_at: Timestamp::new(1),
    };
    let parquet_file = repos
        .parquet_files()
        .create(parquet_file_params.clone())
        .await
        .unwrap();

    // Create a tombstone with another sequencer
    // (must be excluded: wrong sequencer)
    repos
        .tombstones()
        .create_or_get(
            table.id,
            other_sequencer.id,
            max_sequence_number + 100,
            min_time,
            max_time,
            "whatevs",
        )
        .await
        .unwrap();

    // Create a tombstone with the same sequencer but a different table
    // (must be excluded: wrong table)
    repos
        .tombstones()
        .create_or_get(
            other_table.id,
            sequencer.id,
            max_sequence_number + 101,
            min_time,
            max_time,
            "whatevs",
        )
        .await
        .unwrap();

    // Create a tombstone with a sequence number before the parquet file's max
    // (must be excluded: already applied)
    repos
        .tombstones()
        .create_or_get(
            table.id,
            sequencer.id,
            max_sequence_number - 10,
            min_time,
            max_time,
            "whatevs",
        )
        .await
        .unwrap();

    // Create a tombstone with a sequence number exactly equal to the parquet file's max
    // (must be excluded: the filter is strictly greater-than)
    repos
        .tombstones()
        .create_or_get(
            table.id,
            sequencer.id,
            max_sequence_number,
            min_time,
            max_time,
            "whatevs",
        )
        .await
        .unwrap();

    // Create a tombstone with a time range less than the parquet file's times
    // (must be excluded: no time overlap)
    repos
        .tombstones()
        .create_or_get(
            table.id,
            sequencer.id,
            max_sequence_number + 102,
            min_time - 5,
            min_time - 4,
            "whatevs",
        )
        .await
        .unwrap();

    // Create a tombstone with a time range greater than the parquet file's times
    // (must be excluded: no time overlap)
    repos
        .tombstones()
        .create_or_get(
            table.id,
            sequencer.id,
            max_sequence_number + 103,
            max_time + 1,
            max_time + 2,
            "whatevs",
        )
        .await
        .unwrap();

    // Create a tombstone that matches all criteria
    let matching_tombstone1 = repos
        .tombstones()
        .create_or_get(
            table.id,
            sequencer.id,
            max_sequence_number + 104,
            min_time,
            max_time,
            "whatevs",
        )
        .await
        .unwrap();

    // Create a tombstone that overlaps the file's min
    // (partial overlap at the lower boundary still counts)
    let matching_tombstone2 = repos
        .tombstones()
        .create_or_get(
            table.id,
            sequencer.id,
            max_sequence_number + 105,
            min_time - 1,
            min_time + 1,
            "whatevs",
        )
        .await
        .unwrap();

    // Create a tombstone that overlaps the file's max
    // (partial overlap at the upper boundary still counts)
    let matching_tombstone3 = repos
        .tombstones()
        .create_or_get(
            table.id,
            sequencer.id,
            max_sequence_number + 106,
            max_time - 1,
            max_time + 1,
            "whatevs",
        )
        .await
        .unwrap();

    // Query with the parquet file's coordinates; only the three matching
    // tombstones should come back.
    let tombstones = repos
        .tombstones()
        .list_tombstones_for_time_range(
            sequencer.id,
            table.id,
            max_sequence_number,
            min_time,
            max_time,
        )
        .await
        .unwrap();
    // Compare by sorted ID sets so the result order does not matter.
    let mut tombstones_ids: Vec<_> = tombstones.iter().map(|t| t.id).collect();
    tombstones_ids.sort();
    let expected = vec![
        matching_tombstone1,
        matching_tombstone2,
        matching_tombstone3,
    ];
    let mut expected_ids: Vec<_> = expected.iter().map(|t| t.id).collect();
    expected_ids.sort();

    assert_eq!(
        tombstones_ids, expected_ids,
        "\ntombstones: {:#?}\nexpected: {:#?}\nparquet_file: {:#?}",
        tombstones, expected, parquet_file
    );
}
|
|
|
|
async fn test_parquet_file(catalog: Arc<dyn Catalog>) {
|
|
let mut repos = catalog.repositories().await;
|
|
let kafka = repos.kafka_topics().create_or_get("foo").await.unwrap();
|
|
let pool = repos.query_pools().create_or_get("foo").await.unwrap();
|
|
let namespace = repos
|
|
.namespaces()
|
|
.create("namespace_parquet_file_test", "inf", kafka.id, pool.id)
|
|
.await
|
|
.unwrap();
|
|
let table = repos
|
|
.tables()
|
|
.create_or_get("test_table", namespace.id)
|
|
.await
|
|
.unwrap();
|
|
let other_table = repos
|
|
.tables()
|
|
.create_or_get("other", namespace.id)
|
|
.await
|
|
.unwrap();
|
|
let sequencer = repos
|
|
.sequencers()
|
|
.create_or_get(&kafka, KafkaPartition::new(1))
|
|
.await
|
|
.unwrap();
|
|
let partition = repos
|
|
.partitions()
|
|
.create_or_get(&"one".into(), sequencer.id, table.id)
|
|
.await
|
|
.unwrap();
|
|
let other_partition = repos
|
|
.partitions()
|
|
.create_or_get(&"one".into(), sequencer.id, other_table.id)
|
|
.await
|
|
.unwrap();
|
|
|
|
let parquet_file_params = ParquetFileParams {
|
|
sequencer_id: sequencer.id,
|
|
namespace_id: namespace.id,
|
|
table_id: partition.table_id,
|
|
partition_id: partition.id,
|
|
object_store_id: Uuid::new_v4(),
|
|
min_sequence_number: SequenceNumber::new(10),
|
|
max_sequence_number: SequenceNumber::new(140),
|
|
min_time: Timestamp::new(1),
|
|
max_time: Timestamp::new(10),
|
|
file_size_bytes: 1337,
|
|
parquet_metadata: b"md1".to_vec(),
|
|
row_count: 0,
|
|
compaction_level: INITIAL_COMPACTION_LEVEL,
|
|
created_at: Timestamp::new(1),
|
|
};
|
|
let parquet_file = repos
|
|
.parquet_files()
|
|
.create(parquet_file_params.clone())
|
|
.await
|
|
.unwrap();
|
|
|
|
// verify we can get it by its object store id
|
|
let pfg = repos
|
|
.parquet_files()
|
|
.get_by_object_store_id(parquet_file.object_store_id)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(parquet_file, pfg.unwrap());
|
|
|
|
let metadata = repos
|
|
.parquet_files()
|
|
.parquet_metadata(parquet_file.id)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(metadata, b"md1".to_vec());
|
|
|
|
// verify that trying to create a file with the same UUID throws an error
|
|
let err = repos
|
|
.parquet_files()
|
|
.create(parquet_file_params.clone())
|
|
.await
|
|
.unwrap_err();
|
|
assert!(matches!(err, Error::FileExists { object_store_id: _ }));
|
|
|
|
let other_params = ParquetFileParams {
|
|
table_id: other_partition.table_id,
|
|
partition_id: other_partition.id,
|
|
object_store_id: Uuid::new_v4(),
|
|
min_sequence_number: SequenceNumber::new(45),
|
|
max_sequence_number: SequenceNumber::new(200),
|
|
min_time: Timestamp::new(50),
|
|
max_time: Timestamp::new(60),
|
|
..parquet_file_params.clone()
|
|
};
|
|
let other_file = repos.parquet_files().create(other_params).await.unwrap();
|
|
|
|
let exist_id = parquet_file.id;
|
|
let non_exist_id = ParquetFileId::new(other_file.id.get() + 10);
|
|
// make sure exists_id != non_exist_id
|
|
assert_ne!(exist_id, non_exist_id);
|
|
assert!(repos.parquet_files().exist(exist_id).await.unwrap());
|
|
assert!(!repos.parquet_files().exist(non_exist_id).await.unwrap());
|
|
|
|
let files = repos
|
|
.parquet_files()
|
|
.list_by_sequencer_greater_than(sequencer.id, SequenceNumber::new(1))
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(vec![parquet_file, other_file], files);
|
|
let files = repos
|
|
.parquet_files()
|
|
.list_by_sequencer_greater_than(sequencer.id, SequenceNumber::new(150))
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(vec![other_file], files);
|
|
|
|
// verify that to_delete is initially set to null and the file does not get deleted
|
|
assert!(parquet_file.to_delete.is_none());
|
|
let older_than = Timestamp::new(
|
|
(catalog.time_provider().now() + Duration::from_secs(100)).timestamp_nanos(),
|
|
);
|
|
let deleted_files = repos.parquet_files().delete_old(older_than).await.unwrap();
|
|
assert!(deleted_files.is_empty());
|
|
assert!(repos.parquet_files().exist(parquet_file.id).await.unwrap());
|
|
|
|
// verify to_delete can be updated to a timestamp
|
|
repos
|
|
.parquet_files()
|
|
.flag_for_delete(parquet_file.id)
|
|
.await
|
|
.unwrap();
|
|
let files = repos
|
|
.parquet_files()
|
|
.list_by_sequencer_greater_than(sequencer.id, SequenceNumber::new(1))
|
|
.await
|
|
.unwrap();
|
|
let marked_deleted = files.first().unwrap();
|
|
assert!(marked_deleted.to_delete.is_some());
|
|
|
|
// File is not deleted if it was marked to be deleted after the specified time
|
|
let before_deleted = Timestamp::new(
|
|
(catalog.time_provider().now() - Duration::from_secs(100)).timestamp_nanos(),
|
|
);
|
|
let deleted_files = repos
|
|
.parquet_files()
|
|
.delete_old(before_deleted)
|
|
.await
|
|
.unwrap();
|
|
assert!(deleted_files.is_empty());
|
|
assert!(repos.parquet_files().exist(parquet_file.id).await.unwrap());
|
|
|
|
// File is deleted if it was marked to be deleted before the specified time
|
|
let deleted_files = repos.parquet_files().delete_old(older_than).await.unwrap();
|
|
assert_eq!(deleted_files.len(), 1);
|
|
assert_eq!(marked_deleted, &deleted_files[0]);
|
|
assert!(!repos.parquet_files().exist(parquet_file.id).await.unwrap());
|
|
|
|
// test list_by_table_not_to_delete
|
|
let files = repos
|
|
.parquet_files()
|
|
.list_by_table_not_to_delete(table.id)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(files, vec![]);
|
|
let files = repos
|
|
.parquet_files()
|
|
.list_by_table_not_to_delete(other_table.id)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(files, vec![other_file]);
|
|
|
|
// test list_by_table_not_to_delete_with_metadata
|
|
let files = repos
|
|
.parquet_files()
|
|
.list_by_table_not_to_delete_with_metadata(table.id)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(files, vec![]);
|
|
let files = repos
|
|
.parquet_files()
|
|
.list_by_table_not_to_delete_with_metadata(other_table.id)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(
|
|
files,
|
|
vec![ParquetFileWithMetadata::new(other_file, b"md1".to_vec())]
|
|
);
|
|
|
|
// test list_by_namespace_not_to_delete
|
|
let namespace2 = repos
|
|
.namespaces()
|
|
.create("namespace_parquet_file_test1", "inf", kafka.id, pool.id)
|
|
.await
|
|
.unwrap();
|
|
let table2 = repos
|
|
.tables()
|
|
.create_or_get("test_table2", namespace2.id)
|
|
.await
|
|
.unwrap();
|
|
let partition2 = repos
|
|
.partitions()
|
|
.create_or_get(&"foo".into(), sequencer.id, table2.id)
|
|
.await
|
|
.unwrap();
|
|
let files = repos
|
|
.parquet_files()
|
|
.list_by_namespace_not_to_delete(namespace2.id)
|
|
.await
|
|
.unwrap();
|
|
assert!(files.is_empty());
|
|
|
|
let f1_params = ParquetFileParams {
|
|
table_id: partition2.table_id,
|
|
partition_id: partition2.id,
|
|
object_store_id: Uuid::new_v4(),
|
|
min_time: Timestamp::new(1),
|
|
max_time: Timestamp::new(10),
|
|
min_sequence_number: SequenceNumber::new(10),
|
|
max_sequence_number: SequenceNumber::new(10),
|
|
..parquet_file_params
|
|
};
|
|
let f1 = repos
|
|
.parquet_files()
|
|
.create(f1_params.clone())
|
|
.await
|
|
.unwrap();
|
|
|
|
let f2_params = ParquetFileParams {
|
|
object_store_id: Uuid::new_v4(),
|
|
min_time: Timestamp::new(50),
|
|
max_time: Timestamp::new(60),
|
|
min_sequence_number: SequenceNumber::new(11),
|
|
max_sequence_number: SequenceNumber::new(11),
|
|
..f1_params
|
|
};
|
|
let f2 = repos
|
|
.parquet_files()
|
|
.create(f2_params.clone())
|
|
.await
|
|
.unwrap();
|
|
let files = repos
|
|
.parquet_files()
|
|
.list_by_namespace_not_to_delete(namespace2.id)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(vec![f1, f2], files);
|
|
|
|
let f3_params = ParquetFileParams {
|
|
object_store_id: Uuid::new_v4(),
|
|
min_time: Timestamp::new(50),
|
|
max_time: Timestamp::new(60),
|
|
min_sequence_number: SequenceNumber::new(12),
|
|
max_sequence_number: SequenceNumber::new(12),
|
|
..f2_params
|
|
};
|
|
let f3 = repos.parquet_files().create(f3_params).await.unwrap();
|
|
let files = repos
|
|
.parquet_files()
|
|
.list_by_namespace_not_to_delete(namespace2.id)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(vec![f1, f2, f3], files);
|
|
|
|
repos.parquet_files().flag_for_delete(f2.id).await.unwrap();
|
|
let files = repos
|
|
.parquet_files()
|
|
.list_by_namespace_not_to_delete(namespace2.id)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(vec![f1, f3], files);
|
|
|
|
let files = repos
|
|
.parquet_files()
|
|
.list_by_namespace_not_to_delete(NamespaceId::new(i64::MAX))
|
|
.await
|
|
.unwrap();
|
|
assert!(files.is_empty());
|
|
|
|
// test count_by_overlaps
|
|
// not time overlap
|
|
let count = repos
|
|
.parquet_files()
|
|
.count_by_overlaps(
|
|
partition2.table_id,
|
|
sequencer.id,
|
|
Timestamp::new(11),
|
|
Timestamp::new(20),
|
|
SequenceNumber::new(20),
|
|
)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(count, 0);
|
|
// overlaps with f1
|
|
let count = repos
|
|
.parquet_files()
|
|
.count_by_overlaps(
|
|
partition2.table_id,
|
|
sequencer.id,
|
|
Timestamp::new(1),
|
|
Timestamp::new(10),
|
|
SequenceNumber::new(20),
|
|
)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(count, 1);
|
|
// overlaps with f1 and f3
|
|
// f2 is deleted and should not be counted
|
|
let count = repos
|
|
.parquet_files()
|
|
.count_by_overlaps(
|
|
partition2.table_id,
|
|
sequencer.id,
|
|
Timestamp::new(7),
|
|
Timestamp::new(55),
|
|
SequenceNumber::new(20),
|
|
)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(count, 2);
|
|
// overlaps with f1 and f3 but on different time range
|
|
let count = repos
|
|
.parquet_files()
|
|
.count_by_overlaps(
|
|
partition2.table_id,
|
|
sequencer.id,
|
|
Timestamp::new(1),
|
|
Timestamp::new(100),
|
|
SequenceNumber::new(20),
|
|
)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(count, 2);
|
|
// overlaps with f3
|
|
let count = repos
|
|
.parquet_files()
|
|
.count_by_overlaps(
|
|
partition2.table_id,
|
|
sequencer.id,
|
|
Timestamp::new(15),
|
|
Timestamp::new(100),
|
|
SequenceNumber::new(20),
|
|
)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(count, 1);
|
|
// no overlaps due to smaller sequence number
|
|
let count = repos
|
|
.parquet_files()
|
|
.count_by_overlaps(
|
|
partition2.table_id,
|
|
sequencer.id,
|
|
Timestamp::new(15),
|
|
Timestamp::new(100),
|
|
SequenceNumber::new(2),
|
|
)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(count, 0);
|
|
}
|
|
|
|
/// Validates `ParquetFileRepo::level_0`: for a given sequencer it must return only files
/// that are (a) at compaction level 0, (b) not flagged for deletion, and (c) belong to that
/// sequencer.
async fn test_parquet_file_compaction_level_0(catalog: Arc<dyn Catalog>) {
    let mut repos = catalog.repositories().await;

    // Fixtures: topic, query pool, namespace, table, two sequencers, one partition.
    let kafka = repos.kafka_topics().create_or_get("foo").await.unwrap();
    let pool = repos.query_pools().create_or_get("foo").await.unwrap();
    let namespace = repos
        .namespaces()
        .create(
            "namespace_parquet_file_compaction_level_0_test",
            "inf",
            kafka.id,
            pool.id,
        )
        .await
        .unwrap();
    let table = repos
        .tables()
        .create_or_get("test_table", namespace.id)
        .await
        .unwrap();
    let sequencer = repos
        .sequencers()
        .create_or_get(&kafka, KafkaPartition::new(100))
        .await
        .unwrap();
    // Second sequencer whose files must NOT show up in the level_0 query below.
    let other_sequencer = repos
        .sequencers()
        .create_or_get(&kafka, KafkaPartition::new(101))
        .await
        .unwrap();

    let partition = repos
        .partitions()
        .create_or_get(&"one".into(), sequencer.id, table.id)
        .await
        .unwrap();

    let min_time = Timestamp::new(1);
    let max_time = Timestamp::new(10);

    // Template params; subsequent files clone this and override individual fields.
    // Starts at INITIAL_COMPACTION_LEVEL, i.e. level 0.
    let parquet_file_params = ParquetFileParams {
        sequencer_id: sequencer.id,
        namespace_id: namespace.id,
        table_id: partition.table_id,
        partition_id: partition.id,
        object_store_id: Uuid::new_v4(),
        min_sequence_number: SequenceNumber::new(10),
        max_sequence_number: SequenceNumber::new(140),
        min_time,
        max_time,
        file_size_bytes: 1337,
        parquet_metadata: b"md1".to_vec(),
        row_count: 0,
        compaction_level: INITIAL_COMPACTION_LEVEL,
        created_at: Timestamp::new(1),
    };

    // The only file that should be returned by level_0(sequencer.id).
    let parquet_file = repos
        .parquet_files()
        .create(parquet_file_params.clone())
        .await
        .unwrap();

    // Create a compaction level 0 file for some other sequencer
    let other_sequencer_params = ParquetFileParams {
        sequencer_id: other_sequencer.id,
        object_store_id: Uuid::new_v4(),
        ..parquet_file_params.clone()
    };

    let _other_sequencer_file = repos
        .parquet_files()
        .create(other_sequencer_params)
        .await
        .unwrap();

    // Create a compaction level 0 file marked to delete
    let to_delete_params = ParquetFileParams {
        object_store_id: Uuid::new_v4(),
        ..parquet_file_params.clone()
    };
    let to_delete_file = repos
        .parquet_files()
        .create(to_delete_params)
        .await
        .unwrap();
    repos
        .parquet_files()
        .flag_for_delete(to_delete_file.id)
        .await
        .unwrap();

    // Create a compaction level 1 file
    let level_1_params = ParquetFileParams {
        object_store_id: Uuid::new_v4(),
        ..parquet_file_params.clone()
    };
    let level_1_file = repos.parquet_files().create(level_1_params).await.unwrap();
    repos
        .parquet_files()
        .update_to_level_1(&[level_1_file.id])
        .await
        .unwrap();

    // Level 0 parquet files for a sequencer should contain only those that match the right
    // criteria
    let level_0 = repos.parquet_files().level_0(sequencer.id).await.unwrap();
    // Compare by sorted id so the assertion is independent of catalog return order.
    let mut level_0_ids: Vec<_> = level_0.iter().map(|pf| pf.id).collect();
    level_0_ids.sort();
    let expected = vec![parquet_file];
    let mut expected_ids: Vec<_> = expected.iter().map(|pf| pf.id).collect();
    expected_ids.sort();

    assert_eq!(
        level_0_ids, expected_ids,
        "\nlevel 0: {:#?}\nexpected: {:#?}",
        level_0, expected,
    );
}
|
|
|
|
/// Validates `ParquetFileRepo::level_1`: for a (sequencer, table, partition) triple and a
/// time window, it must return only level-1, not-deleted files of that triple whose time
/// range overlaps the window. Each decoy file below violates exactly one criterion.
async fn test_parquet_file_compaction_level_1(catalog: Arc<dyn Catalog>) {
    let mut repos = catalog.repositories().await;

    // Fixtures: topic, pool, namespace, two tables, two sequencers, two partitions.
    let kafka = repos.kafka_topics().create_or_get("foo").await.unwrap();
    let pool = repos.query_pools().create_or_get("foo").await.unwrap();
    let namespace = repos
        .namespaces()
        .create(
            "namespace_parquet_file_compaction_level_1_test",
            "inf",
            kafka.id,
            pool.id,
        )
        .await
        .unwrap();
    let table = repos
        .tables()
        .create_or_get("test_table", namespace.id)
        .await
        .unwrap();
    let other_table = repos
        .tables()
        .create_or_get("test_table2", namespace.id)
        .await
        .unwrap();
    let sequencer = repos
        .sequencers()
        .create_or_get(&kafka, KafkaPartition::new(100))
        .await
        .unwrap();
    let other_sequencer = repos
        .sequencers()
        .create_or_get(&kafka, KafkaPartition::new(101))
        .await
        .unwrap();
    let partition = repos
        .partitions()
        .create_or_get(&"one".into(), sequencer.id, table.id)
        .await
        .unwrap();
    let other_partition = repos
        .partitions()
        .create_or_get(&"two".into(), sequencer.id, table.id)
        .await
        .unwrap();

    // Set up the window of times we're interested in level 1 files for
    let query_min_time = Timestamp::new(5);
    let query_max_time = Timestamp::new(10);

    // Create a file with times entirely within the window
    let parquet_file_params = ParquetFileParams {
        sequencer_id: sequencer.id,
        namespace_id: namespace.id,
        table_id: partition.table_id,
        partition_id: partition.id,
        object_store_id: Uuid::new_v4(),
        min_sequence_number: SequenceNumber::new(10),
        max_sequence_number: SequenceNumber::new(140),
        min_time: query_min_time + 1,
        max_time: query_max_time - 1,
        file_size_bytes: 1337,
        parquet_metadata: b"md1".to_vec(),
        row_count: 0,
        compaction_level: INITIAL_COMPACTION_LEVEL,
        created_at: Timestamp::new(1),
    };
    let parquet_file = repos
        .parquet_files()
        .create(parquet_file_params.clone())
        .await
        .unwrap();

    // Create a file that will remain as level 0
    let level_0_params = ParquetFileParams {
        object_store_id: Uuid::new_v4(),
        ..parquet_file_params.clone()
    };
    let _level_0_file = repos.parquet_files().create(level_0_params).await.unwrap();

    // Create a file completely before the window
    let too_early_params = ParquetFileParams {
        object_store_id: Uuid::new_v4(),
        min_time: query_min_time - 2,
        max_time: query_min_time - 1,
        ..parquet_file_params.clone()
    };
    let too_early_file = repos
        .parquet_files()
        .create(too_early_params)
        .await
        .unwrap();

    // Create a file overlapping the window on the lower end
    let overlap_lower_params = ParquetFileParams {
        object_store_id: Uuid::new_v4(),
        min_time: query_min_time - 1,
        max_time: query_min_time + 1,
        ..parquet_file_params.clone()
    };
    let overlap_lower_file = repos
        .parquet_files()
        .create(overlap_lower_params)
        .await
        .unwrap();

    // Create a file overlapping the window on the upper end
    let overlap_upper_params = ParquetFileParams {
        object_store_id: Uuid::new_v4(),
        min_time: query_max_time - 1,
        max_time: query_max_time + 1,
        ..parquet_file_params.clone()
    };
    let overlap_upper_file = repos
        .parquet_files()
        .create(overlap_upper_params)
        .await
        .unwrap();

    // Create a file completely after the window
    let too_late_params = ParquetFileParams {
        object_store_id: Uuid::new_v4(),
        min_time: query_max_time + 1,
        max_time: query_max_time + 2,
        ..parquet_file_params.clone()
    };
    let too_late_file = repos.parquet_files().create(too_late_params).await.unwrap();

    // Create a file for some other sequencer
    let other_sequencer_params = ParquetFileParams {
        sequencer_id: other_sequencer.id,
        object_store_id: Uuid::new_v4(),
        ..parquet_file_params.clone()
    };
    let other_sequencer_file = repos
        .parquet_files()
        .create(other_sequencer_params)
        .await
        .unwrap();

    // Create a file for the same sequencer but a different table
    let other_table_params = ParquetFileParams {
        table_id: other_table.id,
        object_store_id: Uuid::new_v4(),
        ..parquet_file_params.clone()
    };
    let other_table_file = repos
        .parquet_files()
        .create(other_table_params)
        .await
        .unwrap();

    // Create a file for the same sequencer and table but a different partition
    let other_partition_params = ParquetFileParams {
        partition_id: other_partition.id,
        object_store_id: Uuid::new_v4(),
        ..parquet_file_params.clone()
    };
    let other_partition_file = repos
        .parquet_files()
        .create(other_partition_params)
        .await
        .unwrap();

    // Create a file marked to be deleted
    let to_delete_params = ParquetFileParams {
        object_store_id: Uuid::new_v4(),
        ..parquet_file_params.clone()
    };
    let to_delete_file = repos
        .parquet_files()
        .create(to_delete_params)
        .await
        .unwrap();
    repos
        .parquet_files()
        .flag_for_delete(to_delete_file.id)
        .await
        .unwrap();

    // Make all but _level_0_file compaction level 1
    repos
        .parquet_files()
        .update_to_level_1(&[
            parquet_file.id,
            too_early_file.id,
            too_late_file.id,
            overlap_lower_file.id,
            overlap_upper_file.id,
            other_sequencer_file.id,
            other_table_file.id,
            other_partition_file.id,
            to_delete_file.id,
        ])
        .await
        .unwrap();

    // Level 1 parquet files for a sequencer should contain only those that match the right
    // criteria
    let table_partition = TablePartition::new(sequencer.id, table.id, partition.id);
    let level_1 = repos
        .parquet_files()
        .level_1(table_partition, query_min_time, query_max_time)
        .await
        .unwrap();
    // Compare by sorted id so the assertion is independent of catalog return order.
    let mut level_1_ids: Vec<_> = level_1.iter().map(|pf| pf.id).collect();
    level_1_ids.sort();
    // Only the fully-contained file and the two partially-overlapping files qualify.
    let expected = vec![parquet_file, overlap_lower_file, overlap_upper_file];
    let mut expected_ids: Vec<_> = expected.iter().map(|pf| pf.id).collect();
    expected_ids.sort();

    assert_eq!(
        level_1_ids, expected_ids,
        "\nlevel 1: {:#?}\nexpected: {:#?}",
        level_1, expected,
    );
}
|
|
|
|
/// Validates `list_by_partition_not_to_delete` (and its `_with_metadata` variant): both must
/// return all non-deleted files of the given partition, regardless of compaction level,
/// excluding files flagged for deletion and files of other partitions.
/// NOTE(review): the fn name spells "partiton" — kept as-is since external callers use it.
async fn test_list_by_partiton_not_to_delete(catalog: Arc<dyn Catalog>) {
    let mut repos = catalog.repositories().await;

    // Fixtures: topic, pool, namespace, table, sequencer, and two partitions.
    let kafka = repos.kafka_topics().create_or_get("foo").await.unwrap();
    let pool = repos.query_pools().create_or_get("foo").await.unwrap();
    let namespace = repos
        .namespaces()
        .create(
            "namespace_parquet_file_test_list_by_partiton_not_to_delete",
            "inf",
            kafka.id,
            pool.id,
        )
        .await
        .unwrap();
    let table = repos
        .tables()
        .create_or_get("test_table", namespace.id)
        .await
        .unwrap();
    let sequencer = repos
        .sequencers()
        .create_or_get(&kafka, KafkaPartition::new(100))
        .await
        .unwrap();

    let partition = repos
        .partitions()
        .create_or_get(&"one".into(), sequencer.id, table.id)
        .await
        .unwrap();
    let partition2 = repos
        .partitions()
        .create_or_get(&"two".into(), sequencer.id, table.id)
        .await
        .unwrap();

    let min_time = Timestamp::new(1);
    let max_time = Timestamp::new(10);

    // Template params; subsequent files clone this and override individual fields.
    let parquet_file_params = ParquetFileParams {
        sequencer_id: sequencer.id,
        namespace_id: namespace.id,
        table_id: partition.table_id,
        partition_id: partition.id,
        object_store_id: Uuid::new_v4(),
        min_sequence_number: SequenceNumber::new(10),
        max_sequence_number: SequenceNumber::new(140),
        min_time,
        max_time,
        file_size_bytes: 1337,
        parquet_metadata: b"md1".to_vec(),
        row_count: 0,
        compaction_level: INITIAL_COMPACTION_LEVEL,
        created_at: Timestamp::new(1),
    };

    // Level-0 file in `partition` — expected in the results.
    let parquet_file = repos
        .parquet_files()
        .create(parquet_file_params.clone())
        .await
        .unwrap();

    // File in `partition` flagged for deletion — must be excluded.
    let delete_file_params = ParquetFileParams {
        object_store_id: Uuid::new_v4(),
        ..parquet_file_params.clone()
    };
    let delete_file = repos
        .parquet_files()
        .create(delete_file_params)
        .await
        .unwrap();
    repos
        .parquet_files()
        .flag_for_delete(delete_file.id)
        .await
        .unwrap();

    // Level-1 file in `partition` — expected in the results (level doesn't matter here).
    let level1_file_params = ParquetFileParams {
        object_store_id: Uuid::new_v4(),
        ..parquet_file_params.clone()
    };
    let mut level1_file = repos
        .parquet_files()
        .create(level1_file_params)
        .await
        .unwrap();
    repos
        .parquet_files()
        .update_to_level_1(&[level1_file.id])
        .await
        .unwrap();
    // Mirror the catalog-side update so the local copy compares equal below.
    level1_file.compaction_level = 1;

    // File in `partition2` — must not appear when querying `partition`.
    let other_partition_params = ParquetFileParams {
        partition_id: partition2.id,
        object_store_id: Uuid::new_v4(),
        ..parquet_file_params.clone()
    };
    let _partition2_file = repos
        .parquet_files()
        .create(other_partition_params)
        .await
        .unwrap();

    let files = repos
        .parquet_files()
        .list_by_partition_not_to_delete(partition.id)
        .await
        .unwrap();
    assert_eq!(files, vec![parquet_file, level1_file]);

    // The `_with_metadata` variant returns the same files with their parquet metadata blobs.
    let files = repos
        .parquet_files()
        .list_by_partition_not_to_delete_with_metadata(partition.id)
        .await
        .unwrap();
    assert_eq!(
        files,
        vec![
            ParquetFileWithMetadata::new(parquet_file, b"md1".to_vec()),
            ParquetFileWithMetadata::new(level1_file, b"md1".to_vec()),
        ]
    );
}
|
|
|
|
/// Validates `update_to_level_1`: it promotes the given file ids to compaction level 1,
/// silently skips ids that don't exist, and returns only the ids it actually updated.
async fn test_update_to_compaction_level_1(catalog: Arc<dyn Catalog>) {
    let mut repos = catalog.repositories().await;

    // Fixtures: topic, pool, namespace, table, sequencer, partition.
    let kafka = repos.kafka_topics().create_or_get("foo").await.unwrap();
    let pool = repos.query_pools().create_or_get("foo").await.unwrap();
    let namespace = repos
        .namespaces()
        .create(
            "namespace_update_to_compaction_level_1_test",
            "inf",
            kafka.id,
            pool.id,
        )
        .await
        .unwrap();
    let table = repos
        .tables()
        .create_or_get("update_table", namespace.id)
        .await
        .unwrap();
    let sequencer = repos
        .sequencers()
        .create_or_get(&kafka, KafkaPartition::new(1000))
        .await
        .unwrap();
    let partition = repos
        .partitions()
        .create_or_get(&"one".into(), sequencer.id, table.id)
        .await
        .unwrap();

    // Set up the window of times we're interested in level 1 files for
    let query_min_time = Timestamp::new(5);
    let query_max_time = Timestamp::new(10);

    // Create a file with times entirely within the window
    let parquet_file_params = ParquetFileParams {
        sequencer_id: sequencer.id,
        namespace_id: namespace.id,
        table_id: partition.table_id,
        partition_id: partition.id,
        object_store_id: Uuid::new_v4(),
        min_sequence_number: SequenceNumber::new(10),
        max_sequence_number: SequenceNumber::new(140),
        min_time: query_min_time + 1,
        max_time: query_max_time - 1,
        file_size_bytes: 1337,
        parquet_metadata: b"md1".to_vec(),
        row_count: 0,
        compaction_level: INITIAL_COMPACTION_LEVEL,
        created_at: Timestamp::new(1),
    };
    let parquet_file = repos
        .parquet_files()
        .create(parquet_file_params.clone())
        .await
        .unwrap();

    // Create a file that will remain as level 0
    let level_0_params = ParquetFileParams {
        object_store_id: Uuid::new_v4(),
        ..parquet_file_params.clone()
    };
    let level_0_file = repos.parquet_files().create(level_0_params).await.unwrap();

    // Create a ParquetFileId that doesn't actually exist in the catalog
    let nonexistent_parquet_file_id = ParquetFileId::new(level_0_file.id.get() + 1);

    // Level 0 parquet files should contain both existing files at this point
    let expected = vec![parquet_file, level_0_file];
    let level_0 = repos.parquet_files().level_0(sequencer.id).await.unwrap();
    // Compare by sorted id so the assertion is independent of catalog return order.
    let mut level_0_ids: Vec<_> = level_0.iter().map(|pf| pf.id).collect();
    level_0_ids.sort();
    let mut expected_ids: Vec<_> = expected.iter().map(|pf| pf.id).collect();
    expected_ids.sort();
    assert_eq!(
        level_0_ids, expected_ids,
        "\nlevel 0: {:#?}\nexpected: {:#?}",
        level_0, expected,
    );

    // Make parquet_file compaction level 1, attempt to mark the nonexistent file; operation
    // should succeed
    let updated = repos
        .parquet_files()
        .update_to_level_1(&[parquet_file.id, nonexistent_parquet_file_id])
        .await
        .unwrap();
    // Only the id that actually exists is reported as updated.
    assert_eq!(updated, vec![parquet_file.id]);

    // Level 0 parquet files should only contain level_0_file
    let expected = vec![level_0_file];
    let level_0 = repos.parquet_files().level_0(sequencer.id).await.unwrap();
    let mut level_0_ids: Vec<_> = level_0.iter().map(|pf| pf.id).collect();
    level_0_ids.sort();
    let mut expected_ids: Vec<_> = expected.iter().map(|pf| pf.id).collect();
    expected_ids.sort();
    assert_eq!(
        level_0_ids, expected_ids,
        "\nlevel 0: {:#?}\nexpected: {:#?}",
        level_0, expected,
    );

    // Level 1 parquet files for a sequencer should only contain parquet_file
    let expected = vec![parquet_file];
    let table_partition = TablePartition::new(sequencer.id, table.id, partition.id);
    let level_1 = repos
        .parquet_files()
        .level_1(table_partition, query_min_time, query_max_time)
        .await
        .unwrap();
    let mut level_1_ids: Vec<_> = level_1.iter().map(|pf| pf.id).collect();
    level_1_ids.sort();
    let mut expected_ids: Vec<_> = expected.iter().map(|pf| pf.id).collect();
    expected_ids.sort();
    assert_eq!(
        level_1_ids, expected_ids,
        "\nlevel 1: {:#?}\nexpected: {:#?}",
        level_1, expected,
    );
}
|
|
|
|
/// Validates the processed-tombstone repo: `create`, `exist`, `count`,
/// `count_by_tombstone_id`, and that `TombstoneRepo::remove` cascades to the processed
/// records (presumably via a foreign key — the final counts rely on that behavior).
async fn test_processed_tombstones(catalog: Arc<dyn Catalog>) {
    let mut repos = catalog.repositories().await;

    // Fixtures: topic, pool, namespace, table, sequencer, partition.
    let kafka = repos.kafka_topics().create_or_get("foo").await.unwrap();
    let pool = repos.query_pools().create_or_get("foo").await.unwrap();
    let namespace = repos
        .namespaces()
        .create(
            "namespace_processed_tombstone_test",
            "inf",
            kafka.id,
            pool.id,
        )
        .await
        .unwrap();
    let table = repos
        .tables()
        .create_or_get("test_table", namespace.id)
        .await
        .unwrap();
    let sequencer = repos
        .sequencers()
        .create_or_get(&kafka, KafkaPartition::new(1))
        .await
        .unwrap();
    let partition = repos
        .partitions()
        .create_or_get(&"one".into(), sequencer.id, table.id)
        .await
        .unwrap();

    // parquet files
    let parquet_file_params = ParquetFileParams {
        namespace_id: namespace.id,
        sequencer_id: sequencer.id,
        table_id: partition.table_id,
        partition_id: partition.id,
        object_store_id: Uuid::new_v4(),
        min_sequence_number: SequenceNumber::new(1),
        max_sequence_number: SequenceNumber::new(1),
        min_time: Timestamp::new(100),
        max_time: Timestamp::new(250),
        file_size_bytes: 1337,
        parquet_metadata: b"md1".to_vec(),
        row_count: 0,
        compaction_level: INITIAL_COMPACTION_LEVEL,
        created_at: Timestamp::new(1),
    };
    let p1 = repos
        .parquet_files()
        .create(parquet_file_params.clone())
        .await
        .unwrap();
    let parquet_file_params_2 = ParquetFileParams {
        object_store_id: Uuid::new_v4(),
        min_sequence_number: SequenceNumber::new(2),
        max_sequence_number: SequenceNumber::new(3),
        min_time: Timestamp::new(200),
        max_time: Timestamp::new(300),
        ..parquet_file_params
    };
    let p2 = repos
        .parquet_files()
        .create(parquet_file_params_2.clone())
        .await
        .unwrap();

    // tombstones
    let t1 = repos
        .tombstones()
        .create_or_get(
            table.id,
            sequencer.id,
            SequenceNumber::new(10),
            Timestamp::new(1),
            Timestamp::new(10),
            "whatevs",
        )
        .await
        .unwrap();
    let t2 = repos
        .tombstones()
        .create_or_get(
            table.id,
            sequencer.id,
            SequenceNumber::new(11),
            Timestamp::new(100),
            Timestamp::new(110),
            "whatevs",
        )
        .await
        .unwrap();
    let t3 = repos
        .tombstones()
        .create_or_get(
            table.id,
            sequencer.id,
            SequenceNumber::new(12),
            Timestamp::new(200),
            Timestamp::new(210),
            "whatevs",
        )
        .await
        .unwrap();

    // processed tombstones
    // Links created: (p1,t2), (p1,t3), (p2,t3). t1 is intentionally never processed.
    // p1, t2
    let _pt1 = repos
        .processed_tombstones()
        .create(p1.id, t2.id)
        .await
        .unwrap();
    // p1, t3
    let _pt2 = repos
        .processed_tombstones()
        .create(p1.id, t3.id)
        .await
        .unwrap();
    // p2, t3
    let _pt3 = repos
        .processed_tombstones()
        .create(p2.id, t3.id)
        .await
        .unwrap();

    // test exist
    let exist = repos
        .processed_tombstones()
        .exist(p1.id, t1.id)
        .await
        .unwrap();
    assert!(!exist);
    let exist = repos
        .processed_tombstones()
        .exist(p1.id, t2.id)
        .await
        .unwrap();
    assert!(exist);

    // test count
    let count = repos.processed_tombstones().count().await.unwrap();
    assert_eq!(count, 3);

    // test count_by_tombstone_id
    let count = repos
        .processed_tombstones()
        .count_by_tombstone_id(t1.id)
        .await
        .unwrap();
    assert_eq!(count, 0);
    let count = repos
        .processed_tombstones()
        .count_by_tombstone_id(t2.id)
        .await
        .unwrap();
    assert_eq!(count, 1);
    let count = repos
        .processed_tombstones()
        .count_by_tombstone_id(t3.id)
        .await
        .unwrap();
    assert_eq!(count, 2);

    // test remove
    repos.tombstones().remove(&[t3.id]).await.unwrap();
    // should still be 1 because t2 was not deleted
    let count = repos
        .processed_tombstones()
        .count_by_tombstone_id(t2.id)
        .await
        .unwrap();
    assert_eq!(count, 1);
    // should be 0 because t3 was deleted
    let count = repos
        .processed_tombstones()
        .count_by_tombstone_id(t3.id)
        .await
        .unwrap();
    assert_eq!(count, 0);
}
|
|
|
|
async fn test_txn_isolation(catalog: Arc<dyn Catalog>) {
|
|
let barrier = Arc::new(tokio::sync::Barrier::new(2));
|
|
|
|
let barrier_captured = Arc::clone(&barrier);
|
|
let catalog_captured = Arc::clone(&catalog);
|
|
let insertion_task = tokio::spawn(async move {
|
|
barrier_captured.wait().await;
|
|
|
|
let mut txn = catalog_captured.start_transaction().await.unwrap();
|
|
txn.kafka_topics()
|
|
.create_or_get("test_txn_isolation")
|
|
.await
|
|
.unwrap();
|
|
|
|
tokio::time::sleep(Duration::from_millis(200)).await;
|
|
txn.abort().await.unwrap();
|
|
});
|
|
|
|
let mut txn = catalog.start_transaction().await.unwrap();
|
|
|
|
barrier.wait().await;
|
|
tokio::time::sleep(Duration::from_millis(100)).await;
|
|
|
|
let topic = txn
|
|
.kafka_topics()
|
|
.get_by_name("test_txn_isolation")
|
|
.await
|
|
.unwrap();
|
|
assert!(topic.is_none());
|
|
txn.abort().await.unwrap();
|
|
|
|
insertion_task.await.unwrap();
|
|
|
|
let mut txn = catalog.start_transaction().await.unwrap();
|
|
let topic = txn
|
|
.kafka_topics()
|
|
.get_by_name("test_txn_isolation")
|
|
.await
|
|
.unwrap();
|
|
assert!(topic.is_none());
|
|
txn.abort().await.unwrap();
|
|
}
|
|
|
|
async fn test_txn_drop(catalog: Arc<dyn Catalog>) {
|
|
let capture = TracingCapture::new();
|
|
let mut txn = catalog.start_transaction().await.unwrap();
|
|
txn.kafka_topics()
|
|
.create_or_get("test_txn_drop")
|
|
.await
|
|
.unwrap();
|
|
drop(txn);
|
|
|
|
// got a warning
|
|
assert_contains!(capture.to_string(), "Dropping ");
|
|
assert_contains!(capture.to_string(), " w/o finalizing (commit or abort)");
|
|
|
|
// data is NOT committed
|
|
let mut txn = catalog.start_transaction().await.unwrap();
|
|
let topic = txn
|
|
.kafka_topics()
|
|
.get_by_name("test_txn_drop")
|
|
.await
|
|
.unwrap();
|
|
assert!(topic.is_none());
|
|
txn.abort().await.unwrap();
|
|
}
|
|
|
|
/// Upsert a namespace called `namespace_name` and write `lines` to it.
///
/// Returns the namespace record and the schema that resulted from validating/inserting
/// the line-protocol batches against it.
///
/// # Panics
///
/// Panics on any catalog error other than the namespace already existing, or if the
/// line-protocol input fails to parse or validate.
async fn populate_namespace<R>(
    repos: &mut R,
    namespace_name: &str,
    lines: &str,
) -> (Namespace, NamespaceSchema)
where
    R: RepoCollection + ?Sized,
{
    let kafka = repos.kafka_topics().create_or_get("foo").await.unwrap();
    let pool = repos.query_pools().create_or_get("foo").await.unwrap();
    let namespace = repos
        .namespaces()
        .create(namespace_name, "inf", kafka.id, pool.id)
        .await;

    // Upsert semantics: a NameExists error means the namespace is already present, so
    // fetch it instead. Any other error is unexpected and panics via unwrap.
    let namespace = match namespace {
        Ok(v) => v,
        Err(Error::NameExists { .. }) => repos
            .namespaces()
            .get_by_name(namespace_name)
            .await
            .unwrap()
            .unwrap(),
        e @ Err(_) => e.unwrap(),
    };

    // Parse the line protocol (arbitrary sequencer id 42) and push the resulting table
    // batches through schema validation, inserting any missing tables/columns.
    let batches = mutable_batch_lp::lines_to_batches(lines, 42).unwrap();
    let batches = batches.iter().map(|(table, batch)| (table.as_str(), batch));
    let ns = NamespaceSchema::new(namespace.id, kafka.id, pool.id);

    // `validate_or_insert_schema` returns None when nothing changed; fall back to the
    // freshly-constructed (empty) schema in that case.
    let schema = validate_or_insert_schema(batches, &ns, repos)
        .await
        .expect("validate schema failed")
        .unwrap_or(ns);

    (namespace, schema)
}
|
|
|
|
/// Validates `list_schemas`: after populating two namespaces, listing all schemas must
/// include both (namespace, schema) pairs.
async fn test_list_schemas(catalog: Arc<dyn Catalog>) {
    let mut repos = catalog.repositories().await;

    // Two namespaces with overlapping table names ("cpu") but different column sets.
    let ns1 = populate_namespace(
        repos.deref_mut(),
        "ns1",
        "cpu,tag=1 field=1i\nanother,tag=1 field=1.0",
    )
    .await;
    let ns2 = populate_namespace(
        repos.deref_mut(),
        "ns2",
        "cpu,tag=1 field=1u\nsomethingelse field=1u",
    )
    .await;

    // Otherwise the in-mem catalog deadlocks.... (but not postgres)
    drop(repos);

    let got = list_schemas(&*catalog)
        .await
        .expect("should be able to list the schemas")
        .collect::<Vec<_>>();

    // Other tests may have created additional namespaces, so check containment rather
    // than equality.
    assert!(got.contains(&ns1), "{:#?}\n\nwant{:#?}", got, &ns1);
    assert!(got.contains(&ns2), "{:#?}\n\nwant{:#?}", got, &ns2);
}
|
|
|
|
fn assert_metric_hit(metrics: &metric::Registry, name: &'static str) {
|
|
let histogram = metrics
|
|
.get_instrument::<Metric<DurationHistogram>>("catalog_op_duration")
|
|
.expect("failed to read metric")
|
|
.get_observer(&Attributes::from(&[("op", name), ("result", "success")]))
|
|
.expect("failed to get observer")
|
|
.fetch();
|
|
|
|
let hit_count = histogram.sample_count();
|
|
assert!(hit_count > 1, "metric did not record any calls");
|
|
}
|
|
}
|