diff --git a/Cargo.lock b/Cargo.lock
index 41e75ea901..62ffc53913 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2224,7 +2224,6 @@ dependencies = [
  "serde",
  "serde_json",
  "serde_urlencoded",
- "server",
  "snafu",
  "tempfile",
  "test_helpers",
@@ -5213,46 +5212,6 @@ dependencies = [
  "yaml-rust",
 ]
 
-[[package]]
-name = "server"
-version = "0.1.0"
-dependencies = [
- "arrow_util",
- "async-trait",
- "bytes",
- "chrono",
- "crc32fast",
- "data_types",
- "dml",
- "futures",
- "futures-util",
- "generated_types",
- "hashbrown 0.12.0",
- "influxdb_line_protocol",
- "iox_object_store",
- "iox_time",
- "job_registry",
- "metric",
- "mutable_batch_lp",
- "num_cpus",
- "object_store",
- "observability_deps",
- "parking_lot 0.12.0",
- "query",
- "rand",
- "regex",
- "service_common",
- "snafu",
- "test_helpers",
- "tokio",
- "tokio-util 0.7.1",
- "trace",
- "tracker",
- "uuid 0.8.2",
- "workspace-hack",
- "write_buffer",
-]
-
 [[package]]
 name = "service_common"
 version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index 07876b5f63..0796f6596d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -58,7 +58,6 @@ members = [
     "read_buffer",
     "router2",
     "schema",
-    "server",
     "service_common",
     "service_grpc_influxrpc",
     "service_grpc_flight",
diff --git a/influxdb_iox/Cargo.toml b/influxdb_iox/Cargo.toml
index 5a85599a99..6a76c856aa 100644
--- a/influxdb_iox/Cargo.toml
+++ b/influxdb_iox/Cargo.toml
@@ -44,7 +44,6 @@ query = { path = "../query" }
 read_buffer = { path = "../read_buffer" }
 router2 = { path = "../router2" }
 schema = { path = "../schema" }
-server = { path = "../server" }
 iox_time = { path = "../iox_time" }
 trace = { path = "../trace" }
 trace_exporters = { path = "../trace_exporters" }
diff --git a/server/Cargo.toml b/server/Cargo.toml
deleted file mode 100644
index 56227f2cfa..0000000000
--- a/server/Cargo.toml
+++ /dev/null
@@ -1,43 +0,0 @@
-[package]
-name = "server"
-version = "0.1.0"
-authors = ["pauldix "]
-edition = "2021"
-
-[dependencies] # In alphabetical order
-arrow_util = { path = "../arrow_util" }
-async-trait = "0.1"
-bytes = "1.0"
-chrono = { version = "0.4", default-features = false }
-crc32fast = "1.3.2"
-data_types = { path = "../data_types" }
-futures = "0.3"
-generated_types = { path = "../generated_types" }
-hashbrown = "0.12"
-influxdb_line_protocol = { path = "../influxdb_line_protocol" }
-iox_object_store = { path = "../iox_object_store" }
-job_registry = { path = "../job_registry" }
-metric = { path = "../metric" }
-mutable_batch_lp = { path = "../mutable_batch_lp" }
-num_cpus = "1.13.0"
-object_store = { path = "../object_store" }
-observability_deps = { path = "../observability_deps" }
-parking_lot = "0.12"
-query = { path = "../query" }
-rand = "0.8.3"
-service_common = { path = "../service_common" }
-snafu = "0.7"
-iox_time = { path = "../iox_time" }
-trace = { path = "../trace" }
-tokio = { version = "1.18", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] }
-tokio-util = { version = "0.7.1" }
-tracker = { path = "../tracker" }
-uuid = { version = "0.8", features = ["v4"] }
-write_buffer = { path = "../write_buffer" }
-workspace-hack = { path = "../workspace-hack"}
-
-[dev-dependencies] # In alphabetical order
-dml = { path = "../dml" }
-futures-util = { version = "0.3" }
-regex = "1"
-test_helpers = { path = "../test_helpers" }
diff --git a/server/src/application.rs b/server/src/application.rs
deleted file mode 100644
index 9eb5ff0493..0000000000
--- a/server/src/application.rs
+++ /dev/null
@@ -1,104 +0,0 @@
-use crate::config::{object_store::ConfigProviderObjectStorage, ConfigProvider};
-use iox_time::TimeProvider;
-use job_registry::JobRegistry;
-use object_store::DynObjectStore;
-use observability_deps::tracing::info;
-use query::exec::Executor;
-use std::sync::Arc;
-use trace::TraceCollector;
-use write_buffer::config::WriteBufferConfigFactory;
-
-/// A container for application-global resources
-/// shared between server and all DatabaseInstances
-#[derive(Debug, Clone)]
-pub struct ApplicationState {
-    object_store: Arc<DynObjectStore>,
-    write_buffer_factory: Arc<WriteBufferConfigFactory>,
-    executor: Arc<Executor>,
-    job_registry: Arc<JobRegistry>,
-    metric_registry: Arc<metric::Registry>,
-    time_provider: Arc<dyn TimeProvider>,
-    trace_collector: Option<Arc<dyn TraceCollector>>,
-    config_provider: Arc<dyn ConfigProvider>,
-}
-
-impl ApplicationState {
-    /// Creates a new `ApplicationState`
-    ///
-    /// Uses number of CPUs in the system if num_worker_threads is not set
-    pub fn new(
-        object_store: Arc<DynObjectStore>,
-        num_worker_threads: Option<usize>,
-        trace_collector: Option<Arc<dyn TraceCollector>>,
-        config_provider: Option<Arc<dyn ConfigProvider>>,
-    ) -> Self {
-        let num_threads = num_worker_threads.unwrap_or_else(num_cpus::get);
-        info!(%num_threads, "using specified number of threads per thread pool");
-
-        let metric_registry = Arc::new(metric::Registry::new());
-        let time_provider: Arc<dyn TimeProvider> = Arc::new(iox_time::SystemProvider::new());
-        let job_registry = Arc::new(JobRegistry::new(
-            Arc::clone(&metric_registry),
-            Arc::clone(&time_provider),
-        ));
-
-        let write_buffer_factory = Arc::new(WriteBufferConfigFactory::new(
-            Arc::clone(&time_provider),
-            Arc::clone(&metric_registry),
-        ));
-
-        let config_provider = config_provider.unwrap_or_else(|| {
-            Arc::new(ConfigProviderObjectStorage::new(
-                Arc::clone(&object_store),
-                Arc::clone(&time_provider),
-            ))
-        });
-
-        Self {
-            object_store,
-            write_buffer_factory,
-            executor: Arc::new(Executor::new(num_threads)),
-            job_registry,
-            metric_registry,
-            time_provider,
-            trace_collector,
-            config_provider,
-        }
-    }
-
-    pub fn object_store(&self) -> Arc<DynObjectStore> {
-        Arc::clone(&self.object_store)
-    }
-
-    pub fn write_buffer_factory(&self) -> &Arc<WriteBufferConfigFactory> {
-        &self.write_buffer_factory
-    }
-
-    pub fn job_registry(&self) -> &Arc<JobRegistry> {
-        &self.job_registry
-    }
-
-    pub fn metric_registry(&self) -> &Arc<metric::Registry> {
-        &self.metric_registry
-    }
-
-    pub fn time_provider(&self) -> &Arc<dyn TimeProvider> {
-        &self.time_provider
-    }
-
-    pub fn trace_collector(&self) -> &Option<Arc<dyn TraceCollector>> {
-        &self.trace_collector
-    }
-
-    pub fn config_provider(&self) -> &Arc<dyn ConfigProvider> {
-        &self.config_provider
-    }
-
-    pub fn executor(&self) -> &Arc<Executor> {
-        &self.executor
-    }
-
-    pub async fn join(&self) {
-        self.executor.join().await;
-    }
-}
diff --git a/server/src/config.rs b/server/src/config.rs
deleted file mode 100644
index 7835e61c61..0000000000
--- a/server/src/config.rs
+++ /dev/null
@@ -1,47 +0,0 @@
-use crate::ProvidedDatabaseRules;
-use async_trait::async_trait;
-use data_types::server_id::ServerId;
-use generated_types::influxdata::iox::management::v1::OwnerInfo;
-use uuid::Uuid;
-
-pub mod object_store;
-mod owner;
-
-/// A generic opaque error returned by [`ConfigProvider`]
-pub type Error = Box<dyn std::error::Error + Send + Sync>;
-
-/// Result type returned by [`ConfigProvider`]
-pub type Result<T, E = Error> = std::result::Result<T, E>;
-
-/// A generic trait for interacting with the configuration
-/// of a database server
-#[async_trait]
-pub trait ConfigProvider: std::fmt::Debug + Send + Sync {
-    /// Returns a list of database name and UUID pairs
-    async fn fetch_server_config(&self, server_id: ServerId) -> Result<Vec<(String, Uuid)>>;
-
-    /// Persists a list of database names and UUID pairs overwriting any
-    /// pre-existing persisted server configuration
-    async fn store_server_config(
-        &self,
-        server_id: ServerId,
-        config: &[(String, Uuid)],
-    ) -> Result<()>;
-
-    /// Returns the configuration for the database with the given `uuid`
-    async fn fetch_rules(&self, uuid: Uuid) -> Result<ProvidedDatabaseRules>;
-
-    /// Persists the configuration for the database with the given `uuid`
-    async fn store_rules(&self, uuid: Uuid, rules: &ProvidedDatabaseRules) -> Result<()>;
-
-    /// Returns the owner information for the database with the given `uuid`
-    async fn fetch_owner_info(&self, server_id: ServerId, uuid: Uuid) -> Result<OwnerInfo>;
-
-    /// Updates the owner information for the database with the given `uuid`
-    /// and records it as owned by `server_id`
-    async fn update_owner_info(&self, server_id: Option<ServerId>, uuid: Uuid) -> Result<()>;
-
-    /// Creates the owner information for the database with the given `uuid`
-    /// and records it as owned by `server_id`
-    async fn create_owner_info(&self, server_id: ServerId, uuid: Uuid) -> Result<()>;
-}
diff --git a/server/src/config/object_store.rs b/server/src/config/object_store.rs
deleted file mode 100644
index f0049342f9..0000000000
--- a/server/src/config/object_store.rs
+++ /dev/null
@@ -1,256 +0,0 @@
-use super::Result as ConfigResult;
-use crate::{
-    config::{
-        owner::{
-            create_owner_info, fetch_owner_info, update_owner_info, OwnerInfoCreateError,
-            OwnerInfoFetchError, OwnerInfoUpdateError,
-        },
-        ConfigProvider,
-    },
-    PersistedDatabaseRules, ProvidedDatabaseRules,
-};
-use async_trait::async_trait;
-use data_types::server_id::ServerId;
-use generated_types::database_rules::encode_persisted_database_rules;
-use generated_types::google::FieldViolation;
-use generated_types::influxdata::iox::management;
-use generated_types::influxdata::iox::management::v1::OwnerInfo;
-use iox_object_store::IoxObjectStore;
-use iox_time::TimeProvider;
-use object_store::DynObjectStore;
-use snafu::{ensure, ResultExt, Snafu};
-use std::sync::Arc;
-use uuid::Uuid;
-
-/// Error enumeration for [`ConfigProviderObjectStorage`]
-#[derive(Debug, Snafu)]
-pub enum Error {
-    #[snafu(display("error saving server config to object storage: {}", source))]
-    StoreServer { source: object_store::Error },
-
-    #[snafu(display("error getting server config from object storage: {}", source))]
-    FetchServer { source: object_store::Error },
-
-    #[snafu(display("error deserializing server config: {}", source))]
-    DeserializeServer {
-        source: generated_types::DecodeError,
-    },
-
-    #[snafu(display("error serializing server config: {}", source))]
-    SerializeServer {
-        source: generated_types::EncodeError,
-    },
-
-    #[snafu(display(
-        "UUID mismatch reading server config from object storage, expected {}, got {}",
-        expected,
-        actual
-    ))]
-    UuidMismatch { expected: Uuid, actual: Uuid },
-
-    #[snafu(display(
-        "invalid database uuid in server config while finding location: {}",
-        source
-    ))]
-    InvalidDatabaseLocation { source: uuid::Error },
-
-    #[snafu(display("Error saving rules for {}: {}", db_name, source))]
-    StoreRules {
-        db_name: String,
-        source: object_store::Error,
-    },
-
-    #[snafu(display("error getting database rules from object storage: {}", source))]
-    RulesFetch { source: object_store::Error },
-
-    #[snafu(display("error deserializing database rules: {}", source))]
-    DeserializeRules {
-        source: generated_types::DecodeError,
-    },
-
-    #[snafu(display("error serializing database rules: {}", source))]
-    SerializeRules {
-        source: generated_types::EncodeError,
-    },
-
-    #[snafu(display("error converting to database rules: {}", source))]
-    ConvertingRules { source: FieldViolation },
-
-    #[snafu(display("error creating database owner info: {}", source))]
-    CreatingOwnerInfo { source: OwnerInfoCreateError },
-
-    #[snafu(display("error getting database owner info: {}", source))]
-    FetchingOwnerInfo { source: OwnerInfoFetchError },
-
-    #[snafu(display("error updating database owner info: {}", source))]
-    UpdatingOwnerInfo { source: OwnerInfoUpdateError },
-}
-
-type Result<T, E = Error> = std::result::Result<T, E>;
-
-/// Parse the UUID from an object storage path
-///
-/// TODO: Encode this data directly in server config
-fn parse_location(location: &str) -> Result<Uuid> {
-    // Strip trailing / if any
-    let location = location.strip_suffix('/').unwrap_or(location);
-    let uuid = location.rsplit('/').next().unwrap();
-
-    std::str::FromStr::from_str(uuid).context(InvalidDatabaseLocationSnafu)
-}
-
-#[derive(Debug)]
-pub struct ConfigProviderObjectStorage {
-    object_store: Arc<DynObjectStore>,
-    time_provider: Arc<dyn TimeProvider>,
-}
-
-impl ConfigProviderObjectStorage {
-    pub fn new(object_store: Arc<DynObjectStore>, time_provider: Arc<dyn TimeProvider>) -> Self {
-        Self {
-            object_store,
-            time_provider,
-        }
-    }
-
-    fn iox_object_store(&self, uuid: Uuid) -> IoxObjectStore {
-        let root_path = IoxObjectStore::root_path_for(&*self.object_store, uuid);
-        IoxObjectStore::existing(Arc::clone(&self.object_store), root_path)
-    }
-}
-
-#[async_trait]
-impl ConfigProvider for ConfigProviderObjectStorage {
-    async fn fetch_server_config(&self, server_id: ServerId) -> ConfigResult<Vec<(String, Uuid)>> {
-        let fetch_result =
-            IoxObjectStore::get_server_config_file(&*self.object_store, server_id).await;
-
-        let server_config_bytes = match fetch_result {
-            Ok(bytes) => bytes,
-            // If this is the first time starting up this server and there is no config file yet,
-            // this isn't a problem. Start an empty server config.
-            Err(object_store::Error::NotFound { .. }) => bytes::Bytes::new(),
-            Err(source) => return Err(Error::FetchServer { source }.into()),
-        };
-
-        let server_config =
-            generated_types::server_config::decode_persisted_server_config(server_config_bytes)
-                .context(DeserializeServerSnafu)?;
-
-        let config = server_config
-            .databases
-            .into_iter()
-            .map(|(name, location)| Ok((name, parse_location(&location)?)))
-            .collect::<Result<Vec<_>>>()?;
-
-        self.store_server_config(server_id, &config).await?;
-        Ok(config)
-    }
-
-    async fn store_server_config(
-        &self,
-        server_id: ServerId,
-        config: &[(String, Uuid)],
-    ) -> ConfigResult<()> {
-        let databases = config
-            .iter()
-            .map(|(name, database)| {
-                (
-                    name.to_string(),
-                    IoxObjectStore::root_path_for(&*self.object_store, *database).to_string(),
-                )
-            })
-            .collect();
-
-        let data = management::v1::ServerConfig { databases };
-
-        let mut encoded = bytes::BytesMut::new();
-        generated_types::server_config::encode_persisted_server_config(&data, &mut encoded)
-            .context(SerializeServerSnafu)?;
-
-        let bytes = encoded.freeze();
-
-        IoxObjectStore::put_server_config_file(&*self.object_store, server_id, bytes)
-            .await
-            .context(StoreServerSnafu)?;
-
-        Ok(())
-    }
-
-    async fn fetch_rules(&self, uuid: Uuid) -> ConfigResult<ProvidedDatabaseRules> {
-        let bytes = IoxObjectStore::load_database_rules(Arc::clone(&self.object_store), uuid)
-            .await
-            .context(RulesFetchSnafu)?;
-
-        let proto: management::v1::PersistedDatabaseRules =
-            generated_types::database_rules::decode_persisted_database_rules(bytes)
-                .context(DeserializeRulesSnafu)?;
-
-        let rules: PersistedDatabaseRules = proto.try_into().context(ConvertingRulesSnafu)?;
-
-        ensure!(
-            uuid == rules.uuid(),
-            UuidMismatchSnafu {
-                expected: uuid,
-                actual: rules.uuid()
-            }
-        );
-
-        Ok(rules.into_inner().1)
-    }
-
-    async fn store_rules(&self, uuid: Uuid, rules: &ProvidedDatabaseRules) -> ConfigResult<()> {
-        let persisted_database_rules = management::v1::PersistedDatabaseRules {
-            uuid: uuid.as_bytes().to_vec(),
-            // Note we save the original version
-            rules: Some(rules.original().clone()),
-        };
-
-        let mut data = bytes::BytesMut::new();
-        encode_persisted_database_rules(&persisted_database_rules, &mut data)
-            .context(SerializeRulesSnafu)?;
-
-        self.iox_object_store(uuid)
-            .put_database_rules_file(data.freeze())
-            .await
-            .context(StoreRulesSnafu {
-                db_name: rules.db_name(),
-            })?;
-
-        Ok(())
-    }
-
-    async fn fetch_owner_info(&self, _server_id: ServerId, uuid: Uuid) -> ConfigResult<OwnerInfo> {
-        let config = fetch_owner_info(&self.iox_object_store(uuid))
-            .await
-            .context(FetchingOwnerInfoSnafu)?;
-
-        Ok(config)
-    }
-
-    async fn update_owner_info(&self, server_id: Option<ServerId>, uuid: Uuid) -> ConfigResult<()> {
-        let path = server_id.map(|server_id| {
-            IoxObjectStore::server_config_path(&*self.object_store, server_id).to_string()
-        });
-
-        update_owner_info(
-            server_id,
-            path,
-            self.time_provider.now(),
-            &self.iox_object_store(uuid),
-        )
-        .await
-        .context(UpdatingOwnerInfoSnafu)?;
-
-        Ok(())
-    }
-
-    async fn create_owner_info(&self, server_id: ServerId, uuid: Uuid) -> ConfigResult<()> {
-        let path = IoxObjectStore::server_config_path(&*self.object_store, server_id).to_string();
-        create_owner_info(server_id, path, &self.iox_object_store(uuid))
-            .await
-            .context(CreatingOwnerInfoSnafu)?;
-
-        Ok(())
-    }
-}
diff --git a/server/src/config/owner.rs b/server/src/config/owner.rs
deleted file mode 100644
index 0fa25fa8e1..0000000000
--- a/server/src/config/owner.rs
+++ /dev/null
@@ -1,128 +0,0 @@
-//! Code related to managing ownership information in owner.pb
-
-use data_types::server_id::ServerId;
-use generated_types::influxdata::iox::management;
-use iox_object_store::IoxObjectStore;
-use iox_time::Time;
-use snafu::{ensure, ResultExt, Snafu};
-
-#[derive(Debug, Snafu)]
-pub enum OwnerInfoFetchError {
-    #[snafu(display("error loading owner info: {}", source))]
-    Loading { source: object_store::Error },
-
-    #[snafu(display("error decoding owner info: {}", source))]
-    Decoding {
-        source: generated_types::DecodeError,
-    },
-}
-
-pub(crate) async fn fetch_owner_info(
-    iox_object_store: &IoxObjectStore,
-) -> Result<management::v1::OwnerInfo, OwnerInfoFetchError> {
-    let raw_owner_info = iox_object_store
-        .get_owner_file()
-        .await
-        .context(LoadingSnafu)?;
-
-    generated_types::server_config::decode_database_owner_info(raw_owner_info)
-        .context(DecodingSnafu)
-}
-
-#[derive(Debug, Snafu)]
-pub enum OwnerInfoCreateError {
-    #[snafu(display("could not create new owner info file; it already exists"))]
-    OwnerFileAlreadyExists,
-
-    #[snafu(display("error creating database owner info file: {}", source))]
-    CreatingOwnerFile { source: Box<object_store::Error> },
-}
-
-/// Create a new owner info file for this database. Existing content at this location in object
-/// storage is an error.
-pub(crate) async fn create_owner_info(
-    server_id: ServerId,
-    server_location: String,
-    iox_object_store: &IoxObjectStore,
-) -> Result<(), OwnerInfoCreateError> {
-    ensure!(
-        matches!(
-            iox_object_store.get_owner_file().await,
-            Err(object_store::Error::NotFound { .. })
-        ),
-        OwnerFileAlreadyExistsSnafu,
-    );
-
-    let owner_info = management::v1::OwnerInfo {
-        id: server_id.get_u32(),
-        location: server_location,
-        transactions: vec![],
-    };
-    let mut encoded = bytes::BytesMut::new();
-    generated_types::server_config::encode_database_owner_info(&owner_info, &mut encoded)
-        .expect("owner info serialization should be valid");
-    let encoded = encoded.freeze();
-
-    iox_object_store
-        .put_owner_file(encoded)
-        .await
-        .map_err(Box::new)
-        .context(CreatingOwnerFileSnafu)?;
-
-    Ok(())
-}
-
-#[derive(Debug, Snafu)]
-pub enum OwnerInfoUpdateError {
-    #[snafu(display("could not fetch existing owner info: {}", source))]
-    CouldNotFetch { source: OwnerInfoFetchError },
-
-    #[snafu(display("error updating database owner info file: {}", source))]
-    UpdatingOwnerFile { source: object_store::Error },
-}
-
-/// Fetch existing owner info, set the `id` and `location`, insert a new entry into the transaction
-/// history, and overwrite the contents of the owner file. Errors if the owner info file does NOT
-/// currently exist.
-pub(crate) async fn update_owner_info(
-    new_server_id: Option<ServerId>,
-    new_server_location: Option<String>,
-    timestamp: Time,
-    iox_object_store: &IoxObjectStore,
-) -> Result<(), OwnerInfoUpdateError> {
-    let management::v1::OwnerInfo {
-        id,
-        location,
-        mut transactions,
-    } = fetch_owner_info(iox_object_store)
-        .await
-        .context(CouldNotFetchSnafu)?;
-
-    let new_transaction = management::v1::OwnershipTransaction {
-        id,
-        location,
-        timestamp: Some(timestamp.date_time().into()),
-    };
-    transactions.push(new_transaction);
-
-    // TODO: only save latest 100 transactions
-
-    let new_owner_info = management::v1::OwnerInfo {
-        // 0 is not a valid server ID, so it indicates "unowned".
-        id: new_server_id.map(|s| s.get_u32()).unwrap_or_default(),
-        // Owner location is empty when the database is unowned.
-        location: new_server_location.unwrap_or_default(),
-        transactions,
-    };
-
-    let mut encoded = bytes::BytesMut::new();
-    generated_types::server_config::encode_database_owner_info(&new_owner_info, &mut encoded)
-        .expect("owner info serialization should be valid");
-    let encoded = encoded.freeze();
-
-    iox_object_store
-        .put_owner_file(encoded)
-        .await
-        .context(UpdatingOwnerFileSnafu)?;
-    Ok(())
-}
diff --git a/server/src/database.rs b/server/src/database.rs
deleted file mode 100644
index b34b7cf5a2..0000000000
--- a/server/src/database.rs
+++ /dev/null
@@ -1,1247 +0,0 @@
-use std::{future::Future, sync::Arc};
-
-use futures::{future::FusedFuture, FutureExt};
-use parking_lot::{Mutex, RwLock, RwLockReadGuard};
-use snafu::{ResultExt, Snafu};
-use tokio::task::JoinError;
-use tokio_util::sync::CancellationToken;
-use uuid::Uuid;
-
-use data_types::error::ErrorLogger;
-use data_types::{job::Job, DatabaseName};
-use db::Db;
-use generated_types::{
-    database_state::DatabaseState as DatabaseStateCode, influxdata::iox::management,
-};
-use internal_types::freezable::Freezable;
-use iox_object_store::IoxObjectStore;
-use observability_deps::tracing::{error, info, warn};
-use parquet_catalog::core::{PreservedCatalog, PreservedCatalogConfig};
-use tracker::{TaskTracker, TrackedFutureExt};
-
-use crate::{
-    database::{
-        init::{initialize_database, DatabaseState, InitError},
-        state::{DatabaseConfig, DatabaseShared},
-    },
-    rules::ProvidedDatabaseRules,
-    ApplicationState,
-};
-
-pub(crate) mod init;
-pub(crate) mod state;
-
-/// Matches an error [`DatabaseState`] and clones the contained state
-macro_rules! error_state {
-    ($s:expr, $transition: literal, $code: pat) => {
-        match $s.state_code() {
-            $code => {}
-            state => {
-                return InvalidStateSnafu {
-                    db_name: &$s.shared.config.read().name,
-                    state,
-                    transition: $transition,
-                }
-                .fail()
-            }
-        }
-    };
-}
-
-#[derive(Debug, Snafu)]
-pub enum Error {
-    #[snafu(display(
-        "database ({}) in invalid state ({:?}) for transition ({})",
-        db_name,
-        state,
-        transition
-    ))]
-    InvalidState {
-        db_name: String,
-        state: DatabaseStateCode,
-        transition: String,
-    },
-
-    #[snafu(display(
-        "failed to wipe preserved catalog of database ({}): {}",
-        db_name,
-        source
-    ))]
-    WipePreservedCatalog {
-        db_name: String,
-        source: Box<parquet_catalog::core::Error>,
-    },
-
-    #[snafu(display(
-        "database ({}) in invalid state for catalog rebuild ({:?}). Expected {}",
-        db_name,
-        state,
-        expected
-    ))]
-    InvalidStateForRebuild {
-        db_name: String,
-        expected: String,
-        state: DatabaseStateCode,
-    },
-
-    #[snafu(display(
-        "Internal error during rebuild. Database ({}) transitioned to unexpected state ({:?})",
-        db_name,
-        state,
-    ))]
-    UnexpectedTransitionForRebuild {
-        db_name: String,
-        state: DatabaseStateCode,
-    },
-
-    #[snafu(display(
-        "failed to rebuild preserved catalog of database ({}): {}",
-        db_name,
-        source
-    ))]
-    RebuildPreservedCatalog {
-        db_name: String,
-        source: Box<parquet_catalog::rebuild::Error>,
-    },
-
-    #[snafu(display("failed to skip replay for database ({}): {}", db_name, source))]
-    SkipReplay {
-        db_name: String,
-        source: Box<InitError>,
-    },
-
-    #[snafu(display("cannot update database rules for {} in state {}", db_name, state))]
-    RulesNotUpdateable {
-        db_name: String,
-        state: DatabaseStateCode,
-    },
-
-    #[snafu(display("cannot persist updated rules: {}", source))]
-    CannotPersistUpdatedRules { source: crate::config::Error },
-
-    #[snafu(display(
-        "cannot release database named {} that has already been released",
-        db_name
-    ))]
-    CannotReleaseUnowned { db_name: String },
-
-    #[snafu(display("cannot release database {}: {}", db_name, source))]
-    CannotRelease {
-        db_name: String,
-        source: crate::config::Error,
-    },
-}
-
-/// A `Database` represents a single configured IOx database - i.e. an
-/// entity with a corresponding set of `DatabaseRules`.
-///
-/// `Database` composes together the various subsystems responsible for implementing
-/// `DatabaseRules` and handles their startup and shutdown. This includes instance-local
-/// data storage (i.e. `Db`), the write buffer, request routing, data lifecycle, etc...
-#[derive(Debug)]
-pub struct Database {
-    /// The state shared with the background worker
-    shared: Arc<DatabaseShared>,
-
-    /// The cancellation token for the current background worker
-    shutdown: Mutex<CancellationToken>,
-}
-
-impl Database {
-    #[allow(rustdoc::private_intra_doc_links)]
-    /// Create in-mem database object.
-    ///
-    /// This is backed by an existing database, which was
-    /// [created](crate::database::init::create_empty_db_in_object_store) some time in the
-    /// past.
-    pub fn new(application: Arc<ApplicationState>, config: DatabaseConfig) -> Self {
-        info!(
-            db_name=%config.name,
-            "new database"
-        );
-
-        let path =
-            IoxObjectStore::root_path_for(&*application.object_store(), config.database_uuid);
-        // The database state machine handles the case of this path not existing, as it will end
-        // up in [`DatabaseState::RulesLoadError`] or [`DatabaseState::OwnerInfoLoadError`]
-        let iox_object_store = Arc::new(IoxObjectStore::existing(
-            Arc::clone(&application.object_store()),
-            path,
-        ));
-
-        let shared = Arc::new(DatabaseShared {
-            config: RwLock::new(config),
-            application,
-            iox_object_store,
-            state: RwLock::new(Freezable::new(DatabaseState::Shutdown(None))),
-            state_notify: Default::default(),
-        });
-
-        let shutdown = new_database_worker(Arc::clone(&shared));
-
-        Self {
-            shared,
-            shutdown: Mutex::new(shutdown),
-        }
-    }
-
-    /// Shutdown and release this database
-    pub async fn release(&self) -> Result<Uuid, Error> {
-        let db_name = self.name();
-        let db_name = db_name.as_str();
-
-        self.shutdown();
-        let _ = self.join().await.log_if_error("releasing database");
-
-        let uuid = self.uuid();
-
-        self.shared
-            .application
-            .config_provider()
-            .update_owner_info(None, uuid)
-            .await
-            .context(CannotReleaseSnafu { db_name })?;
-
-        Ok(uuid)
-    }
-
-    /// Triggers shutdown of this `Database` if it is running
-    pub fn shutdown(&self) {
-        let db_name = self.name();
-        info!(%db_name, "database shutting down");
-        self.shutdown.lock().cancel()
-    }
-
-    /// Trigger a restart of this `Database` and wait for it to re-initialize
-    pub async fn restart(&self) -> Result<(), Arc<InitError>> {
-        self.restart_with_options(false).await
-    }
-
-    /// Trigger a restart of this `Database` and wait for it to re-initialize
-    pub async fn restart_with_options(&self, skip_replay: bool) -> Result<(), Arc<InitError>> {
-        let db_name = self.name();
-        info!(%db_name, "restarting database");
-
-        // Ensure database is shut down
-        self.shutdown();
-        let _ = self.join().await.log_if_error("restarting database");
-
-        self.shared.config.write().skip_replay = skip_replay;
-
-        {
-            let mut shutdown = self.shutdown.lock();
-            *shutdown = new_database_worker(Arc::clone(&self.shared));
-        }
-
-        self.wait_for_init().await
-    }
-
-    /// Waits for the background worker of this `Database` to exit
-    ///
-    /// TODO: Rename to wait_for_shutdown
-    pub fn join(&self) -> impl Future<Output = Result<(), Arc<JoinError>>> {
-        let shared = Arc::clone(&self.shared);
-        async move {
-            loop {
-                // Register interest before checking to avoid race
-                let notify = shared.state_notify.notified();
-
-                match &**shared.state.read() {
-                    DatabaseState::Shutdown(Some(e)) => return Err(Arc::clone(e)),
-                    DatabaseState::Shutdown(None) => return Ok(()),
-                    state => info!(%state, "waiting for database shutdown"),
-                }
-
-                notify.await;
-            }
-        }
-    }
-
-    /// Returns the config of this database
-    pub fn config(&self) -> DatabaseConfig {
-        self.shared.config.read().clone()
-    }
-
-    /// Returns the initialization status of this database
-    pub fn state_code(&self) -> DatabaseStateCode {
-        self.shared.state.read().state_code()
-    }
-
-    /// Returns the initialization error of this database if any
-    pub fn init_error(&self) -> Option<Arc<InitError>> {
-        self.shared.state.read().error().cloned()
-    }
-
-    /// Returns true if this database is initialized
-    pub fn is_initialized(&self) -> bool {
-        self.shared.state.read().get_initialized().is_some()
-    }
-
-    /// Returns true if this database is shutdown
-    pub fn is_shutdown(&self) -> bool {
-        self.shared.state.read().is_shutdown()
-    }
-
-    /// Returns the database rules if they're loaded
-    pub fn provided_rules(&self) -> Option<Arc<ProvidedDatabaseRules>> {
-        self.shared.state.read().provided_rules()
-    }
-
-    /// Returns the database UUID
-    pub fn uuid(&self) -> Uuid {
-        self.shared.config.read().database_uuid
-    }
-
-    /// Returns the info about the owning server if it has been loaded
-    pub fn owner_info(&self) -> Option<management::v1::OwnerInfo> {
-        self.shared.state.read().owner_info()
-    }
-
-    /// Database name
-    pub fn name(&self) -> DatabaseName<'static> {
-        self.shared.config.read().name.clone()
-    }
-
-    /// Update the database rules, panic'ing if the state is invalid
-    pub async fn update_provided_rules(
-        &self,
-        provided_rules: ProvidedDatabaseRules,
-    ) -> Result<Arc<ProvidedDatabaseRules>, Error> {
-        // get a handle to signal our intention to update the state
-        let handle = self.shared.state.read().freeze();
-
-        // wait for the freeze handle. Only one thread can ever have
-        // it at any time so we know past this point no other thread
-        // can change the DatabaseState (even though this code
-        // doesn't hold a lock for the entire time)
-        let handle = handle.await;
-
-        error_state!(self, "UpdateProvidedRules", DatabaseStateCode::Initialized);
-
-        // Attempt to persist to object store, if that fails, roll
-        // back the whole transaction (leave the rules unchanged).
-        //
-        // Even though we don't hold a lock here, the freeze handle
-        // ensures the state can not be modified.
-        self.shared
-            .application
-            .config_provider()
-            .store_rules(self.uuid(), &provided_rules)
-            .await
-            .context(CannotPersistUpdatedRulesSnafu)?;
-
-        let mut state = self.shared.state.write();
-
-        // Exchange FreezeHandle for mutable access to DatabaseState
-        // via WriteGuard
-        let mut state = state.unfreeze(handle);
-
-        if let DatabaseState::Initialized(initialized) = &mut *state {
-            initialized
-                .db()
-                .update_rules(Arc::clone(provided_rules.rules()));
-
-            let rules = Arc::new(provided_rules);
-            initialized.set_provided_rules(Arc::clone(&rules));
-            Ok(rules)
-        } else {
-            // The freeze handle should have prevented any changes to
-            // the database state between when it was checked above
-            // and now
-            unreachable!()
-        }
-    }
-
-    /// Returns the IoxObjectStore
-    pub fn iox_object_store(&self) -> Arc<IoxObjectStore> {
-        Arc::clone(&self.shared.iox_object_store)
-    }
-
-    /// Gets access to an initialized `Db`
-    pub fn initialized_db(&self) -> Option<Arc<Db>> {
-        let initialized =
-            RwLockReadGuard::try_map(self.shared.state.read(), |state| state.get_initialized())
-                .ok()?;
-        Some(Arc::clone(initialized.db()))
-    }
-
-    /// Returns Ok(()) when the Database is initialized, or the error
-    /// if one is encountered
-    pub async fn wait_for_init(&self) -> Result<(), Arc<InitError>> {
-        loop {
-            // Register interest before checking to avoid race
-            let notify = self.shared.state_notify.notified();
-
-            // Note: this is not guaranteed to see non-terminal states
-            // as the state machine may advance past them between
-            // the notification being fired, and this task waking up
-            match &**self.shared.state.read() {
-                DatabaseState::Known(_)
-                | DatabaseState::OwnerInfoLoaded(_)
-                | DatabaseState::RulesLoaded(_)
-                | DatabaseState::CatalogLoaded(_) => {} // Non-terminal state
-                DatabaseState::Initialized(_) => return Ok(()),
-                DatabaseState::OwnerInfoLoadError(_, e)
-                | DatabaseState::RulesLoadError(_, e)
-                | DatabaseState::CatalogLoadError(_, e)
-                | DatabaseState::WriteBufferCreationError(_, e)
-                | DatabaseState::ReplayError(_, e) => return Err(Arc::clone(e)),
-                DatabaseState::Shutdown(_) => return Err(Arc::new(InitError::Shutdown)),
-            }
-
-            notify.await;
-        }
-    }
-
-    /// Recover from a CatalogLoadError by wiping the catalog
-    pub async fn wipe_preserved_catalog(self: &Arc<Self>) -> Result<TaskTracker<Job>, Error> {
-        let db_name = self.name();
-
-        error_state!(
-            self,
-            "WipePreservedCatalog",
-            DatabaseStateCode::CatalogLoadError
-                | DatabaseStateCode::WriteBufferCreationError
-                | DatabaseStateCode::ReplayError
-                | DatabaseStateCode::Shutdown
-        );
-
-        // Shutdown database
-        self.shutdown();
-        let _ = self.join().await.log_if_error("wipe preserved catalog");
-
-        // Hold a freeze handle to prevent other processes from restarting the database
-        let handle = self.shared.state.read().freeze();
-        let handle = handle.await;
-
-        error_state!(self, "WipePreservedCatalog", DatabaseStateCode::Shutdown);
-
-        let registry = self.shared.application.job_registry();
-        let (tracker, registration) = registry.register(Job::WipePreservedCatalog {
-            db_name: Arc::from(db_name.as_str()),
-        });
-
-        let this = Arc::clone(self);
-        tokio::spawn(
-            async move {
-                // wipe the actual catalog
-                PreservedCatalog::wipe(&this.shared.iox_object_store)
-                    .await
-                    .map_err(Box::new)
-                    .context(WipePreservedCatalogSnafu { db_name: &db_name })?;
-
-                info!(%db_name, "wiped preserved catalog");
-
-                // Should be guaranteed by the freeze handle
-                assert_eq!(this.state_code(), DatabaseStateCode::Shutdown);
-
-                std::mem::drop(handle);
-
-                let _ = this.restart().await;
-                info!(%db_name, "restarted database following wipe");
-
-                Ok::<_, Error>(())
-            }
-            .track(registration),
-        );
-
-        Ok(tracker)
-    }
-
-    /// Rebuilding the catalog from parquet files. This can be used to
-    /// recover from a CatalogLoadError, or if new parquet files are
-    /// added to the data directory
-    pub async fn rebuild_preserved_catalog(
-        self: &Arc<Self>,
-        force: bool,
-    ) -> Result<TaskTracker<Job>, Error> {
-        if !force {
-            error_state!(
-                self,
-                "RebuildPreservedCatalog",
-                DatabaseStateCode::CatalogLoadError | DatabaseStateCode::Shutdown
-            );
-        }
-
-        let shared = Arc::clone(&self.shared);
-        let db_name = self.name();
-
-        // Shutdown database
-        self.shutdown();
-        let _ = self.join().await.log_if_error("rebuilding catalog");
-
-        // Obtain and hold a freeze handle to ensure nothing restarts the database
-        let handle = self.shared.state.read().freeze();
-        let handle = handle.await;
-
-        error_state!(self, "RebuildPreservedCatalog", DatabaseStateCode::Shutdown);
-
-        let registry = self.shared.application.job_registry();
-        let (tracker, registration) = registry.register(Job::RebuildPreservedCatalog {
-            db_name: Arc::from(db_name.as_str()),
-        });
-
-        let this = Arc::clone(self);
-        tokio::spawn(
-            async move {
-                info!(%db_name, "rebuilding catalog from parquet files");
-
-                // Now wipe the catalog and rebuild it from parquet files
-                PreservedCatalog::wipe(&this.shared.iox_object_store)
-                    .await
-                    .map_err(Box::new)
-                    .context(WipePreservedCatalogSnafu { db_name: &db_name })?;
-
-                info!(%db_name, "wiped preserved catalog");
-
-                let config = PreservedCatalogConfig::new(
-                    Arc::clone(&this.shared.iox_object_store),
-                    db_name.to_string(),
-                    Arc::clone(shared.application.time_provider()),
-                );
-
-                parquet_catalog::rebuild::rebuild_catalog(config, false)
-                    .await
-                    .map_err(Box::new)
-                    .context(RebuildPreservedCatalogSnafu { db_name: &db_name })?;
-
-                // Double check the state hasn't changed (we hold the
-                // freeze handle to make sure it does not)
-                assert_eq!(this.state_code(), DatabaseStateCode::Shutdown);
-
-                std::mem::drop(handle);
-
-                info!(%db_name, "rebuilt preserved catalog");
-
-                let _ = this.restart().await;
-                info!(%db_name, "restarted following rebuild");
-
-                Ok::<_, Error>(())
-            }
-            .track(registration),
-        );
-
-        Ok(tracker)
-    }
-
-    /// Recover from a ReplayError by skipping replay
-    pub async fn skip_replay(&self) -> Result<(), Error> {
-        error_state!(self, "SkipReplay", DatabaseStateCode::ReplayError);
-
-        self.shared.config.write().skip_replay = true;
-
-        // wait for DB to leave a potential `ReplayError` state
-        loop {
-            // Register interest before checking to avoid race
-            let notify = self.shared.state_notify.notified();
-
-            match &**self.shared.state.read() {
-                DatabaseState::ReplayError(_, _) => {}
-                _ => break,
-            }
-
-            notify.await;
-        }
-
-        Ok(())
-    }
-}
-
-impl Drop for Database {
-    fn drop(&mut self) {
-        let db_name = self.name();
-        let shutdown = self.shutdown.lock().clone();
-        if !shutdown.is_cancelled() {
-            warn!(%db_name, "database dropped without calling shutdown()");
-            shutdown.cancel();
-        }
-
-        if !self.shared.state.read().is_shutdown() {
-            warn!(%db_name, "database dropped without waiting for worker termination");
-        }
-    }
-}
-
-/// Spawn a new background worker for a database in the `shutdown` state
-/// The state is reset to `Known` then the background worker attempts to drive the
-/// Database through initialization
-fn new_database_worker(shared: Arc<DatabaseShared>) -> CancellationToken {
-    let shutdown = CancellationToken::new();
-
-    let db_name = shared.config.read().name.clone();
-
-    {
-        let mut state = shared.state.write();
-        if !state.is_shutdown() {
-            panic!(
-                "cannot spawn worker for database {} that is not shutdown!",
-                db_name
-            )
-        }
-        let handle = state.try_freeze().expect("restart race");
-        *state.unfreeze(handle) = DatabaseState::new_known();
-    }
-
-    // Spawn a worker task
-    let worker = tokio::spawn(background_worker(Arc::clone(&shared), shutdown.clone()));
-
-    // We spawn a watchdog task to detect and respond to the background worker exiting
-    let _ = tokio::spawn(async move {
-        let error = match worker.await {
-            Ok(_) => {
-                info!(%db_name, "observed clean shutdown of database worker");
-                None
-            }
-            Err(e) => {
-                if e.is_panic() {
-                    error!(
-                        %db_name,
-                        %e,
-                        "panic in database worker"
-                    );
-                } else {
-                    error!(
-                        %db_name,
-                        %e,
-                        "unexpected database worker shut down - shutting down server"
-                    );
-                }
-
-                Some(Arc::new(e))
-            }
-        };
-
-        let handle_fut = shared.state.read().freeze();
-        let handle = handle_fut.await;
-
-        // This is the only place that sets the shutdown state ensuring that
-        // the shutdown state is guaranteed to not have a background worker running
-        *shared.state.write().unfreeze(handle) = DatabaseState::Shutdown(error);
-        shared.state_notify.notify_waiters();
-    });
-
-    shutdown
-}
-
-/// The background worker for `Database` - there should only ever be one
-async fn background_worker(shared: Arc<DatabaseShared>, shutdown: CancellationToken) {
-    let db_name = shared.config.read().name.clone();
-    info!(%db_name, "started database background worker");
-
-    initialize_database(shared.as_ref(), shutdown.clone()).await;
-
-    if shutdown.is_cancelled() {
-        // TODO: Shutdown intermediate workers (#2813)
-        info!(%db_name, "database shutdown before finishing initialization");
-        return;
-    }
-
-    let (db, write_buffer_consumer, lifecycle_worker) = {
-        let state = shared.state.read();
-        let initialized = state.get_initialized().expect("expected initialized");
-
-        (
-            Arc::clone(initialized.db()),
-            initialized.write_buffer_consumer().map(Arc::clone),
-            Arc::clone(initialized.lifecycle_worker()),
-        )
-    };
-
-    info!(%db_name, "database finished initialization - starting Db worker");
-
-    db::utils::panic_test(|| Some(format!("database background worker: {}", db_name,)));
-
-    let db_shutdown = CancellationToken::new();
-    let db_worker = db.background_worker(db_shutdown.clone()).fuse();
-    futures::pin_mut!(db_worker);
-
-    // Future that completes if the WriteBufferConsumer exits
-    let consumer_join = match &write_buffer_consumer {
-        Some(consumer) => futures::future::Either::Left(consumer.join()),
-        None => futures::future::Either::Right(futures::future::pending()),
-    }
-    .fuse();
-    futures::pin_mut!(consumer_join);
-
-    // Future that completes if the LifecycleWorker exits
-    let lifecycle_join = lifecycle_worker.join().fuse();
-    futures::pin_mut!(lifecycle_join);
-
-    // This inner loop runs until either:
-    //
-    // - Something calls `Database::shutdown`
-    // - The Database transitions away from `DatabaseState::Initialized`
-    //
-    // In the latter case it will restart the initialization procedure
-    while !shutdown.is_cancelled() {
-        if shared.state.read().get_initialized().is_none() {
-            info!(%db_name, "database no longer initialized");
-            break;
-        }
-
-        let shutdown_fut = shutdown.cancelled().fuse();
-        futures::pin_mut!(shutdown_fut);
-
-        // We must use `futures::select` as opposed to the often more ergonomic `tokio::select`
-        // Because of the need to "re-use" the background worker future
-        // TODO: Make Db own its own background loop (or remove it)
-        futures::select! {
-            _ = shutdown_fut => info!("database shutting down"),
-            _ = consumer_join => {
-                error!(%db_name, "unexpected shutdown of write buffer consumer - bailing out");
-                shutdown.cancel();
-            }
-            _ = lifecycle_join => {
-                error!(%db_name, "unexpected shutdown of lifecycle worker - bailing out");
-                shutdown.cancel();
-            }
-            _ = db_worker => {
-                error!(%db_name, "unexpected shutdown of db - bailing out");
-                shutdown.cancel();
-            }
-        }
-    }
-
-    if let Some(consumer) = write_buffer_consumer {
-        info!(%db_name, "shutting down write buffer consumer");
-        consumer.shutdown();
-        if let Err(e) = consumer.join().await {
-            error!(%db_name, %e, "error shutting down write buffer consumer")
-        }
-    }
-
-    if !lifecycle_join.is_terminated() {
-        info!(%db_name, "shutting down lifecycle worker");
-        lifecycle_worker.shutdown();
-        if let Err(e) = lifecycle_worker.join().await {
-            error!(%db_name, %e, "error shutting down lifecycle worker")
-        }
-    }
-
-    if !db_worker.is_terminated() {
-        info!(%db_name, "waiting for db worker shutdown");
-        db_shutdown.cancel();
-        db_worker.await
-    }
-
-    info!(%db_name, "draining tasks");
-
-    // Loop in case tasks are spawned during shutdown
-    loop {
-        use futures::stream::{FuturesUnordered, StreamExt};
-
-        // We get a list of jobs from the global registry and filter them for this database
-        let jobs = shared.application.job_registry().running();
-        let mut futures: FuturesUnordered<_> = jobs
-            .iter()
-            .filter_map(|tracker| {
-                let db_name2 = tracker.metadata().db_name()?;
-                if db_name2.as_ref() != db_name.as_str() {
-                    return None;
-                }
-                Some(tracker.join())
-            })
-            .collect();
-
-        if futures.is_empty() {
-            break;
-        }
-
-        info!(%db_name, count=futures.len(), "waiting for jobs");
-
-        while futures.next().await.is_some() {}
-    }
-
-    info!(%db_name, "database worker finished");
-}
-
-#[cfg(test)]
-mod tests {
-    use std::time::Duration;
-    use std::{num::NonZeroU32, time::Instant};
-
-    use uuid::Uuid;
-
-    use data_types::server_id::ServerId;
-    use data_types::{
-        database_rules::{PartitionTemplate, TemplatePart},
-        sequence::Sequence,
-        write_buffer::WriteBufferConnection,
-    };
-    use object_store::{DynObjectStore, ObjectStoreImpl, ObjectStoreIntegration, ThrottleConfig};
-    use test_helpers::assert_contains;
-    use write_buffer::mock::MockBufferSharedState;
-
-    use crate::test_utils::make_application;
-
-    use super::init::{claim_database_in_object_store, create_empty_db_in_object_store};
-    use super::*;
-
-    #[tokio::test]
-    async fn database_shutdown_waits_for_jobs() {
-        let (application, database) = initialized_database().await;
-
-        // Database should be running
-        assert!(database.join().now_or_never().is_none());
-
-        // Spawn a dummy job associated with this database
-        let database_dummy_job = application
-            .job_registry()
-            .spawn_dummy_job(vec![50_000_000], Some(Arc::from("test")));
-
-        // Spawn a dummy job not associated with this database
-        let server_dummy_job = application
-            .job_registry()
-            .spawn_dummy_job(vec![10_000_000_000], None);
-
-        // Trigger database shutdown
-        database.shutdown();
-
-        // Expect timeout to not complete
-        tokio::time::timeout(tokio::time::Duration::from_millis(1), database.join())
-            .await
-            .unwrap_err();
-
-        // Database task shouldn't have finished yet
-        assert!(!database_dummy_job.is_complete());
-
-        // Wait for database to finish
-        database.join().await.unwrap();
-
-        // Database task should have finished
-        assert!(database_dummy_job.is_complete());
-
-        // Shouldn't have waited for server tracker to finish
-        assert!(!server_dummy_job.is_complete());
-    }
-
-    async fn initialized_database() -> (Arc<ApplicationState>, Database) {
-        let server_id = ServerId::try_from(1).unwrap();
-        let application = make_application();
-
-        let name = DatabaseName::new("test").unwrap();
-        let database_uuid = Uuid::new_v4();
-        let provided_rules = ProvidedDatabaseRules::new_empty(name.clone());
-
-        create_empty_db_in_object_store(
-            Arc::clone(&application),
-            database_uuid,
-            provided_rules,
-            server_id,
-        )
-        .await
-        .unwrap();
-
-        let db_config = DatabaseConfig {
-            name,
-            server_id,
-            database_uuid,
-            wipe_catalog_on_error: false,
-            skip_replay: false,
-        };
-        let database = Database::new(Arc::clone(&application), db_config.clone());
-        database.wait_for_init().await.unwrap();
-        (application, database)
-    }
-
-    #[tokio::test]
-    async fn database_release() {
-        let (application, database) = initialized_database().await;
-        let server_id = database.shared.config.read().server_id;
-        let server_location =
-            IoxObjectStore::server_config_path(&*application.object_store(), server_id)
-                .to_string();
-
-        let uuid = database.release().await.unwrap();
-
-        assert_eq!(database.state_code(), DatabaseStateCode::Shutdown);
-        assert!(database.init_error().is_none());
-
-        let owner_info = application
-            .config_provider()
-            .fetch_owner_info(server_id, uuid)
-            .await
-            .unwrap();
-
-        assert_eq!(owner_info.id, 0);
-        assert_eq!(owner_info.location, "");
-        assert_eq!(owner_info.transactions.len(), 1);
-
-        let transaction = &owner_info.transactions[0];
-        assert_eq!(transaction.id, server_id.get_u32());
-        assert_eq!(transaction.location, server_location);
-    }
-
-    #[tokio::test]
-    async fn database_claim() {
-        let (application, database) = initialized_database().await;
-        let db_name = &database.shared.config.read().name.clone();
-        let server_id = database.shared.config.read().server_id;
-        let server_location =
-            IoxObjectStore::server_config_path(&*application.object_store(), server_id)
-                .to_string();
-        let new_server_id = ServerId::try_from(2).unwrap();
-        let new_server_location =
-            IoxObjectStore::server_config_path(&*application.object_store(), new_server_id)
-                .to_string();
-        let uuid = database.release().await.unwrap();
-
-        // database is in error state
-        assert_eq!(database.state_code(), DatabaseStateCode::Shutdown);
-        assert!(database.init_error().is_none());
-
-        claim_database_in_object_store(
-            Arc::clone(&application),
-            db_name,
-            uuid,
-            new_server_id,
-            false,
-        )
-        .await
-        .unwrap();
-
-        let owner_info = application
-            .config_provider()
-            .fetch_owner_info(server_id, uuid)
-            .await
-            .unwrap();
-
-        assert_eq!(owner_info.id, new_server_id.get_u32());
-        assert_eq!(owner_info.location, new_server_location);
-        assert_eq!(owner_info.transactions.len(), 2);
-
-        let release_transaction = &owner_info.transactions[0];
-        assert_eq!(release_transaction.id, server_id.get_u32());
-        assert_eq!(release_transaction.location, server_location);
-
-        let claim_transaction = &owner_info.transactions[1];
-        assert_eq!(claim_transaction.id, 0);
-        assert_eq!(claim_transaction.location, "");
-
-        // put it back to first DB
-        let db_config = DatabaseConfig {
-            server_id: new_server_id,
-            ..database.shared.config.read().clone()
-        };
-        let new_database = Database::new(Arc::clone(&application), db_config.clone());
-        new_database.wait_for_init().await.unwrap();
-        new_database.release().await.unwrap();
-        claim_database_in_object_store(application, db_name, uuid, server_id, false)
-            .await
-            .unwrap();
-
-        // database should recover
-        tokio::time::timeout(Duration::from_secs(10), database.restart())
-            .await
-            .unwrap()
-            .unwrap();
-    }
-
-    #[tokio::test]
-    async fn database_restart() {
-        test_helpers::maybe_start_logging();
-        let (_application, database) = initialized_database().await;
-
-        // Restart successful
-        database.restart().await.unwrap();
-
-        assert!(database.is_initialized());
-
-        // Delete the rules
-        let iox_object_store = database.iox_object_store();
-        iox_object_store
-            .delete_database_rules_file()
-            .await
-            .unwrap();
-
-        // Restart should fail
-        let err = database.restart().await.unwrap_err().to_string();
-        assert_contains!(&err, "error loading database rules");
-        assert_contains!(&err, "not found");
-    }
-
-    #[tokio::test]
-    async fn database_abort() {
-        test_helpers::maybe_start_logging();
-
-        // Create a throttled object store that will stall the init process
-        let throttle_config = ThrottleConfig {
-            wait_get_per_call: Duration::from_secs(100),
-            ..Default::default()
-        };
-
-        let store = ObjectStoreImpl::new_in_memory_throttled(throttle_config);
-        let throttle_config = match &store.integration {
-            ObjectStoreIntegration::InMemoryThrottled(t) => Arc::clone(&t.config),
-            _ => unreachable!(),
-        };
-        let store: Arc<DynObjectStore> = Arc::new(store);
-        let application = Arc::new(ApplicationState::new(Arc::clone(&store), None, None, None));
-
-        let db_config = DatabaseConfig {
-            name: DatabaseName::new("test").unwrap(),
-            database_uuid: Uuid::new_v4(),
-            server_id: ServerId::try_from(1).unwrap(),
-            wipe_catalog_on_error: false,
-            skip_replay: false,
-        };
-
-        let database = Database::new(Arc::clone(&application), db_config.clone());
-
-        // Should fail to initialize in a timely manner
-        tokio::time::timeout(Duration::from_millis(10), database.wait_for_init())
-            .await
-            .expect_err("should timeout");
-
-        assert_eq!(database.state_code(), DatabaseStateCode::Known);
-
-        database.shutdown();
-        database.join().await.unwrap();
-
-        assert_eq!(database.state_code(), DatabaseStateCode::Shutdown);
-
-        // Disable throttling
-        *throttle_config.lock().unwrap() = Default::default();
-
-        // Restart should recover from aborted state, but will now error due to missing config
-        let error = tokio::time::timeout(Duration::from_secs(1), database.restart())
-            .await
-            .expect("no timeout")
-            .unwrap_err()
-            .to_string();
-
-        assert_contains!(error, "error getting database owner info");
-    }
-
-    #[tokio::test]
-    async fn skip_replay() {
-        // create write buffer
-        let state =
-            MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(1).unwrap());
-        let partition_template = PartitionTemplate {
-            parts: vec![TemplatePart::Column("partition_by".to_string())],
-        };
-        state.push_lp(Sequence::new(0, 10), "table_1,partition_by=a foo=1 10");
-        state.push_lp(Sequence::new(0, 11), "table_1,partition_by=b foo=2 20");
-
-        // setup application
-        let application = make_application();
-        application
-            .write_buffer_factory()
-            .register_mock("my_mock".to_string(), state.clone());
-
-        let server_id = ServerId::try_from(1).unwrap();
-
-        // setup DB
-        let db_name = DatabaseName::new("test_db").unwrap();
-        let uuid = Uuid::new_v4();
-        let rules = data_types::database_rules::DatabaseRules {
-            name: db_name.clone(),
-            partition_template: partition_template.clone(),
-            lifecycle_rules: data_types::database_rules::LifecycleRules {
-                late_arrive_window_seconds: NonZeroU32::try_from(1).unwrap(),
-                ..Default::default()
-            },
-            worker_cleanup_avg_sleep: Duration::from_secs(2),
-            write_buffer_connection: Some(WriteBufferConnection {
-                type_: "mock".to_string(),
-                connection: "my_mock".to_string(),
-                ..Default::default()
-            }),
-        };
-
-        create_empty_db_in_object_store(
-            Arc::clone(&application),
-            uuid,
-            make_provided_rules(rules),
-            server_id,
-        )
-        .await
-        .unwrap();
-
-        let db_config = DatabaseConfig {
-            name: db_name,
-            server_id,
-            database_uuid: uuid,
-            wipe_catalog_on_error: false,
-            skip_replay: false,
-        };
-        let database = Database::new(Arc::clone(&application), db_config.clone());
-        database.wait_for_init().await.unwrap();
-
-        // wait for ingest
-        let db = database.initialized_db().unwrap();
-        let t_0 = Instant::now();
-        loop {
-            // use later partition here so that we can implicitly wait for both entries
-            if db.partition_summary("table_1", "partition_by_b").is_some() {
-                break;
-            }
-
-            assert!(t_0.elapsed() < Duration::from_secs(10));
-            tokio::time::sleep(Duration::from_millis(100)).await;
-        }
-
-        // partition a was forgotten, partition b is still persisted
-        assert!(db.partition_summary("table_1", "partition_by_a").is_some());
-
-        // persist one partition
-        db.persist_partition("table_1", "partition_by_b", true)
-            .await
-            .unwrap();
-
-        // shutdown first database
-        database.shutdown();
-        database.join().await.unwrap();
-
-        // break write buffer by removing entries
-        state.clear_messages(0);
-        state.push_lp(Sequence::new(0, 12), "table_1,partition_by=c foo=3 30");
-
-        // boot actual test database
-        let database = Database::new(Arc::clone(&application), db_config.clone());
-
-        // db is broken
-        let err = database.wait_for_init().await.unwrap_err();
-        assert!(matches!(err.as_ref(), InitError::Replay { .. }));
-
-        // skip replay
-        database.skip_replay().await.unwrap();
-        database.wait_for_init().await.unwrap();
-
-        // wait for ingest
-        state.push_lp(Sequence::new(0, 13), "table_1,partition_by=d foo=4 40");
-        let db = database.initialized_db().unwrap();
-        let t_0 = Instant::now();
-        loop {
-            if db.partition_summary("table_1", "partition_by_d").is_some() {
-                break;
-            }
-
-            assert!(t_0.elapsed() < Duration::from_secs(10));
-            tokio::time::sleep(Duration::from_millis(100)).await;
-        }
-
-        // partition a was forgotten, partition b is still persisted, partition c was skipped
-        assert!(db.partition_summary("table_1", "partition_by_a").is_none());
-        assert!(db.partition_summary("table_1", "partition_by_b").is_some());
-        assert!(db.partition_summary("table_1", "partition_by_c").is_none());
-
-        // cannot skip when database is initialized
-        let res = database.skip_replay().await;
-        assert!(matches!(res, Err(Error::InvalidState { .. })));
-
-        // clean up
-        database.shutdown();
-        database.join().await.unwrap();
-    }
-
-    #[tokio::test]
-    async fn write_buffer_creation_error() {
-        // ensure that we're retrying write buffer creation (e.g. after connection errors or cluster issues)
-
-        // setup application
-        let application = make_application();
-        application.write_buffer_factory();
-
-        let server_id = ServerId::try_from(1).unwrap();
-
-        // setup DB
-        let db_name = DatabaseName::new("test_db").unwrap();
-        let uuid = Uuid::new_v4();
-        let rules = data_types::database_rules::DatabaseRules {
-            name: db_name.clone(),
-            lifecycle_rules: Default::default(),
-            partition_template: Default::default(),
-            worker_cleanup_avg_sleep: Duration::from_secs(2),
-            write_buffer_connection: Some(WriteBufferConnection {
-                type_: "mock".to_string(),
-                connection: "my_mock".to_string(),
-                ..Default::default()
-            }),
-        };
-
-        create_empty_db_in_object_store(
-            Arc::clone(&application),
-            uuid,
-            make_provided_rules(rules),
-            server_id,
-        )
-        .await
-        .unwrap();
-
-        let db_config = DatabaseConfig {
-            name: db_name,
-            server_id,
-            database_uuid: uuid,
-            wipe_catalog_on_error: false,
-            skip_replay: false,
-        };
-        let database = Database::new(Arc::clone(&application), db_config.clone());
-
-        // wait for a bit so the database fails because the mock is missing
-        database.wait_for_init().await.unwrap_err();
-
-        // create write buffer
-        let state =
-            MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(1).unwrap());
-        application
-            .write_buffer_factory()
-            .register_mock("my_mock".to_string(), state.clone());
-
-        tokio::time::timeout(Duration::from_secs(10), async move {
-            loop {
-                if database.wait_for_init().await.is_ok() {
-                    return;
-                }
-
-                tokio::time::sleep(Duration::from_millis(100)).await;
-            }
-        })
-        .await
-        .unwrap();
-    }
-
-    #[tokio::test]
-    async fn database_init_recovery() {
-        let (application, database) = initialized_database().await;
-        let iox_object_store = database.iox_object_store();
-        let config = database.shared.config.read().clone();
-
-        // shutdown first database
-        database.shutdown();
-        database.join().await.unwrap();
-
-        // mess up owner file
-        let owner_backup = iox_object_store.get_owner_file().await.unwrap();
-        iox_object_store
-            .delete_owner_file_for_testing()
-            .await
-            .unwrap();
-
-        // create second database
-        let database = Database::new(Arc::clone(&application), config);
-        database.wait_for_init().await.unwrap_err();
-
-        // recover database by fixing owner file
-        iox_object_store
-            .put_owner_file(owner_backup)
-            .await
-            .unwrap();
-        tokio::time::timeout(Duration::from_secs(10), async move {
-            loop {
-                if database.wait_for_init().await.is_ok() {
-                    return;
-                }
-                tokio::time::sleep(Duration::from_millis(10)).await;
-            }
-        })
-        .await
-        .unwrap();
-    }
-
-    /// Normally database rules are provided as grpc messages, but in
-    /// tests they are constructed from database rules structures
-    /// themselves.
-    fn make_provided_rules(
-        rules: data_types::database_rules::DatabaseRules,
-    ) -> ProvidedDatabaseRules {
-        let rules: generated_types::influxdata::iox::management::v1::DatabaseRules = rules
-            .try_into()
-            .expect("tests should construct valid DatabaseRules");
-        ProvidedDatabaseRules::new_rules(rules)
-            .expect("tests should construct valid ProvidedDatabaseRules")
-    }
-}
diff --git a/server/src/database/init.rs b/server/src/database/init.rs
deleted file mode 100644
index 9b8def8d1d..0000000000
--- a/server/src/database/init.rs
+++ /dev/null
@@ -1,731 +0,0 @@
-//! Database initialization / creation logic
-use crate::{rules::ProvidedDatabaseRules, ApplicationState};
-use data_types::{server_id::ServerId, DatabaseName};
-use db::{
-    load::{create_preserved_catalog, load_or_create_preserved_catalog},
-    write_buffer::WriteBufferConsumer,
-    DatabaseToCommit, Db, LifecycleWorker,
-};
-use generated_types::{
-    database_state::DatabaseState as DatabaseStateCode, influxdata::iox::management,
-};
-use iox_object_store::IoxObjectStore;
-use observability_deps::tracing::{error, info, warn};
-use persistence_windows::checkpoint::ReplayPlan;
-use rand::{thread_rng, Rng};
-use snafu::{ResultExt, Snafu};
-use std::{sync::Arc, time::Duration};
-use tokio::task::JoinError;
-use tokio_util::sync::CancellationToken;
-use uuid::Uuid;
-
-use super::state::DatabaseShared;
-
-/// Errors encountered during initialization of a database
-#[derive(Debug, Snafu)]
-pub enum InitError {
-    #[snafu(display("error finding database directory in object storage: {}", source))]
-    DatabaseObjectStoreLookup {
-        source: iox_object_store::IoxObjectStoreError,
-    },
-
-    #[snafu(display(
-        "Database name in deserialized rules ({}) does not match expected value ({})",
-        actual,
-        expected
-    ))]
-    RulesDatabaseNameMismatch { actual: String, expected: String },
-
-    #[snafu(display("error loading catalog: {}", source))]
-    CatalogLoad { source: db::load::Error },
-
-    #[snafu(display("error creating write buffer: {}", source))]
-    CreateWriteBuffer {
-        source: write_buffer::core::WriteBufferError,
-    },
-
-    #[snafu(display("error during replay: {}", source))]
-    Replay { source: db::Error },
-
-    #[snafu(display("error creating database owner info: {}", source))]
-    CreatingOwnerInfo { source: crate::config::Error },
-
-    #[snafu(display("error getting database owner info: {}", source))]
-    FetchingOwnerInfo { source: crate::config::Error },
-
-    #[snafu(display("error updating database owner info: {}", source))]
-    UpdatingOwnerInfo { source: crate::config::Error },
-
-    #[snafu(display(
-        "Server ID in the database's owner info file ({}) does not match this server's ID ({})",
-        actual,
-        expected
-    ))]
-    DatabaseOwnerMismatch { actual: u32, expected: u32 },
-
-    #[snafu(display(
-        "The database with UUID `{}` is already owned by the server with ID {}",
-        uuid,
-        server_id
-    ))]
-    CantClaimDatabaseCurrentlyOwned { uuid: Uuid, server_id: u32 },
-
-    #[snafu(display("error saving database rules: {}", source))]
-    SavingRules { source: crate::config::Error },
-
-    #[snafu(display("error loading database rules: {}", source))]
-    LoadingRules { source: crate::config::Error },
-
-    #[snafu(display("{}", source))]
-    IoxObjectStoreError {
-        source: iox_object_store::IoxObjectStoreError,
-    },
iox_object_store::IoxObjectStoreError, - }, - - #[snafu(display("The database with UUID `{}` named `{}` is already active", uuid, name))] - AlreadyActive { name: String, uuid: Uuid }, - - #[snafu(display("cannot create preserved catalog: {}", source))] - CannotCreatePreservedCatalog { source: db::load::Error }, - - #[snafu(display("database is not running"))] - Shutdown, -} - -/// The Database startup state machine -/// -/// ```text -/// (start) -/// | -/// |----------------------------o o-o -/// V V V | -/// [Known]-------------->[OwnerInfoLoadError] -/// | | -/// +---------------------------o -/// | -/// | o-o -/// V V | -/// [OwnerInfoLoaded]----------->[RulesLoadError] -/// | | -/// +---------------------------o -/// | -/// | o-o -/// V V | -/// [RulesLoaded]-------------->[CatalogLoadError] -/// | | -/// +---------------------------o -/// | -/// | o-o -/// V V | -/// [CatalogLoaded]---------->[WriteBufferCreationError] -/// | | | | -/// | | | | o-o -/// | | | V V | -/// | o---------------|-->[ReplayError] -/// | | | -/// +--------------------+-------o -/// | -/// | -/// V -/// [Initialized] -/// -/// | -/// V -/// [Shutdown] -/// ``` -/// -/// A Database starts in [`DatabaseState::Known`] and advances through the -/// non error states in sequential order until either: -/// -/// 1. It reaches [`DatabaseState::Initialized`]: Database is initialized -/// 2. An error is encountered, in which case it transitions to one of -/// the error states. We try to recover from all of them. For all except [`DatabaseState::ReplayError`] this is a -/// rather cheap operation since we can just retry the actual operation. For [`DatabaseState::ReplayError`] we need -/// to dump the potentially half-modified in-memory catalog before retrying. -#[derive(Debug, Clone)] -pub(crate) enum DatabaseState { - // Database not running, with an optional shutdown error - Shutdown(Option>), - // Basic initialization sequence states: - Known(DatabaseStateKnown), - OwnerInfoLoaded(DatabaseStateOwnerInfoLoaded), - RulesLoaded(DatabaseStateRulesLoaded), - CatalogLoaded(DatabaseStateCatalogLoaded), - - // Terminal state (success) - Initialized(DatabaseStateInitialized), - - // Error states, we'll try to recover from them - OwnerInfoLoadError(DatabaseStateKnown, Arc), - RulesLoadError(DatabaseStateOwnerInfoLoaded, Arc), - CatalogLoadError(DatabaseStateRulesLoaded, Arc), - WriteBufferCreationError(DatabaseStateCatalogLoaded, Arc), - ReplayError(DatabaseStateCatalogLoaded, Arc), -} - -impl std::fmt::Display for DatabaseState { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.state_code().fmt(f) - } -} - -impl DatabaseState { - // Construct the start state of the database machine - pub fn new_known() -> Self { - Self::Known(DatabaseStateKnown {}) - } - - pub(crate) fn state_code(&self) -> DatabaseStateCode { - match self { - DatabaseState::Shutdown(_) => DatabaseStateCode::Shutdown, - DatabaseState::Known(_) => DatabaseStateCode::Known, - DatabaseState::OwnerInfoLoaded(_) => DatabaseStateCode::OwnerInfoLoaded, - DatabaseState::RulesLoaded(_) => DatabaseStateCode::RulesLoaded, - DatabaseState::CatalogLoaded(_) => DatabaseStateCode::CatalogLoaded, - DatabaseState::Initialized(_) => DatabaseStateCode::Initialized, - DatabaseState::OwnerInfoLoadError(_, _) => DatabaseStateCode::OwnerInfoLoadError, - DatabaseState::RulesLoadError(_, _) => DatabaseStateCode::RulesLoadError, - DatabaseState::CatalogLoadError(_, _) => DatabaseStateCode::CatalogLoadError, - DatabaseState::WriteBufferCreationError(_, _) => { 
- DatabaseStateCode::WriteBufferCreationError - } - DatabaseState::ReplayError(_, _) => DatabaseStateCode::ReplayError, - } - } - - pub(crate) fn error(&self) -> Option<&Arc> { - match self { - DatabaseState::Known(_) - | DatabaseState::Shutdown(_) - | DatabaseState::OwnerInfoLoaded(_) - | DatabaseState::RulesLoaded(_) - | DatabaseState::CatalogLoaded(_) - | DatabaseState::Initialized(_) => None, - DatabaseState::OwnerInfoLoadError(_, e) - | DatabaseState::RulesLoadError(_, e) - | DatabaseState::CatalogLoadError(_, e) - | DatabaseState::WriteBufferCreationError(_, e) - | DatabaseState::ReplayError(_, e) => Some(e), - } - } - - pub(crate) fn provided_rules(&self) -> Option> { - match self { - DatabaseState::Known(_) - | DatabaseState::Shutdown(_) - | DatabaseState::OwnerInfoLoaded(_) - | DatabaseState::OwnerInfoLoadError(_, _) - | DatabaseState::RulesLoadError(_, _) => None, - DatabaseState::RulesLoaded(state) | DatabaseState::CatalogLoadError(state, _) => { - Some(Arc::clone(&state.provided_rules)) - } - DatabaseState::CatalogLoaded(state) - | DatabaseState::WriteBufferCreationError(state, _) - | DatabaseState::ReplayError(state, _) => Some(Arc::clone(&state.provided_rules)), - DatabaseState::Initialized(state) => Some(Arc::clone(&state.provided_rules)), - } - } - - pub(crate) fn owner_info(&self) -> Option { - match self { - DatabaseState::Known(_) - | DatabaseState::Shutdown(_) - | DatabaseState::OwnerInfoLoadError(_, _) - | DatabaseState::RulesLoadError(_, _) => None, - DatabaseState::OwnerInfoLoaded(state) => Some(state.owner_info.clone()), - DatabaseState::RulesLoaded(state) | DatabaseState::CatalogLoadError(state, _) => { - Some(state.owner_info.clone()) - } - DatabaseState::CatalogLoaded(state) - | DatabaseState::WriteBufferCreationError(state, _) - | DatabaseState::ReplayError(state, _) => Some(state.owner_info.clone()), - DatabaseState::Initialized(state) => Some(state.owner_info.clone()), - } - } - - /// Whether this is shutdown - pub(crate) fn is_shutdown(&self) -> bool { - matches!(self, DatabaseState::Shutdown(_)) - } - - pub(crate) fn get_initialized(&self) -> Option<&DatabaseStateInitialized> { - match self { - DatabaseState::Initialized(state) => Some(state), - _ => None, - } - } - - /// Try to advance to the next state - /// - /// # Panic - /// - /// Panics if the database cannot be advanced (already initialized or shutdown) - async fn advance(self, shared: &DatabaseShared) -> Self { - match self { - Self::Known(state) | Self::OwnerInfoLoadError(state, _) => { - match state.advance(shared).await { - Ok(state) => Self::OwnerInfoLoaded(state), - Err(e) => Self::OwnerInfoLoadError(state, Arc::new(e)), - } - } - Self::OwnerInfoLoaded(state) | Self::RulesLoadError(state, _) => { - match state.advance(shared).await { - Ok(state) => Self::RulesLoaded(state), - Err(e) => Self::RulesLoadError(state, Arc::new(e)), - } - } - Self::RulesLoaded(state) | Self::CatalogLoadError(state, _) => { - match state.advance(shared).await { - Ok(state) => Self::CatalogLoaded(state), - Err(e) => Self::CatalogLoadError(state, Arc::new(e)), - } - } - Self::CatalogLoaded(state) | Self::WriteBufferCreationError(state, _) => { - match state.advance(shared).await { - Ok(state) => Self::Initialized(state), - Err(e @ InitError::CreateWriteBuffer { .. 
}) => { - Self::WriteBufferCreationError(state, Arc::new(e)) - } - Err(e) => Self::ReplayError(state, Arc::new(e)), - } - } - Self::ReplayError(state, _) => { - let state2 = state.rollback(); - match state2.advance(shared).await { - Ok(state2) => match state2.advance(shared).await { - Ok(state2) => Self::Initialized(state2), - Err(e) => Self::ReplayError(state, Arc::new(e)), - }, - Err(e) => Self::ReplayError(state, Arc::new(e)), - } - } - Self::Initialized(_) => unreachable!(), - Self::Shutdown(_) => unreachable!(), - } - } -} - -#[derive(Debug, Clone)] -pub(crate) struct DatabaseStateKnown {} - -impl DatabaseStateKnown { - /// Load owner info from object storage and verify it matches the current owner - async fn advance( - &self, - shared: &DatabaseShared, - ) -> Result { - let (server_id, uuid) = { - let config = shared.config.read(); - (config.server_id, config.database_uuid) - }; - - let owner_info = shared - .application - .config_provider() - .fetch_owner_info(server_id, uuid) - .await - .context(FetchingOwnerInfoSnafu)?; - - if owner_info.id != server_id.get_u32() { - return DatabaseOwnerMismatchSnafu { - actual: owner_info.id, - expected: server_id.get_u32(), - } - .fail(); - } - - Ok(DatabaseStateOwnerInfoLoaded { owner_info }) - } -} - -#[derive(Debug, Clone)] -pub(crate) struct DatabaseStateOwnerInfoLoaded { - owner_info: management::v1::OwnerInfo, -} - -impl DatabaseStateOwnerInfoLoaded { - /// Load database rules from object storage - async fn advance( - &self, - shared: &DatabaseShared, - ) -> Result { - let uuid = shared.config.read().database_uuid; - let provided_rules = shared - .application - .config_provider() - .fetch_rules(uuid) - .await - .context(LoadingRulesSnafu)?; - - let db_name = shared.config.read().name.clone(); - if provided_rules.db_name() != &db_name { - return RulesDatabaseNameMismatchSnafu { - actual: provided_rules.db_name(), - expected: db_name.as_str(), - } - .fail(); - } - - Ok(DatabaseStateRulesLoaded { - provided_rules: Arc::new(provided_rules), - owner_info: self.owner_info.clone(), - }) - } -} - -#[derive(Debug, Clone)] -pub(crate) struct DatabaseStateRulesLoaded { - provided_rules: Arc, - owner_info: management::v1::OwnerInfo, -} - -impl DatabaseStateRulesLoaded { - /// Load catalog from object storage - async fn advance( - &self, - shared: &DatabaseShared, - ) -> Result { - let (db_name, wipe_catalog_on_error, skip_replay, server_id) = { - let config = shared.config.read(); - ( - config.name.clone(), - config.wipe_catalog_on_error, - config.skip_replay, - config.server_id, - ) - }; - let (preserved_catalog, catalog, replay_plan) = load_or_create_preserved_catalog( - db_name.as_str(), - Arc::clone(&shared.iox_object_store), - Arc::clone(shared.application.metric_registry()), - Arc::clone(shared.application.time_provider()), - wipe_catalog_on_error, - skip_replay, - ) - .await - .context(CatalogLoadSnafu)?; - - let database_to_commit = DatabaseToCommit { - server_id, - iox_object_store: Arc::clone(&shared.iox_object_store), - exec: Arc::clone(shared.application.executor()), - rules: Arc::clone(self.provided_rules.rules()), - preserved_catalog, - catalog, - metric_registry: Arc::clone(shared.application.metric_registry()), - time_provider: Arc::clone(shared.application.time_provider()), - }; - - let db = Arc::new(Db::new( - database_to_commit, - Arc::clone(shared.application.job_registry()), - )); - - let lifecycle_worker = Arc::new(LifecycleWorker::new(Arc::clone(&db))); - - Ok(DatabaseStateCatalogLoaded { - db, - lifecycle_worker, - replay_plan: 
Arc::new(replay_plan), - provided_rules: Arc::clone(&self.provided_rules), - owner_info: self.owner_info.clone(), - }) - } -} - -#[derive(Debug, Clone)] -pub(crate) struct DatabaseStateCatalogLoaded { - db: Arc, - lifecycle_worker: Arc, - replay_plan: Arc>, - provided_rules: Arc, - owner_info: management::v1::OwnerInfo, -} - -impl DatabaseStateCatalogLoaded { - /// Perform replay - async fn advance( - &self, - shared: &DatabaseShared, - ) -> Result { - let db = Arc::clone(&self.db); - - let rules = self.provided_rules.rules(); - let trace_collector = shared.application.trace_collector(); - let write_buffer_factory = shared.application.write_buffer_factory(); - let (db_name, skip_replay) = { - let config = shared.config.read(); - (config.name.clone(), config.skip_replay) - }; - let write_buffer_consumer = match rules.write_buffer_connection.as_ref() { - Some(connection) => { - let consumer = write_buffer_factory - .new_config_read(db_name.as_str(), trace_collector.as_ref(), connection) - .await - .context(CreateWriteBufferSnafu)?; - - let replay_plan = if skip_replay { - None - } else { - self.replay_plan.as_ref().as_ref() - }; - - let streams = db - .perform_replay(replay_plan, Arc::clone(&consumer)) - .await - .context(ReplaySnafu)?; - - Some(Arc::new(WriteBufferConsumer::new( - consumer, - streams, - Arc::clone(&db), - shared.application.metric_registry().as_ref(), - ))) - } - _ => None, - }; - - self.lifecycle_worker.unsuppress_persistence(); - - Ok(DatabaseStateInitialized { - db, - write_buffer_consumer, - lifecycle_worker: Arc::clone(&self.lifecycle_worker), - provided_rules: Arc::clone(&self.provided_rules), - owner_info: self.owner_info.clone(), - }) - } - - /// Rolls back state to an unloaded catalog. - pub(crate) fn rollback(&self) -> DatabaseStateRulesLoaded { - warn!(db_name=%self.db.name(), "throwing away loaded catalog to recover from replay error"); - DatabaseStateRulesLoaded { - provided_rules: Arc::clone(&self.provided_rules), - owner_info: self.owner_info.clone(), - } - } -} - -#[derive(Debug, Clone)] -pub(crate) struct DatabaseStateInitialized { - db: Arc, - write_buffer_consumer: Option>, - lifecycle_worker: Arc, - provided_rules: Arc, - owner_info: management::v1::OwnerInfo, -} - -impl DatabaseStateInitialized { - pub fn db(&self) -> &Arc { - &self.db - } - - pub fn write_buffer_consumer(&self) -> Option<&Arc> { - self.write_buffer_consumer.as_ref() - } - - pub fn set_provided_rules(&mut self, provided_rules: Arc) { - self.provided_rules = provided_rules - } -
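The skip-or-replay selection in `DatabaseStateCatalogLoaded::advance` above flattens the shared `Arc<Option<ReplayPlan>>` into an `Option<&ReplayPlan>` with two chained `as_ref` calls: one through the `Arc`, one through the `Option`. A minimal sketch of just that conversion, with a hypothetical stand-in for `ReplayPlan`:

```rust
use std::sync::Arc;

// Hypothetical stand-in for `persistence_windows::checkpoint::ReplayPlan`.
#[derive(Debug)]
struct ReplayPlan;

/// Mirrors the selection step above: `Arc::as_ref` yields `&Option<ReplayPlan>`,
/// then `Option::as_ref` yields `Option<&ReplayPlan>`.
fn select_plan(plan: &Arc<Option<ReplayPlan>>, skip_replay: bool) -> Option<&ReplayPlan> {
    if skip_replay {
        None
    } else {
        plan.as_ref().as_ref()
    }
}

fn main() {
    let plan = Arc::new(Some(ReplayPlan));
    assert!(select_plan(&plan, false).is_some());
    assert!(select_plan(&plan, true).is_none());
    println!("replay selection behaves as described");
}
```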
- /// Get a reference to this initialized database state's lifecycle worker. - pub(crate) fn lifecycle_worker(&self) -> &Arc { - &self.lifecycle_worker - } -} - -const INIT_BACKOFF: Duration = Duration::from_secs(1); -const MAX_BACKOFF: Duration = Duration::from_secs(500); - -/// Try to drive the database to `DatabaseState::Initialized`; returns when -/// this is achieved or the shutdown signal is triggered -pub(crate) async fn initialize_database(shared: &DatabaseShared, shutdown: CancellationToken) { - let db_name = shared.config.read().name.clone(); - info!(%db_name, "database initialization started"); - - // A backoff duration for retrying errors; it grows across repeated errors - let mut backoff = INIT_BACKOFF; - - while !shutdown.is_cancelled() { - let handle = shared.state.read().freeze(); - let handle = handle.await; - - // Re-acquire read lock to avoid holding lock across await point - let state = DatabaseState::clone(&shared.state.read()); - - info!(%db_name, %state, "attempting to advance database initialization state"); - - match &state { - DatabaseState::Initialized(_) => break, - DatabaseState::Shutdown(_) => { - info!(%db_name, "database in shutdown - aborting initialization"); - shutdown.cancel(); - return; - } - _ => {} - } - - // Try to advance to the next state - let next_state = tokio::select! { - next_state = state.advance(shared) => next_state, - _ = shutdown.cancelled() => { - info!(%db_name, "initialization aborted by shutdown"); - return - } - }; - - let state_code = next_state.state_code(); - let maybe_error = next_state.error().cloned(); - // Commit the next state - { - let mut state = shared.state.write(); - info!(%db_name, from=%state, to=%next_state, "database initialization state transition"); - - *state.unfreeze(handle) = next_state; - shared.state_notify.notify_waiters(); - } - - match maybe_error { - Some(error) => { - // exponential backoff w/ jitter, decorrelated - // see https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/ - backoff = Duration::from_secs_f64( - MAX_BACKOFF.as_secs_f64().min( - thread_rng() - .gen_range(INIT_BACKOFF.as_secs_f64()..(backoff.as_secs_f64() * 3.0)), - ), - ); - - error!( - %db_name, - %error, - state=%state_code, - backoff_secs = backoff.as_secs_f64(), - "database in error state - backing off initialization" - ); - - // Wait for timeout or shutdown signal - tokio::select! { - _ = tokio::time::sleep(backoff) => {}, - _ = shutdown.cancelled() => {} - } - } - None => { - // reset backoff - backoff = INIT_BACKOFF; - } - } - } -} -
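The retry loop in `initialize_database` above implements "decorrelated jitter": each sleep is drawn uniformly between the initial backoff and three times the previous sleep, then capped. A minimal, runnable sketch of that scheme, assuming the `rand` 0.8 crate (the function name is illustrative, not from the crate):

```rust
use rand::{thread_rng, Rng};
use std::time::Duration;

const INIT_BACKOFF: Duration = Duration::from_secs(1);
const MAX_BACKOFF: Duration = Duration::from_secs(500);

/// Next sleep: uniform sample from [INIT_BACKOFF, 3 * previous), capped at MAX_BACKOFF.
fn next_backoff(previous: Duration) -> Duration {
    let sampled = thread_rng()
        .gen_range(INIT_BACKOFF.as_secs_f64()..(previous.as_secs_f64() * 3.0));
    Duration::from_secs_f64(MAX_BACKOFF.as_secs_f64().min(sampled))
}

fn main() {
    let mut backoff = INIT_BACKOFF;
    for attempt in 1..=6 {
        backoff = next_backoff(backoff);
        println!("attempt {attempt}: would sleep {backoff:?}");
    }
}
```

Sampling against the previous sleep rather than the attempt count keeps concurrent retries spread apart; see the AWS article referenced in the code above.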
-/// Create a fresh database without any state. Returns its location in object storage -/// for saving in the server config file. -pub async fn create_empty_db_in_object_store( - application: Arc, - uuid: Uuid, - provided_rules: ProvidedDatabaseRules, - server_id: ServerId, -) -> Result { - let db_name = provided_rules.db_name().clone(); - let iox_object_store = Arc::new( - match IoxObjectStore::create(Arc::clone(&application.object_store()), uuid).await { - Ok(ios) => ios, - Err(source) => return Err(InitError::IoxObjectStoreError { source }), - }, - ); - - let database_location = iox_object_store.root_path(); - - application - .config_provider() - .create_owner_info(server_id, uuid) - .await - .context(CreatingOwnerInfoSnafu)?; - - application - .config_provider() - .store_rules(uuid, &provided_rules) - .await - .context(SavingRulesSnafu)?; - - create_preserved_catalog( - &db_name, - iox_object_store, - Arc::clone(application.metric_registry()), - Arc::clone(application.time_provider()), - true, - ) - .await - .context(CannotCreatePreservedCatalogSnafu)?; - - Ok(database_location) -} - -/// Claim a released database. Returns its -/// location in object storage for saving in the server config -/// file. -/// -/// If `force` is true, a missing owner info file or an owner info file -/// for the wrong server ID is ignored (does not cause an error) -pub async fn claim_database_in_object_store( - application: Arc, - db_name: &DatabaseName<'static>, - uuid: Uuid, - server_id: ServerId, - force: bool, -) -> Result { - info!(%db_name, %uuid, %force, "claiming database"); - - let iox_object_store = IoxObjectStore::load(Arc::clone(&application.object_store()), uuid) - .await - .context(IoxObjectStoreSnafu)?; - - let owner_info = application - .config_provider() - .fetch_owner_info(server_id, uuid) - .await - .context(FetchingOwnerInfoSnafu); - - // try to recreate owner_info if force is specified - let owner_info = match owner_info { - Err(_) if force => { - warn!("Attempting to recreate missing owner info due to force"); - - application - .config_provider() - .create_owner_info(server_id, uuid) - .await - .context(CreatingOwnerInfoSnafu)?; - - application - .config_provider() - .fetch_owner_info(server_id, uuid) - .await - .context(FetchingOwnerInfoSnafu) - } - t => t, - }?; - - if owner_info.id != 0 { - if !force { - return CantClaimDatabaseCurrentlyOwnedSnafu { - uuid, - server_id: owner_info.id, - } - .fail(); - } else { - warn!( - owner_id = owner_info.id, - "Ignoring owner info mismatch due to force" - ); - } - } - - let database_location = iox_object_store.root_path(); - application - .config_provider() - .update_owner_info(Some(server_id), uuid) - .await - .context(UpdatingOwnerInfoSnafu)?; - - Ok(database_location) -} diff --git a/server/src/database/state.rs b/server/src/database/state.rs deleted file mode 100644 index 750c756a49..0000000000 --- a/server/src/database/state.rs +++ /dev/null @@ -1,43 +0,0 @@ -//! Database initialization states -use crate::ApplicationState; -use data_types::{server_id::ServerId, DatabaseName}; -use internal_types::freezable::Freezable; -use iox_object_store::IoxObjectStore; -use parking_lot::RwLock; -use std::sync::Arc; -use tokio::sync::Notify; -use uuid::Uuid; - -use super::init::DatabaseState; - -#[derive(Debug, Clone)] -/// Information about where a database is located on object store, -/// and how to perform startup activities.
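Stepping back to `claim_database_in_object_store` above: the ownership test treats an owner ID of 0 as "released", and `force` downgrades a stale owner to a warning. A reduced sketch of that decision, with simplified stand-ins for the real `OwnerInfo` and error types:

```rust
// Simplified stand-in for the generated `OwnerInfo` message.
struct OwnerInfo {
    id: u32,
}

#[derive(Debug, PartialEq)]
enum Claim {
    Ok,
    AlreadyOwned { server_id: u32 },
}

/// A database can be claimed if it is released (owner id 0) or if `force`
/// overrides an existing owner entry.
fn can_claim(owner: &OwnerInfo, force: bool) -> Claim {
    if owner.id != 0 && !force {
        Claim::AlreadyOwned { server_id: owner.id }
    } else {
        Claim::Ok
    }
}

fn main() {
    assert_eq!(can_claim(&OwnerInfo { id: 0 }, false), Claim::Ok);
    assert_eq!(
        can_claim(&OwnerInfo { id: 42 }, false),
        Claim::AlreadyOwned { server_id: 42 }
    );
    assert_eq!(can_claim(&OwnerInfo { id: 42 }, true), Claim::Ok);
    println!("claim checks pass");
}
```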
-pub struct DatabaseConfig { - pub name: DatabaseName<'static>, - pub server_id: ServerId, - pub database_uuid: Uuid, - pub wipe_catalog_on_error: bool, - pub skip_replay: bool, -} - -/// State shared with the `Database` background worker -#[derive(Debug)] -pub(crate) struct DatabaseShared { - /// Configuration provided to the database at startup - pub(crate) config: RwLock, - - /// Application-global state - pub(crate) application: Arc, - - /// Database object store - pub(crate) iox_object_store: Arc, - - /// The initialization state of the `Database`, wrapped in a - /// `Freezable` to ensure there is only one task with an - /// outstanding intent to write at any time. - pub(crate) state: RwLock>, - - /// Notify that the database state has changed - pub(crate) state_notify: Notify, -} diff --git a/server/src/lib.rs b/server/src/lib.rs deleted file mode 100644 index 4eec4b1dd3..0000000000 --- a/server/src/lib.rs +++ /dev/null @@ -1,2365 +0,0 @@ -//! This crate contains code that defines the logic for a running InfluxDB IOx -//! server. It also has the logic for how servers talk to each other, which -//! includes replication, subscriptions, querying, and traits that abstract -//! these methods away for testing purposes. -//! -//! This diagram shows the lifecycle of a write coming into a set of IOx servers -//! configured in different roles. This doesn't include ensuring that the -//! replicated writes are durable, or snapshotting partitions in the write -//! buffer. Have a read through the comments in the source before trying to make -//! sense of this diagram. -//! -//! Each level of servers exists to serve a specific function, ideally isolating -//! the kinds of failures that would cause one portion to go down. -//! -//! The router level simply converts the line protocol to the flatbuffer format -//! and computes the partition key. It keeps no state. -//! -//! The HostGroup/AZ level is for receiving the replicated writes and keeping -//! multiple copies of those writes in memory before they are persisted to -//! object storage. Individual databases or groups of databases can be routed to -//! the same set of host groups, which will limit the blast radius for databases -//! that overload the system with writes or for situations where subscribers lag -//! too far behind. -//! -//! The Subscriber level is for actually pulling in the data and making it -//! available for query through indexing in the write buffer or writing that -//! data out to Parquet in object storage. Subscribers can also be used for -//! real-time alerting and monitoring. -//! -//! ```text -//! ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ -//! ┌────────┐ ┌────────┐ Step 1: │ -//! │Router 1│ │Router 2│ │ Parse LP -//! │ │ │ │ Create SequencedEntry │ -//! └───┬─┬──┘ └────────┘ └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ -//! │ │ -//! │ │ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ -//! ┌────────┘ └───┬──────────────┐ Step 2: │ -//! │ │ │ │ Replicate to -//! │ │ │ all host groups │ -//! ▼ ▼ ▼ └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ -//! ┌───────────┐ ┌───────────┐ ┌───────────┐ -//! │HostGroup 1│ │HostGroup 2│ │HostGroup 3│ -//! │(AZ 1) │ │(AZ 2) │ │(AZ 3) │ -//! └───────────┘ └───────────┘ └───────────┘ -//! │ -//! │ -//! │ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ -//! │ Step 3: │ -//! └──┐ │ Push subscription -//! │ │ -//! │ └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ -//! │ -//! ▼ -//! ┌────────────┐ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ -//! │Query Server│ Step 4: │ -//! │(subscriber)│ │ Store in WriteBuffer -//! │ │ │ -//! └────────────┘ └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ -//! 
``` - -#![deny(rustdoc::broken_intra_doc_links, rustdoc::bare_urls, rust_2018_idioms)] -#![warn( - missing_debug_implementations, - clippy::explicit_iter_loop, - clippy::use_self, - clippy::clone_on_ref_ptr, - clippy::future_not_send -)] - -use async_trait::async_trait; -use data_types::{ - chunk_metadata::ChunkId, - error::ErrorLogger, - job::Job, - server_id::ServerId, - {DatabaseName, DatabaseNameError}, -}; -use database::{ - init::{claim_database_in_object_store, create_empty_db_in_object_store}, - state::DatabaseConfig, - Database, -}; -use object_store::DynObjectStore; -use std::any::Any; - -use db::Db; -use futures::future::{BoxFuture, Future, FutureExt, Shared, TryFutureExt}; -use generated_types::google::FieldViolation; -use hashbrown::HashMap; -use internal_types::freezable::Freezable; -use iox_object_store::IoxObjectStore; -use lifecycle::{LockableChunk, LockablePartition}; -use observability_deps::tracing::{error, info, warn}; -use parking_lot::{Mutex, RwLock}; -use snafu::{ensure, OptionExt, ResultExt, Snafu}; -use std::sync::{Arc, Weak}; -use tokio::{sync::Notify, task::JoinError}; -use tokio_util::sync::CancellationToken; -use tracker::TaskTracker; -use uuid::Uuid; - -pub use application::ApplicationState; -use metric::{Attributes, MetricKind, Observation, Reporter}; - -mod application; -pub mod config; -pub mod database; -pub mod rules; -use rules::{PersistedDatabaseRules, ProvidedDatabaseRules}; - -type DatabaseError = Box; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("id not set"))] - IdNotSet, - - #[snafu(display( - "Server ID is set ({}) but server is not yet initialized (e.g. DBs and remotes are not \ - loaded). Server is not yet ready to read/write data.", - server_id - ))] - ServerNotInitialized { server_id: ServerId }, - - #[snafu(display("id already set"))] - IdAlreadySet, - - #[snafu(display("database not initialized"))] - DatabaseNotInitialized { db_name: String }, - - #[snafu(display("cannot update database rules"))] - CanNotUpdateRules { - db_name: String, - source: crate::database::Error, - }, - - #[snafu(display("cannot create database: {}", source))] - CannotCreateDatabase { - source: crate::database::init::InitError, - }, - - #[snafu(display("database not found: {}", db_name))] - DatabaseNotFound { db_name: String }, - - #[snafu(display("database uuid not found: {}", uuid))] - DatabaseUuidNotFound { uuid: Uuid }, - - #[snafu(display("cannot get database name from rules: {}", source))] - CouldNotGetDatabaseNameFromRules { source: DatabaseNameFromRulesError }, - - #[snafu(display("{}", source))] - CannotReleaseDatabase { source: crate::database::Error }, - - #[snafu(display("{}", source))] - CannotClaimDatabase { - source: crate::database::init::InitError, - }, - - #[snafu(display("A database with the name `{}` already exists", db_name))] - DatabaseAlreadyExists { db_name: String }, - - #[snafu(display("The database with UUID `{}` is already owned by this server", uuid))] - DatabaseAlreadyOwnedByThisServer { uuid: Uuid }, - - #[snafu(display( - "Could not release {}: the UUID specified ({}) does not match the current UUID ({})", - db_name, - specified, - current - ))] - UuidMismatch { - db_name: String, - specified: Uuid, - current: Uuid, - }, - - #[snafu(display("invalid database: {}", source))] - InvalidDatabaseName { source: DatabaseNameError }, - - #[snafu(display("error wiping preserved catalog: {}", source))] - WipePreservedCatalog { source: database::Error }, - - #[snafu(display("database error: {}", source))] - UnknownDatabaseError 
{ source: DatabaseError }, - - #[snafu(display("partition not found: {}", source))] - PartitionNotFound { source: db::catalog::Error }, - - #[snafu(display( - "chunk: {} not found in partition '{}' and table '{}'", - chunk_id, - partition, - table - ))] - ChunkNotFound { - chunk_id: ChunkId, - partition: String, - table: String, - }, - - #[snafu(display("database failed to initialize: {}", source))] - DatabaseInit { - source: Arc, - }, - - #[snafu(display("error persisting server config: {}", source))] - PersistServerConfig { source: crate::config::Error }, -} - -pub type Result = std::result::Result; - -/// Configuration options for `Server` -#[derive(Debug, Default)] -pub struct ServerConfig { - pub wipe_catalog_on_error: bool, - - pub skip_replay_and_seek_instead: bool, -} - -/// `Server` is the container struct for how servers store data internally, as -/// well as how they communicate with other servers. Each server will have one -/// of these structs, which keeps track of all replication and query rules. -#[derive(Debug)] -pub struct Server { - /// Future that resolves when the background worker exits - join: Shared>>>, - - /// State shared with the background worker - shared: Arc, -} - -impl Drop for Server { - fn drop(&mut self) { - if !self.shared.shutdown.is_cancelled() { - warn!("server dropped without calling shutdown()"); - self.shared.shutdown.cancel(); - } - - if self.join.clone().now_or_never().is_none() { - warn!("server dropped without waiting for worker termination"); - } - } -} - -#[async_trait] -impl service_common::QueryDatabaseProvider for Server { - type Db = Db; - - async fn db(&self, name: &str) -> Option> { - DatabaseName::new(name) - .ok() - .and_then(|name| self.db(&name).ok()) - } -} - -/// A [`metric::Instrument`] that reports on the state of the server and its databases -/// -/// This internally stores a weak reference to a [`ServerShared`] and is designed -/// to potentially outlive the server itself. 
-/// -/// If multiple servers are created using the same metric registry, only the state -/// of the last server to be registered will be reported -#[derive(Debug, Clone, Default)] -struct ServerMetricReporter { - shared: Arc>>, -} - -impl ServerMetricReporter { - fn set_shared(&self, shared: &Arc) { - *self.shared.lock() = Arc::downgrade(shared); - } -} - -impl metric::Instrument for ServerMetricReporter { - fn report(&self, reporter: &mut dyn Reporter) { - let shared = match self.shared.lock().upgrade() { - Some(shared) => shared, - None => return, - }; - - reporter.start_metric("server_state", "IOx server status", MetricKind::U64Gauge); - - let (server_state, server_id) = { - let state = shared.state.read(); - (state.description(), state.server_id()) - }; - - let mut attributes = Attributes::from(&[("state", server_state)]); - if let Some(server_id) = server_id { - attributes.insert("server_id", server_id.to_string()) - } - - reporter.report_observation(&attributes, Observation::U64Gauge(1)); - reporter.finish_metric(); - - let databases: Vec<_> = { - let state = shared.state.read(); - match state.initialized() { - Ok(initialized) => initialized - .databases - .values() - .map(|x| (x.name(), x.state_code())) - .collect(), - Err(_) => vec![], - } - }; - - reporter.start_metric( - "database_state", - "IOx database status", - MetricKind::U64Gauge, - ); - - for (name, state) in databases { - reporter.report_observation( - &Attributes::from([ - ("name", name.to_string().into()), - ("state", state.description().into()), - ]), - Observation::U64Gauge(1), - ) - } - - reporter.finish_metric(); - } - - fn as_any(&self) -> &dyn Any { - self - } -} - -#[derive(Debug)] -struct ServerShared { - /// A token that is used to trigger shutdown of the background worker - shutdown: CancellationToken, - - /// Application-global state - application: Arc, - - /// The state of the `Server` - state: RwLock>, - - /// Notify that the database state has changed - state_notify: Notify, -} - -#[derive(Debug, Snafu)] -pub enum InitError { - #[snafu(display("error getting server config: {}", source))] - GetServerConfig { source: crate::config::Error }, - - #[snafu(display("invalid database name in server config: {}", source))] - InvalidDatabase { source: DatabaseNameError }, -} - -/// The stage of the server in the startup process -/// -/// The progression is linear: Startup -> InitReady -> Initialized -/// -/// If an error is encountered trying to transition InitReady -> Initialized, it enters -/// state InitError and the background worker will continue to try to advance to Initialized -/// -/// Any error encountered is exposed by Server::init_error() -/// -#[derive(Debug)] -enum ServerState { - /// Server has started but doesn't have a server ID yet - Startup(ServerStateStartup), - - /// Server can be initialized - InitReady(ServerStateInitReady), - - /// Server encountered an error initializing - InitError(ServerStateInitReady, Arc), - - /// Server has finished initializing - Initialized(ServerStateInitialized), -} - -impl ServerState { - fn initialized(&self) -> Result<&ServerStateInitialized> { - match self { - ServerState::Startup(_) => Err(Error::IdNotSet), - ServerState::InitReady(state) | ServerState::InitError(state, _) => { - Err(Error::ServerNotInitialized { - server_id: state.server_id, - }) - } - ServerState::Initialized(state) => Ok(state), - } - } - - fn server_id(&self) -> Option { - match self { - ServerState::Startup(_) => None, - ServerState::InitReady(state) => Some(state.server_id),
ServerState::InitError(state, _) => Some(state.server_id), - ServerState::Initialized(state) => Some(state.server_id), - } - } - - fn description(&self) -> &'static str { - match self { - ServerState::Startup(_) => "Startup", - ServerState::InitReady(_) => "InitReady", - ServerState::InitError(_, _) => "InitError", - ServerState::Initialized(_) => "Initialized", - } - } -} - -#[derive(Debug, Clone)] -struct ServerStateStartup { - wipe_catalog_on_error: bool, - skip_replay_and_seek_instead: bool, -} - -#[derive(Debug, Clone)] -struct ServerStateInitReady { - wipe_catalog_on_error: bool, - skip_replay_and_seek_instead: bool, - server_id: ServerId, -} - -impl ServerStateInitReady { - async fn advance(&self, shared: &ServerShared) -> Result { - let server_config = shared - .application - .config_provider() - .fetch_server_config(self.server_id) - .await - .context(GetServerConfigSnafu)?; - - let databases = server_config - .into_iter() - .map(|(name, database_uuid)| { - let database_name = DatabaseName::new(name).context(InvalidDatabaseSnafu)?; - - let database = Database::new( - Arc::clone(&shared.application), - DatabaseConfig { - name: database_name.clone(), - database_uuid, - server_id: self.server_id, - wipe_catalog_on_error: self.wipe_catalog_on_error, - skip_replay: self.skip_replay_and_seek_instead, - }, - ); - - Ok((database_name, Arc::new(database))) - }) - .collect::>()?; - - Ok(ServerStateInitialized { - server_id: self.server_id, - databases, - }) - } -} - -#[derive(Debug)] -struct ServerStateInitialized { - server_id: ServerId, - - /// A map of possibly initialized `Database` owned by this `Server` - databases: HashMap, Arc>, -} - -impl ServerStateInitialized { - /// Add a new database to the state - /// - /// Returns an error if an active database (either initialized or errored, but not deleted) - /// with the same name already exists - fn new_database( - &mut self, - shared: &ServerShared, - config: DatabaseConfig, - ) -> Result<&Arc> { - use hashbrown::hash_map::Entry; - - let db_name = config.name.clone(); - match self.databases.entry(db_name.clone()) { - Entry::Vacant(vacant) => Ok(vacant.insert(Arc::new(Database::new( - Arc::clone(&shared.application), - config, - )))), - Entry::Occupied(_) => DatabaseAlreadyExistsSnafu { - db_name: config.name, - } - .fail(), - } - } -} - -impl Server { - pub fn new(application: Arc, config: ServerConfig) -> Self { - let shared = Arc::new(ServerShared { - shutdown: Default::default(), - application: Arc::clone(&application), - state: RwLock::new(Freezable::new(ServerState::Startup(ServerStateStartup { - wipe_catalog_on_error: config.wipe_catalog_on_error, - skip_replay_and_seek_instead: config.skip_replay_and_seek_instead, - }))), - state_notify: Default::default(), - }); - - application - .metric_registry() - .register_instrument("server_metrics", ServerMetricReporter::default) - .set_shared(&shared); - - let handle = tokio::spawn(background_worker(Arc::clone(&shared))); - let join = handle.map_err(Arc::new).boxed().shared(); - - Self { shared, join } - } - - /// Sets the ID of the server, which is used for replication and the base - /// path in object storage. - /// - /// A valid server ID must be non-zero.
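`Server::new` above hands the metric registry a `ServerMetricReporter` holding only a `Weak` reference, so the registry (which can outlive the server) never keeps a dropped server alive. A stripped-down, runnable sketch of that pattern, using a `String` as a stand-in for `ServerShared`:

```rust
use std::sync::{Arc, Mutex, Weak};

/// Reduced analogue of `ServerMetricReporter`: report while the server is
/// still alive, silently do nothing once it has been dropped.
#[derive(Default)]
struct Reporter {
    shared: Mutex<Weak<String>>,
}

impl Reporter {
    fn set_shared(&self, shared: &Arc<String>) {
        *self.shared.lock().unwrap() = Arc::downgrade(shared);
    }

    fn report(&self) {
        match self.shared.lock().unwrap().upgrade() {
            Some(shared) => println!("server_state: {shared}"),
            None => println!("server gone - nothing to report"),
        }
    }
}

fn main() {
    let reporter = Reporter::default();
    let server = Arc::new("Initialized".to_string());
    reporter.set_shared(&server);
    reporter.report(); // server still alive: reports its state
    drop(server);
    reporter.report(); // upgrade fails: the reporter outlived the server
}
```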
- pub fn set_id(&self, server_id: ServerId) -> Result<()> { - let mut state = self.shared.state.write(); - let startup = match &**state { - ServerState::Startup(startup) => startup.clone(), - state - if state - .server_id() - .map(|existing| existing == server_id) - .unwrap_or_default() => - { - // already set to same ID - return Ok(()); - } - _ => return Err(Error::IdAlreadySet), - }; - - *state.get_mut().expect("transaction in progress") = - ServerState::InitReady(ServerStateInitReady { - wipe_catalog_on_error: startup.wipe_catalog_on_error, - skip_replay_and_seek_instead: startup.skip_replay_and_seek_instead, - server_id, - }); - - Ok(()) - } - - /// Returns the server id for this server if set - pub fn server_id(&self) -> Option { - self.shared.state.read().server_id() - } - - /// Returns true if the server is initialized - /// - /// NB: not all databases may be initialized - pub fn initialized(&self) -> bool { - self.shared.state.read().initialized().is_ok() - } - - /// Triggers shutdown of this `Server` - pub fn shutdown(&self) { - info!("server shutting down"); - self.shared.shutdown.cancel() - } - - /// Waits for this `Server` background worker to exit - pub fn join(&self) -> impl Future>> { - self.join.clone() - } - - /// Returns Ok(()) when the Server is initialized, or the error - /// if one is encountered - pub async fn wait_for_init(&self) -> Result<(), Arc> { - loop { - // Register interest before checking to avoid race - let notify = self.shared.state_notify.notified(); - - // Note: this is not guaranteed to see non-terminal states - // as the state machine may advance past them between - // the notification being fired, and this task waking up - match &**self.shared.state.read() { - ServerState::InitError(_, e) => return Err(Arc::clone(e)), - ServerState::Initialized(_) => return Ok(()), - _ => {} - } - - notify.await; - } - } - - /// Error occurred during generic server init (e.g. listing store content). - pub fn server_init_error(&self) -> Option> { - match &**self.shared.state.read() { - ServerState::InitError(_, e) => Some(Arc::clone(e)), - _ => None, - } - } - - /// Returns a list of `Database` for this `Server` sorted by name - pub fn databases(&self) -> Result>> { - let state = self.shared.state.read(); - let initialized = state.initialized()?; - let mut databases: Vec<_> = initialized.databases.iter().collect(); - - // ensure the databases come back sorted by name - databases.sort_by_key(|(name, _db)| name.as_str()); - - let databases = databases - .into_iter() - .map(|(_name, db)| Arc::clone(db)) - .collect(); - - Ok(databases) - } - - /// Get the `Database` by name - pub fn database(&self, db_name: &DatabaseName<'_>) -> Result> { - let state = self.shared.state.read(); - let initialized = state.initialized()?; - let db = initialized - .databases - .get(db_name) - .context(DatabaseNotFoundSnafu { db_name })?; - - Ok(Arc::clone(db)) - } - - /// Returns an initialized `Db` by name - pub fn db(&self, db_name: &DatabaseName<'_>) -> Result> { - let database = self.database(db_name)?; - - database - .initialized_db() - .context(DatabaseNotInitializedSnafu { db_name }) - } - - /// Tells the server the set of rules for a database. 
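`wait_for_init` above creates the `Notified` future before inspecting the state, so a `notify_waiters` call that lands between the check and the await cannot be lost. A minimal tokio sketch of that ordering, with an `AtomicBool` standing in for the server state:

```rust
use std::sync::{
    atomic::{AtomicBool, Ordering},
    Arc,
};
use tokio::sync::Notify;

/// Register interest first, then check; otherwise a wake-up arriving between
/// the check and the await would be missed and this could sleep forever.
async fn wait_for_flag(flag: Arc<AtomicBool>, notify: Arc<Notify>) {
    loop {
        let notified = notify.notified(); // register interest before checking
        if flag.load(Ordering::SeqCst) {
            return;
        }
        notified.await;
    }
}

#[tokio::main]
async fn main() {
    let flag = Arc::new(AtomicBool::new(false));
    let notify = Arc::new(Notify::new());

    let waiter = tokio::spawn(wait_for_flag(Arc::clone(&flag), Arc::clone(&notify)));

    // Flip the flag, then wake any waiters.
    flag.store(true, Ordering::SeqCst);
    notify.notify_waiters();

    waiter.await.unwrap();
    println!("waiter observed the flag");
}
```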
- /// - /// Waits until the database has initialized or failed to do so - pub async fn create_database(&self, rules: ProvidedDatabaseRules) -> Result> { - let uuid = Uuid::new_v4(); - let db_name = rules.db_name().clone(); - - info!(%db_name, %uuid, "creating new database"); - - // Wait for exclusive access to mutate server state - let handle_fut = self.shared.state.read().freeze(); - let handle = handle_fut.await; - - let server_id = { - let state = self.shared.state.read(); - let initialized = state.initialized()?; - - ensure!( - !initialized.databases.contains_key(&db_name), - DatabaseAlreadyExistsSnafu { db_name } - ); - - initialized.server_id - }; - - create_empty_db_in_object_store( - Arc::clone(&self.shared.application), - uuid, - rules, - server_id, - ) - .await - .context(CannotCreateDatabaseSnafu)?; - - let database = { - let mut state = self.shared.state.write(); - - // Exchange FreezeHandle for mutable access via WriteGuard - let mut state = state.unfreeze(handle); - - let database = match &mut *state { - ServerState::Initialized(initialized) => initialized - .new_database( - &self.shared, - DatabaseConfig { - name: db_name, - server_id, - database_uuid: uuid, - wipe_catalog_on_error: false, - skip_replay: false, - }, - ) - .expect("database unique"), - _ => unreachable!(), - }; - Arc::clone(database) - }; - - // Save the database to the server config as soon as it's added to the `ServerState` - self.persist_server_config().await?; - - database.wait_for_init().await.context(DatabaseInitSnafu)?; - - Ok(database) - } - - /// Release an existing, active database with this name from this server. Return an error if no - /// active database with this name can be found. - pub async fn release_database( - &self, - db_name: &DatabaseName<'static>, - uuid: Option, - ) -> Result { - // Wait for exclusive access to mutate server state - let handle_fut = self.shared.state.read().freeze(); - let handle = handle_fut.await; - - let database = self.database(db_name)?; - let current = database.uuid(); - - // If a UUID has been specified, it has to match this database's UUID - // Should this check be here or in database.release? - if matches!(uuid, Some(specified) if specified != current) { - return UuidMismatchSnafu { - db_name: db_name.to_string(), - specified: uuid.unwrap(), - current, - } - .fail(); - } - - let returned_uuid = database - .release() - .await - .context(CannotReleaseDatabaseSnafu)?; - - { - let mut state = self.shared.state.write(); - - // Exchange FreezeHandle for mutable access via WriteGuard - let mut state = state.unfreeze(handle); - - match &mut *state { - ServerState::Initialized(initialized) => { - initialized.databases.remove(db_name); - } - _ => unreachable!(), - } - } - - self.persist_server_config().await?; - - Ok(returned_uuid) - } - - /// Claim a database that has been released. 
Return an error if: - /// - /// * No database with this UUID can be found - /// * There's already an active database with this name - /// * This database is already owned by this server - /// * This database is already owned by a different server (unless force is true) - pub async fn claim_database(&self, uuid: Uuid, force: bool) -> Result> { - // Wait for exclusive access to mutate server state - let handle_fut = self.shared.state.read().freeze(); - let handle = handle_fut.await; - - // Don't proceed without a server ID - let server_id = { - let state = self.shared.state.read(); - let initialized = state.initialized()?; - - initialized.server_id - }; - - // Read the database's rules from object storage to get the database name - let db_name = database_name_from_rules_file( - Arc::clone(&self.shared.application.object_store()), - uuid, - ) - .await - .map_err(|e| match e { - DatabaseNameFromRulesError::DatabaseRulesNotFound { .. } => { - Error::DatabaseUuidNotFound { uuid } - } - _ => Error::CouldNotGetDatabaseNameFromRules { source: e }, - })?; - - info!(%db_name, %uuid, "start restoring database"); - - // Check that this name is unique among currently active databases - if let Ok(existing_db) = self.database(&db_name) { - if existing_db.uuid() == uuid { - return DatabaseAlreadyOwnedByThisServerSnafu { uuid }.fail(); - } else { - return DatabaseAlreadyExistsSnafu { db_name }.fail(); - } - } - - // Mark the database as claimed in object storage and get its location for the server - // config file - claim_database_in_object_store( - Arc::clone(&self.shared.application), - &db_name, - uuid, - server_id, - force, - ) - .await - .context(CannotClaimDatabaseSnafu)?; - - let database = { - let mut state = self.shared.state.write(); - - // Exchange FreezeHandle for mutable access via WriteGuard - let mut state = state.unfreeze(handle); - - let database = match &mut *state { - ServerState::Initialized(initialized) => initialized - .new_database( - &self.shared, - DatabaseConfig { - name: db_name.clone(), - server_id, - database_uuid: uuid, - wipe_catalog_on_error: false, - skip_replay: false, - }, - ) - .expect("database unique"), - _ => unreachable!(), - }; - Arc::clone(database) - }; - - // Save the database to the server config as soon as it's added to the `ServerState` - self.persist_server_config().await?; - - database.wait_for_init().await.context(DatabaseInitSnafu)?; - - Ok(db_name) - } - - /// Write this server's databases out to the server config in object storage. - async fn persist_server_config(&self) -> Result<()> { - let (server_id, config) = { - let state = self.shared.state.read(); - let initialized = state.initialized()?; - - let config: Vec<_> = initialized - .databases - .iter() - .map(|(name, database)| (name.to_string(), database.uuid())) - .collect(); - - (initialized.server_id, config) - }; - - self.shared - .application - .config_provider() - .store_server_config(server_id, &config) - .await - .context(PersistServerConfigSnafu)?; - - Ok(()) - } - - /// Update database rules and save on success. - pub async fn update_db_rules( - &self, - rules: ProvidedDatabaseRules, - ) -> Result> { - let db_name = rules.db_name().clone(); - let database = self.database(&db_name)?; - - // attempt to save provided rules in the current state - database - .update_provided_rules(rules) - .await - .context(CanNotUpdateRulesSnafu { db_name }) - } - - /// Closes a chunk and starts moving its data to the read buffer, as a - /// background job, dropping when complete. 
- pub fn close_chunk( - &self, - db_name: &DatabaseName<'_>, - table_name: impl Into, - partition_key: impl Into, - chunk_id: ChunkId, - ) -> Result> { - let db = self.db(db_name)?; - let partition_key = partition_key.into(); - let table_name = table_name.into(); - - let partition = db - .lockable_partition(&table_name, &partition_key) - .context(PartitionNotFoundSnafu)?; - - let partition = partition.read(); - let chunk = - LockablePartition::chunk(&partition, chunk_id).ok_or_else(|| Error::ChunkNotFound { - chunk_id, - partition: partition_key.to_string(), - table: table_name.to_string(), - })?; - - let partition = partition.upgrade(); - let chunk = chunk.write(); - - LockablePartition::compact_chunks(partition, vec![chunk]).map_err(|e| { - Error::UnknownDatabaseError { - source: Box::new(e), - } - }) - } - - /// Recover a database that has failed to load its catalog by wiping it - /// - /// The DB must exist in the server and have failed to load the catalog for this to work. - /// This is done to prevent race conditions between DB jobs and this command - pub async fn wipe_preserved_catalog( - &self, - db_name: &DatabaseName<'static>, - ) -> Result> { - self.database(db_name)? - .wipe_preserved_catalog() - .await - .context(WipePreservedCatalogSnafu) - } -} - -/// Background worker function for the server -async fn background_worker(shared: Arc) { - info!("started server background worker"); - - let mut interval = tokio::time::interval(tokio::time::Duration::from_secs(1)); - - // TODO: Move out of Server background worker - let job_registry = shared.application.job_registry(); - - while !shared.shutdown.is_cancelled() { - maybe_initialize_server(shared.as_ref()).await; - job_registry.reclaim(); - - db::utils::panic_test(|| { - let server_id = shared.state.read().initialized().ok()?.server_id; - Some(format!("server background worker: {}", server_id)) - }); - - tokio::select! { - _ = interval.tick() => {}, - _ = shared.shutdown.cancelled() => break - } - } - - info!("shutting down databases"); - let databases: Vec<_> = shared - .state - .read() - .initialized() - .into_iter() - .flat_map(|x| x.databases.values().cloned()) - .collect(); - - for database in databases { - database.shutdown(); - let _ = database - .join() - .await - .log_if_error("database background worker"); - } - - info!("draining tracker registry"); - - // Wait for any outstanding jobs to finish - frontend shutdown should be - // sequenced before shutting down the background workers and so there - // shouldn't be any - while job_registry.reclaim() != 0 { - interval.tick().await; - } - - info!("drained tracker registry"); -} - -/// Loads the database configurations based on the databases in the -/// object store. Any databases already in the config won't be -/// replaced. -/// -/// This requires the server ID to be set. -/// -/// It will be a no-op if the configs are already loaded and the server is ready.
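Both `maybe_initialize_server` below and the database worker funnel state transitions through the freeze/unfreeze handles of `Freezable`. A toy approximation of that discipline (not the real `internal_types::freezable` API, just the idea): reads stay on a cheap lock while a separate async lock serializes the intent to write:

```rust
use std::sync::Arc;
use tokio::sync::{Mutex, OwnedMutexGuard, RwLock};

/// Toy freeze/unfreeze wrapper: only the holder of the intent token may
/// commit a new state, while readers proceed concurrently.
struct Freezable<T> {
    state: RwLock<T>,
    intent: Arc<Mutex<()>>,
}

impl<T> Freezable<T> {
    fn new(value: T) -> Self {
        Self {
            state: RwLock::new(value),
            intent: Arc::new(Mutex::new(())),
        }
    }

    /// Acquire the exclusive intent-to-write token (other writers wait here).
    async fn freeze(&self) -> OwnedMutexGuard<()> {
        Arc::clone(&self.intent).lock_owned().await
    }

    /// Commit the next state; holding the token proves we are the sole writer.
    async fn unfreeze(&self, _handle: OwnedMutexGuard<()>, next: T) {
        *self.state.write().await = next;
    }
}

#[tokio::main]
async fn main() {
    let state = Freezable::new("InitReady".to_string());
    let handle = state.freeze().await; // intent acquired; reads still allowed
    println!("advancing from {}", *state.state.read().await);
    state.unfreeze(handle, "Initialized".to_string()).await;
    println!("now {}", *state.state.read().await);
}
```

In the real `Freezable`, `unfreeze(handle)` is instead called on a write guard and yields the mutable access used by `*state.unfreeze(handle) = next_state` in the worker loops.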
-/// -async fn maybe_initialize_server(shared: &ServerShared) { - if shared.state.read().initialized().is_ok() { - return; - } - - let (init_ready, handle) = { - let state = shared.state.read(); - - let init_ready = match &**state { - ServerState::Startup(_) => { - info!("server not initialized - ID not set"); - return; - } - ServerState::InitReady(state) => { - info!(server_id=%state.server_id, "server init ready"); - state.clone() - } - ServerState::InitError(state, e) => { - info!(server_id=%state.server_id, %e, "retrying server init"); - state.clone() - } - ServerState::Initialized(_) => return, - }; - - let handle = match state.try_freeze() { - Some(handle) => handle, - None => return, - }; - - (init_ready, handle) - }; - - let next_state = match init_ready.advance(shared).await { - Ok(initialized) => ServerState::Initialized(initialized), - Err(e) => { - error!(%e, "error attempting to initialize server"); - ServerState::InitError(init_ready, Arc::new(e)) - } - }; - - *shared.state.write().unfreeze(handle) = next_state; - shared.state_notify.notify_waiters(); -} - -#[cfg(test)] -impl Server { - /// For tests: list of database names in this server, regardless - /// of their initialization state - fn db_names_sorted(&self) -> Vec { - self.shared - .state - .read() - .initialized() - .map(|initialized| { - let mut keys: Vec<_> = initialized - .databases - .keys() - .map(ToString::to_string) - .collect(); - - keys.sort_unstable(); - keys - }) - .unwrap_or_default() - } -} - -#[derive(Snafu, Debug)] -pub enum DatabaseNameFromRulesError { - #[snafu(display( - "database rules for UUID {} not found at expected location `{}`", - uuid, - path - ))] - DatabaseRulesNotFound { uuid: Uuid, path: String }, - - #[snafu(display("error loading rules from object storage: {} ({:?})", source, source))] - CannotLoadRules { source: object_store::Error }, - - #[snafu(display("error deserializing database rules from protobuf: {}", source))] - CannotDeserializeRules { - source: generated_types::DecodeError, - }, - - #[snafu(display("error converting grpc to database rules: {}", source))] - ConvertingRules { source: FieldViolation }, -} - -async fn database_name_from_rules_file( - object_store: Arc, - uuid: Uuid, -) -> Result, DatabaseNameFromRulesError> { - let rules_bytes = IoxObjectStore::load_database_rules(object_store, uuid) - .await - .map_err(|e| match e { - object_store::Error::NotFound { path, .. } => { - DatabaseNameFromRulesError::DatabaseRulesNotFound { uuid, path } - } - other => DatabaseNameFromRulesError::CannotLoadRules { source: other }, - })?; - - let rules: PersistedDatabaseRules = - generated_types::database_rules::decode_persisted_database_rules(rules_bytes) - .context(CannotDeserializeRulesSnafu)? 
- .try_into() - .context(ConvertingRulesSnafu)?; - - Ok(rules.db_name().to_owned()) -} - -pub mod test_utils { - use super::*; - use object_store::ObjectStoreImpl; - - /// Create a new [`ApplicationState`] with an in-memory object store - pub fn make_application() -> Arc { - Arc::new(ApplicationState::new( - Arc::new(ObjectStoreImpl::new_in_memory()), - None, - None, - None, - )) - } - - /// Creates a new server with the provided [`ApplicationState`] - pub fn make_server(application: Arc) -> Arc { - Arc::new(Server::new(application, Default::default())) - } - - /// Creates a new server with the provided [`ApplicationState`] - /// - /// Sets the `server_id` provided and waits for it to initialize - pub async fn make_initialized_server( - server_id: ServerId, - application: Arc, - ) -> Arc { - let server = make_server(application); - server.set_id(server_id).unwrap(); - server.wait_for_init().await.unwrap(); - server - } -} - -#[cfg(test)] -mod tests { - use super::{ - test_utils::{make_application, make_server}, - *, - }; - use bytes::Bytes; - use data_types::{ - chunk_metadata::{ChunkAddr, ChunkStorage}, - database_rules::{DatabaseRules, LifecycleRules, PartitionTemplate, TemplatePart}, - write_buffer::WriteBufferConnection, - }; - use dml::DmlWrite; - use generated_types::influxdata::iox::management; - use iox_object_store::IoxObjectStore; - use mutable_batch_lp::lines_to_batches; - use object_store::{path::ObjectStorePath, DynObjectStore, ObjectStoreImpl}; - use parquet_catalog::{ - core::{PreservedCatalog, PreservedCatalogConfig}, - test_helpers::{load_ok, new_empty}, - }; - use std::num::NonZeroU32; - use std::{ - convert::TryFrom, - sync::Arc, - time::{Duration, Instant}, - }; - use test_helpers::{assert_contains, assert_error}; - - #[tokio::test] - async fn server_api_calls_return_error_with_no_id_set() { - let server = make_server(make_application()); - - let resp = server.db(&DatabaseName::new("foo").unwrap()).unwrap_err(); - assert!(matches!(resp, Error::IdNotSet)); - } - - async fn server_config_contents(object_store: &DynObjectStore, server_id: ServerId) -> Bytes { - IoxObjectStore::get_server_config_file(object_store, server_id) - .await - .unwrap_or_else(|_| Bytes::new()) - } - - async fn server_config( - object_store: &DynObjectStore, - server_id: ServerId, - ) -> management::v1::ServerConfig { - let server_config_contents = server_config_contents(object_store, server_id).await; - generated_types::server_config::decode_persisted_server_config(server_config_contents) - .unwrap() - } - - fn assert_config_contents( - config: &management::v1::ServerConfig, - expected: &[(&DatabaseName<'_>, String)], - ) { - assert_eq!(config.databases.len(), expected.len()); - - for entry in expected { - let (expected_name, expected_location) = entry; - let location = config - .databases - .get(expected_name.as_str()) - .unwrap_or_else(|| { - panic!( - "Could not find database named {} in server config", - expected_name - ) - }); - - assert_eq!(location, expected_location); - } - } - - #[tokio::test] - async fn create_database_persists_rules_owner_and_server_config() { - let application = make_application(); - let server = make_server(Arc::clone(&application)); - let server_id = ServerId::try_from(1).unwrap(); - server.set_id(server_id).unwrap(); - server.wait_for_init().await.unwrap(); - - // assert server config file either doesn't exist or exists but has 0 entries - let server_config_contents = - server_config_contents(&*application.object_store(), server_id).await; - 
assert_eq!(server_config_contents.len(), 0); - - let name = DatabaseName::new("bananas").unwrap(); - - let rules = DatabaseRules { - name: name.clone(), - partition_template: PartitionTemplate { - parts: vec![TemplatePart::TimeFormat("YYYY-MM".to_string())], - }, - lifecycle_rules: LifecycleRules { - catalog_transactions_until_checkpoint: std::num::NonZeroU64::new(13).unwrap(), - ..Default::default() - }, - worker_cleanup_avg_sleep: Duration::from_secs(2), - write_buffer_connection: None, - }; - let provided_rules = make_provided_rules(rules); - - // Create a database - let bananas = server - .create_database(provided_rules.clone()) - .await - .expect("failed to create database"); - - let bananas_uuid = bananas.uuid(); - let read_rules = application - .config_provider() - .fetch_rules(bananas_uuid) - .await - .unwrap(); - - // Same rules that were provided are read - assert_eq!(provided_rules.original(), read_rules.original()); - - // rules that are being used are the same - assert_eq!(provided_rules.rules(), read_rules.rules()); - - // assert this database knows it's owned by this server - let owner_info = bananas.owner_info().unwrap(); - assert_eq!(owner_info.id, server_id.get_u32()); - assert_eq!( - owner_info.location, - IoxObjectStore::server_config_path(&*application.object_store(), server_id).to_string() - ); - - // assert server config file exists and has 1 entry - let config = server_config(&*application.object_store(), server_id).await; - assert_config_contents(&config, &[(&name, format!("dbs/{}/", bananas_uuid))]); - - let db2 = DatabaseName::new("db_awesome").unwrap(); - let rules2 = DatabaseRules::new(db2.clone()); - let provided_rules2 = make_provided_rules(rules2); - - let awesome = server - .create_database(provided_rules2) - .await - .expect("failed to create 2nd db"); - let awesome_uuid = awesome.uuid(); - - // assert server config file exists and has 2 entries - let config = server_config(&*application.object_store(), server_id).await; - assert_config_contents( - &config, - &[ - (&name, format!("dbs/{}/", bananas_uuid)), - (&db2, format!("dbs/{}/", awesome_uuid)), - ], - ); - - let server2 = make_server(Arc::clone(&application)); - server2.set_id(ServerId::try_from(1).unwrap()).unwrap(); - server2.wait_for_init().await.unwrap(); - - let database1 = server2.database(&name).unwrap(); - let database2 = server2.database(&db2).unwrap(); - - database1.wait_for_init().await.unwrap(); - database2.wait_for_init().await.unwrap(); - - assert!(server2.db(&db2).is_ok()); - assert!(server2.db(&name).is_ok()); - - // assert server config file still exists and has 2 entries - let config = server_config(&*application.object_store(), server_id).await; - assert_config_contents( - &config, - &[ - (&name, format!("dbs/{}/", bananas_uuid)), - (&db2, format!("dbs/{}/", awesome_uuid)), - ], - ); - } - - #[tokio::test] - async fn duplicate_database_name_rejected() { - // Covers #643 - - let server = make_server(make_application()); - server.set_id(ServerId::try_from(1).unwrap()).unwrap(); - server.wait_for_init().await.unwrap(); - - let name = DatabaseName::new("bananas").unwrap(); - - // Create a database - server - .create_database(default_rules(name.clone())) - .await - .expect("failed to create database"); - - // Then try and create another with the same name - let got = server - .create_database(default_rules(name.clone())) - .await - .unwrap_err(); - - if !matches!(got, Error::DatabaseAlreadyExists { .. 
}) {
-            panic!("expected already exists error");
-        }
-    }
-
-    async fn create_simple_database(
-        server: &Server,
-        name: impl Into<String> + Send,
-    ) -> Result<Arc<Database>> {
-        let name = DatabaseName::new(name.into()).unwrap();
-
-        let rules = DatabaseRules {
-            name,
-            partition_template: PartitionTemplate {
-                parts: vec![TemplatePart::TimeFormat("YYYY-MM".to_string())],
-            },
-            lifecycle_rules: Default::default(),
-            worker_cleanup_avg_sleep: Duration::from_secs(2),
-            write_buffer_connection: None,
-        };
-
-        // Create a database
-        server.create_database(make_provided_rules(rules)).await
-    }
-
-    #[tokio::test]
-    async fn load_databases() {
-        let apples_name = DatabaseName::new("apples").unwrap();
-        let bananas_name = DatabaseName::new("bananas").unwrap();
-
-        let application = make_application();
-
-        let server = make_server(Arc::clone(&application));
-        let server_id = ServerId::try_from(1).unwrap();
-        server.set_id(server_id).unwrap();
-        server.wait_for_init().await.unwrap();
-        let bananas = create_simple_database(&server, "bananas")
-            .await
-            .expect("failed to create database");
-        let bananas_uuid = bananas.uuid();
-
-        assert!(bananas.is_initialized());
-
-        // Shutdown server
-        server.shutdown();
-        server.join().await.unwrap();
-
-        assert!(bananas.is_shutdown());
-
-        let server = make_server(Arc::clone(&application));
-        server.set_id(ServerId::try_from(1).unwrap()).unwrap();
-        server.wait_for_init().await.unwrap();
-
-        let bananas = server.database(&bananas_name).unwrap();
-        bananas.wait_for_init().await.unwrap();
-
-        let apples = create_simple_database(&server, "apples")
-            .await
-            .expect("failed to create database");
-        assert!(apples.is_initialized());
-
-        let apples_uuid = apples.uuid();
-
-        assert_eq!(server.db_names_sorted(), vec!["apples", "bananas"]);
-
-        let bananas_object_store = bananas.iox_object_store();
-
-        // Shutdown server to demonstrate that the server shutdown
-        // causes the databases to shutdown
-        server.shutdown();
-        server.join().await.unwrap();
-
-        assert!(apples.is_shutdown());
-        assert!(bananas.is_shutdown());
-
-        // Delete rules so bananas fails to startup
-        bananas_object_store
-            .delete_database_rules_file()
-            .await
-            .expect("cannot delete rules file");
-
-        let server = make_server(Arc::clone(&application));
-        server.set_id(ServerId::try_from(1).unwrap()).unwrap();
-        server.wait_for_init().await.unwrap();
-
-        assert_eq!(server.db_names_sorted(), vec!["apples", "bananas"]);
-
-        // assert server config file has been recreated and contains 2 entries, even though
-        // the databases fail to initialize
-        let config = server_config(&*application.object_store(), server_id).await;
-        assert_config_contents(
-            &config,
-            &[
-                (&apples.config().name, format!("dbs/{}/", apples_uuid)),
-                (&bananas.config().name, format!("dbs/{}/", bananas_uuid)),
-            ],
-        );
-
-        let apples = server.database(&apples_name).unwrap();
-        let bananas = server.database(&bananas_name).unwrap();
-
-        apples.wait_for_init().await.unwrap();
-        assert!(apples.init_error().is_none());
-
-        let err = bananas.wait_for_init().await.unwrap_err();
-        assert_contains!(err.to_string(), "rules.pb not found");
-        assert!(Arc::ptr_eq(&err, &bananas.init_error().unwrap()));
-    }
-
-    #[tokio::test]
-    async fn old_server_config_object_store_path() {
-        let application = make_application();
-        let server_id = ServerId::try_from(1).unwrap();
-        let object_store = application.object_store();
-
-        // Server config used to be stored under /[server id]/config.pb.
Construct a config in that - // old location that points to a database - let mut old_server_config_path = object_store.new_path(); - old_server_config_path.push_dir(&server_id.to_string()); - old_server_config_path.set_file_name("config.pb"); - - // Create database rules and database owner info for a database in object storage - let db_uuid = Uuid::new_v4(); - let db_name = DatabaseName::new("mydb").unwrap(); - let db_rules = DatabaseRules::new(db_name.clone()); - - let mut db_path = object_store.new_path(); - db_path.push_dir("dbs"); - db_path.push_dir(db_uuid.to_string()); - let mut db_rules_path = db_path.clone(); - db_rules_path.set_file_name("rules.pb"); - - let persisted_database_rules = management::v1::PersistedDatabaseRules { - uuid: db_uuid.as_bytes().to_vec(), - rules: Some(db_rules.into()), - }; - let mut encoded_rules = bytes::BytesMut::new(); - generated_types::database_rules::encode_persisted_database_rules( - &persisted_database_rules, - &mut encoded_rules, - ) - .unwrap(); - let encoded_rules = encoded_rules.freeze(); - object_store - .put(&db_rules_path, encoded_rules) - .await - .unwrap(); - - let mut db_owner_info_path = db_path.clone(); - db_owner_info_path.set_file_name("owner.pb"); - let owner_info = management::v1::OwnerInfo { - id: server_id.get_u32(), - location: old_server_config_path.to_string(), - transactions: vec![], - }; - let mut encoded_owner_info = bytes::BytesMut::new(); - generated_types::server_config::encode_database_owner_info( - &owner_info, - &mut encoded_owner_info, - ) - .unwrap(); - let encoded_owner_info = encoded_owner_info.freeze(); - object_store - .put(&db_owner_info_path, encoded_owner_info) - .await - .unwrap(); - - let config = management::v1::ServerConfig { - databases: [(db_name.to_string(), db_path.to_raw())] - .into_iter() - .collect(), - }; - let mut encoded_server_config = bytes::BytesMut::new(); - generated_types::server_config::encode_persisted_server_config( - &config, - &mut encoded_server_config, - ) - .unwrap(); - let encoded_server_config = encoded_server_config.freeze(); - object_store - .put(&old_server_config_path, encoded_server_config) - .await - .unwrap(); - - // Start up server - let server = make_server(Arc::clone(&application)); - server.set_id(server_id).unwrap(); - server.wait_for_init().await.unwrap(); - - // Database should init - let database = server.database(&db_name).unwrap(); - database.wait_for_init().await.unwrap(); - - // Server config should be transitioned to the new location - let config = server_config(&*application.object_store(), server_id).await; - assert_config_contents(&config, &[(&db_name, format!("dbs/{}/", db_uuid))]); - } - - #[tokio::test] - async fn db_names_sorted() { - let server = make_server(make_application()); - server.set_id(ServerId::try_from(1).unwrap()).unwrap(); - server.wait_for_init().await.unwrap(); - - let names = vec!["bar", "baz"]; - - for name in &names { - let name = DatabaseName::new(name.to_string()).unwrap(); - server - .create_database(default_rules(name)) - .await - .expect("failed to create database"); - } - - let db_names_sorted = server.db_names_sorted(); - assert_eq!(names, db_names_sorted); - } - - #[tokio::test] - async fn close_chunk() { - test_helpers::maybe_start_logging(); - let server = make_server(make_application()); - - server.set_id(ServerId::try_from(1).unwrap()).unwrap(); - server.wait_for_init().await.unwrap(); - - let db_name = DatabaseName::new("foo").unwrap(); - server - .create_database(default_rules(db_name.clone())) - .await - .unwrap(); - - 
let tables = lines_to_batches("cpu bar=1 10", 0).unwrap(); - let db = server.db(&db_name).unwrap(); - let write = DmlWrite::new(db_name.as_str(), tables, Default::default()); - db.store_write(&write).unwrap(); - - // get chunk ID - let chunks = db.chunk_summaries(); - assert_eq!(chunks.len(), 1); - let chunk_id = chunks[0].id; - - // start the close (note this is not an async) - let chunk_addr = ChunkAddr { - db_name: Arc::from(db_name.as_str()), - table_name: Arc::from("cpu"), - partition_key: Arc::from(""), - chunk_id, - }; - let tracker = server - .close_chunk( - &db_name, - chunk_addr.table_name.as_ref(), - chunk_addr.partition_key.as_ref(), - chunk_addr.chunk_id, - ) - .unwrap(); - - let metadata = tracker.metadata(); - let expected_metadata = Job::CompactChunks { - partition: chunk_addr.clone().into_partition(), - chunks: vec![chunk_addr.chunk_id], - }; - assert_eq!(metadata, &expected_metadata); - - // wait for the job to complete - tracker.join().await; - - // Data should be in the read buffer and not in mutable buffer - let db_name = DatabaseName::new("foo").unwrap(); - let db = server.db(&db_name).unwrap(); - - let chunk_summaries = db.chunk_summaries(); - assert_eq!(chunk_summaries.len(), 1); - assert_eq!(chunk_summaries[0].storage, ChunkStorage::ReadBuffer); - } - - #[tokio::test] - async fn background_task_cleans_jobs() { - let application = make_application(); - let server = make_server(Arc::clone(&application)); - - let wait_nanos = 1000; - let job = application - .job_registry() - .spawn_dummy_job(vec![wait_nanos], None); - - job.join().await; - - assert!(job.is_complete()); - - server.shutdown(); - server.join().await.unwrap(); - } - - #[tokio::test] - async fn cannot_create_db_until_server_is_initialized() { - let server = make_server(make_application()); - - // calling before serverID set leads to `IdNotSet` - let err = create_simple_database(&server, "bananas") - .await - .unwrap_err(); - assert!(matches!(err, Error::IdNotSet)); - - server.set_id(ServerId::try_from(1).unwrap()).unwrap(); - // do NOT call `server.maybe_load_database_configs` so DBs are not loaded and server is not ready - - // calling with serverId but before loading is done leads to - let err = create_simple_database(&server, "bananas") - .await - .unwrap_err(); - assert!(matches!(err, Error::ServerNotInitialized { .. })); - } - - #[tokio::test] - async fn background_worker_eventually_inits_server() { - let server = make_server(make_application()); - - server.set_id(ServerId::try_from(1).unwrap()).unwrap(); - - let t_0 = Instant::now(); - loop { - if server.initialized() { - break; - } - assert!(t_0.elapsed() < Duration::from_secs(10)); - tokio::time::sleep(Duration::from_millis(100)).await; - } - } - - #[tokio::test] - async fn init_error_generic() { - // use an object store that will hopefully fail to read - let store = Arc::new(ObjectStoreImpl::new_failing_store().unwrap()); - let application = Arc::new(ApplicationState::new(store, None, None, None)); - let server = make_server(application); - - server.set_id(ServerId::try_from(1).unwrap()).unwrap(); - let err = server.wait_for_init().await.unwrap_err(); - assert!( - matches!(err.as_ref(), InitError::GetServerConfig { .. 
}),
-            "got: {:?}",
-            err
-        );
-        assert_contains!(
-            server.server_init_error().unwrap().to_string(),
-            "error getting server config from object storage:"
-        );
-    }
-
-    #[tokio::test]
-    async fn init_error_database() {
-        let application = make_application();
-        let server_id = ServerId::try_from(1).unwrap();
-
-        let server = make_server(Arc::clone(&application));
-        server.set_id(server_id).unwrap();
-        server.wait_for_init().await.unwrap();
-
-        let foo_db_name = DatabaseName::new("foo").unwrap();
-        let bar_db_name = DatabaseName::new("bar").unwrap();
-        let baz_db_name = DatabaseName::new("baz").unwrap();
-
-        // create database foo
-        create_simple_database(&server, "foo")
-            .await
-            .expect("failed to create database");
-
-        // create database bar so it gets written to the server config
-        let bar = create_simple_database(&server, "bar")
-            .await
-            .expect("failed to create database");
-
-        // make the db rules for bar invalid
-        let iox_object_store = bar.iox_object_store();
-
-        iox_object_store
-            .put_database_rules_file(Bytes::from("x"))
-            .await
-            .unwrap();
-        iox_object_store.get_database_rules_file().await.unwrap();
-
-        // create database baz so it gets written to the server config
-        let baz = create_simple_database(&server, "baz")
-            .await
-            .expect("failed to create database");
-
-        // make the owner info for baz say it's owned by a different server
-        let baz_iox_object_store = baz.iox_object_store();
-        let owner_info = management::v1::OwnerInfo {
-            id: 2,
-            location: "nodes/2/config.pb".to_string(),
-            transactions: vec![],
-        };
-        let mut encoded = bytes::BytesMut::new();
-        generated_types::server_config::encode_database_owner_info(&owner_info, &mut encoded)
-            .expect("owner info serialization should be valid");
-        let encoded = encoded.freeze();
-
-        baz_iox_object_store.put_owner_file(encoded).await.unwrap();
-
-        // restart server
-        let server = make_server(application);
-        server.set_id(server_id).unwrap();
-        server.wait_for_init().await.unwrap();
-
-        // generic error MUST NOT be set
-        assert!(server.server_init_error().is_none());
-
-        // server is initialized
-        assert!(server.initialized());
-
-        // DB names contain all DBs
-        assert_eq!(
-            server.db_names_sorted(),
-            vec!["bar".to_string(), "baz".to_string(), "foo".to_string()]
-        );
-
-        let foo_database = server.database(&foo_db_name).unwrap();
-        let bar_database = server.database(&bar_db_name).unwrap();
-        let baz_database = server.database(&baz_db_name).unwrap();
-
-        foo_database.wait_for_init().await.unwrap();
-        assert!(foo_database.init_error().is_none());
-
-        let err = bar_database.wait_for_init().await.unwrap_err();
-        assert_contains!(err.to_string(), "error deserializing database rules");
-        assert_contains!(
-            err.to_string(),
-            "failed to decode Protobuf message: invalid varint"
-        );
-        assert!(Arc::ptr_eq(&err, &bar_database.init_error().unwrap()));
-
-        let baz_err = baz_database.wait_for_init().await.unwrap_err();
-        assert_contains!(
-            baz_err.to_string(),
-            "Server ID in the database's owner info file (2) does not match this server's ID (1)"
-        );
-
-        // can only write to successfully created DBs
-        let tables = lines_to_batches("cpu foo=1 10", 0).unwrap();
-        let write = DmlWrite::new(foo_db_name.as_str(), tables, Default::default());
-        server
-            .db(&foo_db_name)
-            .unwrap()
-            .store_write(&write)
-            .unwrap();
-
-        let err = server.db(&bar_db_name).unwrap_err();
-        assert!(matches!(err, Error::DatabaseNotInitialized { ..
})); - - // creating failed DBs does not work - let err = create_simple_database(&server, "bar").await.unwrap_err(); - assert!(matches!(err, Error::DatabaseAlreadyExists { .. })); - } - - #[tokio::test] - async fn init_without_uuid() { - let application = make_application(); - let server_id = ServerId::try_from(1).unwrap(); - - let server = make_server(Arc::clone(&application)); - server.set_id(server_id).unwrap(); - server.wait_for_init().await.unwrap(); - - let db_name = DatabaseName::new("foo").unwrap(); - - // Create database - create_simple_database(&server, &db_name) - .await - .expect("failed to create database"); - - // restart the server - std::mem::drop(server); - let server = make_server(Arc::clone(&application)); - server.set_id(server_id).unwrap(); - server.wait_for_init().await.unwrap(); - assert!(server.initialized()); - - // database should not be in an error state - let database = server.database(&db_name).unwrap(); - database.wait_for_init().await.unwrap(); - - // update the database's rules - let rules = DatabaseRules { - name: db_name.clone(), - partition_template: PartitionTemplate { - parts: vec![TemplatePart::TimeFormat("YYYY-MM".to_string())], - }, - lifecycle_rules: Default::default(), - worker_cleanup_avg_sleep: Duration::from_secs(2), - write_buffer_connection: Some(WriteBufferConnection { - type_: "mock".to_string(), - connection: "my_mock".to_string(), - ..Default::default() - }), - }; - - let provided_rules = make_provided_rules(rules); - - server.update_db_rules(provided_rules).await.unwrap(); - } - - #[tokio::test] - async fn release_database_removes_from_memory_and_persisted_config() { - let application = make_application(); - let server_id = ServerId::try_from(1).unwrap(); - - let foo_db_name = DatabaseName::new("foo").unwrap(); - - // start server - let server = make_server(Arc::clone(&application)); - server.set_id(server_id).unwrap(); - server.wait_for_init().await.unwrap(); - - // create database - let foo = create_simple_database(&server, &foo_db_name).await.unwrap(); - let first_foo_uuid = foo.uuid(); - - // release database by name - let released_uuid = server.release_database(&foo_db_name, None).await.unwrap(); - assert_eq!(first_foo_uuid, released_uuid); - - assert_error!( - server.database(&foo_db_name), - Error::DatabaseNotFound { .. }, - ); - - let config = server_config(&*application.object_store(), server_id).await; - assert_config_contents(&config, &[]); - - // create another database - let foo = create_simple_database(&server, &foo_db_name).await.unwrap(); - let second_foo_uuid = foo.uuid(); - - // release database specifying UUID; error if UUID doesn't match - let incorrect_uuid = Uuid::new_v4(); - assert_error!( - server - .release_database(&foo_db_name, Some(incorrect_uuid)) - .await, - Error::UuidMismatch { .. } - ); - - // release database specifying UUID works if UUID *does* match - server - .release_database(&foo_db_name, Some(second_foo_uuid)) - .await - .unwrap(); - } - - #[tokio::test] - async fn cant_release_nonexistent_database() { - let application = make_application(); - let server_id = ServerId::try_from(1).unwrap(); - - let foo_db_name = DatabaseName::new("foo").unwrap(); - - // start server - let server = make_server(Arc::clone(&application)); - server.set_id(server_id).unwrap(); - server.wait_for_init().await.unwrap(); - - assert_error!( - server.release_database(&foo_db_name, None).await, - Error::DatabaseNotFound { .. 
}, - ); - } - - #[tokio::test] - async fn claim_database_adds_to_memory_and_persisted_config() { - let application = make_application(); - let server_id = ServerId::try_from(1).unwrap(); - - let foo_db_name = DatabaseName::new("foo").unwrap(); - - // start server - let server = make_server(Arc::clone(&application)); - server.set_id(server_id).unwrap(); - server.wait_for_init().await.unwrap(); - - // create database - create_simple_database(&server, &foo_db_name).await.unwrap(); - - // release database by name - let released_uuid = server.release_database(&foo_db_name, None).await.unwrap(); - - // claim database by UUID - server.claim_database(released_uuid, false).await.unwrap(); - - let claimed = server.database(&foo_db_name).unwrap(); - claimed.wait_for_init().await.unwrap(); - - let config = server_config(&*application.object_store(), server_id).await; - assert_config_contents( - &config, - &[(&foo_db_name, format!("dbs/{}/", released_uuid))], - ); - } - - #[tokio::test] - async fn cant_claim_nonexistent_database() { - let application = make_application(); - let server_id = ServerId::try_from(1).unwrap(); - - let invalid_uuid = Uuid::new_v4(); - - // start server - let server = make_server(Arc::clone(&application)); - server.set_id(server_id).unwrap(); - server.wait_for_init().await.unwrap(); - - assert_error!( - server.claim_database(invalid_uuid, false).await, - Error::DatabaseUuidNotFound { .. }, - ); - } - - /// create servers (1 and 2) with a database on server 1 - async fn make_2_servers() -> (Arc, Arc, DatabaseName<'static>, Uuid) { - let application = make_application(); - let server_id1 = ServerId::try_from(1).unwrap(); - let server_id2 = ServerId::try_from(2).unwrap(); - let foo_db_name = DatabaseName::new("foo").unwrap(); - - // start server 1 - let server1 = make_server(Arc::clone(&application)); - server1.set_id(server_id1).unwrap(); - server1.wait_for_init().await.unwrap(); - - // create database owned by server 1 - let database = create_simple_database(&server1, &foo_db_name) - .await - .unwrap(); - let uuid = database.uuid(); - - // start server 2 - let server2 = make_server(Arc::clone(&application)); - server2.set_id(server_id2).unwrap(); - server2.wait_for_init().await.unwrap(); - - (server1, server2, foo_db_name, uuid) - } - - #[tokio::test] - async fn cant_claim_database_owned_by_another_server() { - let (server1, server2, db_name, db_uuid) = make_2_servers().await; - - // Attempting to claim on server 2 will fail - assert_error!( - server2.claim_database(db_uuid, false).await, - Error::CannotClaimDatabase { - source: database::init::InitError::CantClaimDatabaseCurrentlyOwned { server_id, .. } - } if server_id == server1.server_id().unwrap().get_u32() - ); - - // Have to release from server 1 first - server1.release_database(&db_name, None).await.unwrap(); - - // Then claiming on server 2 will work - server2.claim_database(db_uuid, false).await.unwrap(); - } - - #[tokio::test] - async fn can_force_claim_database_owned_by_another_server() { - let (server1, server2, _db_name, db_uuid) = make_2_servers().await; - - // shutdown server 1 - server1.shutdown(); - server1 - .join() - .await - .expect("Server successfully terminated"); - - // Attempting to claim on server 2 will fail - assert_error!( - server2.claim_database(db_uuid, false).await, - Error::CannotClaimDatabase { - source: database::init::InitError::CantClaimDatabaseCurrentlyOwned { server_id, .. 
} - } if server_id == server1.server_id().unwrap().get_u32() - ); - - // Then claiming on server 2 with `force=true` will work - server2.claim_database(db_uuid, true).await.unwrap(); - } - - #[tokio::test] - async fn wipe_preserved_catalog() { - // have the following DBs: - // 1. existing => cannot be wiped - // 2. non-existing => can be wiped, will not exist afterwards - // 3. existing one, but rules file is broken => can be wiped, will not exist afterwards - // 4. existing one, but catalog is broken => can be wiped, will exist afterwards - // 5. recently (during server lifecycle) created one => cannot be wiped - let db_name_existing = DatabaseName::new("db_existing").unwrap(); - - let db_name_non_existing = DatabaseName::new("db_non_existing").unwrap(); - let db_uuid_non_existing = Uuid::new_v4(); - - let db_name_rules_broken = DatabaseName::new("db_broken_rules").unwrap(); - let db_name_catalog_broken = DatabaseName::new("db_broken_catalog").unwrap(); - let db_name_created = DatabaseName::new("db_created").unwrap(); - - // setup - let application = make_application(); - let server_id = ServerId::try_from(1).unwrap(); - - // Create temporary server to create existing databases - let server = make_server(Arc::clone(&application)); - server.set_id(server_id).unwrap(); - server.wait_for_init().await.unwrap(); - - create_simple_database(&server, db_name_existing.clone()) - .await - .expect("failed to create database"); - - let rules_broken = create_simple_database(&server, db_name_rules_broken.clone()) - .await - .expect("failed to create database"); - - let catalog_broken = create_simple_database(&server, db_name_catalog_broken.clone()) - .await - .expect("failed to create database"); - - // tamper store to break one database - rules_broken - .iox_object_store() - .put_database_rules_file(Bytes::from("x")) - .await - .unwrap(); - - let config = PreservedCatalogConfig::new( - catalog_broken.iox_object_store(), - db_name_catalog_broken.to_string(), - Arc::clone(application.time_provider()), - ); - - let (preserved_catalog, _catalog) = load_ok(config).await.unwrap(); - - parquet_catalog::test_helpers::break_catalog_with_weird_version(&preserved_catalog).await; - drop(preserved_catalog); - - rules_broken - .iox_object_store() - .get_database_rules_file() - .await - .unwrap(); - - server.shutdown(); - server.join().await.unwrap(); - - // boot actual test server - let server = make_server(Arc::clone(&application)); - - // cannot wipe if server ID is not set - assert_eq!( - server - .wipe_preserved_catalog(&db_name_non_existing) - .await - .unwrap_err() - .to_string(), - "id not set" - ); - - server.set_id(ServerId::try_from(1).unwrap()).unwrap(); - server.wait_for_init().await.unwrap(); - - let existing = server.database(&db_name_existing).unwrap(); - let catalog_broken = server.database(&db_name_catalog_broken).unwrap(); - let rules_broken = server.database(&db_name_rules_broken).unwrap(); - - // Wait for databases to finish startup - existing.wait_for_init().await.unwrap(); - - let err = catalog_broken.wait_for_init().await.unwrap_err(); - assert!(matches!( - err.as_ref(), - database::init::InitError::CatalogLoad { .. } - )); - - let err = rules_broken.wait_for_init().await.unwrap_err(); - assert_contains!(err.to_string(), "error deserializing database rules"); - - // 1. 
cannot wipe if DB exists - assert_eq!( - server - .wipe_preserved_catalog(&db_name_existing) - .await - .unwrap_err() - .to_string(), - "error wiping preserved catalog: database (db_existing) in invalid state (Initialized) \ - for transition (WipePreservedCatalog)" - ); - assert!(PreservedCatalog::exists(&existing.iox_object_store()) - .await - .unwrap()); - - // 2. cannot wipe non-existent DB - assert!(matches!( - server.database(&db_name_non_existing).unwrap_err(), - Error::DatabaseNotFound { .. } - )); - let non_existing_iox_object_store = Arc::new( - IoxObjectStore::create( - Arc::clone(&application.object_store()), - db_uuid_non_existing, - ) - .await - .unwrap(), - ); - - let config = PreservedCatalogConfig::new( - non_existing_iox_object_store, - db_name_non_existing.to_string(), - Arc::clone(application.time_provider()), - ); - new_empty(config).await; - - assert_eq!( - server - .wipe_preserved_catalog(&db_name_non_existing) - .await - .unwrap_err() - .to_string(), - "database not found: db_non_existing" - ); - - // 3. cannot wipe DB with broken rules file - assert!(server - .database(&db_name_rules_broken) - .unwrap() - .init_error() - .is_some()); - - assert_eq!( - server - .wipe_preserved_catalog(&db_name_rules_broken) - .await - .unwrap_err() - .to_string(), - "error wiping preserved catalog: database (db_broken_rules) in invalid state (RulesLoadError) \ - for transition (WipePreservedCatalog)" - ); - - // 4. wipe DB with broken catalog, this will bring the DB back to life - let database = server.database(&db_name_catalog_broken).unwrap(); - assert!(database.init_error().is_some()); - - let tracker = server - .wipe_preserved_catalog(&db_name_catalog_broken) - .await - .unwrap(); - - let metadata = tracker.metadata(); - let expected_metadata = Job::WipePreservedCatalog { - db_name: Arc::from(db_name_catalog_broken.as_str()), - }; - assert_eq!(metadata, &expected_metadata); - tracker.join().await; - - database.wait_for_init().await.unwrap(); - - assert!(PreservedCatalog::exists(&catalog_broken.iox_object_store()) - .await - .unwrap()); - assert!(database.init_error().is_none()); - - let db = server.db(&db_name_catalog_broken).unwrap(); - let tables = lines_to_batches("cpu bar=1 10", 0).unwrap(); - let write = DmlWrite::new(db_name_catalog_broken.as_str(), tables, Default::default()); - db.store_write(&write).unwrap(); - - // 5. cannot wipe if DB was just created - let created = server - .create_database(default_rules(db_name_created.clone())) - .await - .unwrap(); - - assert_eq!( - server - .wipe_preserved_catalog(&db_name_created) - .await - .unwrap_err() - .to_string(), - "error wiping preserved catalog: database (db_created) in invalid state (Initialized) \ - for transition (WipePreservedCatalog)" - ); - assert!(PreservedCatalog::exists(&created.iox_object_store()) - .await - .unwrap()); - } - - fn default_rules(db_name: DatabaseName<'static>) -> ProvidedDatabaseRules { - make_provided_rules(DatabaseRules::new(db_name)) - } - - /// Normally database rules are provided as grpc messages, but in - /// tests they are constructed from database rules structures - /// themselves. 
- fn make_provided_rules(rules: DatabaseRules) -> ProvidedDatabaseRules { - ProvidedDatabaseRules::new_rules(rules.into()) - .expect("Tests should create valid DatabaseRules") - } - - #[tokio::test] - async fn job_metrics() { - let application = make_application(); - let server = make_server(Arc::clone(&application)); - - let wait_nanos = 1000; - let job = application - .job_registry() - .spawn_dummy_job(vec![wait_nanos], Some(Arc::from("some_db"))); - - job.join().await; - - // need to force-update metrics - application.job_registry().reclaim(); - - let mut reporter = metric::RawReporter::default(); - application.metric_registry().report(&mut reporter); - - server.shutdown(); - server.join().await.unwrap(); - - // ========== influxdb_iox_job_count ========== - let metric = reporter.metric("influxdb_iox_job_count").unwrap(); - assert_eq!(metric.kind, metric::MetricKind::U64Gauge); - let observation = metric - .observation(&[ - ("description", "Dummy Job, for testing"), - ("status", "Success"), - ("db_name", "some_db"), - ]) - .unwrap(); - assert_eq!(observation, &metric::Observation::U64Gauge(1)); - - // ========== influxdb_iox_job_completed_cpu ========== - let metric = reporter.metric("influxdb_iox_job_completed_cpu").unwrap(); - assert_eq!(metric.kind, metric::MetricKind::DurationHistogram); - metric - .observation(&[ - ("description", "Dummy Job, for testing"), - ("status", "Success"), - ("db_name", "some_db"), - ]) - .unwrap(); - - // ========== influxdb_iox_job_completed_wall ========== - let metric = reporter.metric("influxdb_iox_job_completed_wall").unwrap(); - assert_eq!(metric.kind, metric::MetricKind::DurationHistogram); - metric - .observation(&[ - ("description", "Dummy Job, for testing"), - ("status", "Success"), - ("db_name", "some_db"), - ]) - .unwrap(); - } - - #[tokio::test] - async fn test_server_reporter() { - let application = make_application(); - let server = make_server(Arc::clone(&application)); - - let report = || { - let mut reporter = metric::RawReporter::default(); - application.metric_registry().report(&mut reporter); - reporter - }; - - let reporter = report(); - let server_state = reporter.metric("server_state").unwrap(); - assert_eq!(server_state.observations.len(), 1); - assert_eq!( - server_state.observations[0].0, - Attributes::from(&[("state", "Startup")]) - ); - - server - .set_id(ServerId::new(NonZeroU32::new(123).unwrap())) - .unwrap(); - - server.wait_for_init().await.unwrap(); - - // Should report ID with state - let reporter = report(); - let server_state = reporter.metric("server_state").unwrap(); - assert_eq!(server_state.observations.len(), 1); - assert_eq!( - server_state.observations[0].0, - Attributes::from(&[("state", "Initialized"), ("server_id", "123")]) - ); - - let database = create_simple_database(&server, "test_db").await.unwrap(); - let reporter = report(); - let db_state = reporter.metric("database_state").unwrap(); - assert_eq!(db_state.observations.len(), 1); - assert_eq!( - db_state.observations[0].0, - Attributes::from(&[("name", "test_db"), ("state", "Initialized")]) - ); - - database.shutdown(); - database.join().await.unwrap(); - - let reporter = report(); - let db_state = reporter.metric("database_state").unwrap(); - assert_eq!(db_state.observations.len(), 1); - assert_eq!( - db_state.observations[0].0, - Attributes::from(&[("name", "test_db"), ("state", "Shutdown")]) - ); - } - - #[tokio::test] - async fn set_server_id_twice() { - test_helpers::maybe_start_logging(); - let server = make_server(make_application()); - - 
server.set_id(ServerId::try_from(1).unwrap()).unwrap();
-        server.wait_for_init().await.unwrap();
-
-        server.set_id(ServerId::try_from(1).unwrap()).unwrap();
-
-        assert_error!(
-            server.set_id(ServerId::try_from(2).unwrap()),
-            Error::IdAlreadySet
-        );
-    }
-}
diff --git a/server/src/rules.rs b/server/src/rules.rs deleted file mode 100644 index 522fb7d798..0000000000 --- a/server/src/rules.rs +++ /dev/null @@ -1,134 +0,0 @@
-use data_types::{database_rules::DatabaseRules, DatabaseName};
-use generated_types::{
-    google::{FieldViolation, FieldViolationExt},
-    influxdata::iox::management,
-};
-use std::{
-    convert::{TryFrom, TryInto},
-    sync::Arc,
-};
-use uuid::Uuid;
-
-/// The configuration ([`DatabaseRules`]) used to create and update
-/// databases, both in original and "materialized" (with defaults filled in) form.
-///
-/// The rationale for storing the rules both as they were provided
-/// *and* in materialized form is to provide the property that, if the
-/// same rules that were previously sent are sent to a database again,
-/// the database will still be running the same configuration. If only
-/// the materialized configuration were stored, and the defaults were
-/// then changed in a new version of the software, the required
-/// property would not hold.
-///
-/// While this may sound like an esoteric corner case with little
-/// real-world impact, it has non-trivial real-world implications for
-/// keeping the configurations of fleets of IOx servers in sync. See
-/// the linked issue for further gory details.
-///
-/// A design goal is to keep the notion of "what the user provided" as
-/// isolated as possible, so that only the server crate worries
-/// about what the user actually provided and the rest of the system
-/// can use `data_types::database_rules::PersistedDatabaseRules` in
-/// blissful ignorance of such subtleties.
-#[derive(Debug, Clone)]
-pub struct ProvidedDatabaseRules {
-    /// Full database rules, with all fields set. Derived from
-    /// `original` by applying default values.
-    full: Arc<DatabaseRules>,
-
-    /// Encoded database rules, as provided by the user and as stored
-    /// in the object store (may not have all fields set).
-    original: management::v1::DatabaseRules,
-}
-
-impl ProvidedDatabaseRules {
-    // Create new provided rules containing only the database name,
-    // with all other fields defaulted
-    pub fn new_empty(db_name: DatabaseName<'static>) -> Self {
-        let original = management::v1::DatabaseRules {
-            name: db_name.to_string(),
-            ..Default::default()
-        };
-
-        // Should always be able to create `DatabaseRules` from default values
-        let full = Arc::new(original.clone().try_into().expect("creating empty rules"));
-
-        Self { full, original }
-    }
-
-    pub fn new_rules(original: management::v1::DatabaseRules) -> Result<Self, FieldViolation> {
-        let full = Arc::new(original.clone().try_into()?);
-
-        Ok(Self { full, original })
-    }
-
-    /// Returns the name of the database in the rules
-    pub fn db_name(&self) -> &DatabaseName<'static> {
-        &self.full.name
-    }
-
-    /// Return the full database rules
-    pub fn rules(&self) -> &Arc<DatabaseRules> {
-        &self.full
-    }
-
-    /// Return the original rules as provided to this struct
-    pub fn original(&self) -> &management::v1::DatabaseRules {
-        &self.original
-    }
-}
-
-#[derive(Debug, Clone)]
-pub struct PersistedDatabaseRules {
-    uuid: Uuid,
-    provided: ProvidedDatabaseRules,
-}
-
-impl PersistedDatabaseRules {
-    pub fn new(uuid: Uuid, provided: ProvidedDatabaseRules) -> Self {
-        Self { uuid, provided }
-    }
-
-    pub fn uuid(&self) -> Uuid {
-        self.uuid
-    }
-
-    pub fn db_name(&self) -> &DatabaseName<'static> {
-        &self.provided.full.name
-    }
-
-    /// Return the full database rules
-    pub fn rules(&self) -> &Arc<DatabaseRules> {
-        &self.provided.full
-    }
-
-    /// Return the original rules as provided to this struct
-    pub fn original(&self) -> &management::v1::DatabaseRules {
-        &self.provided.original
-    }
-
-    /// Convert to its inner representation
-    pub fn into_inner(self) -> (Uuid, ProvidedDatabaseRules) {
-        (self.uuid, self.provided)
-    }
-}
-
-impl TryFrom<management::v1::PersistedDatabaseRules> for PersistedDatabaseRules {
-    type Error = FieldViolation;
-
-    /// Create a new PersistedDatabaseRules from a gRPC message
-    fn try_from(proto: management::v1::PersistedDatabaseRules) -> Result<Self, Self::Error> {
-        let original: management::v1::DatabaseRules = proto
-            .rules
-            .ok_or_else(|| FieldViolation::required("rules"))?;
-
-        let full = Arc::new(original.clone().try_into()?);
-
-        let uuid = Uuid::from_slice(&proto.uuid).scope("uuid")?;
-
-        Ok(Self {
-            uuid,
-            provided: ProvidedDatabaseRules { full, original },
-        })
-    }
-}
diff --git a/server/tests/delete.rs b/server/tests/delete.rs deleted file mode 100644 index 2db805b0b4..0000000000 --- a/server/tests/delete.rs +++ /dev/null @@ -1,255 +0,0 @@
-use arrow_util::assert_batches_sorted_eq;
-use data_types::{
-    chunk_metadata::ChunkStorage,
-    database_rules::{DatabaseRules, LifecycleRules, PartitionTemplate, TemplatePart},
-    delete_predicate::{DeleteExpr, DeletePredicate},
-    server_id::ServerId,
-    timestamp::TimestampRange,
-    DatabaseName,
-};
-use db::{
-    test_helpers::{run_query, write_lp},
-    Db,
-};
-use futures::TryStreamExt;
-use query::{QueryChunk, QueryDatabase};
-use server::{
-    rules::ProvidedDatabaseRules,
-    test_utils::{make_application, make_initialized_server},
-};
-use std::{
-    num::{NonZeroU32, NonZeroU64},
-    sync::Arc,
-    time::{Duration, Instant},
-};
-use test_helpers::maybe_start_logging;
-
-#[tokio::test]
-async fn delete_predicate_preservation() {
-    maybe_start_logging();
-
-    // ==================== setup ====================
-    let server_id = ServerId::new(NonZeroU32::new(1).unwrap());
-    let db_name = DatabaseName::new("delete_predicate_preservation_test").unwrap();
-
-    let application = make_application();
-    let server = make_initialized_server(server_id, Arc::clone(&application)).await;
-
-    // Test that delete predicates are stored within the preserved catalog
-
-    // ==================== do: create DB ====================
-    // Create a DB given a server id, an object store and a db name
-
-    let rules = DatabaseRules {
-        partition_template: PartitionTemplate {
-            parts: vec![TemplatePart::Column("part".to_string())],
-        },
-        lifecycle_rules: LifecycleRules {
-            catalog_transactions_until_checkpoint: NonZeroU64::new(1).unwrap(),
-            // do not prune transaction files because this test relies on them
-            catalog_transaction_prune_age: Duration::from_secs(1_000),
-            late_arrive_window_seconds: NonZeroU32::new(1).unwrap(),
-            ..Default::default()
-        },
-        ..DatabaseRules::new(db_name.clone())
-    };
-
-    let database = server
-        .create_database(ProvidedDatabaseRules::new_rules(rules.clone().into()).unwrap())
-        .await
-        .unwrap();
-    let db = database.initialized_db().unwrap();
-
-    // ==================== do: create chunks ====================
-    let table_name = "cpu";
-
-    // 1: preserved
-    let partition_key = "part_a";
-    write_lp(&db, "cpu,part=a row=10,selector=0i 10");
-    write_lp(&db, "cpu,part=a row=11,selector=1i 11");
-    db.persist_partition(table_name, partition_key, true)
-        .await
-        .unwrap();
-
-    // 2: RUB (read buffer)
-    let partition_key = "part_b";
-    write_lp(&db, "cpu,part=b row=20,selector=0i 20");
-    write_lp(&db, "cpu,part=b row=21,selector=1i 21");
-    db.compact_partition(table_name, partition_key)
-        .await
-        .unwrap();
-
-    // 3: MUB (mutable buffer)
-    let _partition_key = "part_c";
-    write_lp(&db, "cpu,part=c row=30,selector=0i 30");
-    write_lp(&db, "cpu,part=c row=31,selector=1i 31");
-
-    // 4: preserved and unloaded
-    let partition_key = "part_d";
-    write_lp(&db, "cpu,part=d row=40,selector=0i 40");
-    write_lp(&db, "cpu,part=d row=41,selector=1i 41");
-
-    let chunk_id = db
-        .persist_partition(table_name, partition_key, true)
-        .await
-        .unwrap()
-        .unwrap()
-        .id();
-
-    db.unload_read_buffer(table_name, partition_key, chunk_id)
-        .unwrap();
-
-    // ==================== do: delete ====================
-    let pred = Arc::new(DeletePredicate {
-        range: TimestampRange::new(0, 1_000),
-        exprs: vec![DeleteExpr::new(
-            "selector".to_string(),
-            data_types::delete_predicate::Op::Eq,
-            data_types::delete_predicate::Scalar::I64(1),
-        )],
-    });
-    db.delete("cpu", Arc::clone(&pred)).unwrap();
-
-    // ==================== do: preserve another partition ====================
-    let partition_key = "part_b";
-    db.persist_partition(table_name, partition_key, true)
-        .await
-        .unwrap();
-
-    // ==================== do: use background worker for a short while ====================
-    let iters_start = db.worker_iterations_delete_predicate_preservation();
-    // time_provider.inc(rules.lifecycle_rules.late_arrive_window());
-
-    let t_0 = Instant::now();
-    loop {
-        let did_delete_predicate_preservation =
-            db.worker_iterations_delete_predicate_preservation() > iters_start;
-        let did_compaction = db.chunk_summaries().into_iter().any(|summary| {
-            (summary.partition_key.as_ref() == "part_c")
-                && (summary.storage == ChunkStorage::ReadBuffer)
-        });
-        if did_delete_predicate_preservation && did_compaction {
-            break;
-        }
-        assert!(t_0.elapsed() < Duration::from_secs(10));
-        tokio::time::sleep(Duration::from_millis(100)).await;
-    }
-
-    // ==================== check: delete predicates ====================
-
-    let closure_check_delete_predicates = |db: &Arc<Db>| {
-        let db = Arc::clone(db);
-        let pred = pred.clone();
-
-        async move {
-            let chunks = db
-                .chunks(table_name, &Default::default())
-                .await
-                .expect("error getting chunks");
-            for chunk in
chunks { - let addr = chunk.addr(); - let partition_key = addr.partition_key.as_ref(); - if partition_key == "part_b" { - // Strictly speaking not required because the chunk was persisted AFTER the delete predicate was - // registered so we can get away with materializing it during persistence. - continue; - } - if partition_key == "part_c" { - // This partition was compacted, so the delete predicates were materialized. - continue; - } - let predicates = chunk.delete_predicates(); - assert_eq!(predicates.len(), 1); - assert_eq!(predicates[0].as_ref(), pred.as_ref()); - } - } - }; - closure_check_delete_predicates(&db).await; - - // ==================== check: query ==================== - let expected = vec![ - "+------+-----+----------+--------------------------------+", - "| part | row | selector | time |", - "+------+-----+----------+--------------------------------+", - "| a | 10 | 0 | 1970-01-01T00:00:00.000000010Z |", - "| b | 20 | 0 | 1970-01-01T00:00:00.000000020Z |", - "| c | 30 | 0 | 1970-01-01T00:00:00.000000030Z |", - "| d | 40 | 0 | 1970-01-01T00:00:00.000000040Z |", - "+------+-----+----------+--------------------------------+", - ]; - let batches = run_query(Arc::clone(&db), "select * from cpu order by time").await; - assert_batches_sorted_eq!(&expected, &batches); - - // ==================== do: re-load DB ==================== - // Re-create database with same store, serverID, and DB name - database.restart().await.unwrap(); - let db = database.initialized_db().unwrap(); - - // ==================== check: delete predicates ==================== - closure_check_delete_predicates(&db).await; - - // ==================== check: query ==================== - // NOTE: partition "c" is gone here because it was not written to object store - let expected = vec![ - "+------+-----+----------+--------------------------------+", - "| part | row | selector | time |", - "+------+-----+----------+--------------------------------+", - "| a | 10 | 0 | 1970-01-01T00:00:00.000000010Z |", - "| b | 20 | 0 | 1970-01-01T00:00:00.000000020Z |", - "| d | 40 | 0 | 1970-01-01T00:00:00.000000040Z |", - "+------+-----+----------+--------------------------------+", - ]; - - let batches = run_query(Arc::clone(&db), "select * from cpu order by time").await; - assert_batches_sorted_eq!(&expected, &batches); - - database.restart().await.unwrap(); - - // ==================== do: remove checkpoint files ==================== - let iox_object_store = database.iox_object_store(); - - let files = iox_object_store - .catalog_transaction_files() - .await - .unwrap() - .try_concat() - .await - .unwrap(); - - let mut deleted_one = false; - for file in files { - if file.is_checkpoint() { - iox_object_store - .delete_catalog_transaction_file(&file) - .await - .unwrap(); - deleted_one = true; - } - } - assert!(deleted_one); - - // ==================== do: re-load DB ==================== - // Re-create database with same store, serverID, and DB name - database.restart().await.unwrap(); - let db = database.initialized_db().unwrap(); - - // ==================== check: delete predicates ==================== - closure_check_delete_predicates(&db).await; - - // ==================== check: query ==================== - // NOTE: partition "c" is gone here because it was not written to object store - let _expected = vec![ - "+------+-----+----------+--------------------------------+", - "| part | row | selector | time |", - "+------+-----+----------+--------------------------------+", - "| a | 10 | 0 | 1970-01-01T00:00:00.000000010Z 
|", - "| b | 20 | 0 | 1970-01-01T00:00:00.000000020Z |", - "| d | 40 | 0 | 1970-01-01T00:00:00.000000040Z |", - "+------+-----+----------+--------------------------------+", - ]; - let batches = run_query(Arc::clone(&db), "select * from cpu order by time").await; - assert_batches_sorted_eq!(&expected, &batches); - - server.shutdown(); - server.join().await.unwrap(); -} diff --git a/server/tests/write_buffer_delete.rs b/server/tests/write_buffer_delete.rs deleted file mode 100644 index 6d66f2fd6b..0000000000 --- a/server/tests/write_buffer_delete.rs +++ /dev/null @@ -1,230 +0,0 @@ -use arrow_util::assert_batches_eq; -use data_types::{ - delete_predicate::{DeleteExpr, DeletePredicate, Op, Scalar}, - router::{ - Matcher, MatcherToShard, QuerySinks, Router as RouterConfig, ShardConfig, ShardId, - WriteSink, WriteSinkSet, WriteSinkVariant, - }, - server_id::ServerId, - timestamp::TimestampRange, - DatabaseName, -}; -use db::{test_helpers::wait_for_tables, Db}; -use dml::{DmlDelete, DmlOperation, DmlWrite}; -use generated_types::influxdata::iox::{ - management::v1::DatabaseRules, write_buffer::v1::WriteBufferConnection, -}; -use mutable_batch_lp::lines_to_batches; -use query::{exec::ExecutionContextProvider, frontend::sql::SqlQueryPlanner}; -use regex::Regex; -use router::{router::Router, server::RouterServer}; -use server::{ - rules::ProvidedDatabaseRules, - test_utils::{make_application, make_initialized_server}, - Server, -}; -use std::{collections::BTreeMap, num::NonZeroU32, sync::Arc}; -use write_buffer::mock::MockBufferSharedState; - -/// A distributed IOx topology consisting of a router and a database, separated by a write buffer -/// -/// There is some overlap with `ReplayTest` in `server` and `ServerFixture` in the end-to-end -/// tests. The former is primarily concerned with the interaction of replay and persistence, -/// whilst the latter is concerned with the behaviour of the process as a whole. -/// -/// `DistributedTest` sits somewhere in the middle, it is not concerned with the details of -/// persistence or replay, but is still at a low enough level that it can manipulate the server -/// APIs directly and is not restricted to what is exposed over gRPC. -/// -/// It primarily exists to test the routing logic. 
-///
-struct DistributedTest {
-    router: Arc<Router>,
-
-    consumer: Arc<Server>,
-    consumer_db: Arc<Db>,
-}
-
-impl DistributedTest {
-    /// Create a new DistributedTest
-    pub async fn new(db_name: &DatabaseName<'static>) -> Self {
-        let write_buffer_state =
-            MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::new(1).unwrap());
-
-        let application = make_application();
-        application
-            .write_buffer_factory()
-            .register_mock("my_mock".to_string(), write_buffer_state);
-
-        let write_buffer_connection = WriteBufferConnection {
-            r#type: "mock".to_string(),
-            connection: "my_mock".to_string(),
-            connection_config: Default::default(),
-            creation_config: None,
-        };
-
-        // Create a router
-        let router_server = RouterServer::for_testing(
-            None,
-            None,
-            Arc::clone(application.time_provider()),
-            Some(Arc::clone(application.write_buffer_factory())),
-        )
-        .await;
-        let router_id = ServerId::new(NonZeroU32::new(1).unwrap());
-        router_server.set_server_id(router_id).unwrap();
-
-        router_server.update_router(RouterConfig {
-            name: db_name.to_string(),
-            write_sharder: ShardConfig {
-                specific_targets: vec![MatcherToShard {
-                    matcher: Matcher {
-                        table_name_regex: Some(Regex::new(".*").unwrap()),
-                    },
-                    shard: ShardId::new(1),
-                }],
-                hash_ring: None,
-            },
-            write_sinks: BTreeMap::from([(
-                ShardId::new(1),
-                WriteSinkSet {
-                    sinks: vec![WriteSink {
-                        sink: WriteSinkVariant::WriteBuffer(
-                            write_buffer_connection.clone().try_into().unwrap(),
-                        ),
-                        ignore_errors: false,
-                    }],
-                },
-            )]),
-            query_sinks: QuerySinks::default(),
-        });
-        let router = router_server.router(db_name).unwrap();
-
-        // Create a consumer
-        let consumer_id = ServerId::new(NonZeroU32::new(2).unwrap());
-        let consumer = make_initialized_server(consumer_id, Arc::clone(&application)).await;
-
-        let consumer_db = consumer
-            .create_database(
-                ProvidedDatabaseRules::new_rules(DatabaseRules {
-                    name: db_name.to_string(),
-                    write_buffer_connection: Some(write_buffer_connection.clone()),
-                    ..Default::default()
-                })
-                .unwrap(),
-            )
-            .await
-            .unwrap()
-            .initialized_db()
-            .unwrap();
-
-        Self {
-            router,
-            consumer,
-            consumer_db,
-        }
-    }
-
-    /// Wait for the consumer to have the following tables
-    pub async fn wait_for_tables(&self, expected_tables: &[&str]) {
-        wait_for_tables(&self.consumer_db, expected_tables).await
-    }
-
-    /// Write line protocol
-    pub async fn write(&self, lp: &str) {
-        self.router
-            .write(DmlOperation::Write(DmlWrite::new(
-                self.consumer_db.name().as_ref(),
-                lines_to_batches(lp, 0).unwrap(),
-                Default::default(),
-            )))
-            .await
-            .unwrap();
-    }
-
-    pub async fn delete(&self, delete: DmlDelete) {
-        // TODO: Write to router not Db (#2980)
-        self.router
-            .write(DmlOperation::Delete(delete))
-            .await
-            .unwrap();
-    }
-
-    /// Perform a query and assert the result
-    pub async fn query(&self, query: &str, expected: &[&'static str]) {
-        let ctx = self.consumer_db.new_query_context(None);
-        let physical_plan = SqlQueryPlanner::new().query(query, &ctx).await.unwrap();
-
-        let batches = ctx.collect(physical_plan).await.unwrap();
-
-        assert_batches_eq!(expected, &batches);
-    }
-
-    /// Shuts down the fixture and waits for the servers to exit
-    pub async fn drain(&self) {
-        self.consumer.shutdown();
-        self.consumer.join().await.unwrap();
-    }
-}
-
-#[tokio::test]
-async fn write_buffer_deletes() {
-    let db_name = DatabaseName::new("distributed").unwrap();
-    let fixture = DistributedTest::new(&db_name).await;
-
-    // Write some data
-    fixture.write("foo x=1 1").await;
-    fixture.write("foo x=3 2").await;
-
-    // Send a delete over the write buffer
-    fixture
-        .delete(DmlDelete::new(
-            db_name.as_str(),
-            DeletePredicate {
-                range: TimestampRange::new(0, 20),
-                exprs: vec![DeleteExpr {
-                    column: "x".to_string(),
-                    op: Op::Eq,
-                    scalar: Scalar::I64(1),
-                }],
-            },
-            None,
-            Default::default(),
-        ))
-        .await;
-
-    // Use a write to a different table to signal consumption has completed by waiting
-    // for this new table to exist in the consumer database
-    fixture.write("bar x=2 1").await;
-
-    // Wait for consumer to catch up
-    fixture.wait_for_tables(&["bar", "foo"]).await;
-
-    fixture
-        .query(
-            "select * from foo;",
-            &[
-                "+--------------------------------+---+",
-                "| time | x |",
-                "+--------------------------------+---+",
-                "| 1970-01-01T00:00:00.000000002Z | 3 |",
-                "+--------------------------------+---+",
-            ],
-        )
-        .await;
-
-    fixture
        .query(
            "select * from bar;",
            &[
                "+--------------------------------+---+",
                "| time | x |",
                "+--------------------------------+---+",
                "| 1970-01-01T00:00:00.000000001Z | 2 |",
                "+--------------------------------+---+",
            ],
        )
        .await;
-
-    fixture.drain().await;
-}
diff --git a/server/tests/write_buffer_lifecycle.rs b/server/tests/write_buffer_lifecycle.rs deleted file mode 100644 index 80b4193039..0000000000 --- a/server/tests/write_buffer_lifecycle.rs +++ /dev/null @@ -1,274 +0,0 @@
-use arrow_util::assert_batches_eq;
-use data_types::{
-    chunk_metadata::ChunkStorage,
-    database_rules::{DatabaseRules, LifecycleRules, PartitionTemplate, TemplatePart},
-    sequence::Sequence,
-    server_id::ServerId,
-    write_buffer::WriteBufferConnection,
-    DatabaseName,
-};
-use db::{
-    test_helpers::{run_query, wait_for_tables},
-    Db,
-};
-use futures_util::FutureExt;
-use server::{
-    rules::ProvidedDatabaseRules,
-    test_utils::{make_application, make_initialized_server},
-};
-use std::{
-    num::{NonZeroU32, NonZeroUsize},
-    sync::Arc,
-    time::{Duration, Instant},
-};
-use test_helpers::{assert_contains, tracing::TracingCapture};
-use write_buffer::mock::MockBufferSharedState;
-
-#[tokio::test]
-async fn write_buffer_lifecycle() {
-    // Test the interaction between the write buffer and the lifecycle
-
-    let tracing_capture = TracingCapture::new();
-
-    // ==================== setup ====================
-    let server_id = ServerId::new(NonZeroU32::new(1).unwrap());
-    let db_name = DatabaseName::new("delete_predicate_preservation_test").unwrap();
-
-    let application = make_application();
-
-    let mock_shared_state =
-        MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::new(1).unwrap());
-
-    // The writes are split into two groups to allow "pausing replay" by playing back from
-    // a MockBufferSharedState with only the first set of writes
-    write_group1(&mock_shared_state);
-    write_group2(&mock_shared_state);
-
-    application
-        .write_buffer_factory()
-        .register_mock("my_mock".to_string(), mock_shared_state.clone());
-
-    let server = make_initialized_server(server_id, Arc::clone(&application)).await;
-
-    let partition_template = PartitionTemplate {
-        parts: vec![TemplatePart::Column("tag_partition_by".to_string())],
-    };
-
-    let write_buffer_connection = WriteBufferConnection {
-        type_: "mock".to_string(),
-        connection: "my_mock".to_string(),
-        ..Default::default()
-    };
-
-    //
-    // Phase 1: Verify that consuming from a write buffer will wait for compaction in the event
-    // the hard limit is exceeded
-    //
-
-    // create DB
-    let rules = DatabaseRules {
-        partition_template: partition_template.clone(),
-        lifecycle_rules: LifecycleRules {
-            buffer_size_hard:
Some(NonZeroUsize::new(10_000).unwrap()), - mub_row_threshold: NonZeroUsize::new(10).unwrap(), - ..Default::default() - }, - write_buffer_connection: Some(write_buffer_connection.clone()), - ..DatabaseRules::new(db_name.clone()) - }; - - let database = server - .create_database(ProvidedDatabaseRules::new_rules(rules.into()).unwrap()) - .await - .unwrap(); - - let db = database.initialized_db().unwrap(); - - // after a while the table should exist - wait_for_tables(&db, &["table_1", "table_2"]).await; - - // no rows should be dropped - let batches = run_query(Arc::clone(&db), "select sum(bar) as n from table_1").await; - let expected = vec!["+----+", "| n |", "+----+", "| 25 |", "+----+"]; - assert_batches_eq!(expected, &batches); - - // check that hard buffer limit was actually hit (otherwise this test is pointless/outdated) - assert_contains!( - tracing_capture.to_string(), - "Hard limit reached while reading from write buffer, waiting for compaction to catch up" - ); - - // Persist the final write, this will ensure that we have to replay the data for table_1 - db.persist_partition("table_2", "tag_partition_by_a", true) - .await - .unwrap(); - - // Only table_2 should be persisted - assert_eq!(count_persisted_chunks(&db), 1); - - // Shutdown server - server.shutdown(); - server.join().await.unwrap(); - - // Drop so they don't contribute to metrics - std::mem::drop(server); - std::mem::drop(database); - std::mem::drop(db); - std::mem::drop(tracing_capture); - - // - // Phase 2: Verify that replaying from a write buffer will wait for compaction in the event - // the hard limit is exceeded - // - - // Recreate server - let tracing_capture = TracingCapture::new(); - - let server = make_initialized_server(server_id, Arc::clone(&application)).await; - let databases = server.databases().unwrap(); - assert_eq!(databases.len(), 1); - - let database = databases.into_iter().next().unwrap(); - database.wait_for_init().await.unwrap(); - let database_uuid = database.uuid(); - - let db = database.initialized_db().unwrap(); - let batches = run_query(Arc::clone(&db), "select sum(bar) as n from table_1").await; - let expected = vec!["+----+", "| n |", "+----+", "| 25 |", "+----+"]; - assert_batches_eq!(expected, &batches); - - assert_contains!( - tracing_capture.to_string(), - "Hard limit reached while replaying, waiting for compaction to catch up" - ); - - // Only table_2 should be persisted - assert_eq!(count_persisted_chunks(&db), 1); - - server.shutdown(); - server.join().await.unwrap(); - - // - // Phase 3: Verify that persistence is disabled during replay - // - - // Override rules to set persist row threshold lower on restart - let rules = ProvidedDatabaseRules::new_rules( - DatabaseRules { - partition_template, - lifecycle_rules: LifecycleRules { - persist: true, - late_arrive_window_seconds: NonZeroU32::new(1).unwrap(), - persist_row_threshold: NonZeroUsize::new(5).unwrap(), - ..Default::default() - }, - write_buffer_connection: Some(write_buffer_connection), - ..DatabaseRules::new(db_name.clone()) - } - .into(), - ) - .unwrap(); - - application - .config_provider() - .store_rules(database_uuid, &rules) - .await - .unwrap(); - - std::mem::drop(server); - std::mem::drop(database); - std::mem::drop(db); - std::mem::drop(tracing_capture); - - // Clear the write buffer and only write in the first group of writes - mock_shared_state.clear_messages(0); - write_group1(&mock_shared_state); - - // Restart server - this will load new rules written above - let server = make_initialized_server(server_id, 
Arc::clone(&application)).await; - let databases = server.databases().unwrap(); - assert_eq!(databases.len(), 1); - let database = databases.into_iter().next().unwrap(); - - // Sleep for a bit to allow the lifecycle policy to run a bit - // - // During this time replay should still be running as there is insufficient - // writes within the write buffer to "complete" replay. - // - // However, there are sufficient rows to exceed the persist row threshold. - // - // Therefore, if persist were not disabled by replay, the lifecycle would try - // to persist without the full set of writes. This would in turn result - // in two separate chunks being persisted for table_1 - tokio::time::sleep(Duration::from_secs(1)).await; - - assert!( - database.wait_for_init().now_or_never().is_none(), - "replay shouldn't have finished as insufficient data" - ); - - // Write in remainder of data to allow replay to finish - write_group2(&mock_shared_state); - - database.wait_for_init().await.unwrap(); - let db = database.initialized_db().unwrap(); - - let start = Instant::now(); - loop { - if count_persisted_chunks(&db) > 1 { - // As soon as replay finishes the lifecycle should have persisted everything in - // table_1 into a single chunk. We should therefore have two chunks, one for - // each of table_1 and table_2 - assert_eq!(db.chunk_summaries().len(), 2, "persisted during replay!"); - break; - } - - tokio::time::sleep(Duration::from_millis(10)).await; - assert!( - start.elapsed() < Duration::from_secs(10), - "failed to persist chunk" - ) - } - - server.shutdown(); - server.join().await.unwrap(); -} - -/// The first set of writes for the write buffer -fn write_group1(write_buffer_state: &MockBufferSharedState) { - // setup write buffer - // these numbers are handtuned to trigger hard buffer limits w/o making the test too big - let n_entries = 50u64; - - for sequence_number in 0..n_entries { - let lp = format!( - "table_1,tag_partition_by=a foo=\"hello\",bar=1 {}", - sequence_number / 2 - ); - write_buffer_state.push_lp(Sequence::new(0, sequence_number), &lp); - } -} - -/// The second set of writes for the write buffer -fn write_group2(write_buffer_state: &MockBufferSharedState) { - // Write line with timestamp 0 - this forces persistence to persist all - // prior writes if the server has read this line - write_buffer_state.push_lp( - Sequence::new(0, 100), - "table_1,tag_partition_by=a foo=\"hello\",bar=1 0", - ); - - write_buffer_state.push_lp(Sequence::new(0, 101), "table_2,tag_partition_by=a foo=1 0"); -} - -fn count_persisted_chunks(db: &Db) -> usize { - db.chunk_summaries() - .into_iter() - .filter(|x| { - matches!( - x.storage, - ChunkStorage::ObjectStoreOnly | ChunkStorage::ReadBufferAndObjectStore - ) - }) - .count() -}
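Editor's note: several of the deleted tests share a deadline-polling idiom (an `Instant::now()` start time plus a bounded loop around `tokio::time::sleep`) for awaiting background work such as server init, compaction, or persistence. The sketch below captures that pattern as a reusable helper; the function name and signature are hypothetical and not part of the deleted code, but the loop body mirrors the `t_0.elapsed() < Duration::from_secs(10)` loops above.

```rust
use std::time::{Duration, Instant};

// Poll `condition` until it holds, failing the test if `timeout` elapses.
// Hypothetical helper; mirrors the ad-hoc loops in the deleted tests.
async fn wait_until(mut condition: impl FnMut() -> bool, timeout: Duration) {
    let start = Instant::now();
    while !condition() {
        // Fail the test rather than hang forever if the condition never holds.
        assert!(
            start.elapsed() < timeout,
            "condition not met within {:?}",
            timeout
        );
        tokio::time::sleep(Duration::from_millis(100)).await;
    }
}
```

Usage would follow the same shape as the loops above, e.g. `wait_until(|| count_persisted_chunks(&db) > 1, Duration::from_secs(10)).await`.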