fix: Remove server
parent e0bc1801ac
commit b88d071ce7
@@ -2224,7 +2224,6 @@ dependencies = [
  "serde",
  "serde_json",
  "serde_urlencoded",
- "server",
  "snafu",
  "tempfile",
  "test_helpers",
@@ -5213,46 +5212,6 @@ dependencies = [
  "yaml-rust",
 ]
 
-[[package]]
-name = "server"
-version = "0.1.0"
-dependencies = [
- "arrow_util",
- "async-trait",
- "bytes",
- "chrono",
- "crc32fast",
- "data_types",
- "dml",
- "futures",
- "futures-util",
- "generated_types",
- "hashbrown 0.12.0",
- "influxdb_line_protocol",
- "iox_object_store",
- "iox_time",
- "job_registry",
- "metric",
- "mutable_batch_lp",
- "num_cpus",
- "object_store",
- "observability_deps",
- "parking_lot 0.12.0",
- "query",
- "rand",
- "regex",
- "service_common",
- "snafu",
- "test_helpers",
- "tokio",
- "tokio-util 0.7.1",
- "trace",
- "tracker",
- "uuid 0.8.2",
- "workspace-hack",
- "write_buffer",
-]
-
 [[package]]
 name = "service_common"
 version = "0.1.0"
@@ -58,7 +58,6 @@ members = [
     "read_buffer",
     "router2",
     "schema",
-    "server",
     "service_common",
     "service_grpc_influxrpc",
     "service_grpc_flight",
@@ -44,7 +44,6 @@ query = { path = "../query" }
 read_buffer = { path = "../read_buffer" }
 router2 = { path = "../router2" }
 schema = { path = "../schema" }
-server = { path = "../server" }
 iox_time = { path = "../iox_time" }
 trace = { path = "../trace" }
 trace_exporters = { path = "../trace_exporters" }
@@ -1,43 +0,0 @@
[package]
name = "server"
version = "0.1.0"
authors = ["pauldix <paul@pauldix.net>"]
edition = "2021"

[dependencies] # In alphabetical order
arrow_util = { path = "../arrow_util" }
async-trait = "0.1"
bytes = "1.0"
chrono = { version = "0.4", default-features = false }
crc32fast = "1.3.2"
data_types = { path = "../data_types" }
futures = "0.3"
generated_types = { path = "../generated_types" }
hashbrown = "0.12"
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
iox_object_store = { path = "../iox_object_store" }
job_registry = { path = "../job_registry" }
metric = { path = "../metric" }
mutable_batch_lp = { path = "../mutable_batch_lp" }
num_cpus = "1.13.0"
object_store = { path = "../object_store" }
observability_deps = { path = "../observability_deps" }
parking_lot = "0.12"
query = { path = "../query" }
rand = "0.8.3"
service_common = { path = "../service_common" }
snafu = "0.7"
iox_time = { path = "../iox_time" }
trace = { path = "../trace" }
tokio = { version = "1.18", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] }
tokio-util = { version = "0.7.1" }
tracker = { path = "../tracker" }
uuid = { version = "0.8", features = ["v4"] }
write_buffer = { path = "../write_buffer" }
workspace-hack = { path = "../workspace-hack"}

[dev-dependencies] # In alphabetical order
dml = { path = "../dml" }
futures-util = { version = "0.3" }
regex = "1"
test_helpers = { path = "../test_helpers" }
@@ -1,104 +0,0 @@
use crate::config::{object_store::ConfigProviderObjectStorage, ConfigProvider};
use iox_time::TimeProvider;
use job_registry::JobRegistry;
use object_store::DynObjectStore;
use observability_deps::tracing::info;
use query::exec::Executor;
use std::sync::Arc;
use trace::TraceCollector;
use write_buffer::config::WriteBufferConfigFactory;

/// A container for application-global resources
/// shared between server and all DatabaseInstances
#[derive(Debug, Clone)]
pub struct ApplicationState {
    object_store: Arc<DynObjectStore>,
    write_buffer_factory: Arc<WriteBufferConfigFactory>,
    executor: Arc<Executor>,
    job_registry: Arc<JobRegistry>,
    metric_registry: Arc<metric::Registry>,
    time_provider: Arc<dyn TimeProvider>,
    trace_collector: Option<Arc<dyn TraceCollector>>,
    config_provider: Arc<dyn ConfigProvider>,
}

impl ApplicationState {
    /// Creates a new `ApplicationState`
    ///
    /// Uses number of CPUs in the system if num_worker_threads is not set
    pub fn new(
        object_store: Arc<DynObjectStore>,
        num_worker_threads: Option<usize>,
        trace_collector: Option<Arc<dyn TraceCollector>>,
        config_provider: Option<Arc<dyn ConfigProvider>>,
    ) -> Self {
        let num_threads = num_worker_threads.unwrap_or_else(num_cpus::get);
        info!(%num_threads, "using specified number of threads per thread pool");

        let metric_registry = Arc::new(metric::Registry::new());
        let time_provider: Arc<dyn TimeProvider> = Arc::new(iox_time::SystemProvider::new());
        let job_registry = Arc::new(JobRegistry::new(
            Arc::clone(&metric_registry),
            Arc::clone(&time_provider),
        ));

        let write_buffer_factory = Arc::new(WriteBufferConfigFactory::new(
            Arc::clone(&time_provider),
            Arc::clone(&metric_registry),
        ));

        let config_provider = config_provider.unwrap_or_else(|| {
            Arc::new(ConfigProviderObjectStorage::new(
                Arc::clone(&object_store),
                Arc::clone(&time_provider),
            ))
        });

        Self {
            object_store,
            write_buffer_factory,
            executor: Arc::new(Executor::new(num_threads)),
            job_registry,
            metric_registry,
            time_provider,
            trace_collector,
            config_provider,
        }
    }

    pub fn object_store(&self) -> Arc<DynObjectStore> {
        Arc::clone(&self.object_store)
    }

    pub fn write_buffer_factory(&self) -> &Arc<WriteBufferConfigFactory> {
        &self.write_buffer_factory
    }

    pub fn job_registry(&self) -> &Arc<JobRegistry> {
        &self.job_registry
    }

    pub fn metric_registry(&self) -> &Arc<metric::Registry> {
        &self.metric_registry
    }

    pub fn time_provider(&self) -> &Arc<dyn TimeProvider> {
        &self.time_provider
    }

    pub fn trace_collector(&self) -> &Option<Arc<dyn TraceCollector>> {
        &self.trace_collector
    }

    pub fn config_provider(&self) -> &Arc<dyn ConfigProvider> {
        &self.config_provider
    }

    pub fn executor(&self) -> &Arc<Executor> {
        &self.executor
    }

    pub async fn join(&self) {
        self.executor.join().await;
    }
}
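Note (not part of the diff): the removed `ApplicationState` is the classic Arc-per-resource container: the struct itself is `Clone`, and accessors return `&Arc<T>` so callers decide when to bump the refcount with an explicit `Arc::clone`. A minimal, self-contained sketch of that pattern (the `AppState`/`registry` names here are illustrative, not from the codebase):

```rust
use std::sync::Arc;

/// Toy stand-in for the removed ApplicationState: cheap to clone,
/// with accessors that return `&Arc<T>` rather than cloning eagerly.
#[derive(Debug, Clone)]
struct AppState {
    registry: Arc<Vec<String>>, // stand-in for metric::Registry, JobRegistry, ...
}

impl AppState {
    fn registry(&self) -> &Arc<Vec<String>> {
        &self.registry
    }
}

fn main() {
    let state = AppState {
        registry: Arc::new(vec!["requests".to_string()]),
    };
    // The refcount bump is explicit and visible at the call site.
    let for_worker = Arc::clone(state.registry());
    assert_eq!(for_worker.len(), 1);
    assert_eq!(Arc::strong_count(&for_worker), 2);
}
```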
@@ -1,47 +0,0 @@
use crate::ProvidedDatabaseRules;
use async_trait::async_trait;
use data_types::server_id::ServerId;
use generated_types::influxdata::iox::management::v1::OwnerInfo;
use uuid::Uuid;

pub mod object_store;
mod owner;

/// A generic opaque error returned by [`ConfigProvider`]
pub type Error = Box<dyn std::error::Error + Send + Sync + 'static>;

/// Result type returned by [`ConfigProvider`]
pub type Result<T, E = Error> = std::result::Result<T, E>;

/// A generic trait for interacting with the configuration
/// of a database server
#[async_trait]
pub trait ConfigProvider: std::fmt::Debug + Send + Sync {
    /// Returns a list of database name and UUID pairs
    async fn fetch_server_config(&self, server_id: ServerId) -> Result<Vec<(String, Uuid)>>;

    /// Persists a list of database names and UUID pairs, overwriting any
    /// pre-existing persisted server configuration
    async fn store_server_config(
        &self,
        server_id: ServerId,
        config: &[(String, Uuid)],
    ) -> Result<()>;

    /// Returns the configuration for the database with the given `uuid`
    async fn fetch_rules(&self, uuid: Uuid) -> Result<ProvidedDatabaseRules>;

    /// Persists the configuration for the database with the given `uuid`
    async fn store_rules(&self, uuid: Uuid, rules: &ProvidedDatabaseRules) -> Result<()>;

    /// Returns the owner information for the database with the given `uuid`
    async fn fetch_owner_info(&self, server_id: ServerId, uuid: Uuid) -> Result<OwnerInfo>;

    /// Updates the owner information for the database with the given `uuid`
    /// and records it as owned by `server_id`
    async fn update_owner_info(&self, server_id: Option<ServerId>, uuid: Uuid) -> Result<()>;

    /// Creates the owner information for the database with the given `uuid`
    /// and records it as owned by `server_id`
    async fn create_owner_info(&self, server_id: ServerId, uuid: Uuid) -> Result<()>;
}
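Note (not part of the diff): because `ConfigProvider` is object-safe and fully async, the object-store implementation could be swapped for a test double. A hypothetical in-memory implementation, written against only the six methods shown above (the type `InMemoryConfigProvider` is invented here for illustration, and owner-info handling is stubbed out):

```rust
use async_trait::async_trait;
use parking_lot::RwLock;
use std::collections::HashMap;
use uuid::Uuid;

#[derive(Debug, Default)]
struct InMemoryConfigProvider {
    server_configs: RwLock<HashMap<u32, Vec<(String, Uuid)>>>,
    rules: RwLock<HashMap<Uuid, ProvidedDatabaseRules>>,
}

#[async_trait]
impl ConfigProvider for InMemoryConfigProvider {
    async fn fetch_server_config(&self, server_id: ServerId) -> Result<Vec<(String, Uuid)>> {
        Ok(self
            .server_configs
            .read()
            .get(&server_id.get_u32())
            .cloned()
            .unwrap_or_default())
    }

    async fn store_server_config(
        &self,
        server_id: ServerId,
        config: &[(String, Uuid)],
    ) -> Result<()> {
        self.server_configs
            .write()
            .insert(server_id.get_u32(), config.to_vec());
        Ok(())
    }

    async fn fetch_rules(&self, uuid: Uuid) -> Result<ProvidedDatabaseRules> {
        self.rules
            .read()
            .get(&uuid)
            .cloned()
            .ok_or_else(|| format!("no rules for {}", uuid).into())
    }

    async fn store_rules(&self, uuid: Uuid, rules: &ProvidedDatabaseRules) -> Result<()> {
        self.rules.write().insert(uuid, rules.clone());
        Ok(())
    }

    // Owner-info tracking is stubbed: every database appears owned by the caller.
    async fn fetch_owner_info(&self, server_id: ServerId, _uuid: Uuid) -> Result<OwnerInfo> {
        Ok(OwnerInfo {
            id: server_id.get_u32(),
            ..Default::default()
        })
    }

    async fn update_owner_info(&self, _server_id: Option<ServerId>, _uuid: Uuid) -> Result<()> {
        Ok(())
    }

    async fn create_owner_info(&self, _server_id: ServerId, _uuid: Uuid) -> Result<()> {
        Ok(())
    }
}
```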
@@ -1,256 +0,0 @@
use super::Result as ConfigResult;
use crate::{
    config::{
        owner::{
            create_owner_info, fetch_owner_info, update_owner_info, OwnerInfoCreateError,
            OwnerInfoFetchError, OwnerInfoUpdateError,
        },
        ConfigProvider,
    },
    PersistedDatabaseRules, ProvidedDatabaseRules,
};
use async_trait::async_trait;
use data_types::server_id::ServerId;
use generated_types::database_rules::encode_persisted_database_rules;
use generated_types::google::FieldViolation;
use generated_types::influxdata::iox::management;
use generated_types::influxdata::iox::management::v1::OwnerInfo;
use iox_object_store::IoxObjectStore;
use iox_time::TimeProvider;
use object_store::DynObjectStore;
use snafu::{ensure, ResultExt, Snafu};
use std::sync::Arc;
use uuid::Uuid;

/// Error enumeration for [`ConfigProviderObjectStorage`]
#[derive(Debug, Snafu)]
pub enum Error {
    #[snafu(display("error saving server config to object storage: {}", source))]
    StoreServer { source: object_store::Error },

    #[snafu(display("error getting server config from object storage: {}", source))]
    FetchServer { source: object_store::Error },

    #[snafu(display("error deserializing server config: {}", source))]
    DeserializeServer {
        source: generated_types::DecodeError,
    },

    #[snafu(display("error serializing server config: {}", source))]
    SerializeServer {
        source: generated_types::EncodeError,
    },

    #[snafu(display(
        "UUID mismatch reading server config from object storage, expected {}, got {}",
        expected,
        actual
    ))]
    UuidMismatch { expected: Uuid, actual: Uuid },

    #[snafu(display(
        "invalid database uuid in server config while finding location: {}",
        source
    ))]
    InvalidDatabaseLocation { source: uuid::Error },

    #[snafu(display("Error saving rules for {}: {}", db_name, source))]
    StoreRules {
        db_name: String,
        source: object_store::Error,
    },

    #[snafu(display("error getting database rules from object storage: {}", source))]
    RulesFetch { source: object_store::Error },

    #[snafu(display("error deserializing database rules: {}", source))]
    DeserializeRules {
        source: generated_types::DecodeError,
    },

    #[snafu(display("error serializing database rules: {}", source))]
    SerializeRules {
        source: generated_types::EncodeError,
    },

    #[snafu(display("error converting to database rules: {}", source))]
    ConvertingRules { source: FieldViolation },

    #[snafu(display("error creating database owner info: {}", source))]
    CreatingOwnerInfo { source: OwnerInfoCreateError },

    #[snafu(display("error getting database owner info: {}", source))]
    FetchingOwnerInfo { source: OwnerInfoFetchError },

    #[snafu(display("error updating database owner info: {}", source))]
    UpdatingOwnerInfo { source: OwnerInfoUpdateError },
}

type Result<T, E = Error> = std::result::Result<T, E>;

/// Parse the UUID from an object storage path
///
/// TODO: Encode this data directly in server config
fn parse_location(location: &str) -> Result<Uuid> {
    // Strip trailing / if any
    let location = location.strip_suffix('/').unwrap_or(location);
    let uuid = location.rsplit('/').next().unwrap();

    std::str::FromStr::from_str(uuid).context(InvalidDatabaseLocationSnafu)
}

#[derive(Debug)]
pub struct ConfigProviderObjectStorage {
    object_store: Arc<DynObjectStore>,
    time_provider: Arc<dyn TimeProvider>,
}

impl ConfigProviderObjectStorage {
    pub fn new(object_store: Arc<DynObjectStore>, time_provider: Arc<dyn TimeProvider>) -> Self {
        Self {
            object_store,
            time_provider,
        }
    }

    fn iox_object_store(&self, uuid: Uuid) -> IoxObjectStore {
        let root_path = IoxObjectStore::root_path_for(&*self.object_store, uuid);
        IoxObjectStore::existing(Arc::clone(&self.object_store), root_path)
    }
}

#[async_trait]
impl ConfigProvider for ConfigProviderObjectStorage {
    async fn fetch_server_config(&self, server_id: ServerId) -> ConfigResult<Vec<(String, Uuid)>> {
        let fetch_result =
            IoxObjectStore::get_server_config_file(&*self.object_store, server_id).await;

        let server_config_bytes = match fetch_result {
            Ok(bytes) => bytes,
            // If this is the first time starting up this server and there is no config file yet,
            // this isn't a problem. Start an empty server config.
            Err(object_store::Error::NotFound { .. }) => bytes::Bytes::new(),
            Err(source) => return Err(Error::FetchServer { source }.into()),
        };

        let server_config =
            generated_types::server_config::decode_persisted_server_config(server_config_bytes)
                .context(DeserializeServerSnafu)?;

        let config = server_config
            .databases
            .into_iter()
            .map(|(name, location)| Ok((name, parse_location(&location)?)))
            .collect::<Result<Vec<_>>>()?;

        self.store_server_config(server_id, &config).await?;
        Ok(config)
    }

    async fn store_server_config(
        &self,
        server_id: ServerId,
        config: &[(String, Uuid)],
    ) -> ConfigResult<()> {
        let databases = config
            .iter()
            .map(|(name, database)| {
                (
                    name.to_string(),
                    IoxObjectStore::root_path_for(&*self.object_store, *database).to_string(),
                )
            })
            .collect();

        let data = management::v1::ServerConfig { databases };

        let mut encoded = bytes::BytesMut::new();
        generated_types::server_config::encode_persisted_server_config(&data, &mut encoded)
            .context(SerializeServerSnafu)?;

        let bytes = encoded.freeze();

        IoxObjectStore::put_server_config_file(&*self.object_store, server_id, bytes)
            .await
            .context(StoreServerSnafu)?;

        Ok(())
    }

    async fn fetch_rules(&self, uuid: Uuid) -> ConfigResult<ProvidedDatabaseRules> {
        let bytes = IoxObjectStore::load_database_rules(Arc::clone(&self.object_store), uuid)
            .await
            .context(RulesFetchSnafu)?;

        let proto: management::v1::PersistedDatabaseRules =
            generated_types::database_rules::decode_persisted_database_rules(bytes)
                .context(DeserializeRulesSnafu)?;

        let rules: PersistedDatabaseRules = proto.try_into().context(ConvertingRulesSnafu)?;

        ensure!(
            uuid == rules.uuid(),
            UuidMismatchSnafu {
                expected: uuid,
                actual: rules.uuid()
            }
        );

        Ok(rules.into_inner().1)
    }

    async fn store_rules(&self, uuid: Uuid, rules: &ProvidedDatabaseRules) -> ConfigResult<()> {
        let persisted_database_rules = management::v1::PersistedDatabaseRules {
            uuid: uuid.as_bytes().to_vec(),
            // Note we save the original version
            rules: Some(rules.original().clone()),
        };

        let mut data = bytes::BytesMut::new();
        encode_persisted_database_rules(&persisted_database_rules, &mut data)
            .context(SerializeRulesSnafu)?;

        self.iox_object_store(uuid)
            .put_database_rules_file(data.freeze())
            .await
            .context(StoreRulesSnafu {
                db_name: rules.db_name(),
            })?;

        Ok(())
    }

    async fn fetch_owner_info(&self, _server_id: ServerId, uuid: Uuid) -> ConfigResult<OwnerInfo> {
        let config = fetch_owner_info(&self.iox_object_store(uuid))
            .await
            .context(FetchingOwnerInfoSnafu)?;

        Ok(config)
    }

    async fn update_owner_info(&self, server_id: Option<ServerId>, uuid: Uuid) -> ConfigResult<()> {
        let path = server_id.map(|server_id| {
            IoxObjectStore::server_config_path(&*self.object_store, server_id).to_string()
        });

        update_owner_info(
            server_id,
            path,
            self.time_provider.now(),
            &self.iox_object_store(uuid),
        )
        .await
        .context(UpdatingOwnerInfoSnafu)?;

        Ok(())
    }

    async fn create_owner_info(&self, server_id: ServerId, uuid: Uuid) -> ConfigResult<()> {
        let path = IoxObjectStore::server_config_path(&*self.object_store, server_id).to_string();
        create_owner_info(server_id, path, &self.iox_object_store(uuid))
            .await
            .context(CreatingOwnerInfoSnafu)?;

        Ok(())
    }
}
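Note (not part of the diff): `parse_location` above relies on the layout where a database's root directory in object storage is named by its UUID, so the UUID is simply the last path segment, with or without a trailing slash. A standalone restatement of that logic with the snafu context stripped (the `dbs/` prefix in the example paths is hypothetical):

```rust
use uuid::Uuid;

fn parse_uuid_from_location(location: &str) -> Result<Uuid, uuid::Error> {
    // Strip trailing '/' if any, then take the last path segment.
    let location = location.strip_suffix('/').unwrap_or(location);
    // rsplit always yields at least one item, so unwrap is safe.
    let uuid = location.rsplit('/').next().unwrap();
    uuid.parse()
}

fn main() {
    let expected: Uuid = "67e55044-10b1-426f-9247-bb680e5fe0c8".parse().unwrap();
    // Both accepted shapes: with and without the trailing slash.
    assert_eq!(
        parse_uuid_from_location("dbs/67e55044-10b1-426f-9247-bb680e5fe0c8/").unwrap(),
        expected
    );
    assert_eq!(
        parse_uuid_from_location("dbs/67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap(),
        expected
    );
}
```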
@@ -1,128 +0,0 @@
//! Code related to managing ownership information in owner.pb

use data_types::server_id::ServerId;
use generated_types::influxdata::iox::management;
use iox_object_store::IoxObjectStore;
use iox_time::Time;
use snafu::{ensure, ResultExt, Snafu};

#[derive(Debug, Snafu)]
pub enum OwnerInfoFetchError {
    #[snafu(display("error loading owner info: {}", source))]
    Loading { source: object_store::Error },

    #[snafu(display("error decoding owner info: {}", source))]
    Decoding {
        source: generated_types::DecodeError,
    },
}

pub(crate) async fn fetch_owner_info(
    iox_object_store: &IoxObjectStore,
) -> Result<management::v1::OwnerInfo, OwnerInfoFetchError> {
    let raw_owner_info = iox_object_store
        .get_owner_file()
        .await
        .context(LoadingSnafu)?;

    generated_types::server_config::decode_database_owner_info(raw_owner_info)
        .context(DecodingSnafu)
}

#[derive(Debug, Snafu)]
pub enum OwnerInfoCreateError {
    #[snafu(display("could not create new owner info file; it already exists"))]
    OwnerFileAlreadyExists,

    #[snafu(display("error creating database owner info file: {}", source))]
    CreatingOwnerFile { source: Box<object_store::Error> },
}

/// Create a new owner info file for this database. Existing content at this location in object
/// storage is an error.
pub(crate) async fn create_owner_info(
    server_id: ServerId,
    server_location: String,
    iox_object_store: &IoxObjectStore,
) -> Result<(), OwnerInfoCreateError> {
    ensure!(
        matches!(
            iox_object_store.get_owner_file().await,
            Err(object_store::Error::NotFound { .. })
        ),
        OwnerFileAlreadyExistsSnafu,
    );

    let owner_info = management::v1::OwnerInfo {
        id: server_id.get_u32(),
        location: server_location,
        transactions: vec![],
    };
    let mut encoded = bytes::BytesMut::new();
    generated_types::server_config::encode_database_owner_info(&owner_info, &mut encoded)
        .expect("owner info serialization should be valid");
    let encoded = encoded.freeze();

    iox_object_store
        .put_owner_file(encoded)
        .await
        .map_err(Box::new)
        .context(CreatingOwnerFileSnafu)?;

    Ok(())
}

#[derive(Debug, Snafu)]
pub enum OwnerInfoUpdateError {
    #[snafu(display("could not fetch existing owner info: {}", source))]
    CouldNotFetch { source: OwnerInfoFetchError },

    #[snafu(display("error updating database owner info file: {}", source))]
    UpdatingOwnerFile { source: object_store::Error },
}

/// Fetch existing owner info, set the `id` and `location`, insert a new entry into the transaction
/// history, and overwrite the contents of the owner file. Errors if the owner info file does NOT
/// currently exist.
pub(crate) async fn update_owner_info(
    new_server_id: Option<ServerId>,
    new_server_location: Option<String>,
    timestamp: Time,
    iox_object_store: &IoxObjectStore,
) -> Result<(), OwnerInfoUpdateError> {
    let management::v1::OwnerInfo {
        id,
        location,
        mut transactions,
    } = fetch_owner_info(iox_object_store)
        .await
        .context(CouldNotFetchSnafu)?;

    let new_transaction = management::v1::OwnershipTransaction {
        id,
        location,
        timestamp: Some(timestamp.date_time().into()),
    };
    transactions.push(new_transaction);

    // TODO: only save latest 100 transactions

    let new_owner_info = management::v1::OwnerInfo {
        // 0 is not a valid server ID, so it indicates "unowned".
        id: new_server_id.map(|s| s.get_u32()).unwrap_or_default(),
        // Owner location is empty when the database is unowned.
        location: new_server_location.unwrap_or_default(),
        transactions,
    };

    let mut encoded = bytes::BytesMut::new();
    generated_types::server_config::encode_database_owner_info(&new_owner_info, &mut encoded)
        .expect("owner info serialization should be valid");
    let encoded = encoded.freeze();

    iox_object_store
        .put_owner_file(encoded)
        .await
        .context(UpdatingOwnerFileSnafu)?;
    Ok(())
}
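Note (not part of the diff): the ownership encoding in `update_owner_info` is worth calling out: releasing a database maps `None` to owner id 0 (not a valid `ServerId`) and an empty location, which is exactly what `claim_database_in_object_store` later in this diff checks for. A tiny self-contained restatement of that mapping (the example location string is hypothetical):

```rust
/// Mirrors the id/location defaulting in the deleted update_owner_info.
fn new_owner_fields(new_server_id: Option<u32>, new_location: Option<String>) -> (u32, String) {
    (
        // 0 is not a valid server ID, so it indicates "unowned".
        new_server_id.unwrap_or_default(),
        // Owner location is empty when the database is unowned.
        new_location.unwrap_or_default(),
    )
}

fn main() {
    // Releasing ownership:
    assert_eq!(new_owner_fields(None, None), (0, String::new()));
    // Claiming for server 42:
    assert_eq!(
        new_owner_fields(Some(42), Some("42/config.pb".to_string())),
        (42, "42/config.pb".to_string())
    );
}
```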
(one file's diff suppressed because it is too large)
@@ -1,731 +0,0 @@
//! Database initialization / creation logic
use crate::{rules::ProvidedDatabaseRules, ApplicationState};
use data_types::{server_id::ServerId, DatabaseName};
use db::{
    load::{create_preserved_catalog, load_or_create_preserved_catalog},
    write_buffer::WriteBufferConsumer,
    DatabaseToCommit, Db, LifecycleWorker,
};
use generated_types::{
    database_state::DatabaseState as DatabaseStateCode, influxdata::iox::management,
};
use iox_object_store::IoxObjectStore;
use observability_deps::tracing::{error, info, warn};
use persistence_windows::checkpoint::ReplayPlan;
use rand::{thread_rng, Rng};
use snafu::{ResultExt, Snafu};
use std::{sync::Arc, time::Duration};
use tokio::task::JoinError;
use tokio_util::sync::CancellationToken;
use uuid::Uuid;

use super::state::DatabaseShared;

/// Errors encountered during initialization of a database
#[derive(Debug, Snafu)]
pub enum InitError {
    #[snafu(display("error finding database directory in object storage: {}", source))]
    DatabaseObjectStoreLookup {
        source: iox_object_store::IoxObjectStoreError,
    },

    #[snafu(display(
        "Database name in deserialized rules ({}) does not match expected value ({})",
        actual,
        expected
    ))]
    RulesDatabaseNameMismatch { actual: String, expected: String },

    #[snafu(display("error loading catalog: {}", source))]
    CatalogLoad { source: db::load::Error },

    #[snafu(display("error creating write buffer: {}", source))]
    CreateWriteBuffer {
        source: write_buffer::core::WriteBufferError,
    },

    #[snafu(display("error during replay: {}", source))]
    Replay { source: db::Error },

    #[snafu(display("error creating database owner info: {}", source))]
    CreatingOwnerInfo { source: crate::config::Error },

    #[snafu(display("error getting database owner info: {}", source))]
    FetchingOwnerInfo { source: crate::config::Error },

    #[snafu(display("error updating database owner info: {}", source))]
    UpdatingOwnerInfo { source: crate::config::Error },

    #[snafu(display(
        "Server ID in the database's owner info file ({}) does not match this server's ID ({})",
        actual,
        expected
    ))]
    DatabaseOwnerMismatch { actual: u32, expected: u32 },

    #[snafu(display(
        "The database with UUID `{}` is already owned by the server with ID {}",
        uuid,
        server_id
    ))]
    CantClaimDatabaseCurrentlyOwned { uuid: Uuid, server_id: u32 },

    #[snafu(display("error saving database rules: {}", source))]
    SavingRules { source: crate::config::Error },

    #[snafu(display("error loading database rules: {}", source))]
    LoadingRules { source: crate::config::Error },

    #[snafu(display("{}", source))]
    IoxObjectStoreError {
        source: iox_object_store::IoxObjectStoreError,
    },

    #[snafu(display("The database with UUID `{}` named `{}` is already active", uuid, name))]
    AlreadyActive { name: String, uuid: Uuid },

    #[snafu(display("cannot create preserved catalog: {}", source))]
    CannotCreatePreservedCatalog { source: db::load::Error },

    #[snafu(display("database is not running"))]
    Shutdown,
}

/// The Database startup state machine
///
/// ```text
/// (start)
///     |
///     |----------------------------o             o-o
///     V                            V             V |
/// [Known]-------------->[OwnerInfoLoadError]
///     |                            |
///     +---------------------------o
///     |
///     |                            o-o
///     V                            V |
/// [OwnerInfoLoaded]----------->[RulesLoadError]
///     |                            |
///     +---------------------------o
///     |
///     |                            o-o
///     V                            V |
/// [RulesLoaded]-------------->[CatalogLoadError]
///     |                            |
///     +---------------------------o
///     |
///     |                            o-o
///     V                            V |
/// [CatalogLoaded]---------->[WriteBufferCreationError]
///     |    |                     |     |
///     |    |                     |     |     o-o
///     |    |                     |     V     V |
///     |    o---------------------|-->[ReplayError]
///     |                          |       |
///     +--------------------------+-------o
///     |
///     |
///     V
/// [Initialized]
///
///     |
///     V
/// [Shutdown]
/// ```
///
/// A Database starts in [`DatabaseState::Known`] and advances through the
/// non error states in sequential order until either:
///
/// 1. It reaches [`DatabaseState::Initialized`]: Database is initialized
/// 2. An error is encountered, in which case it transitions to one of
///    the error states. We try to recover from all of them. For all except [`DatabaseState::ReplayError`] this is a
///    rather cheap operation since we can just retry the actual operation. For [`DatabaseState::ReplayError`] we need
///    to dump the potentially half-modified in-memory catalog before retrying.
#[derive(Debug, Clone)]
pub(crate) enum DatabaseState {
    // Database not running, with an optional shutdown error
    Shutdown(Option<Arc<JoinError>>),
    // Basic initialization sequence states:
    Known(DatabaseStateKnown),
    OwnerInfoLoaded(DatabaseStateOwnerInfoLoaded),
    RulesLoaded(DatabaseStateRulesLoaded),
    CatalogLoaded(DatabaseStateCatalogLoaded),

    // Terminal state (success)
    Initialized(DatabaseStateInitialized),

    // Error states, we'll try to recover from them
    OwnerInfoLoadError(DatabaseStateKnown, Arc<InitError>),
    RulesLoadError(DatabaseStateOwnerInfoLoaded, Arc<InitError>),
    CatalogLoadError(DatabaseStateRulesLoaded, Arc<InitError>),
    WriteBufferCreationError(DatabaseStateCatalogLoaded, Arc<InitError>),
    ReplayError(DatabaseStateCatalogLoaded, Arc<InitError>),
}

impl std::fmt::Display for DatabaseState {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.state_code().fmt(f)
    }
}

impl DatabaseState {
    // Construct the start state of the database machine
    pub fn new_known() -> Self {
        Self::Known(DatabaseStateKnown {})
    }

    pub(crate) fn state_code(&self) -> DatabaseStateCode {
        match self {
            DatabaseState::Shutdown(_) => DatabaseStateCode::Shutdown,
            DatabaseState::Known(_) => DatabaseStateCode::Known,
            DatabaseState::OwnerInfoLoaded(_) => DatabaseStateCode::OwnerInfoLoaded,
            DatabaseState::RulesLoaded(_) => DatabaseStateCode::RulesLoaded,
            DatabaseState::CatalogLoaded(_) => DatabaseStateCode::CatalogLoaded,
            DatabaseState::Initialized(_) => DatabaseStateCode::Initialized,
            DatabaseState::OwnerInfoLoadError(_, _) => DatabaseStateCode::OwnerInfoLoadError,
            DatabaseState::RulesLoadError(_, _) => DatabaseStateCode::RulesLoadError,
            DatabaseState::CatalogLoadError(_, _) => DatabaseStateCode::CatalogLoadError,
            DatabaseState::WriteBufferCreationError(_, _) => {
                DatabaseStateCode::WriteBufferCreationError
            }
            DatabaseState::ReplayError(_, _) => DatabaseStateCode::ReplayError,
        }
    }

    pub(crate) fn error(&self) -> Option<&Arc<InitError>> {
        match self {
            DatabaseState::Known(_)
            | DatabaseState::Shutdown(_)
            | DatabaseState::OwnerInfoLoaded(_)
            | DatabaseState::RulesLoaded(_)
            | DatabaseState::CatalogLoaded(_)
            | DatabaseState::Initialized(_) => None,
            DatabaseState::OwnerInfoLoadError(_, e)
            | DatabaseState::RulesLoadError(_, e)
            | DatabaseState::CatalogLoadError(_, e)
            | DatabaseState::WriteBufferCreationError(_, e)
            | DatabaseState::ReplayError(_, e) => Some(e),
        }
    }

    pub(crate) fn provided_rules(&self) -> Option<Arc<ProvidedDatabaseRules>> {
        match self {
            DatabaseState::Known(_)
            | DatabaseState::Shutdown(_)
            | DatabaseState::OwnerInfoLoaded(_)
            | DatabaseState::OwnerInfoLoadError(_, _)
            | DatabaseState::RulesLoadError(_, _) => None,
            DatabaseState::RulesLoaded(state) | DatabaseState::CatalogLoadError(state, _) => {
                Some(Arc::clone(&state.provided_rules))
            }
            DatabaseState::CatalogLoaded(state)
            | DatabaseState::WriteBufferCreationError(state, _)
            | DatabaseState::ReplayError(state, _) => Some(Arc::clone(&state.provided_rules)),
            DatabaseState::Initialized(state) => Some(Arc::clone(&state.provided_rules)),
        }
    }

    pub(crate) fn owner_info(&self) -> Option<management::v1::OwnerInfo> {
        match self {
            DatabaseState::Known(_)
            | DatabaseState::Shutdown(_)
            | DatabaseState::OwnerInfoLoadError(_, _)
            | DatabaseState::RulesLoadError(_, _) => None,
            DatabaseState::OwnerInfoLoaded(state) => Some(state.owner_info.clone()),
            DatabaseState::RulesLoaded(state) | DatabaseState::CatalogLoadError(state, _) => {
                Some(state.owner_info.clone())
            }
            DatabaseState::CatalogLoaded(state)
            | DatabaseState::WriteBufferCreationError(state, _)
            | DatabaseState::ReplayError(state, _) => Some(state.owner_info.clone()),
            DatabaseState::Initialized(state) => Some(state.owner_info.clone()),
        }
    }

    /// Whether this is shutdown
    pub(crate) fn is_shutdown(&self) -> bool {
        matches!(self, DatabaseState::Shutdown(_))
    }

    pub(crate) fn get_initialized(&self) -> Option<&DatabaseStateInitialized> {
        match self {
            DatabaseState::Initialized(state) => Some(state),
            _ => None,
        }
    }

    /// Try to advance to the next state
    ///
    /// # Panic
    ///
    /// Panics if the database cannot be advanced (already initialized or shutdown)
    async fn advance(self, shared: &DatabaseShared) -> Self {
        match self {
            Self::Known(state) | Self::OwnerInfoLoadError(state, _) => {
                match state.advance(shared).await {
                    Ok(state) => Self::OwnerInfoLoaded(state),
                    Err(e) => Self::OwnerInfoLoadError(state, Arc::new(e)),
                }
            }
            Self::OwnerInfoLoaded(state) | Self::RulesLoadError(state, _) => {
                match state.advance(shared).await {
                    Ok(state) => Self::RulesLoaded(state),
                    Err(e) => Self::RulesLoadError(state, Arc::new(e)),
                }
            }
            Self::RulesLoaded(state) | Self::CatalogLoadError(state, _) => {
                match state.advance(shared).await {
                    Ok(state) => Self::CatalogLoaded(state),
                    Err(e) => Self::CatalogLoadError(state, Arc::new(e)),
                }
            }
            Self::CatalogLoaded(state) | Self::WriteBufferCreationError(state, _) => {
                match state.advance(shared).await {
                    Ok(state) => Self::Initialized(state),
                    Err(e @ InitError::CreateWriteBuffer { .. }) => {
                        Self::WriteBufferCreationError(state, Arc::new(e))
                    }
                    Err(e) => Self::ReplayError(state, Arc::new(e)),
                }
            }
            Self::ReplayError(state, _) => {
                let state2 = state.rollback();
                match state2.advance(shared).await {
                    Ok(state2) => match state2.advance(shared).await {
                        Ok(state2) => Self::Initialized(state2),
                        Err(e) => Self::ReplayError(state, Arc::new(e)),
                    },
                    Err(e) => Self::ReplayError(state, Arc::new(e)),
                }
            }
            Self::Initialized(_) => unreachable!(),
            Self::Shutdown(_) => unreachable!(),
        }
    }
}

#[derive(Debug, Clone)]
pub(crate) struct DatabaseStateKnown {}

impl DatabaseStateKnown {
    /// Load owner info from object storage and verify it matches the current owner
    async fn advance(
        &self,
        shared: &DatabaseShared,
    ) -> Result<DatabaseStateOwnerInfoLoaded, InitError> {
        let (server_id, uuid) = {
            let config = shared.config.read();
            (config.server_id, config.database_uuid)
        };

        let owner_info = shared
            .application
            .config_provider()
            .fetch_owner_info(server_id, uuid)
            .await
            .context(FetchingOwnerInfoSnafu)?;

        if owner_info.id != server_id.get_u32() {
            return DatabaseOwnerMismatchSnafu {
                actual: owner_info.id,
                expected: server_id.get_u32(),
            }
            .fail();
        }

        Ok(DatabaseStateOwnerInfoLoaded { owner_info })
    }
}

#[derive(Debug, Clone)]
pub(crate) struct DatabaseStateOwnerInfoLoaded {
    owner_info: management::v1::OwnerInfo,
}

impl DatabaseStateOwnerInfoLoaded {
    /// Load database rules from object storage
    async fn advance(
        &self,
        shared: &DatabaseShared,
    ) -> Result<DatabaseStateRulesLoaded, InitError> {
        let uuid = shared.config.read().database_uuid;
        let provided_rules = shared
            .application
            .config_provider()
            .fetch_rules(uuid)
            .await
            .context(LoadingRulesSnafu)?;

        let db_name = shared.config.read().name.clone();
        if provided_rules.db_name() != &db_name {
            return RulesDatabaseNameMismatchSnafu {
                actual: provided_rules.db_name(),
                expected: db_name.as_str(),
            }
            .fail();
        }

        Ok(DatabaseStateRulesLoaded {
            provided_rules: Arc::new(provided_rules),
            owner_info: self.owner_info.clone(),
        })
    }
}

#[derive(Debug, Clone)]
pub(crate) struct DatabaseStateRulesLoaded {
    provided_rules: Arc<ProvidedDatabaseRules>,
    owner_info: management::v1::OwnerInfo,
}

impl DatabaseStateRulesLoaded {
    /// Load catalog from object storage
    async fn advance(
        &self,
        shared: &DatabaseShared,
    ) -> Result<DatabaseStateCatalogLoaded, InitError> {
        let (db_name, wipe_catalog_on_error, skip_replay, server_id) = {
            let config = shared.config.read();
            (
                config.name.clone(),
                config.wipe_catalog_on_error,
                config.skip_replay,
                config.server_id,
            )
        };
        let (preserved_catalog, catalog, replay_plan) = load_or_create_preserved_catalog(
            db_name.as_str(),
            Arc::clone(&shared.iox_object_store),
            Arc::clone(shared.application.metric_registry()),
            Arc::clone(shared.application.time_provider()),
            wipe_catalog_on_error,
            skip_replay,
        )
        .await
        .context(CatalogLoadSnafu)?;

        let database_to_commit = DatabaseToCommit {
            server_id,
            iox_object_store: Arc::clone(&shared.iox_object_store),
            exec: Arc::clone(shared.application.executor()),
            rules: Arc::clone(self.provided_rules.rules()),
            preserved_catalog,
            catalog,
            metric_registry: Arc::clone(shared.application.metric_registry()),
            time_provider: Arc::clone(shared.application.time_provider()),
        };

        let db = Arc::new(Db::new(
            database_to_commit,
            Arc::clone(shared.application.job_registry()),
        ));

        let lifecycle_worker = Arc::new(LifecycleWorker::new(Arc::clone(&db)));

        Ok(DatabaseStateCatalogLoaded {
            db,
            lifecycle_worker,
            replay_plan: Arc::new(replay_plan),
            provided_rules: Arc::clone(&self.provided_rules),
            owner_info: self.owner_info.clone(),
        })
    }
}

#[derive(Debug, Clone)]
pub(crate) struct DatabaseStateCatalogLoaded {
    db: Arc<Db>,
    lifecycle_worker: Arc<LifecycleWorker>,
    replay_plan: Arc<Option<ReplayPlan>>,
    provided_rules: Arc<ProvidedDatabaseRules>,
    owner_info: management::v1::OwnerInfo,
}

impl DatabaseStateCatalogLoaded {
    /// Perform replay
    async fn advance(
        &self,
        shared: &DatabaseShared,
    ) -> Result<DatabaseStateInitialized, InitError> {
        let db = Arc::clone(&self.db);

        let rules = self.provided_rules.rules();
        let trace_collector = shared.application.trace_collector();
        let write_buffer_factory = shared.application.write_buffer_factory();
        let (db_name, skip_replay) = {
            let config = shared.config.read();
            (config.name.clone(), config.skip_replay)
        };
        let write_buffer_consumer = match rules.write_buffer_connection.as_ref() {
            Some(connection) => {
                let consumer = write_buffer_factory
                    .new_config_read(db_name.as_str(), trace_collector.as_ref(), connection)
                    .await
                    .context(CreateWriteBufferSnafu)?;

                let replay_plan = if skip_replay {
                    None
                } else {
                    self.replay_plan.as_ref().as_ref()
                };

                let streams = db
                    .perform_replay(replay_plan, Arc::clone(&consumer))
                    .await
                    .context(ReplaySnafu)?;

                Some(Arc::new(WriteBufferConsumer::new(
                    consumer,
                    streams,
                    Arc::clone(&db),
                    shared.application.metric_registry().as_ref(),
                )))
            }
            _ => None,
        };

        self.lifecycle_worker.unsuppress_persistence();

        Ok(DatabaseStateInitialized {
            db,
            write_buffer_consumer,
            lifecycle_worker: Arc::clone(&self.lifecycle_worker),
            provided_rules: Arc::clone(&self.provided_rules),
            owner_info: self.owner_info.clone(),
        })
    }

    /// Rolls back state to an unloaded catalog.
    pub(crate) fn rollback(&self) -> DatabaseStateRulesLoaded {
        warn!(db_name=%self.db.name(), "throwing away loaded catalog to recover from replay error");
        DatabaseStateRulesLoaded {
            provided_rules: Arc::clone(&self.provided_rules),
            owner_info: self.owner_info.clone(),
        }
    }
}

#[derive(Debug, Clone)]
pub(crate) struct DatabaseStateInitialized {
    db: Arc<Db>,
    write_buffer_consumer: Option<Arc<WriteBufferConsumer>>,
    lifecycle_worker: Arc<LifecycleWorker>,
    provided_rules: Arc<ProvidedDatabaseRules>,
    owner_info: management::v1::OwnerInfo,
}

impl DatabaseStateInitialized {
    pub fn db(&self) -> &Arc<Db> {
        &self.db
    }

    pub fn write_buffer_consumer(&self) -> Option<&Arc<WriteBufferConsumer>> {
        self.write_buffer_consumer.as_ref()
    }

    pub fn set_provided_rules(&mut self, provided_rules: Arc<ProvidedDatabaseRules>) {
        self.provided_rules = provided_rules
    }

    /// Get a reference to the database state initialized's lifecycle worker.
    pub(crate) fn lifecycle_worker(&self) -> &Arc<LifecycleWorker> {
        &self.lifecycle_worker
    }
}

const INIT_BACKOFF: Duration = Duration::from_secs(1);
const MAX_BACKOFF: Duration = Duration::from_secs(500);

/// Try to drive the database to `DatabaseState::Initialized`; returns when
/// this is achieved or the shutdown signal is triggered
pub(crate) async fn initialize_database(shared: &DatabaseShared, shutdown: CancellationToken) {
    let db_name = shared.config.read().name.clone();
    info!(%db_name, "database initialization started");

    // A backoff duration for retrying errors that will change over the course of multiple errors
    let mut backoff = INIT_BACKOFF;

    while !shutdown.is_cancelled() {
        let handle = shared.state.read().freeze();
        let handle = handle.await;

        // Re-acquire read lock to avoid holding lock across await point
        let state = DatabaseState::clone(&shared.state.read());

        info!(%db_name, %state, "attempting to advance database initialization state");

        match &state {
            DatabaseState::Initialized(_) => break,
            DatabaseState::Shutdown(_) => {
                info!(%db_name, "database in shutdown - aborting initialization");
                shutdown.cancel();
                return;
            }
            _ => {}
        }

        // Try to advance to the next state
        let next_state = tokio::select! {
            next_state = state.advance(shared) => next_state,
            _ = shutdown.cancelled() => {
                info!(%db_name, "initialization aborted by shutdown");
                return
            }
        };

        let state_code = next_state.state_code();
        let maybe_error = next_state.error().cloned();
        // Commit the next state
        {
            let mut state = shared.state.write();
            info!(%db_name, from=%state, to=%next_state, "database initialization state transition");

            *state.unfreeze(handle) = next_state;
            shared.state_notify.notify_waiters();
        }

        match maybe_error {
            Some(error) => {
                // exponential backoff w/ jitter, decorrelated
                // see https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/
                backoff = Duration::from_secs_f64(
                    MAX_BACKOFF.as_secs_f64().min(
                        thread_rng()
                            .gen_range(INIT_BACKOFF.as_secs_f64()..(backoff.as_secs_f64() * 3.0)),
                    ),
                );

                error!(
                    %db_name,
                    %error,
                    state=%state_code,
                    backoff_secs = backoff.as_secs_f64(),
                    "database in error state - backing off initialization"
                );

                // Wait for timeout or shutdown signal
                tokio::select! {
                    _ = tokio::time::sleep(backoff) => {},
                    _ = shutdown.cancelled() => {}
                }
            }
            None => {
                // reset backoff
                backoff = INIT_BACKOFF;
            }
        }
    }
}

/// Create a fresh database without any state. Returns its location in object storage
/// for saving in the server config file.
pub async fn create_empty_db_in_object_store(
    application: Arc<ApplicationState>,
    uuid: Uuid,
    provided_rules: ProvidedDatabaseRules,
    server_id: ServerId,
) -> Result<String, InitError> {
    let db_name = provided_rules.db_name().clone();
    let iox_object_store = Arc::new(
        match IoxObjectStore::create(Arc::clone(&application.object_store()), uuid).await {
            Ok(ios) => ios,
            Err(source) => return Err(InitError::IoxObjectStoreError { source }),
        },
    );

    let database_location = iox_object_store.root_path();

    application
        .config_provider()
        .create_owner_info(server_id, uuid)
        .await
        .context(CreatingOwnerInfoSnafu)?;

    application
        .config_provider()
        .store_rules(uuid, &provided_rules)
        .await
        .context(SavingRulesSnafu)?;

    create_preserved_catalog(
        &db_name,
        iox_object_store,
        Arc::clone(application.metric_registry()),
        Arc::clone(application.time_provider()),
        true,
    )
    .await
    .context(CannotCreatePreservedCatalogSnafu)?;

    Ok(database_location)
}

/// Create a claimed database without any state. Returns its
/// location in object storage for saving in the server config
/// file.
///
/// If `force` is true, a missing owner info or owner info that is
/// for the wrong server id are ignored (do not cause errors)
pub async fn claim_database_in_object_store(
    application: Arc<ApplicationState>,
    db_name: &DatabaseName<'static>,
    uuid: Uuid,
    server_id: ServerId,
    force: bool,
) -> Result<String, InitError> {
    info!(%db_name, %uuid, %force, "claiming database");

    let iox_object_store = IoxObjectStore::load(Arc::clone(&application.object_store()), uuid)
        .await
        .context(IoxObjectStoreSnafu)?;

    let owner_info = application
        .config_provider()
        .fetch_owner_info(server_id, uuid)
        .await
        .context(FetchingOwnerInfoSnafu);

    // try to recreate owner_info if force is specified
    let owner_info = match owner_info {
        Err(_) if force => {
            warn!("Attempting to recreate missing owner info due to force");

            application
                .config_provider()
                .create_owner_info(server_id, uuid)
                .await
                .context(CreatingOwnerInfoSnafu)?;

            application
                .config_provider()
                .fetch_owner_info(server_id, uuid)
                .await
                .context(FetchingOwnerInfoSnafu)
        }
        t => t,
    }?;

    if owner_info.id != 0 {
        if !force {
            return CantClaimDatabaseCurrentlyOwnedSnafu {
                uuid,
                server_id: owner_info.id,
            }
            .fail();
        } else {
            warn!(
                owner_id = owner_info.id,
                "Ignoring owner info mismatch due to force"
            );
        }
    }

    let database_location = iox_object_store.root_path();
    application
        .config_provider()
        .update_owner_info(Some(server_id), uuid)
        .await
        .context(UpdatingOwnerInfoSnafu)?;

    Ok(database_location)
}
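Note (not part of the diff): the retry loop in `initialize_database` uses decorrelated-jitter backoff per the AWS post it links: each delay is drawn uniformly from [INIT_BACKOFF, 3 × previous], capped at MAX_BACKOFF. Extracted into a standalone, runnable sketch (rand 0.8 API, same constants as the deleted file):

```rust
use rand::{thread_rng, Rng};
use std::time::Duration;

const INIT_BACKOFF: Duration = Duration::from_secs(1);
const MAX_BACKOFF: Duration = Duration::from_secs(500);

/// Decorrelated jitter: uniform in [INIT_BACKOFF, prev * 3], capped at MAX_BACKOFF.
fn next_backoff(prev: Duration) -> Duration {
    Duration::from_secs_f64(
        MAX_BACKOFF.as_secs_f64().min(
            thread_rng().gen_range(INIT_BACKOFF.as_secs_f64()..(prev.as_secs_f64() * 3.0)),
        ),
    )
}

fn main() {
    let mut backoff = INIT_BACKOFF;
    for attempt in 1..=10 {
        backoff = next_backoff(backoff);
        assert!(backoff >= INIT_BACKOFF && backoff <= MAX_BACKOFF);
        println!("attempt {attempt}: sleeping {backoff:?}");
    }
}
```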
@@ -1,43 +0,0 @@
//! Database initialization states
use crate::ApplicationState;
use data_types::{server_id::ServerId, DatabaseName};
use internal_types::freezable::Freezable;
use iox_object_store::IoxObjectStore;
use parking_lot::RwLock;
use std::sync::Arc;
use tokio::sync::Notify;
use uuid::Uuid;

use super::init::DatabaseState;

#[derive(Debug, Clone)]
/// Information about where a database is located on object store,
/// and how to perform startup activities.
pub struct DatabaseConfig {
    pub name: DatabaseName<'static>,
    pub server_id: ServerId,
    pub database_uuid: Uuid,
    pub wipe_catalog_on_error: bool,
    pub skip_replay: bool,
}

/// State shared with the `Database` background worker
#[derive(Debug)]
pub(crate) struct DatabaseShared {
    /// Configuration provided to the database at startup
    pub(crate) config: RwLock<DatabaseConfig>,

    /// Application-global state
    pub(crate) application: Arc<ApplicationState>,

    /// Database object store
    pub(crate) iox_object_store: Arc<IoxObjectStore>,

    /// The initialization state of the `Database`, wrapped in a
    /// `Freezable` to ensure there is only one task with an
    /// outstanding intent to write at any time.
    pub(crate) state: RwLock<Freezable<DatabaseState>>,

    /// Notify that the database state has changed
    pub(crate) state_notify: Notify,
}
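Note (not part of the diff): the `Freezable` wrapper referenced above is what lets `initialize_database` snapshot the state, do slow async work without holding the `RwLock`, and then commit the transition, while guaranteeing a single writer. A toy analogy using a one-permit semaphore in place of the real `internal_types::freezable::Freezable` API (the names here are invented, not the crate's):

```rust
use parking_lot::RwLock;
use tokio::sync::Semaphore;

/// Toy stand-in: the semaphore plays the role of the "intent to write"
/// handle, so at most one task may commit a state transition at a time.
struct FreezableState<T> {
    state: RwLock<T>,
    intent: Semaphore,
}

impl<T: Clone> FreezableState<T> {
    fn new(value: T) -> Self {
        Self {
            state: RwLock::new(value),
            intent: Semaphore::new(1),
        }
    }

    async fn advance<F: FnOnce(T) -> T>(&self, f: F) {
        // "freeze": acquire the only write intent, without holding the RwLock
        let _handle = self.intent.acquire().await.expect("semaphore not closed");
        let snapshot = self.state.read().clone(); // cheap snapshot under read lock
        let next = f(snapshot); // slow work happens with no lock held
        *self.state.write() = next; // brief write lock to commit
    }
}

#[tokio::main]
async fn main() {
    let s = FreezableState::new(0u32);
    s.advance(|v| v + 1).await;
    assert_eq!(*s.state.read(), 1);
}
```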
server/src/lib.rs (2365 lines): diff suppressed because it is too large
@@ -1,134 +0,0 @@
use data_types::{database_rules::DatabaseRules, DatabaseName};
use generated_types::{
    google::{FieldViolation, FieldViolationExt},
    influxdata::iox::management,
};
use std::{
    convert::{TryFrom, TryInto},
    sync::Arc,
};
use uuid::Uuid;

/// The configuration ([`DatabaseRules`]) used to create and update
/// databases, both in original and "materialized" (with defaults filled in) form.
///
/// The rationale for storing both the rules as they were provided
/// *and* the materialized form is to provide the property that if the
/// same rules are sent to a database that were previously sent, the
/// database will still be running the same configuration. If the
/// materialized configuration were stored, and then the defaults were
/// changed in a new version of the software, the required property
/// would not hold.
///
/// While this may sound like an esoteric corner case with little real
/// world impact, it has non-trivial real world implications for
/// keeping the configurations of fleets of IOx servers in sync. See
/// <https://github.com/influxdata/influxdb_iox/issues/2409> for
/// further gory details.
///
/// A design goal is to keep the notion of "what the user provided" as
/// isolated as possible, so only the server crate worries
/// about what the user actually provided and the rest of the system
/// can use `data_types::database_rules::PersistedDatabaseRules` in
/// blissful ignorance of such subtleties.
#[derive(Debug, Clone)]
pub struct ProvidedDatabaseRules {
    /// Full database rules, with all fields set. Derived from
    /// `original` by applying default values.
    full: Arc<DatabaseRules>,

    /// Encoded database rules, as provided by the user and as stored
    /// in the object store (may not have all fields set).
    original: management::v1::DatabaseRules,
}

impl ProvidedDatabaseRules {
    // Create new rules with default values for the given database name
    pub fn new_empty(db_name: DatabaseName<'static>) -> Self {
        let original = management::v1::DatabaseRules {
            name: db_name.to_string(),
            ..Default::default()
        };

        // Should always be able to create a DBRules with default values
        let full = Arc::new(original.clone().try_into().expect("creating empty rules"));

        Self { full, original }
    }

    pub fn new_rules(original: management::v1::DatabaseRules) -> Result<Self, FieldViolation> {
        let full = Arc::new(original.clone().try_into()?);

        Ok(Self { full, original })
    }

    /// Returns the name of the database in the rules
    pub fn db_name(&self) -> &DatabaseName<'static> {
        &self.full.name
    }

    /// Return the full database rules
    pub fn rules(&self) -> &Arc<DatabaseRules> {
        &self.full
    }

    /// Return the original rules provided to this
    pub fn original(&self) -> &management::v1::DatabaseRules {
        &self.original
    }
}

#[derive(Debug, Clone)]
pub struct PersistedDatabaseRules {
    uuid: Uuid,
    provided: ProvidedDatabaseRules,
}

impl PersistedDatabaseRules {
    pub fn new(uuid: Uuid, provided: ProvidedDatabaseRules) -> Self {
        Self { uuid, provided }
    }

    pub fn uuid(&self) -> Uuid {
        self.uuid
    }

    pub fn db_name(&self) -> &DatabaseName<'static> {
        &self.provided.full.name
    }

    /// Return the full database rules
    pub fn rules(&self) -> &Arc<DatabaseRules> {
        &self.provided.full
    }

    /// Return the original rules provided to this
    pub fn original(&self) -> &management::v1::DatabaseRules {
        &self.provided.original
    }

    /// Convert to its inner representation
    pub fn into_inner(self) -> (Uuid, ProvidedDatabaseRules) {
        (self.uuid, self.provided)
    }
}

impl TryFrom<management::v1::PersistedDatabaseRules> for PersistedDatabaseRules {
    type Error = FieldViolation;

    /// Create a new PersistedDatabaseRules from a grpc message
    fn try_from(proto: management::v1::PersistedDatabaseRules) -> Result<Self, Self::Error> {
        let original: management::v1::DatabaseRules = proto
            .rules
            .ok_or_else(|| FieldViolation::required("rules"))?;

        let full = Arc::new(original.clone().try_into()?);

        let uuid = Uuid::from_slice(&proto.uuid).scope("uuid")?;

        Ok(Self {
            uuid,
            provided: ProvidedDatabaseRules { full, original },
        })
    }
}
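Note (not part of the diff): the "store the original, derive the full form" property in the `ProvidedDatabaseRules` doc comment is easy to demonstrate with a toy model: equality of original rules is stable across releases, whereas materialized forms produced under different defaults would spuriously differ (all names below are invented for illustration):

```rust
#[derive(Clone, Debug, PartialEq)]
struct OriginalRules {
    name: String,
    worker_threads: Option<usize>, // user may omit => use the default
}

#[derive(Debug, PartialEq)]
struct FullRules {
    name: String,
    worker_threads: usize,
}

/// Materialize the user-provided rules against the defaults of the
/// currently running software version.
fn materialize(original: &OriginalRules, default_threads: usize) -> FullRules {
    FullRules {
        name: original.name.clone(),
        worker_threads: original.worker_threads.unwrap_or(default_threads),
    }
}

fn main() {
    let stored = OriginalRules {
        name: "db1".to_string(),
        worker_threads: None,
    };
    let resent = stored.clone();

    // Re-sending identical original rules is detectable as "no change",
    // no matter what the defaults are in the current release...
    assert_eq!(stored, resent);

    // ...whereas materialized forms computed under different defaults
    // (e.g. 4 in the old release, 8 in the new one) disagree.
    assert_ne!(materialize(&stored, 4), materialize(&resent, 8));
}
```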
@ -1,255 +0,0 @@
|
|||
use arrow_util::assert_batches_sorted_eq;
use data_types::{
    chunk_metadata::ChunkStorage,
    database_rules::{DatabaseRules, LifecycleRules, PartitionTemplate, TemplatePart},
    delete_predicate::{DeleteExpr, DeletePredicate},
    server_id::ServerId,
    timestamp::TimestampRange,
    DatabaseName,
};
use db::{
    test_helpers::{run_query, write_lp},
    Db,
};
use futures::TryStreamExt;
use query::{QueryChunk, QueryDatabase};
use server::{
    rules::ProvidedDatabaseRules,
    test_utils::{make_application, make_initialized_server},
};
use std::{
    num::{NonZeroU32, NonZeroU64},
    sync::Arc,
    time::{Duration, Instant},
};
use test_helpers::maybe_start_logging;

#[tokio::test]
async fn delete_predicate_preservation() {
    maybe_start_logging();

    // ==================== setup ====================
    let server_id = ServerId::new(NonZeroU32::new(1).unwrap());
    let db_name = DatabaseName::new("delete_predicate_preservation_test").unwrap();

    let application = make_application();
    let server = make_initialized_server(server_id, Arc::clone(&application)).await;

    // Test that delete predicates are stored within the preserved catalog

    // ==================== do: create DB ====================
    // Create a DB given a server id, an object store and a db name

    let rules = DatabaseRules {
        partition_template: PartitionTemplate {
            parts: vec![TemplatePart::Column("part".to_string())],
        },
        lifecycle_rules: LifecycleRules {
            catalog_transactions_until_checkpoint: NonZeroU64::new(1).unwrap(),
            // do not prune transaction files because this test relies on them
            catalog_transaction_prune_age: Duration::from_secs(1_000),
            late_arrive_window_seconds: NonZeroU32::new(1).unwrap(),
            ..Default::default()
        },
        ..DatabaseRules::new(db_name.clone())
    };

    let database = server
        .create_database(ProvidedDatabaseRules::new_rules(rules.clone().into()).unwrap())
        .await
        .unwrap();
    let db = database.initialized_db().unwrap();

    // ==================== do: create chunks ====================
    let table_name = "cpu";

    // 1: preserved
    let partition_key = "part_a";
    write_lp(&db, "cpu,part=a row=10,selector=0i 10");
    write_lp(&db, "cpu,part=a row=11,selector=1i 11");
    db.persist_partition(table_name, partition_key, true)
        .await
        .unwrap();

    // 2: RUB (read buffer only)
    let partition_key = "part_b";
    write_lp(&db, "cpu,part=b row=20,selector=0i 20");
    write_lp(&db, "cpu,part=b row=21,selector=1i 21");
    db.compact_partition(table_name, partition_key)
        .await
        .unwrap();

    // 3: MUB (mutable buffer only)
    let _partition_key = "part_c";
    write_lp(&db, "cpu,part=c row=30,selector=0i 30");
    write_lp(&db, "cpu,part=c row=31,selector=1i 31");

    // 4: preserved and unloaded
    let partition_key = "part_d";
    write_lp(&db, "cpu,part=d row=40,selector=0i 40");
    write_lp(&db, "cpu,part=d row=41,selector=1i 41");

    let chunk_id = db
        .persist_partition(table_name, partition_key, true)
        .await
        .unwrap()
        .unwrap()
        .id();

    db.unload_read_buffer(table_name, partition_key, chunk_id)
        .unwrap();

    // ==================== do: delete ====================
    let pred = Arc::new(DeletePredicate {
        range: TimestampRange::new(0, 1_000),
        exprs: vec![DeleteExpr::new(
            "selector".to_string(),
            data_types::delete_predicate::Op::Eq,
            data_types::delete_predicate::Scalar::I64(1),
        )],
    });
    db.delete("cpu", Arc::clone(&pred)).unwrap();

    // ==================== do: preserve another partition ====================
    let partition_key = "part_b";
    db.persist_partition(table_name, partition_key, true)
        .await
        .unwrap();

    // ==================== do: use background worker for a short while ====================
    let iters_start = db.worker_iterations_delete_predicate_preservation();
    // time_provider.inc(rules.lifecycle_rules.late_arrive_window());

    let t_0 = Instant::now();
    loop {
        let did_delete_predicate_preservation =
            db.worker_iterations_delete_predicate_preservation() > iters_start;
        let did_compaction = db.chunk_summaries().into_iter().any(|summary| {
            (summary.partition_key.as_ref() == "part_c")
                && (summary.storage == ChunkStorage::ReadBuffer)
        });
        if did_delete_predicate_preservation && did_compaction {
            break;
        }
        assert!(t_0.elapsed() < Duration::from_secs(10));
        tokio::time::sleep(Duration::from_millis(100)).await;
    }
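
    // The poll-until-deadline pattern above recurs throughout these tests; a
    // generic helper (a sketch, not part of the original file) could factor it out:
    //
    //     async fn wait_until(mut cond: impl FnMut() -> bool, timeout: Duration) {
    //         let t_0 = Instant::now();
    //         while !cond() {
    //             assert!(t_0.elapsed() < timeout, "condition not met in time");
    //             tokio::time::sleep(Duration::from_millis(100)).await;
    //         }
    //     }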

    // ==================== check: delete predicates ====================

    let closure_check_delete_predicates = |db: &Arc<Db>| {
        let db = Arc::clone(db);
        let pred = pred.clone();

        async move {
            let chunks = db
                .chunks(table_name, &Default::default())
                .await
                .expect("error getting chunks");
            for chunk in chunks {
                let addr = chunk.addr();
                let partition_key = addr.partition_key.as_ref();
                if partition_key == "part_b" {
                    // Strictly speaking not required, because the chunk was persisted AFTER the
                    // delete predicate was registered, so we can get away with materializing it
                    // during persistence.
                    continue;
                }
                if partition_key == "part_c" {
                    // This partition was compacted, so the delete predicates were materialized.
                    continue;
                }
                let predicates = chunk.delete_predicates();
                assert_eq!(predicates.len(), 1);
                assert_eq!(predicates[0].as_ref(), pred.as_ref());
            }
        }
    };
    closure_check_delete_predicates(&db).await;

    // ==================== check: query ====================
    let expected = vec![
        "+------+-----+----------+--------------------------------+",
        "| part | row | selector | time                           |",
        "+------+-----+----------+--------------------------------+",
        "| a    | 10  | 0        | 1970-01-01T00:00:00.000000010Z |",
        "| b    | 20  | 0        | 1970-01-01T00:00:00.000000020Z |",
        "| c    | 30  | 0        | 1970-01-01T00:00:00.000000030Z |",
        "| d    | 40  | 0        | 1970-01-01T00:00:00.000000040Z |",
        "+------+-----+----------+--------------------------------+",
    ];
    let batches = run_query(Arc::clone(&db), "select * from cpu order by time").await;
    assert_batches_sorted_eq!(&expected, &batches);

    // ==================== do: re-load DB ====================
    // Re-create database with same store, server ID, and DB name
    database.restart().await.unwrap();
    let db = database.initialized_db().unwrap();

    // ==================== check: delete predicates ====================
    closure_check_delete_predicates(&db).await;

    // ==================== check: query ====================
    // NOTE: partition "c" is gone here because it was not written to object store
    let expected = vec![
        "+------+-----+----------+--------------------------------+",
        "| part | row | selector | time                           |",
        "+------+-----+----------+--------------------------------+",
        "| a    | 10  | 0        | 1970-01-01T00:00:00.000000010Z |",
        "| b    | 20  | 0        | 1970-01-01T00:00:00.000000020Z |",
        "| d    | 40  | 0        | 1970-01-01T00:00:00.000000040Z |",
        "+------+-----+----------+--------------------------------+",
    ];

    let batches = run_query(Arc::clone(&db), "select * from cpu order by time").await;
    assert_batches_sorted_eq!(&expected, &batches);

    database.restart().await.unwrap();

    // ==================== do: remove checkpoint files ====================
    let iox_object_store = database.iox_object_store();

    let files = iox_object_store
        .catalog_transaction_files()
        .await
        .unwrap()
        .try_concat()
        .await
        .unwrap();

    let mut deleted_one = false;
    for file in files {
        if file.is_checkpoint() {
            iox_object_store
                .delete_catalog_transaction_file(&file)
                .await
                .unwrap();
            deleted_one = true;
        }
    }
    assert!(deleted_one);

    // ==================== do: re-load DB ====================
    // Re-create database with same store, server ID, and DB name
    database.restart().await.unwrap();
    let db = database.initialized_db().unwrap();

    // ==================== check: delete predicates ====================
    closure_check_delete_predicates(&db).await;

    // ==================== check: query ====================
    // NOTE: partition "c" is gone here because it was not written to object store
    let expected = vec![
        "+------+-----+----------+--------------------------------+",
        "| part | row | selector | time                           |",
        "+------+-----+----------+--------------------------------+",
        "| a    | 10  | 0        | 1970-01-01T00:00:00.000000010Z |",
        "| b    | 20  | 0        | 1970-01-01T00:00:00.000000020Z |",
        "| d    | 40  | 0        | 1970-01-01T00:00:00.000000040Z |",
        "+------+-----+----------+--------------------------------+",
    ];
    let batches = run_query(Arc::clone(&db), "select * from cpu order by time").await;
    assert_batches_sorted_eq!(&expected, &batches);

    server.shutdown();
    server.join().await.unwrap();
}

@ -1,230 +0,0 @@

use arrow_util::assert_batches_eq;
use data_types::{
    delete_predicate::{DeleteExpr, DeletePredicate, Op, Scalar},
    router::{
        Matcher, MatcherToShard, QuerySinks, Router as RouterConfig, ShardConfig, ShardId,
        WriteSink, WriteSinkSet, WriteSinkVariant,
    },
    server_id::ServerId,
    timestamp::TimestampRange,
    DatabaseName,
};
use db::{test_helpers::wait_for_tables, Db};
use dml::{DmlDelete, DmlOperation, DmlWrite};
use generated_types::influxdata::iox::{
    management::v1::DatabaseRules, write_buffer::v1::WriteBufferConnection,
};
use mutable_batch_lp::lines_to_batches;
use query::{exec::ExecutionContextProvider, frontend::sql::SqlQueryPlanner};
use regex::Regex;
use router::{router::Router, server::RouterServer};
use server::{
    rules::ProvidedDatabaseRules,
    test_utils::{make_application, make_initialized_server},
    Server,
};
use std::{collections::BTreeMap, num::NonZeroU32, sync::Arc};
use write_buffer::mock::MockBufferSharedState;

/// A distributed IOx topology consisting of a router and a database, separated by a write buffer
///
/// There is some overlap with `ReplayTest` in `server` and `ServerFixture` in the end-to-end
/// tests. The former is primarily concerned with the interaction of replay and persistence,
/// whilst the latter is concerned with the behaviour of the process as a whole.
///
/// `DistributedTest` sits somewhere in the middle: it is not concerned with the details of
/// persistence or replay, but it is still at a low enough level that it can manipulate the
/// server APIs directly and is not restricted to what is exposed over gRPC.
///
/// It primarily exists to test the routing logic.
struct DistributedTest {
    router: Arc<Router>,

    consumer: Arc<Server>,
    consumer_db: Arc<Db>,
}
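
// Data flow exercised by this fixture (an orientation sketch, not part of the
// original file):
//
//     test writes --> router (shard config) --> mock write buffer --> consumer Server/Db --> queries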

impl DistributedTest {
    /// Create a new DistributedTest
    pub async fn new(db_name: &DatabaseName<'static>) -> Self {
        let write_buffer_state =
            MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::new(1).unwrap());

        let application = make_application();
        application
            .write_buffer_factory()
            .register_mock("my_mock".to_string(), write_buffer_state);

        let write_buffer_connection = WriteBufferConnection {
            r#type: "mock".to_string(),
            connection: "my_mock".to_string(),
            connection_config: Default::default(),
            creation_config: None,
        };

        // Create a router
        let router_server = RouterServer::for_testing(
            None,
            None,
            Arc::clone(application.time_provider()),
            Some(Arc::clone(application.write_buffer_factory())),
        )
        .await;
        let router_id = ServerId::new(NonZeroU32::new(1).unwrap());
        router_server.set_server_id(router_id).unwrap();

        router_server.update_router(RouterConfig {
            name: db_name.to_string(),
            write_sharder: ShardConfig {
                specific_targets: vec![MatcherToShard {
                    matcher: Matcher {
                        table_name_regex: Some(Regex::new(".*").unwrap()),
                    },
                    shard: ShardId::new(1),
                }],
                hash_ring: None,
            },
            write_sinks: BTreeMap::from([(
                ShardId::new(1),
                WriteSinkSet {
                    sinks: vec![WriteSink {
                        sink: WriteSinkVariant::WriteBuffer(
                            write_buffer_connection.clone().try_into().unwrap(),
                        ),
                        ignore_errors: false,
                    }],
                },
            )]),
            query_sinks: QuerySinks::default(),
        });
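        // A single catch-all shard: the ".*" matcher routes every table to shard 1,
        // whose only sink is the mock write buffer registered above.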
        let router = router_server.router(db_name).unwrap();

        // Create a consumer
        let consumer_id = ServerId::new(NonZeroU32::new(2).unwrap());
        let consumer = make_initialized_server(consumer_id, Arc::clone(&application)).await;

        let consumer_db = consumer
            .create_database(
                ProvidedDatabaseRules::new_rules(DatabaseRules {
                    name: db_name.to_string(),
                    write_buffer_connection: Some(write_buffer_connection.clone()),
                    ..Default::default()
                })
                .unwrap(),
            )
            .await
            .unwrap()
            .initialized_db()
            .unwrap();

        Self {
            router,
            consumer,
            consumer_db,
        }
    }

    /// Wait for the consumer to have the following tables
    pub async fn wait_for_tables(&self, expected_tables: &[&str]) {
        wait_for_tables(&self.consumer_db, expected_tables).await
    }

    /// Write line protocol
    pub async fn write(&self, lp: &str) {
        self.router
            .write(DmlOperation::Write(DmlWrite::new(
                self.consumer_db.name().as_ref(),
                lines_to_batches(lp, 0).unwrap(),
                Default::default(),
            )))
            .await
            .unwrap();
    }

    pub async fn delete(&self, delete: DmlDelete) {
        // TODO: Write to router not Db (#2980)
        self.router
            .write(DmlOperation::Delete(delete))
            .await
            .unwrap();
    }

    /// Perform a query and assert the result
    pub async fn query(&self, query: &str, expected: &[&'static str]) {
        let ctx = self.consumer_db.new_query_context(None);
        let physical_plan = SqlQueryPlanner::new().query(query, &ctx).await.unwrap();

        let batches = ctx.collect(physical_plan).await.unwrap();

        assert_batches_eq!(expected, &batches);
    }

    /// Shuts down the fixture and waits for the servers to exit
    pub async fn drain(&self) {
        self.consumer.shutdown();
        self.consumer.join().await.unwrap();
    }
}

#[tokio::test]
async fn write_buffer_deletes() {
    let db_name = DatabaseName::new("distributed").unwrap();
    let fixture = DistributedTest::new(&db_name).await;

    // Write some data
    fixture.write("foo x=1 1").await;
    fixture.write("foo x=3 2").await;

    // Send a delete over the write buffer
    fixture
        .delete(DmlDelete::new(
            db_name.as_str(),
            DeletePredicate {
                range: TimestampRange::new(0, 20),
                exprs: vec![DeleteExpr {
                    column: "x".to_string(),
                    op: Op::Eq,
                    scalar: Scalar::I64(1),
                }],
            },
            None,
            Default::default(),
        ))
        .await;

    // Use a write to a different table to signal consumption has completed by waiting
    // for this new table to exist in the consumer database
    fixture.write("bar x=2 1").await;

    // Wait for consumer to catch up
    fixture.wait_for_tables(&["bar", "foo"]).await;

    fixture
        .query(
            "select * from foo;",
            &[
                "+--------------------------------+---+",
                "| time                           | x |",
                "+--------------------------------+---+",
                "| 1970-01-01T00:00:00.000000002Z | 3 |",
                "+--------------------------------+---+",
            ],
        )
        .await;

    fixture
        .query(
            "select * from bar;",
            &[
                "+--------------------------------+---+",
                "| time                           | x |",
                "+--------------------------------+---+",
                "| 1970-01-01T00:00:00.000000001Z | 2 |",
                "+--------------------------------+---+",
            ],
        )
        .await;

    fixture.drain().await;
}

@ -1,274 +0,0 @@

use arrow_util::assert_batches_eq;
use data_types::{
    chunk_metadata::ChunkStorage,
    database_rules::{DatabaseRules, LifecycleRules, PartitionTemplate, TemplatePart},
    sequence::Sequence,
    server_id::ServerId,
    write_buffer::WriteBufferConnection,
    DatabaseName,
};
use db::{
    test_helpers::{run_query, wait_for_tables},
    Db,
};
use futures_util::FutureExt;
use server::{
    rules::ProvidedDatabaseRules,
    test_utils::{make_application, make_initialized_server},
};
use std::{
    num::{NonZeroU32, NonZeroUsize},
    sync::Arc,
    time::{Duration, Instant},
};
use test_helpers::{assert_contains, tracing::TracingCapture};
use write_buffer::mock::MockBufferSharedState;

#[tokio::test]
async fn write_buffer_lifecycle() {
    // Test the interaction between the write buffer and the lifecycle

    let tracing_capture = TracingCapture::new();

    // ==================== setup ====================
    let server_id = ServerId::new(NonZeroU32::new(1).unwrap());
    let db_name = DatabaseName::new("delete_predicate_preservation_test").unwrap();

    let application = make_application();

    let mock_shared_state =
        MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::new(1).unwrap());

    // The writes are split into two groups to allow "pausing replay" by playing back from
    // a MockBufferSharedState with only the first set of writes
    write_group1(&mock_shared_state);
    write_group2(&mock_shared_state);

    application
        .write_buffer_factory()
        .register_mock("my_mock".to_string(), mock_shared_state.clone());

    let server = make_initialized_server(server_id, Arc::clone(&application)).await;

    let partition_template = PartitionTemplate {
        parts: vec![TemplatePart::Column("tag_partition_by".to_string())],
    };

    let write_buffer_connection = WriteBufferConnection {
        type_: "mock".to_string(),
        connection: "my_mock".to_string(),
        ..Default::default()
    };

    //
    // Phase 1: Verify that consuming from a write buffer will wait for compaction in the event
    // the hard limit is exceeded
    //

    // create DB
    let rules = DatabaseRules {
        partition_template: partition_template.clone(),
        lifecycle_rules: LifecycleRules {
            buffer_size_hard: Some(NonZeroUsize::new(10_000).unwrap()),
            mub_row_threshold: NonZeroUsize::new(10).unwrap(),
            ..Default::default()
        },
        write_buffer_connection: Some(write_buffer_connection.clone()),
        ..DatabaseRules::new(db_name.clone())
    };
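    // The small hard buffer limit (10,000 bytes) and the low MUB row threshold are
    // what drive this phase: the replayed writes exceed the hard limit before
    // compaction catches up, producing the log line asserted on below.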

    let database = server
        .create_database(ProvidedDatabaseRules::new_rules(rules.into()).unwrap())
        .await
        .unwrap();

    let db = database.initialized_db().unwrap();

    // after a while the table should exist
    wait_for_tables(&db, &["table_1", "table_2"]).await;

    // no rows should be dropped
    let batches = run_query(Arc::clone(&db), "select sum(bar) as n from table_1").await;
    let expected = vec!["+----+", "| n  |", "+----+", "| 25 |", "+----+"];
    assert_batches_eq!(expected, &batches);

    // check that the hard buffer limit was actually hit (otherwise this test is pointless/outdated)
    assert_contains!(
        tracing_capture.to_string(),
        "Hard limit reached while reading from write buffer, waiting for compaction to catch up"
    );

    // Persist the final write; this ensures that we have to replay the data for table_1
    db.persist_partition("table_2", "tag_partition_by_a", true)
        .await
        .unwrap();

    // Only table_2 should be persisted
    assert_eq!(count_persisted_chunks(&db), 1);

    // Shutdown server
    server.shutdown();
    server.join().await.unwrap();

    // Drop so they don't contribute to metrics
    std::mem::drop(server);
    std::mem::drop(database);
    std::mem::drop(db);
    std::mem::drop(tracing_capture);

    //
    // Phase 2: Verify that replaying from a write buffer will wait for compaction in the event
    // the hard limit is exceeded
    //

    // Recreate server
    let tracing_capture = TracingCapture::new();

    let server = make_initialized_server(server_id, Arc::clone(&application)).await;
    let databases = server.databases().unwrap();
    assert_eq!(databases.len(), 1);

    let database = databases.into_iter().next().unwrap();
    database.wait_for_init().await.unwrap();
    let database_uuid = database.uuid();

    let db = database.initialized_db().unwrap();
    let batches = run_query(Arc::clone(&db), "select sum(bar) as n from table_1").await;
    let expected = vec!["+----+", "| n  |", "+----+", "| 25 |", "+----+"];
    assert_batches_eq!(expected, &batches);

    assert_contains!(
        tracing_capture.to_string(),
        "Hard limit reached while replaying, waiting for compaction to catch up"
    );

    // Only table_2 should be persisted
    assert_eq!(count_persisted_chunks(&db), 1);

    server.shutdown();
    server.join().await.unwrap();

    //
    // Phase 3: Verify that persistence is disabled during replay
    //

    // Override rules to set the persist row threshold lower on restart
    let rules = ProvidedDatabaseRules::new_rules(
        DatabaseRules {
            partition_template,
            lifecycle_rules: LifecycleRules {
                persist: true,
                late_arrive_window_seconds: NonZeroU32::new(1).unwrap(),
                persist_row_threshold: NonZeroUsize::new(5).unwrap(),
                ..Default::default()
            },
            write_buffer_connection: Some(write_buffer_connection),
            ..DatabaseRules::new(db_name.clone())
        }
        .into(),
    )
    .unwrap();

    application
        .config_provider()
        .store_rules(database_uuid, &rules)
        .await
        .unwrap();

    std::mem::drop(server);
    std::mem::drop(database);
    std::mem::drop(db);
    std::mem::drop(tracing_capture);

    // Clear the write buffer and only write in the first group of writes
    mock_shared_state.clear_messages(0);
    write_group1(&mock_shared_state);

    // Restart server - this will load the new rules written above
    let server = make_initialized_server(server_id, Arc::clone(&application)).await;
    let databases = server.databases().unwrap();
    assert_eq!(databases.len(), 1);
    let database = databases.into_iter().next().unwrap();

    // Sleep briefly to let the lifecycle policy run
    //
    // During this time replay should still be running, as there are insufficient
    // writes within the write buffer to "complete" replay.
    //
    // However, there are sufficient rows to exceed the persist row threshold.
    //
    // Therefore, if persist were not disabled by replay, the lifecycle would try
    // to persist without the full set of writes. This would in turn result
    // in two separate chunks being persisted for table_1
    tokio::time::sleep(Duration::from_secs(1)).await;

    assert!(
        database.wait_for_init().now_or_never().is_none(),
        "replay shouldn't have finished, as there is insufficient data"
    );

    // Write in the remainder of the data to allow replay to finish
    write_group2(&mock_shared_state);

    database.wait_for_init().await.unwrap();
    let db = database.initialized_db().unwrap();

    let start = Instant::now();
    loop {
        if count_persisted_chunks(&db) > 1 {
            // As soon as replay finishes the lifecycle should have persisted everything in
            // table_1 into a single chunk. We should therefore have two chunks, one for
            // each of table_1 and table_2
            assert_eq!(db.chunk_summaries().len(), 2, "persisted during replay!");
            break;
        }

        tokio::time::sleep(Duration::from_millis(10)).await;
        assert!(
            start.elapsed() < Duration::from_secs(10),
            "failed to persist chunk"
        )
    }

    server.shutdown();
    server.join().await.unwrap();
}

/// The first set of writes for the write buffer
fn write_group1(write_buffer_state: &MockBufferSharedState) {
    // setup write buffer
    // these numbers are hand-tuned to trigger hard buffer limits without making the test too big
    let n_entries = 50u64;

    for sequence_number in 0..n_entries {
        let lp = format!(
            "table_1,tag_partition_by=a foo=\"hello\",bar=1 {}",
            sequence_number / 2
        );
        write_buffer_state.push_lp(Sequence::new(0, sequence_number), &lp);
    }
}
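
// Each pair of sequence numbers shares a timestamp (`sequence_number / 2`), so the
// 50 entries deduplicate to 25 rows in table_1, which is the `sum(bar) = 25` the
// test asserts above.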

/// The second set of writes for the write buffer
fn write_group2(write_buffer_state: &MockBufferSharedState) {
    // Write a line with timestamp 0 - this forces persistence to persist all
    // prior writes if the server has read this line
    write_buffer_state.push_lp(
        Sequence::new(0, 100),
        "table_1,tag_partition_by=a foo=\"hello\",bar=1 0",
    );

    write_buffer_state.push_lp(Sequence::new(0, 101), "table_2,tag_partition_by=a foo=1 0");
}

fn count_persisted_chunks(db: &Db) -> usize {
    db.chunk_summaries()
        .into_iter()
        .filter(|x| {
            matches!(
                x.storage,
                ChunkStorage::ObjectStoreOnly | ChunkStorage::ReadBufferAndObjectStore
            )
        })
        .count()
}
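
// A chunk counts as persisted once it has reached object store, whether or not a
// read buffer copy is still loaded; that is why both storage variants are matched.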