fix: Move write buffer data types to write_buffer crate

parent d7304c1114
commit 44209faa8e
@@ -1,11 +1,10 @@
-use data_types::write_buffer::{WriteBufferConnection, WriteBufferCreationConfig};
 use iox_time::SystemProvider;
 use observability_deps::tracing::*;
 use std::{collections::BTreeMap, num::NonZeroU32, path::PathBuf, sync::Arc};
 use tempfile::TempDir;
 use trace::TraceCollector;
 use write_buffer::{
-    config::WriteBufferConfigFactory,
+    config::{WriteBufferConfigFactory, WriteBufferConnection, WriteBufferCreationConfig},
     core::{WriteBufferError, WriteBufferReading, WriteBufferWriting},
 };
@@ -15,5 +15,4 @@ pub mod consistent_hasher;
 pub mod job;
 pub mod partition_metadata;
 pub mod timestamp;
-pub mod write_buffer;
 pub mod write_summary;
@@ -1,64 +0,0 @@
-use std::{collections::BTreeMap, num::NonZeroU32};
-
-pub const DEFAULT_N_SEQUENCERS: u32 = 1;
-
-/// Configures the use of a write buffer.
-#[derive(Debug, Eq, PartialEq, Clone, Hash)]
-pub struct WriteBufferConnection {
-    /// Which type should be used (e.g. "kafka", "mock")
-    pub type_: String,
-
-    /// Connection string, depends on [`type_`](Self::type_).
-    pub connection: String,
-
-    /// Special configs to be applied when establishing the connection.
-    ///
-    /// This depends on [`type_`](Self::type_) and can configure aspects like timeouts.
-    ///
-    /// Note: This config should be a [`BTreeMap`] to ensure a stable hash.
-    pub connection_config: BTreeMap<String, String>,
-
-    /// Specifies if the sequencers (e.g. for Kafka in form of a topic) should be automatically created if they do not
-    /// exist prior to reading or writing.
-    pub creation_config: Option<WriteBufferCreationConfig>,
-}
-
-impl Default for WriteBufferConnection {
-    fn default() -> Self {
-        Self {
-            type_: "unspecified".to_string(),
-            connection: Default::default(),
-            connection_config: Default::default(),
-            creation_config: Default::default(),
-        }
-    }
-}
-
-/// Configures sequencer auto-creation for write buffers.
-///
-/// What that means depends on the used write buffer, e.g. for Kafka this will create a new topic w/
-/// [`n_sequencers`](Self::n_sequencers) partitions.
-#[derive(Debug, Eq, PartialEq, Clone, Hash)]
-pub struct WriteBufferCreationConfig {
-    /// Number of sequencers.
-    ///
-    /// How they are implemented depends on [type](WriteBufferConnection::type_), e.g. for Kafka this is mapped to the
-    /// number of partitions.
-    pub n_sequencers: NonZeroU32,
-
-    /// Special configs to be applied when sequencers are created.
-    ///
-    /// This depends on [type](WriteBufferConnection::type_) and can set up parameters like retention policy.
-    ///
-    /// Note: This config should be a [`BTreeMap`] to ensure a stable hash.
-    pub options: BTreeMap<String, String>,
-}
-
-impl Default for WriteBufferCreationConfig {
-    fn default() -> Self {
-        Self {
-            n_sequencers: NonZeroU32::try_from(DEFAULT_N_SEQUENCERS).unwrap(),
-            options: Default::default(),
-        }
-    }
-}
@@ -7,22 +7,86 @@ use crate::{
         MockBufferForWritingThatAlwaysErrors, MockBufferSharedState,
     },
 };
-use data_types::write_buffer::WriteBufferConnection;
 use iox_time::TimeProvider;
 use parking_lot::RwLock;
 use std::{
     collections::{btree_map::Entry, BTreeMap},
+    num::NonZeroU32,
     path::PathBuf,
     sync::Arc,
 };
 use trace::TraceCollector;
 
+pub const DEFAULT_N_SEQUENCERS: u32 = 1;
+
 #[derive(Debug, Clone)]
 enum Mock {
     Normal(MockBufferSharedState),
     AlwaysFailing,
 }
 
+/// Configures the use of a write buffer.
+#[derive(Debug, Eq, PartialEq, Clone, Hash)]
+pub struct WriteBufferConnection {
+    /// Which type should be used (e.g. "kafka", "mock")
+    pub type_: String,
+
+    /// Connection string, depends on [`type_`](Self::type_).
+    pub connection: String,
+
+    /// Special configs to be applied when establishing the connection.
+    ///
+    /// This depends on [`type_`](Self::type_) and can configure aspects like timeouts.
+    ///
+    /// Note: This config should be a [`BTreeMap`] to ensure a stable hash.
+    pub connection_config: BTreeMap<String, String>,
+
+    /// Specifies if the sequencers (e.g. for Kafka in form of a topic) should be automatically
+    /// created if they do not exist prior to reading or writing.
+    pub creation_config: Option<WriteBufferCreationConfig>,
+}
+
+impl Default for WriteBufferConnection {
+    fn default() -> Self {
+        Self {
+            type_: "unspecified".to_string(),
+            connection: Default::default(),
+            connection_config: Default::default(),
+            creation_config: Default::default(),
+        }
+    }
+}
+
+/// Configures sequencer auto-creation for write buffers.
+///
+/// What that means depends on the used write buffer, e.g. for Kafka this will create a new topic w/
+/// [`n_sequencers`](Self::n_sequencers) partitions.
+#[derive(Debug, Eq, PartialEq, Clone, Hash)]
+pub struct WriteBufferCreationConfig {
+    /// Number of sequencers.
+    ///
+    /// How they are implemented depends on [type](WriteBufferConnection::type_), e.g. for Kafka
+    /// this is mapped to the number of partitions.
+    pub n_sequencers: NonZeroU32,
+
+    /// Special configs to be applied when sequencers are created.
+    ///
+    /// This depends on [type](WriteBufferConnection::type_) and can set up parameters like
+    /// retention policy.
+    ///
+    /// Note: This config should be a [`BTreeMap`] to ensure a stable hash.
+    pub options: BTreeMap<String, String>,
+}
+
+impl Default for WriteBufferCreationConfig {
+    fn default() -> Self {
+        Self {
+            n_sequencers: NonZeroU32::try_from(DEFAULT_N_SEQUENCERS).unwrap(),
+            options: Default::default(),
+        }
+    }
+}
+
 /// Factory that creates [`WriteBufferReading`] and [`WriteBufferWriting`]
 /// from [`WriteBufferConnection`].
 #[derive(Debug)]
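With this move, downstream crates import the connection and creation config types from `write_buffer::config` instead of `data_types::write_buffer` (see the import change in the first hunk above). A minimal usage sketch, not part of this diff, with arbitrary placeholder values:

```rust
use std::{collections::BTreeMap, num::NonZeroU32};
use write_buffer::config::{WriteBufferConnection, WriteBufferCreationConfig};

// Build a connection description using the types at their new location.
// The field values below are illustrative placeholders, not defaults from the codebase.
fn example_connection() -> WriteBufferConnection {
    let mut connection_config = BTreeMap::new();
    connection_config.insert("some_option".to_string(), "some_value".to_string());

    WriteBufferConnection {
        type_: "kafka".to_string(),
        connection: "localhost:9092".to_string(),
        connection_config,
        // Ask the write buffer to auto-create two sequencers (Kafka partitions) if missing.
        creation_config: Some(WriteBufferCreationConfig {
            n_sequencers: NonZeroU32::new(2).unwrap(),
            options: BTreeMap::new(),
        }),
    }
}
```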
@@ -193,7 +257,6 @@ mod tests {
         core::test_utils::random_topic_name, maybe_skip_kafka_integration,
         mock::MockBufferSharedState,
     };
-    use data_types::write_buffer::WriteBufferCreationConfig;
    use data_types2::DatabaseName;
    use std::{convert::TryFrom, num::NonZeroU32};
    use tempfile::TempDir;
@@ -1,11 +1,12 @@
 //! Write buffer that uses files to encode messages.
 //!
-//! This implementation can be used by multiple readers and writers at the same time. It is ideal for local end2end
-//! testing. However it might not perform extremely well when dealing with large messages and (currently) does not
-//! implement any message pruning.
+//! This implementation can be used by multiple readers and writers at the same time. It is ideal
+//! for local end2end testing. However it might not perform extremely well when dealing with large
+//! messages and (currently) does not implement any message pruning.
 //!
 //! # Format
-//! Given a root path, the database name and the number of sequencers, the directory structure looks like this:
+//! Given a root path, the database name and the number of sequencers, the directory structure
+//! looks like this:
 //!
 //! ```text
 //! <root>/<db_name>/
@@ -54,50 +55,63 @@
 //! <payload>
 //! ```
 //!
-//! The payload is binary data. The headers contain metadata about it (like timestamp, format, tracing information).
+//! The payload is binary data. The headers contain metadata about it (like timestamp, format,
+//! tracing information).
 //!
 //! # Implementation Notes
+//!
 //! Some notes about file system functionality that shaped this implementation
 //!
 //! ## Atomic File Creation
-//! It is quite easy to create a file and ensure that it did not exist beforehand using [`open(2)`] together with
-//! `O_CREAT` and `O_EXCL`. However writing actual content to that file requires time and a reader could already see an
-//! incomplete version of that. A workaround is to use a scratchpad file at a temporary location, write the entire
-//! desired content to it and then move the file to the target location. This assumes that the target location and the
-//! file content are independent, e.g. that the file itself does not contain the `sequence_number`. Now we need to find
-//! a way to make this move operation reliable though.
-//!
-//! Files can be renamed using [`rename(2)`]. There is the `RENAME_NOREPLACE` flag that prevents us from silently
-//! overwriting the target file. This however is only implemented for a handful of filesystems (notably NOT [NFS]). So
-//! to use [`rename(2)`] we would need some additional locking.
-//!
-//! Then there is [`link(2)`] which creates a new link to an existing file. It explicitly states that the target is
-//! NEVER overwritten. According to <https://unix.stackexchange.com/a/125946> this should even work properly on [NFS].
-//! We then need to use [`unlink(2)`] to clean the scratchpad file.
+//!
+//! It is quite easy to create a file and ensure that it did not exist beforehand using [`open(2)`]
+//! together with `O_CREAT` and `O_EXCL`. However writing actual content to that file requires time
+//! and a reader could already see an incomplete version of that. A workaround is to use a
+//! scratchpad file at a temporary location, write the entire desired content to it and then move
+//! the file to the target location. This assumes that the target location and the file content are
+//! independent, e.g. that the file itself does not contain the `sequence_number`. Now we need to
+//! find a way to make this move operation reliable though.
+//!
+//! Files can be renamed using [`rename(2)`]. There is the `RENAME_NOREPLACE` flag that prevents
+//! us from silently overwriting the target file. This however is only implemented for a handful of
+//! filesystems (notably NOT [NFS]). So to use [`rename(2)`] we would need some additional locking.
+//!
+//! Then there is [`link(2)`] which creates a new link to an existing file. It explicitly states
+//! that the target is NEVER overwritten. According to <https://unix.stackexchange.com/a/125946>
+//! this should even work properly on [NFS]. We then need to use [`unlink(2)`] to clean the
+//! scratchpad file.
 //!
 //! ## Atomic Directory Creation
-//! To set up a new sequencer config we need to create the directory structure in an atomic way. Hardlinks don't work
-//! for directories, but [`symlink(2)`] does and -- like [`link(2)`] -- does not overwrite existing targets.
+//!
+//! To set up a new sequencer config we need to create the directory structure in an atomic way.
+//! Hardlinks don't work for directories, but [`symlink(2)`] does and -- like [`link(2)`] -- does
+//! not overwrite existing targets.
 //!
 //! ## File Locking
-//! Instead of atomic operations we could also use file locking. Under Linux there are a few ways this can be achieved:
+//!
+//! Instead of atomic operations we could also use file locking. Under Linux there are a few ways
+//! this can be achieved:
 //!
 //! - **[`fcntl(2)`] via `F_SETLK`, `F_SETLKW`, `F_GETLK`:** <br />
-//!   Works on [NFS], but is process-bound (aka if you have multiple writers within the same process, only one can
+//!   Works on [NFS], but is process-bound (aka if you have multiple writers within the same
+//!   process, only one can
 //!   acquire the lock).
 //! - **[`fcntl(2)`] via `F_OFD_SETLK`, `F_OFD_SETLKW`, `F_OFD_GETLK`:** <br />
 //!   Works on [NFS] and is file-descriptor-bound.
 //! - **[`flock(2)`]:** <br />
-//!   Works on [NFS] but is technically emulated via [`fcntl(2)`] so the latter should probably be preferred.
+//!   Works on [NFS] but is technically emulated via [`fcntl(2)`] so the latter should probably be
+//!   preferred.
 //!
-//! The biggest issue with file locking is what happens when an operation fails while a lock is being held. Either the
-//! resulting state is obviously unfinished (e.g. due to some checksum or size mismatch, due to some missing marker) or
-//! we would need to implement some form of lock poisoning. Since this can get quite tricky, I have decided that atomic
-//! file and directory operations are easier to reason about.
+//! The biggest issue with file locking is what happens when an operation fails while a lock is
+//! being held. Either the resulting state is obviously unfinished (e.g. due to some checksum or
+//! size mismatch, due to some missing marker) or we would need to implement some form of lock
+//! poisoning. Since this can get quite tricky, I have decided that atomic file and directory
+//! operations are easier to reason about.
 //!
 //! ## Message Metadata
-//! We are NOT using any file-based metadata (like `mtime` or extended attributes) because they are often broken.
+//!
+//! We are NOT using any file-based metadata (like `mtime` or extended attributes) because they are
+//! often broken.
 //!
 //!
 //! [`fcntl(2)`]: https://www.man7.org/linux/man-pages/man2/fcntl.2.html
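The scratchpad-and-`link(2)` approach described in the module docs above maps onto Rust's standard library, where `std::fs::hard_link` corresponds to `link(2)` and `std::fs::remove_file` to `unlink(2)`. A rough sketch of the pattern, with hypothetical paths and function names that are not taken from this crate:

```rust
use std::{fs, io::Write, path::Path};

// Sketch only: write the full payload to a scratchpad file, then publish it atomically
// via a hard link, which never overwrites an existing target.
fn publish_atomically(scratchpad: &Path, target: &Path, payload: &[u8]) -> std::io::Result<()> {
    // Write the complete content to the scratchpad first; readers never look at this path.
    let mut f = fs::File::create(scratchpad)?;
    f.write_all(payload)?;
    f.sync_all()?;

    // link(2): fails with AlreadyExists instead of overwriting if `target` is already present.
    fs::hard_link(scratchpad, target)?;

    // unlink(2) the scratchpad; the data stays reachable through `target`.
    fs::remove_file(scratchpad)?;
    Ok(())
}
```

Because the hard link either succeeds or fails without replacing an existing file, readers should only ever observe fully written files at the target path.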
@@ -108,12 +122,13 @@
 //! [`rename(2)`]: https://man7.org/linux/man-pages/man2/rename.2.html
 //! [`symlink(2)`]: https://man7.org/linux/man-pages/man2/symlink.2.html
 //! [`unlink(2)`]: https://man7.org/linux/man-pages/man2/unlink.2.html
 
 use crate::{
     codec::{ContentType, IoxHeaders},
+    config::WriteBufferCreationConfig,
     core::{WriteBufferError, WriteBufferReading, WriteBufferStreamHandler, WriteBufferWriting},
 };
 use async_trait::async_trait;
-use data_types::write_buffer::WriteBufferCreationConfig;
 use data_types2::Sequence;
 use dml::{DmlMeta, DmlOperation};
 use futures::{stream::BoxStream, Stream, StreamExt};
@@ -1,10 +1,8 @@
+use crate::{config::WriteBufferCreationConfig, core::WriteBufferError};
 use std::{collections::BTreeMap, fmt::Display, str::FromStr, time::Duration};
 
-use data_types::write_buffer::WriteBufferCreationConfig;
-
-use crate::core::WriteBufferError;
-
-/// Generic client config that is used for consumers, producers as well as admin operations (like "create topic").
+/// Generic client config that is used for consumers, producers as well as admin operations (like
+/// "create topic").
 #[derive(Debug, PartialEq, Eq)]
 pub struct ClientConfig {
     /// Maximum message size in bytes.
@@ -4,13 +4,13 @@ use self::{
 };
 use crate::{
     codec::IoxHeaders,
+    config::WriteBufferCreationConfig,
     core::{
         WriteBufferError, WriteBufferErrorKind, WriteBufferReading, WriteBufferStreamHandler,
         WriteBufferWriting,
     },
 };
 use async_trait::async_trait;
-use data_types::write_buffer::WriteBufferCreationConfig;
 use data_types2::Sequence;
 use dml::{DmlMeta, DmlOperation};
 use futures::{stream::BoxStream, StreamExt};
@@ -1,8 +1,8 @@
-use crate::core::{
-    WriteBufferError, WriteBufferReading, WriteBufferStreamHandler, WriteBufferWriting,
+use crate::{
+    config::WriteBufferCreationConfig,
+    core::{WriteBufferError, WriteBufferReading, WriteBufferStreamHandler, WriteBufferWriting},
 };
 use async_trait::async_trait;
-use data_types::write_buffer::WriteBufferCreationConfig;
 use data_types2::Sequence;
 use dml::{DmlDelete, DmlMeta, DmlOperation, DmlWrite};
 use futures::{stream::BoxStream, StreamExt};