fix: Move write buffer data types to write_buffer crate

parent d7304c1114
commit 44209faa8e
@@ -1,11 +1,10 @@
-use data_types::write_buffer::{WriteBufferConnection, WriteBufferCreationConfig};
 use iox_time::SystemProvider;
 use observability_deps::tracing::*;
 use std::{collections::BTreeMap, num::NonZeroU32, path::PathBuf, sync::Arc};
 use tempfile::TempDir;
 use trace::TraceCollector;
 use write_buffer::{
-    config::WriteBufferConfigFactory,
+    config::{WriteBufferConfigFactory, WriteBufferConnection, WriteBufferCreationConfig},
     core::{WriteBufferError, WriteBufferReading, WriteBufferWriting},
 };
@@ -15,5 +15,4 @@ pub mod consistent_hasher;
 pub mod job;
 pub mod partition_metadata;
 pub mod timestamp;
-pub mod write_buffer;
 pub mod write_summary;
@@ -1,64 +0,0 @@
-use std::{collections::BTreeMap, num::NonZeroU32};
-
-pub const DEFAULT_N_SEQUENCERS: u32 = 1;
-
-/// Configures the use of a write buffer.
-#[derive(Debug, Eq, PartialEq, Clone, Hash)]
-pub struct WriteBufferConnection {
-    /// Which type should be used (e.g. "kafka", "mock")
-    pub type_: String,
-
-    /// Connection string, depends on [`type_`](Self::type_).
-    pub connection: String,
-
-    /// Special configs to be applied when establishing the connection.
-    ///
-    /// This depends on [`type_`](Self::type_) and can configure aspects like timeouts.
-    ///
-    /// Note: This config should be a [`BTreeMap`] to ensure a stable hash.
-    pub connection_config: BTreeMap<String, String>,
-
-    /// Specifies if the sequencers (e.g. for Kafka in form of a topic) should be automatically created if they do not
-    /// exist prior to reading or writing.
-    pub creation_config: Option<WriteBufferCreationConfig>,
-}
-
-impl Default for WriteBufferConnection {
-    fn default() -> Self {
-        Self {
-            type_: "unspecified".to_string(),
-            connection: Default::default(),
-            connection_config: Default::default(),
-            creation_config: Default::default(),
-        }
-    }
-}
-
-/// Configures sequencer auto-creation for write buffers.
-///
-/// What that means depends on the used write buffer, e.g. for Kafka this will create a new topic w/
-/// [`n_sequencers`](Self::n_sequencers) partitions.
-#[derive(Debug, Eq, PartialEq, Clone, Hash)]
-pub struct WriteBufferCreationConfig {
-    /// Number of sequencers.
-    ///
-    /// How they are implemented depends on [type](WriteBufferConnection::type_), e.g. for Kafka this is mapped to the
-    /// number of partitions.
-    pub n_sequencers: NonZeroU32,
-
-    /// Special configs to be applied when sequencers are created.
-    ///
-    /// This depends on [type](WriteBufferConnection::type_) and can set up parameters like retention policy.
-    ///
-    /// Note: This config should be a [`BTreeMap`] to ensure a stable hash.
-    pub options: BTreeMap<String, String>,
-}
-
-impl Default for WriteBufferCreationConfig {
-    fn default() -> Self {
-        Self {
-            n_sequencers: NonZeroU32::try_from(DEFAULT_N_SEQUENCERS).unwrap(),
-            options: Default::default(),
-        }
-    }
-}
@@ -7,22 +7,86 @@ use crate::{
         MockBufferForWritingThatAlwaysErrors, MockBufferSharedState,
     },
 };
-use data_types::write_buffer::WriteBufferConnection;
 use iox_time::TimeProvider;
 use parking_lot::RwLock;
 use std::{
     collections::{btree_map::Entry, BTreeMap},
+    num::NonZeroU32,
     path::PathBuf,
     sync::Arc,
 };
 use trace::TraceCollector;
 
+pub const DEFAULT_N_SEQUENCERS: u32 = 1;
+
 #[derive(Debug, Clone)]
 enum Mock {
     Normal(MockBufferSharedState),
     AlwaysFailing,
 }
 
+/// Configures the use of a write buffer.
+#[derive(Debug, Eq, PartialEq, Clone, Hash)]
+pub struct WriteBufferConnection {
+    /// Which type should be used (e.g. "kafka", "mock")
+    pub type_: String,
+
+    /// Connection string, depends on [`type_`](Self::type_).
+    pub connection: String,
+
+    /// Special configs to be applied when establishing the connection.
+    ///
+    /// This depends on [`type_`](Self::type_) and can configure aspects like timeouts.
+    ///
+    /// Note: This config should be a [`BTreeMap`] to ensure a stable hash.
+    pub connection_config: BTreeMap<String, String>,
+
+    /// Specifies if the sequencers (e.g. for Kafka in form of a topic) should be automatically
+    /// created if they do not exist prior to reading or writing.
+    pub creation_config: Option<WriteBufferCreationConfig>,
+}
+
+impl Default for WriteBufferConnection {
+    fn default() -> Self {
+        Self {
+            type_: "unspecified".to_string(),
+            connection: Default::default(),
+            connection_config: Default::default(),
+            creation_config: Default::default(),
+        }
+    }
+}
+
+/// Configures sequencer auto-creation for write buffers.
+///
+/// What that means depends on the used write buffer, e.g. for Kafka this will create a new topic w/
+/// [`n_sequencers`](Self::n_sequencers) partitions.
+#[derive(Debug, Eq, PartialEq, Clone, Hash)]
+pub struct WriteBufferCreationConfig {
+    /// Number of sequencers.
+    ///
+    /// How they are implemented depends on [type](WriteBufferConnection::type_), e.g. for Kafka
+    /// this is mapped to the number of partitions.
+    pub n_sequencers: NonZeroU32,
+
+    /// Special configs to be applied when sequencers are created.
+    ///
+    /// This depends on [type](WriteBufferConnection::type_) and can set up parameters like
+    /// retention policy.
+    ///
+    /// Note: This config should be a [`BTreeMap`] to ensure a stable hash.
+    pub options: BTreeMap<String, String>,
+}
+
+impl Default for WriteBufferCreationConfig {
+    fn default() -> Self {
+        Self {
+            n_sequencers: NonZeroU32::try_from(DEFAULT_N_SEQUENCERS).unwrap(),
+            options: Default::default(),
+        }
+    }
+}
+
 /// Factory that creates [`WriteBufferReading`] and [`WriteBufferWriting`]
 /// from [`WriteBufferConnection`].
 #[derive(Debug)]
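With this move, downstream crates import the connection and creation config types from `write_buffer::config` instead of `data_types::write_buffer` (see the import change in the first hunk above). A minimal usage sketch, not part of this diff, with arbitrary placeholder values:

```rust
use std::{collections::BTreeMap, num::NonZeroU32};
use write_buffer::config::{WriteBufferConnection, WriteBufferCreationConfig};

// Build a connection description using the types at their new location.
// The field values below are illustrative placeholders, not defaults from the codebase.
fn example_connection() -> WriteBufferConnection {
    let mut connection_config = BTreeMap::new();
    connection_config.insert("some_option".to_string(), "some_value".to_string());

    WriteBufferConnection {
        type_: "kafka".to_string(),
        connection: "localhost:9092".to_string(),
        connection_config,
        // Ask the write buffer to auto-create two sequencers (Kafka partitions) if missing.
        creation_config: Some(WriteBufferCreationConfig {
            n_sequencers: NonZeroU32::new(2).unwrap(),
            options: BTreeMap::new(),
        }),
    }
}
```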
@@ -193,7 +257,6 @@ mod tests {
         core::test_utils::random_topic_name, maybe_skip_kafka_integration,
         mock::MockBufferSharedState,
     };
-    use data_types::write_buffer::WriteBufferCreationConfig;
    use data_types2::DatabaseName;
    use std::{convert::TryFrom, num::NonZeroU32};
    use tempfile::TempDir;
@@ -1,11 +1,12 @@
 //! Write buffer that uses files to encode messages.
 //!
-//! This implementation can be used by multiple readers and writers at the same time. It is ideal for local end2end
-//! testing. However it might not perform extremely well when dealing with large messages and (currently) does not
-//! implement any message pruning.
+//! This implementation can be used by multiple readers and writers at the same time. It is ideal
+//! for local end2end testing. However it might not perform extremely well when dealing with large
+//! messages and (currently) does not implement any message pruning.
 //!
 //! # Format
-//! Given a root path, the database name and the number of sequencers, the directory structure looks like this:
+//! Given a root path, the database name and the number of sequencers, the directory structure
+//! looks like this:
 //!
 //! ```text
 //! <root>/<db_name>/
@@ -54,50 +55,63 @@
 //! <payload>
 //! ```
 //!
-//! The payload is binary data. The headers contain metadata about it (like timestamp, format, tracing information).
+//! The payload is binary data. The headers contain metadata about it (like timestamp, format,
+//! tracing information).
 //!
 //! # Implementation Notes
+//!
 //! Some notes about file system functionality that shaped this implementation
 //!
 //! ## Atomic File Creation
-//! It is quite easy to create a file and ensure that it did not exist beforehand using [`open(2)`] together with
-//! `O_CREAT` and `O_EXCL`. However writing actual content to that file requires time and a reader could already see an
-//! incomplete version of that. A workaround is to use a scratchpad file at a temporary location, write the entire
-//! desired content to it and then move the file to the target location. This assumes that the target location and the
-//! file content are independent, e.g. that the file itself does not contain the `sequence_number`. Now we need to find
-//! a way to make this move operation reliable though.
-//!
-//! Files can be renamed using [`rename(2)`]. There is the `RENAME_NOREPLACE` flag that prevents us from silently
-//! overwriting the target file. This however is only implemented for a handful of filesystems (notably NOT [NFS]). So
-//! to use [`rename(2)`] we would need some additional locking.
-//!
-//! Then there is [`link(2)`] which creates a new link to an existing file. It explicitly states that the target is
-//! NEVER overwritten. According to <https://unix.stackexchange.com/a/125946> this should even work properly on [NFS].
-//! We then need to use [`unlink(2)`] to clean the scratchpad file.
+//!
+//! It is quite easy to create a file and ensure that it did not exist beforehand using [`open(2)`]
+//! together with `O_CREAT` and `O_EXCL`. However writing actual content to that file requires time
+//! and a reader could already see an incomplete version of that. A workaround is to use a
+//! scratchpad file at a temporary location, write the entire desired content to it and then move
+//! the file to the target location. This assumes that the target location and the file content are
+//! independent, e.g. that the file itself does not contain the `sequence_number`. Now we need to
+//! find a way to make this move operation reliable though.
+//!
+//! Files can be renamed using [`rename(2)`]. There is the `RENAME_NOREPLACE` flag that prevents
+//! us from silently overwriting the target file. This however is only implemented for a handful of
+//! filesystems (notably NOT [NFS]). So to use [`rename(2)`] we would need some additional locking.
+//!
+//! Then there is [`link(2)`] which creates a new link to an existing file. It explicitly states
+//! that the target is NEVER overwritten. According to <https://unix.stackexchange.com/a/125946>
+//! this should even work properly on [NFS]. We then need to use [`unlink(2)`] to clean the
+//! scratchpad file.
 //!
 //! ## Atomic Directory Creation
-//! To set up a new sequencer config we need to create the directory structure in an atomic way. Hardlinks don't work
-//! for directories, but [`symlink(2)`] does and -- like [`link(2)`] -- does not overwrite existing targets.
+//!
+//! To set up a new sequencer config we need to create the directory structure in an atomic way.
+//! Hardlinks don't work for directories, but [`symlink(2)`] does and -- like [`link(2)`] -- does
+//! not overwrite existing targets.
 //!
 //! ## File Locking
-//! Instead of atomic operations we could also use file locking. Under Linux there are a few ways this can be achieved:
+//!
+//! Instead of atomic operations we could also use file locking. Under Linux there are a few ways
+//! this can be achieved:
 //!
 //! - **[`fcntl(2)`] via `F_SETLK`, `F_SETLKW`, `F_GETLK`:** <br />
-//!   Works on [NFS], but is process-bound (aka if you have multiple writers within the same process, only one can
+//!   Works on [NFS], but is process-bound (aka if you have multiple writers within the same
+//!   process, only one can
 //!   acquire the lock).
 //! - **[`fcntl(2)`] via `F_OFD_SETLK`, `F_OFD_SETLKW`, `F_OFD_GETLK`:** <br />
 //!   Works on [NFS] and is file-descriptor-bound.
 //! - **[`flock(2)`]:** <br />
-//!   Works on [NFS] but is technically emulated via [`fcntl(2)`] so the latter should probably be preferred.
+//!   Works on [NFS] but is technically emulated via [`fcntl(2)`] so the latter should probably be
+//!   preferred.
 //!
-//! The biggest issue with file locking is what happens when an operation fails while a lock is being held. Either the
-//! resulting state is obviously unfinished (e.g. due to some checksum or size mismatch, due to some missing marker) or
-//! we would need to implement some form of lock poisoning. Since this can get quite tricky, I have decided that atomic
-//! file and directory operations are easier to reason about.
+//! The biggest issue with file locking is what happens when an operation fails while a lock is
+//! being held. Either the resulting state is obviously unfinished (e.g. due to some checksum or
+//! size mismatch, due to some missing marker) or we would need to implement some form of lock
+//! poisoning. Since this can get quite tricky, I have decided that atomic file and directory
+//! operations are easier to reason about.
 //!
 //! ## Message Metadata
-//! We are NOT using any file-based metadata (like `mtime` or extended attributes) because they are often broken.
+//!
+//! We are NOT using any file-based metadata (like `mtime` or extended attributes) because they are
+//! often broken.
 //!
 //!
 //! [`fcntl(2)`]: https://www.man7.org/linux/man-pages/man2/fcntl.2.html
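The scratchpad-and-`link(2)` approach described in the module docs above maps onto Rust's standard library, where `std::fs::hard_link` corresponds to `link(2)` and `std::fs::remove_file` to `unlink(2)`. A rough sketch of the pattern, with hypothetical paths and function names that are not taken from this crate:

```rust
use std::{fs, io::Write, path::Path};

// Sketch only: write the full payload to a scratchpad file, then publish it atomically
// via a hard link, which never overwrites an existing target.
fn publish_atomically(scratchpad: &Path, target: &Path, payload: &[u8]) -> std::io::Result<()> {
    // Write the complete content to the scratchpad first; readers never look at this path.
    let mut f = fs::File::create(scratchpad)?;
    f.write_all(payload)?;
    f.sync_all()?;

    // link(2): fails with AlreadyExists instead of overwriting if `target` is already present.
    fs::hard_link(scratchpad, target)?;

    // unlink(2) the scratchpad; the data stays reachable through `target`.
    fs::remove_file(scratchpad)?;
    Ok(())
}
```

Because the hard link either succeeds or fails without replacing an existing file, readers should only ever observe fully written files at the target path.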
@@ -108,12 +122,13 @@
 //! [`rename(2)`]: https://man7.org/linux/man-pages/man2/rename.2.html
 //! [`symlink(2)`]: https://man7.org/linux/man-pages/man2/symlink.2.html
 //! [`unlink(2)`]: https://man7.org/linux/man-pages/man2/unlink.2.html
 
 use crate::{
     codec::{ContentType, IoxHeaders},
+    config::WriteBufferCreationConfig,
     core::{WriteBufferError, WriteBufferReading, WriteBufferStreamHandler, WriteBufferWriting},
 };
 use async_trait::async_trait;
-use data_types::write_buffer::WriteBufferCreationConfig;
 use data_types2::Sequence;
 use dml::{DmlMeta, DmlOperation};
 use futures::{stream::BoxStream, Stream, StreamExt};
@@ -1,10 +1,8 @@
+use crate::{config::WriteBufferCreationConfig, core::WriteBufferError};
 use std::{collections::BTreeMap, fmt::Display, str::FromStr, time::Duration};
 
-use data_types::write_buffer::WriteBufferCreationConfig;
-
-use crate::core::WriteBufferError;
-
-/// Generic client config that is used for consumers, producers as well as admin operations (like "create topic").
+/// Generic client config that is used for consumers, producers as well as admin operations (like
+/// "create topic").
 #[derive(Debug, PartialEq, Eq)]
 pub struct ClientConfig {
     /// Maximum message size in bytes.
@@ -4,13 +4,13 @@ use self::{
 };
 use crate::{
     codec::IoxHeaders,
+    config::WriteBufferCreationConfig,
     core::{
         WriteBufferError, WriteBufferErrorKind, WriteBufferReading, WriteBufferStreamHandler,
         WriteBufferWriting,
     },
 };
 use async_trait::async_trait;
-use data_types::write_buffer::WriteBufferCreationConfig;
 use data_types2::Sequence;
 use dml::{DmlMeta, DmlOperation};
 use futures::{stream::BoxStream, StreamExt};
@@ -1,8 +1,8 @@
-use crate::core::{
-    WriteBufferError, WriteBufferReading, WriteBufferStreamHandler, WriteBufferWriting,
+use crate::{
+    config::WriteBufferCreationConfig,
+    core::{WriteBufferError, WriteBufferReading, WriteBufferStreamHandler, WriteBufferWriting},
 };
 use async_trait::async_trait;
-use data_types::write_buffer::WriteBufferCreationConfig;
 use data_types2::Sequence;
 use dml::{DmlDelete, DmlMeta, DmlOperation, DmlWrite};
 use futures::{stream::BoxStream, StreamExt};