From 49c63d35b157c32efa96fc80662813be7b34eacf Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 Oct 2021 15:02:12 +0000 Subject: [PATCH 01/17] chore(deps): bump cache_loader_async from 0.1.1 to 0.1.2 Bumps [cache_loader_async](https://github.com/ZeroTwo-Bot/cache-loader-async-rs) from 0.1.1 to 0.1.2. - [Release notes](https://github.com/ZeroTwo-Bot/cache-loader-async-rs/releases) - [Changelog](https://github.com/ZeroTwo-Bot/cache-loader-async-rs/blob/master/CHANGELOG.md) - [Commits](https://github.com/ZeroTwo-Bot/cache-loader-async-rs/commits) --- updated-dependencies: - dependency-name: cache_loader_async dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] <support@github.com> --- Cargo.lock | 4 ++-- grpc-router/Cargo.toml | 2 +- server/Cargo.toml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 400ef7a85b..cb69b830ee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -467,9 +467,9 @@ checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" [[package]] name = "cache_loader_async" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c372ea90888b43d6899b58a7831a2f3ea916d95c0042d81255ef4960e1376be" +checksum = "606d302374be324dae8264e59d63952b9d39b5180d85edbfc4a533d4046d5e43" dependencies = [ "futures", "thiserror", diff --git a/grpc-router/Cargo.toml b/grpc-router/Cargo.toml index bab1334eee..9cfc2102bd 100644 --- a/grpc-router/Cargo.toml +++ b/grpc-router/Cargo.toml @@ -6,7 +6,7 @@ edition = "2018" [dependencies] bytes = "1.0" -cache_loader_async = {version = "0.1.0", features = ["ttl-cache"] } +cache_loader_async = {version = "0.1.2", features = ["ttl-cache"] } futures = "0.3" observability_deps = { path = "../observability_deps" } paste = "1.0.5" diff --git a/server/Cargo.toml b/server/Cargo.toml index fe011fa570..0d4252d4fe 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -10,7 +10,7 @@ arrow_util = { path = "../arrow_util" } async-trait = "0.1" bytes = "1.0" chrono = "0.4" -cache_loader_async = { version = "0.1.0", features = ["ttl-cache"] } +cache_loader_async = { version = "0.1.2", features = ["ttl-cache"] } crc32fast = "1.2.0" data_types = { path = "../data_types" } datafusion = { path = "../datafusion" } From ad41b74a032c2338114994fe597060553b21be0d Mon Sep 17 00:00:00 2001 From: Marco Neumann <marco@crepererum.net> Date: Mon, 11 Oct 2021 17:12:08 +0200 Subject: [PATCH 02/17] fix: adjust code to `cache_loader_async` 0.1.2 --- grpc-router/src/connection_manager.rs | 2 +- server/src/connection.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/grpc-router/src/connection_manager.rs b/grpc-router/src/connection_manager.rs index b61c5bb991..3ee3471b55 100644 --- a/grpc-router/src/connection_manager.rs +++ b/grpc-router/src/connection_manager.rs @@ -190,7 +190,7 @@ where /// Builds a [`CachingConnectionManager`]. 
pub fn build(self) -> CachingConnectionManager<T> { let make_client = self.make_client; - let (cache, _) = LoadingCache::with_backing(self.backing, move |connect| async move { + let cache = LoadingCache::with_backing(self.backing, move |connect| async move { (make_client)(connect) .await .map_err(|e| Arc::new(Box::new(e) as _)) diff --git a/server/src/connection.rs b/server/src/connection.rs index d75dc39e34..7f1f081642 100644 --- a/server/src/connection.rs +++ b/server/src/connection.rs @@ -57,7 +57,7 @@ pub enum CacheFillError { impl ConnectionManagerImpl { pub fn new() -> Self { - let (cache, _) = LoadingCache::new(Self::cached_remote_server); + let cache = LoadingCache::new(Self::cached_remote_server); Self { cache } } From 06c2c23322294cf168f70fb03b488c1b8306e928 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Mon, 11 Oct 2021 16:43:05 +0100 Subject: [PATCH 03/17] refactor: create PreservedCatalogConfig struct (#2793) * refactor: create PreservedCatalogConfig struct * chore: fmt Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com> --- parquet_file/src/catalog/cleanup.rs | 45 ++- parquet_file/src/catalog/core.rs | 464 ++++++++++++----------- parquet_file/src/catalog/dump.rs | 49 ++- parquet_file/src/catalog/prune.rs | 37 +- parquet_file/src/catalog/rebuild.rs | 88 +++-- parquet_file/src/catalog/test_helpers.rs | 93 +++-- parquet_file/src/test_utils.rs | 7 + server/src/database.rs | 11 +- server/src/db.rs | 6 +- server/src/db/lifecycle/persist.rs | 17 +- server/src/db/load.rs | 42 +- server/src/lib.rs | 13 +- server/src/utils.rs | 4 +- 13 files changed, 457 insertions(+), 419 deletions(-) diff --git a/parquet_file/src/catalog/cleanup.rs b/parquet_file/src/catalog/cleanup.rs index c942ee969c..35f95857d6 100644 --- a/parquet_file/src/catalog/cleanup.rs +++ b/parquet_file/src/catalog/cleanup.rs @@ -9,6 +9,7 @@ use parking_lot::Mutex; use predicate::delete_predicate::DeletePredicate; use snafu::{ResultExt, Snafu}; +use crate::catalog::core::PreservedCatalogConfig; use crate::catalog::{ core::PreservedCatalog, interface::{ @@ -63,7 +64,7 @@ pub async fn get_unreferenced_parquet_files( // replay catalog transactions to track ALL (even dropped) files that are referenced let (_catalog, state) = PreservedCatalog::load::<TracerCatalogState>( db_name, - Arc::clone(&iox_object_store), + PreservedCatalogConfig::new(Arc::clone(&iox_object_store)), (), ) .await @@ -165,16 +166,16 @@ mod tests { use super::*; use crate::{ catalog::test_helpers::{new_empty, DB_NAME}, - test_utils::{chunk_addr, make_iox_object_store, make_metadata, TestSize}, + test_utils::{chunk_addr, make_config, make_metadata, TestSize}, }; use std::{collections::HashSet, sync::Arc}; use tokio::sync::RwLock; #[tokio::test] async fn test_cleanup_empty() { - let iox_object_store = make_iox_object_store().await; + let config = make_config().await; - let (catalog, _state) = new_empty(&iox_object_store).await; + let (catalog, _state) = new_empty(config).await; // run clean-up let files = get_unreferenced_parquet_files(DB_NAME, &catalog, 1_000) @@ -185,9 +186,10 @@ mod tests { #[tokio::test] async fn test_cleanup_rules() { - let iox_object_store = make_iox_object_store().await; + let config = make_config().await; + let iox_object_store = &config.iox_object_store; - let (catalog, _state) = new_empty(&iox_object_store).await; + let (catalog, _state) = new_empty(config.clone()).await; // create some data let mut paths_keep = vec![]; @@ -197,7 +199,7 @@ mod tests 
{ // an ordinary tracked parquet file => keep let (path, metadata) = - make_metadata(&iox_object_store, "foo", chunk_addr(1), TestSize::Full).await; + make_metadata(iox_object_store, "foo", chunk_addr(1), TestSize::Full).await; let metadata = Arc::new(metadata); let info = CatalogParquetInfo { path, @@ -211,7 +213,7 @@ mod tests { // another ordinary tracked parquet file that was added and removed => keep (for time // travel) let (path, metadata) = - make_metadata(&iox_object_store, "foo", chunk_addr(2), TestSize::Full).await; + make_metadata(iox_object_store, "foo", chunk_addr(2), TestSize::Full).await; let metadata = Arc::new(metadata); let info = CatalogParquetInfo { path, @@ -224,7 +226,7 @@ mod tests { // an untracked parquet file => delete let (path, _md) = - make_metadata(&iox_object_store, "foo", chunk_addr(3), TestSize::Full).await; + make_metadata(iox_object_store, "foo", chunk_addr(3), TestSize::Full).await; paths_delete.push(path); transaction.commit().await.unwrap(); @@ -240,7 +242,7 @@ mod tests { delete_files(&catalog, &files).await.unwrap(); // list all files - let all_files = list_all_files(&iox_object_store).await; + let all_files = list_all_files(iox_object_store).await; for p in paths_keep { assert!(dbg!(&all_files).contains(dbg!(&p))); } @@ -251,10 +253,11 @@ mod tests { #[tokio::test] async fn test_cleanup_with_parallel_transaction() { - let iox_object_store = make_iox_object_store().await; + let config = make_config().await; + let iox_object_store = &config.iox_object_store; let lock: RwLock<()> = Default::default(); - let (catalog, _state) = new_empty(&iox_object_store).await; + let (catalog, _state) = new_empty(config.clone()).await; // try multiple times to provoke a conflict for i in 0..100 { @@ -262,15 +265,14 @@ mod tests { // not trick the cleanup logic to remove the actual file because file paths contains a // UUIDv4 part. 
if i % 2 == 0 { - make_metadata(&iox_object_store, "foo", chunk_addr(i), TestSize::Full).await; + make_metadata(iox_object_store, "foo", chunk_addr(i), TestSize::Full).await; } let (path, _) = tokio::join!( async { let guard = lock.read().await; let (path, md) = - make_metadata(&iox_object_store, "foo", chunk_addr(i), TestSize::Full) - .await; + make_metadata(iox_object_store, "foo", chunk_addr(i), TestSize::Full).await; let metadata = Arc::new(md); let info = CatalogParquetInfo { @@ -298,22 +300,23 @@ mod tests { }, ); - let all_files = list_all_files(&iox_object_store).await; + let all_files = list_all_files(iox_object_store).await; assert!(dbg!(all_files).contains(dbg!(&path))); } } #[tokio::test] async fn test_cleanup_max_files() { - let iox_object_store = make_iox_object_store().await; + let config = make_config().await; + let iox_object_store = &config.iox_object_store; - let (catalog, _state) = new_empty(&iox_object_store).await; + let (catalog, _state) = new_empty(config.clone()).await; // create some files let mut to_remove = HashSet::default(); for chunk_id in 0..3 { let (path, _md) = make_metadata( - &iox_object_store, + iox_object_store, "foo", chunk_addr(chunk_id), TestSize::Full, @@ -330,7 +333,7 @@ mod tests { delete_files(&catalog, &files).await.unwrap(); // should only delete 2 - let all_files = list_all_files(&iox_object_store).await; + let all_files = list_all_files(iox_object_store).await; let leftover: HashSet<_> = all_files.intersection(&to_remove).collect(); assert_eq!(leftover.len(), 1); @@ -342,7 +345,7 @@ mod tests { delete_files(&catalog, &files).await.unwrap(); // should delete remaining file - let all_files = list_all_files(&iox_object_store).await; + let all_files = list_all_files(iox_object_store).await; let leftover: HashSet<_> = all_files.intersection(&to_remove).collect(); assert_eq!(leftover.len(), 0); } diff --git a/parquet_file/src/catalog/core.rs b/parquet_file/src/catalog/core.rs index 65e4a18b34..b375482bc9 100644 --- a/parquet_file/src/catalog/core.rs +++ b/parquet_file/src/catalog/core.rs @@ -163,6 +163,47 @@ pub enum Error { pub type Result<T, E = Error> = std::result::Result<T, E>; +/// Configuration used to create a [`PreservedCatalog`] +#[derive(Debug, Clone)] +pub struct PreservedCatalogConfig { + /// Object store that backs the catalog + pub(crate) iox_object_store: Arc<IoxObjectStore>, + + /// Fixed UUID for testing + pub(crate) fixed_uuid: Option<Uuid>, + + /// Fixed timestamp for testing + pub(crate) fixed_timestamp: Option<DateTime<Utc>>, +} + +impl PreservedCatalogConfig { + pub fn new(iox_object_store: Arc<IoxObjectStore>) -> Self { + Self { + iox_object_store, + fixed_timestamp: None, + fixed_uuid: None, + } + } + + /// Fixed UUID to use for all transactions instead of a fresh UUIDv4 + pub fn with_fixed_uuid(self, uuid: Uuid) -> Self { + Self { + fixed_uuid: Some(uuid), + ..self + } + } + + /// Fixed timestamp to use for all transactions instead of "now" + /// + /// TODO: Replace with TimeProvider (#2722) + pub fn with_fixed_timestamp(self, timestamp: DateTime<Utc>) -> Self { + Self { + fixed_timestamp: Some(timestamp), + ..self + } + } +} + /// In-memory view of the preserved catalog. pub struct PreservedCatalog { // We need an RWLock AND a semaphore, so that readers are NOT blocked during an open @@ -260,53 +301,24 @@ impl PreservedCatalog { Ok(iox_object_store.wipe_catalog().await.context(Write)?) 
} + /// Deletes the catalog described by the provided config + pub async fn wipe_with_config(config: &PreservedCatalogConfig) -> Result<()> { + Self::wipe(&config.iox_object_store).await + } + /// Create new catalog w/o any data. /// /// An empty transaction will be used to mark the catalog start so that concurrent open but /// still-empty catalogs can easily be detected. pub async fn new_empty<S>( db_name: &str, - iox_object_store: Arc<IoxObjectStore>, + config: PreservedCatalogConfig, state_data: S::EmptyInput, ) -> Result<(Self, S)> where S: CatalogState + Send + Sync, { - Self::new_empty_inner::<S>(db_name, iox_object_store, state_data, None, None).await - } - - /// Same as [`new_empty`](Self::new_empty) but for testing. - pub async fn new_empty_for_testing<S>( - db_name: &str, - iox_object_store: Arc<IoxObjectStore>, - state_data: S::EmptyInput, - fixed_uuid: Uuid, - fixed_timestamp: DateTime<Utc>, - ) -> Result<(Self, S)> - where - S: CatalogState + Send + Sync, - { - Self::new_empty_inner::<S>( - db_name, - iox_object_store, - state_data, - Some(fixed_uuid), - Some(fixed_timestamp), - ) - .await - } - - pub async fn new_empty_inner<S>( - db_name: &str, - iox_object_store: Arc<IoxObjectStore>, - state_data: S::EmptyInput, - fixed_uuid: Option<Uuid>, - fixed_timestamp: Option<DateTime<Utc>>, - ) -> Result<(Self, S)> - where - S: CatalogState + Send + Sync, - { - if Self::exists(&iox_object_store).await? { + if Self::exists(&config.iox_object_store).await? { return Err(Error::AlreadyExists {}); } let state = S::new_empty(db_name, state_data); @@ -314,9 +326,9 @@ impl PreservedCatalog { let catalog = Self { previous_tkey: RwLock::new(None), transaction_semaphore: Semaphore::new(1), - iox_object_store, - fixed_uuid, - fixed_timestamp, + iox_object_store: config.iox_object_store, + fixed_uuid: config.fixed_uuid, + fixed_timestamp: config.fixed_timestamp, }; // add empty transaction @@ -336,7 +348,7 @@ impl PreservedCatalog { /// Transactions before that point are neither verified nor are they required to exist. 
pub async fn load<S>( db_name: &str, - iox_object_store: Arc<IoxObjectStore>, + config: PreservedCatalogConfig, state_data: S::EmptyInput, ) -> Result<Option<(Self, S)>> where @@ -347,7 +359,8 @@ impl PreservedCatalog { let mut max_revision = None; let mut last_checkpoint = None; - let mut stream = iox_object_store + let mut stream = config + .iox_object_store .catalog_transaction_files() .await .context(Read)?; @@ -426,7 +439,7 @@ impl PreservedCatalog { FileType::Transaction }; OpenTransaction::load_and_apply( - &iox_object_store, + &config.iox_object_store, tkey, &mut state, &last_tkey, @@ -440,9 +453,9 @@ impl PreservedCatalog { Self { previous_tkey: RwLock::new(last_tkey), transaction_semaphore: Semaphore::new(1), - iox_object_store, - fixed_uuid: None, - fixed_timestamp: None, + iox_object_store: config.iox_object_store, + fixed_uuid: config.fixed_uuid, + fixed_timestamp: config.fixed_timestamp, }, state, ))) @@ -1065,66 +1078,68 @@ mod tests { break_catalog_with_weird_version, create_delete_predicate, exists, load_err, load_ok, new_empty, TestCatalogState, DB_NAME, }; - use crate::test_utils::{chunk_addr, make_iox_object_store, make_metadata, TestSize}; + use crate::test_utils::{ + chunk_addr, make_config, make_iox_object_store, make_metadata, TestSize, + }; #[tokio::test] async fn test_create_empty() { - let iox_object_store = make_iox_object_store().await; + let config = make_config().await; - assert!(!exists(&iox_object_store).await); - assert!(load_ok(&iox_object_store).await.is_none()); + assert!(!exists(&config.iox_object_store).await); + assert!(load_ok(config.clone()).await.is_none()); - new_empty(&iox_object_store).await; + new_empty(config.clone()).await; - assert!(exists(&iox_object_store).await); - assert!(load_ok(&iox_object_store).await.is_some()); + assert!(exists(&config.iox_object_store).await); + assert!(load_ok(config).await.is_some()); } #[tokio::test] async fn test_inmem_commit_semantics() { - let iox_object_store = make_iox_object_store().await; - assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + assert_single_catalog_inmem_works(config).await; } #[tokio::test] async fn test_store_roundtrip() { - let iox_object_store = make_iox_object_store().await; - assert_catalog_roundtrip_works(&iox_object_store).await; + let config = make_config().await; + assert_catalog_roundtrip_works(config).await; } #[tokio::test] async fn test_load_from_empty_store() { - let iox_object_store = make_iox_object_store().await; - assert!(load_ok(&iox_object_store).await.is_none()); + let config = make_config().await; + assert!(load_ok(config).await.is_none()); } #[tokio::test] async fn test_missing_transaction() { - let iox_object_store = make_iox_object_store().await; - let trace = assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + let trace = assert_single_catalog_inmem_works(config.clone()).await; // remove transaction file assert!(trace.tkeys.len() >= 2); let tkey = trace.tkeys[0]; let path = TransactionFilePath::new_transaction(tkey.revision_counter, tkey.uuid); - checked_delete(&iox_object_store, &path).await; + checked_delete(&config.iox_object_store, &path).await; // loading catalog should fail now - let err = load_err(&iox_object_store).await; + let err = load_err(config).await; assert_eq!(err.to_string(), "Missing transaction: 0",); } #[tokio::test] async fn test_transaction_version_mismatch() { - let iox_object_store = make_iox_object_store().await; - 
assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + assert_single_catalog_inmem_works(config.clone()).await; // break transaction file - let (catalog, _state) = load_ok(&iox_object_store).await.unwrap(); + let (catalog, _state) = load_ok(config.clone()).await.unwrap(); break_catalog_with_weird_version(&catalog).await; // loading catalog should fail now - let err = load_err(&iox_object_store).await; + let err = load_err(config).await; assert_eq!( err.to_string(), format!( @@ -1137,23 +1152,23 @@ mod tests { #[tokio::test] async fn test_wrong_transaction_revision() { - let iox_object_store = make_iox_object_store().await; - let trace = assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + let trace = assert_single_catalog_inmem_works(config.clone()).await; // break transaction file assert!(trace.tkeys.len() >= 2); let tkey = trace.tkeys[0]; let path = TransactionFilePath::new_transaction(tkey.revision_counter, tkey.uuid); - let mut proto = load_transaction_proto(&iox_object_store, &path) + let mut proto = load_transaction_proto(&config.iox_object_store, &path) .await .unwrap(); proto.revision_counter = 42; - store_transaction_proto(&iox_object_store, &path, &proto) + store_transaction_proto(&config.iox_object_store, &path, &proto) .await .unwrap(); // loading catalog should fail now - let err = load_err(&iox_object_store).await; + let err = load_err(config).await; assert_eq!( err.to_string(), "Wrong revision counter in transaction file: expected 0 but found 42" @@ -1162,25 +1177,25 @@ mod tests { #[tokio::test] async fn test_wrong_transaction_uuid() { - let iox_object_store = make_iox_object_store().await; - let trace = assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + let trace = assert_single_catalog_inmem_works(config.clone()).await; // break transaction file assert!(trace.tkeys.len() >= 2); let tkey = trace.tkeys[0]; let path = TransactionFilePath::new_transaction(tkey.revision_counter, tkey.uuid); - let mut proto = load_transaction_proto(&iox_object_store, &path) + let mut proto = load_transaction_proto(&config.iox_object_store, &path) .await .unwrap(); let uuid_expected = Uuid::from_slice(&proto.uuid).unwrap(); let uuid_actual = Uuid::nil(); proto.uuid = uuid_actual.as_bytes().to_vec().into(); - store_transaction_proto(&iox_object_store, &path, &proto) + store_transaction_proto(&config.iox_object_store, &path, &proto) .await .unwrap(); // loading catalog should fail now - let err = load_err(&iox_object_store).await; + let err = load_err(config).await; assert_eq!( err.to_string(), format!( @@ -1192,23 +1207,23 @@ mod tests { #[tokio::test] async fn test_missing_transaction_uuid() { - let iox_object_store = make_iox_object_store().await; - let trace = assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + let trace = assert_single_catalog_inmem_works(config.clone()).await; // break transaction file assert!(trace.tkeys.len() >= 2); let tkey = trace.tkeys[0]; let path = TransactionFilePath::new_transaction(tkey.revision_counter, tkey.uuid); - let mut proto = load_transaction_proto(&iox_object_store, &path) + let mut proto = load_transaction_proto(&config.iox_object_store, &path) .await .unwrap(); proto.uuid = Bytes::new(); - store_transaction_proto(&iox_object_store, &path, &proto) + store_transaction_proto(&config.iox_object_store, &path, &proto) .await .unwrap(); // loading catalog should fail now - let err = 
load_err(&iox_object_store).await; + let err = load_err(config).await; assert_eq!( err.to_string(), "Internal: Error while parsing protobuf: UUID required but not provided" @@ -1217,23 +1232,23 @@ mod tests { #[tokio::test] async fn test_broken_transaction_uuid() { - let iox_object_store = make_iox_object_store().await; - let trace = assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + let trace = assert_single_catalog_inmem_works(config.clone()).await; // break transaction file assert!(trace.tkeys.len() >= 2); let tkey = trace.tkeys[0]; let path = TransactionFilePath::new_transaction(tkey.revision_counter, tkey.uuid); - let mut proto = load_transaction_proto(&iox_object_store, &path) + let mut proto = load_transaction_proto(&config.iox_object_store, &path) .await .unwrap(); proto.uuid = Bytes::from("foo"); - store_transaction_proto(&iox_object_store, &path, &proto) + store_transaction_proto(&config.iox_object_store, &path, &proto) .await .unwrap(); // loading catalog should fail now - let err = load_err(&iox_object_store).await; + let err = load_err(config).await; assert_eq!( err.to_string(), "Internal: Error while parsing protobuf: Cannot parse UUID: invalid bytes length: \ @@ -1243,23 +1258,23 @@ mod tests { #[tokio::test] async fn test_wrong_transaction_link_start() { - let iox_object_store = make_iox_object_store().await; - let trace = assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + let trace = assert_single_catalog_inmem_works(config.clone()).await; // break transaction file assert!(trace.tkeys.len() >= 2); let tkey = trace.tkeys[0]; let path = TransactionFilePath::new_transaction(tkey.revision_counter, tkey.uuid); - let mut proto = load_transaction_proto(&iox_object_store, &path) + let mut proto = load_transaction_proto(&config.iox_object_store, &path) .await .unwrap(); proto.previous_uuid = Uuid::nil().as_bytes().to_vec().into(); - store_transaction_proto(&iox_object_store, &path, &proto) + store_transaction_proto(&config.iox_object_store, &path, &proto) .await .unwrap(); // loading catalog should fail now - let err = load_err(&iox_object_store).await; + let err = load_err(config).await; assert_eq!( err.to_string(), "Wrong link to previous UUID in revision 0: expected None but found \ @@ -1269,23 +1284,23 @@ mod tests { #[tokio::test] async fn test_wrong_transaction_link_middle() { - let iox_object_store = make_iox_object_store().await; - let trace = assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + let trace = assert_single_catalog_inmem_works(config.clone()).await; // break transaction file assert!(trace.tkeys.len() >= 2); let tkey = trace.tkeys[1]; let path = TransactionFilePath::new_transaction(tkey.revision_counter, tkey.uuid); - let mut proto = load_transaction_proto(&iox_object_store, &path) + let mut proto = load_transaction_proto(&config.iox_object_store, &path) .await .unwrap(); proto.previous_uuid = Uuid::nil().as_bytes().to_vec().into(); - store_transaction_proto(&iox_object_store, &path, &proto) + store_transaction_proto(&config.iox_object_store, &path, &proto) .await .unwrap(); // loading catalog should fail now - let err = load_err(&iox_object_store).await; + let err = load_err(config).await; assert_eq!( err.to_string(), format!( @@ -1298,23 +1313,23 @@ mod tests { #[tokio::test] async fn test_wrong_transaction_link_broken() { - let iox_object_store = make_iox_object_store().await; - let trace = 
assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + let trace = assert_single_catalog_inmem_works(config.clone()).await; // break transaction file assert!(trace.tkeys.len() >= 2); let tkey = trace.tkeys[0]; let path = TransactionFilePath::new_transaction(tkey.revision_counter, tkey.uuid); - let mut proto = load_transaction_proto(&iox_object_store, &path) + let mut proto = load_transaction_proto(&config.iox_object_store, &path) .await .unwrap(); proto.previous_uuid = Bytes::from("foo"); - store_transaction_proto(&iox_object_store, &path, &proto) + store_transaction_proto(&config.iox_object_store, &path, &proto) .await .unwrap(); // loading catalog should fail now - let err = load_err(&iox_object_store).await; + let err = load_err(config).await; assert_eq!( err.to_string(), "Internal: Error while parsing protobuf: Cannot parse UUID: invalid bytes length: \ @@ -1324,8 +1339,8 @@ mod tests { #[tokio::test] async fn test_broken_protobuf() { - let iox_object_store = make_iox_object_store().await; - let trace = assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + let trace = assert_single_catalog_inmem_works(config.clone()).await; // break transaction file assert!(trace.tkeys.len() >= 2); @@ -1333,13 +1348,14 @@ mod tests { let path = TransactionFilePath::new_transaction(tkey.revision_counter, tkey.uuid); let data = Bytes::from("foo"); - iox_object_store + config + .iox_object_store .put_catalog_transaction_file(&path, data) .await .unwrap(); // loading catalog should fail now - let err = load_err(&iox_object_store).await; + let err = load_err(config).await; assert_eq!( err.to_string(), "Error during protobuf IO: Error during protobuf deserialization: failed to decode \ @@ -1349,8 +1365,8 @@ mod tests { #[tokio::test] async fn test_transaction_handle_debug() { - let iox_object_store = make_iox_object_store().await; - let (catalog, _state) = new_empty(&iox_object_store).await; + let config = make_config().await; + let (catalog, _state) = new_empty(config).await; let mut t = catalog.open_transaction().await; // open transaction @@ -1367,14 +1383,14 @@ mod tests { #[tokio::test] async fn test_fork_transaction() { - let iox_object_store = make_iox_object_store().await; - let trace = assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + let trace = assert_single_catalog_inmem_works(config.clone()).await; // re-create transaction file with different UUID assert!(trace.tkeys.len() >= 2); let mut tkey = trace.tkeys[1]; let path = TransactionFilePath::new_transaction(tkey.revision_counter, tkey.uuid); - let mut proto = load_transaction_proto(&iox_object_store, &path) + let mut proto = load_transaction_proto(&config.iox_object_store, &path) .await .unwrap(); let old_uuid = tkey.uuid; @@ -1383,12 +1399,12 @@ mod tests { tkey.uuid = new_uuid; let path = TransactionFilePath::new_transaction(tkey.revision_counter, tkey.uuid); proto.uuid = new_uuid.as_bytes().to_vec().into(); - store_transaction_proto(&iox_object_store, &path, &proto) + store_transaction_proto(&config.iox_object_store, &path, &proto) .await .unwrap(); // loading catalog should fail now - let err = load_err(&iox_object_store).await; + let err = load_err(config).await; let (uuid1, uuid2) = if old_uuid < new_uuid { (old_uuid, new_uuid) } else { @@ -1406,14 +1422,14 @@ mod tests { #[tokio::test] async fn test_fork_checkpoint() { - let iox_object_store = make_iox_object_store().await; - let trace = 
assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + let trace = assert_single_catalog_inmem_works(config.clone()).await; // create checkpoint file with different UUID assert!(trace.tkeys.len() >= 2); let mut tkey = trace.tkeys[1]; let path = TransactionFilePath::new_transaction(tkey.revision_counter, tkey.uuid); - let mut proto = load_transaction_proto(&iox_object_store, &path) + let mut proto = load_transaction_proto(&config.iox_object_store, &path) .await .unwrap(); let old_uuid = tkey.uuid; @@ -1423,12 +1439,12 @@ mod tests { let path = TransactionFilePath::new_checkpoint(tkey.revision_counter, tkey.uuid); proto.uuid = new_uuid.as_bytes().to_vec().into(); proto.encoding = proto::transaction::Encoding::Full.into(); - store_transaction_proto(&iox_object_store, &path, &proto) + store_transaction_proto(&config.iox_object_store, &path, &proto) .await .unwrap(); // loading catalog should fail now - let err = load_err(&iox_object_store).await; + let err = load_err(config).await; let (uuid1, uuid2) = if old_uuid < new_uuid { (old_uuid, new_uuid) } else { @@ -1446,14 +1462,14 @@ mod tests { #[tokio::test] async fn test_unsupported_upgrade() { - let iox_object_store = make_iox_object_store().await; - let trace = assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + let trace = assert_single_catalog_inmem_works(config.clone()).await; // break transaction file assert!(trace.tkeys.len() >= 2); let tkey = trace.tkeys[0]; let path = TransactionFilePath::new_transaction(tkey.revision_counter, tkey.uuid); - let mut proto = load_transaction_proto(&iox_object_store, &path) + let mut proto = load_transaction_proto(&config.iox_object_store, &path) .await .unwrap(); proto.actions.push(proto::transaction::Action { @@ -1463,12 +1479,12 @@ mod tests { }, )), }); - store_transaction_proto(&iox_object_store, &path, &proto) + store_transaction_proto(&config.iox_object_store, &path, &proto) .await .unwrap(); // loading catalog should fail now - let err = load_err(&iox_object_store).await; + let err = load_err(config).await; assert_eq!( err.to_string(), "Upgrade path not implemented/supported: foo", @@ -1477,23 +1493,23 @@ mod tests { #[tokio::test] async fn test_missing_start_timestamp() { - let iox_object_store = make_iox_object_store().await; - let trace = assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + let trace = assert_single_catalog_inmem_works(config.clone()).await; // break transaction file assert!(trace.tkeys.len() >= 2); let tkey = trace.tkeys[0]; let path = TransactionFilePath::new_transaction(tkey.revision_counter, tkey.uuid); - let mut proto = load_transaction_proto(&iox_object_store, &path) + let mut proto = load_transaction_proto(&config.iox_object_store, &path) .await .unwrap(); proto.start_timestamp = None; - store_transaction_proto(&iox_object_store, &path, &proto) + store_transaction_proto(&config.iox_object_store, &path, &proto) .await .unwrap(); // loading catalog should fail now - let err = load_err(&iox_object_store).await; + let err = load_err(config).await; assert_eq!( err.to_string(), "Internal: Error while parsing protobuf: Datetime required but missing in serialized \ @@ -1503,26 +1519,26 @@ mod tests { #[tokio::test] async fn test_broken_start_timestamp() { - let iox_object_store = make_iox_object_store().await; - let trace = assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + let trace = 
assert_single_catalog_inmem_works(config.clone()).await; // break transaction file assert!(trace.tkeys.len() >= 2); let tkey = trace.tkeys[0]; let path = TransactionFilePath::new_transaction(tkey.revision_counter, tkey.uuid); - let mut proto = load_transaction_proto(&iox_object_store, &path) + let mut proto = load_transaction_proto(&config.iox_object_store, &path) .await .unwrap(); proto.start_timestamp = Some(generated_types::google::protobuf::Timestamp { seconds: 0, nanos: -1, }); - store_transaction_proto(&iox_object_store, &path, &proto) + store_transaction_proto(&config.iox_object_store, &path, &proto) .await .unwrap(); // loading catalog should fail now - let err = load_err(&iox_object_store).await; + let err = load_err(config).await; assert_eq!( err.to_string(), "Internal: Error while parsing protobuf: Cannot parse datetime in serialized catalog: \ @@ -1532,23 +1548,23 @@ mod tests { #[tokio::test] async fn test_broken_encoding() { - let iox_object_store = make_iox_object_store().await; - let trace = assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + let trace = assert_single_catalog_inmem_works(config.clone()).await; // break transaction file assert!(trace.tkeys.len() >= 2); let tkey = trace.tkeys[0]; let path = TransactionFilePath::new_transaction(tkey.revision_counter, tkey.uuid); - let mut proto = load_transaction_proto(&iox_object_store, &path) + let mut proto = load_transaction_proto(&config.iox_object_store, &path) .await .unwrap(); proto.encoding = -1; - store_transaction_proto(&iox_object_store, &path, &proto) + store_transaction_proto(&config.iox_object_store, &path, &proto) .await .unwrap(); // loading catalog should fail now - let err = load_err(&iox_object_store).await; + let err = load_err(config).await; assert_eq!( err.to_string(), "Internal: Error while parsing protobuf: Cannot parse encoding in serialized catalog: \ @@ -1558,23 +1574,23 @@ mod tests { #[tokio::test] async fn test_wrong_encoding_in_transaction_file() { - let iox_object_store = make_iox_object_store().await; - let trace = assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + let trace = assert_single_catalog_inmem_works(config.clone()).await; // break transaction file assert!(trace.tkeys.len() >= 2); let tkey = trace.tkeys[0]; let path = TransactionFilePath::new_transaction(tkey.revision_counter, tkey.uuid); - let mut proto = load_transaction_proto(&iox_object_store, &path) + let mut proto = load_transaction_proto(&config.iox_object_store, &path) .await .unwrap(); proto.encoding = proto::transaction::Encoding::Full.into(); - store_transaction_proto(&iox_object_store, &path, &proto) + store_transaction_proto(&config.iox_object_store, &path, &proto) .await .unwrap(); // loading catalog should fail now - let err = load_err(&iox_object_store).await; + let err = load_err(config).await; assert_eq!( err.to_string(), "Internal: Found wrong encoding in serialized catalog file: Expected Delta but got Full" @@ -1583,23 +1599,23 @@ mod tests { #[tokio::test] async fn test_missing_encoding_in_transaction_file() { - let iox_object_store = make_iox_object_store().await; - let trace = assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + let trace = assert_single_catalog_inmem_works(config.clone()).await; // break transaction file assert!(trace.tkeys.len() >= 2); let tkey = trace.tkeys[0]; let path = TransactionFilePath::new_transaction(tkey.revision_counter, tkey.uuid); - let mut 
proto = load_transaction_proto(&iox_object_store, &path) + let mut proto = load_transaction_proto(&config.iox_object_store, &path) .await .unwrap(); proto.encoding = 0; - store_transaction_proto(&iox_object_store, &path, &proto) + store_transaction_proto(&config.iox_object_store, &path, &proto) .await .unwrap(); // loading catalog should fail now - let err = load_err(&iox_object_store).await; + let err = load_err(config).await; assert_eq!( err.to_string(), "Internal: Error while parsing protobuf: Cannot parse encoding in serialized catalog: \ @@ -1609,23 +1625,23 @@ mod tests { #[tokio::test] async fn test_wrong_encoding_in_checkpoint_file() { - let iox_object_store = make_iox_object_store().await; - let trace = assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + let trace = assert_single_catalog_inmem_works(config.clone()).await; // break transaction file assert!(trace.tkeys.len() >= 2); let tkey = trace.tkeys[0]; let path = TransactionFilePath::new_transaction(tkey.revision_counter, tkey.uuid); - let proto = load_transaction_proto(&iox_object_store, &path) + let proto = load_transaction_proto(&config.iox_object_store, &path) .await .unwrap(); let path = TransactionFilePath::new_checkpoint(tkey.revision_counter, tkey.uuid); - store_transaction_proto(&iox_object_store, &path, &proto) + store_transaction_proto(&config.iox_object_store, &path, &proto) .await .unwrap(); // loading catalog should fail now - let err = load_err(&iox_object_store).await; + let err = load_err(config).await; assert_eq!( err.to_string(), "Internal: Found wrong encoding in serialized catalog file: Expected Full but got Delta" @@ -1634,13 +1650,11 @@ mod tests { #[tokio::test] async fn test_checkpoint() { - let iox_object_store = make_iox_object_store().await; - - // use common test as baseline - let mut trace = assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + let mut trace = assert_single_catalog_inmem_works(config.clone()).await; // re-open catalog - let (catalog, mut state) = load_ok(&iox_object_store).await.unwrap(); + let (catalog, mut state) = load_ok(config.clone()).await.unwrap(); // create empty transaction w/ checkpoint (the delta transaction file is not required for catalog loading) { @@ -1656,8 +1670,13 @@ mod tests { // create another transaction on-top that adds a file (this transaction will be required to load the full state) { let addr = chunk_addr(1337); - let (path, metadata) = - make_metadata(&iox_object_store, "foo", addr.clone(), TestSize::Full).await; + let (path, metadata) = make_metadata( + &config.iox_object_store, + "foo", + addr.clone(), + TestSize::Full, + ) + .await; let mut transaction = catalog.open_transaction().await; let info = CatalogParquetInfo { @@ -1685,11 +1704,11 @@ mod tests { } let tkey = trace.tkeys[i]; let path = TransactionFilePath::new_transaction(tkey.revision_counter, tkey.uuid); - checked_delete(&iox_object_store, &path).await; + checked_delete(&config.iox_object_store, &path).await; } // load catalog from store and check replayed state - let (catalog, state) = load_ok(&iox_object_store).await.unwrap(); + let (catalog, state) = load_ok(config).await.unwrap(); assert_eq!( catalog.revision_counter(), trace.tkeys.last().unwrap().revision_counter @@ -1702,9 +1721,10 @@ mod tests { #[tokio::test] async fn test_delete_predicates() { - let iox_object_store = make_iox_object_store().await; + let config = make_config().await; + let iox_object_store = &config.iox_object_store; - let 
(catalog, mut state) = new_empty(&iox_object_store).await; + let (catalog, mut state) = new_empty(config.clone()).await; { let mut t = catalog.open_transaction().await; @@ -1714,7 +1734,7 @@ mod tests { for id in 0..3 { let chunk_addr = chunk_addr(id); let (path, metadata) = - make_metadata(&iox_object_store, "foo", chunk_addr.clone(), TestSize::Full) + make_metadata(iox_object_store, "foo", chunk_addr.clone(), TestSize::Full) .await; let info = CatalogParquetInfo { path, @@ -1742,7 +1762,7 @@ mod tests { } // restoring from the last transaction works - let (_catalog, state_recovered) = load_ok(&iox_object_store).await.unwrap(); + let (_catalog, state_recovered) = load_ok(config.clone()).await.unwrap(); assert_eq!( state.delete_predicates(), state_recovered.delete_predicates() @@ -1760,7 +1780,7 @@ mod tests { } // restoring from the last checkpoint works - let (_catalog, state_recovered) = load_ok(&iox_object_store).await.unwrap(); + let (_catalog, state_recovered) = load_ok(config.clone()).await.unwrap(); assert_eq!( state.delete_predicates(), state_recovered.delete_predicates() @@ -1859,10 +1879,9 @@ mod tests { } } - async fn assert_single_catalog_inmem_works( - iox_object_store: &Arc<IoxObjectStore>, - ) -> TestTrace { - let (catalog, mut state) = new_empty(iox_object_store).await; + async fn assert_single_catalog_inmem_works(config: PreservedCatalogConfig) -> TestTrace { + let iox_object_store = &config.iox_object_store; + let (catalog, mut state) = new_empty(config.clone()).await; // track all the intermediate results let mut trace = TestTrace::new(); @@ -1981,16 +2000,11 @@ mod tests { #[tokio::test] async fn test_create_twice() { - let iox_object_store = make_iox_object_store().await; + let config = make_config().await; - new_empty(&iox_object_store).await; + new_empty(config.clone()).await; - let res = PreservedCatalog::new_empty::<TestCatalogState>( - DB_NAME, - Arc::clone(&iox_object_store), - (), - ) - .await; + let res = PreservedCatalog::new_empty::<TestCatalogState>(DB_NAME, config, ()).await; assert_eq!(res.unwrap_err().to_string(), "Catalog already exists"); } @@ -2003,48 +2017,48 @@ mod tests { #[tokio::test] async fn test_wipe_normal() { - let iox_object_store = make_iox_object_store().await; + let config = make_config().await; + let iox_object_store = &config.iox_object_store; // create a real catalog - assert_single_catalog_inmem_works(&iox_object_store).await; + assert_single_catalog_inmem_works(config.clone()).await; // wipe - PreservedCatalog::wipe(&iox_object_store).await.unwrap(); + PreservedCatalog::wipe(iox_object_store).await.unwrap(); // `exists` and `load` both report "no data" - assert!(!exists(&iox_object_store).await); - assert!(load_ok(&iox_object_store).await.is_none()); + assert!(!exists(&config.iox_object_store).await); + assert!(load_ok(config.clone()).await.is_none()); // can create new catalog - new_empty(&iox_object_store).await; + new_empty(config).await; } #[tokio::test] async fn test_wipe_broken_catalog() { - let iox_object_store = make_iox_object_store().await; - - // create a real catalog - assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + let iox_object_store = &config.iox_object_store; + assert_single_catalog_inmem_works(config.clone()).await; // break - let (catalog, _state) = load_ok(&iox_object_store).await.unwrap(); + let (catalog, _state) = load_ok(config.clone()).await.unwrap(); break_catalog_with_weird_version(&catalog).await; // wipe - 
PreservedCatalog::wipe(&iox_object_store).await.unwrap(); + PreservedCatalog::wipe(iox_object_store).await.unwrap(); // `exists` and `load` both report "no data" - assert!(!exists(&iox_object_store).await); - assert!(load_ok(&iox_object_store).await.is_none()); + assert!(!exists(&config.iox_object_store).await); + assert!(load_ok(config.clone()).await.is_none()); // can create new catalog - new_empty(&iox_object_store).await; + new_empty(config).await; } #[tokio::test] async fn test_transaction_handle_revision_counter() { - let iox_object_store = make_iox_object_store().await; - let (catalog, _state) = new_empty(&iox_object_store).await; + let config = make_config().await; + let (catalog, _state) = new_empty(config).await; let t = catalog.open_transaction().await; assert_eq!(t.revision_counter(), 1); @@ -2052,8 +2066,8 @@ mod tests { #[tokio::test] async fn test_transaction_handle_uuid() { - let iox_object_store = make_iox_object_store().await; - let (catalog, _state) = new_empty(&iox_object_store).await; + let config = make_config().await; + let (catalog, _state) = new_empty(config).await; let mut t = catalog.open_transaction().await; t.transaction.as_mut().unwrap().proto.uuid = Uuid::nil().as_bytes().to_vec().into(); @@ -2062,10 +2076,10 @@ mod tests { #[tokio::test] async fn test_find_last_transaction_timestamp_ok() { - let iox_object_store = make_iox_object_store().await; - let trace = assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + let trace = assert_single_catalog_inmem_works(config.clone()).await; - let ts = PreservedCatalog::find_last_transaction_timestamp(&iox_object_store) + let ts = PreservedCatalog::find_last_transaction_timestamp(&config.iox_object_store) .await .unwrap() .unwrap(); @@ -2103,22 +2117,23 @@ mod tests { #[tokio::test] async fn test_find_last_transaction_timestamp_datetime_broken() { - let iox_object_store = make_iox_object_store().await; - let trace = assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + let iox_object_store = &config.iox_object_store; + let trace = assert_single_catalog_inmem_works(config.clone()).await; // break transaction file assert!(trace.tkeys.len() >= 2); let tkey = trace.tkeys[0]; let path = TransactionFilePath::new_transaction(tkey.revision_counter, tkey.uuid); - let mut proto = load_transaction_proto(&iox_object_store, &path) + let mut proto = load_transaction_proto(iox_object_store, &path) .await .unwrap(); proto.start_timestamp = None; - store_transaction_proto(&iox_object_store, &path, &proto) + store_transaction_proto(iox_object_store, &path, &proto) .await .unwrap(); - let ts = PreservedCatalog::find_last_transaction_timestamp(&iox_object_store) + let ts = PreservedCatalog::find_last_transaction_timestamp(iox_object_store) .await .unwrap() .unwrap(); @@ -2144,8 +2159,9 @@ mod tests { #[tokio::test] async fn test_find_last_transaction_timestamp_protobuf_broken() { - let iox_object_store = make_iox_object_store().await; - let trace = assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + let iox_object_store = &config.iox_object_store; + let trace = assert_single_catalog_inmem_works(config.clone()).await; // break transaction file assert!(trace.tkeys.len() >= 2); @@ -2158,7 +2174,7 @@ mod tests { .await .unwrap(); - let ts = PreservedCatalog::find_last_transaction_timestamp(&iox_object_store) + let ts = PreservedCatalog::find_last_transaction_timestamp(iox_object_store) .await .unwrap() .unwrap(); 
@@ -2184,10 +2200,11 @@ mod tests { #[tokio::test] async fn test_find_last_transaction_timestamp_checkpoints_only() { - let iox_object_store = make_iox_object_store().await; - let mut trace = assert_single_catalog_inmem_works(&iox_object_store).await; + let config = make_config().await; + let iox_object_store = &config.iox_object_store; + let mut trace = assert_single_catalog_inmem_works(config.clone()).await; - let (catalog, state) = load_ok(&iox_object_store).await.unwrap(); + let (catalog, state) = load_ok(config.clone()).await.unwrap(); // create empty transaction w/ checkpoint { @@ -2206,11 +2223,11 @@ mod tests { continue; } let path = TransactionFilePath::new_transaction(tkey.revision_counter, tkey.uuid); - checked_delete(&iox_object_store, &path).await; + checked_delete(iox_object_store, &path).await; } drop(catalog); - let ts = PreservedCatalog::find_last_transaction_timestamp(&iox_object_store) + let ts = PreservedCatalog::find_last_transaction_timestamp(iox_object_store) .await .unwrap() .unwrap(); @@ -2234,12 +2251,12 @@ mod tests { ); } - async fn assert_catalog_roundtrip_works(iox_object_store: &Arc<IoxObjectStore>) { + async fn assert_catalog_roundtrip_works(config: PreservedCatalogConfig) { // use single-catalog test case as base - let trace = assert_single_catalog_inmem_works(iox_object_store).await; + let trace = assert_single_catalog_inmem_works(config.clone()).await; // load catalog from store and check replayed state - let (catalog, state) = load_ok(iox_object_store).await.unwrap(); + let (catalog, state) = load_ok(config).await.unwrap(); assert_eq!( catalog.revision_counter(), trace.tkeys.last().unwrap().revision_counter @@ -2252,16 +2269,17 @@ mod tests { #[tokio::test] async fn test_exists_considers_checkpoints() { - let iox_object_store = make_iox_object_store().await; + let config = make_config().await; + let iox_object_store = &config.iox_object_store; - assert!(!exists(&iox_object_store).await); + assert!(!exists(iox_object_store).await); - let (catalog, state) = new_empty(&iox_object_store).await; + let (catalog, state) = new_empty(config.clone()).await; // delete transaction file let tkey = catalog.previous_tkey.read().unwrap(); let path = TransactionFilePath::new_transaction(tkey.revision_counter, tkey.uuid); - checked_delete(&iox_object_store, &path).await; + checked_delete(iox_object_store, &path).await; // create empty transaction w/ checkpoint { @@ -2276,11 +2294,11 @@ mod tests { // delete transaction file let tkey = catalog.previous_tkey.read().unwrap(); let path = TransactionFilePath::new_transaction(tkey.revision_counter, tkey.uuid); - checked_delete(&iox_object_store, &path).await; + checked_delete(iox_object_store, &path).await; drop(catalog); - assert!(exists(&iox_object_store).await); - assert!(load_ok(&iox_object_store).await.is_some()); + assert!(exists(iox_object_store).await); + assert!(load_ok(config).await.is_some()); } } diff --git a/parquet_file/src/catalog/dump.rs b/parquet_file/src/catalog/dump.rs index 60eaf413b8..fbb560a700 100644 --- a/parquet_file/src/catalog/dump.rs +++ b/parquet_file/src/catalog/dump.rs @@ -225,30 +225,30 @@ mod tests { interface::CatalogParquetInfo, test_helpers::{TestCatalogState, DB_NAME}, }, - test_utils::{chunk_addr, make_iox_object_store, make_metadata, TestSize}, + test_utils::{chunk_addr, make_config, make_metadata, TestSize}, }; use chrono::{TimeZone, Utc}; use uuid::Uuid; #[tokio::test] async fn test_dump_default_options() { - let iox_object_store = make_iox_object_store().await; + let config = 
make_config() + .await + .with_fixed_uuid(Uuid::nil()) + .with_fixed_timestamp(Utc.timestamp(10, 20)); + + let iox_object_store = &config.iox_object_store; // build catalog with some data - let (catalog, _state) = PreservedCatalog::new_empty_for_testing::<TestCatalogState>( - DB_NAME, - Arc::clone(&iox_object_store), - (), - Uuid::nil(), - Utc.timestamp(10, 20), - ) - .await - .unwrap(); + let (catalog, _state) = + PreservedCatalog::new_empty::<TestCatalogState>(DB_NAME, config.clone(), ()) + .await + .unwrap(); { let mut transaction = catalog.open_transaction().await; let (path, metadata) = - make_metadata(&iox_object_store, "foo", chunk_addr(0), TestSize::Minimal).await; + make_metadata(iox_object_store, "foo", chunk_addr(0), TestSize::Minimal).await; let info = CatalogParquetInfo { path, file_size_bytes: 33, @@ -261,7 +261,7 @@ mod tests { let mut buf = std::io::Cursor::new(Vec::new()); let options = DumpOptions::default(); - dump(&iox_object_store, &mut buf, options).await.unwrap(); + dump(iox_object_store, &mut buf, options).await.unwrap(); let actual = String::from_utf8(buf.into_inner()).unwrap(); let actual = actual.trim(); @@ -352,23 +352,22 @@ File { #[tokio::test] async fn test_dump_show_parsed_data() { - let iox_object_store = make_iox_object_store().await; + let config = make_config() + .await + .with_fixed_uuid(Uuid::nil()) + .with_fixed_timestamp(Utc.timestamp(10, 20)); + let iox_object_store = &config.iox_object_store; // build catalog with some data - let (catalog, _state) = PreservedCatalog::new_empty_for_testing::<TestCatalogState>( - DB_NAME, - Arc::clone(&iox_object_store), - (), - Uuid::nil(), - Utc.timestamp(10, 20), - ) - .await - .unwrap(); + let (catalog, _state) = + PreservedCatalog::new_empty::<TestCatalogState>(DB_NAME, config.clone(), ()) + .await + .unwrap(); { let mut transaction = catalog.open_transaction().await; let (path, metadata) = - make_metadata(&iox_object_store, "foo", chunk_addr(0), TestSize::Minimal).await; + make_metadata(iox_object_store, "foo", chunk_addr(0), TestSize::Minimal).await; let info = CatalogParquetInfo { path, file_size_bytes: 33, @@ -386,7 +385,7 @@ File { show_statistics: true, ..Default::default() }; - dump(&iox_object_store, &mut buf, options).await.unwrap(); + dump(iox_object_store, &mut buf, options).await.unwrap(); let actual = String::from_utf8(buf.into_inner()).unwrap(); let actual = actual.trim(); diff --git a/parquet_file/src/catalog/prune.rs b/parquet_file/src/catalog/prune.rs index 240f3ecaaa..666850e317 100644 --- a/parquet_file/src/catalog/prune.rs +++ b/parquet_file/src/catalog/prune.rs @@ -130,7 +130,7 @@ mod tests { interface::CheckpointData, test_helpers::{load_ok, new_empty}, }, - test_utils::make_iox_object_store, + test_utils::{make_config, make_iox_object_store}, }; use super::*; @@ -144,42 +144,44 @@ mod tests { #[tokio::test] async fn test_do_delete_wipe_last_checkpoint() { - let iox_object_store = make_iox_object_store().await; + let config = make_config().await; - new_empty(&iox_object_store).await; + new_empty(config.clone()).await; - prune_history(Arc::clone(&iox_object_store), Utc::now()) + prune_history(Arc::clone(&config.iox_object_store), Utc::now()) .await .unwrap(); - load_ok(&iox_object_store).await.unwrap(); + load_ok(config).await.unwrap(); } #[tokio::test] async fn test_complex_1() { - let iox_object_store = make_iox_object_store().await; + let config = make_config().await; + let iox_object_store = &config.iox_object_store; - let (catalog, _state) = new_empty(&iox_object_store).await; + let 
(catalog, _state) = new_empty(config.clone()).await; create_transaction(&catalog).await; create_transaction_and_checkpoint(&catalog).await; let before = Utc::now(); create_transaction(&catalog).await; - prune_history(Arc::clone(&iox_object_store), before) + prune_history(Arc::clone(iox_object_store), before) .await .unwrap(); assert_eq!( - known_revisions(&iox_object_store).await, + known_revisions(iox_object_store).await, vec![(2, true), (3, false)], ); } #[tokio::test] async fn test_complex_2() { - let iox_object_store = make_iox_object_store().await; + let config = make_config().await; + let iox_object_store = &config.iox_object_store; - let (catalog, _state) = new_empty(&iox_object_store).await; + let (catalog, _state) = new_empty(config.clone()).await; create_transaction(&catalog).await; create_transaction_and_checkpoint(&catalog).await; create_transaction(&catalog).await; @@ -188,12 +190,12 @@ mod tests { create_transaction_and_checkpoint(&catalog).await; create_transaction(&catalog).await; - prune_history(Arc::clone(&iox_object_store), before) + prune_history(Arc::clone(iox_object_store), before) .await .unwrap(); assert_eq!( - known_revisions(&iox_object_store).await, + known_revisions(iox_object_store).await, vec![ (2, true), (3, false), @@ -207,20 +209,21 @@ mod tests { #[tokio::test] async fn test_keep_all() { - let iox_object_store = make_iox_object_store().await; + let config = make_config().await; + let iox_object_store = &config.iox_object_store; - let (catalog, _state) = new_empty(&iox_object_store).await; + let (catalog, _state) = new_empty(config.clone()).await; create_transaction(&catalog).await; create_transaction_and_checkpoint(&catalog).await; create_transaction(&catalog).await; let before = Utc::now() - Duration::seconds(1_000); - prune_history(Arc::clone(&iox_object_store), before) + prune_history(Arc::clone(iox_object_store), before) .await .unwrap(); assert_eq!( - known_revisions(&iox_object_store).await, + known_revisions(iox_object_store).await, vec![(0, false), (1, false), (2, false), (2, true), (3, false)], ); } diff --git a/parquet_file/src/catalog/rebuild.rs b/parquet_file/src/catalog/rebuild.rs index 1557be9b54..a47a445d92 100644 --- a/parquet_file/src/catalog/rebuild.rs +++ b/parquet_file/src/catalog/rebuild.rs @@ -6,6 +6,7 @@ use iox_object_store::{IoxObjectStore, ParquetFilePath}; use observability_deps::tracing::error; use snafu::{ResultExt, Snafu}; +use crate::catalog::core::PreservedCatalogConfig; use crate::{ catalog::{ core::PreservedCatalog, @@ -69,7 +70,7 @@ pub type Result<T, E = Error> = std::result::Result<T, E>; /// `ignore_metadata_read_failure` to `true` to ignore these cases. 
pub async fn rebuild_catalog<S>( db_name: &str, - iox_object_store: Arc<IoxObjectStore>, + config: PreservedCatalogConfig, catalog_empty_input: S::EmptyInput, ignore_metadata_read_failure: bool, ) -> Result<(PreservedCatalog, S)> @@ -77,23 +78,20 @@ where S: CatalogState + Debug + Send + Sync, { // collect all revisions from parquet files - let files = collect_files(&iox_object_store, ignore_metadata_read_failure).await?; + let files = collect_files(&config.iox_object_store, ignore_metadata_read_failure).await?; // create new empty catalog - let (catalog, mut state) = PreservedCatalog::new_empty::<S>( - db_name, - Arc::clone(&iox_object_store), - catalog_empty_input, - ) - .await - .context(NewEmptyFailure)?; + let (catalog, mut state) = + PreservedCatalog::new_empty::<S>(db_name, config.clone(), catalog_empty_input) + .await + .context(NewEmptyFailure)?; // create single transaction with all files if !files.is_empty() { let mut transaction = catalog.open_transaction().await; for info in files { state - .add(Arc::clone(&iox_object_store), info.clone()) + .add(Arc::clone(&config.iox_object_store), info.clone()) .context(FileRecordFailure)?; transaction.add_parquet(&info); } @@ -181,8 +179,7 @@ mod tests { metadata::IoxMetadata, storage::{MemWriter, Storage}, test_utils::{ - create_partition_and_database_checkpoint, make_iox_object_store, make_record_batch, - TestSize, + create_partition_and_database_checkpoint, make_config, make_record_batch, TestSize, }, }; use chrono::Utc; @@ -194,19 +191,20 @@ mod tests { #[tokio::test] async fn test_rebuild_successfull() { - let iox_object_store = make_iox_object_store().await; + let config = make_config().await; + let iox_object_store = &config.iox_object_store; let db_name = Arc::from("db1"); // build catalog with some data - let (catalog, mut state) = new_empty(&iox_object_store).await; + let (catalog, mut state) = new_empty(config.clone()).await; { let mut transaction = catalog.open_transaction().await; - let info = create_parquet_file(&db_name, &iox_object_store, ChunkId::new_test(0)).await; + let info = create_parquet_file(&db_name, iox_object_store, ChunkId::new_test(0)).await; state.insert(info.clone()).unwrap(); transaction.add_parquet(&info); - let info = create_parquet_file(&db_name, &iox_object_store, ChunkId::new_test(1)).await; + let info = create_parquet_file(&db_name, iox_object_store, ChunkId::new_test(1)).await; state.insert(info.clone()).unwrap(); transaction.add_parquet(&info); @@ -220,7 +218,7 @@ mod tests { { let mut transaction = catalog.open_transaction().await; - let info = create_parquet_file(&db_name, &iox_object_store, ChunkId::new_test(2)).await; + let info = create_parquet_file(&db_name, iox_object_store, ChunkId::new_test(2)).await; state.insert(info.clone()).unwrap(); transaction.add_parquet(&info); @@ -236,13 +234,12 @@ mod tests { // wipe catalog drop(catalog); - PreservedCatalog::wipe(&iox_object_store).await.unwrap(); + PreservedCatalog::wipe(iox_object_store).await.unwrap(); // rebuild - let (catalog, state) = - rebuild_catalog::<TestCatalogState>(DB_NAME, iox_object_store, (), false) - .await - .unwrap(); + let (catalog, state) = rebuild_catalog::<TestCatalogState>(DB_NAME, config, (), false) + .await + .unwrap(); // check match let paths_actual = { @@ -256,20 +253,21 @@ mod tests { #[tokio::test] async fn test_rebuild_empty() { - let iox_object_store = make_iox_object_store().await; + let config = make_config().await; // build empty catalog - let (catalog, _state) = new_empty(&iox_object_store).await; + let 
(catalog, _state) = new_empty(config.clone()).await; // wipe catalog drop(catalog); - PreservedCatalog::wipe(&iox_object_store).await.unwrap(); + PreservedCatalog::wipe(&config.iox_object_store) + .await + .unwrap(); // rebuild - let (catalog, state) = - rebuild_catalog::<TestCatalogState>(DB_NAME, iox_object_store, (), false) - .await - .unwrap(); + let (catalog, state) = rebuild_catalog::<TestCatalogState>(DB_NAME, config, (), false) + .await + .unwrap(); // check match assert!(state.files().next().is_none()); @@ -278,30 +276,30 @@ mod tests { #[tokio::test] async fn test_rebuild_no_metadata() { - let iox_object_store = make_iox_object_store().await; + let config = make_config().await; + let iox_object_store = &config.iox_object_store; + let db_name = Arc::from("db1"); // build catalog with same data - let catalog = new_empty(&iox_object_store).await; + let catalog = new_empty(config.clone()).await; // file w/o metadata - create_parquet_file_without_metadata(&db_name, &iox_object_store, ChunkId::new_test(0)) + create_parquet_file_without_metadata(&db_name, iox_object_store, ChunkId::new_test(0)) .await; // wipe catalog drop(catalog); - PreservedCatalog::wipe(&iox_object_store).await.unwrap(); + PreservedCatalog::wipe(iox_object_store).await.unwrap(); // rebuild (do not ignore errors) - let res = - rebuild_catalog::<TestCatalogState>(DB_NAME, Arc::clone(&iox_object_store), (), false) - .await; + let res = rebuild_catalog::<TestCatalogState>(DB_NAME, config.clone(), (), false).await; assert!(dbg!(res.unwrap_err().to_string()) .starts_with("Cannot read IOx metadata from parquet file")); // rebuild (ignore errors) let (catalog, state) = - rebuild_catalog::<TestCatalogState>(DB_NAME, iox_object_store, (), true) + rebuild_catalog::<TestCatalogState>(DB_NAME, config.clone(), (), true) .await .unwrap(); assert!(state.files().next().is_none()); @@ -318,21 +316,21 @@ mod tests { // transaction files and then check that rebuilt catalog will be gone afterwards. Note the // difference to the `test_rebuild_empty` case where we can indeed proof the existence of a // catalog (even though it is empty aka has no files). 
- let iox_object_store = make_iox_object_store().await; + let config = make_config().await; + let iox_object_store = &config.iox_object_store; // build catalog with some data (2 transactions + initial empty one) - let (catalog, _state) = new_empty(&iox_object_store).await; + let (catalog, _state) = new_empty(config.clone()).await; assert_eq!(catalog.revision_counter(), 0); // wipe catalog drop(catalog); - PreservedCatalog::wipe(&iox_object_store).await.unwrap(); + PreservedCatalog::wipe(iox_object_store).await.unwrap(); // rebuild - let catalog = - rebuild_catalog::<TestCatalogState>(DB_NAME, Arc::clone(&iox_object_store), (), false) - .await - .unwrap(); + let catalog = rebuild_catalog::<TestCatalogState>(DB_NAME, config.clone(), (), false) + .await + .unwrap(); drop(catalog); // delete transaction files @@ -356,7 +354,7 @@ mod tests { assert!(deleted); // the catalog should be gone because there should have been no checkpoint files remaining - assert!(!exists(&iox_object_store).await); + assert!(!exists(iox_object_store).await); } pub async fn create_parquet_file( diff --git a/parquet_file/src/catalog/test_helpers.rs b/parquet_file/src/catalog/test_helpers.rs index 33f3dcdca7..e53c04740b 100644 --- a/parquet_file/src/catalog/test_helpers.rs +++ b/parquet_file/src/catalog/test_helpers.rs @@ -1,3 +1,4 @@ +use crate::catalog::core::PreservedCatalogConfig; use crate::{ catalog::{ core::PreservedCatalog, @@ -11,7 +12,7 @@ use crate::{ }, }, metadata::IoxParquetMetaData, - test_utils::{chunk_addr, make_iox_object_store, make_metadata, TestSize}, + test_utils::{chunk_addr, make_config, make_metadata, TestSize}, }; use data_types::{chunk_metadata::ChunkId, timestamp::TimestampRange}; use iox_object_store::{IoxObjectStore, ParquetFilePath, TransactionFilePath}; @@ -219,25 +220,21 @@ pub async fn exists(iox_object_store: &Arc<IoxObjectStore>) -> bool { /// Load a `PreservedCatalog` and unwrap, expecting the operation to succeed pub async fn load_ok( - iox_object_store: &Arc<IoxObjectStore>, + config: PreservedCatalogConfig, ) -> Option<(PreservedCatalog, TestCatalogState)> { - PreservedCatalog::load(DB_NAME, Arc::clone(iox_object_store), ()) - .await - .unwrap() + PreservedCatalog::load(DB_NAME, config, ()).await.unwrap() } /// Load a `PreservedCatalog` and unwrap the error, expecting the operation to fail -pub async fn load_err(iox_object_store: &Arc<IoxObjectStore>) -> crate::catalog::core::Error { - PreservedCatalog::load::<TestCatalogState>(DB_NAME, Arc::clone(iox_object_store), ()) +pub async fn load_err(config: PreservedCatalogConfig) -> crate::catalog::core::Error { + PreservedCatalog::load::<TestCatalogState>(DB_NAME, config, ()) .await .unwrap_err() } /// Create a new empty catalog with the TestCatalogState, expecting the operation to succeed -pub async fn new_empty( - iox_object_store: &Arc<IoxObjectStore>, -) -> (PreservedCatalog, TestCatalogState) { - PreservedCatalog::new_empty(DB_NAME, Arc::clone(iox_object_store), ()) +pub async fn new_empty(config: PreservedCatalogConfig) -> (PreservedCatalog, TestCatalogState) { + PreservedCatalog::new_empty(DB_NAME, config, ()) .await .unwrap() } @@ -274,9 +271,9 @@ where F: Fn(&S) -> CheckpointData + Send, { // empty state - let iox_object_store = make_iox_object_store().await; + let config = make_config().await; let (_catalog, mut state) = - PreservedCatalog::new_empty::<S>(DB_NAME, Arc::clone(&iox_object_store), state_data) + PreservedCatalog::new_empty::<S>(DB_NAME, config.clone(), state_data) .await .unwrap(); @@ -291,7 +288,7 @@ where { for 
chunk_id in 0..5 { let (path, metadata) = make_metadata( - &iox_object_store, + &config.iox_object_store, "ok", chunk_addr(chunk_id), TestSize::Full, @@ -299,7 +296,7 @@ where .await; state .add( - Arc::clone(&iox_object_store), + Arc::clone(&config.iox_object_store), CatalogParquetInfo { path: path.clone(), file_size_bytes: 33, @@ -321,11 +318,16 @@ where // add and remove in the same transaction { - let (path, metadata) = - make_metadata(&iox_object_store, "ok", chunk_addr(5), TestSize::Full).await; + let (path, metadata) = make_metadata( + &config.iox_object_store, + "ok", + chunk_addr(5), + TestSize::Full, + ) + .await; state .add( - Arc::clone(&iox_object_store), + Arc::clone(&config.iox_object_store), CatalogParquetInfo { path: path.clone(), file_size_bytes: 33, @@ -343,7 +345,7 @@ where state.remove(path).unwrap(); state .add( - Arc::clone(&iox_object_store), + Arc::clone(&config.iox_object_store), CatalogParquetInfo { path: path.clone(), file_size_bytes: 33, @@ -356,11 +358,16 @@ where // add, remove, add in the same transaction { - let (path, metadata) = - make_metadata(&iox_object_store, "ok", chunk_addr(6), TestSize::Full).await; + let (path, metadata) = make_metadata( + &config.iox_object_store, + "ok", + chunk_addr(6), + TestSize::Full, + ) + .await; state .add( - Arc::clone(&iox_object_store), + Arc::clone(&config.iox_object_store), CatalogParquetInfo { path: path.clone(), file_size_bytes: 33, @@ -371,7 +378,7 @@ where state.remove(&path).unwrap(); state .add( - Arc::clone(&iox_object_store), + Arc::clone(&config.iox_object_store), CatalogParquetInfo { path: path.clone(), file_size_bytes: 33, @@ -389,7 +396,7 @@ where state.remove(&path).unwrap(); state .add( - Arc::clone(&iox_object_store), + Arc::clone(&config.iox_object_store), CatalogParquetInfo { path: path.clone(), file_size_bytes: 33, @@ -406,11 +413,16 @@ where // TODO: Error handling should disambiguate between chunk collision and filename collision // chunk with same ID already exists (should also not change the metadata) - let (path, metadata) = - make_metadata(&iox_object_store, "fail", chunk_addr(0), TestSize::Full).await; + let (path, metadata) = make_metadata( + &config.iox_object_store, + "fail", + chunk_addr(0), + TestSize::Full, + ) + .await; let err = state .add( - Arc::clone(&iox_object_store), + Arc::clone(&config.iox_object_store), CatalogParquetInfo { path, file_size_bytes: 33, @@ -431,7 +443,7 @@ where let (_, metadata) = expected_files.get(&ChunkId::new_test(0)).unwrap(); let err = state .add( - Arc::clone(&iox_object_store), + Arc::clone(&config.iox_object_store), CatalogParquetInfo { // Intentionally "incorrect" path path: ParquetFilePath::new(&chunk_addr(10)), @@ -446,12 +458,17 @@ where )); // this transaction will still work - let (path, metadata) = - make_metadata(&iox_object_store, "ok", chunk_addr(7), TestSize::Full).await; + let (path, metadata) = make_metadata( + &config.iox_object_store, + "ok", + chunk_addr(7), + TestSize::Full, + ) + .await; let metadata = Arc::new(metadata); state .add( - Arc::clone(&iox_object_store), + Arc::clone(&config.iox_object_store), CatalogParquetInfo { path: path.clone(), file_size_bytes: 33, @@ -464,7 +481,7 @@ where // recently added let err = state .add( - Arc::clone(&iox_object_store), + Arc::clone(&config.iox_object_store), CatalogParquetInfo { path, file_size_bytes: 33, @@ -495,7 +512,7 @@ where // create two chunks that we can use for delete predicate let chunk_addr_1 = chunk_addr(8); let (path, metadata) = make_metadata( - &iox_object_store, + 
&config.iox_object_store, "ok", chunk_addr_1.clone(), TestSize::Full, @@ -503,7 +520,7 @@ where .await; state .add( - Arc::clone(&iox_object_store), + Arc::clone(&config.iox_object_store), CatalogParquetInfo { path: path.clone(), file_size_bytes: 33, @@ -515,7 +532,7 @@ where let chunk_addr_2 = chunk_addr(9); let (path, metadata) = make_metadata( - &iox_object_store, + &config.iox_object_store, "ok", chunk_addr_2.clone(), TestSize::Full, @@ -523,7 +540,7 @@ where .await; state .add( - Arc::clone(&iox_object_store), + Arc::clone(&config.iox_object_store), CatalogParquetInfo { path: path.clone(), file_size_bytes: 33, @@ -548,7 +565,7 @@ where // chunks created afterwards are unaffected let chunk_addr_3 = chunk_addr(10); let (path, metadata) = make_metadata( - &iox_object_store, + &config.iox_object_store, "ok", chunk_addr_3.clone(), TestSize::Full, @@ -556,7 +573,7 @@ where .await; state .add( - Arc::clone(&iox_object_store), + Arc::clone(&config.iox_object_store), CatalogParquetInfo { path: path.clone(), file_size_bytes: 33, diff --git a/parquet_file/src/test_utils.rs b/parquet_file/src/test_utils.rs index 91da2ae474..c97471db7a 100644 --- a/parquet_file/src/test_utils.rs +++ b/parquet_file/src/test_utils.rs @@ -1,3 +1,4 @@ +use crate::catalog::core::PreservedCatalogConfig; use crate::{ chunk::{self, ChunkMetrics, ParquetChunk}, metadata::{IoxMetadata, IoxParquetMetaData}, @@ -867,6 +868,12 @@ pub async fn make_iox_object_store() -> Arc<IoxObjectStore> { ) } +/// Creates a new [`PreservedCatalogConfig`] with an in-memory object store +pub async fn make_config() -> PreservedCatalogConfig { + let iox_object_store = make_iox_object_store().await; + PreservedCatalogConfig::new(iox_object_store) +} + pub fn read_data_from_parquet_data(schema: SchemaRef, parquet_data: Vec<u8>) -> Vec<RecordBatch> { let mut record_batches = vec![]; diff --git a/server/src/database.rs b/server/src/database.rs index 1c45c184f5..c0f525ba4c 100644 --- a/server/src/database.rs +++ b/server/src/database.rs @@ -21,7 +21,7 @@ use internal_types::freezable::Freezable; use iox_object_store::IoxObjectStore; use observability_deps::tracing::{error, info, warn}; use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard}; -use parquet_file::catalog::core::PreservedCatalog; +use parquet_file::catalog::core::{PreservedCatalog, PreservedCatalogConfig}; use persistence_windows::checkpoint::ReplayPlan; use snafu::{ensure, OptionExt, ResultExt, Snafu}; use std::{future::Future, sync::Arc, time::Duration}; @@ -211,9 +211,10 @@ impl Database { .await .context(SavingRules)?; + let config = PreservedCatalogConfig::new(iox_object_store); create_preserved_catalog( db_name, - Arc::clone(&iox_object_store), + config, Arc::clone(application.metric_registry()), true, ) @@ -1053,9 +1054,12 @@ impl DatabaseStateDatabaseObjectStoreFound { .fail(); } + let catalog_config = PreservedCatalogConfig::new(Arc::clone(&self.iox_object_store)); + Ok(DatabaseStateRulesLoaded { provided_rules: Arc::new(rules), iox_object_store: Arc::clone(&self.iox_object_store), + catalog_config, }) } } @@ -1064,6 +1068,7 @@ impl DatabaseStateDatabaseObjectStoreFound { struct DatabaseStateRulesLoaded { provided_rules: Arc<ProvidedDatabaseRules>, iox_object_store: Arc<IoxObjectStore>, + catalog_config: PreservedCatalogConfig, } impl DatabaseStateRulesLoaded { @@ -1074,7 +1079,7 @@ impl DatabaseStateRulesLoaded { ) -> Result<DatabaseStateCatalogLoaded, InitError> { let (preserved_catalog, catalog, replay_plan) = load_or_create_preserved_catalog( 
shared.config.name.as_str(), - Arc::clone(&self.iox_object_store), + self.catalog_config.clone(), Arc::clone(shared.application.metric_registry()), shared.config.wipe_catalog_on_error, shared.config.skip_replay, diff --git a/server/src/db.rs b/server/src/db.rs index 31225c949e..a61c3fc734 100644 --- a/server/src/db.rs +++ b/server/src/db.rs @@ -1464,6 +1464,7 @@ mod tests { use iox_object_store::ParquetFilePath; use metric::{Attributes, CumulativeGauge, Metric, Observation}; use object_store::ObjectStore; + use parquet_file::catalog::core::PreservedCatalogConfig; use parquet_file::{ catalog::test_helpers::load_ok, metadata::IoxParquetMetaData, @@ -3282,7 +3283,8 @@ mod tests { // ==================== check: empty catalog created ==================== // at this point, an empty preserved catalog exists - let maybe_preserved_catalog = load_ok(&db.iox_object_store).await; + let config = PreservedCatalogConfig::new(Arc::clone(&db.iox_object_store)); + let maybe_preserved_catalog = load_ok(config.clone()).await; assert!(maybe_preserved_catalog.is_some()); // ==================== do: write data to parquet ==================== @@ -3312,7 +3314,7 @@ mod tests { } } paths_expected.sort(); - let (_preserved_catalog, catalog) = load_ok(&db.iox_object_store).await.unwrap(); + let (_preserved_catalog, catalog) = load_ok(config).await.unwrap(); let paths_actual = { let mut tmp: Vec<_> = catalog.files().map(|info| info.path.clone()).collect(); tmp.sort(); diff --git a/server/src/db/lifecycle/persist.rs b/server/src/db/lifecycle/persist.rs index ff5f8e34d7..8f8a7433a3 100644 --- a/server/src/db/lifecycle/persist.rs +++ b/server/src/db/lifecycle/persist.rs @@ -239,6 +239,7 @@ mod tests { }; use lifecycle::{LockableChunk, LockablePartition}; use object_store::ObjectStore; + use parquet_file::catalog::core::PreservedCatalogConfig; use predicate::delete_expr::{DeleteExpr, Op, Scalar}; use query::QueryDatabase; use std::{ @@ -489,7 +490,7 @@ mod tests { }) .build() .await; - let db = Arc::new(test_db.db); + let db = test_db.db; // | foo | delete before persist | delete during persist | // | --- | --------------------- | --------------------- | @@ -566,15 +567,11 @@ mod tests { // check object store delete predicates let metric_registry = Arc::new(metric::Registry::new()); - let (_preserved_catalog, catalog, _replay_plan) = load_or_create_preserved_catalog( - db_name, - db.iox_object_store(), - metric_registry, - false, - false, - ) - .await - .unwrap(); + let config = PreservedCatalogConfig::new(Arc::clone(&db.iox_object_store)); + let (_preserved_catalog, catalog, _replay_plan) = + load_or_create_preserved_catalog(db_name, config, metric_registry, false, false) + .await + .unwrap(); check_closure(&catalog); } } diff --git a/server/src/db/load.rs b/server/src/db/load.rs index e2e4a7615a..c7e252c225 100644 --- a/server/src/db/load.rs +++ b/server/src/db/load.rs @@ -4,6 +4,7 @@ use super::catalog::{chunk::ChunkStage, table::TableSchemaUpsertHandle, Catalog}; use iox_object_store::{IoxObjectStore, ParquetFilePath}; use observability_deps::tracing::{error, info}; +use parquet_file::catalog::core::PreservedCatalogConfig; use parquet_file::{ catalog::{ core::PreservedCatalog, @@ -51,7 +52,7 @@ pub type Result<T, E = Error> = std::result::Result<T, E>; /// <https://github.com/influxdata/influxdb_iox/issues/1522> pub async fn load_or_create_preserved_catalog( db_name: &str, - iox_object_store: Arc<IoxObjectStore>, + config: PreservedCatalogConfig, metric_registry: Arc<::metric::Registry>, wipe_on_error: bool, 
skip_replay: bool, @@ -59,7 +60,7 @@ pub async fn load_or_create_preserved_catalog( // first try to load existing catalogs match PreservedCatalog::load( db_name, - Arc::clone(&iox_object_store), + config.clone(), LoaderEmptyInput::new(Arc::clone(&metric_registry), skip_replay), ) .await @@ -83,13 +84,7 @@ pub async fn load_or_create_preserved_catalog( db_name ); - create_preserved_catalog( - db_name, - Arc::clone(&iox_object_store), - metric_registry, - skip_replay, - ) - .await + create_preserved_catalog(db_name, config, metric_registry, skip_replay).await } Err(e) => { if wipe_on_error { @@ -97,17 +92,11 @@ pub async fn load_or_create_preserved_catalog( // broken => wipe for now (at least during early iterations) error!("cannot load catalog, so wipe it: {}", e); - PreservedCatalog::wipe(&iox_object_store) + PreservedCatalog::wipe_with_config(&config) .await .context(CannotWipeCatalog)?; - create_preserved_catalog( - db_name, - Arc::clone(&iox_object_store), - metric_registry, - skip_replay, - ) - .await + create_preserved_catalog(db_name, config, metric_registry, skip_replay).await } else { Err(Error::CannotLoadCatalog { source: e }) } @@ -120,13 +109,13 @@ pub async fn load_or_create_preserved_catalog( /// This will fail if a preserved catalog already exists. pub async fn create_preserved_catalog( db_name: &str, - iox_object_store: Arc<IoxObjectStore>, + config: PreservedCatalogConfig, metric_registry: Arc<metric::Registry>, skip_replay: bool, ) -> Result<(PreservedCatalog, Catalog, Option<ReplayPlan>)> { let (preserved_catalog, loader) = PreservedCatalog::new_empty( db_name, - Arc::clone(&iox_object_store), + config, LoaderEmptyInput::new(metric_registry, skip_replay), ) .await @@ -324,20 +313,15 @@ mod tests { .await .unwrap(), ); + let config = PreservedCatalogConfig::new(iox_object_store); - let (preserved_catalog, _catalog) = new_empty(&iox_object_store).await; + let (preserved_catalog, _catalog) = new_empty(config.clone()).await; parquet_file::catalog::test_helpers::break_catalog_with_weird_version(&preserved_catalog) .await; - load_or_create_preserved_catalog( - &db_name, - iox_object_store, - Default::default(), - true, - false, - ) - .await - .unwrap(); + load_or_create_preserved_catalog(&db_name, config, Default::default(), true, false) + .await + .unwrap(); } fn checkpoint_data_from_loader(loader: &Loader) -> CheckpointData { diff --git a/server/src/lib.rs b/server/src/lib.rs index bf740b1026..8490319a74 100644 --- a/server/src/lib.rs +++ b/server/src/lib.rs @@ -1251,6 +1251,7 @@ mod tests { path::{parsed::DirsAndFileName, ObjectStorePath}, ObjectStore, ObjectStoreApi, }; + use parquet_file::catalog::core::PreservedCatalogConfig; use parquet_file::catalog::{ core::PreservedCatalog, test_helpers::{load_ok, new_empty}, @@ -2206,9 +2207,11 @@ mod tests { .await .unwrap(); - let (preserved_catalog, _catalog) = load_ok(&catalog_broken.iox_object_store().unwrap()) - .await - .unwrap(); + let (preserved_catalog, _catalog) = load_ok(PreservedCatalogConfig::new( + catalog_broken.iox_object_store().unwrap(), + )) + .await + .unwrap(); parquet_file::catalog::test_helpers::break_catalog_with_weird_version(&preserved_catalog) .await; @@ -2287,7 +2290,7 @@ mod tests { .await .unwrap(), ); - new_empty(&non_existing_iox_object_store).await; + new_empty(PreservedCatalogConfig::new(non_existing_iox_object_store)).await; assert_eq!( server .wipe_preserved_catalog(&db_name_non_existing) @@ -2383,7 +2386,7 @@ mod tests { ); // create catalog - new_empty(&iox_object_store).await; + 
new_empty(PreservedCatalogConfig::new(iox_object_store)).await; // creating database will now result in an error let err = create_simple_database(&server, db_name).await.unwrap_err(); diff --git a/server/src/utils.rs b/server/src/utils.rs index 63cf0ac58c..15afbe25fc 100644 --- a/server/src/utils.rs +++ b/server/src/utils.rs @@ -10,6 +10,7 @@ use data_types::{ }; use iox_object_store::IoxObjectStore; use object_store::ObjectStore; +use parquet_file::catalog::core::PreservedCatalogConfig; use persistence_windows::checkpoint::ReplayPlan; use query::exec::ExecutorConfig; use query::{exec::Executor, QueryDatabase}; @@ -72,6 +73,7 @@ impl TestDbBuilder { }; let iox_object_store = Arc::new(iox_object_store); + let config = PreservedCatalogConfig::new(Arc::clone(&iox_object_store)); // deterministic thread and concurrency count let exec = Arc::new(Executor::new_with_config(ExecutorConfig { @@ -83,7 +85,7 @@ impl TestDbBuilder { let (preserved_catalog, catalog, replay_plan) = load_or_create_preserved_catalog( db_name.as_str(), - Arc::clone(&iox_object_store), + config, Arc::clone(&metric_registry), false, false, From b39e01f7ba4f5d19f92862c5e87b90a40879a6c9 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Mon, 11 Oct 2021 21:40:00 +0100 Subject: [PATCH 04/17] feat: migrate PersistenceWindows to TimeProvider (#2722) (#2798) --- Cargo.lock | 7 +- data_types/Cargo.toml | 1 + data_types/src/write_summary.rs | 12 +- entry/Cargo.toml | 1 + entry/src/entry.rs | 3 +- lifecycle/Cargo.toml | 1 + lifecycle/src/lib.rs | 12 +- lifecycle/src/policy.rs | 61 +- parquet_file/Cargo.toml | 1 + parquet_file/src/catalog/dump.rs | 2 +- parquet_file/src/metadata.rs | 10 +- parquet_file/src/test_utils.rs | 3 +- persistence_windows/Cargo.toml | 2 +- persistence_windows/src/checkpoint.rs | 20 +- .../src/persistence_windows.rs | 657 +++++++++--------- query_tests/src/scenarios.rs | 3 +- server/Cargo.toml | 1 + server/src/application.rs | 36 +- server/src/database.rs | 11 +- server/src/db.rs | 98 ++- server/src/db/lifecycle.rs | 16 +- server/src/db/lifecycle/persist.rs | 53 +- server/src/db/replay.rs | 32 +- server/src/db/system_tables/persistence.rs | 18 +- server/src/lib.rs | 10 +- server/src/utils.rs | 25 + 26 files changed, 583 insertions(+), 513 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cb69b830ee..ea2fa655d7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -814,6 +814,7 @@ dependencies = [ "regex", "snafu", "test_helpers", + "time 0.1.0", "uuid", ] @@ -1017,6 +1018,7 @@ dependencies = [ "ouroboros", "schema", "snafu", + "time 0.1.0", ] [[package]] @@ -1972,6 +1974,7 @@ dependencies = [ "hashbrown 0.11.2", "internal_types", "observability_deps", + "time 0.1.0", "tokio", "tracker", ] @@ -2713,6 +2716,7 @@ dependencies = [ "tempfile", "test_helpers", "thrift", + "time 0.1.0", "tokio", "tokio-stream", "uuid", @@ -2795,12 +2799,12 @@ checksum = "d9978962f8a4b158e97447a6d09d2d75e206d2994eff056c894019f362b27142" name = "persistence_windows" version = "0.1.0" dependencies = [ - "chrono", "data_types", "internal_types", "observability_deps", "snafu", "test_helpers", + "time 0.1.0", ] [[package]] @@ -3860,6 +3864,7 @@ dependencies = [ "snafu", "snap", "test_helpers", + "time 0.1.0", "tokio", "tokio-util", "trace", diff --git a/data_types/Cargo.toml b/data_types/Cargo.toml index b4a33ddb7d..0a729fbc25 100644 --- a/data_types/Cargo.toml +++ b/data_types/Cargo.toml @@ -15,6 +15,7 @@ observability_deps = { path = "../observability_deps" } percent-encoding = "2.1.0" regex 
= "1.4" snafu = "0.6" +time = { path = "../time" } uuid = { version = "0.8", features = ["v4"] } [dev-dependencies] # In alphabetical order diff --git a/data_types/src/write_summary.rs b/data_types/src/write_summary.rs index f4943aac39..ccd7e5dd51 100644 --- a/data_types/src/write_summary.rs +++ b/data_types/src/write_summary.rs @@ -1,20 +1,20 @@ use crate::partition_metadata::StatValues; -use chrono::{DateTime, Timelike, Utc}; +use time::Time; /// A description of a set of writes #[derive(Debug, Clone, Eq, PartialEq)] pub struct WriteSummary { /// The wall clock timestamp of the first write in this summary - pub time_of_first_write: DateTime<Utc>, + pub time_of_first_write: Time, /// The wall clock timestamp of the last write in this summary - pub time_of_last_write: DateTime<Utc>, + pub time_of_last_write: Time, /// The minimum row timestamp for data in this summary - pub min_timestamp: DateTime<Utc>, + pub min_timestamp: Time, /// The maximum row timestamp value for data in this summary - pub max_timestamp: DateTime<Utc>, + pub max_timestamp: Time, /// The number of rows in this summary pub row_count: usize, @@ -62,7 +62,7 @@ impl TimestampSummary { } /// Records a timestamp value - pub fn record(&mut self, timestamp: DateTime<Utc>) { + pub fn record(&mut self, timestamp: Time) { self.counts[timestamp.minute() as usize] += 1; self.stats.update(×tamp.timestamp_nanos()) } diff --git a/entry/Cargo.toml b/entry/Cargo.toml index f83bf163ef..36ad18780e 100644 --- a/entry/Cargo.toml +++ b/entry/Cargo.toml @@ -12,6 +12,7 @@ data_types = { path = "../data_types" } # version of the flatbuffers crate flatbuffers = "2" snafu = "0.6" +time = { path = "../time" } influxdb_line_protocol = { path = "../influxdb_line_protocol" } ouroboros = "0.13.0" schema = { path = "../schema" } diff --git a/entry/src/entry.rs b/entry/src/entry.rs index 3088a2552d..ebdefbfdbf 100644 --- a/entry/src/entry.rs +++ b/entry/src/entry.rs @@ -17,6 +17,7 @@ use schema::{ builder::{Error as SchemaBuilderError, SchemaBuilder}, IOxValueType, InfluxColumnType, InfluxFieldType, Schema, TIME_COLUMN_NAME, }; +use time::Time; use crate::entry_fb; @@ -926,7 +927,7 @@ impl<'a> TableBatch<'a> { let timestamps = self.timestamps()?; let mut summary = TimestampSummary::default(); for t in ×tamps { - summary.record(Utc.timestamp_nanos(t)) + summary.record(Time::from_timestamp_nanos(t)) } Ok(summary) } diff --git a/lifecycle/Cargo.toml b/lifecycle/Cargo.toml index 258251c346..3ccf784f13 100644 --- a/lifecycle/Cargo.toml +++ b/lifecycle/Cargo.toml @@ -12,6 +12,7 @@ futures = "0.3" hashbrown = "0.11" internal_types = { path = "../internal_types" } observability_deps = { path = "../observability_deps" } +time = { path = "../time" } tokio = { version = "1.11", features = ["macros", "time"] } tracker = { path = "../tracker" } diff --git a/lifecycle/src/lib.rs b/lifecycle/src/lib.rs index 643feeb30d..c9b2f9ff88 100644 --- a/lifecycle/src/lib.rs +++ b/lifecycle/src/lib.rs @@ -21,6 +21,7 @@ mod guard; pub use guard::*; mod policy; pub use policy::*; +use time::Time; /// A trait that encapsulates the database logic that is automated by `LifecyclePolicy` pub trait LifecycleDb { @@ -81,11 +82,10 @@ pub trait LockablePartition: Sized + std::fmt::Display { /// Returns None if there is a persistence operation in flight, or /// if there are no persistable windows. 
/// - /// `now` is the wall clock time that should be used to compute how long a given - /// write has been present in memory + /// If `force` is `true` will persist all unpersisted data regardless of arrival time fn prepare_persist( partition: &mut LifecycleWriteGuard<'_, Self::Partition, Self>, - now: DateTime<Utc>, + force: bool, ) -> Option<Self::PersistHandle>; /// Split and persist chunks. @@ -157,10 +157,10 @@ pub trait LifecyclePartition { /// /// `now` is the wall clock time that should be used to compute how long a given /// write has been present in memory - fn persistable_row_count(&self, now: DateTime<Utc>) -> usize; + fn persistable_row_count(&self) -> usize; /// Returns the age of the oldest unpersisted write - fn minimum_unpersisted_age(&self) -> Option<DateTime<Utc>>; + fn minimum_unpersisted_age(&self) -> Option<Time>; } /// The lifecycle operates on chunks implementing this trait @@ -188,5 +188,5 @@ pub trait LifecycleChunk { pub trait PersistHandle { /// Any unpersisted chunks containing rows with timestamps less than or equal to this /// must be included in the corresponding `LockablePartition::persist_chunks` call - fn timestamp(&self) -> DateTime<Utc>; + fn timestamp(&self) -> Time; } diff --git a/lifecycle/src/policy.rs b/lifecycle/src/policy.rs index 0b85c6de97..9c8ab64543 100644 --- a/lifecycle/src/policy.rs +++ b/lifecycle/src/policy.rs @@ -12,6 +12,7 @@ use futures::future::BoxFuture; use internal_types::access::AccessMetrics; use observability_deps::tracing::{debug, info, trace, warn}; use std::{convert::TryInto, fmt::Debug}; +use time::Time; use tracker::TaskTracker; /// Number of seconds to wait before retrying a failed lifecycle action @@ -350,14 +351,14 @@ where let persistable_age_seconds: u32 = partition .minimum_unpersisted_age() .and_then(|minimum_unpersisted_age| { - (now - minimum_unpersisted_age) + (now - minimum_unpersisted_age.date_time()) .num_seconds() .try_into() .ok() }) .unwrap_or_default(); - let persistable_row_count = partition.persistable_row_count(now); + let persistable_row_count = partition.persistable_row_count(); debug!(%db_name, %partition, partition_persist_row_count=persistable_row_count, rules_persist_row_count=%rules.persist_row_threshold.get(), @@ -379,7 +380,7 @@ where // Upgrade partition to be able to rotate persistence windows let mut partition = partition.upgrade(); - let persist_handle = match LockablePartition::prepare_persist(&mut partition, now) { + let persist_handle = match LockablePartition::prepare_persist(&mut partition, false) { Some(x) => x, None => { debug!(%db_name, %partition, "no persistable windows or previous outstanding persist"); @@ -624,7 +625,7 @@ fn sort_free_candidates<P>(candidates: &mut Vec<FreeCandidate<'_, P>>) { /// job that is already running). 
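// Illustrative sketch, not part of this patch: the cutoff rule the function below applies,
// reduced to plain nanosecond values. Chunks whose minimum row timestamp lies after the
// `Time` handed out by the persist handle are left out of the persist plan (they may still
// be pulled in later to close chunk-order gaps, which this sketch ignores). The names
// `example_cutoff`, `candidate_chunks`, and `flush_cutoff` are hypothetical.
fn example_cutoff() {
    let flush_cutoff: i64 = 20_000_000_000; // flush timestamp as nanoseconds
    let candidate_chunks: Vec<(&str, i64)> = vec![("a", 5_000_000_000), ("b", 25_000_000_000)];

    // Keep only chunks whose minimum row timestamp is at or before the cutoff
    let to_persist: Vec<_> = candidate_chunks
        .iter()
        .filter(|(_, min_ts)| *min_ts <= flush_cutoff)
        .collect();

    assert_eq!(to_persist.len(), 1); // only chunk "a" falls inside the flush window
}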
pub fn select_persistable_chunks<P, D>( chunks: &[D], - flush_ts: DateTime<Utc>, + flush_ts: Time, ) -> Result<Vec<LifecycleWriteGuard<'_, P, D>>, bool> where D: LockableChunk<Chunk = P>, @@ -655,7 +656,7 @@ where // Chunk's data is entirely after the time we are flushing // up to, and thus there is reason to include it in the // plan - if chunk.min_timestamp() > flush_ts { + if chunk.min_timestamp() > flush_ts.date_time() { // Ignore chunk for now, but we might need it later to close chunk order gaps debug!( chunk=%chunk.addr(), @@ -725,8 +726,8 @@ mod tests { struct TestPartition { chunks: BTreeMap<ChunkId, (ChunkOrder, Arc<RwLock<TestChunk>>)>, persistable_row_count: usize, - minimum_unpersisted_age: Option<DateTime<Utc>>, - max_persistable_timestamp: Option<DateTime<Utc>>, + minimum_unpersisted_age: Option<Time>, + max_persistable_timestamp: Option<Time>, next_id: u128, } @@ -734,8 +735,8 @@ mod tests { fn with_persistence( self, persistable_row_count: usize, - minimum_unpersisted_age: DateTime<Utc>, - max_persistable_timestamp: DateTime<Utc>, + minimum_unpersisted_age: Time, + max_persistable_timestamp: Time, ) -> Self { Self { chunks: self.chunks, @@ -831,11 +832,11 @@ mod tests { #[derive(Debug)] struct TestPersistHandle { - timestamp: DateTime<Utc>, + timestamp: Time, } impl PersistHandle for TestPersistHandle { - fn timestamp(&self) -> DateTime<Utc> { + fn timestamp(&self) -> Time { self.timestamp } } @@ -920,7 +921,7 @@ mod tests { fn prepare_persist( partition: &mut LifecycleWriteGuard<'_, Self::Partition, Self>, - _now: DateTime<Utc>, + _force: bool, ) -> Option<Self::PersistHandle> { Some(TestPersistHandle { timestamp: partition.max_persistable_timestamp.unwrap(), @@ -942,8 +943,9 @@ mod tests { partition.next_id += 1; // The remainder left behind after the split - let new_chunk = TestChunk::new(id, 0, ChunkStorage::ReadBuffer) - .with_min_timestamp(handle.timestamp + chrono::Duration::nanoseconds(1)); + let new_chunk = TestChunk::new(id, 0, ChunkStorage::ReadBuffer).with_min_timestamp( + handle.timestamp.date_time() + chrono::Duration::nanoseconds(1), + ); partition .chunks @@ -1013,11 +1015,11 @@ mod tests { false } - fn persistable_row_count(&self, _now: DateTime<Utc>) -> usize { + fn persistable_row_count(&self) -> usize { self.persistable_row_count } - fn minimum_unpersisted_age(&self) -> Option<DateTime<Utc>> { + fn minimum_unpersisted_age(&self) -> Option<Time> { self.minimum_unpersisted_age } } @@ -1697,6 +1699,7 @@ mod tests { ..Default::default() }; let now = from_secs(0); + let time_now = Time::from_date_time(now); let partitions = vec![ // Insufficient rows and not old enough => don't persist but can compact @@ -1706,7 +1709,7 @@ mod tests { TestChunk::new(ChunkId::new_test(1), 0, ChunkStorage::ReadBuffer) .with_min_timestamp(from_secs(5)), ]) - .with_persistence(10, now, from_secs(20)), + .with_persistence(10, time_now, Time::from_timestamp(20, 0)), // Sufficient rows => persist TestPartition::new(vec![ TestChunk::new(ChunkId::new_test(2), 0, ChunkStorage::ClosedMutableBuffer) @@ -1714,7 +1717,7 @@ mod tests { TestChunk::new(ChunkId::new_test(3), 0, ChunkStorage::ReadBuffer) .with_min_timestamp(from_secs(5)), ]) - .with_persistence(1_000, now, from_secs(20)), + .with_persistence(1_000, time_now, Time::from_timestamp(20, 0)), // Writes too old => persist TestPartition::new(vec![ // Should split open chunks @@ -1725,7 +1728,11 @@ mod tests { TestChunk::new(ChunkId::new_test(6), 0, ChunkStorage::ObjectStoreOnly) .with_min_timestamp(from_secs(5)), ]) - 
.with_persistence(10, now - chrono::Duration::seconds(10), from_secs(20)), + .with_persistence( + 10, + time_now - Duration::from_secs(10), + Time::from_timestamp(20, 0), + ), // Sufficient rows but conflicting compaction => prevent compaction TestPartition::new(vec![ TestChunk::new(ChunkId::new_test(7), 0, ChunkStorage::ClosedMutableBuffer) @@ -1737,7 +1744,7 @@ mod tests { TestChunk::new(ChunkId::new_test(9), 0, ChunkStorage::ReadBuffer) .with_min_timestamp(from_secs(5)), ]) - .with_persistence(1_000, now, from_secs(20)), + .with_persistence(1_000, time_now, Time::from_timestamp(20, 0)), // Sufficient rows and non-conflicting compaction => persist TestPartition::new(vec![ TestChunk::new(ChunkId::new_test(10), 0, ChunkStorage::ClosedMutableBuffer) @@ -1748,7 +1755,7 @@ mod tests { TestChunk::new(ChunkId::new_test(12), 0, ChunkStorage::ReadBuffer) .with_min_timestamp(from_secs(5)), ]) - .with_persistence(1_000, now, from_secs(20)), + .with_persistence(1_000, time_now, Time::from_timestamp(20, 0)), // Sufficient rows, non-conflicting compaction and compact-able chunk => persist + compact TestPartition::new(vec![ TestChunk::new(ChunkId::new_test(13), 0, ChunkStorage::ClosedMutableBuffer) @@ -1762,7 +1769,7 @@ mod tests { TestChunk::new(ChunkId::new_test(16), 0, ChunkStorage::ReadBuffer) .with_min_timestamp(from_secs(5)), ]) - .with_persistence(1_000, now, from_secs(20)), + .with_persistence(1_000, time_now, Time::from_timestamp(20, 0)), // Checks that we include chunks in a closed "order"-based interval. // Note that the chunks here are ordered in reverse to check if the lifecycle policy really uses the chunk // order during iteration. @@ -1783,7 +1790,7 @@ mod tests { .with_min_timestamp(from_secs(25)) .with_order(ChunkOrder::new(1).unwrap()), ]) - .with_persistence(1_000, now, from_secs(20)), + .with_persistence(1_000, time_now, Time::from_timestamp(20, 0)), ]; let db = TestDb::from_partitions(rules, partitions); @@ -1823,14 +1830,15 @@ mod tests { ..Default::default() }; let now = Utc::now(); + let time_now = Time::from_date_time(now); // This could occur if the in-memory contents of a partition are deleted, and // compaction causes the chunks to be removed. 
In such a scenario the persistence // windows will still think there are rows to be persisted let partitions = vec![TestPartition::new(vec![]).with_persistence( 10, - now - chrono::Duration::seconds(20), - from_secs(20), + time_now - Duration::from_secs(20), + Time::from_timestamp(20, 0), )]; let db = TestDb::from_partitions(rules, partitions); @@ -1851,6 +1859,7 @@ mod tests { ..Default::default() }; let now = Utc::now(); + let time_now = Time::from_date_time(now); let partitions = vec![ // Sufficient rows => could persist but should be suppressed @@ -1860,7 +1869,7 @@ mod tests { TestChunk::new(ChunkId::new_test(3), 0, ChunkStorage::ReadBuffer) .with_min_timestamp(from_secs(5)), ]) - .with_persistence(1_000, now, from_secs(20)), + .with_persistence(1_000, time_now, Time::from_timestamp(20, 0)), ]; let db = TestDb::from_partitions(rules, partitions); diff --git a/parquet_file/Cargo.toml b/parquet_file/Cargo.toml index 5ea3a72f9c..0dd51bd049 100644 --- a/parquet_file/Cargo.toml +++ b/parquet_file/Cargo.toml @@ -29,6 +29,7 @@ snafu = "0.6" schema = { path = "../schema" } tempfile = "3.1.0" thrift = "0.13" +time = { path = "../time" } tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync"] } tokio-stream = "0.1" uuid = { version = "0.8", features = ["serde", "v4"] } diff --git a/parquet_file/src/catalog/dump.rs b/parquet_file/src/catalog/dump.rs index fbb560a700..618bbc682c 100644 --- a/parquet_file/src/catalog/dump.rs +++ b/parquet_file/src/catalog/dump.rs @@ -485,7 +485,7 @@ File { max: 28, }, }, - flush_timestamp: 1970-01-01T00:00:10.000000020Z, + flush_timestamp: 1970-01-01T00:00:10.000000020+00:00, }, database_checkpoint: DatabaseCheckpoint { sequencer_numbers: { diff --git a/parquet_file/src/metadata.rs b/parquet_file/src/metadata.rs index 628dc6e14b..404bbc5c87 100644 --- a/parquet_file/src/metadata.rs +++ b/parquet_file/src/metadata.rs @@ -114,6 +114,7 @@ use schema::{InfluxColumnType, InfluxFieldType, Schema}; use snafu::{ensure, OptionExt, ResultExt, Snafu}; use std::{collections::BTreeMap, convert::TryInto, sync::Arc}; use thrift::protocol::{TCompactInputProtocol, TCompactOutputProtocol, TOutputProtocol}; +use time::Time; /// Current version for serialized metadata. 
/// @@ -344,7 +345,7 @@ impl IoxMetadata { Arc::clone(&table_name), Arc::clone(&partition_key), sequencer_numbers, - flush_timestamp, + Time::from_date_time(flush_timestamp), ); // extract database checkpoint @@ -404,7 +405,12 @@ impl IoxMetadata { ) }) .collect(), - flush_timestamp: Some(self.partition_checkpoint.flush_timestamp().into()), + flush_timestamp: Some( + self.partition_checkpoint + .flush_timestamp() + .date_time() + .into(), + ), }; let proto_database_checkpoint = proto::DatabaseCheckpoint { diff --git a/parquet_file/src/test_utils.rs b/parquet_file/src/test_utils.rs index c97471db7a..b19dca4002 100644 --- a/parquet_file/src/test_utils.rs +++ b/parquet_file/src/test_utils.rs @@ -36,6 +36,7 @@ use schema::selection::Selection; use schema::{builder::SchemaBuilder, Schema, TIME_COLUMN_NAME}; use snafu::{ResultExt, Snafu}; use std::{collections::BTreeMap, num::NonZeroU32, sync::Arc}; +use time::Time; #[derive(Debug, Snafu)] pub enum Error { @@ -939,7 +940,7 @@ pub fn create_partition_and_database_checkpoint( Arc::clone(&table_name), Arc::clone(&partition_key), sequencer_numbers_1, - flush_timestamp, + Time::from_date_time(flush_timestamp), ); // create second partition diff --git a/persistence_windows/Cargo.toml b/persistence_windows/Cargo.toml index 8da9219ce0..98ce242dac 100644 --- a/persistence_windows/Cargo.toml +++ b/persistence_windows/Cargo.toml @@ -4,11 +4,11 @@ version = "0.1.0" edition = "2018" [dependencies] -chrono = "0.4" data_types = { path = "../data_types" } internal_types = { path = "../internal_types" } observability_deps = { path = "../observability_deps" } snafu = "0.6.2" +time = { path = "../time" } [dev-dependencies] test_helpers = { path = "../test_helpers" } diff --git a/persistence_windows/src/checkpoint.rs b/persistence_windows/src/checkpoint.rs index e91887e3cf..a4de28ec5c 100644 --- a/persistence_windows/src/checkpoint.rs +++ b/persistence_windows/src/checkpoint.rs @@ -76,7 +76,7 @@ //! //! # // mocking for the example below //! # use std::collections::BTreeMap; -//! # use chrono::Utc; +//! # use time::Time; //! # use persistence_windows::min_max_sequence::OptionalMinMaxSequence; //! # //! # struct Partition { @@ -105,7 +105,7 @@ //! # Arc::from("table"), //! # Arc::from("part"), //! # Default::default(), -//! # Utc::now(), +//! # Time::from_timestamp_nanos(3963), //! # ) //! # } //! # } @@ -176,7 +176,7 @@ //! //! # // mocking for the example below //! # use std::sync::Arc; -//! # use chrono::Utc; +//! # use time::Time; //! # use persistence_windows::checkpoint::{DatabaseCheckpoint, PartitionCheckpoint, PersistCheckpointBuilder}; //! # //! # struct File {} @@ -187,7 +187,7 @@ //! # Arc::from("table"), //! # Arc::from("part"), //! # Default::default(), -//! # Utc::now(), +//! # Time::from_timestamp_nanos(0), //! # ) //! # } //! 
# @@ -265,9 +265,9 @@ use std::{ sync::Arc, }; -use chrono::{DateTime, Utc}; use observability_deps::tracing::warn; use snafu::{OptionExt, Snafu}; +use time::Time; use crate::min_max_sequence::OptionalMinMaxSequence; @@ -373,7 +373,7 @@ pub struct PartitionCheckpoint { sequencer_numbers: BTreeMap<u32, OptionalMinMaxSequence>, /// Flush timestamp - flush_timestamp: DateTime<Utc>, + flush_timestamp: Time, } impl PartitionCheckpoint { @@ -382,7 +382,7 @@ impl PartitionCheckpoint { table_name: Arc<str>, partition_key: Arc<str>, sequencer_numbers: BTreeMap<u32, OptionalMinMaxSequence>, - flush_timestamp: DateTime<Utc>, + flush_timestamp: Time, ) -> Self { Self { table_name, @@ -424,8 +424,8 @@ impl PartitionCheckpoint { .map(|(sequencer_id, min_max)| (*sequencer_id, *min_max)) } - /// Maximum persisted timestamp. - pub fn flush_timestamp(&self) -> DateTime<Utc> { + /// Flush timestamp + pub fn flush_timestamp(&self) -> Time { self.flush_timestamp } } @@ -908,7 +908,7 @@ mod tests { ($table_name:expr, $partition_key:expr, {$($sequencer_number:expr => ($min:expr, $max:expr)),*}) => { { let sequencer_numbers = sequencer_numbers!{$($sequencer_number => ($min, $max)),*}; - let flush_timestamp = DateTime::from_utc(chrono::NaiveDateTime::from_timestamp(0, 0), Utc); + let flush_timestamp = Time::from_timestamp_nanos(0); PartitionCheckpoint::new(Arc::from($table_name), Arc::from($partition_key), sequencer_numbers, flush_timestamp) } }; diff --git a/persistence_windows/src/persistence_windows.rs b/persistence_windows/src/persistence_windows.rs index cc236e4de5..e12ecec074 100644 --- a/persistence_windows/src/persistence_windows.rs +++ b/persistence_windows/src/persistence_windows.rs @@ -4,10 +4,10 @@ use std::{ num::NonZeroUsize, ops::Deref, sync::Arc, - time::Duration as StdDuration, + time::Duration, }; -use chrono::{DateTime, Duration, Utc}; +use time::{Time, TimeProvider}; use data_types::{ partition_metadata::PartitionAddr, sequence::Sequence, write_summary::WriteSummary, @@ -17,7 +17,7 @@ use internal_types::freezable::{Freezable, FreezeHandle}; use crate::min_max_sequence::MinMaxSequence; use crate::{checkpoint::PartitionCheckpoint, min_max_sequence::OptionalMinMaxSequence}; -const DEFAULT_CLOSED_WINDOW_SECONDS: i64 = 30; +const DEFAULT_CLOSED_WINDOW: Duration = Duration::from_secs(30); /// PersistenceWindows keep track of ingested data within a partition to determine when it /// can be persisted. 
This allows IOx to receive out of order writes (in their timestamps) while @@ -50,13 +50,15 @@ pub struct PersistenceWindows { closed_window_period: Duration, /// The instant this PersistenceWindows was created - time_of_first_write: DateTime<Utc>, + time_of_first_write: Time, /// The maximum Wall timestamp that has been passed to PersistenceWindows::add_range - time_of_last_write: DateTime<Utc>, + time_of_last_write: Time, /// maps sequencer_id to the maximum sequence passed to PersistenceWindows::add_range max_sequence_numbers: BTreeMap<u32, u64>, + + time_provider: Arc<dyn TimeProvider>, } /// A handle for flushing data from the `PersistenceWindows` @@ -80,7 +82,7 @@ pub struct FlushHandle { addr: PartitionAddr, /// The row timestamp to flush - timestamp: DateTime<Utc>, + timestamp: Time, /// The sequence number ranges not including those persisted by this flush sequencer_numbers: BTreeMap<u32, OptionalMinMaxSequence>, @@ -88,7 +90,7 @@ pub struct FlushHandle { impl FlushHandle { /// Should flush all rows with a timestamp less than or equal to this - pub fn timestamp(&self) -> DateTime<Utc> { + pub fn timestamp(&self) -> Time { self.timestamp } @@ -105,13 +107,15 @@ impl FlushHandle { } impl PersistenceWindows { - pub fn new(addr: PartitionAddr, late_arrival_period: StdDuration, now: DateTime<Utc>) -> Self { - let late_arrival_period = Duration::from_std(late_arrival_period).unwrap(); - let closed_window_period = - late_arrival_period.min(Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS)); + pub fn new( + addr: PartitionAddr, + late_arrival_period: Duration, + time_provider: Arc<dyn TimeProvider>, + ) -> Self { + let closed_window_period = late_arrival_period.min(DEFAULT_CLOSED_WINDOW); + let closed_window_count = late_arrival_period.as_secs() / closed_window_period.as_secs(); - let closed_window_count = - late_arrival_period.num_seconds() / closed_window_period.num_seconds(); + let now = time_provider.now(); Self { persistable: Freezable::new(None), @@ -123,14 +127,13 @@ impl PersistenceWindows { time_of_first_write: now, time_of_last_write: now, max_sequence_numbers: Default::default(), + time_provider, } } /// Updates the late arrival period of this `PersistenceWindows` instance - pub fn set_late_arrival_period(&mut self, late_arrival_period: StdDuration) { - let late_arrival_period = Duration::from_std(late_arrival_period).unwrap(); - self.closed_window_period = - late_arrival_period.min(Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS)); + pub fn set_late_arrival_period(&mut self, late_arrival_period: Duration) { + self.closed_window_period = late_arrival_period.min(DEFAULT_CLOSED_WINDOW); self.late_arrival_period = late_arrival_period; } @@ -152,27 +155,23 @@ impl PersistenceWindows { /// Updates the windows with the information from a batch of rows from a single sequencer /// to the same partition. `min_time` and `max_time` are row timestamps in the written data - /// and `time_of_write` is the wall time that the write was performed. - /// - /// The `time_of_write` is used by the lifecycle manager to determine how long the data in a - /// persistence window has been sitting in memory. If it is over the configured threshold - /// the data should be persisted. /// /// The times passed in are used to determine where to split the in-memory data when persistence /// is triggered (either by crossing a row count threshold or time). /// + /// TODO: Use WriteSummary + /// /// # Panics /// - When `min_time > max_time`. 
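// Illustrative sketch, not part of this patch: the calling convention after this migration.
// Row timestamps are `time::Time` values, and the wall-clock time of the write now comes
// from the injected `TimeProvider` rather than an explicit argument. All types and calls
// mirror the tests in this diff; the wrapper function `example_add_range` is assumed
// purely for illustration.
fn example_add_range() {
    use std::{num::NonZeroUsize, sync::Arc, time::Duration};
    use data_types::{partition_metadata::PartitionAddr, sequence::Sequence};
    use time::{MockProvider, Time};

    // Mocked wall clock injected into the windows
    let provider = Arc::new(MockProvider::new(Time::from_timestamp_nanos(0)));
    let mut windows = PersistenceWindows::new(
        PartitionAddr {
            db_name: Arc::from("db"),
            table_name: Arc::from("table"),
            partition_key: Arc::from("partition"),
        },
        Duration::from_secs(60),
        Arc::<MockProvider>::clone(&provider),
    );

    // Advance the mock clock; this is what `add_range` records as the time of write.
    provider.set(Time::from_timestamp_nanos(1_000));

    windows.add_range(
        Some(&Sequence { id: 1, number: 1 }),
        NonZeroUsize::new(2).unwrap(),
        Time::from_timestamp_nanos(100), // min row timestamp
        Time::from_timestamp_nanos(200), // max row timestamp
    );
}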
pub fn add_range( &mut self, sequence: Option<&Sequence>, row_count: NonZeroUsize, - min_time: DateTime<Utc>, - max_time: DateTime<Utc>, - time_of_write: DateTime<Utc>, + min_time: Time, + max_time: Time, ) { - // DateTime<Utc> is not monotonic - let time_of_write = self.time_of_last_write.max(time_of_write); + // TimeProvider is not monotonic + let time_of_write = self.time_of_last_write.max(self.time_provider.now()); assert!( min_time <= max_time, "PersistenceWindows::add_range called with min_time ({}) > max_time ({})", @@ -199,7 +198,7 @@ impl PersistenceWindows { } } - self.rotate(time_of_write); + self.rotate(); match self.open.as_mut() { Some(w) => w.add_range(sequence, row_count, min_time, max_time, time_of_write), @@ -216,11 +215,11 @@ impl PersistenceWindows { } /// rotates open window to closed if past time and any closed windows to persistable. - /// - /// `now` is the Wall clock time of the server to use for determining how "old" a given - /// persistence window is, or in other words, how long since the writes it contains the - /// metrics for were written to this partition - fn rotate(&mut self, now: DateTime<Utc>) { + fn rotate(&mut self) { + self.rotate_impl(self.time_provider.now()) + } + + fn rotate_impl(&mut self, now: Time) { let rotate = self .open .as_ref() @@ -304,13 +303,17 @@ impl PersistenceWindows { /// Acquire a handle that flushes all unpersisted data pub fn flush_all_handle(&mut self) -> Option<FlushHandle> { - self.flush_handle(chrono::MAX_DATETIME) + self.flush_handle_impl(Time::MAX) } /// Acquire a handle that prevents mutation of the persistable window until dropped /// /// Returns `None` if there is an outstanding handle or nothing to persist - pub fn flush_handle(&mut self, now: DateTime<Utc>) -> Option<FlushHandle> { + pub fn flush_handle(&mut self) -> Option<FlushHandle> { + self.flush_handle_impl(self.time_provider.now()) + } + + fn flush_handle_impl(&mut self, now: Time) -> Option<FlushHandle> { // Verify no active flush handles before closing open window self.persistable.get_mut()?; @@ -320,7 +323,7 @@ impl PersistenceWindows { } // Rotate into persistable window - self.rotate(now); + self.rotate_impl(now); Some(FlushHandle { handle: self.persistable.try_freeze()?, @@ -353,10 +356,7 @@ impl PersistenceWindows { ); // Everything up to and including persistable max time will have been persisted - if let Some(new_min) = persistable - .max_time - .checked_add_signed(chrono::Duration::nanoseconds(1)) - { + if let Some(new_min) = persistable.max_time.checked_add(Duration::from_nanos(1)) { for w in self.closed.iter_mut().take(closed_count) { if w.min_time < new_min { w.min_time = new_min; @@ -413,22 +413,23 @@ impl PersistenceWindows { } /// Returns the minimum unpersisted age - pub fn minimum_unpersisted_age(&self) -> Option<DateTime<Utc>> { + pub fn minimum_unpersisted_age(&self) -> Option<Time> { self.minimum_window().map(|x| x.time_of_first_write) } /// Returns the minimum unpersisted timestamp - pub fn minimum_unpersisted_timestamp(&self) -> Option<DateTime<Utc>> { + pub fn minimum_unpersisted_timestamp(&self) -> Option<Time> { self.windows().map(|x| x.min_time).min() } /// Returns the maximum unpersisted timestamp - pub fn maximum_unpersisted_timestamp(&self) -> Option<DateTime<Utc>> { + pub fn maximum_unpersisted_timestamp(&self) -> Option<Time> { self.windows().map(|x| x.max_time).max() } /// Returns the number of persistable rows - pub fn persistable_row_count(&self, now: DateTime<Utc>) -> usize { + pub fn persistable_row_count(&self) -> usize { + 
let now = self.time_provider.now(); self.windows() .take_while(|window| window.is_persistable(now, self.late_arrival_period)) .map(|window| window.row_count.get()) @@ -440,26 +441,26 @@ impl PersistenceWindows { struct Window { /// The server time when this window was created. Used to determine how long data in this /// window has been sitting in memory. - time_of_first_write: DateTime<Utc>, + time_of_first_write: Time, /// The server time of the last write to this window - time_of_last_write: DateTime<Utc>, + time_of_last_write: Time, /// The number of rows in the window row_count: NonZeroUsize, /// min time value for data in the window - min_time: DateTime<Utc>, + min_time: Time, /// max time value for data in the window - max_time: DateTime<Utc>, + max_time: Time, /// maps sequencer_id to the minimum and maximum sequence numbers seen sequencer_numbers: BTreeMap<u32, MinMaxSequence>, } impl Window { fn new( - time_of_write: DateTime<Utc>, + time_of_write: Time, sequence: Option<&Sequence>, row_count: NonZeroUsize, - min_time: DateTime<Utc>, - max_time: DateTime<Utc>, + min_time: Time, + max_time: Time, ) -> Self { let mut sequencer_numbers = BTreeMap::new(); if let Some(sequence) = sequence { @@ -485,9 +486,9 @@ impl Window { &mut self, sequence: Option<&Sequence>, row_count: NonZeroUsize, - min_time: DateTime<Utc>, - max_time: DateTime<Utc>, - time_of_write: DateTime<Utc>, + min_time: Time, + max_time: Time, + time_of_write: Time, ) { assert!(self.time_of_first_write <= time_of_write); assert!(self.time_of_last_write <= time_of_write); @@ -545,84 +546,90 @@ impl Window { } /// If this window can be closed - fn is_closeable(&self, now: DateTime<Utc>, closed_window_period: Duration) -> bool { - (now - self.time_of_first_write) >= closed_window_period + fn is_closeable(&self, now: Time, closed_window_period: Duration) -> bool { + now.checked_duration_since(self.time_of_first_write) + .map(|x| x >= closed_window_period) + .unwrap_or(false) } /// If this window is persistable - fn is_persistable(&self, now: DateTime<Utc>, late_arrival_period: Duration) -> bool { - (now - self.time_of_first_write) >= late_arrival_period + fn is_persistable(&self, now: Time, late_arrival_period: Duration) -> bool { + now.checked_duration_since(self.time_of_first_write) + .map(|x| x >= late_arrival_period) + .unwrap_or(false) } } #[cfg(test)] mod tests { - use chrono::{TimeZone, MAX_DATETIME, MIN_DATETIME}; + use time::MockProvider; use super::*; - fn make_windows(late_arrival_period: StdDuration) -> PersistenceWindows { - PersistenceWindows::new( + fn make_windows( + late_arrival_period: Duration, + start: Time, + ) -> (PersistenceWindows, Arc<MockProvider>) { + let provider = Arc::new(MockProvider::new(start)); + let windows = PersistenceWindows::new( PartitionAddr { db_name: Arc::from("db"), table_name: Arc::from("table_name"), partition_key: Arc::from("partition_key"), }, late_arrival_period, - Utc::now(), - ) + Arc::<MockProvider>::clone(&provider), + ); + (windows, provider) } #[test] fn time_go_backwards() { - let mut w = make_windows(StdDuration::from_secs(60)); - let now = Utc::now(); + let (mut w, time) = make_windows(Duration::from_secs(60), Time::from_timestamp_nanos(0)); + time.set(Time::from_timestamp_nanos(1)); w.add_range( Some(&Sequence { id: 1, number: 1 }), NonZeroUsize::new(1).unwrap(), - Utc::now(), - Utc::now(), - now + Duration::nanoseconds(1), + Time::from_timestamp_nanos(100), + Time::from_timestamp_nanos(200), ); + time.set(Time::from_timestamp_nanos(0)); w.add_range( Some(&Sequence { 
id: 1, number: 2 }), NonZeroUsize::new(1).unwrap(), - Utc::now(), - Utc::now(), - now, + Time::from_timestamp_nanos(100), + Time::from_timestamp_nanos(200), ); } #[test] #[should_panic(expected = "PersistenceWindows::add_range called with min_time")] fn panics_when_min_time_gt_max_time() { - let mut w = make_windows(StdDuration::from_secs(60)); + let (mut w, _) = make_windows(Duration::from_secs(60), Time::from_timestamp_nanos(0)); - let t = Utc::now(); w.add_range( Some(&Sequence { id: 1, number: 1 }), NonZeroUsize::new(1).unwrap(), - t + chrono::Duration::nanoseconds(1), - t, - Utc::now(), + Time::from_timestamp(1, 0), + Time::from_timestamp(0, 1), ); } #[test] fn starts_open_window() { - let mut w = make_windows(StdDuration::from_secs(60)); + let row_t0 = Time::from_timestamp_nanos(23526); + let row_t1 = row_t0 + Duration::from_secs(1); + let row_t2 = row_t1 + Duration::from_millis(3); + let row_t3 = row_t2 + Duration::from_millis(3); - let row_t0 = Utc::now(); - let row_t1 = row_t0 + chrono::Duration::seconds(1); - let row_t2 = row_t1 + chrono::Duration::milliseconds(3); - let row_t3 = row_t2 + chrono::Duration::milliseconds(3); + let write_t0 = Time::from_timestamp_nanos(39832985493); + let write_t1 = write_t0 + Duration::from_secs(2); + let write_t2 = write_t1 + Duration::from_secs(2); + let write_t3 = write_t2 + Duration::from_secs(2); - let write_t0 = w.time_of_first_write; - let write_t1 = write_t0 + chrono::Duration::seconds(2); - let write_t2 = write_t1 + chrono::Duration::seconds(2); - let write_t3 = write_t2 + chrono::Duration::seconds(2); + let (mut w, time) = make_windows(Duration::from_secs(60), write_t0); // Write timestamps are purposefully out of order w.add_range( @@ -630,28 +637,27 @@ mod tests { NonZeroUsize::new(1).unwrap(), row_t0, row_t0, - write_t0, ); + time.set(write_t2); w.add_range( Some(&Sequence { id: 1, number: 4 }), NonZeroUsize::new(2).unwrap(), row_t1, row_t1, - write_t2, ); + time.set(write_t3); w.add_range( Some(&Sequence { id: 1, number: 10 }), NonZeroUsize::new(1).unwrap(), row_t2, row_t3, - write_t3, ); + time.set(write_t1); w.add_range( Some(&Sequence { id: 2, number: 23 }), NonZeroUsize::new(10).unwrap(), row_t2, row_t3, - write_t1, ); assert!(w.closed.is_empty()); @@ -674,35 +680,35 @@ mod tests { #[test] fn closes_open_window() { - let mut w = make_windows(StdDuration::from_secs(60)); - let created_at = w.time_of_first_write; + let created_at = Time::from_timestamp_nanos(405693840963); + let after_close_threshold = created_at + DEFAULT_CLOSED_WINDOW; - let row_t0 = Utc.timestamp_nanos(39049493); - let row_t1 = row_t0 + Duration::seconds(3); - let row_t2 = row_t1 + Duration::milliseconds(65); + let row_t0 = Time::from_timestamp_nanos(39049493); + let row_t1 = row_t0 + Duration::from_secs(3); + let row_t2 = row_t1 + Duration::from_millis(65); + + let (mut w, time) = make_windows(Duration::from_secs(60), created_at); w.add_range( Some(&Sequence { id: 1, number: 2 }), NonZeroUsize::new(1).unwrap(), row_t0, row_t1, - created_at, ); w.add_range( Some(&Sequence { id: 1, number: 3 }), NonZeroUsize::new(1).unwrap(), row_t0, row_t1, - created_at, ); - let after_close_threshold = created_at + Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS); + time.set(after_close_threshold); + w.add_range( Some(&Sequence { id: 1, number: 6 }), NonZeroUsize::new(2).unwrap(), row_t1, row_t2, - after_close_threshold, ); assert!(w.persistable.is_none()); @@ -729,37 +735,42 @@ mod tests { #[test] fn moves_to_persistable() { - let mut w = 
make_windows(StdDuration::from_secs(120)); - let write_t0 = w.time_of_first_write; - let start_time = Utc::now(); + let write_t0 = Time::from_timestamp_nanos(23459823490); + let write_t1 = write_t0 + DEFAULT_CLOSED_WINDOW; + let write_t2 = write_t1 + DEFAULT_CLOSED_WINDOW; + let write_t3 = write_t2 + DEFAULT_CLOSED_WINDOW * 3; + let write_t4 = write_t3 + DEFAULT_CLOSED_WINDOW * 100; + + let row_t0 = Time::from_timestamp_nanos(346363); + let row_t1 = row_t0 + Duration::from_secs(4); + let row_t2 = row_t1 + Duration::from_millis(393); + let row_t3 = row_t2 + Duration::from_millis(493); + let row_t4 = row_t3 + Duration::from_millis(5956); + let row_t5 = row_t4 + Duration::from_millis(6997); + + let (mut w, time) = make_windows(Duration::from_secs(120), write_t0); - let first_end = Utc::now(); w.add_range( Some(&Sequence { id: 1, number: 2 }), NonZeroUsize::new(2).unwrap(), - start_time, - first_end, - write_t0, + row_t0, + row_t1, ); - let write_t1 = write_t0 + Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS); - let second_end = Utc::now(); + time.set(write_t1); w.add_range( Some(&Sequence { id: 1, number: 3 }), NonZeroUsize::new(3).unwrap(), - first_end, - second_end, - write_t1, + row_t1, + row_t2, ); - let write_2 = write_t1 + Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS); - let third_end = Utc::now(); + time.set(write_t2); w.add_range( Some(&Sequence { id: 1, number: 4 }), NonZeroUsize::new(4).unwrap(), - second_end, - third_end, - write_2, + row_t2, + row_t3, ); assert!(w.persistable.is_none()); @@ -767,104 +778,104 @@ mod tests { let c = w.closed.get(0).cloned().unwrap(); assert_eq!(c.time_of_first_write, write_t0); assert_eq!(c.row_count.get(), 2); - assert_eq!(c.min_time, start_time); - assert_eq!(c.max_time, first_end); + assert_eq!(c.min_time, row_t0); + assert_eq!(c.max_time, row_t1); let c = w.closed.get(1).cloned().unwrap(); assert_eq!(c.time_of_first_write, write_t1); assert_eq!(c.row_count.get(), 3); - assert_eq!(c.min_time, first_end); - assert_eq!(c.max_time, second_end); + assert_eq!(c.min_time, row_t1); + assert_eq!(c.max_time, row_t2); let c = w.open.clone().unwrap(); - assert_eq!(c.time_of_first_write, write_2); + assert_eq!(c.time_of_first_write, write_t2); assert_eq!(c.row_count.get(), 4); - assert_eq!(c.min_time, second_end); - assert_eq!(c.max_time, third_end); + assert_eq!(c.min_time, row_t2); + assert_eq!(c.max_time, row_t3); - let write_t3 = write_2 + Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS * 3); - let fourth_end = Utc::now(); + time.set(write_t3); w.add_range( Some(&Sequence { id: 1, number: 5 }), NonZeroUsize::new(1).unwrap(), - fourth_end, - fourth_end, - write_t3, + row_t4, + row_t4, ); // confirm persistable has first and second let c = w.persistable.as_ref().unwrap(); assert_eq!(c.time_of_first_write, write_t0); assert_eq!(c.row_count.get(), 5); - assert_eq!(c.min_time, start_time); - assert_eq!(c.max_time, second_end); + assert_eq!(c.min_time, row_t0); + assert_eq!(c.max_time, row_t2); // and the third window moved to closed let c = w.closed.get(0).cloned().unwrap(); - assert_eq!(c.time_of_first_write, write_2); + assert_eq!(c.time_of_first_write, write_t2); assert_eq!(c.row_count.get(), 4); - assert_eq!(c.min_time, second_end); - assert_eq!(c.max_time, third_end); + assert_eq!(c.min_time, row_t2); + assert_eq!(c.max_time, row_t3); - let write_t4 = write_t3 + Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS * 100); + time.set(write_t4); w.add_range( Some(&Sequence { id: 1, number: 9 }), NonZeroUsize::new(2).unwrap(), - Utc::now(), - Utc::now(), - 
write_t4, + row_t5, + row_t5, ); let c = w.persistable.as_ref().unwrap(); assert_eq!(c.time_of_first_write, write_t0); assert_eq!(c.row_count.get(), 10); - assert_eq!(c.min_time, start_time); - assert_eq!(c.max_time, fourth_end); + assert_eq!(c.min_time, row_t0); + assert_eq!(c.max_time, row_t4); } #[test] fn flush_persistable_keeps_open_and_closed() { - let mut w = make_windows(StdDuration::from_secs(120)); - // these instants represent when the server received the data. Here we have a window that // should be in the persistable group, a closed window, and an open window that is closed on flush. - let write_t0 = w.time_of_first_write; - let write_t1 = write_t0 + Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS * 2); - let write_t2 = write_t1 + Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS); - let write_t3 = write_t2 + Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS); + let write_t0 = Time::from_timestamp_nanos(546859); + let write_t1 = write_t0 + DEFAULT_CLOSED_WINDOW * 2; + let write_t2 = write_t1 + DEFAULT_CLOSED_WINDOW; + let write_t3 = write_t2 + DEFAULT_CLOSED_WINDOW; // these times represent the value of the time column for the rows of data. Here we have // non-overlapping windows. - let start_time = Utc::now(); - let first_end = start_time + Duration::seconds(1); - let second_start = first_end + Duration::seconds(1); - let second_end = second_start + Duration::seconds(1); - let third_start = second_end + Duration::seconds(1); - let third_end = third_start + Duration::seconds(1); + let start_time = Time::from_timestamp_nanos(34693946939); + let first_end = start_time + Duration::from_secs(1); + let second_start = first_end + Duration::from_secs(1); + let second_end = second_start + Duration::from_secs(1); + let third_start = second_end + Duration::from_secs(1); + let third_end = third_start + Duration::from_secs(1); + let (mut w, time) = make_windows(Duration::from_secs(120), write_t0); + + time.set(write_t0); w.add_range( Some(&Sequence { id: 1, number: 2 }), NonZeroUsize::new(2).unwrap(), start_time, first_end, - write_t0, ); + + time.set(write_t1); w.add_range( Some(&Sequence { id: 1, number: 3 }), NonZeroUsize::new(3).unwrap(), second_start, second_end, - write_t1, ); + + time.set(write_t2); w.add_range( Some(&Sequence { id: 1, number: 5 }), NonZeroUsize::new(2).unwrap(), third_start, third_end, - write_t2, ); - w.rotate(write_t3); + time.set(write_t3); + w.rotate(); let c = w.persistable.as_ref().unwrap(); assert_eq!(c.time_of_first_write, write_t0); @@ -875,7 +886,7 @@ mod tests { let mins = w.persistable.as_ref().unwrap().sequencer_numbers.clone(); assert_eq!(mins, w.minimum_unpersisted_sequence().unwrap()); - let handle = w.flush_handle(write_t3).unwrap(); + let handle = w.flush_handle().unwrap(); w.flush(handle); assert!(w.persistable.is_none()); @@ -897,47 +908,48 @@ mod tests { #[test] fn flush_persistable_overlaps_closed() { - let mut w = make_windows(StdDuration::from_secs(120)); - // these instants represent when data is received by the server. Here we have a persistable // window followed by two closed windows. 
- let write_t0 = w.time_of_first_write; - let write_t1 = write_t0 + Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS * 2); - let write_t2 = write_t1 + Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS); - let write_t3 = write_t2 + Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS); + let write_t0 = Time::from_timestamp_nanos(3949); + let write_t1 = write_t0 + DEFAULT_CLOSED_WINDOW * 2; + let write_t2 = write_t1 + DEFAULT_CLOSED_WINDOW; + let write_t3 = write_t2 + DEFAULT_CLOSED_WINDOW; // the times of the rows of data. this will create overlapping windows where persistable // overlaps with the oldest closed window. - let start_time = Utc::now(); - let second_start = start_time + Duration::seconds(1); - let first_end = second_start + Duration::seconds(1); - let second_end = first_end + Duration::seconds(1); - let third_start = first_end + Duration::seconds(1); - let third_end = third_start + Duration::seconds(1); + let start_time = Time::from_timestamp_nanos(97945794); + let second_start = start_time + Duration::from_secs(1); + let first_end = second_start + Duration::from_secs(1); + let second_end = first_end + Duration::from_secs(1); + let third_start = first_end + Duration::from_secs(1); + let third_end = third_start + Duration::from_secs(1); + + let (mut w, time) = make_windows(Duration::from_secs(120), write_t0); w.add_range( Some(&Sequence { id: 1, number: 2 }), NonZeroUsize::new(2).unwrap(), start_time, first_end, - write_t0, ); + + time.set(write_t1); w.add_range( Some(&Sequence { id: 1, number: 3 }), NonZeroUsize::new(3).unwrap(), second_start, second_end, - write_t1, ); + time.set(write_t2); w.add_range( Some(&Sequence { id: 1, number: 5 }), NonZeroUsize::new(2).unwrap(), third_start, third_end, - write_t2, ); - w.rotate(write_t3); + time.set(write_t3); + w.rotate(); let c = w.persistable.as_ref().unwrap(); assert_eq!(c.time_of_first_write, write_t0); @@ -947,10 +959,10 @@ mod tests { let mins = w.persistable.as_ref().unwrap().sequencer_numbers.clone(); assert_eq!(mins, w.minimum_unpersisted_sequence().unwrap()); - let flush = w.flush_handle(write_t3).unwrap(); + let flush = w.flush_handle().unwrap(); assert_eq!(flush.timestamp(), first_end); - let truncated_time = flush.timestamp() + Duration::nanoseconds(1); + let truncated_time = flush.timestamp() + Duration::from_nanos(1); w.flush(flush); assert!(w.persistable.is_none()); @@ -973,46 +985,48 @@ mod tests { #[test] fn flush_persistable_overlaps_open() { - let mut w = make_windows(StdDuration::from_secs(120)); - // these instants represent when data is received by the server. Here we have a persistable // window followed by two closed windows. - let write_t0 = w.time_of_first_write; - let write_t1 = write_t0 + Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS * 3); - let write_t2 = write_t1 + Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS); - let write_t3 = write_t2 + Duration::seconds(1); + let write_t0 = Time::from_timestamp_nanos(346934603); + let write_t1 = write_t0 + DEFAULT_CLOSED_WINDOW * 3; + let write_t2 = write_t1 + DEFAULT_CLOSED_WINDOW; + let write_t3 = write_t2 + Duration::from_secs(1); // the times of the rows of data. this will create overlapping windows where persistable // overlaps with the newest open window (but not the closed one). 
- let start_time = Utc::now(); - let third_start = start_time + Duration::seconds(1); - let first_end = third_start + Duration::seconds(1); - let second_end = first_end + Duration::seconds(1); - let third_end = second_end + Duration::seconds(1); + let start_time = Time::from_timestamp_nanos(346934603); + let third_start = start_time + Duration::from_secs(1); + let first_end = third_start + Duration::from_secs(1); + let second_end = first_end + Duration::from_secs(1); + let third_end = second_end + Duration::from_secs(1); + + let (mut w, time) = make_windows(Duration::from_secs(120), write_t0); w.add_range( Some(&Sequence { id: 1, number: 2 }), NonZeroUsize::new(2).unwrap(), start_time, first_end, - write_t0, ); + time.set(write_t1); + w.add_range( Some(&Sequence { id: 1, number: 3 }), NonZeroUsize::new(3).unwrap(), first_end, second_end, - write_t1, ); + time.set(write_t2); + w.add_range( Some(&Sequence { id: 1, number: 5 }), NonZeroUsize::new(2).unwrap(), third_start, third_end, - write_t2, ); + time.set(write_t3); - w.rotate(write_t3); + w.rotate(); let c = w.persistable.as_ref().unwrap(); assert_eq!(c.time_of_first_write, write_t0); @@ -1023,10 +1037,10 @@ mod tests { let mins = w.persistable.as_ref().unwrap().sequencer_numbers.clone(); assert_eq!(mins, w.minimum_unpersisted_sequence().unwrap()); - let flush = w.flush_handle(write_t3).unwrap(); + let flush = w.flush_handle().unwrap(); assert_eq!(flush.timestamp(), first_end); assert!(w.open.is_none()); - let flushed_time = flush.timestamp() + chrono::Duration::nanoseconds(1); + let flushed_time = flush.timestamp() + Duration::from_nanos(1); w.flush(flush); assert!(w.persistable.is_none()); @@ -1054,44 +1068,45 @@ mod tests { #[test] fn flush_persistable_overlaps_open_and_closed() { - let mut w = make_windows(StdDuration::from_secs(120)); - // these instants represent when data is received by the server. Here we have a persistable // window followed by two closed windows. - let write_t0 = w.time_of_first_write; - let write_t1 = write_t0 + Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS * 3); - let write_t2 = write_t1 + Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS); - let write_t3 = write_t2 + Duration::seconds(1); + let write_t0 = Time::from_timestamp_nanos(347094709); + let write_t1 = write_t0 + DEFAULT_CLOSED_WINDOW * 3; + let write_t2 = write_t1 + DEFAULT_CLOSED_WINDOW; + let write_t3 = write_t2 + Duration::from_secs(1); // the times of the rows of data. this will create overlapping windows where persistable // overlaps with the closed window and the open one. 
- let start_time = Utc::now(); - let second_start = start_time + Duration::seconds(1); - let third_start = second_start + Duration::seconds(1); - let first_end = third_start + Duration::seconds(1); - let second_end = first_end + Duration::seconds(1); - let third_end = second_end + Duration::seconds(1); + let start_time = Time::from_timestamp_nanos(435760947094); + let second_start = start_time + Duration::from_secs(1); + let third_start = second_start + Duration::from_secs(1); + let first_end = third_start + Duration::from_secs(1); + let second_end = first_end + Duration::from_secs(1); + let third_end = second_end + Duration::from_secs(1); + + let (mut w, time) = make_windows(Duration::from_secs(120), write_t0); w.add_range( Some(&Sequence { id: 1, number: 2 }), NonZeroUsize::new(2).unwrap(), start_time, first_end, - write_t0, ); + + time.set(write_t1); w.add_range( Some(&Sequence { id: 1, number: 3 }), NonZeroUsize::new(3).unwrap(), second_start, second_end, - write_t1, ); + + time.set(write_t2); w.add_range( Some(&Sequence { id: 1, number: 5 }), NonZeroUsize::new(2).unwrap(), third_start, third_end, - write_t2, ); let c = w.persistable.as_ref().unwrap(); @@ -1106,10 +1121,11 @@ mod tests { // this should rotate the first window into persistable // after flush we should see no more persistable window and the closed windows // should have min timestamps equal to the previous flush end. - let flush = w.flush_handle(write_t3).unwrap(); + time.set(write_t3); + let flush = w.flush_handle().unwrap(); assert_eq!(flush.timestamp(), first_end); assert!(w.open.is_none()); - let flushed_time = flush.timestamp() + Duration::nanoseconds(1); + let flushed_time = flush.timestamp() + Duration::from_nanos(1); w.flush(flush); assert!(w.persistable.is_none()); let mins = w.closed[0].sequencer_numbers.clone(); @@ -1135,27 +1151,28 @@ mod tests { #[test] fn test_flush_guard() { - let mut w = make_windows(StdDuration::from_secs(120)); - let late_arrival_period = w.late_arrival_period; + let late_arrival_period = Duration::from_secs(120); // Space writes so each goes to a separate window - let write_t0 = w.time_of_first_write; + let write_t0 = Time::from_timestamp_nanos(565); let write_t1 = write_t0 + late_arrival_period; let write_t2 = write_t1 + late_arrival_period * 2; - let row_t0 = Utc::now(); - let row_t1 = row_t0 + Duration::seconds(2); - let row_t2 = row_t1 + Duration::seconds(2); + let row_t0 = Time::from_timestamp_nanos(340596340); + let row_t1 = row_t0 + Duration::from_secs(2); + let row_t2 = row_t1 + Duration::from_secs(2); + + let (mut w, time) = make_windows(late_arrival_period, write_t0); w.add_range( Some(&Sequence { id: 1, number: 2 }), NonZeroUsize::new(2).unwrap(), row_t0, row_t1, - write_t0, ); - w.rotate(write_t0 + late_arrival_period); + time.set(write_t1); + w.rotate(); assert!(w.persistable.is_some()); assert_eq!(w.persistable.as_ref().unwrap().row_count.get(), 2); assert_eq!(w.persistable.as_ref().unwrap().max_time, row_t1); @@ -1165,22 +1182,22 @@ mod tests { NonZeroUsize::new(5).unwrap(), row_t0, row_t2, - write_t1, ); // Should rotate into closed - w.rotate(write_t1 + Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS)); + time.set(write_t1 + DEFAULT_CLOSED_WINDOW); + w.rotate(); assert_eq!(w.closed.len(), 1); - let guard = w - .flush_handle(write_t1 + Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS)) - .unwrap(); + let guard = w.flush_handle().unwrap(); // Should only allow one at once - assert!(w.flush_handle(write_t0).is_none()); + assert!(w.flush_handle().is_none()); + + 
time.set(write_t1 + late_arrival_period); // This should not rotate into persistable as active flush guard - w.rotate(write_t1 + late_arrival_period); + w.rotate(); assert_eq!(w.persistable.as_ref().unwrap().row_count.get(), 2); let flush_t = guard.timestamp(); @@ -1188,7 +1205,7 @@ mod tests { // Min time should have been truncated by persist operation to be // 1 nanosecond more than was persisted - let truncated_time = flush_t + Duration::nanoseconds(1); + let truncated_time = flush_t + Duration::from_nanos(1); // The flush checkpoint should not include the writes being persisted let flush_checkpoint = guard.checkpoint(); @@ -1218,12 +1235,11 @@ mod tests { ); // This should rotate into persistable - w.rotate(write_t1 + late_arrival_period); + w.rotate(); assert_eq!(w.persistable.as_ref().unwrap().row_count.get(), 5); - assert_eq!(w.persistable.as_ref().unwrap().min_time, truncated_time); - let guard = w.flush_handle(write_t1 + late_arrival_period).unwrap(); + let guard = w.flush_handle().unwrap(); // that checkpoint has an optional minimum let flush_checkpoint = guard.checkpoint(); @@ -1232,75 +1248,77 @@ mod tests { OptionalMinMaxSequence::new(None, 4) ); + time.set(write_t2); w.add_range( Some(&Sequence { id: 1, number: 9 }), NonZeroUsize::new(9).unwrap(), row_t0, - row_t0 + chrono::Duration::seconds(2), - write_t2, + row_t0 + Duration::from_secs(2), ); // Should rotate into closed - w.rotate(write_t2 + Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS)); + time.set(write_t2 + DEFAULT_CLOSED_WINDOW); + w.rotate(); assert_eq!(w.closed.len(), 1); // This should not rotate into persistable as active flush guard - w.rotate(write_t2 + late_arrival_period); + time.set(write_t2 + late_arrival_period); + w.rotate(); assert_eq!(w.persistable.as_ref().unwrap().row_count.get(), 5); std::mem::drop(guard); // This should rotate into persistable - w.rotate(write_t2 + late_arrival_period); + w.rotate(); assert_eq!(w.persistable.as_ref().unwrap().row_count.get(), 5 + 9); assert_eq!(w.persistable.as_ref().unwrap().min_time, row_t0); } #[test] fn test_flush_guard_multiple_closed() { - let late_arrival_period = Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS * 3); - let mut w = make_windows(late_arrival_period.to_std().unwrap()); + let late_arrival_period = DEFAULT_CLOSED_WINDOW * 3; // Space writes so each goes to a separate window - let write_t0 = w.time_of_first_write; - let write_t1 = write_t0 + Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS); - let write_t2 = write_t1 + Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS); - let write_t3 = write_t2 + Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS); - let write_t4 = write_t3 + Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS); + let write_t0 = Time::from_timestamp_nanos(340634); + let write_t1 = write_t0 + DEFAULT_CLOSED_WINDOW; + let write_t2 = write_t1 + DEFAULT_CLOSED_WINDOW; + let write_t3 = write_t2 + DEFAULT_CLOSED_WINDOW; + let write_t4 = write_t3 + DEFAULT_CLOSED_WINDOW; - let row_t0 = Utc::now(); - let row_t1 = row_t0 + Duration::seconds(2); - let row_t2 = row_t1 + Duration::seconds(2); + let row_t0 = Time::from_timestamp_nanos(70780); + let row_t1 = row_t0 + Duration::from_secs(2); + let row_t2 = row_t1 + Duration::from_secs(2); + + let (mut w, time) = make_windows(late_arrival_period, write_t0); w.add_range( Some(&Sequence { id: 1, number: 2 }), NonZeroUsize::new(2).unwrap(), row_t0, row_t1, - write_t0, ); + time.set(write_t1); w.add_range( Some(&Sequence { id: 1, number: 6 }), NonZeroUsize::new(5).unwrap(), row_t0, row_t2, - write_t1, ); + 
time.set(write_t2); w.add_range( Some(&Sequence { id: 1, number: 9 }), NonZeroUsize::new(9).unwrap(), row_t0, row_t1, - write_t2, ); + time.set(write_t3); w.add_range( Some(&Sequence { id: 1, number: 10 }), NonZeroUsize::new(17).unwrap(), row_t0, row_t1, - write_t3, ); assert_eq!(w.closed.len(), 2); @@ -1308,26 +1326,26 @@ mod tests { assert_eq!(w.closed[1].row_count.get(), 9); assert_eq!(w.open.as_ref().unwrap().row_count.get(), 17); - let flush = w.flush_handle(write_t0 + late_arrival_period).unwrap(); - + let flush = w.flush_handle().unwrap(); let flush_t = flush.timestamp(); assert!(w.open.is_none()); assert_eq!(flush.closed_count, 3); assert_eq!(flush_t, row_t1); - let truncated_time = flush_t + Duration::nanoseconds(1); + let truncated_time = flush_t + Duration::from_nanos(1); assert_eq!(w.persistable.as_ref().unwrap().row_count.get(), 2); + time.set(write_t4); w.add_range( Some(&Sequence { id: 1, number: 14 }), NonZeroUsize::new(11).unwrap(), row_t0, row_t1, - write_t4, ); - w.rotate(write_t4 + Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS)); + time.set(write_t4 + DEFAULT_CLOSED_WINDOW); + w.rotate(); // Despite time passing persistable window shouldn't have changed due to flush guard assert_eq!(w.persistable.as_ref().unwrap().row_count.get(), 2); @@ -1378,51 +1396,58 @@ mod tests { #[test] fn test_summaries() { - let late_arrival_period = Duration::seconds(100); - let mut w = make_windows(late_arrival_period.to_std().unwrap()); - let start = w.time_of_first_write; + let late_arrival_period = Duration::from_secs(100); + let write_t0 = Time::from_timestamp_nanos(3963); + let write_t1 = write_t0 + Duration::from_millis(1); + let write_t2 = write_t1 + Duration::from_millis(29); + let write_t3 = write_t2 + Duration::from_millis(20); + let write_t4 = write_t1 + DEFAULT_CLOSED_WINDOW; + let write_t5 = write_t0 + DEFAULT_CLOSED_WINDOW * 3; + let write_t6 = write_t4 + late_arrival_period; + + let (mut w, time) = make_windows(late_arrival_period, write_t0); // Window 1 + time.set(write_t1); w.add_range( Some(&Sequence { id: 1, number: 1 }), NonZeroUsize::new(11).unwrap(), - Utc.timestamp_nanos(10), - Utc.timestamp_nanos(11), - start + Duration::milliseconds(1), + Time::from_timestamp_nanos(10), + Time::from_timestamp_nanos(11), ); + time.set(write_t2); w.add_range( Some(&Sequence { id: 1, number: 2 }), NonZeroUsize::new(4).unwrap(), - Utc.timestamp_nanos(10), - Utc.timestamp_nanos(340), - start + Duration::milliseconds(30), + Time::from_timestamp_nanos(10), + Time::from_timestamp_nanos(340), ); + time.set(write_t3); w.add_range( Some(&Sequence { id: 1, number: 3 }), NonZeroUsize::new(6).unwrap(), - Utc.timestamp_nanos(1), - Utc.timestamp_nanos(5), - start + Duration::milliseconds(50), + Time::from_timestamp_nanos(1), + Time::from_timestamp_nanos(5), ); // More than DEFAULT_CLOSED_WINDOW_PERIOD after start of Window 1 => Window 2 + time.set(write_t4); w.add_range( Some(&Sequence { id: 1, number: 4 }), NonZeroUsize::new(3).unwrap(), - Utc.timestamp_nanos(89), - Utc.timestamp_nanos(90), - start + w.closed_window_period + Duration::milliseconds(1), + Time::from_timestamp_nanos(89), + Time::from_timestamp_nanos(90), ); // More than DEFAULT_CLOSED_WINDOW_PERIOD after start of Window 2 => Window 3 + time.set(write_t5); w.add_range( Some(&Sequence { id: 1, number: 5 }), NonZeroUsize::new(8).unwrap(), - Utc.timestamp_nanos(3), - Utc.timestamp_nanos(4), - start + w.closed_window_period * 3, + Time::from_timestamp_nanos(3), + Time::from_timestamp_nanos(4), ); let summaries: Vec<_> = 
w.summaries().collect(); @@ -1432,31 +1457,32 @@ mod tests { summaries, vec![ WriteSummary { - time_of_first_write: start + Duration::milliseconds(1), - time_of_last_write: start + Duration::milliseconds(50), - min_timestamp: Utc.timestamp_nanos(1), - max_timestamp: Utc.timestamp_nanos(340), + time_of_first_write: write_t1, + time_of_last_write: write_t3, + min_timestamp: Time::from_timestamp_nanos(1), + max_timestamp: Time::from_timestamp_nanos(340), row_count: 21 }, WriteSummary { - time_of_first_write: start + w.closed_window_period + Duration::milliseconds(1), - time_of_last_write: start + w.closed_window_period + Duration::milliseconds(1), - min_timestamp: Utc.timestamp_nanos(89), - max_timestamp: Utc.timestamp_nanos(90), + time_of_first_write: write_t4, + time_of_last_write: write_t4, + min_timestamp: Time::from_timestamp_nanos(89), + max_timestamp: Time::from_timestamp_nanos(90), row_count: 3 }, WriteSummary { - time_of_first_write: start + w.closed_window_period * 3, - time_of_last_write: start + w.closed_window_period * 3, - min_timestamp: Utc.timestamp_nanos(3), - max_timestamp: Utc.timestamp_nanos(4), + time_of_first_write: write_t5, + time_of_last_write: write_t5, + min_timestamp: Time::from_timestamp_nanos(3), + max_timestamp: Time::from_timestamp_nanos(4), row_count: 8 }, ] ); // Rotate first and second windows into persistable - w.rotate(start + late_arrival_period + w.closed_window_period * 2); + time.set(write_t6); + w.rotate(); let summaries: Vec<_> = w.summaries().collect(); @@ -1465,17 +1491,17 @@ mod tests { summaries, vec![ WriteSummary { - time_of_first_write: start + Duration::milliseconds(1), - time_of_last_write: start + w.closed_window_period + Duration::milliseconds(1), - min_timestamp: Utc.timestamp_nanos(1), - max_timestamp: Utc.timestamp_nanos(340), + time_of_first_write: write_t1, + time_of_last_write: write_t4, + min_timestamp: Time::from_timestamp_nanos(1), + max_timestamp: Time::from_timestamp_nanos(340), row_count: 24 }, WriteSummary { - time_of_first_write: start + w.closed_window_period * 3, - time_of_last_write: start + w.closed_window_period * 3, - min_timestamp: Utc.timestamp_nanos(3), - max_timestamp: Utc.timestamp_nanos(4), + time_of_first_write: write_t5, + time_of_last_write: write_t5, + min_timestamp: Time::from_timestamp_nanos(3), + max_timestamp: Time::from_timestamp_nanos(4), row_count: 8 }, ] @@ -1484,33 +1510,34 @@ mod tests { #[test] fn test_regression_2206() { - let late_arrival_period = Duration::seconds(DEFAULT_CLOSED_WINDOW_SECONDS * 10); - let mut w = make_windows(late_arrival_period.to_std().unwrap()); - let now = w.time_of_first_write; + let late_arrival_period = DEFAULT_CLOSED_WINDOW * 10; + + let t0 = Time::from_timestamp_nanos(47069490749); + let t1 = t0 + late_arrival_period; + + let (mut w, time) = make_windows(late_arrival_period, t0); // window 1: to be persisted - let min_time = Utc.timestamp_nanos(10); - let max_time = Utc.timestamp_nanos(11); + let min_time = Time::from_timestamp_nanos(10); + let max_time = Time::from_timestamp_nanos(11); w.add_range( Some(&Sequence { id: 1, number: 1 }), NonZeroUsize::new(1).unwrap(), min_time, max_time, - now, ); // window 2: closed but overlaps with the persistence range - let now = now + late_arrival_period; + time.set(t1); w.add_range( Some(&Sequence { id: 1, number: 4 }), NonZeroUsize::new(1).unwrap(), min_time, max_time, - now, ); // persist - let handle = w.flush_handle(now).unwrap(); + let handle = w.flush_handle().unwrap(); let ckpt = handle.checkpoint(); w.flush(handle); @@ 
-1521,8 +1548,10 @@ mod tests { #[test] fn test_mark_seen_and_persisted() { - let late_arrival_period = StdDuration::from_secs(100); - let mut w = make_windows(late_arrival_period); + let late_arrival_period = Duration::from_secs(100); + let t0 = Time::from_timestamp_nanos(47069490749); + + let (mut w, _) = make_windows(late_arrival_period, t0); let mut sequencer_numbers1 = BTreeMap::new(); sequencer_numbers1.insert(1, OptionalMinMaxSequence::new(Some(1), 2)); @@ -1530,7 +1559,7 @@ mod tests { Arc::from("foo"), Arc::from("bar"), sequencer_numbers1, - Utc::now(), + Time::from_timestamp_nanos(260936036), ); w.mark_seen_and_persisted(&ckpt1); @@ -1541,7 +1570,7 @@ mod tests { Arc::from("foo"), Arc::from("bar"), sequencer_numbers2, - Utc::now(), + Time::from_timestamp_nanos(345345), ); w.mark_seen_and_persisted(&ckpt2); @@ -1554,31 +1583,27 @@ mod tests { #[test] fn flush_min_max_timestamp() { - let mut w = make_windows(StdDuration::from_secs(30)); - - let t0 = Utc::now(); - let t1 = t0 + Duration::seconds(30); - let t2 = t1 + Duration::seconds(3); + let (mut w, time) = make_windows(Duration::from_secs(30), Time::from_timestamp_nanos(0)); w.add_range( Some(&Sequence { id: 1, number: 2 }), NonZeroUsize::new(2).unwrap(), - MIN_DATETIME, - MAX_DATETIME, - t0, + Time::MIN, + Time::MAX, ); + time.inc(Duration::from_secs(30)); w.add_range( Some(&Sequence { id: 1, number: 3 }), NonZeroUsize::new(2).unwrap(), - MIN_DATETIME, - MAX_DATETIME, - t1, + Time::MIN, + Time::MAX, ); - let handle = w.flush_handle(t2).unwrap(); - assert_eq!(handle.timestamp(), MAX_DATETIME); + time.inc(Duration::from_secs(3)); + let handle = w.flush_handle().unwrap(); + assert_eq!(handle.timestamp(), Time::MAX); let ckpt = handle.checkpoint(); - assert_eq!(ckpt.flush_timestamp(), MAX_DATETIME); + assert_eq!(ckpt.flush_timestamp(), Time::MAX); w.flush(handle); assert!(w.closed.is_empty()); diff --git a/query_tests/src/scenarios.rs b/query_tests/src/scenarios.rs index 519532dcf9..5a218c2602 100644 --- a/query_tests/src/scenarios.rs +++ b/query_tests/src/scenarios.rs @@ -1085,8 +1085,7 @@ impl DbSetup for ChunkOrder { let partition = partition.read(); let chunks = LockablePartition::chunks(&partition); let mut partition = partition.upgrade(); - let flush_handle = - LockablePartition::prepare_persist(&mut partition, chrono::MAX_DATETIME).unwrap(); + let flush_handle = LockablePartition::prepare_persist(&mut partition, true).unwrap(); (chunks, flush_handle) }; diff --git a/server/Cargo.toml b/server/Cargo.toml index 0d4252d4fe..60ba7af867 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -48,6 +48,7 @@ serde = "1.0" serde_json = "1.0" snafu = "0.6" snap = "1.0.0" +time = { path = "../time" } trace = { path = "../trace" } tokio = { version = "1.11", features = ["macros", "time"] } tokio-util = { version = "0.6.3" } diff --git a/server/src/application.rs b/server/src/application.rs index f70a4ce9d1..327daecf1d 100644 --- a/server/src/application.rs +++ b/server/src/application.rs @@ -3,6 +3,7 @@ use std::sync::Arc; use object_store::ObjectStore; use observability_deps::tracing::info; use query::exec::Executor; +use time::TimeProvider; use write_buffer::config::WriteBufferConfigFactory; use crate::JobRegistry; @@ -16,6 +17,7 @@ pub struct ApplicationState { executor: Arc<Executor>, job_registry: Arc<JobRegistry>, metric_registry: Arc<metric::Registry>, + time_provider: Arc<dyn TimeProvider>, } impl ApplicationState { @@ -23,33 +25,33 @@ impl ApplicationState { /// /// Uses number of CPUs in the system if num_worker_threads is 
not set pub fn new(object_store: Arc<ObjectStore>, num_worker_threads: Option<usize>) -> Self { - Self::with_write_buffer_factory( - object_store, - Arc::new(Default::default()), - num_worker_threads, - ) - } - - /// Same as [`new`](Self::new) but also specifies the write buffer factory. - /// - /// This is mostly useful for testing. - pub fn with_write_buffer_factory( - object_store: Arc<ObjectStore>, - write_buffer_factory: Arc<WriteBufferConfigFactory>, - num_worker_threads: Option<usize>, - ) -> Self { let num_threads = num_worker_threads.unwrap_or_else(num_cpus::get); info!(%num_threads, "using specified number of threads per thread pool"); let metric_registry = Arc::new(metric::Registry::new()); + let time_provider: Arc<dyn TimeProvider> = Arc::new(time::SystemProvider::new()); let job_registry = Arc::new(JobRegistry::new(Arc::clone(&metric_registry))); + let write_buffer_factory = Arc::new(WriteBufferConfigFactory::new()); + Self { object_store, write_buffer_factory, executor: Arc::new(Executor::new(num_threads)), job_registry, metric_registry, + time_provider, + } + } + + /// Overrides the write_buffer_factory + pub fn with_write_buffer_factory( + self, + write_buffer_factory: Arc<WriteBufferConfigFactory>, + ) -> Self { + Self { + write_buffer_factory, + ..self } } @@ -69,6 +71,10 @@ impl ApplicationState { &self.metric_registry } + pub fn time_provider(&self) -> &Arc<dyn TimeProvider> { + &self.time_provider + } + pub fn executor(&self) -> &Arc<Executor> { &self.executor } diff --git a/server/src/database.rs b/server/src/database.rs index c0f525ba4c..c3063b0ace 100644 --- a/server/src/database.rs +++ b/server/src/database.rs @@ -1109,6 +1109,7 @@ impl DatabaseStateRulesLoaded { catalog, write_buffer_producer: producer, metric_registry: Arc::clone(shared.application.metric_registry()), + time_provider: Arc::clone(shared.application.time_provider()), }; let db = Db::new( @@ -1392,13 +1393,13 @@ mod tests { )); // setup application + let application = ApplicationState::new(Arc::new(ObjectStore::new_in_memory()), None); + let mut factory = WriteBufferConfigFactory::new(); factory.register_mock("my_mock".to_string(), state.clone()); - let application = Arc::new(ApplicationState::with_write_buffer_factory( - Arc::new(ObjectStore::new_in_memory()), - Arc::new(factory), - None, - )); + + let application = Arc::new(application.with_write_buffer_factory(Arc::new(factory))); + let server_id = ServerId::try_from(1).unwrap(); // setup DB diff --git a/server/src/db.rs b/server/src/db.rs index a61c3fc734..869cb39ec7 100644 --- a/server/src/db.rs +++ b/server/src/db.rs @@ -14,7 +14,7 @@ use std::{ use ::lifecycle::select_persistable_chunks; use async_trait::async_trait; -use chrono::{DateTime, TimeZone, Utc}; +use chrono::{DateTime, Utc}; use parking_lot::{Mutex, RwLock}; use rand_distr::{Distribution, Poisson}; use snafu::{ensure, OptionExt, ResultExt, Snafu}; @@ -46,6 +46,7 @@ use query::{ QueryDatabase, }; use schema::Schema; +use time::{Time, TimeProvider}; use trace::ctx::SpanContext; use write_buffer::core::{WriteBufferReading, WriteBufferWriting}; @@ -300,10 +301,7 @@ pub struct Db { /// - to keep the lifecycle state (e.g. the number of running compactions) around lifecycle_policy: tokio::sync::Mutex<Option<::lifecycle::LifecyclePolicy<WeakDb>>>, - /// TESTING ONLY: Mocked `Utc::now()` for the background worker - /// - /// TODO: Replace with TimeProvider (#2722) - now_override: Mutex<Option<DateTime<Utc>>>, + time_provider: Arc<dyn TimeProvider>, /// To-be-written delete predicates. 
delete_predicates_mailbox: Mutex<Vec<(Arc<DeletePredicate>, Vec<ChunkAddrWithoutDatabase>)>>, @@ -321,6 +319,7 @@ pub(crate) struct DatabaseToCommit { pub(crate) preserved_catalog: PreservedCatalog, pub(crate) catalog: Catalog, pub(crate) rules: Arc<DatabaseRules>, + pub(crate) time_provider: Arc<dyn TimeProvider>, /// TODO: Move onto Database pub(crate) write_buffer_producer: Option<Arc<dyn WriteBufferWriting>>, @@ -363,7 +362,7 @@ impl Db { write_buffer_producer: database_to_commit.write_buffer_producer, cleanup_lock: Default::default(), lifecycle_policy: tokio::sync::Mutex::new(None), - now_override: Default::default(), + time_provider: database_to_commit.time_provider, delete_predicates_mailbox: Default::default(), persisted_chunk_id_override: Default::default(), }; @@ -701,7 +700,7 @@ impl Db { .persistence_windows_mut() .map(|window| match force { true => window.flush_all_handle(), - false => window.flush_handle(self.utc_now()), + false => window.flush_handle(), }) .flatten() .context(CannotFlushPartition { @@ -921,7 +920,7 @@ impl Db { /// /// TODO: Remove (#2722) fn utc_now(&self) -> DateTime<Utc> { - self.now_override.lock().unwrap_or_else(Utc::now) + self.time_provider.now().date_time() } async fn cleanup_unreferenced_parquet_files( @@ -1204,32 +1203,20 @@ impl Db { schema_handle.commit(); // TODO: PersistenceWindows use TimestampSummary - let min_time = Utc.timestamp_nanos(timestamp_summary.stats.min.unwrap()); - let max_time = Utc.timestamp_nanos(timestamp_summary.stats.max.unwrap()); + let min_time = Time::from_timestamp_nanos(timestamp_summary.stats.min.unwrap()); + let max_time = Time::from_timestamp_nanos(timestamp_summary.stats.max.unwrap()); match partition.persistence_windows_mut() { Some(windows) => { - windows.add_range( - sequence, - row_count, - min_time, - max_time, - self.utc_now(), - ); + windows.add_range(sequence, row_count, min_time, max_time); } None => { let mut windows = PersistenceWindows::new( partition.addr().clone(), late_arrival_window, - self.utc_now(), - ); - windows.add_range( - sequence, - row_count, - min_time, - max_time, - self.utc_now(), + Arc::clone(&self.time_provider), ); + windows.add_range(sequence, row_count, min_time, max_time); partition.set_persistence_windows(windows); } } @@ -1478,6 +1465,7 @@ mod tests { MockBufferForWriting, MockBufferForWritingThatAlwaysErrors, MockBufferSharedState, }; + use crate::utils::make_db_time; use crate::{ assert_store_sequenced_entry_failures, db::{ @@ -1723,11 +1711,15 @@ mod tests { #[tokio::test] async fn metrics_during_rollover() { - let test_db = make_db().await; + let time = Arc::new(time::MockProvider::new(Time::from_timestamp(11, 22))); + let test_db = TestDb::builder() + .time_provider(Arc::<time::MockProvider>::clone(&time)) + .build() + .await; let db = Arc::clone(&test_db.db); - let t1_write = Utc.timestamp(11, 22); + let t1_write = time.now().date_time(); write_lp_with_time(db.as_ref(), "cpu bar=1 10", t1_write).await; let registry = test_db.metric_registry.as_ref(); @@ -1744,13 +1736,13 @@ mod tests { catalog_chunk_size_bytes_metric_eq(registry, "mutable_buffer", 700); // write into same chunk again. 
- let t2_write = t1_write + chrono::Duration::seconds(1); + let t2_write = time.inc(Duration::from_secs(1)).date_time(); write_lp_with_time(db.as_ref(), "cpu bar=2 20", t2_write).await; - let t3_write = t2_write + chrono::Duration::seconds(1); + let t3_write = time.inc(Duration::from_secs(1)).date_time(); write_lp_with_time(db.as_ref(), "cpu bar=3 30", t3_write).await; - let t4_write = t3_write + chrono::Duration::seconds(1); + let t4_write = time.inc(Duration::from_secs(1)).date_time(); write_lp_with_time(db.as_ref(), "cpu bar=4 40", t4_write).await; - let t5_write = t4_write + chrono::Duration::seconds(1); + let t5_write = time.inc(Duration::from_secs(1)).date_time(); write_lp_with_time(db.as_ref(), "cpu bar=5 50", t5_write).await; // verify chunk size updated @@ -1791,8 +1783,7 @@ mod tests { let expected_read_buffer_size = 1706; catalog_chunk_size_bytes_metric_eq(registry, "read_buffer", expected_read_buffer_size); - let t6_write = t5_write + chrono::Duration::seconds(1); - *db.now_override.lock() = Some(t6_write); + time.inc(Duration::from_secs(1)); *db.persisted_chunk_id_override.lock() = Some(ChunkId::new_test(1337)); let chunk_id = db .persist_partition("cpu", "1970-01-01T00", true) @@ -1843,10 +1834,10 @@ mod tests { write_lp(db.as_ref(), "write_metrics_test foo=3 650000000010").await; let mut summary = TimestampSummary::default(); - summary.record(Utc.timestamp_nanos(100000000000)); - summary.record(Utc.timestamp_nanos(180000000000)); - summary.record(Utc.timestamp_nanos(650000000000)); - summary.record(Utc.timestamp_nanos(650000000010)); + summary.record(Time::from_timestamp_nanos(100000000000)); + summary.record(Time::from_timestamp_nanos(180000000000)); + summary.record(Time::from_timestamp_nanos(650000000000)); + summary.record(Time::from_timestamp_nanos(650000000010)); let mut reporter = metric::RawReporter::default(); test_db.metric_registry.report(&mut reporter); @@ -2188,6 +2179,7 @@ mod tests { async fn write_one_chunk_to_parquet_file() { // Test that data can be written into parquet files let object_store = Arc::new(ObjectStore::new_in_memory()); + let time = Arc::new(time::MockProvider::new(Time::from_timestamp(11, 22))); // Create a DB given a server id, an object store and a db name let test_db = TestDb::builder() @@ -2196,15 +2188,16 @@ mod tests { ..Default::default() }) .object_store(Arc::clone(&object_store)) + .time_provider(Arc::<time::MockProvider>::clone(&time)) .build() .await; let db = test_db.db; // Write some line protocols in Mutable buffer of the DB - let t1_write = Utc.timestamp(11, 22); + let t1_write = time.now().date_time(); write_lp_with_time(db.as_ref(), "cpu bar=1 10", t1_write).await; - let t2_write = t1_write + chrono::Duration::seconds(1); + let t2_write = time.inc(Duration::from_secs(1)).date_time(); write_lp_with_time(db.as_ref(), "cpu bar=2 20", t2_write).await; //Now mark the MB chunk close @@ -2220,9 +2213,9 @@ mod tests { .await .unwrap() .unwrap(); + // Write the RB chunk to Object Store but keep it in RB - let t3_persist = t2_write + chrono::Duration::seconds(1); - *db.now_override.lock() = Some(t3_persist); + time.inc(Duration::from_secs(1)); *db.persisted_chunk_id_override.lock() = Some(ChunkId::new_test(1337)); let pq_chunk = db .persist_partition("cpu", partition_key, true) @@ -2287,6 +2280,7 @@ mod tests { // Create an object store in memory let object_store = Arc::new(ObjectStore::new_in_memory()); + let time = Arc::new(time::MockProvider::new(Time::from_timestamp(11, 22))); // Create a DB given a server id, an object store and a 
db name let test_db = TestDb::builder() @@ -2295,15 +2289,16 @@ mod tests { ..Default::default() }) .object_store(Arc::clone(&object_store)) + .time_provider(Arc::<time::MockProvider>::clone(&time)) .build() .await; let db = test_db.db; // Write some line protocols in Mutable buffer of the DB - let t1_write = Utc.timestamp(11, 22); + let t1_write = time.now().date_time(); write_lp_with_time(db.as_ref(), "cpu bar=1 10", t1_write).await; - let t2_write = t1_write + chrono::Duration::seconds(1); + let t2_write = time.inc(Duration::from_secs(1)).date_time(); write_lp_with_time(db.as_ref(), "cpu bar=2 20", t2_write).await; // Now mark the MB chunk close @@ -2320,8 +2315,7 @@ mod tests { .unwrap() .unwrap(); // Write the RB chunk to Object Store but keep it in RB - let t3_persist = t2_write + chrono::Duration::seconds(1); - *db.now_override.lock() = Some(t3_persist); + time.inc(Duration::from_secs(1)); *db.persisted_chunk_id_override.lock() = Some(ChunkId::new_test(1337)); let pq_chunk = db .persist_partition("cpu", partition_key, true) @@ -2688,11 +2682,12 @@ mod tests { #[tokio::test] async fn chunk_summaries() { // Test that chunk id listing is hooked up - let db = make_db().await.db; + let (db, time) = make_db_time().await; + time.set(Time::from_timestamp(11, 22)); // get three chunks: one open, one closed in mb and one close in rb // In open chunk, will end up in rb/os - let t1_write = Utc.timestamp(11, 22); + let t1_write = time.now().date_time(); write_lp_with_time(&db, "cpu bar=1 1", t1_write).await; // Move open chunk to closed @@ -2700,7 +2695,7 @@ mod tests { // New open chunk in mb // This point will end up in rb/os - let t2_write = t1_write + chrono::Duration::seconds(1); + let t2_write = time.inc(Duration::from_secs(1)).date_time(); write_lp_with_time(&db, "cpu bar=1,baz=2 2", t2_write).await; // Check first/last write times on the chunks at this point @@ -2717,7 +2712,7 @@ mod tests { assert_chunks_times_ordered(&closed_mb_t3, &open_mb_t3); // This point makes a new open mb chunk and will end up in the closed mb chunk - let t3_write = t2_write + chrono::Duration::seconds(1); + let t3_write = time.inc(Duration::from_secs(1)).date_time(); write_lp_with_time(&db, "cpu bar=1,baz=2,frob=3 400000000000000", t3_write).await; // Check first/last write times on the chunks at this point @@ -2762,8 +2757,7 @@ mod tests { assert_chunks_times_eq(&other_open_mb_t5, &other_open_mb_t4); // Persist rb to parquet os - let t4_persist = t3_write + chrono::Duration::seconds(1); - *db.now_override.lock() = Some(t4_persist); + time.inc(Duration::from_secs(1)).date_time(); *db.persisted_chunk_id_override.lock() = Some(ChunkId::new_test(1337)); db.persist_partition("cpu", "1970-01-01T00", true) .await @@ -2806,7 +2800,7 @@ mod tests { // New open chunk in mb // This point will stay in this open mb chunk - let t5_write = t4_persist + chrono::Duration::seconds(1); + let t5_write = time.inc(Duration::from_secs(1)).date_time(); write_lp_with_time(&db, "cpu bar=1,baz=3,blargh=3 400000000000000", t5_write).await; // Check first/last write times on the chunks at this point diff --git a/server/src/db/lifecycle.rs b/server/src/db/lifecycle.rs index fbc5b90fac..e94a16b5eb 100644 --- a/server/src/db/lifecycle.rs +++ b/server/src/db/lifecycle.rs @@ -28,6 +28,7 @@ use std::{ fmt::Display, sync::{Arc, Weak}, }; +use time::Time; use tracker::{RwLock, TaskTracker}; pub(crate) use compact::compact_chunks; @@ -103,7 +104,7 @@ impl LockableChunk for LockableCatalogChunk { pub struct CatalogPersistHandle(FlushHandle); impl 
lifecycle::PersistHandle for CatalogPersistHandle { - fn timestamp(&self) -> DateTime<Utc> { + fn timestamp(&self) -> Time { self.0.timestamp() } } @@ -200,10 +201,13 @@ impl LockablePartition for LockableCatalogPartition { fn prepare_persist( partition: &mut LifecycleWriteGuard<'_, Self::Partition, Self>, - now: DateTime<Utc>, + force: bool, ) -> Option<Self::PersistHandle> { let window = partition.persistence_windows_mut().unwrap(); - let handle = window.flush_handle(now); + let handle = match force { + true => window.flush_all_handle(), + false => window.flush_handle(), + }; trace!(?handle, "preparing for persist"); Some(CatalogPersistHandle(handle?)) } @@ -285,13 +289,13 @@ impl LifecyclePartition for Partition { .unwrap_or(true) } - fn persistable_row_count(&self, now: DateTime<Utc>) -> usize { + fn persistable_row_count(&self) -> usize { self.persistence_windows() - .map(|w| w.persistable_row_count(now)) + .map(|w| w.persistable_row_count()) .unwrap_or(0) } - fn minimum_unpersisted_age(&self) -> Option<DateTime<Utc>> { + fn minimum_unpersisted_age(&self) -> Option<Time> { self.persistence_windows() .and_then(|w| w.minimum_unpersisted_age()) } diff --git a/server/src/db/lifecycle/persist.rs b/server/src/db/lifecycle/persist.rs index 8f8a7433a3..a98adcd801 100644 --- a/server/src/db/lifecycle/persist.rs +++ b/server/src/db/lifecycle/persist.rs @@ -232,7 +232,6 @@ mod tests { Db, }; - use chrono::{TimeZone, Utc}; use data_types::{ chunk_metadata::ChunkStorage, database_rules::LifecycleRules, server_id::ServerId, timestamp::TimestampRange, @@ -247,8 +246,10 @@ mod tests { num::{NonZeroU32, NonZeroU64}, time::Duration, }; + use time::Time; - async fn test_db() -> Arc<Db> { + async fn test_db() -> (Arc<Db>, Arc<time::MockProvider>) { + let time_provider = Arc::new(time::MockProvider::new(Time::from_timestamp(3409, 45))); let test_db = TestDb::builder() .lifecycle_rules(LifecycleRules { late_arrive_window_seconds: NonZeroU32::new(1).unwrap(), @@ -256,23 +257,24 @@ mod tests { worker_backoff_millis: NonZeroU64::new(u64::MAX).unwrap(), ..Default::default() }) + .time_provider(Arc::<time::MockProvider>::clone(&time_provider)) .build() .await; - test_db.db + (test_db.db, time_provider) } #[tokio::test] async fn test_flush_overlapping() { - let db = test_db().await; + let (db, time) = test_db().await; write_lp(db.as_ref(), "cpu,tag1=cupcakes bar=1 10").await; let partition_keys = db.partition_keys().unwrap(); assert_eq!(partition_keys.len(), 1); let db_partition = db.partition("cpu", &partition_keys[0]).unwrap(); - // Wait for the persistence window to be closed - tokio::time::sleep(Duration::from_secs(2)).await; + // Close window + time.inc(Duration::from_secs(2)); write_lp(db.as_ref(), "cpu,tag1=lagged bar=1 10").await; @@ -284,11 +286,11 @@ mod tests { let mut partition = partition.upgrade(); - let handle = LockablePartition::prepare_persist(&mut partition, Utc::now()) + let handle = LockablePartition::prepare_persist(&mut partition, false) .unwrap() .0; - assert_eq!(handle.timestamp(), Utc.timestamp_nanos(10)); + assert_eq!(handle.timestamp(), Time::from_timestamp_nanos(10)); let chunks: Vec<_> = chunks.map(|x| x.upgrade()).collect(); persist_chunks(partition, chunks, handle) @@ -308,18 +310,14 @@ mod tests { #[tokio::test] async fn test_persist_delete_all() { - let db = test_db().await; + let (db, time) = test_db().await; - let late_arrival = chrono::Duration::seconds(1); + let late_arrival = Duration::from_secs(1); - let t0 = Utc::now(); - let t1 = t0 + late_arrival * 10; - let t2 = t1 + 
late_arrival * 10; - - *db.now_override.lock() = Some(t0); + time.inc(Duration::from_secs(32)); write_lp(db.as_ref(), "cpu,tag1=cupcakes bar=1 10").await; - *db.now_override.lock() = Some(t1); + time.inc(late_arrival); write_lp(db.as_ref(), "cpu,tag1=cupcakes bar=3 23").await; let partition_keys = db.partition_keys().unwrap(); @@ -336,7 +334,6 @@ mod tests { db.delete("cpu", predicate).await.unwrap(); // Try to persist first write but it has been deleted - *db.now_override.lock() = Some(t0 + late_arrival); let maybe_chunk = db .persist_partition("cpu", partition_key.as_str(), false) .await @@ -356,11 +353,11 @@ mod tests { .unwrap() .minimum_unpersisted_timestamp() .unwrap(), - Utc.timestamp_nanos(23) + Time::from_timestamp_nanos(23) ); // Add a second set of writes one of which overlaps the above chunk - *db.now_override.lock() = Some(t2); + time.inc(late_arrival * 10); write_lp(db.as_ref(), "cpu,tag1=foo bar=2 23").await; write_lp(db.as_ref(), "cpu,tag1=cupcakes bar=2 26").await; @@ -381,7 +378,7 @@ mod tests { // The persistence windows only know that all rows <= 23 have been persisted // They do not know that the remaining row has timestamp 26, only that // it is in the range 24..=26 - Utc.timestamp_nanos(24) + Time::from_timestamp_nanos(24) ); let mut chunks: Vec<_> = partition.read().chunk_summaries().collect(); @@ -404,7 +401,7 @@ mod tests { db.delete("cpu", predicate).await.unwrap(); // Try to persist third set of writes - *db.now_override.lock() = Some(t2 + late_arrival); + time.inc(late_arrival); let maybe_chunk = db .persist_partition("cpu", partition_key.as_str(), false) .await @@ -423,12 +420,9 @@ mod tests { #[tokio::test] async fn persist_compacted_deletes() { - let db = test_db().await; + let (db, time) = test_db().await; - let late_arrival = chrono::Duration::seconds(1); - let t0 = Utc::now(); - - *db.now_override.lock() = Some(t0); + let late_arrival = Duration::from_secs(1); write_lp(db.as_ref(), "cpu,tag1=cupcakes bar=1 10").await; let partition_keys = db.partition_keys().unwrap(); @@ -460,7 +454,7 @@ mod tests { // Persistence windows unaware rows have been deleted assert!(!partition.read().persistence_windows().unwrap().is_empty()); - *db.now_override.lock() = Some(t0 + late_arrival); + time.inc(late_arrival); let maybe_chunk = db .persist_partition("cpu", partition_key.as_str(), false) .await @@ -528,7 +522,6 @@ mod tests { let db_partition = db.partition("cpu", &partition_keys[0]).unwrap(); // Wait for the persistence window to be closed - tokio::time::sleep(Duration::from_secs(2)).await; let partition = LockableCatalogPartition::new(Arc::clone(&db), Arc::clone(&db_partition)); let partition = partition.read(); @@ -537,11 +530,11 @@ mod tests { let mut partition = partition.upgrade(); - let handle = LockablePartition::prepare_persist(&mut partition, Utc::now()) + let handle = LockablePartition::prepare_persist(&mut partition, true) .unwrap() .0; - assert_eq!(handle.timestamp(), Utc.timestamp_nanos(20)); + assert_eq!(handle.timestamp(), Time::from_timestamp_nanos(20)); let chunks: Vec<_> = chunks.map(|x| x.upgrade()).collect(); let (_, fut) = persist_chunks(partition, chunks, handle).unwrap(); diff --git a/server/src/db/replay.rs b/server/src/db/replay.rs index e5e8353237..309e62f303 100644 --- a/server/src/db/replay.rs +++ b/server/src/db/replay.rs @@ -4,7 +4,6 @@ use std::{ time::Duration, }; -use chrono::Utc; use data_types::sequence::Sequence; use entry::TableBatch; use futures::TryStreamExt; @@ -15,6 +14,7 @@ use persistence_windows::{ 
persistence_windows::PersistenceWindows, }; use snafu::{ResultExt, Snafu}; +use time::Time; use write_buffer::core::WriteBufferReading; use crate::Db; @@ -120,7 +120,7 @@ pub async fn seek_to_end(db: &Db, write_buffer: &mut dyn WriteBufferReading) -> Arc::from(partition.table_name()), Arc::from(partition.key()), sequencer_numbers.clone(), - Utc::now(), + Time::from_timestamp_nanos(0), ); match partition.persistence_windows_mut() { @@ -131,7 +131,7 @@ pub async fn seek_to_end(db: &Db, write_buffer: &mut dyn WriteBufferReading) -> let mut windows = PersistenceWindows::new( partition.addr().clone(), late_arrival_window, - db.utc_now(), + Arc::clone(&db.time_provider), ); windows.mark_seen_and_persisted(&dummy_checkpoint); partition.set_persistence_windows(windows); @@ -290,7 +290,7 @@ pub async fn perform_replay( let mut windows = PersistenceWindows::new( partition.addr().clone(), late_arrival_window, - db.utc_now(), + Arc::clone(&db.time_provider), ); windows.mark_seen_and_persisted(partition_checkpoint); partition.set_persistence_windows(windows); @@ -418,7 +418,7 @@ mod tests { }; use arrow_util::assert_batches_eq; - use chrono::{DateTime, Utc}; + use chrono::Utc; use data_types::{ database_rules::{PartitionTemplate, Partitioner, TemplatePart}, sequence::Sequence, @@ -435,6 +435,7 @@ mod tests { }; use query::{exec::ExecutionContextProvider, frontend::sql::SqlQueryPlanner}; use test_helpers::{assert_contains, assert_not_contains, tracing::TracingCapture}; + use time::{Time, TimeProvider}; use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use write_buffer::mock::{MockBufferForReading, MockBufferSharedState}; @@ -561,6 +562,7 @@ mod tests { // ==================== setup ==================== let object_store = Arc::new(ObjectStore::new_in_memory()); + let time = Arc::new(time::MockProvider::new(Time::from_timestamp(12, 0))); let server_id = ServerId::try_from(1).unwrap(); let db_name = "replay_test"; let partition_template = PartitionTemplate { @@ -577,7 +579,7 @@ mod tests { db_name, partition_template.clone(), self.catalog_transactions_until_checkpoint, - Utc::now(), + Arc::<time::MockProvider>::clone(&time), ) .await; @@ -610,9 +612,6 @@ mod tests { shutdown.cancel(); join_handle.await.unwrap(); - // remember time - let now = test_db.db.now_override.lock().unwrap(); - // drop old DB drop(test_db); @@ -623,7 +622,7 @@ mod tests { db_name, partition_template.clone(), self.catalog_transactions_until_checkpoint, - now, + Arc::<time::MockProvider>::clone(&time), ) .await; test_db = test_db_tmp; @@ -694,8 +693,7 @@ mod tests { } } Step::MakeWritesPersistable => { - let mut guard = test_db.db.now_override.lock(); - *guard = Some(guard.unwrap() + chrono::Duration::seconds(60)); + time.inc(Duration::from_secs(60)); } Step::Assert(checks) => { Self::eval_checks(&checks, true, &test_db).await; @@ -762,7 +760,7 @@ mod tests { db_name: &'static str, partition_template: PartitionTemplate, catalog_transactions_until_checkpoint: NonZeroU64, - now: DateTime<Utc>, + time_provider: Arc<dyn TimeProvider>, ) -> (TestDb, CancellationToken, JoinHandle<()>) { let test_db = TestDb::builder() .object_store(object_store) @@ -775,13 +773,11 @@ mod tests { ..Default::default() }) .partition_template(partition_template) + .time_provider(time_provider) .db_name(db_name) .build() .await; - // Mock time - *test_db.db.now_override.lock() = Some(now); - // start background worker let shutdown: CancellationToken = Default::default(); let shutdown_captured = shutdown.clone(); @@ -2595,7 +2591,7 @@ mod 
tests { Arc::from("table"), Arc::from("partition"), sequencer_numbers, - Utc::now(), + Time::from_timestamp_nanos(236), ); let builder = PersistCheckpointBuilder::new(partition_checkpoint); let (partition_checkpoint, database_checkpoint) = builder.build(); @@ -2642,7 +2638,7 @@ mod tests { Arc::from("table"), Arc::from("partition"), sequencer_numbers, - Utc::now(), + Time::from_timestamp_nanos(0), ); let builder = PersistCheckpointBuilder::new(partition_checkpoint); let (partition_checkpoint, database_checkpoint) = builder.build(); diff --git a/server/src/db/system_tables/persistence.rs b/server/src/db/system_tables/persistence.rs index 3b6fa82352..858589bc35 100644 --- a/server/src/db/system_tables/persistence.rs +++ b/server/src/db/system_tables/persistence.rs @@ -101,9 +101,9 @@ fn from_write_summaries( #[cfg(test)] mod tests { - use chrono::{TimeZone, Utc}; use arrow_util::assert_batches_eq; + use time::Time; use super::*; @@ -119,20 +119,20 @@ mod tests { ( addr.clone(), WriteSummary { - time_of_first_write: Utc.timestamp_nanos(0), - time_of_last_write: Utc.timestamp_nanos(20), - min_timestamp: Utc.timestamp_nanos(50), - max_timestamp: Utc.timestamp_nanos(60), + time_of_first_write: Time::from_timestamp_nanos(0), + time_of_last_write: Time::from_timestamp_nanos(20), + min_timestamp: Time::from_timestamp_nanos(50), + max_timestamp: Time::from_timestamp_nanos(60), row_count: 320, }, ), ( addr, WriteSummary { - time_of_first_write: Utc.timestamp_nanos(6), - time_of_last_write: Utc.timestamp_nanos(21), - min_timestamp: Utc.timestamp_nanos(1), - max_timestamp: Utc.timestamp_nanos(2), + time_of_first_write: Time::from_timestamp_nanos(6), + time_of_last_write: Time::from_timestamp_nanos(21), + min_timestamp: Time::from_timestamp_nanos(1), + max_timestamp: Time::from_timestamp_nanos(2), row_count: 2, }, ), diff --git a/server/src/lib.rs b/server/src/lib.rs index 8490319a74..de5929f18f 100644 --- a/server/src/lib.rs +++ b/server/src/lib.rs @@ -2399,13 +2399,13 @@ mod tests { #[tokio::test] async fn write_buffer_errors_propagate() { + let application = ApplicationState::new(Arc::new(ObjectStore::new_in_memory()), None); + let mut factory = WriteBufferConfigFactory::new(); factory.register_always_fail_mock("my_mock".to_string()); - let application = Arc::new(ApplicationState::with_write_buffer_factory( - Arc::new(ObjectStore::new_in_memory()), - Arc::new(factory), - None, - )); + + let application = Arc::new(application.with_write_buffer_factory(Arc::new(factory))); + let server = make_server(application); server.set_id(ServerId::try_from(1).unwrap()).unwrap(); server.wait_for_init().await.unwrap(); diff --git a/server/src/utils.rs b/server/src/utils.rs index 15afbe25fc..c7caa73e85 100644 --- a/server/src/utils.rs +++ b/server/src/utils.rs @@ -15,6 +15,7 @@ use persistence_windows::checkpoint::ReplayPlan; use query::exec::ExecutorConfig; use query::{exec::Executor, QueryDatabase}; use std::{borrow::Cow, convert::TryFrom, num::NonZeroU32, sync::Arc, time::Duration}; +use time::{Time, TimeProvider}; use write_buffer::core::WriteBufferWriting; // A wrapper around a Db and a metric registry allowing for isolated testing @@ -41,6 +42,7 @@ pub struct TestDbBuilder { write_buffer_producer: Option<Arc<dyn WriteBufferWriting>>, lifecycle_rules: Option<LifecycleRules>, partition_template: Option<PartitionTemplate>, + time_provider: Option<Arc<dyn TimeProvider>>, } impl TestDbBuilder { @@ -52,10 +54,17 @@ impl TestDbBuilder { let server_id = self .server_id .unwrap_or_else(|| 
ServerId::try_from(1).unwrap()); + let db_name = self .db_name .unwrap_or_else(|| DatabaseName::new("placeholder").unwrap()); + let time_provider = self + .time_provider + .clone() + .take() + .unwrap_or_else(|| Arc::new(time::SystemProvider::new())); + let object_store = self .object_store .unwrap_or_else(|| Arc::new(ObjectStore::new_in_memory())); @@ -125,6 +134,7 @@ impl TestDbBuilder { write_buffer_producer: self.write_buffer_producer, exec, metric_registry: Arc::clone(&metric_registry), + time_provider, }; TestDb { @@ -174,6 +184,11 @@ impl TestDbBuilder { self.partition_template = Some(template); self } + + pub fn time_provider(mut self, time_provider: Arc<dyn TimeProvider>) -> Self { + self.time_provider = Some(time_provider); + self + } } /// Used for testing: create a Database with a local store @@ -181,6 +196,16 @@ pub async fn make_db() -> TestDb { TestDb::builder().build().await } +pub async fn make_db_time() -> (Arc<Db>, Arc<time::MockProvider>) { + let provider = Arc::new(time::MockProvider::new(Time::from_timestamp(295293, 3))); + let db = TestDb::builder() + .time_provider(Arc::<time::MockProvider>::clone(&provider)) + .build() + .await + .db; + (db, provider) +} + fn chunk_summary_iter(db: &Db) -> impl Iterator<Item = ChunkSummary> + '_ { db.partition_keys() .unwrap() From 2e8af77e41321c1e9e5275ef881e82d5b9c0519b Mon Sep 17 00:00:00 2001 From: Marco Neumann <marco@crepererum.net> Date: Thu, 9 Sep 2021 13:38:42 +0200 Subject: [PATCH 05/17] feat: allow write buffer producers to read possible sequencer IDs --- write_buffer/src/core.rs | 77 ++++++++++++++++++++++++++++++++++++--- write_buffer/src/kafka.rs | 63 ++++++++++++++++++++++---------- write_buffer/src/mock.rs | 10 +++++ 3 files changed, 124 insertions(+), 26 deletions(-) diff --git a/write_buffer/src/core.rs b/write_buffer/src/core.rs index 55e1da6e99..c1d3070cf4 100644 --- a/write_buffer/src/core.rs +++ b/write_buffer/src/core.rs @@ -15,6 +15,11 @@ pub type WriteBufferError = Box<dyn std::error::Error + Sync + Send>; /// entries from the Write Buffer at a later time. #[async_trait] pub trait WriteBufferWriting: Sync + Send + Debug + 'static { + /// List all known sequencers. + /// + /// This list is sorted and not empty. + fn sequencer_ids(&self) -> Vec<u32>; + /// Send an `Entry` to the write buffer using the specified sequencer ID. /// /// Returns information that can be used to restore entries at a later time. @@ -134,6 +139,7 @@ pub mod test_utils { test_watermark(&adapter).await; test_timestamp(&adapter).await; test_sequencer_auto_creation(&adapter).await; + test_sequencer_ids(&adapter).await; } /// Test IO with a single writer and single reader stream. @@ -314,6 +320,7 @@ pub mod test_utils { /// Test multiple multiple writers and multiple readers on multiple sequencers. 
/// /// This tests that: + /// - writers retrieve consistent sequencer IDs /// - writes go to and reads come from the right sequencer, similar to [`test_multi_sequencer_io`] but less /// detailled /// - multiple writers can write to a single sequencer @@ -332,19 +339,40 @@ pub mod test_utils { let mut reader_1 = context.reading(true).await.unwrap(); let mut reader_2 = context.reading(true).await.unwrap(); - // TODO: do not hard-code sequencer IDs here but provide a proper interface - writer_1.store_entry(&entry_east_1, 0).await.unwrap(); - writer_1.store_entry(&entry_west_1, 1).await.unwrap(); - writer_2.store_entry(&entry_east_2, 0).await.unwrap(); + let mut sequencer_ids_1 = writer_1.sequencer_ids(); + let sequencer_ids_2 = writer_2.sequencer_ids(); + assert_eq!(sequencer_ids_1, sequencer_ids_2); + assert_eq!(sequencer_ids_1.len(), 2); + let sequencer_id_1 = sequencer_ids_1.pop().unwrap(); + let sequencer_id_2 = sequencer_ids_1.pop().unwrap(); + + writer_1 + .store_entry(&entry_east_1, sequencer_id_1) + .await + .unwrap(); + writer_1 + .store_entry(&entry_west_1, sequencer_id_2) + .await + .unwrap(); + writer_2 + .store_entry(&entry_east_2, sequencer_id_1) + .await + .unwrap(); assert_reader_content( &mut reader_1, - &[(0, &[&entry_east_1, &entry_east_2]), (1, &[&entry_west_1])], + &[ + (sequencer_id_1, &[&entry_east_1, &entry_east_2]), + (sequencer_id_2, &[&entry_west_1]), + ], ) .await; assert_reader_content( &mut reader_2, - &[(0, &[&entry_east_1, &entry_east_2]), (1, &[&entry_west_1])], + &[ + (sequencer_id_1, &[&entry_east_1, &entry_east_2]), + (sequencer_id_2, &[&entry_west_1]), + ], ) .await; } @@ -534,6 +562,36 @@ pub mod test_utils { context.writing(false).await.unwrap(); } + /// Test sequencer IDs reporting of writers. + /// + /// This tests that: + /// - all sequencers are reported + /// - the list is sorted + async fn test_sequencer_ids<T>(adapter: &T) + where + T: TestAdapter, + { + let n_sequencers = 10; + let context = adapter + .new_context(NonZeroU32::try_from(n_sequencers).unwrap()) + .await; + + let writer_1 = context.writing(true).await.unwrap(); + let writer_2 = context.writing(true).await.unwrap(); + + let sequencer_ids_1 = writer_1.sequencer_ids(); + let sequencer_ids_2 = writer_2.sequencer_ids(); + assert_eq!(sequencer_ids_1, sequencer_ids_2); + assert_eq!(sequencer_ids_1.len(), n_sequencers as usize); + + let sorted = { + let mut tmp = sequencer_ids_1.clone(); + tmp.sort_unstable(); + tmp + }; + assert_eq!(sequencer_ids_1, sorted); + } + /// Assert that the content of the reader is as expected. /// /// This will read `expected.len()` from the reader and then ensures that the stream is pending. 
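// Editor's sketch, not part of the patch: with the new
// `WriteBufferWriting::sequencer_ids()` the hard-coded sequencer IDs used by
// earlier tests can be replaced by IDs reported by the writer itself, as the
// multi-writer test above now does. Only the trait methods introduced in this
// patch are assumed; `pick_sequencer` and its round-robin counter are
// hypothetical names used for illustration.
use std::sync::atomic::{AtomicUsize, Ordering};

/// Picks a target sequencer round-robin from the writer's reported IDs.
fn pick_sequencer(writer: &dyn WriteBufferWriting, counter: &AtomicUsize) -> u32 {
    // `sequencer_ids()` is documented to be sorted and non-empty.
    let ids = writer.sequencer_ids();
    assert!(!ids.is_empty());
    ids[counter.fetch_add(1, Ordering::Relaxed) % ids.len()]
}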
@@ -541,6 +599,13 @@ pub mod test_utils { where R: WriteBufferReading, { + // normalize expected values + let expected = { + let mut expected = expected.to_vec(); + expected.sort_by_key(|(sequencer_id, _entries)| *sequencer_id); + expected + }; + // Ensure content of the streams let mut streams = reader.streams(); assert_eq!(streams.len(), expected.len()); diff --git a/write_buffer/src/kafka.rs b/write_buffer/src/kafka.rs index b5de77f1ae..2f72ed976f 100644 --- a/write_buffer/src/kafka.rs +++ b/write_buffer/src/kafka.rs @@ -34,6 +34,7 @@ pub struct KafkaBufferProducer { conn: String, database_name: String, producer: FutureProducer, + partitions: Vec<u32>, } // Needed because rdkafka's FutureProducer doesn't impl Debug @@ -48,6 +49,10 @@ impl std::fmt::Debug for KafkaBufferProducer { #[async_trait] impl WriteBufferWriting for KafkaBufferProducer { + fn sequencer_ids(&self) -> Vec<u32> { + self.partitions.clone() + } + /// Send an `Entry` to Kafka using the sequencer ID as a partition. async fn store_entry( &self, @@ -121,15 +126,8 @@ impl KafkaBufferProducer { cfg.set("allow.auto.create.topics", "false"); // handle auto-creation - if get_partitions(&database_name, &cfg).await?.is_empty() { - if let Some(cfg) = creation_config { - create_kafka_topic(&conn, &database_name, cfg.n_sequencers, &cfg.options).await?; - } else { - return Err("no partitions found and auto-creation not requested" - .to_string() - .into()); - } - } + let partitions = + maybe_auto_create_topics(&conn, &database_name, creation_config, &cfg).await?; let producer: FutureProducer = cfg.create()?; @@ -137,6 +135,7 @@ impl KafkaBufferProducer { conn, database_name, producer, + partitions, }) } } @@ -305,17 +304,8 @@ impl KafkaBufferConsumer { cfg.set("auto.offset.reset", "smallest"); // figure out which partitions exists - let mut partitions = get_partitions(&database_name, &cfg).await?; - if partitions.is_empty() { - if let Some(cfg2) = creation_config { - create_kafka_topic(&conn, &database_name, cfg2.n_sequencers, &cfg2.options).await?; - partitions = get_partitions(&database_name, &cfg).await?; - } else { - return Err("no partitions found and auto-creation not requested" - .to_string() - .into()); - } - } + let partitions = + maybe_auto_create_topics(&conn, &database_name, creation_config, &cfg).await?; info!(%database_name, ?partitions, "found Kafka partitions"); // setup a single consumer per partition, at least until https://github.com/fede1024/rust-rdkafka/pull/351 is @@ -420,6 +410,39 @@ async fn create_kafka_topic( } } +async fn maybe_auto_create_topics( + kafka_connection: &str, + database_name: &str, + creation_config: Option<&WriteBufferCreationConfig>, + cfg: &ClientConfig, +) -> Result<Vec<u32>, WriteBufferError> { + let mut partitions = get_partitions(database_name, cfg).await?; + if partitions.is_empty() { + if let Some(creation_config) = creation_config { + create_kafka_topic( + kafka_connection, + database_name, + creation_config.n_sequencers, + &creation_config.options, + ) + .await?; + partitions = get_partitions(database_name, cfg).await?; + + // while the number of partitions might be different than `creation_cfg.n_sequencers` due to a + // conflicting, concurrent topic creation, it must not be empty at this point + if partitions.is_empty() { + return Err("Cannot create non-empty topic".to_string().into()); + } + } else { + return Err("no partitions found and auto-creation not requested" + .to_string() + .into()); + } + } + + Ok(partitions) +} + pub mod test_utils { use std::{collections::HashMap, 
time::Duration}; diff --git a/write_buffer/src/mock.rs b/write_buffer/src/mock.rs index 9d96ab35b8..c5954f6971 100644 --- a/write_buffer/src/mock.rs +++ b/write_buffer/src/mock.rs @@ -221,6 +221,12 @@ impl MockBufferForWriting { #[async_trait] impl WriteBufferWriting for MockBufferForWriting { + fn sequencer_ids(&self) -> Vec<u32> { + let mut guard = self.state.entries.lock(); + let entries = guard.as_mut().unwrap(); + entries.keys().copied().collect() + } + async fn store_entry( &self, entry: &Entry, @@ -257,6 +263,10 @@ pub struct MockBufferForWritingThatAlwaysErrors; #[async_trait] impl WriteBufferWriting for MockBufferForWritingThatAlwaysErrors { + fn sequencer_ids(&self) -> Vec<u32> { + vec![0] + } + async fn store_entry( &self, _entry: &Entry, From 896ce0341540f51723c05890579910ab18c5fca9 Mon Sep 17 00:00:00 2001 From: Marco Neumann <marco@crepererum.net> Date: Tue, 12 Oct 2021 11:13:09 +0200 Subject: [PATCH 06/17] refactor: use sets and maps for write buffer sequencers Use the type to reflect that entries are unique and sorted. --- write_buffer/src/core.rs | 84 ++++++++++++++++++++++++--------------- write_buffer/src/kafka.rs | 24 ++++++----- write_buffer/src/mock.rs | 29 +++++++------- 3 files changed, 81 insertions(+), 56 deletions(-) diff --git a/write_buffer/src/core.rs b/write_buffer/src/core.rs index c1d3070cf4..582181925e 100644 --- a/write_buffer/src/core.rs +++ b/write_buffer/src/core.rs @@ -1,4 +1,7 @@ -use std::fmt::Debug; +use std::{ + collections::{BTreeMap, BTreeSet}, + fmt::Debug, +}; use async_trait::async_trait; use chrono::{DateTime, Utc}; @@ -17,8 +20,8 @@ pub type WriteBufferError = Box<dyn std::error::Error + Sync + Send>; pub trait WriteBufferWriting: Sync + Send + Debug + 'static { /// List all known sequencers. /// - /// This list is sorted and not empty. - fn sequencer_ids(&self) -> Vec<u32>; + /// This set not empty. + fn sequencer_ids(&self) -> BTreeSet<u32>; /// Send an `Entry` to the write buffer using the specified sequencer ID. /// @@ -63,7 +66,7 @@ pub trait WriteBufferReading: Sync + Send + Debug + 'static { /// [`WriteBufferReading`] instance at the same time. If all streams are dropped and requested again, the last /// offsets of the old streams will be the start offsets for the new streams. If you want to prevent that either /// create a new [`WriteBufferReading`] or use [`seek`](Self::seek). - fn streams(&mut self) -> Vec<(u32, EntryStream<'_>)>; + fn streams(&mut self) -> BTreeMap<u32, EntryStream<'_>>; /// Seek given sequencer to given sequence number. The next output of related streams will be an entry with at least /// the given sequence number (the actual sequence number might be skipped due to "holes" in the stream). @@ -81,7 +84,12 @@ pub trait WriteBufferReading: Sync + Send + Debug + 'static { pub mod test_utils { //! Generic tests for all write buffer implementations. 
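// Editor's note, a self-contained std-only illustration of why this commit
// moves sequencer IDs from `Vec<u32>` to `BTreeSet<u32>`: the set type itself
// enforces the "unique and sorted" invariants that the Vec-based API could
// only document and re-check in tests.
use std::collections::BTreeSet;

fn main() {
    // Out-of-order and duplicate inserts...
    let ids: BTreeSet<u32> = IntoIterator::into_iter([3_u32, 1, 3, 0, 2]).collect();
    // ...iterate back deduplicated and in ascending order.
    assert_eq!(ids.into_iter().collect::<Vec<_>>(), vec![0, 1, 2, 3]);
}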
- use std::{convert::TryFrom, num::NonZeroU32, time::Duration}; + use std::{ + collections::{BTreeMap, BTreeSet}, + convert::TryFrom, + num::NonZeroU32, + time::Duration, + }; use async_trait::async_trait; use chrono::{DateTime, TimeZone, Utc}; @@ -163,7 +171,7 @@ pub mod test_utils { let mut streams = reader.streams(); assert_eq!(streams.len(), 1); - let (sequencer_id, mut stream) = streams.pop().unwrap(); + let (sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap(); let waker = futures::task::noop_waker(); let mut cx = futures::task::Context::from_waker(&waker); @@ -224,12 +232,12 @@ pub mod test_utils { // creating stream, drop stream, re-create it => still starts at first entry let mut streams = reader.streams(); assert_eq!(streams.len(), 1); - let (_sequencer_id, stream) = streams.pop().unwrap(); + let (_sequencer_id, stream) = map_pop_first(&mut streams).unwrap(); drop(stream); drop(streams); let mut streams = reader.streams(); assert_eq!(streams.len(), 1); - let (_sequencer_id, mut stream) = streams.pop().unwrap(); + let (_sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap(); assert_eq!( stream.stream.next().await.unwrap().unwrap().entry(), &entry_1 @@ -240,7 +248,7 @@ pub mod test_utils { drop(streams); let mut streams = reader.streams(); assert_eq!(streams.len(), 1); - let (_sequencer_id, mut stream) = streams.pop().unwrap(); + let (_sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap(); assert_eq!( stream.stream.next().await.unwrap().unwrap().entry(), &entry_2 @@ -255,7 +263,7 @@ pub mod test_utils { drop(streams); let mut streams = reader.streams(); assert_eq!(streams.len(), 1); - let (_sequencer_id, mut stream) = streams.pop().unwrap(); + let (_sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap(); assert!(stream.stream.poll_next_unpin(&mut cx).is_pending()); } @@ -279,8 +287,8 @@ pub mod test_utils { let mut streams = reader.streams(); assert_eq!(streams.len(), 2); - let (sequencer_id_1, mut stream_1) = streams.pop().unwrap(); - let (sequencer_id_2, mut stream_2) = streams.pop().unwrap(); + let (sequencer_id_1, mut stream_1) = map_pop_first(&mut streams).unwrap(); + let (sequencer_id_2, mut stream_2) = map_pop_first(&mut streams).unwrap(); assert_ne!(sequencer_id_1, sequencer_id_2); let waker = futures::task::noop_waker(); @@ -343,8 +351,8 @@ pub mod test_utils { let sequencer_ids_2 = writer_2.sequencer_ids(); assert_eq!(sequencer_ids_1, sequencer_ids_2); assert_eq!(sequencer_ids_1.len(), 2); - let sequencer_id_1 = sequencer_ids_1.pop().unwrap(); - let sequencer_id_2 = sequencer_ids_1.pop().unwrap(); + let sequencer_id_1 = set_pop_first(&mut sequencer_ids_1).unwrap(); + let sequencer_id_2 = set_pop_first(&mut sequencer_ids_1).unwrap(); writer_1 .store_entry(&entry_east_1, sequencer_id_1) @@ -432,8 +440,8 @@ pub mod test_utils { let _sequence_number_east_3 = writer.store_entry(&entry_east_3, 0).await.unwrap().0.number; let mut streams = reader_1.streams(); assert_eq!(streams.len(), 2); - let (_sequencer_id, mut stream_1) = streams.pop().unwrap(); - let (_sequencer_id, mut stream_2) = streams.pop().unwrap(); + let (_sequencer_id, mut stream_1) = map_pop_first(&mut streams).unwrap(); + let (_sequencer_id, mut stream_2) = map_pop_first(&mut streams).unwrap(); assert!(stream_1.stream.poll_next_unpin(&mut cx).is_pending()); assert!(stream_2.stream.poll_next_unpin(&mut cx).is_pending()); drop(stream_1); @@ -464,8 +472,8 @@ pub mod test_utils { let mut streams = reader.streams(); assert_eq!(streams.len(), 2); - let (sequencer_id_1, 
stream_1) = streams.pop().unwrap(); - let (sequencer_id_2, stream_2) = streams.pop().unwrap(); + let (sequencer_id_1, stream_1) = map_pop_first(&mut streams).unwrap(); + let (sequencer_id_2, stream_2) = map_pop_first(&mut streams).unwrap(); // start at watermark 0 assert_eq!((stream_1.fetch_high_watermark)().await.unwrap(), 0); @@ -506,7 +514,7 @@ pub mod test_utils { let mut streams = reader.streams(); assert_eq!(streams.len(), 1); - let (sequencer_id, mut stream) = streams.pop().unwrap(); + let (sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap(); // ingest data // @@ -566,7 +574,6 @@ pub mod test_utils { /// /// This tests that: /// - all sequencers are reported - /// - the list is sorted async fn test_sequencer_ids<T>(adapter: &T) where T: TestAdapter, @@ -583,13 +590,6 @@ pub mod test_utils { let sequencer_ids_2 = writer_2.sequencer_ids(); assert_eq!(sequencer_ids_1, sequencer_ids_2); assert_eq!(sequencer_ids_1.len(), n_sequencers as usize); - - let sorted = { - let mut tmp = sequencer_ids_1.clone(); - tmp.sort_unstable(); - tmp - }; - assert_eq!(sequencer_ids_1, sorted); } /// Assert that the content of the reader is as expected. @@ -607,9 +607,8 @@ pub mod test_utils { }; // Ensure content of the streams - let mut streams = reader.streams(); + let streams = reader.streams(); assert_eq!(streams.len(), expected.len()); - streams.sort_by_key(|(sequencer_id, _stream)| *sequencer_id); for ((actual_sequencer_id, actual_stream), (expected_sequencer_id, expected_entries)) in streams.into_iter().zip(expected.iter()) @@ -628,9 +627,8 @@ pub mod test_utils { } // Ensure that streams a pending - let mut streams = reader.streams(); + let streams = reader.streams(); assert_eq!(streams.len(), expected.len()); - streams.sort_by_key(|(sequencer_id, _stream)| *sequencer_id); let waker = futures::task::noop_waker(); let mut cx = futures::task::Context::from_waker(&waker); @@ -668,4 +666,28 @@ pub mod test_utils { ts2 } } + + /// Pops first entry from map. + /// + /// Helper until <https://github.com/rust-lang/rust/issues/62924> is stable. + pub(crate) fn map_pop_first<K, V>(map: &mut BTreeMap<K, V>) -> Option<(K, V)> + where + K: Clone + Ord, + { + map.keys() + .next() + .cloned() + .map(|k| map.remove_entry(&k)) + .flatten() + } + + /// Pops first entry from set. + /// + /// Helper until <https://github.com/rust-lang/rust/issues/62924> is stable. 
+ pub(crate) fn set_pop_first<T>(set: &mut BTreeSet<T>) -> Option<T> + where + T: Clone + Ord, + { + set.iter().next().cloned().map(|k| set.take(&k)).flatten() + } } diff --git a/write_buffer/src/kafka.rs b/write_buffer/src/kafka.rs index 2f72ed976f..85ab00e6cf 100644 --- a/write_buffer/src/kafka.rs +++ b/write_buffer/src/kafka.rs @@ -1,5 +1,5 @@ use std::{ - collections::{BTreeMap, HashMap}, + collections::{BTreeMap, BTreeSet, HashMap}, convert::{TryFrom, TryInto}, num::NonZeroU32, sync::Arc, @@ -34,7 +34,7 @@ pub struct KafkaBufferProducer { conn: String, database_name: String, producer: FutureProducer, - partitions: Vec<u32>, + partitions: BTreeSet<u32>, } // Needed because rdkafka's FutureProducer doesn't impl Debug @@ -49,7 +49,7 @@ impl std::fmt::Debug for KafkaBufferProducer { #[async_trait] impl WriteBufferWriting for KafkaBufferProducer { - fn sequencer_ids(&self) -> Vec<u32> { + fn sequencer_ids(&self) -> BTreeSet<u32> { self.partitions.clone() } @@ -158,8 +158,8 @@ impl std::fmt::Debug for KafkaBufferConsumer { #[async_trait] impl WriteBufferReading for KafkaBufferConsumer { - fn streams(&mut self) -> Vec<(u32, EntryStream<'_>)> { - let mut streams = vec![]; + fn streams(&mut self) -> BTreeMap<u32, EntryStream<'_>> { + let mut streams = BTreeMap::new(); for (sequencer_id, consumer) in &self.consumers { let sequencer_id = *sequencer_id; @@ -222,13 +222,13 @@ impl WriteBufferReading for KafkaBufferConsumer { }; let fetch_high_watermark = Box::new(fetch_high_watermark) as FetchHighWatermark<'_>; - streams.push(( + streams.insert( sequencer_id, EntryStream { stream, fetch_high_watermark, }, - )); + ); } streams @@ -341,7 +341,10 @@ impl KafkaBufferConsumer { } } -async fn get_partitions(database_name: &str, cfg: &ClientConfig) -> Result<Vec<u32>, KafkaError> { +async fn get_partitions( + database_name: &str, + cfg: &ClientConfig, +) -> Result<BTreeSet<u32>, KafkaError> { let database_name = database_name.to_string(); let cfg = cfg.clone(); @@ -355,12 +358,11 @@ async fn get_partitions(database_name: &str, cfg: &ClientConfig) -> Result<Vec<u let topic_metadata = metadata.topics().get(0).expect("requested a single topic"); - let mut partitions: Vec<_> = topic_metadata + let partitions: BTreeSet<_> = topic_metadata .partitions() .iter() .map(|partition_metdata| partition_metdata.id().try_into().unwrap()) .collect(); - partitions.sort_unstable(); Ok(partitions) } @@ -415,7 +417,7 @@ async fn maybe_auto_create_topics( database_name: &str, creation_config: Option<&WriteBufferCreationConfig>, cfg: &ClientConfig, -) -> Result<Vec<u32>, WriteBufferError> { +) -> Result<BTreeSet<u32>, WriteBufferError> { let mut partitions = get_partitions(database_name, cfg).await?; if partitions.is_empty() { if let Some(creation_config) = creation_config { diff --git a/write_buffer/src/mock.rs b/write_buffer/src/mock.rs index c5954f6971..560548c58a 100644 --- a/write_buffer/src/mock.rs +++ b/write_buffer/src/mock.rs @@ -1,5 +1,5 @@ use std::{ - collections::BTreeMap, + collections::{BTreeMap, BTreeSet}, num::NonZeroU32, sync::Arc, task::{Poll, Waker}, @@ -221,7 +221,7 @@ impl MockBufferForWriting { #[async_trait] impl WriteBufferWriting for MockBufferForWriting { - fn sequencer_ids(&self) -> Vec<u32> { + fn sequencer_ids(&self) -> BTreeSet<u32> { let mut guard = self.state.entries.lock(); let entries = guard.as_mut().unwrap(); entries.keys().copied().collect() @@ -263,8 +263,8 @@ pub struct MockBufferForWritingThatAlwaysErrors; #[async_trait] impl WriteBufferWriting for MockBufferForWritingThatAlwaysErrors 
{ - fn sequencer_ids(&self) -> Vec<u32> { - vec![0] + fn sequencer_ids(&self) -> BTreeSet<u32> { + IntoIterator::into_iter([0]).collect() } async fn store_entry( @@ -341,13 +341,13 @@ impl std::fmt::Debug for MockBufferForReading { #[async_trait] impl WriteBufferReading for MockBufferForReading { - fn streams(&mut self) -> Vec<(u32, EntryStream<'_>)> { + fn streams(&mut self) -> BTreeMap<u32, EntryStream<'_>> { let sequencer_ids: Vec<_> = { let playback_states = self.playback_states.lock(); playback_states.keys().copied().collect() }; - let mut streams = vec![]; + let mut streams = BTreeMap::new(); for sequencer_id in sequencer_ids { let shared_state = self.shared_state.clone(); let playback_states = Arc::clone(&self.playback_states); @@ -409,13 +409,13 @@ impl WriteBufferReading for MockBufferForReading { }; let fetch_high_watermark = Box::new(fetch_high_watermark) as FetchHighWatermark<'_>; - streams.push(( + streams.insert( sequencer_id, EntryStream { stream, fetch_high_watermark, }, - )); + ); } streams @@ -448,7 +448,7 @@ pub struct MockBufferForReadingThatAlwaysErrors; #[async_trait] impl WriteBufferReading for MockBufferForReadingThatAlwaysErrors { - fn streams(&mut self) -> Vec<(u32, EntryStream<'_>)> { + fn streams(&mut self) -> BTreeMap<u32, EntryStream<'_>> { let stream = stream::poll_fn(|_ctx| { Poll::Ready(Some(Err(String::from( "Something bad happened while reading from stream", @@ -463,13 +463,14 @@ impl WriteBufferReading for MockBufferForReadingThatAlwaysErrors { fut.boxed() as FetchHighWatermarkFut<'_> }; let fetch_high_watermark = Box::new(fetch_high_watermark) as FetchHighWatermark<'_>; - vec![( + IntoIterator::into_iter([( 0, EntryStream { stream, fetch_high_watermark, }, - )] + )]) + .collect() } async fn seek( @@ -492,7 +493,7 @@ mod tests { use entry::test_helpers::lp_to_entry; - use crate::core::test_utils::{perform_generic_tests, TestAdapter, TestContext}; + use crate::core::test_utils::{map_pop_first, perform_generic_tests, TestAdapter, TestContext}; use super::*; @@ -714,7 +715,7 @@ mod tests { ); let mut streams = reader.streams(); - let (_id, mut stream) = streams.pop().unwrap(); + let (_id, mut stream) = map_pop_first(&mut streams).unwrap(); assert_eq!( stream.stream.next().await.unwrap().unwrap_err().to_string(), "Something bad happened while reading from stream" @@ -799,7 +800,7 @@ mod tests { let playback_state = Arc::clone(&read.playback_states); let consumer = tokio::spawn(async move { - let mut stream = read.streams().pop().unwrap().1.stream; + let mut stream = map_pop_first(&mut read.streams()).unwrap().1.stream; stream.next().await.unwrap().unwrap(); stream.next().await.unwrap().unwrap(); }); From 05541736840f9c8516afe2de00d42273b5a97174 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Tue, 12 Oct 2021 11:32:34 +0100 Subject: [PATCH 07/17] feat: migrate write buffer to TimeProvider (#2722) (#2804) * feat: migrate write buffer to TimeProvider (#2722) * chore: review feedback Co-authored-by: Marco Neumann <marco@crepererum.net> Co-authored-by: Marco Neumann <marco@crepererum.net> --- Cargo.lock | 2 + Cargo.toml | 1 + entry/src/entry.rs | 6 +-- server/src/application.rs | 3 +- server/src/database.rs | 12 ++--- server/src/db.rs | 17 +++++-- server/src/db/replay.rs | 19 ++++---- server/src/lib.rs | 2 +- server/src/write_buffer.rs | 19 ++++---- tests/end_to_end_cases/scenario.rs | 1 + write_buffer/Cargo.toml | 1 + write_buffer/src/config.rs | 40 +++++++++------- write_buffer/src/core.rs | 74 
+++++++++++------------------- write_buffer/src/kafka.rs | 25 +++++++--- write_buffer/src/mock.rs | 48 +++++++++++-------- 15 files changed, 148 insertions(+), 122 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ea2fa655d7..419f7754eb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1700,6 +1700,7 @@ dependencies = [ "thiserror", "tikv-jemalloc-ctl", "tikv-jemalloc-sys", + "time 0.1.0", "tokio", "tokio-stream", "tokio-util", @@ -4987,6 +4988,7 @@ dependencies = [ "observability_deps", "parking_lot", "rdkafka", + "time 0.1.0", "tokio", "uuid", ] diff --git a/Cargo.toml b/Cargo.toml index 24571806f6..7c5e3968ad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -111,6 +111,7 @@ trace_exporters = { path = "trace_exporters" } trace_http = { path = "trace_http" } tracker = { path = "tracker" } trogging = { path = "trogging", default-features = false, features = ["structopt"] } +time = { path = "time" } # Crates.io dependencies, in alphabetical order arrow = { version = "5.5", features = ["prettyprint"] } diff --git a/entry/src/entry.rs b/entry/src/entry.rs index ebdefbfdbf..57cfe30927 100644 --- a/entry/src/entry.rs +++ b/entry/src/entry.rs @@ -1749,13 +1749,13 @@ pub struct SequencedEntry { /// /// At the time of writing, sequences will not be present when there is no configured mechanism to define the order /// of all writes. - sequence_and_producer_ts: Option<(Sequence, DateTime<Utc>)>, + sequence_and_producer_ts: Option<(Sequence, Time)>, } impl SequencedEntry { pub fn new_from_sequence( sequence: Sequence, - producer_wallclock_timestamp: DateTime<Utc>, + producer_wallclock_timestamp: Time, entry: Entry, ) -> Self { Self { @@ -1781,7 +1781,7 @@ impl SequencedEntry { .map(|(sequence, _ts)| sequence) } - pub fn producer_wallclock_timestamp(&self) -> Option<DateTime<Utc>> { + pub fn producer_wallclock_timestamp(&self) -> Option<Time> { self.sequence_and_producer_ts .as_ref() .map(|(_sequence, ts)| *ts) diff --git a/server/src/application.rs b/server/src/application.rs index 327daecf1d..9fc686babe 100644 --- a/server/src/application.rs +++ b/server/src/application.rs @@ -32,7 +32,8 @@ impl ApplicationState { let time_provider: Arc<dyn TimeProvider> = Arc::new(time::SystemProvider::new()); let job_registry = Arc::new(JobRegistry::new(Arc::clone(&metric_registry))); - let write_buffer_factory = Arc::new(WriteBufferConfigFactory::new()); + let write_buffer_factory = + Arc::new(WriteBufferConfigFactory::new(Arc::clone(&time_provider))); Self { object_store, diff --git a/server/src/database.rs b/server/src/database.rs index c3063b0ace..d0408d5620 100644 --- a/server/src/database.rs +++ b/server/src/database.rs @@ -1197,7 +1197,6 @@ impl DatabaseStateInitialized { #[cfg(test)] mod tests { use super::*; - use chrono::Utc; use data_types::database_rules::{ PartitionTemplate, TemplatePart, WriteBufferConnection, WriteBufferDirection, }; @@ -1209,6 +1208,7 @@ mod tests { num::NonZeroU32, time::Instant, }; + use time::Time; use uuid::Uuid; use write_buffer::{config::WriteBufferConfigFactory, mock::MockBufferSharedState}; @@ -1383,19 +1383,19 @@ mod tests { .unwrap(); state.push_entry(SequencedEntry::new_from_sequence( Sequence::new(0, 10), - Utc::now(), + Time::from_timestamp_nanos(0), entry_a, )); state.push_entry(SequencedEntry::new_from_sequence( Sequence::new(0, 11), - Utc::now(), + Time::from_timestamp_nanos(0), entry_b, )); // setup application let application = ApplicationState::new(Arc::new(ObjectStore::new_in_memory()), None); - let mut factory = WriteBufferConfigFactory::new(); + let mut factory = 
WriteBufferConfigFactory::new(Arc::clone(application.time_provider())); factory.register_mock("my_mock".to_string(), state.clone()); let application = Arc::new(application.with_write_buffer_factory(Arc::new(factory))); @@ -1471,7 +1471,7 @@ mod tests { .unwrap(); state.push_entry(SequencedEntry::new_from_sequence( Sequence::new(0, 12), - Utc::now(), + Time::from_timestamp_nanos(0), entry_c, )); @@ -1492,7 +1492,7 @@ mod tests { .unwrap(); state.push_entry(SequencedEntry::new_from_sequence( Sequence::new(0, 13), - Utc::now(), + Time::from_timestamp_nanos(0), entry_d, )); let db = database.initialized_db().unwrap(); diff --git a/server/src/db.rs b/server/src/db.rs index 869cb39ec7..ae607c06f6 100644 --- a/server/src/db.rs +++ b/server/src/db.rs @@ -1510,8 +1510,10 @@ mod tests { // configured and the mutable buffer isn't let write_buffer_state = MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(1).unwrap()); - let write_buffer = - Arc::new(MockBufferForWriting::new(write_buffer_state.clone(), None).unwrap()); + let time_provider = Arc::new(time::MockProvider::new(Time::from_timestamp_nanos(0))); + let write_buffer = Arc::new( + MockBufferForWriting::new(write_buffer_state.clone(), None, time_provider).unwrap(), + ); let test_db = TestDb::builder() .write_buffer_producer(write_buffer) .lifecycle_rules(LifecycleRules { @@ -1534,8 +1536,10 @@ mod tests { // configured. let write_buffer_state = MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(1).unwrap()); - let write_buffer = - Arc::new(MockBufferForWriting::new(write_buffer_state.clone(), None).unwrap()); + let time_provider = Arc::new(time::MockProvider::new(Time::from_timestamp_nanos(0))); + let write_buffer = Arc::new( + MockBufferForWriting::new(write_buffer_state.clone(), None, time_provider).unwrap(), + ); let db = TestDb::builder() .write_buffer_producer(write_buffer) .build() @@ -2467,7 +2471,10 @@ mod tests { // is a write buffer configured. 
let write_buffer_state = MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(1).unwrap()); - let write_buffer = Arc::new(MockBufferForWriting::new(write_buffer_state, None).unwrap()); + let time_provider = Arc::new(time::MockProvider::new(Time::from_timestamp_nanos(0))); + let write_buffer = Arc::new( + MockBufferForWriting::new(write_buffer_state.clone(), None, time_provider).unwrap(), + ); let db = TestDb::builder() .write_buffer_producer(write_buffer) .build() diff --git a/server/src/db/replay.rs b/server/src/db/replay.rs index 309e62f303..e2e9d1d6b5 100644 --- a/server/src/db/replay.rs +++ b/server/src/db/replay.rs @@ -418,7 +418,6 @@ mod tests { }; use arrow_util::assert_batches_eq; - use chrono::Utc; use data_types::{ database_rules::{PartitionTemplate, Partitioner, TemplatePart}, sequence::Sequence, @@ -463,7 +462,7 @@ mod tests { SequencedEntry::new_from_sequence( Sequence::new(sequencer_id, sequence_number), - Utc::now(), + Time::from_timestamp_nanos(0), entries.pop().unwrap(), ) } @@ -2618,12 +2617,12 @@ mod tests { MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(1).unwrap()); write_buffer_state.push_entry(SequencedEntry::new_from_sequence( Sequence::new(0, 0), - Utc::now(), + Time::from_timestamp_nanos(0), lp_to_entry("cpu bar=1 0"), )); write_buffer_state.push_entry(SequencedEntry::new_from_sequence( Sequence::new(0, 2), - Utc::now(), + Time::from_timestamp_nanos(0), lp_to_entry("cpu bar=1 10"), )); let mut write_buffer = MockBufferForReading::new(write_buffer_state, None).unwrap(); @@ -2668,17 +2667,17 @@ mod tests { MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(3).unwrap()); write_buffer_state.push_entry(SequencedEntry::new_from_sequence( Sequence::new(0, 0), - Utc::now(), + Time::from_timestamp_nanos(0), lp_to_entry("cpu bar=0 0"), )); write_buffer_state.push_entry(SequencedEntry::new_from_sequence( Sequence::new(0, 3), - Utc::now(), + Time::from_timestamp_nanos(0), lp_to_entry("cpu bar=3 3"), )); write_buffer_state.push_entry(SequencedEntry::new_from_sequence( Sequence::new(1, 1), - Utc::now(), + Time::from_timestamp_nanos(0), lp_to_entry("cpu bar=11 11"), )); let mut write_buffer = MockBufferForReading::new(write_buffer_state.clone(), None).unwrap(); @@ -2693,17 +2692,17 @@ mod tests { // add more data write_buffer_state.push_entry(SequencedEntry::new_from_sequence( Sequence::new(0, 4), - Utc::now(), + Time::from_timestamp_nanos(0), lp_to_entry("cpu bar=4 4"), )); write_buffer_state.push_entry(SequencedEntry::new_from_sequence( Sequence::new(1, 9), - Utc::now(), + Time::from_timestamp_nanos(0), lp_to_entry("cpu bar=19 19"), )); write_buffer_state.push_entry(SequencedEntry::new_from_sequence( Sequence::new(2, 0), - Utc::now(), + Time::from_timestamp_nanos(0), lp_to_entry("cpu bar=20 20"), )); diff --git a/server/src/lib.rs b/server/src/lib.rs index de5929f18f..448ad9da17 100644 --- a/server/src/lib.rs +++ b/server/src/lib.rs @@ -2401,7 +2401,7 @@ mod tests { async fn write_buffer_errors_propagate() { let application = ApplicationState::new(Arc::new(ObjectStore::new_in_memory()), None); - let mut factory = WriteBufferConfigFactory::new(); + let mut factory = WriteBufferConfigFactory::new(Arc::clone(application.time_provider())); factory.register_always_fail_mock("my_mock".to_string()); let application = Arc::new(application.with_write_buffer_factory(Arc::new(factory))); diff --git a/server/src/write_buffer.rs b/server/src/write_buffer.rs index 7f41afbbfb..67c925d51f 100644 --- a/server/src/write_buffer.rs +++ 
b/server/src/write_buffer.rs @@ -196,8 +196,6 @@ mod tests { use std::convert::TryFrom; use std::num::{NonZeroU32, NonZeroUsize}; - use chrono::{TimeZone, Utc}; - use ::test_helpers::assert_contains; use arrow_util::assert_batches_eq; use data_types::database_rules::{PartitionTemplate, TemplatePart}; @@ -215,6 +213,7 @@ mod tests { use super::*; use metric::{Attributes, Metric, U64Counter, U64Gauge}; + use time::Time; #[tokio::test] async fn read_from_write_buffer_updates_persistence_windows() { @@ -225,22 +224,22 @@ mod tests { MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(2).unwrap()); write_buffer_state.push_entry(SequencedEntry::new_from_sequence( Sequence::new(0, 0), - Utc::now(), + Time::from_timestamp_nanos(0), entry.clone(), )); write_buffer_state.push_entry(SequencedEntry::new_from_sequence( Sequence::new(1, 0), - Utc::now(), + Time::from_timestamp_nanos(0), entry.clone(), )); write_buffer_state.push_entry(SequencedEntry::new_from_sequence( Sequence::new(1, 2), - Utc::now(), + Time::from_timestamp_nanos(0), entry.clone(), )); write_buffer_state.push_entry(SequencedEntry::new_from_sequence( Sequence::new(0, 1), - Utc::now(), + Time::from_timestamp_nanos(0), entry, )); let db = TestDb::builder().build().await.db; @@ -293,8 +292,8 @@ mod tests { async fn read_from_write_buffer_write_to_mutable_buffer() { let write_buffer_state = MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(1).unwrap()); - let ingest_ts1 = Utc.timestamp_millis(42); - let ingest_ts2 = Utc.timestamp_millis(1337); + let ingest_ts1 = Time::from_timestamp_millis(42); + let ingest_ts2 = Time::from_timestamp_millis(1337); write_buffer_state.push_entry(SequencedEntry::new_from_sequence( Sequence::new(0, 0), ingest_ts1, @@ -454,13 +453,13 @@ mod tests { ); write_buffer_state.push_entry(SequencedEntry::new_from_sequence( Sequence::new(0, sequence_number), - Utc::now(), + Time::from_timestamp_nanos(0), lp_to_entry(&lp), )); } write_buffer_state.push_entry(SequencedEntry::new_from_sequence( Sequence::new(0, n_entries), - Utc::now(), + Time::from_timestamp_nanos(0), lp_to_entry("table_2,partition_by=a foo=1 0"), )); diff --git a/tests/end_to_end_cases/scenario.rs b/tests/end_to_end_cases/scenario.rs index 9ae199d0f2..55abb8ec70 100644 --- a/tests/end_to_end_cases/scenario.rs +++ b/tests/end_to_end_cases/scenario.rs @@ -717,6 +717,7 @@ pub async fn fixture_replay_broken(db_name: &str, kafka_connection: &str) -> Ser db_name, &Default::default(), creation_config.as_ref(), + Arc::new(time::SystemProvider::new()), ) .await .unwrap(); diff --git a/write_buffer/Cargo.toml b/write_buffer/Cargo.toml index 15c7b9de55..72679cd76c 100644 --- a/write_buffer/Cargo.toml +++ b/write_buffer/Cargo.toml @@ -13,6 +13,7 @@ futures = "0.3" observability_deps = { path = "../observability_deps" } parking_lot = "0.11.2" rdkafka = "0.26.0" +time = { path = "../time" } tokio = { version = "1.11", features = ["macros", "fs"] } uuid = { version = "0.8", features = ["serde", "v4"] } diff --git a/write_buffer/src/config.rs b/write_buffer/src/config.rs index 4c39832054..e3f4dc1d3d 100644 --- a/write_buffer/src/config.rs +++ b/write_buffer/src/config.rs @@ -7,6 +7,7 @@ use data_types::{ database_rules::{WriteBufferConnection, WriteBufferDirection}, server_id::ServerId, }; +use time::TimeProvider; use crate::{ core::{WriteBufferError, WriteBufferReading, WriteBufferWriting}, @@ -34,13 +35,15 @@ enum Mock { #[derive(Debug)] pub struct WriteBufferConfigFactory { mocks: BTreeMap<String, Mock>, + time_provider: Arc<dyn 
TimeProvider>, } impl WriteBufferConfigFactory { /// Create new factory w/o any mocks. - pub fn new() -> Self { + pub fn new(time_provider: Arc<dyn TimeProvider>) -> Self { Self { mocks: Default::default(), + time_provider, } } @@ -97,14 +100,18 @@ impl WriteBufferConfigFactory { db_name, &cfg.connection_config, cfg.creation_config.as_ref(), + Arc::clone(&self.time_provider), ) .await?; Arc::new(kafka_buffer) as _ } "mock" => match self.get_mock(&cfg.connection)? { Mock::Normal(state) => { - let mock_buffer = - MockBufferForWriting::new(state, cfg.creation_config.as_ref())?; + let mock_buffer = MockBufferForWriting::new( + state, + cfg.creation_config.as_ref(), + Arc::clone(&self.time_provider), + )?; Arc::new(mock_buffer) as _ } Mock::AlwaysFailing => { @@ -164,12 +171,6 @@ impl WriteBufferConfigFactory { } } -impl Default for WriteBufferConfigFactory { - fn default() -> Self { - Self::new() - } -} - #[cfg(test)] mod tests { use std::{convert::TryFrom, num::NonZeroU32}; @@ -186,7 +187,8 @@ mod tests { #[tokio::test] async fn test_writing_kafka() { let conn = maybe_skip_kafka_integration!(); - let factory = WriteBufferConfigFactory::new(); + let time = Arc::new(time::SystemProvider::new()); + let factory = WriteBufferConfigFactory::new(time); let db_name = DatabaseName::try_from(random_kafka_topic()).unwrap(); let cfg = WriteBufferConnection { direction: WriteBufferDirection::Write, @@ -206,7 +208,8 @@ mod tests { #[tokio::test] async fn test_reading_kafka() { let conn = maybe_skip_kafka_integration!(); - let factory = WriteBufferConfigFactory::new(); + let time = Arc::new(time::SystemProvider::new()); + let factory = WriteBufferConfigFactory::new(time); let server_id = ServerId::try_from(1).unwrap(); let db_name = DatabaseName::try_from(random_kafka_topic()).unwrap(); @@ -227,7 +230,8 @@ mod tests { #[tokio::test] async fn test_writing_mock() { - let mut factory = WriteBufferConfigFactory::new(); + let time = Arc::new(time::SystemProvider::new()); + let mut factory = WriteBufferConfigFactory::new(time); let state = MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(1).unwrap()); @@ -264,7 +268,8 @@ mod tests { #[tokio::test] async fn test_reading_mock() { - let mut factory = WriteBufferConfigFactory::new(); + let time = Arc::new(time::SystemProvider::new()); + let mut factory = WriteBufferConfigFactory::new(time); let state = MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(1).unwrap()); @@ -302,7 +307,8 @@ mod tests { #[tokio::test] async fn test_writing_mock_failing() { - let mut factory = WriteBufferConfigFactory::new(); + let time = Arc::new(time::SystemProvider::new()); + let mut factory = WriteBufferConfigFactory::new(time); let mock_name = "some_mock"; factory.register_always_fail_mock(mock_name.to_string()); @@ -337,7 +343,8 @@ mod tests { #[tokio::test] async fn test_reading_mock_failing() { - let mut factory = WriteBufferConfigFactory::new(); + let time = Arc::new(time::SystemProvider::new()); + let mut factory = WriteBufferConfigFactory::new(time); let mock_name = "some_mock"; factory.register_always_fail_mock(mock_name.to_string()); @@ -375,7 +382,8 @@ mod tests { #[test] #[should_panic(expected = "Mock with the name 'some_mock' already registered")] fn test_register_mock_twice_panics() { - let mut factory = WriteBufferConfigFactory::new(); + let time = Arc::new(time::SystemProvider::new()); + let mut factory = WriteBufferConfigFactory::new(time); let state = MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(1).unwrap()); 
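// Editor's sketch, not part of the patch: how the time-provider plumbing added
// in this commit fits together. Both the factory and the mock writer now take
// an `Arc<dyn TimeProvider>`, so tests can pin producer timestamps with
// `time::MockProvider`. Only constructors that appear in this patch series are
// assumed; the function name is hypothetical.
use std::{convert::TryFrom, num::NonZeroU32, sync::Arc};
use time::Time;
use write_buffer::config::WriteBufferConfigFactory;
use write_buffer::mock::{MockBufferForWriting, MockBufferSharedState};

fn wire_up_mock_write_buffer() {
    let time_provider = Arc::new(time::MockProvider::new(Time::from_timestamp_nanos(0)));

    // The factory needs a time provider up front...
    let mut factory =
        WriteBufferConfigFactory::new(Arc::<time::MockProvider>::clone(&time_provider));

    // ...and so does the mock writer, which stamps entries via `TimeProvider::now()`.
    let state =
        MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(1).unwrap());
    factory.register_mock("my_mock".to_string(), state.clone());

    let _writer = MockBufferForWriting::new(state, None, time_provider).unwrap();
}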
diff --git a/write_buffer/src/core.rs b/write_buffer/src/core.rs index 582181925e..2efdc35e1b 100644 --- a/write_buffer/src/core.rs +++ b/write_buffer/src/core.rs @@ -4,10 +4,10 @@ use std::{ }; use async_trait::async_trait; -use chrono::{DateTime, Utc}; use data_types::sequence::Sequence; use entry::{Entry, SequencedEntry}; use futures::{future::BoxFuture, stream::BoxStream}; +use time::Time; /// Generic boxed error type that is used in this crate. /// @@ -30,7 +30,7 @@ pub trait WriteBufferWriting: Sync + Send + Debug + 'static { &self, entry: &Entry, sequencer_id: u32, - ) -> Result<(Sequence, DateTime<Utc>), WriteBufferError>; + ) -> Result<(Sequence, Time), WriteBufferError>; /// Return type (like `"mock"` or `"kafka"`) of this writer. fn type_name(&self) -> &'static str; @@ -88,13 +88,15 @@ pub mod test_utils { collections::{BTreeMap, BTreeSet}, convert::TryFrom, num::NonZeroU32, + sync::Arc, time::Duration, }; use async_trait::async_trait; - use chrono::{DateTime, TimeZone, Utc}; + use chrono::{TimeZone, Utc}; use entry::{test_helpers::lp_to_entry, Entry}; use futures::{StreamExt, TryStreamExt}; + use time::{Time, TimeProvider}; use super::{WriteBufferError, WriteBufferReading, WriteBufferWriting}; @@ -108,7 +110,16 @@ pub mod test_utils { /// /// This will be called multiple times during the test suite. Each resulting context must represent an isolated /// environment. - async fn new_context(&self, n_sequencers: NonZeroU32) -> Self::Context; + async fn new_context(&self, n_sequencers: NonZeroU32) -> Self::Context { + self.new_context_with_time(n_sequencers, Arc::new(time::SystemProvider::new())) + .await + } + + async fn new_context_with_time( + &self, + n_sequencers: NonZeroU32, + time_provider: Arc<dyn TimeProvider>, + ) -> Self::Context; } /// Context used during testing. @@ -505,7 +516,15 @@ pub mod test_utils { where T: TestAdapter, { - let context = adapter.new_context(NonZeroU32::try_from(1).unwrap()).await; + // Note: Roundtrips are only guaranteed for millisecond-precision + let t0 = Time::from_date_time(Utc.timestamp_millis(129)); + let time = Arc::new(time::MockProvider::new(t0)); + let context = adapter + .new_context_with_time( + NonZeroU32::try_from(1).unwrap(), + Arc::<time::MockProvider>::clone(&time), + ) + .await; let entry = lp_to_entry("upc user=1 100"); @@ -516,31 +535,16 @@ pub mod test_utils { assert_eq!(streams.len(), 1); let (sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap(); - // ingest data - // - // We want to capture the time of `store_entry`. However for certain sequencers (like Kafka) the time is - // slightly imprecise in a way that it truncates the time to milliseconds. So the workaround in the test is: - // - // 1. Capture a `ts_pre` from which we know that it is close but less or equal to the store time. We use the - // wallclock for that but truncate to milliseconds. - // 2. Capture a `ts_post` from which we know that it is close but greater or equal to the store time. We use - // the wallclock but if it has a sub-millisecond part we use the next millisecond (it's like a ceil - // operation). - // 3. Wait a bit between step 2 and the restore operation so that we can be really sure that the restore - // operation must know the timestamp of the store operation and cannot just "guess" it. 
- let ts_pre = timestamp_floor_millis(Utc::now()); let reported_ts = writer.store_entry(&entry, sequencer_id).await.unwrap().1; - let ts_post = timestamp_ceil_millis(Utc::now()); - // wait a bit - tokio::time::sleep(Duration::from_millis(100)).await; + // advance time + time.inc(Duration::from_secs(10)); // check that the timestamp records the ingestion time, not the read time let sequenced_entry = stream.stream.next().await.unwrap().unwrap(); let ts_entry = sequenced_entry.producer_wallclock_timestamp().unwrap(); - assert!(ts_entry >= ts_pre, "{} >= {}", ts_entry, ts_pre); - assert!(ts_entry <= ts_post, "{} <= {}", ts_entry, ts_post); - assert_eq!(ts_entry, reported_ts); + assert_eq!(ts_entry, t0); + assert_eq!(reported_ts, t0); } /// Test that sequencer auto-creation works. @@ -645,28 +649,6 @@ pub mod test_utils { } } - /// Return largest "milliseconds only" timestamp less than or equal to the given timestamp. - /// - /// The result will not have micro- or nanoseconds attached. - fn timestamp_floor_millis(ts: DateTime<Utc>) -> DateTime<Utc> { - let millis = ts.timestamp_millis(); - Utc.timestamp_millis(millis) - } - - /// Return smallest "milliseconds only" timestamp greater than or equal to the given timestamp. - /// - /// The result will not have micro- or nanoseconds attached. - fn timestamp_ceil_millis(ts: DateTime<Utc>) -> DateTime<Utc> { - let millis = ts.timestamp_millis(); - let ts2 = Utc.timestamp_millis(millis); - if ts2 != ts { - // ts has sub-milli precision, increase millis by 1 (ceiling) - Utc.timestamp_millis(millis + 1) - } else { - ts2 - } - } - /// Pops first entry from map. /// /// Helper until <https://github.com/rust-lang/rust/issues/62924> is stable. diff --git a/write_buffer/src/kafka.rs b/write_buffer/src/kafka.rs index 85ab00e6cf..582387916e 100644 --- a/write_buffer/src/kafka.rs +++ b/write_buffer/src/kafka.rs @@ -7,7 +7,7 @@ use std::{ }; use async_trait::async_trait; -use chrono::{DateTime, TimeZone, Utc}; +use chrono::{TimeZone, Utc}; use data_types::{ database_rules::WriteBufferCreationConfig, sequence::Sequence, server_id::ServerId, }; @@ -24,6 +24,7 @@ use rdkafka::{ util::Timeout, ClientConfig, Message, Offset, TopicPartitionList, }; +use time::{Time, TimeProvider}; use crate::core::{ EntryStream, FetchHighWatermark, FetchHighWatermarkFut, WriteBufferError, WriteBufferReading, @@ -33,6 +34,7 @@ use crate::core::{ pub struct KafkaBufferProducer { conn: String, database_name: String, + time_provider: Arc<dyn TimeProvider>, producer: FutureProducer, partitions: BTreeSet<u32>, } @@ -58,12 +60,13 @@ impl WriteBufferWriting for KafkaBufferProducer { &self, entry: &Entry, sequencer_id: u32, - ) -> Result<(Sequence, DateTime<Utc>), WriteBufferError> { + ) -> Result<(Sequence, Time), WriteBufferError> { let partition = i32::try_from(sequencer_id)?; // truncate milliseconds from timestamps because that's what Kafka supports - let timestamp_millis = Utc::now().timestamp_millis(); - let timestamp = Utc.timestamp_millis(timestamp_millis); + let date_time = self.time_provider.now().date_time(); + let timestamp_millis = date_time.timestamp_millis(); + let timestamp = Time::from_timestamp_millis(timestamp_millis); // This type annotation is necessary because `FutureRecord` is generic over key type, but // key is optional and we're not setting a key. `String` is arbitrary. 
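// Editor's sketch, not part of the patch: the hunk above makes `store_entry`
// truncate the producer timestamp to millisecond precision before handing it
// to rdkafka, which is why the generic timestamp test now pins the mock clock
// to a millisecond-precision value. A standalone illustration of that
// truncation; `truncate_to_millis` and `demo` are hypothetical names, while
// the `Time` methods are the ones used in this patch.
fn truncate_to_millis(t: time::Time) -> time::Time {
    time::Time::from_timestamp_millis(t.date_time().timestamp_millis())
}

fn demo() {
    // 1_123_456_789 ns is 1_123 ms plus a sub-millisecond remainder that Kafka drops.
    let t = time::Time::from_timestamp_nanos(1_123_456_789);
    assert_eq!(truncate_to_millis(t), time::Time::from_timestamp_millis(1_123));
}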
@@ -102,6 +105,7 @@ impl KafkaBufferProducer { database_name: impl Into<String> + Send, connection_config: &HashMap<String, String>, creation_config: Option<&WriteBufferCreationConfig>, + time_provider: Arc<dyn TimeProvider>, ) -> Result<Self, WriteBufferError> { let conn = conn.into(); let database_name = database_name.into(); @@ -134,6 +138,7 @@ impl KafkaBufferProducer { Ok(Self { conn, database_name, + time_provider, producer, partitions, }) @@ -193,7 +198,7 @@ impl WriteBufferReading for KafkaBufferConsumer { number: message.offset().try_into()?, }; - Ok(SequencedEntry::new_from_sequence(sequence, timestamp, entry)) + Ok(SequencedEntry::new_from_sequence(sequence, Time::from_date_time(timestamp), entry)) }) .boxed(); @@ -543,6 +548,7 @@ mod tests { num::NonZeroU32, sync::atomic::{AtomicU32, Ordering}, }; + use time::TimeProvider; use crate::{ core::test_utils::{perform_generic_tests, TestAdapter, TestContext}, @@ -566,12 +572,17 @@ mod tests { impl TestAdapter for KafkaTestAdapter { type Context = KafkaTestContext; - async fn new_context(&self, n_sequencers: NonZeroU32) -> Self::Context { + async fn new_context_with_time( + &self, + n_sequencers: NonZeroU32, + time_provider: Arc<dyn TimeProvider>, + ) -> Self::Context { KafkaTestContext { conn: self.conn.clone(), database_name: random_kafka_topic(), server_id_counter: AtomicU32::new(1), n_sequencers, + time_provider, } } } @@ -581,6 +592,7 @@ mod tests { database_name: String, server_id_counter: AtomicU32, n_sequencers: NonZeroU32, + time_provider: Arc<dyn TimeProvider>, } impl KafkaTestContext { @@ -604,6 +616,7 @@ mod tests { &self.database_name, &Default::default(), self.creation_config(creation_config).as_ref(), + Arc::clone(&self.time_provider), ) .await } diff --git a/write_buffer/src/mock.rs b/write_buffer/src/mock.rs index 560548c58a..d381b9fa67 100644 --- a/write_buffer/src/mock.rs +++ b/write_buffer/src/mock.rs @@ -6,13 +6,13 @@ use std::{ }; use async_trait::async_trait; -use chrono::{DateTime, Utc}; use futures::{stream, FutureExt, StreamExt}; use parking_lot::Mutex; use data_types::database_rules::WriteBufferCreationConfig; use data_types::sequence::Sequence; use entry::{Entry, SequencedEntry}; +use time::{Time, TimeProvider}; use crate::core::{ EntryStream, FetchHighWatermark, FetchHighWatermarkFut, WriteBufferError, WriteBufferReading, @@ -199,12 +199,14 @@ impl MockBufferSharedState { #[derive(Debug)] pub struct MockBufferForWriting { state: MockBufferSharedState, + time_provider: Arc<dyn TimeProvider>, } impl MockBufferForWriting { pub fn new( state: MockBufferSharedState, creation_config: Option<&WriteBufferCreationConfig>, + time_provider: Arc<dyn TimeProvider>, ) -> Result<Self, WriteBufferError> { state.maybe_auto_init(creation_config); @@ -215,7 +217,10 @@ impl MockBufferForWriting { } } - Ok(Self { state }) + Ok(Self { + state, + time_provider, + }) } } @@ -231,7 +236,7 @@ impl WriteBufferWriting for MockBufferForWriting { &self, entry: &Entry, sequencer_id: u32, - ) -> Result<(Sequence, DateTime<Utc>), WriteBufferError> { + ) -> Result<(Sequence, Time), WriteBufferError> { let mut guard = self.state.entries.lock(); let entries = guard.as_mut().unwrap(); let sequencer_entries = entries.get_mut(&sequencer_id).unwrap(); @@ -242,7 +247,7 @@ impl WriteBufferWriting for MockBufferForWriting { id: sequencer_id, number: sequence_number, }; - let timestamp = Utc::now(); + let timestamp = self.time_provider.now(); sequencer_entries.push(Ok(SequencedEntry::new_from_sequence( sequence, timestamp, @@ -271,7 +276,7 @@ impl 
WriteBufferWriting for MockBufferForWritingThatAlwaysErrors { &self, _entry: &Entry, _sequencer_id: u32, - ) -> Result<(Sequence, DateTime<Utc>), WriteBufferError> { + ) -> Result<(Sequence, Time), WriteBufferError> { Err(String::from( "Something bad happened on the way to writing an entry in the write buffer", ) @@ -492,6 +497,7 @@ mod tests { use std::time::Duration; use entry::test_helpers::lp_to_entry; + use time::TimeProvider; use crate::core::test_utils::{map_pop_first, perform_generic_tests, TestAdapter, TestContext}; @@ -503,10 +509,15 @@ mod tests { impl TestAdapter for MockTestAdapter { type Context = MockTestContext; - async fn new_context(&self, n_sequencers: NonZeroU32) -> Self::Context { + async fn new_context_with_time( + &self, + n_sequencers: NonZeroU32, + time_provider: Arc<dyn TimeProvider>, + ) -> Self::Context { MockTestContext { state: MockBufferSharedState::uninitialized(), n_sequencers, + time_provider, } } } @@ -514,6 +525,7 @@ mod tests { struct MockTestContext { state: MockBufferSharedState, n_sequencers: NonZeroU32, + time_provider: Arc<dyn TimeProvider>, } impl MockTestContext { @@ -535,6 +547,7 @@ mod tests { MockBufferForWriting::new( self.state.clone(), self.creation_config(creation_config).as_ref(), + Arc::clone(&self.time_provider), ) } @@ -569,7 +582,7 @@ mod tests { let sequence = Sequence::new(2, 0); state.push_entry(SequencedEntry::new_from_sequence( sequence, - Utc::now(), + Time::from_timestamp_nanos(0), entry, )); } @@ -582,7 +595,7 @@ mod tests { let sequence = Sequence::new(0, 0); state.push_entry(SequencedEntry::new_from_sequence( sequence, - Utc::now(), + Time::from_timestamp_nanos(0), entry, )); } @@ -598,12 +611,12 @@ mod tests { let sequence = Sequence::new(1, 13); state.push_entry(SequencedEntry::new_from_sequence( sequence, - Utc::now(), + Time::from_timestamp_nanos(0), entry.clone(), )); state.push_entry(SequencedEntry::new_from_sequence( sequence, - Utc::now(), + Time::from_timestamp_nanos(0), entry, )); } @@ -620,12 +633,12 @@ mod tests { let sequence_2 = Sequence::new(1, 12); state.push_entry(SequencedEntry::new_from_sequence( sequence_1, - Utc::now(), + Time::from_timestamp_nanos(0), entry.clone(), )); state.push_entry(SequencedEntry::new_from_sequence( sequence_2, - Utc::now(), + Time::from_timestamp_nanos(0), entry, )); } @@ -687,12 +700,12 @@ mod tests { let sequence_2 = Sequence::new(1, 12); state.push_entry(SequencedEntry::new_from_sequence( sequence_1, - Utc::now(), + Time::from_timestamp_nanos(0), entry.clone(), )); state.push_entry(SequencedEntry::new_from_sequence( sequence_2, - Utc::now(), + Time::from_timestamp_nanos(0), entry, )); @@ -749,7 +762,7 @@ mod tests { let sequence_1 = Sequence::new(0, 11); state.push_entry(SequencedEntry::new_from_sequence( sequence_1, - Utc::now(), + Time::from_timestamp_nanos(0), entry, )); @@ -786,13 +799,12 @@ mod tests { #[tokio::test] async fn test_delayed_insert() { - let now = Utc::now(); let state = MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(1).unwrap()); state.push_entry(SequencedEntry::new_from_sequence( Sequence::new(0, 0), - now, + Time::from_timestamp_nanos(0), lp_to_entry("mem foo=1 10"), )); @@ -812,7 +824,7 @@ mod tests { state.push_entry(SequencedEntry::new_from_sequence( Sequence::new(0, 1), - now, + Time::from_timestamp_nanos(0), lp_to_entry("mem foo=2 20"), )); From 3dfe400e6ba5db0813f39629c5670e4210b78829 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Tue, 12 Oct 2021 13:09:08 +0100 Subject: 
[PATCH 08/17] feat: migrate write path to TimeProvider (#2722) (#2807) --- Cargo.lock | 2 + data_types/src/chunk_metadata.rs | 8 +- generated_types/Cargo.toml | 1 + generated_types/src/chunk.rs | 30 ++-- internal_types/Cargo.toml | 1 + internal_types/src/access.rs | 48 +++--- lifecycle/src/lib.rs | 2 +- lifecycle/src/policy.rs | 23 ++- parquet_file/src/catalog/dump.rs | 6 +- parquet_file/src/catalog/rebuild.rs | 8 +- parquet_file/src/metadata.rs | 30 ++-- parquet_file/src/storage.rs | 14 +- parquet_file/src/test_utils.rs | 11 +- server/src/database.rs | 11 +- server/src/db.rs | 230 +++++++++++-------------- server/src/db/catalog.rs | 21 ++- server/src/db/catalog/chunk.rs | 58 ++++--- server/src/db/catalog/partition.rs | 44 +++-- server/src/db/catalog/table.rs | 13 +- server/src/db/chunk.rs | 98 ++++++----- server/src/db/lifecycle.rs | 2 +- server/src/db/lifecycle/compact.rs | 30 ++-- server/src/db/lifecycle/persist.rs | 20 ++- server/src/db/lifecycle/write.rs | 2 +- server/src/db/load.rs | 54 +++++- server/src/db/replay.rs | 1 - server/src/db/system_tables/chunks.rs | 23 ++- server/src/db/system_tables/columns.rs | 14 +- server/src/lib.rs | 3 +- server/src/utils.rs | 1 + server/src/write_buffer.rs | 2 - 31 files changed, 432 insertions(+), 379 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 419f7754eb..9f24fa9634 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1271,6 +1271,7 @@ dependencies = [ "regex", "serde", "thiserror", + "time 0.1.0", "tonic", "tonic-build", ] @@ -1802,6 +1803,7 @@ dependencies = [ "futures", "parking_lot", "snafu", + "time 0.1.0", "tokio", ] diff --git a/data_types/src/chunk_metadata.rs b/data_types/src/chunk_metadata.rs index 2c56f603c7..2b879cc48f 100644 --- a/data_types/src/chunk_metadata.rs +++ b/data_types/src/chunk_metadata.rs @@ -2,8 +2,8 @@ use std::{convert::TryFrom, num::NonZeroU32, sync::Arc}; use bytes::Bytes; -use chrono::{DateTime, Utc}; use snafu::{ResultExt, Snafu}; +use time::Time; use uuid::Uuid; use crate::partition_metadata::PartitionAddr; @@ -149,17 +149,17 @@ pub struct ChunkSummary { pub row_count: usize, /// The time at which the chunk data was accessed, by a query or a write - pub time_of_last_access: Option<DateTime<Utc>>, + pub time_of_last_access: Option<Time>, /// The earliest time at which data contained within this chunk was written /// into IOx. Note due to the compaction, etc... this may not be the chunk /// that data was originally written into - pub time_of_first_write: DateTime<Utc>, + pub time_of_first_write: Time, /// The latest time at which data contained within this chunk was written /// into IOx. Note due to the compaction, etc... 
this may not be the chunk /// that data was originally written into - pub time_of_last_write: DateTime<Utc>, + pub time_of_last_write: Time, } /// Represents metadata about the physical storage of a column in a chunk diff --git a/generated_types/Cargo.toml b/generated_types/Cargo.toml index 51db90847d..614432c792 100644 --- a/generated_types/Cargo.toml +++ b/generated_types/Cargo.toml @@ -15,6 +15,7 @@ regex = "1.4" serde = { version = "1.0", features = ["derive"] } thiserror = "1.0.30" tonic = "0.5" +time = { path = "../time" } [dev-dependencies] chrono = { version = "0.4", features = ["serde"] } diff --git a/generated_types/src/chunk.rs b/generated_types/src/chunk.rs index 388aa9ceaa..4aa230e0ba 100644 --- a/generated_types/src/chunk.rs +++ b/generated_types/src/chunk.rs @@ -9,6 +9,7 @@ use std::{ convert::{TryFrom, TryInto}, sync::Arc, }; +use time::Time; /// Conversion code to management API chunk structure impl From<ChunkSummary> for management::Chunk { @@ -37,9 +38,9 @@ impl From<ChunkSummary> for management::Chunk { memory_bytes: memory_bytes as u64, object_store_bytes: object_store_bytes as u64, row_count: row_count as u64, - time_of_last_access: time_of_last_access.map(Into::into), - time_of_first_write: Some(time_of_first_write.into()), - time_of_last_write: Some(time_of_last_write.into()), + time_of_last_access: time_of_last_access.map(|t| t.date_time().into()), + time_of_first_write: Some(time_of_first_write.date_time().into()), + time_of_last_write: Some(time_of_last_write.date_time().into()), order: order.get(), } } @@ -74,10 +75,11 @@ impl TryFrom<management::Chunk> for ChunkSummary { fn try_from(proto: management::Chunk) -> Result<Self, Self::Error> { let convert_timestamp = |t: pbjson_types::Timestamp, field: &'static str| { - t.try_into().map_err(|_| FieldViolation { + let date_time = t.try_into().map_err(|_| FieldViolation { field: field.to_string(), description: "Timestamp must be positive".to_string(), - }) + })?; + Ok(Time::from_date_time(date_time)) }; let timestamp = |t: Option<pbjson_types::Timestamp>, field: &'static str| { @@ -166,12 +168,12 @@ impl TryFrom<management::ChunkLifecycleAction> for Option<ChunkLifecycleAction> mod test { use super::*; use bytes::Bytes; - use chrono::{TimeZone, Utc}; use data_types::chunk_metadata::ChunkOrder; + use time::Time; #[test] fn valid_proto_to_summary() { - let now = Utc::now(); + let now = Time::from_timestamp(2, 6); let proto = management::Chunk { partition_key: "foo".to_string(), table_name: "bar".to_string(), @@ -182,8 +184,8 @@ mod test { storage: management::ChunkStorage::ObjectStoreOnly.into(), lifecycle_action: management::ChunkLifecycleAction::Compacting.into(), - time_of_first_write: Some(now.into()), - time_of_last_write: Some(now.into()), + time_of_first_write: Some(now.date_time().into()), + time_of_last_write: Some(now.date_time().into()), time_of_last_access: Some(pbjson_types::Timestamp { seconds: 50, nanos: 7, @@ -203,7 +205,7 @@ mod test { lifecycle_action: Some(ChunkLifecycleAction::Compacting), time_of_first_write: now, time_of_last_write: now, - time_of_last_access: Some(Utc.timestamp_nanos(50_000_000_007)), + time_of_last_access: Some(Time::from_timestamp_nanos(50_000_000_007)), order: ChunkOrder::new(5).unwrap(), }; @@ -216,7 +218,7 @@ mod test { #[test] fn valid_summary_to_proto() { - let now = Utc::now(); + let now = Time::from_timestamp(756, 23); let summary = ChunkSummary { partition_key: Arc::from("foo"), table_name: Arc::from("bar"), @@ -228,7 +230,7 @@ mod test { lifecycle_action: 
Some(ChunkLifecycleAction::Persisting), time_of_first_write: now, time_of_last_write: now, - time_of_last_access: Some(Utc.timestamp_nanos(12_000_100_007)), + time_of_last_access: Some(Time::from_timestamp_nanos(12_000_100_007)), order: ChunkOrder::new(5).unwrap(), }; @@ -243,8 +245,8 @@ mod test { row_count: 321, storage: management::ChunkStorage::ObjectStoreOnly.into(), lifecycle_action: management::ChunkLifecycleAction::Persisting.into(), - time_of_first_write: Some(now.into()), - time_of_last_write: Some(now.into()), + time_of_first_write: Some(now.date_time().into()), + time_of_last_write: Some(now.date_time().into()), time_of_last_access: Some(pbjson_types::Timestamp { seconds: 12, nanos: 100_007, diff --git a/internal_types/Cargo.toml b/internal_types/Cargo.toml index b96434c2b4..3e925b508e 100644 --- a/internal_types/Cargo.toml +++ b/internal_types/Cargo.toml @@ -10,6 +10,7 @@ readme = "README.md" chrono = "0.4" parking_lot = "0.11" snafu = "0.6" +time = { path = "../time" } tokio = { version = "1.11", features = ["sync"] } [dev-dependencies] diff --git a/internal_types/src/access.rs b/internal_types/src/access.rs index dda62ff019..327e32c1b2 100644 --- a/internal_types/src/access.rs +++ b/internal_types/src/access.rs @@ -1,19 +1,14 @@ -use chrono::{DateTime, Utc}; use parking_lot::RwLock; use std::sync::Arc; +use time::{Time, TimeProvider}; /// A struct that allows recording access by a query #[derive(Debug, Clone)] pub struct AccessRecorder { + time_provider: Arc<dyn TimeProvider>, state: Arc<RwLock<AccessMetrics>>, } -impl Default for AccessRecorder { - fn default() -> Self { - Self::new(Utc::now()) - } -} - #[derive(Debug, Clone, Eq, PartialEq)] pub struct AccessMetrics { /// The number of accesses that have been recorded @@ -21,20 +16,22 @@ pub struct AccessMetrics { /// The time of the last access or if none the /// time when the `AccessRecorder` was created - pub last_access: DateTime<Utc>, + pub last_access: Time, } impl AccessMetrics { - /// Returns the Instant of the last access if any - pub fn last_access(&self) -> Option<DateTime<Utc>> { + /// Returns the time of the last access if any + pub fn last_access(&self) -> Option<Time> { (self.count > 0).then(|| self.last_access) } } impl AccessRecorder { - /// Creates a new AccessRecorder with the provided creation DateTime - pub fn new(now: DateTime<Utc>) -> Self { + /// Creates a new AccessRecorder + pub fn new(time_provider: Arc<dyn TimeProvider>) -> Self { + let now = time_provider.now(); Self { + time_provider, state: Arc::new(RwLock::new(AccessMetrics { count: 0, last_access: now, @@ -42,18 +39,14 @@ impl AccessRecorder { } } - /// Records an access at the given DateTime - pub fn record_access(&self, now: DateTime<Utc>) { + /// Records an access + pub fn record_access(&self) { + let now = self.time_provider.now(); let mut state = self.state.write(); state.last_access = state.last_access.max(now); state.count += 1; } - /// Records an access at the current time - pub fn record_access_now(&self) { - self.record_access(Utc::now()) - } - /// Gets the access metrics pub fn get_metrics(&self) -> AccessMetrics { self.state.read().clone() @@ -63,15 +56,16 @@ impl AccessRecorder { #[cfg(test)] mod tests { use super::*; - use chrono::Duration; + use std::time::Duration; #[test] fn test_access() { - let t1 = Utc::now(); - let t2 = t1 + Duration::nanoseconds(1); - let t3 = t1 + Duration::nanoseconds(2); + let t1 = Time::from_timestamp(3044, 2); + let t2 = t1 + Duration::from_nanos(1); + let t3 = t1 + Duration::from_nanos(2); - let 
access_recorder = AccessRecorder::new(t1); + let time = Arc::new(time::MockProvider::new(t1)); + let access_recorder = AccessRecorder::new(Arc::<time::MockProvider>::clone(&time)); assert_eq!( access_recorder.get_metrics(), @@ -81,7 +75,8 @@ mod tests { } ); - access_recorder.record_access(t3); + time.set(t3); + access_recorder.record_access(); assert_eq!( access_recorder.get_metrics(), AccessMetrics { @@ -90,7 +85,8 @@ mod tests { } ); - access_recorder.record_access(t2); + time.set(t2); + access_recorder.record_access(); assert_eq!( access_recorder.get_metrics(), AccessMetrics { diff --git a/lifecycle/src/lib.rs b/lifecycle/src/lib.rs index c9b2f9ff88..d1a0a2e681 100644 --- a/lifecycle/src/lib.rs +++ b/lifecycle/src/lib.rs @@ -175,7 +175,7 @@ pub trait LifecycleChunk { /// Returns the access metrics for this chunk fn access_metrics(&self) -> AccessMetrics; - fn time_of_last_write(&self) -> DateTime<Utc>; + fn time_of_last_write(&self) -> Time; fn addr(&self) -> &ChunkAddr; diff --git a/lifecycle/src/policy.rs b/lifecycle/src/policy.rs index 9c8ab64543..993e568f7a 100644 --- a/lifecycle/src/policy.rs +++ b/lifecycle/src/policy.rs @@ -582,7 +582,8 @@ fn can_move<C: LifecycleChunk>(rules: &LifecycleRules, chunk: &C, now: DateTime< return true; } - elapsed_seconds(now, chunk.time_of_last_write()) >= rules.late_arrive_window_seconds.get() + elapsed_seconds(now, chunk.time_of_last_write().date_time()) + >= rules.late_arrive_window_seconds.get() } /// An action to free up memory @@ -754,7 +755,7 @@ mod tests { row_count: usize, min_timestamp: Option<DateTime<Utc>>, access_metrics: AccessMetrics, - time_of_last_write: DateTime<Utc>, + time_of_last_write: Time, lifecycle_action: Option<TaskTracker<ChunkLifecycleAction>>, storage: ChunkStorage, order: ChunkOrder, @@ -775,9 +776,9 @@ mod tests { min_timestamp: None, access_metrics: AccessMetrics { count: 0, - last_access: Utc::now(), + last_access: Time::from_timestamp(0, 0), }, - time_of_last_write: from_secs(time_of_last_write), + time_of_last_write: Time::from_timestamp(time_of_last_write, 0), lifecycle_action: None, storage, order: ChunkOrder::MIN, @@ -1041,7 +1042,7 @@ mod tests { self.access_metrics.clone() } - fn time_of_last_write(&self) -> DateTime<Utc> { + fn time_of_last_write(&self) -> Time { self.time_of_last_write } @@ -1184,10 +1185,10 @@ mod tests { #[test] fn test_sort_free_candidates() { - let now = Utc::now(); - let access_metrics = |secs: i64| AccessMetrics { + let now = Time::from_timestamp_nanos(0); + let access_metrics = |secs: u64| AccessMetrics { count: 1, - last_access: now + chrono::Duration::seconds(secs), + last_access: now + Duration::from_secs(secs), }; let mut candidates = vec![ @@ -1377,8 +1378,6 @@ mod tests { lifecycle.check_for_work(from_secs(10)); assert_eq!(*db.events.read(), vec![]); - let now = Utc::now(); - let chunks = vec![ // two "open" chunks => they must not be dropped (yet) TestChunk::new(ChunkId::new_test(0), 0, ChunkStorage::OpenMutableBuffer), @@ -1396,7 +1395,7 @@ mod tests { ) .with_access_metrics(AccessMetrics { count: 1, - last_access: now, + last_access: Time::from_timestamp(5, 0), }), // "written" chunk => can be unloaded TestChunk::new( @@ -1406,7 +1405,7 @@ mod tests { ) .with_access_metrics(AccessMetrics { count: 12, - last_access: now - chrono::Duration::seconds(1), + last_access: Time::from_timestamp(4, 0), }), ]; diff --git a/parquet_file/src/catalog/dump.rs b/parquet_file/src/catalog/dump.rs index 618bbc682c..9211ddcfb7 100644 --- a/parquet_file/src/catalog/dump.rs +++ 
b/parquet_file/src/catalog/dump.rs @@ -462,9 +462,9 @@ File { Metadata { iox_metadata: Ok( IoxMetadata { - creation_timestamp: 1970-01-01T00:00:10.000000020Z, - time_of_first_write: 1970-01-01T00:00:30.000000040Z, - time_of_last_write: 1970-01-01T00:00:50.000000060Z, + creation_timestamp: 1970-01-01T00:00:10.000000020+00:00, + time_of_first_write: 1970-01-01T00:00:30.000000040+00:00, + time_of_last_write: 1970-01-01T00:00:50.000000060+00:00, table_name: "table1", partition_key: "part1", chunk_id: ChunkId( diff --git a/parquet_file/src/catalog/rebuild.rs b/parquet_file/src/catalog/rebuild.rs index a47a445d92..cfa7ffee1d 100644 --- a/parquet_file/src/catalog/rebuild.rs +++ b/parquet_file/src/catalog/rebuild.rs @@ -182,11 +182,11 @@ mod tests { create_partition_and_database_checkpoint, make_config, make_record_batch, TestSize, }, }; - use chrono::Utc; use data_types::chunk_metadata::{ChunkAddr, ChunkId, ChunkOrder}; use datafusion::physical_plan::SendableRecordBatchStream; use datafusion_util::MemoryStream; use parquet::arrow::ArrowWriter; + use time::Time; use tokio_stream::StreamExt; #[tokio::test] @@ -373,14 +373,14 @@ mod tests { Arc::clone(&partition_key), ); let metadata = IoxMetadata { - creation_timestamp: Utc::now(), + creation_timestamp: Time::from_timestamp_nanos(0), table_name: Arc::clone(&table_name), partition_key: Arc::clone(&partition_key), chunk_id, partition_checkpoint, database_checkpoint, - time_of_first_write: Utc::now(), - time_of_last_write: Utc::now(), + time_of_first_write: Time::from_timestamp_nanos(0), + time_of_last_write: Time::from_timestamp_nanos(0), chunk_order: ChunkOrder::new(5).unwrap(), }; let stream: SendableRecordBatchStream = Box::pin(MemoryStream::new(record_batches)); diff --git a/parquet_file/src/metadata.rs b/parquet_file/src/metadata.rs index 404bbc5c87..1a28889fbf 100644 --- a/parquet_file/src/metadata.rs +++ b/parquet_file/src/metadata.rs @@ -86,7 +86,6 @@ //! [Apache Parquet]: https://parquet.apache.org/ //! [Apache Thrift]: https://thrift.apache.org/ //! [Thrift Compact Protocol]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md -use chrono::{DateTime, Utc}; use data_types::{ chunk_metadata::{ChunkId, ChunkOrder}, partition_metadata::{ColumnSummary, InfluxDbType, StatValues, Statistics}, @@ -263,10 +262,11 @@ pub type Result<T, E = Error> = std::result::Result<T, E>; #[derive(Debug, Clone, Eq, PartialEq)] pub struct IoxMetadata { /// Timestamp when this file was created. - pub creation_timestamp: DateTime<Utc>, + pub creation_timestamp: Time, - pub time_of_first_write: DateTime<Utc>, - pub time_of_last_write: DateTime<Utc>, + pub time_of_first_write: Time, + + pub time_of_last_write: Time, /// Table that holds this parquet file. 
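The `IoxMetadata` struct above now stores its timestamps as `Time`; at the protobuf boundary the encode/decode hunks further below still go through chrono. A minimal sketch of that round trip, outside the diff, assuming the in-repo `time` crate exposes `Time::date_time`/`Time::from_date_time` as used in this patch and that `pbjson_types::Timestamp` converts to and from `chrono::DateTime<Utc>` (error type simplified here):

use std::convert::TryInto;
use time::Time;

// Encode an IOx `Time` into the protobuf timestamp type via chrono.
fn encode_timestamp(ts: Time) -> pbjson_types::Timestamp {
    ts.date_time().into()
}

// Decode a protobuf timestamp back into `Time`, rejecting out-of-range values.
fn decode_timestamp(ts: pbjson_types::Timestamp) -> Result<Time, &'static str> {
    let date_time = ts.try_into().map_err(|_| "timestamp out of range")?;
    Ok(Time::from_date_time(date_time))
}
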
pub table_name: Arc<str>, @@ -345,7 +345,7 @@ impl IoxMetadata { Arc::clone(&table_name), Arc::clone(&partition_key), sequencer_numbers, - Time::from_date_time(flush_timestamp), + flush_timestamp, ); // extract database checkpoint @@ -433,9 +433,9 @@ impl IoxMetadata { let proto_msg = proto::IoxMetadata { version: METADATA_VERSION, - creation_timestamp: Some(self.creation_timestamp.into()), - time_of_first_write: Some(self.time_of_first_write.into()), - time_of_last_write: Some(self.time_of_last_write.into()), + creation_timestamp: Some(self.creation_timestamp.date_time().into()), + time_of_first_write: Some(self.time_of_first_write.date_time().into()), + time_of_last_write: Some(self.time_of_last_write.date_time().into()), table_name: self.table_name.to_string(), partition_key: self.partition_key.to_string(), chunk_id: self.chunk_id.into(), @@ -454,12 +454,14 @@ impl IoxMetadata { fn decode_timestamp_from_field( value: Option<pbjson_types::Timestamp>, field: &'static str, -) -> Result<DateTime<Utc>> { - value +) -> Result<Time> { + let date_time = value .context(IoxMetadataFieldMissing { field })? .try_into() .map_err(|e| Box::new(e) as _) - .context(IoxMetadataBroken) + .context(IoxMetadataBroken)?; + + Ok(Time::from_date_time(date_time)) } /// Parquet metadata with IOx-specific wrapper. @@ -1077,14 +1079,14 @@ mod tests { Arc::clone(&partition_key), ); let metadata = IoxMetadata { - creation_timestamp: Utc::now(), + creation_timestamp: Time::from_timestamp(3234, 0), table_name, partition_key, chunk_id: ChunkId::new_test(1337), partition_checkpoint, database_checkpoint, - time_of_first_write: Utc::now(), - time_of_last_write: Utc::now(), + time_of_first_write: Time::from_timestamp(3234, 0), + time_of_last_write: Time::from_timestamp(3234, 3456), chunk_order: ChunkOrder::new(5).unwrap(), }; diff --git a/parquet_file/src/storage.rs b/parquet_file/src/storage.rs index 3645c2f28a..87dc828c02 100644 --- a/parquet_file/src/storage.rs +++ b/parquet_file/src/storage.rs @@ -408,7 +408,6 @@ mod tests { }; use arrow::array::{ArrayRef, StringArray}; use arrow_util::assert_batches_eq; - use chrono::Utc; use data_types::{ chunk_metadata::{ChunkId, ChunkOrder}, partition_metadata::TableSummary, @@ -416,6 +415,7 @@ mod tests { use datafusion::physical_plan::common::SizedRecordBatchStream; use datafusion_util::MemoryStream; use parquet::schema::types::ColumnPath; + use time::Time; #[tokio::test] async fn test_parquet_contains_key_value_metadata() { @@ -426,14 +426,14 @@ mod tests { Arc::clone(&partition_key), ); let metadata = IoxMetadata { - creation_timestamp: Utc::now(), + creation_timestamp: Time::from_timestamp_nanos(3453), table_name, partition_key, chunk_id: ChunkId::new_test(1337), partition_checkpoint, database_checkpoint, - time_of_first_write: Utc::now(), - time_of_last_write: Utc::now(), + time_of_first_write: Time::from_timestamp_nanos(456), + time_of_last_write: Time::from_timestamp_nanos(43069346), chunk_order: ChunkOrder::new(5).unwrap(), }; @@ -502,14 +502,14 @@ mod tests { Arc::clone(&partition_key), ); let metadata = IoxMetadata { - creation_timestamp: Utc::now(), + creation_timestamp: Time::from_timestamp_nanos(43069346), table_name: Arc::clone(&table_name), partition_key: Arc::clone(&partition_key), chunk_id, partition_checkpoint, database_checkpoint, - time_of_first_write: Utc::now(), - time_of_last_write: Utc::now(), + time_of_first_write: Time::from_timestamp_nanos(234), + time_of_last_write: Time::from_timestamp_nanos(4784), chunk_order: ChunkOrder::new(5).unwrap(), }; diff --git 
a/parquet_file/src/test_utils.rs b/parquet_file/src/test_utils.rs index b19dca4002..bcbfeec222 100644 --- a/parquet_file/src/test_utils.rs +++ b/parquet_file/src/test_utils.rs @@ -12,7 +12,6 @@ use arrow::{ datatypes::{Int32Type, SchemaRef}, record_batch::RecordBatch, }; -use chrono::{TimeZone, Utc}; use data_types::{ chunk_metadata::{ChunkAddr, ChunkId, ChunkOrder}, partition_metadata::{ColumnSummary, InfluxDbType, StatValues, Statistics, TableSummary}, @@ -174,14 +173,14 @@ pub async fn make_chunk_given_record_batch( Arc::clone(&addr.partition_key), ); let metadata = IoxMetadata { - creation_timestamp: Utc.timestamp(10, 20), + creation_timestamp: Time::from_timestamp(10, 20), table_name: Arc::clone(&addr.table_name), partition_key: Arc::clone(&addr.partition_key), chunk_id: addr.chunk_id, partition_checkpoint, database_checkpoint, - time_of_first_write: Utc.timestamp(30, 40), - time_of_last_write: Utc.timestamp(50, 60), + time_of_first_write: Time::from_timestamp(30, 40), + time_of_last_write: Time::from_timestamp(50, 60), chunk_order: ChunkOrder::new(5).unwrap(), }; let (path, file_size_bytes, parquet_metadata) = storage @@ -935,12 +934,12 @@ pub fn create_partition_and_database_checkpoint( let mut sequencer_numbers_1 = BTreeMap::new(); sequencer_numbers_1.insert(1, OptionalMinMaxSequence::new(None, 18)); sequencer_numbers_1.insert(2, OptionalMinMaxSequence::new(Some(25), 28)); - let flush_timestamp = Utc.timestamp(10, 20); + let flush_timestamp = Time::from_timestamp(10, 20); let partition_checkpoint_1 = PartitionCheckpoint::new( Arc::clone(&table_name), Arc::clone(&partition_key), sequencer_numbers_1, - Time::from_date_time(flush_timestamp), + flush_timestamp, ); // create second partition diff --git a/server/src/database.rs b/server/src/database.rs index d0408d5620..d506ec249a 100644 --- a/server/src/database.rs +++ b/server/src/database.rs @@ -7,7 +7,6 @@ use crate::{ rules::ProvidedDatabaseRules, ApplicationState, Db, }; -use chrono::{DateTime, Utc}; use data_types::{ database_rules::WriteBufferDirection, detailed_database::GenerationId, server_id::ServerId, DatabaseName, @@ -216,6 +215,7 @@ impl Database { db_name, config, Arc::clone(application.metric_registry()), + Arc::clone(application.time_provider()), true, ) .await @@ -543,11 +543,7 @@ impl Database { /// - write it to a write buffer /// - write it to a local `Db` /// - pub async fn write_entry( - &self, - entry: entry::Entry, - time_of_write: DateTime<Utc>, - ) -> Result<(), WriteError> { + pub async fn write_entry(&self, entry: entry::Entry) -> Result<(), WriteError> { let recorder = self.shared.metrics.entry_ingest(entry.data().len()); let db = { @@ -566,7 +562,7 @@ impl Database { } }; - db.store_entry(entry, time_of_write).await.map_err(|e| { + db.store_entry(entry).await.map_err(|e| { use super::db::Error; match e { // TODO: Pull write buffer producer out of Db @@ -1081,6 +1077,7 @@ impl DatabaseStateRulesLoaded { shared.config.name.as_str(), self.catalog_config.clone(), Arc::clone(shared.application.metric_registry()), + Arc::clone(shared.application.time_provider()), shared.config.wipe_catalog_on_error, shared.config.skip_replay, ) diff --git a/server/src/db.rs b/server/src/db.rs index ae607c06f6..806d980303 100644 --- a/server/src/db.rs +++ b/server/src/db.rs @@ -973,7 +973,7 @@ impl Db { } /// Stores an entry based on the configuration. 
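With this change `Db::store_entry` (below) no longer takes an explicit `time_of_write`; the database stamps writes from its injected `TimeProvider`, and tests steer that clock through `time::MockProvider`. A minimal sketch of that clock API, outside the diff, using only the calls exercised elsewhere in this patch (`MockProvider::new`/`set`/`inc`, `TimeProvider::now`):

use std::{sync::Arc, time::Duration};
use time::{MockProvider, Time, TimeProvider};

#[test]
fn mock_clock_sketch() {
    // Deterministic clock handed to the Db (and its catalog) as Arc<dyn TimeProvider>.
    let mock = Arc::new(MockProvider::new(Time::from_timestamp(0, 0)));
    let provider: Arc<dyn TimeProvider> = Arc::<MockProvider>::clone(&mock);

    let t0 = provider.now();
    // Advancing the mock returns the new time, which tests can assert against
    // partition and chunk first/last write timestamps.
    let t1 = mock.inc(Duration::from_secs(1));
    assert!(t0 < t1);
    assert_eq!(provider.now(), t1);

    // `set` jumps to an absolute time.
    mock.set(Time::from_timestamp(11, 22));
    assert_eq!(provider.now(), Time::from_timestamp(11, 22));
}
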
- pub async fn store_entry(&self, entry: Entry, time_of_write: DateTime<Utc>) -> Result<()> { + pub async fn store_entry(&self, entry: Entry) -> Result<()> { let immutable = { let rules = self.rules.read(); rules.lifecycle_rules.immutable @@ -1008,11 +1008,7 @@ impl Db { entry, )); - self.store_sequenced_entry( - sequenced_entry, - filter_table_batch_keep_all, - time_of_write, - ) + self.store_sequenced_entry(sequenced_entry, filter_table_batch_keep_all) } (_, true) => { // If not configured to send entries to the write buffer and the database is @@ -1025,11 +1021,7 @@ impl Db { // sequencing entries so skip doing so here let sequenced_entry = Arc::new(SequencedEntry::new_unsequenced(entry)); - self.store_sequenced_entry( - sequenced_entry, - filter_table_batch_keep_all, - time_of_write, - ) + self.store_sequenced_entry(sequenced_entry, filter_table_batch_keep_all) } } } @@ -1050,7 +1042,6 @@ impl Db { &self, sequenced_entry: Arc<SequencedEntry>, filter_table_batch: F, - time_of_write: DateTime<Utc>, ) -> Result<()> where F: Fn(Option<&Sequence>, &str, &TableBatch<'_>) -> (bool, Option<Vec<bool>>), @@ -1143,7 +1134,7 @@ impl Db { let mut partition = partition.write(); let handle_chunk_write = |chunk: &mut CatalogChunk| { - chunk.record_write(time_of_write, ×tamp_summary); + chunk.record_write(×tamp_summary); if chunk.storage().0 >= mub_row_threshold.get() { chunk.freeze().expect("freeze mub chunk"); } @@ -1181,8 +1172,7 @@ impl Db { match chunk_result { Ok(mb_chunk) => { - let chunk = - partition.create_open_chunk(mb_chunk, time_of_write); + let chunk = partition.create_open_chunk(mb_chunk); let mut chunk = chunk .try_write() .expect("partition lock should prevent contention"); @@ -1345,12 +1335,8 @@ pub mod test_helpers { use super::*; - /// Try to write lineprotocol data w/ specific `time_of_write` and return all tables that where written. - pub async fn try_write_lp_with_time( - db: &Db, - lp: &str, - time_of_write: DateTime<Utc>, - ) -> Result<Vec<String>> { + /// Try to write lineprotocol data and return all tables that where written. + pub async fn try_write_lp(db: &Db, lp: &str) -> Result<Vec<String>> { let entries = { let partitioner = &db.rules.read().partition_template; lp_to_entries(lp, partitioner) @@ -1364,7 +1350,7 @@ pub mod test_helpers { tables.insert(batch.name().to_string()); } } - db.store_entry(entry, time_of_write).await?; + db.store_entry(entry).await?; } } @@ -1373,20 +1359,6 @@ pub mod test_helpers { Ok(tables) } - /// Try to write lineprotocol data and return all tables that where written. - pub async fn try_write_lp(db: &Db, lp: &str) -> Result<Vec<String>> { - try_write_lp_with_time(db, lp, Utc::now()).await - } - - /// Same was [`try_write_lp_with_time`](try_write_lp_with_time) but will panic on failure. - pub async fn write_lp_with_time( - db: &Db, - lp: &str, - time_of_write: DateTime<Utc>, - ) -> Vec<String> { - try_write_lp_with_time(db, lp, time_of_write).await.unwrap() - } - /// Same was [`try_write_lp`](try_write_lp) but will panic on failure. 
pub async fn write_lp(db: &Db, lp: &str) -> Vec<String> { try_write_lp(db, lp).await.unwrap() @@ -1433,7 +1405,6 @@ mod tests { use arrow::record_batch::RecordBatch; use bytes::Bytes; - use chrono::{DateTime, TimeZone}; use futures::{stream, StreamExt, TryStreamExt}; use predicate::delete_expr::DeleteExpr; use tokio_util::sync::CancellationToken; @@ -1461,6 +1432,7 @@ mod tests { use query::{QueryChunk, QueryDatabase}; use schema::selection::Selection; use schema::Schema; + use time::Time; use write_buffer::mock::{ MockBufferForWriting, MockBufferForWritingThatAlwaysErrors, MockBufferSharedState, }; @@ -1470,7 +1442,7 @@ mod tests { assert_store_sequenced_entry_failures, db::{ catalog::chunk::ChunkStage, - test_helpers::{run_query, try_write_lp, write_lp, write_lp_with_time}, + test_helpers::{run_query, try_write_lp, write_lp}, }, utils::{make_db, TestDb}, }; @@ -1497,7 +1469,7 @@ mod tests { let db = immutable_db().await; let entry = lp_to_entry("cpu bar=1 10"); - let res = db.store_entry(entry, Utc::now()).await; + let res = db.store_entry(entry).await; assert_contains!( res.unwrap_err().to_string(), "Cannot write to this database: no mutable buffer configured" @@ -1525,7 +1497,7 @@ mod tests { .db; let entry = lp_to_entry("cpu bar=1 10"); - test_db.store_entry(entry, Utc::now()).await.unwrap(); + test_db.store_entry(entry).await.unwrap(); assert_eq!(write_buffer_state.get_messages(0).len(), 1); } @@ -1547,7 +1519,7 @@ mod tests { .db; let entry = lp_to_entry("cpu bar=1 10"); - db.store_entry(entry, Utc::now()).await.unwrap(); + db.store_entry(entry).await.unwrap(); assert_eq!(write_buffer_state.get_messages(0).len(), 1); @@ -1575,7 +1547,7 @@ mod tests { let entry = lp_to_entry("cpu bar=1 10"); - let res = db.store_entry(entry, Utc::now()).await; + let res = db.store_entry(entry).await; assert!( matches!(res, Err(Error::WriteBufferWritingError { .. })), @@ -1589,7 +1561,7 @@ mod tests { // Validate that writes are rejected if this database is reading from the write buffer let db = immutable_db().await; let entry = lp_to_entry("cpu bar=1 10"); - let res = db.store_entry(entry, Utc::now()).await; + let res = db.store_entry(entry).await; assert_contains!( res.unwrap_err().to_string(), "Cannot write to this database: no mutable buffer configured" @@ -1633,7 +1605,7 @@ mod tests { let entry = lp_to_entry(&lp); // This should succeed and start chunks in the MUB - db.store_entry(entry, Utc::now()).await.unwrap(); + db.store_entry(entry).await.unwrap(); // 3 more lines that should go in the 3 partitions/chunks. // Line 1 has the same schema and should end up in the MUB. @@ -1651,7 +1623,7 @@ mod tests { let entry = lp_to_entry(&lp); // This should return an error because there was at least one error in the loop - let result = db.store_entry(entry, Utc::now()).await; + let result = db.store_entry(entry).await; assert_contains!( result.unwrap_err().to_string(), "Storing sequenced entry failed with the following error(s), and possibly more:" @@ -1723,8 +1695,7 @@ mod tests { let db = Arc::clone(&test_db.db); - let t1_write = time.now().date_time(); - write_lp_with_time(db.as_ref(), "cpu bar=1 10", t1_write).await; + write_lp(db.as_ref(), "cpu bar=1 10").await; let registry = test_db.metric_registry.as_ref(); @@ -1740,14 +1711,17 @@ mod tests { catalog_chunk_size_bytes_metric_eq(registry, "mutable_buffer", 700); // write into same chunk again. 
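The tests that follow advance the mock clock between writes instead of passing wall-clock times. Access bookkeeping works the same way: the refactored `AccessRecorder` (see the `internal_types/src/access.rs` hunks above) reads the injected provider rather than taking a timestamp argument. A minimal sketch, outside the diff, assuming `AccessRecorder` is importable from the `internal_types` crate's `access` module:

use std::{sync::Arc, time::Duration};
use internal_types::access::AccessRecorder;
use time::{MockProvider, Time};

#[test]
fn access_recorder_sketch() {
    let clock = Arc::new(MockProvider::new(Time::from_timestamp(100, 0)));
    let recorder = AccessRecorder::new(Arc::<MockProvider>::clone(&clock));

    clock.inc(Duration::from_secs(5));
    recorder.record_access(); // stamped with the provider's current time

    let metrics = recorder.get_metrics();
    assert_eq!(metrics.count, 1);
    assert_eq!(metrics.last_access, Time::from_timestamp(105, 0));
}
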
- let t2_write = time.inc(Duration::from_secs(1)).date_time(); - write_lp_with_time(db.as_ref(), "cpu bar=2 20", t2_write).await; - let t3_write = time.inc(Duration::from_secs(1)).date_time(); - write_lp_with_time(db.as_ref(), "cpu bar=3 30", t3_write).await; - let t4_write = time.inc(Duration::from_secs(1)).date_time(); - write_lp_with_time(db.as_ref(), "cpu bar=4 40", t4_write).await; - let t5_write = time.inc(Duration::from_secs(1)).date_time(); - write_lp_with_time(db.as_ref(), "cpu bar=5 50", t5_write).await; + time.inc(Duration::from_secs(1)); + write_lp(db.as_ref(), "cpu bar=2 20").await; + + time.inc(Duration::from_secs(1)); + write_lp(db.as_ref(), "cpu bar=3 30").await; + + time.inc(Duration::from_secs(1)); + write_lp(db.as_ref(), "cpu bar=4 40").await; + + time.inc(Duration::from_secs(1)); + write_lp(db.as_ref(), "cpu bar=5 50").await; // verify chunk size updated catalog_chunk_size_bytes_metric_eq(registry, "mutable_buffer", 764); @@ -2023,11 +1997,10 @@ mod tests { #[tokio::test] async fn compact() { // Test that data can be read after it is compacted - let test_db = make_db().await; - let db = Arc::new(test_db.db); + let (db, time) = make_db_time().await; - let t_write1 = Utc::now(); - write_lp_with_time(db.as_ref(), "cpu bar=1 10", t_write1).await; + let t_write1 = time.inc(Duration::from_secs(1)); + write_lp(db.as_ref(), "cpu bar=1 10").await; let partition_key = "1970-01-01T00"; db.rollover_partition("cpu", partition_key) @@ -2047,8 +2020,8 @@ mod tests { assert_eq!(first_old_rb_write, t_write1); // Put new data into the mutable buffer - let t_write2 = Utc::now(); - write_lp_with_time(db.as_ref(), "cpu bar=2 20", t_write2).await; + let t_write2 = time.inc(Duration::from_secs(1)); + write_lp(db.as_ref(), "cpu bar=2 20").await; // now, compact it let compacted_rb_chunk = db @@ -2199,10 +2172,9 @@ mod tests { let db = test_db.db; // Write some line protocols in Mutable buffer of the DB - let t1_write = time.now().date_time(); - write_lp_with_time(db.as_ref(), "cpu bar=1 10", t1_write).await; - let t2_write = time.inc(Duration::from_secs(1)).date_time(); - write_lp_with_time(db.as_ref(), "cpu bar=2 20", t2_write).await; + write_lp(db.as_ref(), "cpu bar=1 10").await; + time.inc(Duration::from_secs(1)); + write_lp(db.as_ref(), "cpu bar=2 20").await; //Now mark the MB chunk close let partition_key = "1970-01-01T00"; @@ -2258,6 +2230,7 @@ mod tests { load_parquet_from_store_for_path(&path_list[0], Arc::clone(&db.iox_object_store)) .await .unwrap(); + let parquet_metadata = IoxParquetMetaData::from_file_bytes(parquet_data.clone()).unwrap(); // Read metadata at file level let schema = parquet_metadata.decode().unwrap().read_schema().unwrap(); @@ -2300,10 +2273,10 @@ mod tests { let db = test_db.db; // Write some line protocols in Mutable buffer of the DB - let t1_write = time.now().date_time(); - write_lp_with_time(db.as_ref(), "cpu bar=1 10", t1_write).await; - let t2_write = time.inc(Duration::from_secs(1)).date_time(); - write_lp_with_time(db.as_ref(), "cpu bar=2 20", t2_write).await; + write_lp(db.as_ref(), "cpu bar=1 10").await; + + time.inc(Duration::from_secs(1)); + write_lp(db.as_ref(), "cpu bar=2 20").await; // Now mark the MB chunk close let partition_key = "1970-01-01T00"; @@ -2318,6 +2291,7 @@ mod tests { .await .unwrap() .unwrap(); + // Write the RB chunk to Object Store but keep it in RB time.inc(Duration::from_secs(1)); *db.persisted_chunk_id_override.lock() = Some(ChunkId::new_test(1337)); @@ -2405,63 +2379,64 @@ mod tests { #[tokio::test] async fn 
write_updates_last_write_at() { - let db = Arc::new(make_db().await.db); - let before_create = Utc::now(); + let (db, time) = make_db_time().await; + let w0 = time.inc(Duration::from_secs(23)); let partition_key = "1970-01-01T00"; write_lp(&db, "cpu bar=1 10").await; - let after_write = Utc::now(); - let last_write_prev = { + { let partition = db.catalog.partition("cpu", partition_key).unwrap(); let partition = partition.read(); - assert_ne!(partition.created_at(), partition.last_write_at()); - assert!(before_create < partition.last_write_at()); - assert!(after_write > partition.last_write_at()); - partition.last_write_at() - }; + assert_eq!(partition.created_at(), w0); + assert_eq!(partition.last_write_at(), w0); + } + + let w1 = time.inc(Duration::from_secs(1)); write_lp(&db, "cpu bar=1 20").await; { let partition = db.catalog.partition("cpu", partition_key).unwrap(); let partition = partition.read(); - assert!(last_write_prev < partition.last_write_at()); + assert_eq!(partition.created_at(), w0); + assert_eq!(partition.last_write_at(), w1); } } #[tokio::test] async fn failed_write_doesnt_update_last_write_at() { - let db = Arc::new(make_db().await.db); - let before_create = Utc::now(); + let (db, time) = make_db_time().await; + + let t0 = time.inc(Duration::from_secs(2)); let partition_key = "1970-01-01T00"; write_lp(&db, "cpu bar=1 10").await; - let after_write = Utc::now(); - let (last_write_prev, chunk_last_write_prev) = { + { let partition = db.catalog.partition("cpu", partition_key).unwrap(); let partition = partition.read(); - assert_ne!(partition.created_at(), partition.last_write_at()); - assert!(before_create < partition.last_write_at()); - assert!(after_write > partition.last_write_at()); + assert_eq!(partition.created_at(), t0); + assert_eq!(partition.last_write_at(), t0); let chunk = partition.open_chunk().unwrap(); let chunk = chunk.read(); + assert_eq!(chunk.time_of_last_write(), t0); + } - (partition.last_write_at(), chunk.time_of_last_write()) - }; + time.inc(Duration::from_secs(1)); let entry = lp_to_entry("cpu bar=true 10"); - let result = db.store_entry(entry, Utc::now()).await; + let result = db.store_entry(entry).await; assert!(result.is_err()); { let partition = db.catalog.partition("cpu", partition_key).unwrap(); let partition = partition.read(); - assert_eq!(last_write_prev, partition.last_write_at()); + assert_eq!(partition.created_at(), t0); + assert_eq!(partition.last_write_at(), t0); let chunk = partition.open_chunk().unwrap(); let chunk = chunk.read(); - assert_eq!(chunk_last_write_prev, chunk.time_of_last_write()); + assert_eq!(chunk.time_of_last_write(), t0); } } @@ -2518,12 +2493,15 @@ mod tests { #[tokio::test] async fn test_chunk_timestamps() { - let start = Utc::now(); - let db = Arc::new(make_db().await.db); + let (db, time) = make_db_time().await; + let w0 = time.inc(Duration::from_secs(95)); // Given data loaded into two chunks write_lp(&db, "cpu bar=1 10").await; - let after_data_load = Utc::now(); + + let w1 = time.inc(Duration::from_secs(2)); + + write_lp(&db, "cpu bar=1 20").await; // When the chunk is rolled over let partition_key = "1970-01-01T00"; @@ -2533,23 +2511,15 @@ mod tests { .unwrap() .unwrap() .id(); - let after_rollover = Utc::now(); let partition = db.catalog.partition("cpu", partition_key).unwrap(); let partition = partition.read(); let (chunk, _order) = partition.chunk(chunk_id).unwrap(); let chunk = chunk.read(); - println!( - "start: {:?}, after_data_load: {:?}, after_rollover: {:?}", - start, after_data_load, after_rollover - 
); - println!("Chunk: {:#?}", chunk); - // then the chunk creation and rollover times are as expected - assert!(start < chunk.time_of_first_write()); - assert!(chunk.time_of_first_write() < after_data_load); - assert!(chunk.time_of_first_write() == chunk.time_of_last_write()); + assert_eq!(chunk.time_of_first_write(), w0); + assert_eq!(chunk.time_of_last_write(), w1); } #[tokio::test] @@ -2609,8 +2579,8 @@ mod tests { object_store_bytes: 0, // os_size row_count: 1, time_of_last_access: None, - time_of_first_write: Utc.timestamp_nanos(1), - time_of_last_write: Utc.timestamp_nanos(1), + time_of_first_write: Time::from_timestamp_nanos(1), + time_of_last_write: Time::from_timestamp_nanos(1), order: ChunkOrder::new(5).unwrap(), }]; @@ -2635,15 +2605,13 @@ mod tests { #[tokio::test] async fn partition_chunk_summaries_timestamp() { - let db = Arc::new(make_db().await.db); + let (db, time) = make_db_time().await; - let t_first_write = Utc::now(); - write_lp_with_time(&db, "cpu bar=1 1", t_first_write).await; + let t_first_write = time.inc(Duration::from_secs(2)); + write_lp(&db, "cpu bar=1 1").await; - let t_second_write = Utc::now(); - write_lp_with_time(&db, "cpu bar=2 2", t_second_write).await; - - db.rollover_partition("cpu", "1970-01-01T00").await.unwrap(); + let t_second_write = time.inc(Duration::from_secs(2)); + write_lp(&db, "cpu bar=2 2").await; let mut chunk_summaries = db.chunk_summaries().unwrap(); @@ -2654,7 +2622,7 @@ mod tests { assert_eq!(summary.time_of_last_write, t_second_write); } - fn assert_first_last_times_eq(chunk_summary: &ChunkSummary, expected: DateTime<Utc>) { + fn assert_first_last_times_eq(chunk_summary: &ChunkSummary, expected: Time) { let first_write = chunk_summary.time_of_first_write; let last_write = chunk_summary.time_of_last_write; @@ -2690,20 +2658,20 @@ mod tests { async fn chunk_summaries() { // Test that chunk id listing is hooked up let (db, time) = make_db_time().await; - time.set(Time::from_timestamp(11, 22)); // get three chunks: one open, one closed in mb and one close in rb // In open chunk, will end up in rb/os - let t1_write = time.now().date_time(); - write_lp_with_time(&db, "cpu bar=1 1", t1_write).await; + let t1_write = Time::from_timestamp(11, 22); + time.set(t1_write); + write_lp(&db, "cpu bar=1 1").await; // Move open chunk to closed db.rollover_partition("cpu", "1970-01-01T00").await.unwrap(); // New open chunk in mb // This point will end up in rb/os - let t2_write = time.inc(Duration::from_secs(1)).date_time(); - write_lp_with_time(&db, "cpu bar=1,baz=2 2", t2_write).await; + let t2_write = time.inc(Duration::from_secs(1)); + write_lp(&db, "cpu bar=1,baz=2 2").await; // Check first/last write times on the chunks at this point let mut chunk_summaries = db.chunk_summaries().expect("expected summary to return"); @@ -2719,8 +2687,8 @@ mod tests { assert_chunks_times_ordered(&closed_mb_t3, &open_mb_t3); // This point makes a new open mb chunk and will end up in the closed mb chunk - let t3_write = time.inc(Duration::from_secs(1)).date_time(); - write_lp_with_time(&db, "cpu bar=1,baz=2,frob=3 400000000000000", t3_write).await; + time.inc(Duration::from_secs(1)); + write_lp(&db, "cpu bar=1,baz=2,frob=3 400000000000000").await; // Check first/last write times on the chunks at this point let mut chunk_summaries = db.chunk_summaries().expect("expected summary to return"); @@ -2764,7 +2732,7 @@ mod tests { assert_chunks_times_eq(&other_open_mb_t5, &other_open_mb_t4); // Persist rb to parquet os - time.inc(Duration::from_secs(1)).date_time(); + 
time.inc(Duration::from_secs(1)); *db.persisted_chunk_id_override.lock() = Some(ChunkId::new_test(1337)); db.persist_partition("cpu", "1970-01-01T00", true) .await @@ -2807,8 +2775,8 @@ mod tests { // New open chunk in mb // This point will stay in this open mb chunk - let t5_write = time.inc(Duration::from_secs(1)).date_time(); - write_lp_with_time(&db, "cpu bar=1,baz=3,blargh=3 400000000000000", t5_write).await; + let t5_write = time.inc(Duration::from_secs(1)); + write_lp(&db, "cpu bar=1,baz=3,blargh=3 400000000000000").await; // Check first/last write times on the chunks at this point let mut chunk_summaries = db.chunk_summaries().expect("expected summary to return"); @@ -2842,8 +2810,8 @@ mod tests { object_store_bytes: 1557, // size of parquet file row_count: 2, time_of_last_access: None, - time_of_first_write: Utc.timestamp_nanos(1), - time_of_last_write: Utc.timestamp_nanos(1), + time_of_first_write: Time::from_timestamp_nanos(1), + time_of_last_write: Time::from_timestamp_nanos(1), }, ChunkSummary { partition_key: Arc::from("1970-01-05T15"), @@ -2856,8 +2824,8 @@ mod tests { object_store_bytes: 0, // no OS chunks row_count: 1, time_of_last_access: None, - time_of_first_write: Utc.timestamp_nanos(1), - time_of_last_write: Utc.timestamp_nanos(1), + time_of_first_write: Time::from_timestamp_nanos(1), + time_of_last_write: Time::from_timestamp_nanos(1), }, ChunkSummary { partition_key: Arc::from("1970-01-05T15"), @@ -2870,8 +2838,8 @@ mod tests { object_store_bytes: 0, // no OS chunks row_count: 1, time_of_last_access: None, - time_of_first_write: Utc.timestamp_nanos(1), - time_of_last_write: Utc.timestamp_nanos(1), + time_of_first_write: Time::from_timestamp_nanos(1), + time_of_last_write: Time::from_timestamp_nanos(1), }, ]; @@ -3280,7 +3248,7 @@ mod tests { .db_name(db_name) .build() .await; - let db = Arc::new(test_db.db); + let db = test_db.db; // ==================== check: empty catalog created ==================== // at this point, an empty preserved catalog exists diff --git a/server/src/db/catalog.rs b/server/src/db/catalog.rs index 322c04d783..e0adc4cfe4 100644 --- a/server/src/db/catalog.rs +++ b/server/src/db/catalog.rs @@ -18,6 +18,7 @@ use self::metrics::CatalogMetrics; use self::partition::Partition; use self::table::Table; use data_types::write_summary::WriteSummary; +use time::TimeProvider; pub mod chunk; mod metrics; @@ -92,21 +93,32 @@ pub struct Catalog { tables: RwLock<HashMap<Arc<str>, Table>>, metrics: Arc<CatalogMetrics>, + + time_provider: Arc<dyn TimeProvider>, } impl Catalog { #[cfg(test)] fn test() -> Self { - Self::new(Arc::from("test"), Default::default()) + Self::new( + Arc::from("test"), + Default::default(), + Arc::new(time::SystemProvider::new()), + ) } - pub fn new(db_name: Arc<str>, metric_registry: Arc<::metric::Registry>) -> Self { + pub fn new( + db_name: Arc<str>, + metric_registry: Arc<::metric::Registry>, + time_provider: Arc<dyn TimeProvider>, + ) -> Self { let metrics = Arc::new(CatalogMetrics::new(Arc::clone(&db_name), metric_registry)); Self { db_name, tables: Default::default(), metrics, + time_provider, } } @@ -194,6 +206,7 @@ impl Catalog { Arc::clone(&self.db_name), Arc::clone(&table_name), self.metrics.new_table_metrics(table_name.as_ref()), + Arc::clone(&self.time_provider), ); (table_name, table) @@ -317,13 +330,11 @@ mod tests { use entry::test_helpers::lp_to_entry; use super::*; - use chrono::Utc; fn create_open_chunk(partition: &Arc<RwLock<Partition>>) -> ChunkAddr { let mut partition = partition.write(); let table = 
partition.table_name(); let entry = lp_to_entry(&format!("{} bar=1 10", table)); - let time_of_write = Utc::now(); let write = entry.partition_writes().unwrap().remove(0); let batch = write.table_batches().remove(0); @@ -334,7 +345,7 @@ mod tests { ) .unwrap(); - let chunk = partition.create_open_chunk(mb_chunk, time_of_write); + let chunk = partition.create_open_chunk(mb_chunk); let chunk = chunk.read(); chunk.addr().clone() } diff --git a/server/src/db/catalog/chunk.rs b/server/src/db/catalog/chunk.rs index dbcf3df784..b95b2c8a83 100644 --- a/server/src/db/catalog/chunk.rs +++ b/server/src/db/catalog/chunk.rs @@ -1,6 +1,5 @@ use std::sync::Arc; -use chrono::{DateTime, Utc}; use snafu::Snafu; use data_types::{ @@ -22,6 +21,7 @@ use tracker::{TaskRegistration, TaskTracker}; use crate::db::catalog::metrics::{StorageRecorder, TimestampHistogram}; use parking_lot::Mutex; +use time::{Time, TimeProvider}; #[derive(Debug, Snafu)] pub enum Error { @@ -210,15 +210,18 @@ pub struct CatalogChunk { /// Record access to this chunk's data by queries and writes access_recorder: AccessRecorder, + /// Time provider + time_provider: Arc<dyn TimeProvider>, + /// The earliest time at which data contained within this chunk was written /// into IOx. Note due to the compaction, etc... this may not be the chunk /// that data was originally written into - time_of_first_write: DateTime<Utc>, + time_of_first_write: Time, /// The latest time at which data contained within this chunk was written /// into IOx. Note due to the compaction, etc... this may not be the chunk /// that data was originally written into - time_of_last_write: DateTime<Utc>, + time_of_last_write: Time, /// Order of this chunk relative to other overlapping chunks. order: ChunkOrder, @@ -273,22 +276,25 @@ impl CatalogChunk { pub(super) fn new_open( addr: ChunkAddr, chunk: mutable_buffer::chunk::MBChunk, - time_of_write: DateTime<Utc>, metrics: ChunkMetrics, order: ChunkOrder, + time_provider: Arc<dyn TimeProvider>, ) -> Self { assert_eq!(chunk.table_name(), &addr.table_name); let stage = ChunkStage::Open { mb_chunk: chunk }; + let now = time_provider.now(); + let chunk = Self { addr, stage, lifecycle_action: None, metrics: Mutex::new(metrics), - access_recorder: Default::default(), - time_of_first_write: time_of_write, - time_of_last_write: time_of_write, + access_recorder: AccessRecorder::new(Arc::clone(&time_provider)), + time_provider, + time_of_first_write: now, + time_of_last_write: now, order, }; chunk.update_metrics(); @@ -302,12 +308,13 @@ impl CatalogChunk { pub(super) fn new_rub_chunk( addr: ChunkAddr, chunk: read_buffer::RBChunk, - time_of_first_write: DateTime<Utc>, - time_of_last_write: DateTime<Utc>, + time_of_first_write: Time, + time_of_last_write: Time, schema: Arc<Schema>, metrics: ChunkMetrics, delete_predicates: Vec<Arc<DeletePredicate>>, order: ChunkOrder, + time_provider: Arc<dyn TimeProvider>, ) -> Self { let stage = ChunkStage::Frozen { meta: Arc::new(ChunkMetadata { @@ -323,7 +330,8 @@ impl CatalogChunk { stage, lifecycle_action: None, metrics: Mutex::new(metrics), - access_recorder: Default::default(), + access_recorder: AccessRecorder::new(Arc::clone(&time_provider)), + time_provider, time_of_first_write, time_of_last_write, order, @@ -334,14 +342,16 @@ impl CatalogChunk { /// Creates a new chunk that is only registered via an object store reference (= only exists in /// parquet). 
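Since the constructors above now take an `Arc<dyn TimeProvider>`, a chunk's first/last write times come from the provider instead of a caller-supplied `time_of_write`. A minimal sketch, outside the diff, of the clamping that `record_write` performs further below, using a hypothetical standalone struct (the real `CatalogChunk` also updates its metrics and `AccessRecorder`):

use std::sync::Arc;
use time::{Time, TimeProvider};

// Hypothetical illustration type; the field names mirror CatalogChunk but this is not the real struct.
struct WriteTimes {
    time_provider: Arc<dyn TimeProvider>,
    time_of_first_write: Time,
    time_of_last_write: Time,
}

impl WriteTimes {
    fn record_write(&mut self) {
        let now = self.time_provider.now();
        // The clock is not guaranteed to be monotonic, so clamp both bounds.
        self.time_of_first_write = self.time_of_first_write.min(now);
        self.time_of_last_write = self.time_of_last_write.max(now);
    }
}

Tests can then pin these timestamps exactly by driving the same `MockProvider` shown earlier.
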
+ #[allow(clippy::too_many_arguments)] pub(super) fn new_object_store_only( addr: ChunkAddr, chunk: Arc<parquet_file::chunk::ParquetChunk>, - time_of_first_write: DateTime<Utc>, - time_of_last_write: DateTime<Utc>, + time_of_first_write: Time, + time_of_last_write: Time, metrics: ChunkMetrics, delete_predicates: Vec<Arc<DeletePredicate>>, order: ChunkOrder, + time_provider: Arc<dyn TimeProvider>, ) -> Self { assert_eq!(chunk.table_name(), addr.table_name.as_ref()); @@ -363,7 +373,8 @@ impl CatalogChunk { stage, lifecycle_action: None, metrics: Mutex::new(metrics), - access_recorder: Default::default(), + access_recorder: AccessRecorder::new(Arc::clone(&time_provider)), + time_provider, time_of_first_write, time_of_last_write, order, @@ -407,11 +418,11 @@ impl CatalogChunk { .map_or(false, |action| action.metadata() == &lifecycle_action) } - pub fn time_of_first_write(&self) -> DateTime<Utc> { + pub fn time_of_first_write(&self) -> Time { self.time_of_first_write } - pub fn time_of_last_write(&self) -> DateTime<Utc> { + pub fn time_of_last_write(&self) -> Time { self.time_of_last_write } @@ -511,19 +522,18 @@ impl CatalogChunk { /// /// `time_of_write` is the wall clock time of the write /// `timestamps` is a summary of the row timestamps contained in the write - pub fn record_write(&mut self, time_of_write: DateTime<Utc>, timestamps: &TimestampSummary) { + pub fn record_write(&mut self, timestamps: &TimestampSummary) { { let metrics = self.metrics.lock(); if let Some(timestamp_histogram) = metrics.timestamp_histogram.as_ref() { timestamp_histogram.add(timestamps) } } - self.access_recorder.record_access_now(); + self.access_recorder.record_access(); - self.time_of_first_write = self.time_of_first_write.min(time_of_write); - - // DateTime<Utc> isn't necessarily monotonic - self.time_of_last_write = self.time_of_last_write.max(time_of_write); + let now = self.time_provider.now(); + self.time_of_first_write = self.time_of_first_write.min(now); + self.time_of_last_write = self.time_of_last_write.max(now); self.update_metrics(); } @@ -1128,20 +1138,19 @@ mod tests { fn make_open_chunk() -> CatalogChunk { let addr = chunk_addr(); let mb_chunk = make_mb_chunk(&addr.table_name); - let time_of_write = Utc::now(); CatalogChunk::new_open( addr, mb_chunk, - time_of_write, ChunkMetrics::new_unregistered(), ChunkOrder::new(5).unwrap(), + Arc::new(time::SystemProvider::new()), ) } async fn make_persisted_chunk() -> CatalogChunk { let addr = chunk_addr(); - let now = Utc::now(); + let now = Time::from_timestamp_nanos(43564); // assemble ParquetChunk let parquet_chunk = make_parquet_chunk(addr.clone()).await; @@ -1154,6 +1163,7 @@ mod tests { ChunkMetrics::new_unregistered(), vec![], ChunkOrder::new(6).unwrap(), + Arc::new(time::SystemProvider::new()), ) } } diff --git a/server/src/db/catalog/partition.rs b/server/src/db/catalog/partition.rs index 2e323e2967..74b9336fd5 100644 --- a/server/src/db/catalog/partition.rs +++ b/server/src/db/catalog/partition.rs @@ -2,7 +2,6 @@ use super::chunk::{CatalogChunk, Error as ChunkError}; use crate::db::catalog::metrics::PartitionMetrics; -use chrono::{DateTime, Utc}; use data_types::{ chunk_metadata::{ChunkAddr, ChunkId, ChunkLifecycleAction, ChunkOrder, ChunkSummary}, partition_metadata::{PartitionAddr, PartitionSummary}, @@ -16,6 +15,7 @@ use predicate::delete_predicate::DeletePredicate; use schema::Schema; use snafu::{OptionExt, Snafu}; use std::{collections::BTreeMap, fmt::Display, sync::Arc}; +use time::{Time, TimeProvider}; use tracker::RwLock; #[derive(Debug, 
Snafu)] @@ -120,11 +120,11 @@ pub struct Partition { chunks: ChunkCollection, /// When this partition was created - created_at: DateTime<Utc>, + created_at: Time, /// the last time at which write was made to this /// partition. Partition::new initializes this to now. - last_write_at: DateTime<Utc>, + last_write_at: Time, /// Partition metrics metrics: Arc<PartitionMetrics>, @@ -134,6 +134,9 @@ pub struct Partition { /// Tracks next chunk order in this partition. next_chunk_order: ChunkOrder, + + /// The time provider + time_provider: Arc<dyn TimeProvider>, } impl Partition { @@ -141,8 +144,12 @@ impl Partition { /// /// This function is not pub because `Partition`s should be created using the interfaces on /// [`Catalog`](crate::db::catalog::Catalog) and not instantiated directly. - pub(super) fn new(addr: PartitionAddr, metrics: PartitionMetrics) -> Self { - let now = Utc::now(); + pub(super) fn new( + addr: PartitionAddr, + metrics: PartitionMetrics, + time_provider: Arc<dyn TimeProvider>, + ) -> Self { + let now = time_provider.now(); Self { addr, chunks: Default::default(), @@ -151,6 +158,7 @@ impl Partition { metrics: Arc::new(metrics), persistence_windows: None, next_chunk_order: ChunkOrder::MIN, + time_provider, } } @@ -176,16 +184,16 @@ impl Partition { /// Update the last write time to now pub fn update_last_write_at(&mut self) { - self.last_write_at = Utc::now(); + self.last_write_at = self.time_provider.now(); } /// Return the time at which this partition was created - pub fn created_at(&self) -> DateTime<Utc> { + pub fn created_at(&self) -> Time { self.created_at } /// Return the time at which the last write was written to this partititon - pub fn last_write_at(&self) -> DateTime<Utc> { + pub fn last_write_at(&self) -> Time { self.last_write_at } @@ -198,7 +206,6 @@ impl Partition { pub fn create_open_chunk( &mut self, chunk: mutable_buffer::chunk::MBChunk, - time_of_write: DateTime<Utc>, ) -> &Arc<RwLock<CatalogChunk>> { assert_eq!(chunk.table_name().as_ref(), self.table_name()); @@ -210,9 +217,9 @@ impl Partition { let chunk = CatalogChunk::new_open( addr, chunk, - time_of_write, self.metrics.new_chunk_metrics(), chunk_order, + Arc::clone(&self.time_provider), ); let chunk = Arc::new(self.metrics.new_chunk_lock(chunk)); self.chunks.insert(chunk_id, chunk_order, chunk) @@ -225,8 +232,8 @@ impl Partition { pub fn create_rub_chunk( &mut self, chunk: read_buffer::RBChunk, - time_of_first_write: DateTime<Utc>, - time_of_last_write: DateTime<Utc>, + time_of_first_write: Time, + time_of_last_write: Time, schema: Arc<Schema>, delete_predicates: Vec<Arc<DeletePredicate>>, chunk_order: ChunkOrder, @@ -252,6 +259,7 @@ impl Partition { self.metrics.new_chunk_metrics(), delete_predicates, chunk_order, + Arc::clone(&self.time_provider), ))); let chunk = self.chunks.insert(chunk_id, chunk_order, chunk); @@ -269,8 +277,8 @@ impl Partition { &mut self, chunk_id: ChunkId, chunk: Arc<parquet_file::chunk::ParquetChunk>, - time_of_first_write: DateTime<Utc>, - time_of_last_write: DateTime<Utc>, + time_of_first_write: Time, + time_of_last_write: Time, delete_predicates: Vec<Arc<DeletePredicate>>, chunk_order: ChunkOrder, ) -> &Arc<RwLock<CatalogChunk>> { @@ -288,6 +296,7 @@ impl Partition { self.metrics.new_chunk_metrics(), delete_predicates, chunk_order, + Arc::clone(&self.time_provider), )), ); @@ -440,18 +449,17 @@ mod tests { Arc::clone(&addr.db_name), Arc::clone(®istry), )); + let time_provider = Arc::new(time::SystemProvider::new()); let table_metrics = 
Arc::new(catalog_metrics.new_table_metrics("t")); let partition_metrics = table_metrics.new_partition_metrics(); - let t = Utc::now(); - // should be in ascending order let mut expected_ids = vec![]; // Make three chunks - let mut partition = Partition::new(addr, partition_metrics); + let mut partition = Partition::new(addr, partition_metrics, time_provider); for _ in 0..3 { - let chunk = partition.create_open_chunk(make_mb_chunk("t"), t); + let chunk = partition.create_open_chunk(make_mb_chunk("t")); expected_ids.push(chunk.read().addr().chunk_id) } diff --git a/server/src/db/catalog/table.rs b/server/src/db/catalog/table.rs index 74b71f5756..812e7c2a5f 100644 --- a/server/src/db/catalog/table.rs +++ b/server/src/db/catalog/table.rs @@ -8,6 +8,7 @@ use schema::{ Schema, }; use std::{ops::Deref, result::Result, sync::Arc}; +use time::TimeProvider; use tracker::{RwLock, RwLockReadGuard, RwLockWriteGuard}; /// A `Table` is a collection of `Partition` each of which is a collection of `Chunk` @@ -31,6 +32,8 @@ pub struct Table { /// - the outer `Arc<RwLock<...>>` so so that we can reference the locked schema w/o a lifetime to the table /// - the inner `Arc<Schema>` is a schema that we don't need to copy when moving it around the query stack schema: Arc<RwLock<Arc<Schema>>>, + + time_provider: Arc<dyn TimeProvider>, } impl Table { @@ -39,7 +42,12 @@ impl Table { /// This function is not pub because `Table`s should be /// created using the interfaces on [`Catalog`](crate::db::catalog::Catalog) and not /// instantiated directly. - pub(super) fn new(db_name: Arc<str>, table_name: Arc<str>, metrics: TableMetrics) -> Self { + pub(super) fn new( + db_name: Arc<str>, + table_name: Arc<str>, + metrics: TableMetrics, + time_provider: Arc<dyn TimeProvider>, + ) -> Self { // build empty schema for this table let mut builder = SchemaBuilder::new(); builder.measurement(table_name.as_ref()); @@ -52,6 +60,7 @@ impl Table { partitions: Default::default(), metrics: Arc::new(metrics), schema, + time_provider, } } @@ -70,6 +79,7 @@ impl Table { let metrics = &self.metrics; let db_name = &self.db_name; let table_name = &self.table_name; + let time_provider = &self.time_provider; let (_, partition) = self .partitions .raw_entry_mut() @@ -84,6 +94,7 @@ impl Table { partition_key: Arc::clone(&partition_key), }, partition_metrics, + Arc::clone(time_provider), ); let partition = Arc::new(metrics.new_partition_lock(partition)); (partition_key, partition) diff --git a/server/src/db/chunk.rs b/server/src/db/chunk.rs index cf41112f35..25bc4a01da 100644 --- a/server/src/db/chunk.rs +++ b/server/src/db/chunk.rs @@ -1,7 +1,6 @@ use super::{ catalog::chunk::ChunkMetadata, pred::to_read_buffer_predicate, streams::ReadFilterResultsStream, }; -use chrono::{DateTime, Utc}; use data_types::{ chunk_metadata::{ChunkId, ChunkOrder}, partition_metadata, @@ -26,6 +25,7 @@ use std::{ collections::{BTreeMap, BTreeSet}, sync::Arc, }; +use time::Time; #[allow(clippy::enum_variant_names)] #[derive(Debug, Snafu)] @@ -81,8 +81,8 @@ pub struct DbChunk { access_recorder: AccessRecorder, state: State, meta: Arc<ChunkMetadata>, - time_of_first_write: DateTime<Utc>, - time_of_last_write: DateTime<Utc>, + time_of_first_write: Time, + time_of_last_write: Time, order: ChunkOrder, } @@ -218,11 +218,11 @@ impl DbChunk { &self.table_name } - pub fn time_of_first_write(&self) -> DateTime<Utc> { + pub fn time_of_first_write(&self) -> Time { self.time_of_first_write } - pub fn time_of_last_write(&self) -> DateTime<Utc> { + pub fn time_of_last_write(&self) 
-> Time { self.time_of_last_write } @@ -343,7 +343,7 @@ impl QueryChunk for DbChunk { // when possible for performance gain debug!(?predicate, "Input Predicate to read_filter"); - self.access_recorder.record_access_now(); + self.access_recorder.record_access(); debug!(?delete_predicates, "Input Delete Predicates to read_filter"); @@ -419,7 +419,7 @@ impl QueryChunk for DbChunk { // TODO: Support predicates return Ok(None); } - self.access_recorder.record_access_now(); + self.access_recorder.record_access(); Ok(chunk.column_names(columns)) } State::ReadBuffer { chunk, .. } => { @@ -431,7 +431,7 @@ impl QueryChunk for DbChunk { } }; - self.access_recorder.record_access_now(); + self.access_recorder.record_access(); Ok(Some( chunk .column_names(rb_predicate, columns, BTreeSet::new()) @@ -445,7 +445,7 @@ impl QueryChunk for DbChunk { // TODO: Support predicates when MB supports it return Ok(None); } - self.access_recorder.record_access_now(); + self.access_recorder.record_access(); Ok(chunk.column_names(columns)) } } @@ -471,7 +471,7 @@ impl QueryChunk for DbChunk { } }; - self.access_recorder.record_access_now(); + self.access_recorder.record_access(); let mut values = chunk .column_values( rb_predicate, @@ -555,70 +555,74 @@ impl QueryChunkMeta for DbChunk { #[cfg(test)] mod tests { use super::*; - use crate::{ - db::{ - catalog::chunk::{CatalogChunk, ChunkStage}, - test_helpers::{write_lp, write_lp_with_time}, - }, - utils::make_db, + use crate::db::{ + catalog::chunk::{CatalogChunk, ChunkStage}, + test_helpers::write_lp, }; + use crate::utils::make_db_time; use data_types::chunk_metadata::ChunkStorage; + use std::time::Duration; - async fn test_chunk_access(chunk: &CatalogChunk) { - let t1 = chunk.access_recorder().get_metrics(); + async fn test_chunk_access(chunk: &CatalogChunk, time: Arc<time::MockProvider>) { + let m1 = chunk.access_recorder().get_metrics(); let snapshot = DbChunk::snapshot(chunk); - let t2 = chunk.access_recorder().get_metrics(); + let m2 = chunk.access_recorder().get_metrics(); + let t1 = time.inc(Duration::from_secs(1)); snapshot .read_filter(&Default::default(), Selection::All, &[]) .unwrap(); - let t3 = chunk.access_recorder().get_metrics(); + let m3 = chunk.access_recorder().get_metrics(); + let t2 = time.inc(Duration::from_secs(1)); let column_names = snapshot .column_names(&Default::default(), Selection::All) .unwrap() .is_some(); - let t4 = chunk.access_recorder().get_metrics(); + let m4 = chunk.access_recorder().get_metrics(); + let t3 = time.inc(Duration::from_secs(1)); let column_values = snapshot .column_values("tag", &Default::default()) .unwrap() .is_some(); - let t5 = chunk.access_recorder().get_metrics(); + let m5 = chunk.access_recorder().get_metrics(); // Snapshot shouldn't count as an access - assert_eq!(t1, t2); + assert_eq!(m1, m2); // Query should count as an access - assert_eq!(t2.count + 1, t3.count); - assert!(t2.last_access < t3.last_access); + assert_eq!(m2.count + 1, m3.count); + assert!(m2.last_access < m3.last_access); + assert_eq!(m3.last_access, t1); // If column names successful should record access match column_names { true => { - assert_eq!(t3.count + 1, t4.count); - assert!(t3.last_access < t4.last_access); + assert_eq!(m3.count + 1, m4.count); + assert_eq!(m4.last_access, t2); } false => { - assert_eq!(t3, t4); + assert_eq!(m3, m4); } } // If column values successful should record access match column_values { true => { - assert_eq!(t4.count + 1, t5.count); - assert!(t4.last_access < t5.last_access); + assert_eq!(m4.count + 1, 
m5.count); + assert!(m4.last_access < m5.last_access); + assert_eq!(m5.last_access, t3); } false => { - assert_eq!(t4, t5); + assert_eq!(m4, m5); } } } #[tokio::test] async fn mub_records_access() { - let db = make_db().await.db; + let (db, time) = make_db_time().await; write_lp(&db, "cpu,tag=1 bar=1 1").await; @@ -628,12 +632,12 @@ mod tests { let chunk = chunk.read(); assert_eq!(chunk.storage().1, ChunkStorage::OpenMutableBuffer); - test_chunk_access(&chunk).await; + test_chunk_access(&chunk, time).await; } #[tokio::test] async fn rub_records_access() { - let db = make_db().await.db; + let (db, time) = make_db_time().await; write_lp(&db, "cpu,tag=1 bar=1 1").await; db.compact_partition("cpu", "1970-01-01T00").await.unwrap(); @@ -644,15 +648,15 @@ mod tests { let chunk = chunk.read(); assert_eq!(chunk.storage().1, ChunkStorage::ReadBuffer); - test_chunk_access(&chunk).await + test_chunk_access(&chunk, time).await } #[tokio::test] async fn parquet_records_access() { - let db = make_db().await.db; + let (db, time) = make_db_time().await; - let creation_time = Utc::now(); - write_lp_with_time(&db, "cpu,tag=1 bar=1 1", creation_time).await; + let t0 = time.inc(Duration::from_secs(324)); + write_lp(&db, "cpu,tag=1 bar=1 1").await; let id = db .persist_partition("cpu", "1970-01-01T00", true) @@ -668,22 +672,24 @@ mod tests { let chunk = chunks.into_iter().next().unwrap(); let chunk = chunk.read(); assert_eq!(chunk.storage().1, ChunkStorage::ObjectStoreOnly); + let first_write = chunk.time_of_first_write(); let last_write = chunk.time_of_last_write(); - assert_eq!(first_write, last_write); - assert_eq!(first_write, creation_time); + assert_eq!(first_write, t0); + assert_eq!(last_write, t0); - test_chunk_access(&chunk).await + test_chunk_access(&chunk, time).await } #[tokio::test] async fn parquet_snapshot() { - let db = make_db().await.db; + let (db, time) = make_db_time().await; - let w0 = Utc::now(); - write_lp_with_time(&db, "cpu,tag=1 bar=1 1", w0).await; - let w1 = w0 + chrono::Duration::seconds(4); - write_lp_with_time(&db, "cpu,tag=2 bar=2 2", w1).await; + let w0 = time.inc(Duration::from_secs(10)); + write_lp(&db, "cpu,tag=1 bar=1 1").await; + + let w1 = time.inc(Duration::from_secs(10)); + write_lp(&db, "cpu,tag=2 bar=2 2").await; db.persist_partition("cpu", "1970-01-01T00", true) .await diff --git a/server/src/db/lifecycle.rs b/server/src/db/lifecycle.rs index e94a16b5eb..a481a497af 100644 --- a/server/src/db/lifecycle.rs +++ b/server/src/db/lifecycle.rs @@ -331,7 +331,7 @@ impl LifecycleChunk for CatalogChunk { self.access_recorder().get_metrics() } - fn time_of_last_write(&self) -> DateTime<Utc> { + fn time_of_last_write(&self) -> Time { self.time_of_last_write() } diff --git a/server/src/db/lifecycle/compact.rs b/server/src/db/lifecycle/compact.rs index a9ffe76021..9800164d63 100644 --- a/server/src/db/lifecycle/compact.rs +++ b/server/src/db/lifecycle/compact.rs @@ -6,13 +6,13 @@ use crate::db::{ lifecycle::collect_rub, DbChunk, }; -use chrono::{DateTime, Utc}; use data_types::{chunk_metadata::ChunkOrder, job::Job}; use lifecycle::LifecycleWriteGuard; use observability_deps::tracing::info; use predicate::delete_predicate::DeletePredicate; use query::{compute_sort_key, exec::ExecutorType, frontend::reorg::ReorgPlanner, QueryChunkMeta}; use std::{collections::HashSet, future::Future, sync::Arc}; +use time::Time; use tracker::{TaskTracker, TrackedFuture, TrackedFutureExt}; /// Compact the provided chunks into a single chunk, @@ -45,8 +45,8 @@ pub(crate) fn compact_chunks( // Mark and 
snapshot chunks, then drop locks let mut input_rows = 0; - let mut time_of_first_write: Option<DateTime<Utc>> = None; - let mut time_of_last_write: Option<DateTime<Utc>> = None; + let mut time_of_first_write: Option<Time> = None; + let mut time_of_last_write: Option<Time> = None; let mut delete_predicates_before: HashSet<Arc<DeletePredicate>> = HashSet::new(); let mut min_order = ChunkOrder::MAX; let query_chunks = chunks @@ -168,24 +168,26 @@ pub(crate) fn compact_chunks( mod tests { use super::*; use crate::db::test_helpers::write_lp; - use crate::{db::test_helpers::write_lp_with_time, utils::make_db}; + use crate::utils::{make_db, make_db_time}; use data_types::chunk_metadata::ChunkStorage; use data_types::timestamp::TimestampRange; use lifecycle::{LockableChunk, LockablePartition}; use predicate::delete_expr::{DeleteExpr, Op, Scalar}; use query::QueryDatabase; + use std::time::Duration; #[tokio::test] async fn test_compact_freeze() { - let db = make_db().await.db; + let (db, time) = make_db_time().await; - let t_first_write = Utc::now(); - write_lp_with_time(db.as_ref(), "cpu,tag1=cupcakes bar=1 10", t_first_write).await; - write_lp_with_time(db.as_ref(), "cpu,tag1=asfd,tag2=foo bar=2 20", Utc::now()).await; - write_lp_with_time(db.as_ref(), "cpu,tag1=bingo,tag2=foo bar=2 10", Utc::now()).await; - write_lp_with_time(db.as_ref(), "cpu,tag1=bongo,tag2=a bar=2 20", Utc::now()).await; - let t_last_write = Utc::now(); - write_lp_with_time(db.as_ref(), "cpu,tag1=bongo,tag2=a bar=2 10", t_last_write).await; + let t_first_write = time.inc(Duration::from_secs(1)); + write_lp(db.as_ref(), "cpu,tag1=cupcakes bar=1 10").await; + write_lp(db.as_ref(), "cpu,tag1=asfd,tag2=foo bar=2 20").await; + write_lp(db.as_ref(), "cpu,tag1=bingo,tag2=foo bar=2 10").await; + write_lp(db.as_ref(), "cpu,tag1=bongo,tag2=a bar=2 20").await; + + let t_last_write = time.inc(Duration::from_secs(1)); + write_lp(db.as_ref(), "cpu,tag1=bongo,tag2=a bar=2 10").await; let partition_keys = db.partition_keys().unwrap(); assert_eq!(partition_keys.len(), 1); @@ -201,8 +203,8 @@ mod tests { let (_, fut) = compact_chunks(partition.upgrade(), vec![chunk.upgrade()]).unwrap(); // NB: perform the write before spawning the background task that performs the compaction - let t_later_write = Utc::now(); - write_lp_with_time(db.as_ref(), "cpu,tag1=bongo,tag2=a bar=2 40", t_later_write).await; + let t_later_write = time.inc(Duration::from_secs(1)); + write_lp(db.as_ref(), "cpu,tag1=bongo,tag2=a bar=2 40").await; tokio::spawn(fut).await.unwrap().unwrap().unwrap(); let mut chunk_summaries: Vec<_> = db_partition.read().chunk_summaries().collect(); diff --git a/server/src/db/lifecycle/persist.rs b/server/src/db/lifecycle/persist.rs index a98adcd801..0358e4cbfb 100644 --- a/server/src/db/lifecycle/persist.rs +++ b/server/src/db/lifecycle/persist.rs @@ -6,7 +6,6 @@ use crate::db::{ lifecycle::{collect_rub, merge_schemas, write::write_chunk_to_object_store}, DbChunk, }; -use chrono::{DateTime, Utc}; use data_types::{chunk_metadata::ChunkOrder, job::Job}; use lifecycle::{LifecycleWriteGuard, LockableChunk, LockablePartition}; use observability_deps::tracing::info; @@ -14,6 +13,7 @@ use persistence_windows::persistence_windows::FlushHandle; use predicate::delete_predicate::DeletePredicate; use query::{compute_sort_key, exec::ExecutorType, frontend::reorg::ReorgPlanner, QueryChunkMeta}; use std::{collections::HashSet, future::Future, sync::Arc}; +use time::Time; use tracker::{TaskTracker, TrackedFuture, TrackedFutureExt}; /// Split and then persist 
the provided chunks @@ -47,8 +47,8 @@ pub fn persist_chunks( // Mark and snapshot chunks, then drop locks let mut input_rows = 0; - let mut time_of_first_write: Option<DateTime<Utc>> = None; - let mut time_of_last_write: Option<DateTime<Utc>> = None; + let mut time_of_first_write: Option<Time> = None; + let mut time_of_last_write: Option<Time> = None; let mut query_chunks = vec![]; let mut delete_predicates_before: HashSet<Arc<DeletePredicate>> = HashSet::new(); let mut min_order = ChunkOrder::MAX; @@ -561,10 +561,16 @@ mod tests { // check object store delete predicates let metric_registry = Arc::new(metric::Registry::new()); let config = PreservedCatalogConfig::new(Arc::clone(&db.iox_object_store)); - let (_preserved_catalog, catalog, _replay_plan) = - load_or_create_preserved_catalog(db_name, config, metric_registry, false, false) - .await - .unwrap(); + let (_preserved_catalog, catalog, _replay_plan) = load_or_create_preserved_catalog( + db_name, + config, + metric_registry, + Arc::clone(&db.time_provider), + false, + false, + ) + .await + .unwrap(); check_closure(&catalog); } } diff --git a/server/src/db/lifecycle/write.rs b/server/src/db/lifecycle/write.rs index 9b1213ebc6..106a2a4ff7 100644 --- a/server/src/db/lifecycle/write.rs +++ b/server/src/db/lifecycle/write.rs @@ -125,7 +125,7 @@ pub(super) fn write_chunk_to_object_store( // IMPORTANT: Writing must take place while holding the cleanup lock, otherwise the file might be deleted // between creation and the transaction commit. let metadata = IoxMetadata { - creation_timestamp: db.utc_now(), + creation_timestamp: db.time_provider.now(), table_name: Arc::clone(&table_name), partition_key: Arc::clone(&partition_key), chunk_id: addr.chunk_id, diff --git a/server/src/db/load.rs b/server/src/db/load.rs index c7e252c225..923b6c7e5d 100644 --- a/server/src/db/load.rs +++ b/server/src/db/load.rs @@ -19,6 +19,7 @@ use persistence_windows::checkpoint::{ReplayPlan, ReplayPlanner}; use predicate::delete_predicate::DeletePredicate; use snafu::{ResultExt, Snafu}; use std::sync::Arc; +use time::TimeProvider; #[derive(Debug, Snafu)] pub enum Error { @@ -54,6 +55,7 @@ pub async fn load_or_create_preserved_catalog( db_name: &str, config: PreservedCatalogConfig, metric_registry: Arc<::metric::Registry>, + time_provider: Arc<dyn TimeProvider>, wipe_on_error: bool, skip_replay: bool, ) -> Result<(PreservedCatalog, Catalog, Option<ReplayPlan>)> { @@ -61,7 +63,11 @@ pub async fn load_or_create_preserved_catalog( match PreservedCatalog::load( db_name, config.clone(), - LoaderEmptyInput::new(Arc::clone(&metric_registry), skip_replay), + LoaderEmptyInput::new( + Arc::clone(&metric_registry), + Arc::clone(&time_provider), + skip_replay, + ), ) .await { @@ -84,7 +90,8 @@ pub async fn load_or_create_preserved_catalog( db_name ); - create_preserved_catalog(db_name, config, metric_registry, skip_replay).await + create_preserved_catalog(db_name, config, metric_registry, time_provider, skip_replay) + .await } Err(e) => { if wipe_on_error { @@ -96,7 +103,14 @@ pub async fn load_or_create_preserved_catalog( .await .context(CannotWipeCatalog)?; - create_preserved_catalog(db_name, config, metric_registry, skip_replay).await + create_preserved_catalog( + db_name, + config, + metric_registry, + time_provider, + skip_replay, + ) + .await } else { Err(Error::CannotLoadCatalog { source: e }) } @@ -111,12 +125,13 @@ pub async fn create_preserved_catalog( db_name: &str, config: PreservedCatalogConfig, metric_registry: Arc<metric::Registry>, + time_provider: Arc<dyn 
TimeProvider>, skip_replay: bool, ) -> Result<(PreservedCatalog, Catalog, Option<ReplayPlan>)> { let (preserved_catalog, loader) = PreservedCatalog::new_empty( db_name, config, - LoaderEmptyInput::new(metric_registry, skip_replay), + LoaderEmptyInput::new(metric_registry, time_provider, skip_replay), ) .await .context(CannotCreateCatalog)?; @@ -135,13 +150,19 @@ pub async fn create_preserved_catalog( #[derive(Debug)] struct LoaderEmptyInput { metric_registry: Arc<::metric::Registry>, + time_provider: Arc<dyn TimeProvider>, skip_replay: bool, } impl LoaderEmptyInput { - fn new(metric_registry: Arc<metric::Registry>, skip_replay: bool) -> Self { + fn new( + metric_registry: Arc<metric::Registry>, + time_provider: Arc<dyn TimeProvider>, + skip_replay: bool, + ) -> Self { Self { metric_registry, + time_provider, skip_replay, } } @@ -159,8 +180,14 @@ impl CatalogState for Loader { type EmptyInput = LoaderEmptyInput; fn new_empty(db_name: &str, data: Self::EmptyInput) -> Self { + let catalog = Catalog::new( + Arc::from(db_name), + Arc::clone(&data.metric_registry), + Arc::clone(&data.time_provider), + ); + Self { - catalog: Catalog::new(Arc::from(db_name), Arc::clone(&data.metric_registry)), + catalog, planner: (!data.skip_replay).then(ReplayPlanner::new), metric_registry: Arc::new(Default::default()), } @@ -306,6 +333,7 @@ mod tests { #[tokio::test] async fn load_or_create_preserved_catalog_recovers_from_error() { let object_store = Arc::new(ObjectStore::new_in_memory()); + let time_provider: Arc<dyn TimeProvider> = Arc::new(time::SystemProvider::new()); let server_id = ServerId::try_from(1).unwrap(); let db_name = DatabaseName::new("preserved_catalog_test").unwrap(); let iox_object_store = Arc::new( @@ -319,9 +347,16 @@ mod tests { parquet_file::catalog::test_helpers::break_catalog_with_weird_version(&preserved_catalog) .await; - load_or_create_preserved_catalog(&db_name, config, Default::default(), true, false) - .await - .unwrap(); + load_or_create_preserved_catalog( + &db_name, + config, + Default::default(), + time_provider, + true, + false, + ) + .await + .unwrap(); } fn checkpoint_data_from_loader(loader: &Loader) -> CheckpointData { @@ -332,6 +367,7 @@ mod tests { async fn test_catalog_state() { let empty_input = LoaderEmptyInput { metric_registry: Default::default(), + time_provider: Arc::new(time::SystemProvider::new()), skip_replay: false, }; assert_catalog_state_implementation::<Loader, _>(empty_input, checkpoint_data_from_loader) diff --git a/server/src/db/replay.rs b/server/src/db/replay.rs index e2e9d1d6b5..d77b29277d 100644 --- a/server/src/db/replay.rs +++ b/server/src/db/replay.rs @@ -237,7 +237,6 @@ pub async fn perform_replay( |sequence, partition_key, table_batch| { filter_entry(sequence, partition_key, table_batch, replay_plan) }, - db.utc_now(), ) { Ok(_) => { break; diff --git a/server/src/db/system_tables/chunks.rs b/server/src/db/system_tables/chunks.rs index ed06e8f59e..5574d2b05b 100644 --- a/server/src/db/system_tables/chunks.rs +++ b/server/src/db/system_tables/chunks.rs @@ -5,9 +5,9 @@ use arrow::{ error::Result, record_batch::RecordBatch, }; -use chrono::{DateTime, Utc}; use data_types::{chunk_metadata::ChunkSummary, error::ErrorLogger}; use std::sync::Arc; +use time::Time; /// Implementation of system.chunks table #[derive(Debug)] @@ -55,11 +55,11 @@ fn chunk_summaries_schema() -> SchemaRef { } // TODO: Use a custom proc macro or serde to reduce the boilerplate -fn optional_time_to_ts(time: Option<DateTime<Utc>>) -> Option<i64> { +fn optional_time_to_ts(time: 
Option<Time>) -> Option<i64> { time.and_then(time_to_ts) } -fn time_to_ts(ts: DateTime<Utc>) -> Option<i64> { +fn time_to_ts(ts: Time) -> Option<i64> { Some(ts.timestamp_nanos()) } @@ -139,7 +139,6 @@ fn from_chunk_summaries(schema: SchemaRef, chunks: Vec<ChunkSummary>) -> Result< mod tests { use super::*; use arrow_util::assert_batches_eq; - use chrono::{TimeZone, Utc}; use data_types::chunk_metadata::{ChunkId, ChunkLifecycleAction, ChunkOrder, ChunkStorage}; #[test] @@ -155,8 +154,8 @@ mod tests { object_store_bytes: 0, row_count: 11, time_of_last_access: None, - time_of_first_write: Utc.timestamp_nanos(10_000_000_000), - time_of_last_write: Utc.timestamp_nanos(10_000_000_000), + time_of_first_write: Time::from_timestamp_nanos(10_000_000_000), + time_of_last_write: Time::from_timestamp_nanos(10_000_000_000), order: ChunkOrder::new(5).unwrap(), }, ChunkSummary { @@ -168,9 +167,9 @@ mod tests { memory_bytes: 23455, object_store_bytes: 0, row_count: 22, - time_of_last_access: Some(Utc.timestamp_nanos(754_000_000_000)), - time_of_first_write: Utc.timestamp_nanos(80_000_000_000), - time_of_last_write: Utc.timestamp_nanos(80_000_000_000), + time_of_last_access: Some(Time::from_timestamp_nanos(754_000_000_000)), + time_of_first_write: Time::from_timestamp_nanos(80_000_000_000), + time_of_last_write: Time::from_timestamp_nanos(80_000_000_000), order: ChunkOrder::new(6).unwrap(), }, ChunkSummary { @@ -182,9 +181,9 @@ mod tests { memory_bytes: 1234, object_store_bytes: 5678, row_count: 33, - time_of_last_access: Some(Utc.timestamp_nanos(5_000_000_000)), - time_of_first_write: Utc.timestamp_nanos(100_000_000_000), - time_of_last_write: Utc.timestamp_nanos(200_000_000_000), + time_of_last_access: Some(Time::from_timestamp_nanos(5_000_000_000)), + time_of_first_write: Time::from_timestamp_nanos(100_000_000_000), + time_of_last_write: Time::from_timestamp_nanos(200_000_000_000), order: ChunkOrder::new(7).unwrap(), }, ]; diff --git a/server/src/db/system_tables/columns.rs b/server/src/db/system_tables/columns.rs index 130d96b229..1ebb0ac3ae 100644 --- a/server/src/db/system_tables/columns.rs +++ b/server/src/db/system_tables/columns.rs @@ -218,11 +218,11 @@ fn assemble_chunk_columns( mod tests { use super::*; use arrow_util::assert_batches_eq; - use chrono::{TimeZone, Utc}; use data_types::{ chunk_metadata::{ChunkColumnSummary, ChunkId, ChunkOrder, ChunkStorage, ChunkSummary}, partition_metadata::{ColumnSummary, InfluxDbType, StatValues, Statistics}, }; + use time::Time; #[test] fn test_from_partition_summaries() { @@ -318,8 +318,8 @@ mod tests { object_store_bytes: 0, row_count: 11, time_of_last_access: None, - time_of_first_write: Utc.timestamp_nanos(1), - time_of_last_write: Utc.timestamp_nanos(2), + time_of_first_write: Time::from_timestamp_nanos(1), + time_of_last_write: Time::from_timestamp_nanos(2), order: ChunkOrder::new(5).unwrap(), }, columns: vec![ @@ -354,8 +354,8 @@ mod tests { object_store_bytes: 0, row_count: 11, time_of_last_access: None, - time_of_first_write: Utc.timestamp_nanos(1), - time_of_last_write: Utc.timestamp_nanos(2), + time_of_first_write: Time::from_timestamp_nanos(1), + time_of_last_write: Time::from_timestamp_nanos(2), order: ChunkOrder::new(6).unwrap(), }, columns: vec![ChunkColumnSummary { @@ -384,8 +384,8 @@ mod tests { object_store_bytes: 0, row_count: 11, time_of_last_access: None, - time_of_first_write: Utc.timestamp_nanos(1), - time_of_last_write: Utc.timestamp_nanos(2), + time_of_first_write: Time::from_timestamp_nanos(1), + time_of_last_write: 
Time::from_timestamp_nanos(2), order: ChunkOrder::new(5).unwrap(), }, columns: vec![ChunkColumnSummary { diff --git a/server/src/lib.rs b/server/src/lib.rs index 448ad9da17..53c41031ed 100644 --- a/server/src/lib.rs +++ b/server/src/lib.rs @@ -69,7 +69,6 @@ )] use async_trait::async_trait; -use chrono::Utc; use data_types::{ chunk_metadata::ChunkId, database_rules::{NodeGroup, RoutingRules, ShardId, Sink}, @@ -918,7 +917,7 @@ where use database::WriteError; self.active_database(db_name)? - .write_entry(entry, Utc::now()) + .write_entry(entry) .await .map_err(|e| match e { WriteError::NotInitialized { .. } => Error::DatabaseNotInitialized { diff --git a/server/src/utils.rs b/server/src/utils.rs index c7caa73e85..664e18c67c 100644 --- a/server/src/utils.rs +++ b/server/src/utils.rs @@ -96,6 +96,7 @@ impl TestDbBuilder { db_name.as_str(), config, Arc::clone(&metric_registry), + Arc::clone(&time_provider), false, false, ) diff --git a/server/src/write_buffer.rs b/server/src/write_buffer.rs index 67c925d51f..d0c2e691c0 100644 --- a/server/src/write_buffer.rs +++ b/server/src/write_buffer.rs @@ -2,7 +2,6 @@ use std::future::Future; use std::sync::Arc; use std::time::{Duration, Instant}; -use chrono::Utc; use futures::future::{BoxFuture, Shared}; use futures::stream::{BoxStream, FuturesUnordered}; use futures::{FutureExt, StreamExt, TryFutureExt}; @@ -155,7 +154,6 @@ async fn stream_in_sequenced_entries<'a>( match db.store_sequenced_entry( Arc::clone(&sequenced_entry), crate::db::filter_table_batch_keep_all, - Utc::now(), ) { Ok(_) => { metrics.success(); From c31bcbced542d247f119c62f6763fbde18148ed0 Mon Sep 17 00:00:00 2001 From: Andrew Lamb <alamb@influxdata.com> Date: Tue, 12 Oct 2021 10:28:41 -0400 Subject: [PATCH 09/17] feat: Translate null in fields correctly to timeseries (Frames) (#2799) * feat: Add gRPC frame dumping to trace output * feat: Translate null in fields correctly to timeseries (Frames) * refactor: reduce ceremony of iterating over data * docs: Update src/influxdb_ioxd/rpc/storage/data.rs * refactor: rename macro * refactor: use .then() * refactor: use try_for_each --- src/influxdb_ioxd/rpc/storage/data.rs | 387 ++++++++++++++++---------- 1 file changed, 244 insertions(+), 143 deletions(-) diff --git a/src/influxdb_ioxd/rpc/storage/data.rs b/src/influxdb_ioxd/rpc/storage/data.rs index 2658216b90..6bc7a008a7 100644 --- a/src/influxdb_ioxd/rpc/storage/data.rs +++ b/src/influxdb_ioxd/rpc/storage/data.rs @@ -1,16 +1,18 @@ //! This module contains code to translate from InfluxDB IOx data //! 
formats into the formats needed by gRPC -use std::{collections::BTreeSet, sync::Arc}; +use std::{collections::BTreeSet, fmt, sync::Arc}; use arrow::{ array::{ ArrayRef, BooleanArray, Float64Array, Int64Array, StringArray, TimestampNanosecondArray, UInt64Array, }, + bitmap::Bitmap, datatypes::DataType as ArrowDataType, }; +use observability_deps::tracing::trace; use query::exec::{ field::FieldIndex, fieldlist::FieldList, @@ -31,11 +33,11 @@ use snafu::Snafu; #[derive(Debug, Snafu)] pub enum Error { - #[snafu(display("Unsupported data type in gRPC data translation: {}", type_name))] - UnsupportedDataType { type_name: String }, + #[snafu(display("Unsupported data type in gRPC data translation: {}", data_type))] + UnsupportedDataType { data_type: ArrowDataType }, - #[snafu(display("Unsupported field data type in gRPC data translation: {}", type_name))] - UnsupportedFieldType { type_name: String }, + #[snafu(display("Unsupported field data type in gRPC data translation: {}", data_type))] + UnsupportedFieldType { data_type: ArrowDataType }, } pub type Result<T, E = Error> = std::result::Result<T, E>; @@ -101,6 +103,7 @@ pub fn series_set_item_to_read_response(series_set_item: SeriesSetItem) -> Resul } SeriesSetItem::Data(series_set) => series_set_to_frames(series_set)?, }; + trace!(frames=%DisplayableFrames::new(&frames), "Response gRPC frames"); Ok(ReadResponse { frames }) } @@ -145,7 +148,7 @@ fn data_type(array: &ArrayRef) -> Result<DataType> { ArrowDataType::UInt64 => Ok(DataType::Unsigned), ArrowDataType::Boolean => Ok(DataType::Boolean), _ => UnsupportedDataType { - type_name: format!("{:?}", array.data_type()), + data_type: array.data_type().clone(), } .fail(), } @@ -189,12 +192,16 @@ fn field_to_data( }; frames.push(Data::Series(series_frame)); + // Only take timestamps (and values) from the rows that have non + // null values for this field + let valid = array.data().null_bitmap().as_ref(); + let timestamps = batch .column(indexes.timestamp_index) .as_any() .downcast_ref::<TimestampNanosecondArray>() .unwrap() - .extract_values(start_row, num_rows); + .extract_values(start_row, num_rows, valid); frames.push(match array.data_type() { ArrowDataType::Utf8 => { @@ -202,7 +209,7 @@ fn field_to_data( .as_any() .downcast_ref::<StringArray>() .unwrap() - .extract_values(start_row, num_rows); + .extract_values(start_row, num_rows, valid); Data::StringPoints(StringPointsFrame { timestamps, values }) } ArrowDataType::Float64 => { @@ -210,7 +217,8 @@ fn field_to_data( .as_any() .downcast_ref::<Float64Array>() .unwrap() - .extract_values(start_row, num_rows); + .extract_values(start_row, num_rows, valid); + Data::FloatPoints(FloatPointsFrame { timestamps, values }) } ArrowDataType::Int64 => { @@ -218,7 +226,7 @@ fn field_to_data( .as_any() .downcast_ref::<Int64Array>() .unwrap() - .extract_values(start_row, num_rows); + .extract_values(start_row, num_rows, valid); Data::IntegerPoints(IntegerPointsFrame { timestamps, values }) } ArrowDataType::UInt64 => { @@ -226,7 +234,7 @@ fn field_to_data( .as_any() .downcast_ref::<UInt64Array>() .unwrap() - .extract_values(start_row, num_rows); + .extract_values(start_row, num_rows, valid); Data::UnsignedPoints(UnsignedPointsFrame { timestamps, values }) } ArrowDataType::Boolean => { @@ -234,12 +242,12 @@ fn field_to_data( .as_any() .downcast_ref::<BooleanArray>() .unwrap() - .extract_values(start_row, num_rows); + .extract_values(start_row, num_rows, valid); Data::BooleanPoints(BooleanPointsFrame { timestamps, values }) } _ => { return UnsupportedDataType { - 
type_name: format!("{:?}", array.data_type()), + data_type: array.data_type().clone(), } .fail(); } @@ -275,52 +283,68 @@ fn convert_tags(table_name: &str, field_name: &str, tags: &[(Arc<str>, Arc<str>) } trait ExtractValues<T> { - /// Extracts num_rows of data starting from start_row as a vector - fn extract_values(&self, start_row: usize, num_rows: usize) -> Vec<T>; + /// Extracts num_rows of data starting from start_row as a vector, + /// for all rows `i` where `valid[i]` is set + fn extract_values(&self, start_row: usize, num_rows: usize, valid: Option<&Bitmap>) -> Vec<T>; +} + +/// Implements extract_values for a particular type of array that +macro_rules! extract_values_impl { + ($DATA_TYPE:ty) => { + fn extract_values( + &self, + start_row: usize, + num_rows: usize, + valid: Option<&Bitmap>, + ) -> Vec<$DATA_TYPE> { + let end_row = start_row + num_rows; + match valid { + Some(valid) => (start_row..end_row) + .filter_map(|row| valid.is_set(row).then(|| self.value(row))) + .collect(), + None => (start_row..end_row).map(|row| self.value(row)).collect(), + } + } + }; } impl ExtractValues<String> for StringArray { - fn extract_values(&self, start_row: usize, num_rows: usize) -> Vec<String> { + fn extract_values( + &self, + start_row: usize, + num_rows: usize, + valid: Option<&Bitmap>, + ) -> Vec<String> { let end_row = start_row + num_rows; - (start_row..end_row) - .map(|row| self.value(row).to_string()) - .collect() + match valid { + Some(valid) => (start_row..end_row) + .filter_map(|row| valid.is_set(row).then(|| self.value(row).to_string())) + .collect(), + None => (start_row..end_row) + .map(|row| self.value(row).to_string()) + .collect(), + } } } impl ExtractValues<i64> for Int64Array { - fn extract_values(&self, start_row: usize, num_rows: usize) -> Vec<i64> { - let end_row = start_row + num_rows; - (start_row..end_row).map(|row| self.value(row)).collect() - } + extract_values_impl! {i64} } impl ExtractValues<u64> for UInt64Array { - fn extract_values(&self, start_row: usize, num_rows: usize) -> Vec<u64> { - let end_row = start_row + num_rows; - (start_row..end_row).map(|row| self.value(row)).collect() - } + extract_values_impl! {u64} } impl ExtractValues<f64> for Float64Array { - fn extract_values(&self, start_row: usize, num_rows: usize) -> Vec<f64> { - let end_row = start_row + num_rows; - (start_row..end_row).map(|row| self.value(row)).collect() - } + extract_values_impl! {f64} } impl ExtractValues<bool> for BooleanArray { - fn extract_values(&self, start_row: usize, num_rows: usize) -> Vec<bool> { - let end_row = start_row + num_rows; - (start_row..end_row).map(|row| self.value(row)).collect() - } + extract_values_impl! {bool} } impl ExtractValues<i64> for TimestampNanosecondArray { - fn extract_values(&self, start_row: usize, num_rows: usize) -> Vec<i64> { - let end_row = start_row + num_rows; - (start_row..end_row).map(|row| self.value(row)).collect() - } + extract_values_impl! 
{i64} } /// Translates FieldList into the gRPC format @@ -350,12 +374,116 @@ fn datatype_to_measurement_field_enum(data_type: &ArrowDataType) -> Result<Field ArrowDataType::Utf8 => Ok(FieldType::String), ArrowDataType::Boolean => Ok(FieldType::Boolean), _ => UnsupportedFieldType { - type_name: format!("{:?}", data_type), + data_type: data_type.clone(), } .fail(), } } +/// Wrapper struture that implements [`std::fmt::Display`] for a slice +/// of `Frame`s +struct DisplayableFrames<'a> { + frames: &'a [Frame], +} + +impl<'a> DisplayableFrames<'a> { + fn new(frames: &'a [Frame]) -> Self { + Self { frames } + } +} + +impl<'a> fmt::Display for DisplayableFrames<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.frames.iter().try_for_each(|frame| { + format_frame(frame, f)?; + writeln!(f) + }) + } +} + +fn format_frame(frame: &Frame, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let data = &frame.data; + match data { + Some(Data::Series(SeriesFrame { tags, data_type })) => write!( + f, + "SeriesFrame, tags: {}, type: {:?}", + dump_tags(tags), + data_type + ), + Some(Data::FloatPoints(FloatPointsFrame { timestamps, values })) => write!( + f, + "FloatPointsFrame, timestamps: {:?}, values: {:?}", + timestamps, + dump_values(values) + ), + Some(Data::IntegerPoints(IntegerPointsFrame { timestamps, values })) => write!( + f, + "IntegerPointsFrame, timestamps: {:?}, values: {:?}", + timestamps, + dump_values(values) + ), + Some(Data::UnsignedPoints(UnsignedPointsFrame { timestamps, values })) => write!( + f, + "UnsignedPointsFrame, timestamps: {:?}, values: {:?}", + timestamps, + dump_values(values) + ), + Some(Data::BooleanPoints(BooleanPointsFrame { timestamps, values })) => write!( + f, + "BooleanPointsFrame, timestamps: {:?}, values: {}", + timestamps, + dump_values(values) + ), + Some(Data::StringPoints(StringPointsFrame { timestamps, values })) => write!( + f, + "StringPointsFrame, timestamps: {:?}, values: {}", + timestamps, + dump_values(values) + ), + Some(Data::Group(GroupFrame { + tag_keys, + partition_key_vals, + })) => write!( + f, + "GroupFrame, tag_keys: {}, partition_key_vals: {}", + dump_u8_vec(tag_keys), + dump_u8_vec(partition_key_vals) + ), + None => write!(f, "<NO data field>"), + } +} + +fn dump_values<T>(v: &[T]) -> String +where + T: std::fmt::Display, +{ + v.iter() + .map(|item| format!("{}", item)) + .collect::<Vec<_>>() + .join(",") +} + +fn dump_u8_vec(encoded_strings: &[Vec<u8>]) -> String { + encoded_strings + .iter() + .map(|b| String::from_utf8_lossy(b)) + .collect::<Vec<_>>() + .join(",") +} + +fn dump_tags(tags: &[Tag]) -> String { + tags.iter() + .map(|tag| { + format!( + "{}={}", + String::from_utf8_lossy(&tag.key), + String::from_utf8_lossy(&tag.value), + ) + }) + .collect::<Vec<_>>() + .join(",") +} + #[cfg(test)] mod tests { use arrow::{datatypes::DataType as ArrowDataType, record_batch::RecordBatch}; @@ -409,11 +537,7 @@ mod tests { let response = series_set_to_read_response(series_set).expect("Correctly converted series set"); - let dumped_frames = response - .frames - .iter() - .map(|f| dump_frame(f)) - .collect::<Vec<_>>(); + let dumped_frames = dump_frames(&response.frames); let expected_frames = vec![ "SeriesFrame, tags: _field=string_field,_measurement=the_table,tag1=val1, type: 4", @@ -465,11 +589,7 @@ mod tests { let response = series_set_to_read_response(series_set).expect("Correctly converted series set"); - let dumped_frames = response - .frames - .iter() - .map(|f| dump_frame(f)) - .collect::<Vec<_>>(); + let dumped_frames = 
dump_frames(&response.frames); let expected_frames = vec![ "SeriesFrame, tags: _field=string_field2,_measurement=the_table,tag1=val1, type: 4", @@ -486,7 +606,7 @@ mod tests { } #[test] - fn test_series_set_conversion_with_null_field() { + fn test_series_set_conversion_with_entirely_null_field() { // single series let tag_array: ArrayRef = Arc::new(StringArray::from(vec!["MA", "MA", "MA", "MA"])); let int_array: ArrayRef = Arc::new(Int64Array::from(vec![None, None, None, None])); @@ -525,15 +645,71 @@ mod tests { let response = series_set_to_read_response(series_set).expect("Correctly converted series set"); - let dumped_frames = response - .frames - .iter() - .map(|f| dump_frame(f)) - .collect::<Vec<_>>(); + let dumped_frames = dump_frames(&response.frames); let expected_frames = vec![ "SeriesFrame, tags: _field=float_field,_measurement=the_table,state=MA, type: 0", - "FloatPointsFrame, timestamps: [1000, 2000, 3000, 4000], values: \"10.1,20.1,0,40.1\"", + "FloatPointsFrame, timestamps: [1000, 2000, 4000], values: \"10.1,20.1,40.1\"", + ]; + + assert_eq!( + dumped_frames, expected_frames, + "Expected:\n{:#?}\nActual:\n{:#?}", + expected_frames, dumped_frames + ); + } + + #[test] + fn test_series_set_conversion_with_some_null_fields() { + // single series + let tag_array = StringArray::from(vec!["MA", "MA"]); + let string_array = StringArray::from(vec![None, Some("foo")]); + let float_array = Float64Array::from(vec![None, Some(1.0)]); + let int_array = Int64Array::from(vec![None, Some(-10)]); + let uint_array = UInt64Array::from(vec![None, Some(100)]); + let bool_array = BooleanArray::from(vec![None, Some(true)]); + + let timestamp_array = TimestampNanosecondArray::from_vec(vec![1000, 2000], None); + + let batch = RecordBatch::try_from_iter_with_nullable(vec![ + ("state", Arc::new(tag_array) as ArrayRef, true), + ("srting_field", Arc::new(string_array), true), + ("float_field", Arc::new(float_array), true), + ("int_field", Arc::new(int_array), true), + ("uint_field", Arc::new(uint_array), true), + ("bool_field", Arc::new(bool_array), true), + ("time", Arc::new(timestamp_array), false), + ]) + .expect("created new record batch"); + + let series_set = SeriesSet { + table_name: Arc::from("the_table"), + tags: vec![(Arc::from("state"), Arc::from("MA"))], + field_indexes: FieldIndexes::from_timestamp_and_value_indexes(6, &[1, 2, 3, 4, 5]), + start_row: 0, + num_rows: batch.num_rows(), + batch, + }; + + // Expect only a single series (for the data in float_field, int_field is all + // nulls) + + let response = + series_set_to_read_response(series_set).expect("Correctly converted series set"); + + let dumped_frames = dump_frames(&response.frames); + + let expected_frames = vec![ + "SeriesFrame, tags: _field=srting_field,_measurement=the_table,state=MA, type: 4", + "StringPointsFrame, timestamps: [2000], values: foo", + "SeriesFrame, tags: _field=float_field,_measurement=the_table,state=MA, type: 0", + "FloatPointsFrame, timestamps: [2000], values: \"1\"", + "SeriesFrame, tags: _field=int_field,_measurement=the_table,state=MA, type: 1", + "IntegerPointsFrame, timestamps: [2000], values: \"-10\"", + "SeriesFrame, tags: _field=uint_field,_measurement=the_table,state=MA, type: 2", + "UnsignedPointsFrame, timestamps: [2000], values: \"100\"", + "SeriesFrame, tags: _field=bool_field,_measurement=the_table,state=MA, type: 3", + "BooleanPointsFrame, timestamps: [2000], values: true", ]; assert_eq!( @@ -555,11 +731,7 @@ mod tests { let response = series_set_item_to_read_response(grouped_series_set_item) 
.expect("Correctly converted grouped_series_set_item"); - let dumped_frames = response - .frames - .iter() - .map(|f| dump_frame(f)) - .collect::<Vec<_>>(); + let dumped_frames = dump_frames(&response.frames); let expected_frames = vec![ "GroupFrame, tag_keys: _field,_measurement,tag1,tag2, partition_key_vals: val1,val2", @@ -600,11 +772,7 @@ mod tests { let response = series_set_item_to_read_response(series_set_item) .expect("Correctly converted series_set_item"); - let dumped_frames = response - .frames - .iter() - .map(|f| dump_frame(f)) - .collect::<Vec<_>>(); + let dumped_frames = dump_frames(&response.frames); let expected_frames = vec![ "SeriesFrame, tags: _field=float_field,_measurement=the_table,tag1=val1, type: 0", @@ -713,82 +881,6 @@ mod tests { } } - fn dump_frame(frame: &Frame) -> String { - let data = &frame.data; - match data { - Some(Data::Series(SeriesFrame { tags, data_type })) => format!( - "SeriesFrame, tags: {}, type: {:?}", - dump_tags(tags), - data_type - ), - Some(Data::FloatPoints(FloatPointsFrame { timestamps, values })) => format!( - "FloatPointsFrame, timestamps: {:?}, values: {:?}", - timestamps, - dump_values(values) - ), - Some(Data::IntegerPoints(IntegerPointsFrame { timestamps, values })) => format!( - "IntegerPointsFrame, timestamps: {:?}, values: {:?}", - timestamps, - dump_values(values) - ), - Some(Data::UnsignedPoints(UnsignedPointsFrame { timestamps, values })) => format!( - "UnsignedPointsFrame, timestamps: {:?}, values: {:?}", - timestamps, - dump_values(values) - ), - Some(Data::BooleanPoints(BooleanPointsFrame { timestamps, values })) => format!( - "BooleanPointsFrame, timestamps: {:?}, values: {}", - timestamps, - dump_values(values) - ), - Some(Data::StringPoints(StringPointsFrame { timestamps, values })) => format!( - "StringPointsFrame, timestamps: {:?}, values: {}", - timestamps, - dump_values(values) - ), - Some(Data::Group(GroupFrame { - tag_keys, - partition_key_vals, - })) => format!( - "GroupFrame, tag_keys: {}, partition_key_vals: {}", - dump_u8_vec(tag_keys), - dump_u8_vec(partition_key_vals), - ), - None => "<NO data field>".into(), - } - } - - fn dump_values<T>(v: &[T]) -> String - where - T: std::fmt::Display, - { - v.iter() - .map(|item| format!("{}", item)) - .collect::<Vec<_>>() - .join(",") - } - - fn dump_u8_vec(encoded_strings: &[Vec<u8>]) -> String { - encoded_strings - .iter() - .map(|b| String::from_utf8_lossy(b)) - .collect::<Vec<_>>() - .join(",") - } - - fn dump_tags(tags: &[Tag]) -> String { - tags.iter() - .map(|tag| { - format!( - "{}={}", - String::from_utf8_lossy(&tag.key), - String::from_utf8_lossy(&tag.value), - ) - }) - .collect::<Vec<_>>() - .join(",") - } - fn make_record_batch() -> RecordBatch { let string_array: ArrayRef = Arc::new(StringArray::from(vec!["foo", "bar", "baz", "foo"])); let int_array: ArrayRef = Arc::new(Int64Array::from(vec![1, 2, 3, 4])); @@ -811,4 +903,13 @@ mod tests { ]) .expect("created new record batch") } + + fn dump_frames(frames: &[Frame]) -> Vec<String> { + DisplayableFrames::new(frames) + .to_string() + .trim() + .split('\n') + .map(|s| s.to_string()) + .collect() + } } From b955ecc6a7e22fd699ce175601fbc44b78e667a7 Mon Sep 17 00:00:00 2001 From: Marco Neumann <marco@crepererum.net> Date: Tue, 12 Oct 2021 13:13:58 +0200 Subject: [PATCH 10/17] feat: add format header to Kafka messages This allows us to transition to new formats in the future. 
--- write_buffer/src/kafka.rs | 107 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 104 insertions(+), 3 deletions(-) diff --git a/write_buffer/src/kafka.rs b/write_buffer/src/kafka.rs index 582387916e..6f7ecb943a 100644 --- a/write_buffer/src/kafka.rs +++ b/write_buffer/src/kafka.rs @@ -19,6 +19,7 @@ use rdkafka::{ client::DefaultClientContext, consumer::{BaseConsumer, Consumer, StreamConsumer}, error::KafkaError, + message::{Headers, OwnedHeaders}, producer::{FutureProducer, FutureRecord}, types::RDKafkaErrorCode, util::Timeout, @@ -31,6 +32,14 @@ use crate::core::{ WriteBufferWriting, }; +/// Message header that determines message format. +pub const HEADER_FORMAT: &str = "IOX:FORMAT"; + +/// Current flatbuffer-based. +/// +/// This is a value for [`HEADER_FORMAT`]. +pub const FORMAT_FLATBUFFER: &str = "iox:flatbuffer_v1"; + pub struct KafkaBufferProducer { conn: String, database_name: String, @@ -73,7 +82,8 @@ impl WriteBufferWriting for KafkaBufferProducer { let record: FutureRecord<'_, String, _> = FutureRecord::to(&self.database_name) .payload(entry.data()) .partition(partition) - .timestamp(timestamp_millis); + .timestamp(timestamp_millis) + .headers(OwnedHeaders::new().add(HEADER_FORMAT, FORMAT_FLATBUFFER)); debug!(db_name=%self.database_name, partition, size=entry.data().len(), "writing to kafka"); @@ -175,7 +185,28 @@ impl WriteBufferReading for KafkaBufferConsumer { .stream() .map(move |message| { let message = message?; - let entry = Entry::try_from(message.payload().unwrap().to_vec())?; + + let format = message.headers().map(|headers| { + for i in 0..headers.count() { + if let Some((name, value)) = headers.get(i) { + if name == HEADER_FORMAT { + return String::from_utf8(value.to_vec()).ok() + } + } + } + + None + }).flatten(); + // Fallback for now https://github.com/influxdata/influxdb_iox/issues/2805 + let format = format.unwrap_or_else(|| FORMAT_FLATBUFFER.to_string()); + if format != FORMAT_FLATBUFFER { + return Err(format!("Unknown message format: {}", format).into()); + } + + let payload = message.payload().ok_or_else::<WriteBufferError, _>(|| { + "Payload missing".to_string().into() + })?; + let entry = Entry::try_from(payload.to_vec())?; // Timestamps were added as part of // [KIP-32](https://cwiki.apache.org/confluence/display/KAFKA/KIP-32+-+Add+timestamps+to+Kafka+message). 
@@ -550,8 +581,12 @@ mod tests { }; use time::TimeProvider; + use entry::test_helpers::lp_to_entry; + use crate::{ - core::test_utils::{perform_generic_tests, TestAdapter, TestContext}, + core::test_utils::{ + map_pop_first, perform_generic_tests, set_pop_first, TestAdapter, TestContext, + }, kafka::test_utils::random_kafka_topic, maybe_skip_kafka_integration, }; @@ -647,6 +682,7 @@ mod tests { async fn topic_create_twice() { let conn = maybe_skip_kafka_integration!(); let database_name = random_kafka_topic(); + create_kafka_topic( &conn, &database_name, @@ -655,6 +691,7 @@ mod tests { ) .await .unwrap(); + create_kafka_topic( &conn, &database_name, @@ -664,4 +701,68 @@ mod tests { .await .unwrap(); } + + #[tokio::test] + async fn error_no_payload() { + let conn = maybe_skip_kafka_integration!(); + let adapter = KafkaTestAdapter::new(conn); + let ctx = adapter.new_context(NonZeroU32::new(1).unwrap()).await; + + let writer = ctx.writing(true).await.unwrap(); + let partition = set_pop_first(&mut writer.sequencer_ids()).unwrap() as i32; + let record: FutureRecord<'_, String, [u8]> = + FutureRecord::to(&writer.database_name).partition(partition); + writer.producer.send(record, Timeout::Never).await.unwrap(); + + let mut reader = ctx.reading(true).await.unwrap(); + let mut streams = reader.streams(); + assert_eq!(streams.len(), 1); + let (_sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap(); + let err = stream.stream.next().await.unwrap().unwrap_err(); + assert_eq!(err.to_string(), "Payload missing"); + } + + #[tokio::test] + async fn format_header_missing() { + let conn = maybe_skip_kafka_integration!(); + let adapter = KafkaTestAdapter::new(conn); + let ctx = adapter.new_context(NonZeroU32::new(1).unwrap()).await; + + let writer = ctx.writing(true).await.unwrap(); + let partition = set_pop_first(&mut writer.sequencer_ids()).unwrap() as i32; + let entry = lp_to_entry("upc,region=east user=1 100"); + let record: FutureRecord<'_, String, _> = FutureRecord::to(&writer.database_name) + .payload(entry.data()) + .partition(partition); + writer.producer.send(record, Timeout::Never).await.unwrap(); + + let mut reader = ctx.reading(true).await.unwrap(); + let mut streams = reader.streams(); + assert_eq!(streams.len(), 1); + let (_sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap(); + stream.stream.next().await.unwrap().unwrap(); + } + + #[tokio::test] + async fn format_header_unknown() { + let conn = maybe_skip_kafka_integration!(); + let adapter = KafkaTestAdapter::new(conn); + let ctx = adapter.new_context(NonZeroU32::new(1).unwrap()).await; + + let writer = ctx.writing(true).await.unwrap(); + let partition = set_pop_first(&mut writer.sequencer_ids()).unwrap() as i32; + let entry = lp_to_entry("upc,region=east user=1 100"); + let record: FutureRecord<'_, String, _> = FutureRecord::to(&writer.database_name) + .payload(entry.data()) + .partition(partition) + .headers(OwnedHeaders::new().add(HEADER_FORMAT, "foo")); + writer.producer.send(record, Timeout::Never).await.unwrap(); + + let mut reader = ctx.reading(true).await.unwrap(); + let mut streams = reader.streams(); + assert_eq!(streams.len(), 1); + let (_sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap(); + let err = stream.stream.next().await.unwrap().unwrap_err(); + assert_eq!(err.to_string(), "Unknown message format: foo"); + } } From 90d2f1d60de2e097a00b952a9ecc6cf983c99496 Mon Sep 17 00:00:00 2001 From: Marco Neumann <marco@crepererum.net> Date: Tue, 12 Oct 2021 16:31:00 +0200 Subject: [PATCH 11/17] 
refactor: use Kafka headers similar to HTTP --- write_buffer/src/kafka.rs | 115 ++++++++++++++++++++++++++++++-------- 1 file changed, 93 insertions(+), 22 deletions(-) diff --git a/write_buffer/src/kafka.rs b/write_buffer/src/kafka.rs index 6f7ecb943a..f2fc7be33c 100644 --- a/write_buffer/src/kafka.rs +++ b/write_buffer/src/kafka.rs @@ -32,13 +32,69 @@ use crate::core::{ WriteBufferWriting, }; -/// Message header that determines message format. -pub const HEADER_FORMAT: &str = "IOX:FORMAT"; +/// Message header that determines message content type. +pub const HEADER_CONTENT_TYPE: &str = "content-type"; -/// Current flatbuffer-based. +/// Current flatbuffer-based content type. /// -/// This is a value for [`HEADER_FORMAT`]. -pub const FORMAT_FLATBUFFER: &str = "iox:flatbuffer_v1"; +/// This is a value for [`HEADER_CONTENT_TYPE`]. +/// +/// Inspired by: +/// - <https://stackoverflow.com/a/56502135> +/// - <https://stackoverflow.com/a/48051331> +pub const CONTENT_TYPE_FLATBUFFER: &str = + r#"application/x-flatbuffers; schema="influxdata.iox.write.v1.Entry""#; + +/// IOx-specific headers attached to every Kafka message. +#[derive(Debug, PartialEq)] +struct IoxHeaders { + content_type: Option<String>, +} + +impl IoxHeaders { + /// Create new headers with sane default values. + fn new() -> Self { + Self { + content_type: Some(CONTENT_TYPE_FLATBUFFER.to_string()), + } + } + + /// Create new headers where all information is missing. + fn empty() -> Self { + Self { content_type: None } + } +} + +impl<H> From<&H> for IoxHeaders +where + H: Headers, +{ + fn from(headers: &H) -> Self { + let mut res = Self { content_type: None }; + + for i in 0..headers.count() { + if let Some((name, value)) = headers.get(i) { + if name.eq_ignore_ascii_case(HEADER_CONTENT_TYPE) { + res.content_type = String::from_utf8(value.to_vec()).ok(); + } + } + } + + res + } +} + +impl From<&IoxHeaders> for OwnedHeaders { + fn from(iox_headers: &IoxHeaders) -> Self { + let mut res = Self::new(); + + if let Some(content_type) = iox_headers.content_type.as_ref() { + res = res.add(HEADER_CONTENT_TYPE, content_type); + } + + res + } +} pub struct KafkaBufferProducer { conn: String, @@ -77,13 +133,15 @@ impl WriteBufferWriting for KafkaBufferProducer { let timestamp_millis = date_time.timestamp_millis(); let timestamp = Time::from_timestamp_millis(timestamp_millis); + let headers = IoxHeaders::new(); + // This type annotation is necessary because `FutureRecord` is generic over key type, but // key is optional and we're not setting a key. `String` is arbitrary. 
let record: FutureRecord<'_, String, _> = FutureRecord::to(&self.database_name) .payload(entry.data()) .partition(partition) .timestamp(timestamp_millis) - .headers(OwnedHeaders::new().add(HEADER_FORMAT, FORMAT_FLATBUFFER)); + .headers((&headers).into()); debug!(db_name=%self.database_name, partition, size=entry.data().len(), "writing to kafka"); @@ -186,21 +244,12 @@ impl WriteBufferReading for KafkaBufferConsumer { .map(move |message| { let message = message?; - let format = message.headers().map(|headers| { - for i in 0..headers.count() { - if let Some((name, value)) = headers.get(i) { - if name == HEADER_FORMAT { - return String::from_utf8(value.to_vec()).ok() - } - } - } + let headers: IoxHeaders = message.headers().map(|headers| headers.into()).unwrap_or_else(IoxHeaders::empty); - None - }).flatten(); // Fallback for now https://github.com/influxdata/influxdb_iox/issues/2805 - let format = format.unwrap_or_else(|| FORMAT_FLATBUFFER.to_string()); - if format != FORMAT_FLATBUFFER { - return Err(format!("Unknown message format: {}", format).into()); + let content_type = headers.content_type.unwrap_or_else(|| CONTENT_TYPE_FLATBUFFER.to_string()); + if content_type != CONTENT_TYPE_FLATBUFFER { + return Err(format!("Unknown message format: {}", content_type).into()); } let payload = message.payload().ok_or_else::<WriteBufferError, _>(|| { @@ -723,7 +772,8 @@ mod tests { } #[tokio::test] - async fn format_header_missing() { + async fn content_type_header_missing() { + // Fallback for now https://github.com/influxdata/influxdb_iox/issues/2805 let conn = maybe_skip_kafka_integration!(); let adapter = KafkaTestAdapter::new(conn); let ctx = adapter.new_context(NonZeroU32::new(1).unwrap()).await; @@ -744,7 +794,7 @@ mod tests { } #[tokio::test] - async fn format_header_unknown() { + async fn content_type_header_unknown() { let conn = maybe_skip_kafka_integration!(); let adapter = KafkaTestAdapter::new(conn); let ctx = adapter.new_context(NonZeroU32::new(1).unwrap()).await; @@ -755,7 +805,7 @@ mod tests { let record: FutureRecord<'_, String, _> = FutureRecord::to(&writer.database_name) .payload(entry.data()) .partition(partition) - .headers(OwnedHeaders::new().add(HEADER_FORMAT, "foo")); + .headers(OwnedHeaders::new().add(HEADER_CONTENT_TYPE, "foo")); writer.producer.send(record, Timeout::Never).await.unwrap(); let mut reader = ctx.reading(true).await.unwrap(); @@ -765,4 +815,25 @@ mod tests { let err = stream.stream.next().await.unwrap().unwrap_err(); assert_eq!(err.to_string(), "Unknown message format: foo"); } + + #[test] + fn headers_roundtrip() { + let iox_headers1 = IoxHeaders::new(); + let kafka_headers: OwnedHeaders = (&iox_headers1).into(); + let iox_headers2: IoxHeaders = (&kafka_headers).into(); + assert_eq!(iox_headers1, iox_headers2); + } + + #[test] + fn headers_case_handling() { + let kafka_headers = OwnedHeaders::new() + .add("content-type", "a") + .add("CONTENT-TYPE", "b") + .add("content-TYPE", "c"); + let actual: IoxHeaders = (&kafka_headers).into(); + let expected = IoxHeaders { + content_type: Some("c".to_string()), + }; + assert_eq!(actual, expected); + } } From 173f9aefcf1d8b9d1b8004f2ee11de0ec05f97a6 Mon Sep 17 00:00:00 2001 From: Marco Neumann <marco@crepererum.net> Date: Tue, 12 Oct 2021 11:10:40 +0200 Subject: [PATCH 12/17] feat: ability to link other spans in span context This can be used when aggregating from multiple parent spans, e.g. when we want to implement #1473. 
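For illustration only (none of this is in the diff; `ctx_a` and `ctx_b` are assumed to be the `SpanContext`s of the operations being aggregated): a span that combines work started under two other contexts can now record links back to both, in the same way the jaeger test below does:

    // Sketch: an aggregate span linking back to two parent contexts,
    // possibly from different traces.
    let mut span = ctx.child("aggregate");
    span.ctx.links = vec![
        (ctx_a.trace_id, ctx_a.span_id),
        (ctx_b.trace_id, ctx_b.span_id),
    ];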
--- trace/src/ctx.rs | 7 +++++ trace_exporters/src/jaeger.rs | 13 +++++++++ trace_exporters/src/jaeger/span.rs | 42 +++++++++++++++++++++++++----- trace_http/src/ctx.rs | 8 ++++++ 4 files changed, 64 insertions(+), 6 deletions(-) diff --git a/trace/src/ctx.rs b/trace/src/ctx.rs index 22a7739044..e3cb155aac 100644 --- a/trace/src/ctx.rs +++ b/trace/src/ctx.rs @@ -51,6 +51,11 @@ pub struct SpanContext { pub span_id: SpanId, + /// Link to other spans, can be cross-trace if this span aggregates multiple spans. + /// + /// See <https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/overview.md#links-between-spans>. + pub links: Vec<(TraceId, SpanId)>, + pub collector: Option<Arc<dyn TraceCollector>>, } @@ -67,6 +72,7 @@ impl SpanContext { trace_id: TraceId(NonZeroU128::new(trace_id).unwrap()), parent_span_id: None, span_id: SpanId(NonZeroU64::new(span_id).unwrap()), + links: Vec::with_capacity(0), collector: Some(collector), } } @@ -79,6 +85,7 @@ impl SpanContext { trace_id: self.trace_id, span_id: SpanId::gen(), collector: self.collector.clone(), + links: Vec::with_capacity(0), parent_span_id: Some(self.span_id), }, start: None, diff --git a/trace_exporters/src/jaeger.rs b/trace_exporters/src/jaeger.rs index 6ae8427909..da05196d67 100644 --- a/trace_exporters/src/jaeger.rs +++ b/trace_exporters/src/jaeger.rs @@ -236,9 +236,14 @@ mod tests { trace_id: TraceId::new(43434).unwrap(), parent_span_id: None, span_id: SpanId::new(3495993).unwrap(), + links: vec![], collector: None, }; let mut span = ctx.child("foo"); + span.ctx.links = vec![ + (TraceId::new(12).unwrap(), SpanId::new(123).unwrap()), + (TraceId::new(45).unwrap(), SpanId::new(456).unwrap()), + ]; span.status = SpanStatus::Ok; span.events = vec![SpanEvent { time: Utc.timestamp_nanos(200000), @@ -283,6 +288,14 @@ mod tests { span.ctx.parent_span_id.unwrap().get() as i64 ); + // test links + let b1_s0_refs = b1_s0.references.as_ref().unwrap(); + assert_eq!(b1_s0_refs.len(), 2); + let b1_s0_r0 = &b1_s0_refs[0]; + let b1_s0_r1 = &b1_s0_refs[1]; + assert_eq!(b1_s0_r0.span_id, span.ctx.links[0].1.get() as i64); + assert_eq!(b1_s0_r1.span_id, span.ctx.links[1].1.get() as i64); + // microseconds not nanoseconds assert_eq!(b1_s0.start_time, 100); assert_eq!(b1_s0.duration, 200); diff --git a/trace_exporters/src/jaeger/span.rs b/trace_exporters/src/jaeger/span.rs index e3af3e45d3..73ed699de5 100644 --- a/trace_exporters/src/jaeger/span.rs +++ b/trace_exporters/src/jaeger/span.rs @@ -1,12 +1,21 @@ /// Contains the conversion logic from a `trace::span::Span` to `thrift::jaeger::Span` -use crate::thrift::jaeger; -use trace::span::{MetaValue, Span, SpanEvent, SpanStatus}; +use crate::thrift::jaeger::{self, SpanRef}; +use trace::{ + ctx::TraceId, + span::{MetaValue, Span, SpanEvent, SpanStatus}, +}; + +/// Split [`TraceId`] into high and low part. 
+fn split_trace_id(trace_id: TraceId) -> (i64, i64) { + let trace_id = trace_id.get(); + let trace_id_high = (trace_id >> 64) as i64; + let trace_id_low = trace_id as i64; + (trace_id_high, trace_id_low) +} impl From<Span> for jaeger::Span { fn from(mut s: Span) -> Self { - let trace_id = s.ctx.trace_id.get(); - let trace_id_high = (trace_id >> 64) as i64; - let trace_id_low = trace_id as i64; + let (trace_id_high, trace_id_low) = split_trace_id(s.ctx.trace_id); // A parent span id of 0 indicates no parent span ID (span IDs are non-zero) let parent_span_id = s.ctx.parent_span_id.map(|id| id.get()).unwrap_or_default() as i64; @@ -51,13 +60,34 @@ impl From<Span> for jaeger::Span { false => Some(s.events.into_iter().map(Into::into).collect()), }; + let references = if s.ctx.links.is_empty() { + None + } else { + Some( + s.ctx + .links + .into_iter() + .map(|(trace_id, span_id)| { + // https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/sdk_exporters/jaeger.md#links + let (trace_id_high, trace_id_low) = split_trace_id(trace_id); + SpanRef { + ref_type: jaeger::SpanRefType::FollowsFrom, + trace_id_high, + trace_id_low, + span_id: span_id.get() as i64, + } + }) + .collect(), + ) + }; + Self { trace_id_low, trace_id_high, span_id: s.ctx.span_id.get() as i64, parent_span_id, operation_name: s.name.to_string(), - references: None, + references, flags: 0, start_time, duration, diff --git a/trace_http/src/ctx.rs b/trace_http/src/ctx.rs index 9946786287..67c27d1bec 100644 --- a/trace_http/src/ctx.rs +++ b/trace_http/src/ctx.rs @@ -154,10 +154,14 @@ fn decode_b3( return Ok(None); } + // Links cannot be specified via the HTTP header + let links = Vec::with_capacity(0); + Ok(Some(SpanContext { trace_id: required_header(headers, B3_TRACE_ID_HEADER, parse_trace)?, parent_span_id: parsed_header(headers, B3_PARENT_SPAN_ID_HEADER, parse_span)?, span_id: required_header(headers, B3_SPAN_ID_HEADER, parse_span)?, + links, collector: Some(Arc::clone(collector)), })) } @@ -211,10 +215,14 @@ fn decode_jaeger( return Ok(None); } + // Links cannot be specified via the HTTP header + let links = Vec::with_capacity(0); + Ok(Some(SpanContext { trace_id: decoded.trace_id, parent_span_id: decoded.parent_span_id, span_id: decoded.span_id, + links, collector: Some(Arc::clone(collector)), })) } From f62d2d2277ff441fd03262166c0819ea3ed50b38 Mon Sep 17 00:00:00 2001 From: Marco Neumann <marco@crepererum.net> Date: Tue, 12 Oct 2021 16:36:06 +0200 Subject: [PATCH 13/17] refactor: `Vec::with_capacity(0)` => `vec![]` `vec![]` also results in a capacity of 0, see https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=184113a9afa945cf3cf3b8f854f4c5ce --- trace/src/ctx.rs | 4 ++-- trace_http/src/ctx.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/trace/src/ctx.rs b/trace/src/ctx.rs index e3cb155aac..fad4cc844a 100644 --- a/trace/src/ctx.rs +++ b/trace/src/ctx.rs @@ -72,7 +72,7 @@ impl SpanContext { trace_id: TraceId(NonZeroU128::new(trace_id).unwrap()), parent_span_id: None, span_id: SpanId(NonZeroU64::new(span_id).unwrap()), - links: Vec::with_capacity(0), + links: vec![], collector: Some(collector), } } @@ -85,7 +85,7 @@ impl SpanContext { trace_id: self.trace_id, span_id: SpanId::gen(), collector: self.collector.clone(), - links: Vec::with_capacity(0), + links: vec![], parent_span_id: Some(self.span_id), }, start: None, diff --git a/trace_http/src/ctx.rs b/trace_http/src/ctx.rs index 67c27d1bec..3e609b621b 100644 --- a/trace_http/src/ctx.rs +++ 
b/trace_http/src/ctx.rs @@ -155,7 +155,7 @@ fn decode_b3( } // Links cannot be specified via the HTTP header - let links = Vec::with_capacity(0); + let links = vec![]; Ok(Some(SpanContext { trace_id: required_header(headers, B3_TRACE_ID_HEADER, parse_trace)?, @@ -216,7 +216,7 @@ fn decode_jaeger( } // Links cannot be specified via the HTTP header - let links = Vec::with_capacity(0); + let links = vec![]; Ok(Some(SpanContext { trace_id: decoded.trace_id, From 8414e6edbbacd140f2f4e8b616c2610a61ca3899 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Tue, 12 Oct 2021 15:43:05 +0100 Subject: [PATCH 14/17] feat: migrate preserved catalog to TimeProvider (#2722) (#2808) * feat: migrate preserved catalog to TimeProvider (#2722) * fix: deterministic catalog prune tests * fix: failing test Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com> --- parquet_file/src/catalog/cleanup.rs | 14 ++-- parquet_file/src/catalog/core.rs | 68 +++++++++---------- parquet_file/src/catalog/dump.rs | 8 ++- .../src/catalog/internals/proto_parse.rs | 10 ++- parquet_file/src/catalog/prune.rs | 47 +++++++++---- parquet_file/src/test_utils.rs | 3 +- server/src/database.rs | 11 +-- server/src/db.rs | 36 +++------- server/src/db/lifecycle/persist.rs | 4 +- server/src/db/load.rs | 32 +++++---- server/src/lib.rs | 22 ++++-- server/src/utils.rs | 4 +- 12 files changed, 133 insertions(+), 126 deletions(-) diff --git a/parquet_file/src/catalog/cleanup.rs b/parquet_file/src/catalog/cleanup.rs index 35f95857d6..a43eb00513 100644 --- a/parquet_file/src/catalog/cleanup.rs +++ b/parquet_file/src/catalog/cleanup.rs @@ -9,7 +9,6 @@ use parking_lot::Mutex; use predicate::delete_predicate::DeletePredicate; use snafu::{ResultExt, Snafu}; -use crate::catalog::core::PreservedCatalogConfig; use crate::catalog::{ core::PreservedCatalog, interface::{ @@ -62,14 +61,11 @@ pub async fn get_unreferenced_parquet_files( let iox_object_store = catalog.iox_object_store(); let all_known = { // replay catalog transactions to track ALL (even dropped) files that are referenced - let (_catalog, state) = PreservedCatalog::load::<TracerCatalogState>( - db_name, - PreservedCatalogConfig::new(Arc::clone(&iox_object_store)), - (), - ) - .await - .context(CatalogLoadError)? - .expect("catalog gone while reading it?"); + let (_catalog, state) = + PreservedCatalog::load::<TracerCatalogState>(db_name, catalog.config(), ()) + .await + .context(CatalogLoadError)? 
+ .expect("catalog gone while reading it?"); state.files.into_inner() }; diff --git a/parquet_file/src/catalog/core.rs b/parquet_file/src/catalog/core.rs index b375482bc9..ccb1159488 100644 --- a/parquet_file/src/catalog/core.rs +++ b/parquet_file/src/catalog/core.rs @@ -12,7 +12,6 @@ use crate::{ metadata::IoxParquetMetaData, }; use bytes::Bytes; -use chrono::{DateTime, Utc}; use futures::{StreamExt, TryStreamExt}; use generated_types::influxdata::iox::catalog::v1 as proto; use iox_object_store::{IoxObjectStore, ParquetFilePath, TransactionFilePath}; @@ -30,6 +29,7 @@ use std::{ fmt::Debug, sync::Arc, }; +use time::{Time, TimeProvider}; use tokio::sync::{Semaphore, SemaphorePermit}; use uuid::Uuid; @@ -172,16 +172,19 @@ pub struct PreservedCatalogConfig { /// Fixed UUID for testing pub(crate) fixed_uuid: Option<Uuid>, - /// Fixed timestamp for testing - pub(crate) fixed_timestamp: Option<DateTime<Utc>>, + /// Time provider to use instead of [`time::SystemProvider`] + pub(crate) time_provider: Arc<dyn TimeProvider>, } impl PreservedCatalogConfig { - pub fn new(iox_object_store: Arc<IoxObjectStore>) -> Self { + pub fn new( + iox_object_store: Arc<IoxObjectStore>, + time_provider: Arc<dyn TimeProvider>, + ) -> Self { Self { iox_object_store, - fixed_timestamp: None, fixed_uuid: None, + time_provider, } } @@ -193,12 +196,10 @@ impl PreservedCatalogConfig { } } - /// Fixed timestamp to use for all transactions instead of "now" - /// - /// TODO: Replace with TimeProvider (#2722) - pub fn with_fixed_timestamp(self, timestamp: DateTime<Utc>) -> Self { + /// Override the time provider + pub fn with_time_provider(self, time_provider: Arc<dyn TimeProvider>) -> Self { Self { - fixed_timestamp: Some(timestamp), + time_provider, ..self } } @@ -235,10 +236,8 @@ pub struct PreservedCatalog { /// This can be useful for testing to achieve deterministic outputs. fixed_uuid: Option<Uuid>, - /// If set, this start time will be used for all transaction instead of "now". - /// - /// This can be useful for testing to achieve deterministic outputs. - fixed_timestamp: Option<DateTime<Utc>>, + /// Time provider + time_provider: Arc<dyn TimeProvider>, } impl PreservedCatalog { @@ -262,7 +261,7 @@ impl PreservedCatalog { /// most broken catalogs. pub async fn find_last_transaction_timestamp( iox_object_store: &IoxObjectStore, - ) -> Result<Option<DateTime<Utc>>> { + ) -> Result<Option<Time>> { let mut res = None; let mut stream = iox_object_store @@ -275,7 +274,7 @@ impl PreservedCatalog { match load_transaction_proto(iox_object_store, transaction_file_path).await { Ok(proto) => match proto_parse::parse_timestamp(&proto.start_timestamp) { Ok(ts) => { - res = Some(res.map_or(ts, |res: DateTime<Utc>| res.max(ts))); + res = Some(res.map_or(ts, |res: Time| res.max(ts))); } Err(e) => warn!(%e, ?transaction_file_path, "Cannot parse timestamp"), }, @@ -301,11 +300,6 @@ impl PreservedCatalog { Ok(iox_object_store.wipe_catalog().await.context(Write)?) } - /// Deletes the catalog described by the provided config - pub async fn wipe_with_config(config: &PreservedCatalogConfig) -> Result<()> { - Self::wipe(&config.iox_object_store).await - } - /// Create new catalog w/o any data. 
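
The `with_fixed_uuid` and `with_time_provider` methods above follow a consuming-builder pattern: each takes `self`, replaces a single field, and keeps the rest via struct-update syntax, so callers opt into deterministic UUIDs or clocks only when they need them. A minimal self-contained sketch of that pattern with simplified stand-in types (not the actual `PreservedCatalogConfig` or `TimeProvider` from this repo):

    use std::sync::Arc;
    use std::time::{SystemTime, UNIX_EPOCH};

    /// Stand-in for a time source that can be swapped out in tests.
    trait TimeProvider: Send + Sync {
        fn now_nanos(&self) -> u128;
    }

    /// Wall-clock provider used by default.
    struct SystemProvider;
    impl TimeProvider for SystemProvider {
        fn now_nanos(&self) -> u128 {
            SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_nanos()
        }
    }

    /// Fixed provider for deterministic test output.
    struct MockProvider(u128);
    impl TimeProvider for MockProvider {
        fn now_nanos(&self) -> u128 {
            self.0
        }
    }

    /// Stand-in for a catalog config that carries its time source.
    struct Config {
        name: String,
        time_provider: Arc<dyn TimeProvider>,
    }

    impl Config {
        fn new(name: impl Into<String>) -> Self {
            Self {
                name: name.into(),
                time_provider: Arc::new(SystemProvider),
            }
        }

        /// Consuming-builder override: swap one field, keep the rest.
        fn with_time_provider(self, time_provider: Arc<dyn TimeProvider>) -> Self {
            Self { time_provider, ..self }
        }
    }

    fn main() {
        let config = Config::new("db1").with_time_provider(Arc::new(MockProvider(42)));
        // Everything stamped through this config now sees the same instant.
        assert_eq!(config.time_provider.now_nanos(), 42);
        println!("{}: now = {} ns", config.name, config.time_provider.now_nanos());
    }
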
/// /// An empty transaction will be used to mark the catalog start so that concurrent open but @@ -328,7 +322,7 @@ impl PreservedCatalog { transaction_semaphore: Semaphore::new(1), iox_object_store: config.iox_object_store, fixed_uuid: config.fixed_uuid, - fixed_timestamp: config.fixed_timestamp, + time_provider: config.time_provider, }; // add empty transaction @@ -455,7 +449,7 @@ impl PreservedCatalog { transaction_semaphore: Semaphore::new(1), iox_object_store: config.iox_object_store, fixed_uuid: config.fixed_uuid, - fixed_timestamp: config.fixed_timestamp, + time_provider: config.time_provider, }, state, ))) @@ -469,8 +463,7 @@ impl PreservedCatalog { /// transactions are given out in the order they were requested. pub async fn open_transaction(&self) -> TransactionHandle<'_> { let uuid = self.fixed_uuid.unwrap_or_else(Uuid::new_v4); - let start_timestamp = self.fixed_timestamp.unwrap_or_else(Utc::now); - TransactionHandle::new(self, uuid, start_timestamp).await + TransactionHandle::new(self, uuid, self.time_provider.now()).await } /// Get latest revision counter. @@ -489,6 +482,15 @@ impl PreservedCatalog { .expect("catalog should have at least an empty transaction") } + /// Return the config for this `PreservedCatalog` + pub fn config(&self) -> PreservedCatalogConfig { + PreservedCatalogConfig { + iox_object_store: Arc::clone(&self.iox_object_store), + fixed_uuid: self.fixed_uuid, + time_provider: Arc::clone(&self.time_provider), + } + } + /// Object store used by this catalog. pub fn iox_object_store(&self) -> Arc<IoxObjectStore> { Arc::clone(&self.iox_object_store) @@ -509,11 +511,7 @@ struct OpenTransaction { impl OpenTransaction { /// Private API to create new transaction, users should always use /// [`PreservedCatalog::open_transaction`]. - fn new( - previous_tkey: &Option<TransactionKey>, - uuid: Uuid, - start_timestamp: DateTime<Utc>, - ) -> Self { + fn new(previous_tkey: &Option<TransactionKey>, uuid: Uuid, start_timestamp: Time) -> Self { let (revision_counter, previous_uuid) = match previous_tkey { Some(tkey) => ( tkey.revision_counter + 1, @@ -529,7 +527,7 @@ impl OpenTransaction { uuid: uuid.as_bytes().to_vec().into(), revision_counter, previous_uuid, - start_timestamp: Some(start_timestamp.into()), + start_timestamp: Some(start_timestamp.date_time().into()), encoding: proto::transaction::Encoding::Delta.into(), }, } @@ -744,7 +742,7 @@ impl<'c> TransactionHandle<'c> { async fn new( catalog: &'c PreservedCatalog, uuid: Uuid, - start_timestamp: DateTime<Utc>, + start_timestamp: Time, ) -> TransactionHandle<'c> { // first acquire semaphore (which is only being used for transactions), then get state lock let permit = catalog @@ -967,7 +965,7 @@ impl<'c> CheckpointHandle<'c> { previous_uuid: self .previous_tkey .map_or_else(Bytes::new, |tkey| tkey.uuid.as_bytes().to_vec().into()), - start_timestamp: Some(Utc::now().into()), + start_timestamp: Some(self.catalog.time_provider.now().date_time().into()), encoding: proto::transaction::Encoding::Full.into(), }; let path = TransactionFilePath::new_checkpoint(self.tkey.revision_counter, self.tkey.uuid); @@ -1855,7 +1853,7 @@ mod tests { states: Vec<TestCatalogState>, /// Traces timestamp after every (committed and aborted) transaction. - post_timestamps: Vec<DateTime<Utc>>, + post_timestamps: Vec<Time>, /// Traces if an transaction was aborted. 
aborted: Vec<bool>, @@ -1874,7 +1872,7 @@ mod tests { fn record(&mut self, catalog: &PreservedCatalog, state: &TestCatalogState, aborted: bool) { self.tkeys.push(catalog.previous_tkey.read().unwrap()); self.states.push(state.clone()); - self.post_timestamps.push(Utc::now()); + self.post_timestamps.push(catalog.time_provider.now()); self.aborted.push(aborted); } } diff --git a/parquet_file/src/catalog/dump.rs b/parquet_file/src/catalog/dump.rs index 9211ddcfb7..cbdab846a5 100644 --- a/parquet_file/src/catalog/dump.rs +++ b/parquet_file/src/catalog/dump.rs @@ -227,15 +227,16 @@ mod tests { }, test_utils::{chunk_addr, make_config, make_metadata, TestSize}, }; - use chrono::{TimeZone, Utc}; + use time::Time; use uuid::Uuid; #[tokio::test] async fn test_dump_default_options() { + let time_provider = Arc::new(time::MockProvider::new(Time::from_timestamp(10, 20))); let config = make_config() .await .with_fixed_uuid(Uuid::nil()) - .with_fixed_timestamp(Utc.timestamp(10, 20)); + .with_time_provider(time_provider); let iox_object_store = &config.iox_object_store; @@ -352,10 +353,11 @@ File { #[tokio::test] async fn test_dump_show_parsed_data() { + let time_provider = Arc::new(time::MockProvider::new(Time::from_timestamp(10, 20))); let config = make_config() .await .with_fixed_uuid(Uuid::nil()) - .with_fixed_timestamp(Utc.timestamp(10, 20)); + .with_time_provider(time_provider); let iox_object_store = &config.iox_object_store; // build catalog with some data diff --git a/parquet_file/src/catalog/internals/proto_parse.rs b/parquet_file/src/catalog/internals/proto_parse.rs index 5a7cfa38bb..86994b99cd 100644 --- a/parquet_file/src/catalog/internals/proto_parse.rs +++ b/parquet_file/src/catalog/internals/proto_parse.rs @@ -1,10 +1,10 @@ use std::{convert::TryInto, num::TryFromIntError}; -use chrono::{DateTime, Utc}; use generated_types::influxdata::iox::catalog::v1 as proto; use iox_object_store::{ParquetFilePath, ParquetFilePathParseError}; use object_store::path::{parsed::DirsAndFileName, parts::PathPart}; use snafu::{OptionExt, ResultExt, Snafu}; +use time::Time; use uuid::Uuid; #[derive(Debug, Snafu)] @@ -81,13 +81,11 @@ pub fn unparse_dirs_and_filename(path: &ParquetFilePath) -> proto::Path { } /// Parse timestamp from protobuf. -pub fn parse_timestamp( - ts: &Option<generated_types::google::protobuf::Timestamp>, -) -> Result<DateTime<Utc>> { +pub fn parse_timestamp(ts: &Option<generated_types::google::protobuf::Timestamp>) -> Result<Time> { let ts: generated_types::google::protobuf::Timestamp = ts.as_ref().context(DateTimeRequired)?.clone(); - let ts: DateTime<Utc> = ts.try_into().context(DateTimeParseError)?; - Ok(ts) + let ts = ts.try_into().context(DateTimeParseError)?; + Ok(Time::from_date_time(ts)) } /// Parse encoding from protobuf. diff --git a/parquet_file/src/catalog/prune.rs b/parquet_file/src/catalog/prune.rs index 666850e317..c9c002f28f 100644 --- a/parquet_file/src/catalog/prune.rs +++ b/parquet_file/src/catalog/prune.rs @@ -1,11 +1,11 @@ //! Tooling to remove parts of the preserved catalog that are no longer needed. use std::{collections::BTreeMap, sync::Arc}; -use chrono::{DateTime, Utc}; use futures::TryStreamExt; use iox_object_store::{IoxObjectStore, TransactionFilePath}; use object_store::{ObjectStore, ObjectStoreApi}; use snafu::{ResultExt, Snafu}; +use time::Time; use crate::catalog::{ core::{ProtoIOError, ProtoParseError}, @@ -52,10 +52,7 @@ pub type Result<T, E = Error> = std::result::Result<T, E>; /// /// This will delete the following content: C1, T2, and T3. 
C3 and T4 cannot be deleted because it is required to /// recover T5 which is AFTER `before`. -pub async fn prune_history( - iox_object_store: Arc<IoxObjectStore>, - before: DateTime<Utc>, -) -> Result<()> { +pub async fn prune_history(iox_object_store: Arc<IoxObjectStore>, before: Time) -> Result<()> { // collect all files so we can quickly filter them later for deletion // Use a btree-map so we can iterate from oldest to newest revision. let mut files: BTreeMap<u64, Vec<TransactionFilePath>> = Default::default(); @@ -122,7 +119,7 @@ fn is_checkpoint_or_zero(path: &TransactionFilePath) -> bool { #[cfg(test)] mod tests { - use chrono::Duration; + use std::time::Duration; use crate::{ catalog::{ @@ -139,7 +136,9 @@ mod tests { async fn test_empty_store() { let iox_object_store = make_iox_object_store().await; - prune_history(iox_object_store, Utc::now()).await.unwrap(); + prune_history(iox_object_store, Time::from_timestamp_nanos(0)) + .await + .unwrap(); } #[tokio::test] @@ -148,22 +147,32 @@ mod tests { new_empty(config.clone()).await; - prune_history(Arc::clone(&config.iox_object_store), Utc::now()) - .await - .unwrap(); + prune_history( + Arc::clone(&config.iox_object_store), + Time::from_timestamp_nanos(0), + ) + .await + .unwrap(); load_ok(config).await.unwrap(); } #[tokio::test] async fn test_complex_1() { - let config = make_config().await; + let time = Arc::new(time::MockProvider::new(Time::from_timestamp(0, 32))); + let config = make_config() + .await + .with_time_provider(Arc::<time::MockProvider>::clone(&time)); + let iox_object_store = &config.iox_object_store; let (catalog, _state) = new_empty(config.clone()).await; create_transaction(&catalog).await; create_transaction_and_checkpoint(&catalog).await; - let before = Utc::now(); + + let before = time.inc(Duration::from_secs(21)); + time.inc(Duration::from_secs(1)); + create_transaction(&catalog).await; prune_history(Arc::clone(iox_object_store), before) @@ -178,14 +187,22 @@ mod tests { #[tokio::test] async fn test_complex_2() { - let config = make_config().await; + let time = Arc::new(time::MockProvider::new(Time::from_timestamp(0, 32))); + let config = make_config() + .await + .with_time_provider(Arc::<time::MockProvider>::clone(&time)); + let iox_object_store = &config.iox_object_store; let (catalog, _state) = new_empty(config.clone()).await; + create_transaction(&catalog).await; create_transaction_and_checkpoint(&catalog).await; create_transaction(&catalog).await; - let before = Utc::now(); + + let before = time.inc(Duration::from_secs(25)); + time.inc(Duration::from_secs(1)); + create_transaction(&catalog).await; create_transaction_and_checkpoint(&catalog).await; create_transaction(&catalog).await; @@ -217,7 +234,7 @@ mod tests { create_transaction_and_checkpoint(&catalog).await; create_transaction(&catalog).await; - let before = Utc::now() - Duration::seconds(1_000); + let before = config.time_provider.now() - Duration::from_secs(1_000); prune_history(Arc::clone(iox_object_store), before) .await .unwrap(); diff --git a/parquet_file/src/test_utils.rs b/parquet_file/src/test_utils.rs index bcbfeec222..74596aa6d2 100644 --- a/parquet_file/src/test_utils.rs +++ b/parquet_file/src/test_utils.rs @@ -871,7 +871,8 @@ pub async fn make_iox_object_store() -> Arc<IoxObjectStore> { /// Creates a new [`PreservedCatalogConfig`] with an in-memory object store pub async fn make_config() -> PreservedCatalogConfig { let iox_object_store = make_iox_object_store().await; - PreservedCatalogConfig::new(iox_object_store) + let time_provider = 
Arc::new(time::SystemProvider::new()); + PreservedCatalogConfig::new(iox_object_store, time_provider) } pub fn read_data_from_parquet_data(schema: SchemaRef, parquet_data: Vec<u8>) -> Vec<RecordBatch> { diff --git a/server/src/database.rs b/server/src/database.rs index d506ec249a..2df42205cb 100644 --- a/server/src/database.rs +++ b/server/src/database.rs @@ -20,7 +20,7 @@ use internal_types::freezable::Freezable; use iox_object_store::IoxObjectStore; use observability_deps::tracing::{error, info, warn}; use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard}; -use parquet_file::catalog::core::{PreservedCatalog, PreservedCatalogConfig}; +use parquet_file::catalog::core::PreservedCatalog; use persistence_windows::checkpoint::ReplayPlan; use snafu::{ensure, OptionExt, ResultExt, Snafu}; use std::{future::Future, sync::Arc, time::Duration}; @@ -210,10 +210,9 @@ impl Database { .await .context(SavingRules)?; - let config = PreservedCatalogConfig::new(iox_object_store); create_preserved_catalog( db_name, - config, + iox_object_store, Arc::clone(application.metric_registry()), Arc::clone(application.time_provider()), true, @@ -1050,12 +1049,9 @@ impl DatabaseStateDatabaseObjectStoreFound { .fail(); } - let catalog_config = PreservedCatalogConfig::new(Arc::clone(&self.iox_object_store)); - Ok(DatabaseStateRulesLoaded { provided_rules: Arc::new(rules), iox_object_store: Arc::clone(&self.iox_object_store), - catalog_config, }) } } @@ -1064,7 +1060,6 @@ impl DatabaseStateDatabaseObjectStoreFound { struct DatabaseStateRulesLoaded { provided_rules: Arc<ProvidedDatabaseRules>, iox_object_store: Arc<IoxObjectStore>, - catalog_config: PreservedCatalogConfig, } impl DatabaseStateRulesLoaded { @@ -1075,7 +1070,7 @@ impl DatabaseStateRulesLoaded { ) -> Result<DatabaseStateCatalogLoaded, InitError> { let (preserved_catalog, catalog, replay_plan) = load_or_create_preserved_catalog( shared.config.name.as_str(), - self.catalog_config.clone(), + Arc::clone(&self.iox_object_store), Arc::clone(shared.application.metric_registry()), Arc::clone(shared.application.time_provider()), shared.config.wipe_catalog_on_error, diff --git a/server/src/db.rs b/server/src/db.rs index 806d980303..92c2f7922d 100644 --- a/server/src/db.rs +++ b/server/src/db.rs @@ -14,7 +14,6 @@ use std::{ use ::lifecycle::select_persistable_chunks; use async_trait::async_trait; -use chrono::{DateTime, Utc}; use parking_lot::{Mutex, RwLock}; use rand_distr::{Distribution, Poisson}; use snafu::{ensure, OptionExt, ResultExt, Snafu}; @@ -826,7 +825,9 @@ impl Db { .as_mut() .expect("lifecycle policy should be initialized"); - policy.check_for_work(self.utc_now()).await + policy + .check_for_work(self.time_provider.now().date_time()) + .await } }; @@ -854,20 +855,13 @@ impl Db { debug!(?duration, "cleanup worker sleeps"); tokio::time::sleep(duration).await; - match chrono::Duration::from_std(catalog_transaction_prune_age) { - Ok(catalog_transaction_prune_age) => { - if let Err(e) = prune_catalog_transaction_history( - self.iox_object_store(), - Utc::now() - catalog_transaction_prune_age, - ) - .await - { - error!(%e, "error while pruning catalog transactions"); - } - } - Err(e) => { - error!(%e, "cannot convert `catalog_transaction_prune_age`, skipping transaction pruning"); - } + if let Err(e) = prune_catalog_transaction_history( + self.iox_object_store(), + self.time_provider.now() - catalog_transaction_prune_age, + ) + .await + { + error!(%e, "error while pruning catalog transactions"); } if let Err(e) = 
self.cleanup_unreferenced_parquet_files().await { @@ -916,13 +910,6 @@ impl Db { info!("finished db background worker"); } - /// `Utc::now()` that is used by `Db`. Can be mocked for testing. - /// - /// TODO: Remove (#2722) - fn utc_now(&self) -> DateTime<Utc> { - self.time_provider.now().date_time() - } - async fn cleanup_unreferenced_parquet_files( self: &Arc<Self>, ) -> std::result::Result<(), parquet_file::catalog::cleanup::Error> { @@ -1422,7 +1409,6 @@ mod tests { use iox_object_store::ParquetFilePath; use metric::{Attributes, CumulativeGauge, Metric, Observation}; use object_store::ObjectStore; - use parquet_file::catalog::core::PreservedCatalogConfig; use parquet_file::{ catalog::test_helpers::load_ok, metadata::IoxParquetMetaData, @@ -3252,7 +3238,7 @@ mod tests { // ==================== check: empty catalog created ==================== // at this point, an empty preserved catalog exists - let config = PreservedCatalogConfig::new(Arc::clone(&db.iox_object_store)); + let config = db.preserved_catalog.config(); let maybe_preserved_catalog = load_ok(config.clone()).await; assert!(maybe_preserved_catalog.is_some()); diff --git a/server/src/db/lifecycle/persist.rs b/server/src/db/lifecycle/persist.rs index 0358e4cbfb..663e1d0a66 100644 --- a/server/src/db/lifecycle/persist.rs +++ b/server/src/db/lifecycle/persist.rs @@ -238,7 +238,6 @@ mod tests { }; use lifecycle::{LockableChunk, LockablePartition}; use object_store::ObjectStore; - use parquet_file::catalog::core::PreservedCatalogConfig; use predicate::delete_expr::{DeleteExpr, Op, Scalar}; use query::QueryDatabase; use std::{ @@ -560,10 +559,9 @@ mod tests { // check object store delete predicates let metric_registry = Arc::new(metric::Registry::new()); - let config = PreservedCatalogConfig::new(Arc::clone(&db.iox_object_store)); let (_preserved_catalog, catalog, _replay_plan) = load_or_create_preserved_catalog( db_name, - config, + Arc::clone(&db.iox_object_store), metric_registry, Arc::clone(&db.time_provider), false, diff --git a/server/src/db/load.rs b/server/src/db/load.rs index 923b6c7e5d..34c8984d0d 100644 --- a/server/src/db/load.rs +++ b/server/src/db/load.rs @@ -4,10 +4,9 @@ use super::catalog::{chunk::ChunkStage, table::TableSchemaUpsertHandle, Catalog}; use iox_object_store::{IoxObjectStore, ParquetFilePath}; use observability_deps::tracing::{error, info}; -use parquet_file::catalog::core::PreservedCatalogConfig; use parquet_file::{ catalog::{ - core::PreservedCatalog, + core::{PreservedCatalog, PreservedCatalogConfig}, interface::{ CatalogParquetInfo, CatalogState, CatalogStateAddError, CatalogStateRemoveError, ChunkAddrWithoutDatabase, ChunkCreationFailed, @@ -53,7 +52,7 @@ pub type Result<T, E = Error> = std::result::Result<T, E>; /// <https://github.com/influxdata/influxdb_iox/issues/1522> pub async fn load_or_create_preserved_catalog( db_name: &str, - config: PreservedCatalogConfig, + iox_object_store: Arc<IoxObjectStore>, metric_registry: Arc<::metric::Registry>, time_provider: Arc<dyn TimeProvider>, wipe_on_error: bool, @@ -62,7 +61,7 @@ pub async fn load_or_create_preserved_catalog( // first try to load existing catalogs match PreservedCatalog::load( db_name, - config.clone(), + PreservedCatalogConfig::new(Arc::clone(&iox_object_store), Arc::clone(&time_provider)), LoaderEmptyInput::new( Arc::clone(&metric_registry), Arc::clone(&time_provider), @@ -90,8 +89,14 @@ pub async fn load_or_create_preserved_catalog( db_name ); - create_preserved_catalog(db_name, config, metric_registry, time_provider, skip_replay) - 
.await + create_preserved_catalog( + db_name, + iox_object_store, + metric_registry, + time_provider, + skip_replay, + ) + .await } Err(e) => { if wipe_on_error { @@ -99,13 +104,13 @@ pub async fn load_or_create_preserved_catalog( // broken => wipe for now (at least during early iterations) error!("cannot load catalog, so wipe it: {}", e); - PreservedCatalog::wipe_with_config(&config) + PreservedCatalog::wipe(&iox_object_store) .await .context(CannotWipeCatalog)?; create_preserved_catalog( db_name, - config, + iox_object_store, metric_registry, time_provider, skip_replay, @@ -123,11 +128,13 @@ pub async fn load_or_create_preserved_catalog( /// This will fail if a preserved catalog already exists. pub async fn create_preserved_catalog( db_name: &str, - config: PreservedCatalogConfig, + iox_object_store: Arc<IoxObjectStore>, metric_registry: Arc<metric::Registry>, time_provider: Arc<dyn TimeProvider>, skip_replay: bool, ) -> Result<(PreservedCatalog, Catalog, Option<ReplayPlan>)> { + let config = PreservedCatalogConfig::new(iox_object_store, Arc::clone(&time_provider)); + let (preserved_catalog, loader) = PreservedCatalog::new_empty( db_name, config, @@ -341,15 +348,16 @@ mod tests { .await .unwrap(), ); - let config = PreservedCatalogConfig::new(iox_object_store); + let config = + PreservedCatalogConfig::new(Arc::clone(&iox_object_store), Arc::clone(&time_provider)); - let (preserved_catalog, _catalog) = new_empty(config.clone()).await; + let (preserved_catalog, _catalog) = new_empty(config).await; parquet_file::catalog::test_helpers::break_catalog_with_weird_version(&preserved_catalog) .await; load_or_create_preserved_catalog( &db_name, - config, + iox_object_store, Default::default(), time_provider, true, diff --git a/server/src/lib.rs b/server/src/lib.rs index 53c41031ed..202d6217a0 100644 --- a/server/src/lib.rs +++ b/server/src/lib.rs @@ -2206,11 +2206,12 @@ mod tests { .await .unwrap(); - let (preserved_catalog, _catalog) = load_ok(PreservedCatalogConfig::new( + let config = PreservedCatalogConfig::new( catalog_broken.iox_object_store().unwrap(), - )) - .await - .unwrap(); + Arc::clone(application.time_provider()), + ); + + let (preserved_catalog, _catalog) = load_ok(config).await.unwrap(); parquet_file::catalog::test_helpers::break_catalog_with_weird_version(&preserved_catalog) .await; @@ -2289,7 +2290,13 @@ mod tests { .await .unwrap(), ); - new_empty(PreservedCatalogConfig::new(non_existing_iox_object_store)).await; + + let config = PreservedCatalogConfig::new( + non_existing_iox_object_store, + Arc::clone(application.time_provider()), + ); + new_empty(config).await; + assert_eq!( server .wipe_preserved_catalog(&db_name_non_existing) @@ -2384,8 +2391,11 @@ mod tests { .unwrap(), ); + let config = + PreservedCatalogConfig::new(iox_object_store, Arc::clone(application.time_provider())); + // create catalog - new_empty(PreservedCatalogConfig::new(iox_object_store)).await; + new_empty(config).await; // creating database will now result in an error let err = create_simple_database(&server, db_name).await.unwrap_err(); diff --git a/server/src/utils.rs b/server/src/utils.rs index 664e18c67c..99dfdffeed 100644 --- a/server/src/utils.rs +++ b/server/src/utils.rs @@ -10,7 +10,6 @@ use data_types::{ }; use iox_object_store::IoxObjectStore; use object_store::ObjectStore; -use parquet_file::catalog::core::PreservedCatalogConfig; use persistence_windows::checkpoint::ReplayPlan; use query::exec::ExecutorConfig; use query::{exec::Executor, QueryDatabase}; @@ -82,7 +81,6 @@ impl TestDbBuilder { }; 
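
The tests in this patch make catalog timestamps deterministic by injecting a mock provider (e.g. `time::MockProvider::new(Time::from_timestamp(0, 32))`) and advancing it explicitly with `inc`, so cutoffs such as `before` in the prune tests no longer depend on wall-clock timing. A rough self-contained sketch of such an advanceable test clock, using a plain nanosecond counter rather than this repo's actual `time` crate API:

    use std::sync::atomic::{AtomicU64, Ordering};
    use std::time::Duration;

    /// A test clock that only moves when the test says so.
    struct MockClock {
        nanos: AtomicU64,
    }

    impl MockClock {
        fn new(start_nanos: u64) -> Self {
            Self { nanos: AtomicU64::new(start_nanos) }
        }

        /// Current "now" in nanoseconds.
        fn now_nanos(&self) -> u64 {
            self.nanos.load(Ordering::Relaxed)
        }

        /// Advance the clock and return the new "now".
        fn inc(&self, d: Duration) -> u64 {
            let delta = d.as_nanos() as u64;
            self.nanos.fetch_add(delta, Ordering::Relaxed) + delta
        }
    }

    fn main() {
        let clock = MockClock::new(32);

        // Pretend these are catalog transactions committed at known instants.
        let t1 = clock.inc(Duration::from_secs(10));     // committed before the cutoff
        let before = clock.inc(Duration::from_secs(11)); // pruning cutoff
        let t2 = clock.inc(Duration::from_secs(1));      // committed after the cutoff

        // Everything strictly older than `before` is eligible for pruning.
        assert!(t1 < before);
        assert!(t2 > before);
        println!(
            "clock at {} ns; prune everything recorded before {} ns",
            clock.now_nanos(),
            before
        );
    }
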
let iox_object_store = Arc::new(iox_object_store); - let config = PreservedCatalogConfig::new(Arc::clone(&iox_object_store)); // deterministic thread and concurrency count let exec = Arc::new(Executor::new_with_config(ExecutorConfig { @@ -94,7 +92,7 @@ impl TestDbBuilder { let (preserved_catalog, catalog, replay_plan) = load_or_create_preserved_catalog( db_name.as_str(), - config, + Arc::clone(&iox_object_store), Arc::clone(&metric_registry), Arc::clone(&time_provider), false, From 293620395e111eaba5fe3fa3f4c57e214faefc27 Mon Sep 17 00:00:00 2001 From: Marco Neumann <marco@crepererum.net> Date: Tue, 12 Oct 2021 16:44:25 +0200 Subject: [PATCH 15/17] test: ensure jeager trace IDs integer conversion works --- trace_exporters/src/jaeger/span.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/trace_exporters/src/jaeger/span.rs b/trace_exporters/src/jaeger/span.rs index 73ed699de5..83addb6d97 100644 --- a/trace_exporters/src/jaeger/span.rs +++ b/trace_exporters/src/jaeger/span.rs @@ -145,3 +145,18 @@ fn tag_from_meta(key: String, value: MetaValue) -> jaeger::Tag { }; tag } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_split_trace_id_integer_conversion() { + // test case from + // https://github.com/open-telemetry/opentelemetry-specification/blob/639c7443e78800b085d2c9826d1b300f5e81fded/specification/trace/sdk_exporters/jaeger.md#ids + let trace_id = TraceId::new(0xFF00000000000000).unwrap(); + let (high, low) = split_trace_id(trace_id); + assert_eq!(high, 0); + assert_eq!(low, -72057594037927936); + } +} From 5b69bb0d72fea17ad10231e8cd8865b8a1e7e16a Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Tue, 12 Oct 2021 18:34:16 +0100 Subject: [PATCH 16/17] feat: reduce lifecycle lock scope (#2242) (#2810) Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com> --- lifecycle/src/policy.rs | 2 +- server/src/database.rs | 2 +- server/src/db.rs | 23 ++++---- server/src/db/replay.rs | 2 +- tracker/src/task.rs | 127 ++++++++++++++++++++++------------------ 5 files changed, 85 insertions(+), 71 deletions(-) diff --git a/lifecycle/src/policy.rs b/lifecycle/src/policy.rs index 993e568f7a..85817ff145 100644 --- a/lifecycle/src/policy.rs +++ b/lifecycle/src/policy.rs @@ -439,7 +439,7 @@ where /// The core policy logic /// /// Returns a future that resolves when this method should be called next - pub fn check_for_work(&mut self, now: DateTime<Utc>) -> BoxFuture<'_, ()> { + pub fn check_for_work(&mut self, now: DateTime<Utc>) -> BoxFuture<'static, ()> { // Any time-consuming work should be spawned as tokio tasks and not // run directly within this loop diff --git a/server/src/database.rs b/server/src/database.rs index 2df42205cb..820c083d82 100644 --- a/server/src/database.rs +++ b/server/src/database.rs @@ -1133,7 +1133,7 @@ impl DatabaseStateCatalogLoaded { let db = Arc::clone(&self.db); // TODO: Pull write buffer and lifecycle out of Db - db.unsuppress_persistence().await; + db.unsuppress_persistence(); let rules = self.provided_rules.rules(); let write_buffer_factory = shared.application.write_buffer_factory(); diff --git a/server/src/db.rs b/server/src/db.rs index 92c2f7922d..be847d80ea 100644 --- a/server/src/db.rs +++ b/server/src/db.rs @@ -298,7 +298,7 @@ pub struct Db { /// This is stored here for the following reasons: /// - to control the persistence suppression via a [`Db::unsuppress_persistence`] /// - to keep the lifecycle state (e.g. 
the number of running compactions) around - lifecycle_policy: tokio::sync::Mutex<Option<::lifecycle::LifecyclePolicy<WeakDb>>>, + lifecycle_policy: Mutex<Option<::lifecycle::LifecyclePolicy<WeakDb>>>, time_provider: Arc<dyn TimeProvider>, @@ -360,7 +360,7 @@ impl Db { worker_iterations_delete_predicate_preservation: AtomicUsize::new(0), write_buffer_producer: database_to_commit.write_buffer_producer, cleanup_lock: Default::default(), - lifecycle_policy: tokio::sync::Mutex::new(None), + lifecycle_policy: Mutex::new(None), time_provider: database_to_commit.time_provider, delete_predicates_mailbox: Default::default(), persisted_chunk_id_override: Default::default(), @@ -378,8 +378,8 @@ impl Db { } /// Allow persistence if database rules all it. - pub async fn unsuppress_persistence(&self) { - let mut guard = self.lifecycle_policy.lock().await; + pub fn unsuppress_persistence(&self) { + let mut guard = self.lifecycle_policy.lock(); let policy = guard .as_mut() .expect("lifecycle policy should be initialized"); @@ -819,15 +819,16 @@ impl Db { loop { self.worker_iterations_lifecycle .fetch_add(1, Ordering::Relaxed); - let mut guard = self.lifecycle_policy.lock().await; - let policy = guard - .as_mut() - .expect("lifecycle policy should be initialized"); + let fut = { + let mut guard = self.lifecycle_policy.lock(); + let policy = guard + .as_mut() + .expect("lifecycle policy should be initialized"); - policy - .check_for_work(self.time_provider.now().date_time()) - .await + policy.check_for_work(self.time_provider.now().date_time()) + }; + fut.await } }; diff --git a/server/src/db/replay.rs b/server/src/db/replay.rs index d77b29277d..b6f13762ed 100644 --- a/server/src/db/replay.rs +++ b/server/src/db/replay.rs @@ -711,7 +711,7 @@ mod tests { } let db = &test_db.db; - db.unsuppress_persistence().await; + db.unsuppress_persistence(); // wait until checks pass let t_0 = Instant::now(); diff --git a/tracker/src/task.rs b/tracker/src/task.rs index 805ac59c2d..5451dbb075 100644 --- a/tracker/src/task.rs +++ b/tracker/src/task.rs @@ -116,6 +116,58 @@ struct TrackerState { notify: Notify, } +impl TrackerState { + fn get_status(&self) -> TaskStatus { + // The atomic decrement in TrackerRegistration::drop has release semantics + // acquire here ensures that if a thread observes the tracker to have + // no pending_registrations it cannot subsequently observe pending_futures + // to increase. If it could, observing pending_futures==0 would be insufficient + // to conclude there are no outstanding futures + let pending_registrations = self.pending_registrations.load(Ordering::Acquire); + + // The atomic decrement in TrackedFuture::drop has release semantics + // acquire therefore ensures that if a thread observes the completion of + // a TrackedFuture, it is guaranteed to see its updates (e.g. 
wall_nanos) + let pending_futures = self.pending_futures.load(Ordering::Acquire); + + match (pending_registrations == 0, pending_futures == 0) { + (false, _) => TaskStatus::Creating, + (true, false) => TaskStatus::Running { + total_count: self.created_futures.load(Ordering::Relaxed), + pending_count: self.pending_futures.load(Ordering::Relaxed), + cpu_nanos: self.cpu_nanos.load(Ordering::Relaxed), + }, + (true, true) => { + let total_count = self.created_futures.load(Ordering::Relaxed); + let success_count = self.ok_futures.load(Ordering::Relaxed); + let error_count = self.err_futures.load(Ordering::Relaxed); + let cancelled_count = self.cancelled_futures.load(Ordering::Relaxed); + + // Failure of this would imply a future reported its completion status multiple + // times or a future was created without incrementing created_futures. + // Both of these should be impossible + let dropped_count = total_count + .checked_sub(success_count + error_count + cancelled_count) + .expect("invalid tracker state"); + + TaskStatus::Complete { + total_count, + success_count, + error_count, + cancelled_count, + dropped_count, + cpu_nanos: self.cpu_nanos.load(Ordering::Relaxed), + wall_nanos: self.wall_nanos.load(Ordering::Relaxed), + } + } + } + } + + fn is_complete(&self) -> bool { + matches!(self.get_status(), TaskStatus::Complete { .. }) + } +} + /// Returns a summary of the task execution #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum TaskResult { @@ -323,54 +375,12 @@ where /// Returns true if all futures associated with this tracker have /// been dropped and no more can be created pub fn is_complete(&self) -> bool { - matches!(self.get_status(), TaskStatus::Complete { .. }) + self.state.is_complete() } /// Gets the status of the tracker pub fn get_status(&self) -> TaskStatus { - // The atomic decrement in TrackerRegistration::drop has release semantics - // acquire here ensures that if a thread observes the tracker to have - // no pending_registrations it cannot subsequently observe pending_futures - // to increase. If it could, observing pending_futures==0 would be insufficient - // to conclude there are no outstanding futures - let pending_registrations = self.state.pending_registrations.load(Ordering::Acquire); - - // The atomic decrement in TrackedFuture::drop has release semantics - // acquire therefore ensures that if a thread observes the completion of - // a TrackedFuture, it is guaranteed to see its updates (e.g. wall_nanos) - let pending_futures = self.state.pending_futures.load(Ordering::Acquire); - - match (pending_registrations == 0, pending_futures == 0) { - (false, _) => TaskStatus::Creating, - (true, false) => TaskStatus::Running { - total_count: self.state.created_futures.load(Ordering::Relaxed), - pending_count: self.state.pending_futures.load(Ordering::Relaxed), - cpu_nanos: self.state.cpu_nanos.load(Ordering::Relaxed), - }, - (true, true) => { - let total_count = self.state.created_futures.load(Ordering::Relaxed); - let success_count = self.state.ok_futures.load(Ordering::Relaxed); - let error_count = self.state.err_futures.load(Ordering::Relaxed); - let cancelled_count = self.state.cancelled_futures.load(Ordering::Relaxed); - - // Failure of this would imply a future reported its completion status multiple - // times or a future was created without incrementing created_futures. 
- // Both of these should be impossible - let dropped_count = total_count - .checked_sub(success_count + error_count + cancelled_count) - .expect("invalid tracker state"); - - TaskStatus::Complete { - total_count, - success_count, - error_count, - cancelled_count, - dropped_count, - cpu_nanos: self.state.cpu_nanos.load(Ordering::Relaxed), - wall_nanos: self.state.wall_nanos.load(Ordering::Relaxed), - } - } - } + self.state.get_status() } /// Returns the instant the tracker was created @@ -385,21 +395,24 @@ where /// Blocks until all futures associated with the tracker have been /// dropped and no more can be created - pub async fn join(&self) { - // Notify is notified when pending_futures hits 0 AND when pending_registrations - // hits 0. In almost all cases join won't be called before pending_registrations - // has already hit 0, but in the extremely rare case this occurs the loop - // handles the spurious wakeup - loop { - // Request notification before checking if complete - // to avoid a race condition - let notify = self.state.notify.notified(); + pub fn join(&self) -> impl std::future::Future<Output = ()> { + let state = Arc::clone(&self.state); + async move { + // Notify is notified when pending_futures hits 0 AND when pending_registrations + // hits 0. In almost all cases join won't be called before pending_registrations + // has already hit 0, but in the extremely rare case this occurs the loop + // handles the spurious wakeup + loop { + // Request notification before checking if complete + // to avoid a race condition + let notify = state.notify.notified(); - if self.is_complete() { - return; + if state.is_complete() { + return; + } + + notify.await } - - notify.await } } } From 035654b4f984a08283da0256f1d98a8a31a1f76c Mon Sep 17 00:00:00 2001 From: Andrew Lamb <alamb@influxdata.com> Date: Tue, 12 Oct 2021 15:34:54 -0400 Subject: [PATCH 17/17] refactor: do not rebuild query_test when .sql or .expected files change (#2816) * feat: Do not rebuild query_tests if .sql or .expected change * feat: Add CI check * refactor: move some sql tests to .sql files * tests: port tests / expected results to data files * fix: restore old name check-flatbuffers --- .circleci/config.yml | 6 +- query_tests/cases/in/basic.expected | 39 +++++ query_tests/cases/in/basic.sql | 32 ++++ query_tests/cases/in/timestamps.expected | 26 +++ query_tests/cases/in/timestamps.sql | 12 ++ query_tests/check-generated.sh | 26 +++ query_tests/generate/Cargo.lock | 7 + query_tests/generate/Cargo.toml | 11 ++ .../{build.rs => generate/src/main.rs} | 33 +++- query_tests/src/cases.rs | 28 +++ query_tests/src/scenarios.rs | 1 + query_tests/src/sql.rs | 162 ------------------ 12 files changed, 212 insertions(+), 171 deletions(-) create mode 100644 query_tests/cases/in/basic.expected create mode 100644 query_tests/cases/in/basic.sql create mode 100644 query_tests/cases/in/timestamps.expected create mode 100644 query_tests/cases/in/timestamps.sql create mode 100755 query_tests/check-generated.sh create mode 100644 query_tests/generate/Cargo.lock create mode 100644 query_tests/generate/Cargo.toml rename query_tests/{build.rs => generate/src/main.rs} (76%) diff --git a/.circleci/config.yml b/.circleci/config.yml index b75dbed4cf..62fc926e59 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -288,7 +288,8 @@ jobs: name: buf lint command: buf lint - # Check that the generated flatbuffers code is up-to-date with the changes in this PR. + # Check that any generated files are is up-to-date with the changes in this PR. 
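
The tracker and lifecycle changes above all use the same trick to shrink lock scope: do the bookkeeping while briefly holding a synchronous lock, then hand back a future that owns (or `Arc`-clones) everything it needs, so the guard is dropped before any `.await`. That is why `check_for_work` now returns a `BoxFuture<'static, ()>` and `join` clones the tracker's `Arc`'d state. A compact sketch of the pattern using a plain `std::sync::Mutex` and `tokio` as stand-ins (simplified types, not the IOx ones):

    use std::future::Future;
    use std::sync::{Arc, Mutex};
    use std::time::Duration;

    /// Stand-in for the lifecycle policy guarded by the lock.
    #[derive(Default)]
    struct Policy {
        iterations: usize,
    }

    struct Db {
        policy: Mutex<Policy>,
    }

    impl Db {
        /// Build the unit of work while holding the lock, but return a future
        /// that owns its inputs, so the guard never lives across an await point.
        fn check_for_work(&self) -> impl Future<Output = ()> + 'static {
            let iteration = {
                let mut guard = self.policy.lock().unwrap();
                guard.iterations += 1;
                guard.iterations
                // guard dropped at the end of this block
            };

            async move {
                // Simulated slow work; the lock is NOT held here.
                tokio::time::sleep(Duration::from_millis(10)).await;
                println!("finished lifecycle iteration {}", iteration);
            }
        }
    }

    #[tokio::main]
    async fn main() {
        let db = Arc::new(Db { policy: Mutex::new(Policy::default()) });

        // Because the returned future is 'static, it can be awaited or even
        // spawned onto another task without borrowing `db`.
        let fut = db.check_for_work();
        tokio::spawn(fut).await.unwrap();
    }
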
+ # named "check-flatbuffers" because that name is hardcoded into github checks check-flatbuffers: docker: - image: quay.io/influxdb/rust:ci @@ -296,6 +297,9 @@ jobs: steps: - checkout - rust_components # Regenerating flatbuffers uses rustfmt + - run: + name: Check Query Tests + command: ./query_tests/check-generated.sh - run: name: Check Flatbuffers command: INFLUXDB_IOX_INTEGRATION_LOCAL=1 ./entry/check-flatbuffers.sh diff --git a/query_tests/cases/in/basic.expected b/query_tests/cases/in/basic.expected new file mode 100644 index 0000000000..b85fd6e8fd --- /dev/null +++ b/query_tests/cases/in/basic.expected @@ -0,0 +1,39 @@ +-- Test Setup: TwoMeasurements +-- SQL: SELECT * from cpu; ++--------+--------------------------------+------+ +| region | time | user | ++--------+--------------------------------+------+ +| west | 1970-01-01T00:00:00.000000100Z | 23.2 | +| west | 1970-01-01T00:00:00.000000150Z | 21 | ++--------+--------------------------------+------+ +-- SQL: SELECT user, region from cpu; ++------+--------+ +| user | region | ++------+--------+ +| 23.2 | west | +| 21 | west | ++------+--------+ +-- SQL: SELECT * from cpu where time > to_timestamp('1970-01-01T00:00:00.000000120+00:00'); ++--------+--------------------------------+------+ +| region | time | user | ++--------+--------------------------------+------+ +| west | 1970-01-01T00:00:00.000000150Z | 21 | ++--------+--------------------------------+------+ +-- SQL: SELECT user, region from cpu where time > to_timestamp('1970-01-01T00:00:00.000000120+00:00'); ++------+--------+ +| user | region | ++------+--------+ +| 21 | west | ++------+--------+ +-- SQL: SELECT count(*) from cpu group by region; ++-----------------+ +| COUNT(UInt8(1)) | ++-----------------+ +| 2 | ++-----------------+ +-- SQL: SELECT * from disk; ++-------+--------+--------------------------------+ +| bytes | region | time | ++-------+--------+--------------------------------+ +| 99 | east | 1970-01-01T00:00:00.000000200Z | ++-------+--------+--------------------------------+ diff --git a/query_tests/cases/in/basic.sql b/query_tests/cases/in/basic.sql new file mode 100644 index 0000000000..ee97e8c320 --- /dev/null +++ b/query_tests/cases/in/basic.sql @@ -0,0 +1,32 @@ +-- Basic query tests +-- IOX_SETUP: TwoMeasurements + +-- query data +SELECT * from cpu; + + +-- BUG: https://github.com/influxdata/influxdb_iox/issues/2776 +-- "+----------------+", +-- "| MIN(cpu.region |", +-- "+----------------+", +-- "| west |", +-- "+----------------+", +--SELECT min(region) from cpu; + +-- projection +-- expect that to get a subset of the columns and in the order specified +SELECT user, region from cpu; + +-- predicate on CPU +SELECT * from cpu where time > to_timestamp('1970-01-01T00:00:00.000000120+00:00'); + +-- projection and predicate +-- expect that to get a subset of the columns and in the order specified +SELECT user, region from cpu where time > to_timestamp('1970-01-01T00:00:00.000000120+00:00'); + +-- basic grouping +SELECT count(*) from cpu group by region; + + +-- select from a different measurement +SELECT * from disk; diff --git a/query_tests/cases/in/timestamps.expected b/query_tests/cases/in/timestamps.expected new file mode 100644 index 0000000000..9e1519d047 --- /dev/null +++ b/query_tests/cases/in/timestamps.expected @@ -0,0 +1,26 @@ +-- Test Setup: OneMeasurementRealisticTimes +-- SQL: SELECT * from cpu; ++--------+----------------------+------+ +| region | time | user | ++--------+----------------------+------+ +| west | 2021-07-20T19:28:50Z | 23.2 
| +| west | 2021-07-20T19:30:30Z | 21 | ++--------+----------------------+------+ +-- SQL: SELECT * FROM cpu WHERE time > to_timestamp('2021-07-20 19:28:50+00:00'); ++--------+----------------------+------+ +| region | time | user | ++--------+----------------------+------+ +| west | 2021-07-20T19:30:30Z | 21 | ++--------+----------------------+------+ +-- SQL: SELECT * FROM cpu WHERE time > to_timestamp('2021-07-20T19:28:50Z'); ++--------+----------------------+------+ +| region | time | user | ++--------+----------------------+------+ +| west | 2021-07-20T19:30:30Z | 21 | ++--------+----------------------+------+ +-- SQL: SELECT * FROM cpu WHERE CAST(time AS BIGINT) > CAST(to_timestamp('2021-07-20T19:28:50Z') AS BIGINT); ++--------+----------------------+------+ +| region | time | user | ++--------+----------------------+------+ +| west | 2021-07-20T19:30:30Z | 21 | ++--------+----------------------+------+ diff --git a/query_tests/cases/in/timestamps.sql b/query_tests/cases/in/timestamps.sql new file mode 100644 index 0000000000..1f5071651a --- /dev/null +++ b/query_tests/cases/in/timestamps.sql @@ -0,0 +1,12 @@ +-- Timestamp printing / output testss +-- IOX_SETUP: OneMeasurementRealisticTimes + +-- Expect the timestamp output to be formatted correctly (with `Z`) +SELECT * from cpu; +-- explicit offset format +SELECT * FROM cpu WHERE time > to_timestamp('2021-07-20 19:28:50+00:00'); +-- Use RCF3339 format +SELECT * FROM cpu WHERE time > to_timestamp('2021-07-20T19:28:50Z'); +--use cast workaround +SELECT * FROM cpu WHERE + CAST(time AS BIGINT) > CAST(to_timestamp('2021-07-20T19:28:50Z') AS BIGINT); diff --git a/query_tests/check-generated.sh b/query_tests/check-generated.sh new file mode 100755 index 0000000000..cfe5075af8 --- /dev/null +++ b/query_tests/check-generated.sh @@ -0,0 +1,26 @@ +#!/bin/bash -eu + +# Change to the query_tests crate directory, where this script is located +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +pushd $DIR + +echo "Regenerating query_tests..." + +(cd generate && cargo run) + +echo "Checking for uncommitted changes..." + +if ! git diff --quiet HEAD --; then + echo "git diff found:" + git diff HEAD + echo "************************************************************" + echo "* Found uncommitted changes to generated flatbuffers code! *" + echo "* Please do:" + echo "* cd query_tests/generate" + echo "* cargo run" + echo "* to regenerate the query_tests code and check it in! *" + echo "************************************************************" + exit 1 +else + echo "No uncommitted changes; everything is awesome." +fi diff --git a/query_tests/generate/Cargo.lock b/query_tests/generate/Cargo.lock new file mode 100644 index 0000000000..41cb2aea18 --- /dev/null +++ b/query_tests/generate/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
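
The new `.sql` / `.expected` pairs above are golden-file tests: the generated test runs every statement in the `.sql` file against the setup named in its `IOX_SETUP` comment and compares the formatted output with the checked-in `.expected` file. A much-simplified, self-contained sketch of that idea (hypothetical `run_query` and paths; the real `Runner` in `query_tests` handles setup scenarios and result formatting):

    use std::fs;
    use std::path::Path;

    /// Hypothetical stand-in: execute one SQL statement and format its result.
    fn run_query(sql: &str) -> String {
        // The real runner executes against a database scenario; this sketch
        // only echoes the statement so the example stays self-contained.
        format!("-- SQL: {}\n(results would go here)\n", sql)
    }

    /// Compare freshly produced output against the checked-in golden file.
    fn check_case(sql_path: &Path, expected_path: &Path) -> Result<(), String> {
        let sql = fs::read_to_string(sql_path).map_err(|e| e.to_string())?;
        let expected = fs::read_to_string(expected_path).map_err(|e| e.to_string())?;

        let actual: String = sql
            .lines()
            .map(str::trim)
            .filter(|l| !l.is_empty() && !l.starts_with("--"))
            .map(run_query)
            .collect();

        if actual != expected {
            return Err(format!(
                "golden file mismatch for {}; update {} if the new output is intended",
                sql_path.display(),
                expected_path.display()
            ));
        }
        Ok(())
    }

    fn main() {
        // Hypothetical relative paths mirroring the layout added in this patch.
        let result = check_case(
            Path::new("cases/in/basic.sql"),
            Path::new("cases/in/basic.expected"),
        );
        println!("{:?}", result);
    }
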
+version = 3 + +[[package]] +name = "generate" +version = "0.1.0" diff --git a/query_tests/generate/Cargo.toml b/query_tests/generate/Cargo.toml new file mode 100644 index 0000000000..bbe46ba52b --- /dev/null +++ b/query_tests/generate/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "generate" +description = "Creates rust #tests for files in .sql" +version = "0.1.0" +authors = ["Andrew Lamb <andrew@nerdnetworks.org>"] +edition = "2018" + +[dependencies] # In alphabetical order + +# Note this is a standalone binary and not part of the overall workspace +[workspace] \ No newline at end of file diff --git a/query_tests/build.rs b/query_tests/generate/src/main.rs similarity index 76% rename from query_tests/build.rs rename to query_tests/generate/src/main.rs index bb97d6f6ba..77cb07af04 100644 --- a/query_tests/build.rs +++ b/query_tests/generate/src/main.rs @@ -1,26 +1,41 @@ -//! Finds all .sql files in `cases/in/` and creates corresponding entries in src/cases.rs -//! native Rust types. +//! Finds all .sql files in `cases/in/` and creates corresponding +//! entries in src/cases.rs as native Rust test runner tests +use std::ffi::OsStr; use std::path::{Path, PathBuf}; type Error = Box<dyn std::error::Error>; type Result<T, E = Error> = std::result::Result<T, E>; fn main() -> Result<()> { + // Ignores all args and finds relative paths based on PWD and the command + + // example: query_tests/generate/target/debug/generate + let current_exe = std::env::current_exe()?; + + // walk up parent tree looking for query_tests + let mut query_tests = current_exe.clone(); + let needle = OsStr::new("query_tests"); + loop { + if query_tests.file_name() == Some(&needle) { + break; + } + if !query_tests.pop() { + panic!("Can not find 'query_tests' in the path: {:?}", current_exe); + } + } + // crate root - let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - let cases = root.join("cases").join("in"); + let cases = query_tests.join("cases").join("in"); let sql_files = find_sql_files(&cases); - // Tell cargo to recompile if anything in the cases directory changes - println!("cargo:rerun-if-changed={}", cases.display()); - // Now create the generated sql file let output_content = make_cases_rs(&sql_files).join("\n"); - let output_file = root.join("src").join("cases.rs"); + let output_file = query_tests.join("src").join("cases.rs"); write_if_changed(&output_file, &output_content); + println!("Done"); Ok(()) } @@ -94,6 +109,8 @@ fn write_if_changed(path: &Path, content: &str) { }; if changed { + println!("Writing changes to {}", path.display()); + std::fs::write(path, content) .map_err(|e| format!("Error writing to {:?}: {}", path, e)) .unwrap(); diff --git a/query_tests/src/cases.rs b/query_tests/src/cases.rs index bd409a5236..19a0fd73c4 100644 --- a/query_tests/src/cases.rs +++ b/query_tests/src/cases.rs @@ -18,6 +18,20 @@ async fn test_cases_all_chunks_dropped_sql() { .expect("flush worked"); } +#[tokio::test] +// Tests from "basic.sql", +async fn test_cases_basic_sql() { + let input_path = Path::new("cases").join("in").join("basic.sql"); + let mut runner = Runner::new(); + runner + .run(input_path) + .await + .expect("test failed"); + runner + .flush() + .expect("flush worked"); +} + #[tokio::test] // Tests from "chunk_order.sql", async fn test_cases_chunk_order_sql() { @@ -86,4 +100,18 @@ async fn test_cases_stats_plans_sql() { runner .flush() .expect("flush worked"); +} + +#[tokio::test] +// Tests from "timestamps.sql", +async fn test_cases_timestamps_sql() { + let input_path = 
Path::new("cases").join("in").join("timestamps.sql"); + let mut runner = Runner::new(); + runner + .run(input_path) + .await + .expect("test failed"); + runner + .flush() + .expect("flush worked"); } \ No newline at end of file diff --git a/query_tests/src/scenarios.rs b/query_tests/src/scenarios.rs index 5a218c2602..307a947bfe 100644 --- a/query_tests/src/scenarios.rs +++ b/query_tests/src/scenarios.rs @@ -57,6 +57,7 @@ pub fn get_all_setups() -> &'static HashMap<String, Arc<dyn DbSetup>> { register_setup!(OneMeasurementAllChunksDropped), register_setup!(ChunkOrder), register_setup!(ThreeDeleteThreeChunks), + register_setup!(OneMeasurementRealisticTimes), ] .into_iter() .map(|(name, setup)| (name.to_string(), setup as Arc<dyn DbSetup>)) diff --git a/query_tests/src/sql.rs b/query_tests/src/sql.rs index 4866895b00..9a3f04ef28 100644 --- a/query_tests/src/sql.rs +++ b/query_tests/src/sql.rs @@ -39,168 +39,6 @@ where } } -#[tokio::test] -async fn sql_select_from_cpu() { - let expected = vec![ - "+--------+--------------------------------+------+", - "| region | time | user |", - "+--------+--------------------------------+------+", - "| west | 1970-01-01T00:00:00.000000100Z | 23.2 |", - "| west | 1970-01-01T00:00:00.000000150Z | 21 |", - "+--------+--------------------------------+------+", - ]; - run_sql_test_case(TwoMeasurements {}, "SELECT * from cpu", &expected).await; -} - -// BUG: https://github.com/influxdata/influxdb_iox/issues/2776 -#[ignore] -#[tokio::test] -async fn sql_select_from_cpu_min_utf8() { - let expected = vec![ - "+----------------+", - "| MIN(cpu.region |", - "+----------------+", - "| west |", - "+----------------+", - ]; - run_sql_test_case(TwoMeasurements {}, "SELECT min(region) from cpu", &expected).await; -} - -#[tokio::test] -async fn sql_select_from_cpu_2021() { - let expected = vec![ - "+--------+----------------------+------+", - "| region | time | user |", - "+--------+----------------------+------+", - "| west | 2021-07-20T19:28:50Z | 23.2 |", - "| west | 2021-07-20T19:30:30Z | 21 |", - "+--------+----------------------+------+", - ]; - run_sql_test_case( - OneMeasurementRealisticTimes {}, - "SELECT * from cpu", - &expected, - ) - .await; -} - -#[tokio::test] -async fn sql_select_from_cpu_with_timestamp_predicate_explicit_utc() { - let expected = vec![ - "+--------+----------------------+------+", - "| region | time | user |", - "+--------+----------------------+------+", - "| west | 2021-07-20T19:30:30Z | 21 |", - "+--------+----------------------+------+", - ]; - - run_sql_test_case( - OneMeasurementRealisticTimes {}, - "SELECT * FROM cpu WHERE time > to_timestamp('2021-07-20 19:28:50+00:00')", - &expected, - ) - .await; - - // Use RCF3339 format - run_sql_test_case( - OneMeasurementRealisticTimes {}, - "SELECT * FROM cpu WHERE time > to_timestamp('2021-07-20T19:28:50Z')", - &expected, - ) - .await; - - // use cast workaround - run_sql_test_case( - OneMeasurementRealisticTimes {}, - "SELECT * FROM cpu WHERE \ - CAST(time AS BIGINT) > CAST(to_timestamp('2021-07-20T19:28:50Z') AS BIGINT)", - &expected, - ) - .await; -} - -#[tokio::test] -async fn sql_select_from_cpu_with_projection() { - // expect that to get a subset of the columns and in the order specified - let expected = vec![ - "+------+--------+", - "| user | region |", - "+------+--------+", - "| 23.2 | west |", - "| 21 | west |", - "+------+--------+", - ]; - run_sql_test_case( - TwoMeasurements {}, - "SELECT user, region from cpu", - &expected, - ) - .await; -} - -#[tokio::test] -async fn 
sql_select_from_cpu_pred() { - let expected = vec![ - "+--------+--------------------------------+------+", - "| region | time | user |", - "+--------+--------------------------------+------+", - "| west | 1970-01-01T00:00:00.000000150Z | 21 |", - "+--------+--------------------------------+------+", - ]; - run_sql_test_case( - TwoMeasurements {}, - "SELECT * from cpu where time > to_timestamp('1970-01-01T00:00:00.000000120+00:00')", - &expected, - ) - .await; -} - -#[tokio::test] -async fn sql_select_from_cpu_with_projection_and_pred() { - // expect that to get a subset of the columns and in the order specified - let expected = vec![ - "+------+--------+", - "| user | region |", - "+------+--------+", - "| 21 | west |", - "+------+--------+", - ]; - run_sql_test_case( - TwoMeasurements {}, - "SELECT user, region from cpu where time > to_timestamp('1970-01-01T00:00:00.000000120+00:00')", - &expected - ).await; -} - -#[tokio::test] -async fn sql_select_from_cpu_group() { - let expected = vec![ - "+-----------------+", - "| COUNT(UInt8(1)) |", - "+-----------------+", - "| 2 |", - "+-----------------+", - ]; - run_sql_test_case( - TwoMeasurements {}, - "SELECT count(*) from cpu group by region", - &expected, - ) - .await; -} - -#[tokio::test] -async fn sql_select_from_disk() { - let expected = vec![ - "+-------+--------+--------------------------------+", - "| bytes | region | time |", - "+-------+--------+--------------------------------+", - "| 99 | east | 1970-01-01T00:00:00.000000200Z |", - "+-------+--------+--------------------------------+", - ]; - run_sql_test_case(TwoMeasurements {}, "SELECT * from disk", &expected).await; -} - #[tokio::test] async fn sql_select_with_schema_merge() { let expected = vec![