From 5e74ae2ee74873dbf77bd9cdb9a38d2a8f8d037d Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Tue, 19 Jan 2021 11:12:59 -0500 Subject: [PATCH 01/10] refactor: Segments should know how to persist themselves to object storage --- server/src/buffer.rs | 95 ++++++++++++++++++++++++++++++++++---------- server/src/lib.rs | 70 +++++++++----------------------- 2 files changed, 91 insertions(+), 74 deletions(-) diff --git a/server/src/buffer.rs b/server/src/buffer.rs index ed78a7d06d..97fe6a41a2 100644 --- a/server/src/buffer.rs +++ b/server/src/buffer.rs @@ -3,9 +3,10 @@ use data_types::{ data::ReplicatedWrite, database_rules::{WalBufferRollover, WriterId}, + DatabaseName, }; use generated_types::wal; -use object_store::path::ObjectStorePath; +use object_store::{path::ObjectStorePath, ObjectStore}; use std::{ collections::BTreeMap, @@ -20,7 +21,7 @@ use chrono::{DateTime, Utc}; use crc32fast::Hasher; use data_types::database_rules::WalBufferConfig; use snafu::{ensure, OptionExt, ResultExt, Snafu}; -use tracing::warn; +use tracing::{error, info, warn}; #[derive(Debug, Snafu)] pub enum Error { @@ -352,6 +353,44 @@ impl Segment { *persisted } + /// Spawns a tokio task that will continuously try to persist the bytes to + /// the given object store location. + pub fn persist_bytes_in_background( + &self, + writer_id: u32, + db_name: &DatabaseName<'_>, + store: Arc, + ) -> Result<()> { + let data = self.to_file_bytes(writer_id)?; + let location = database_object_store_path(writer_id, db_name); + let location = object_store_path_for_segment(&location, self.id)?; + + let len = data.len(); + let mut stream_data = std::io::Result::Ok(data.clone()); + + tokio::task::spawn(async move { + while let Err(err) = store + .put( + &location, + futures::stream::once(async move { stream_data }), + len, + ) + .await + { + error!("error writing bytes to store: {}", err); + tokio::time::sleep(tokio::time::Duration::from_secs( + super::STORE_ERROR_PAUSE_SECONDS, + )) + .await; + stream_data = std::io::Result::Ok(data.clone()); + } + + info!("persisted data to {}", store.convert_path(&location)); + }); + + Ok(()) + } + // converts the segment to its flatbuffer bytes fn fb_bytes(&self, writer_id: u32) -> Vec { let mut fbb = flatbuffers::FlatBufferBuilder::new_with_capacity( @@ -468,7 +507,7 @@ const SEGMENT_FILE_EXTENSION: &str = ".segment"; /// Builds the path for a given segment id, given the root object store path. /// The path should be where the root of the database is (e.g. 1/my_db/). 
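/// A hedged sketch of the resulting layout, taken from the tests below
/// rather than from new behavior: the segment id is zero-padded to nine
/// digits and split into three-digit groups, so id 23 under root
/// `1/my_db/` maps to `1/my_db/wal/000/000/023.segment`, and id
/// 45_010_105 maps to `1/my_db/wal/045/010/105.segment`.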
-pub fn object_store_path_for_segment( +fn object_store_path_for_segment( root_path: &ObjectStorePath, segment_id: u64, ) -> Result { @@ -494,12 +533,19 @@ pub fn object_store_path_for_segment( Ok(path) } +// base location in object store for a given database name +fn database_object_store_path(writer_id: u32, database_name: &DatabaseName<'_>) -> ObjectStorePath { + let mut path = ObjectStorePath::default(); + path.push_dir(format!("{}", writer_id)); + path.push_dir(database_name.to_string()); + path +} + #[cfg(test)] mod tests { use super::*; use data_types::{data::lines_to_replicated_write, database_rules::DatabaseRules}; use influxdb_line_protocol::parse_lines; - use object_store::path::cloud::CloudConverter; #[test] fn append_increments_current_size_and_uses_existing_segment() { @@ -806,33 +852,38 @@ mod tests { } #[test] - fn object_store_path_for_segment() { - let path = ObjectStorePath::from_cloud_unchecked("1/mydb"); - let segment_path = super::object_store_path_for_segment(&path, 23).unwrap(); - let segment_path = CloudConverter::convert(&segment_path); + fn valid_object_store_path_for_segment() { + let mut base_path = ObjectStorePath::default(); + base_path.push_all_dirs(&["1", "mydb"]); - assert_eq!(segment_path, "1/mydb/wal/000/000/023.segment"); + let segment_path = object_store_path_for_segment(&base_path, 23).unwrap(); + let mut expected_segment_path = base_path.clone(); + expected_segment_path.push_all_dirs(&["wal", "000", "000"]); + expected_segment_path.set_file_name("023.segment"); + assert_eq!(segment_path, expected_segment_path); - let segment_path = super::object_store_path_for_segment(&path, 20_003).unwrap(); - let segment_path = CloudConverter::convert(&segment_path); + let segment_path = object_store_path_for_segment(&base_path, 20_003).unwrap(); + let mut expected_segment_path = base_path.clone(); + expected_segment_path.push_all_dirs(&["wal", "000", "020"]); + expected_segment_path.set_file_name("003.segment"); + assert_eq!(segment_path, expected_segment_path); - assert_eq!(segment_path, "1/mydb/wal/000/020/003.segment"); - - let segment_path = super::object_store_path_for_segment(&path, 45_010_105).unwrap(); - let segment_path = CloudConverter::convert(&segment_path); - - assert_eq!(segment_path, "1/mydb/wal/045/010/105.segment"); + let segment_path = object_store_path_for_segment(&base_path, 45_010_105).unwrap(); + let mut expected_segment_path = base_path.clone(); + expected_segment_path.push_all_dirs(&["wal", "045", "010"]); + expected_segment_path.set_file_name("105.segment"); + assert_eq!(segment_path, expected_segment_path); } #[test] fn object_store_path_for_segment_out_of_bounds() { - let path = ObjectStorePath::from_cloud_unchecked("1/mydb"); - let segment_path = super::object_store_path_for_segment(&path, 0) - .err() - .unwrap(); + let mut base_path = ObjectStorePath::default(); + base_path.push_all_dirs(&["1", "mydb"]); + + let segment_path = object_store_path_for_segment(&base_path, 0).err().unwrap(); matches!(segment_path, Error::SegmentIdOutOfBounds); - let segment_path = super::object_store_path_for_segment(&path, 23_000_000_000) + let segment_path = object_store_path_for_segment(&base_path, 23_000_000_000) .err() .unwrap(); matches!(segment_path, Error::SegmentIdOutOfBounds); diff --git a/server/src/lib.rs b/server/src/lib.rs index 898871458c..644be88326 100644 --- a/server/src/lib.rs +++ b/server/src/lib.rs @@ -93,7 +93,7 @@ use async_trait::async_trait; use bytes::Bytes; use futures::stream::TryStreamExt; use snafu::{OptionExt, ResultExt, 
Snafu}; -use tracing::{error, info}; +use tracing::error; type DatabaseError = Box; @@ -139,6 +139,8 @@ pub enum Error { pub type Result = std::result::Result; +const STORE_ERROR_PAUSE_SECONDS: u64 = 100; + /// `Server` is the container struct for how servers store data internally, as /// well as how they communicate with other servers. Each server will have one /// of these structs, which keeps track of all replication and query rules. @@ -186,7 +188,7 @@ impl Server { mut rules: DatabaseRules, ) -> Result<()> { // Return an error if this server hasn't yet been setup with an id - let id = self.require_id()?; + self.require_id()?; let name = db_name.into(); let db_name = DatabaseName::new(name.clone()).context(InvalidDatabaseName)?; @@ -197,10 +199,8 @@ impl Server { let data = Bytes::from(serde_json::to_vec(&db_reservation.db.rules).context(ErrorSerializing)?); let len = data.len(); - let location = object_store_path_for_database_config( - &server_object_store_path(id), - &db_reservation.name, - ); + let location = + object_store_path_for_database_config(&self.root_path()?, &db_reservation.name); let stream_data = std::io::Result::Ok(data); self.store @@ -217,19 +217,25 @@ impl Server { Ok(()) } + // base location in object store for this writer + fn root_path(&self) -> Result { + let id = self.require_id()?; + + let mut path = ObjectStorePath::default(); + path.push_dir(format!("{}", id)); + Ok(path) + } + /// Loads the database configurations based on the databases in the /// object store. Any databases in the config already won't be /// replaced. pub async fn load_database_configs(&self) -> Result<()> { - let id = self.require_id()?; - let root_path = server_object_store_path(id); - // get the database names from the object store prefixes // TODO: update object store to pull back all common prefixes by // following the next tokens. let list_result = self .store - .list_with_delimiter(&root_path) + .list_with_delimiter(&self.root_path()?) .await .context(StoreError)?; @@ -345,12 +351,10 @@ impl Server { if let Some(segment) = segment { if persist { let writer_id = self.require_id()?; - let data = segment.to_file_bytes(writer_id).context(WalError)?; let store = self.store.clone(); - let location = database_object_store_path(writer_id, db_name); - let location = buffer::object_store_path_for_segment(&location, segment.id) + segment + .persist_bytes_in_background(writer_id, db_name, store) .context(WalError)?; - persist_bytes_in_background(data, store, location); } } } @@ -522,44 +526,6 @@ impl RemoteServer for RemoteServerImpl { } } -// base location in object store for a given database name -fn database_object_store_path(writer_id: u32, database_name: &DatabaseName<'_>) -> ObjectStorePath { - let mut path = ObjectStorePath::default(); - path.push_dir(format!("{}", writer_id)); - path.push_dir(database_name.to_string()); - path -} - -fn server_object_store_path(writer_id: u32) -> ObjectStorePath { - ObjectStorePath::from_cloud_unchecked(format!("{}", writer_id)) -} - -const STORE_ERROR_PAUSE_SECONDS: u64 = 100; - -/// Spawns a tokio task that will continuously try to persist the bytes to the -/// given object store location. 
-fn persist_bytes_in_background(data: Bytes, store: Arc, location: ObjectStorePath) { - let len = data.len(); - let mut stream_data = std::io::Result::Ok(data.clone()); - - tokio::task::spawn(async move { - while let Err(err) = store - .put( - &location, - futures::stream::once(async move { stream_data }), - len, - ) - .await - { - error!("error writing bytes to store: {}", err); - tokio::time::sleep(tokio::time::Duration::from_secs(STORE_ERROR_PAUSE_SECONDS)).await; - stream_data = std::io::Result::Ok(data.clone()); - } - - info!("persisted data to {}", store.convert_path(&location)); - }); -} - // get bytes from the location in object store async fn get_store_bytes( location: &ObjectStorePath, From fdbe602e572f5c6540ec50bbb8d7c2cc3c3c1d20 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Tue, 19 Jan 2021 10:44:42 -0500 Subject: [PATCH 02/10] refactor: Always get a path to build from the object store --- object_store/src/aws.rs | 46 ++++++++++++++++++++------------ object_store/src/azure.rs | 5 ++++ object_store/src/disk.rs | 16 +++++++---- object_store/src/gcp.rs | 37 +++++++++++++++++-------- object_store/src/lib.rs | 31 +++++++++++++++------ object_store/src/memory.rs | 8 +++++- server/src/buffer.rs | 17 ++++++++---- server/src/config.rs | 5 +++- server/src/lib.rs | 20 +++++++------- server/src/snapshot.rs | 8 +++--- src/influxdb_ioxd/http_routes.rs | 7 ++--- 11 files changed, 136 insertions(+), 64 deletions(-) diff --git a/object_store/src/aws.rs b/object_store/src/aws.rs index 9bd3f2307d..db1010723a 100644 --- a/object_store/src/aws.rs +++ b/object_store/src/aws.rs @@ -54,6 +54,11 @@ impl AmazonS3 { } } + /// Return a new location path appropriate for this object storage + pub fn new_path(&self) -> ObjectStorePath { + ObjectStorePath::default() + } + /// Save the provided bytes to the specified location. 
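///
/// A minimal, hedged usage sketch reusing the single-chunk stream
/// pattern from this patch's own tests (the `s3` and `location`
/// bindings are assumed to be set up already):
///
///     let data = Bytes::from("arbitrary data");
///     let stream_data = std::io::Result::Ok(data.clone());
///     s3.put(
///         &location,
///         futures::stream::once(async move { stream_data }),
///         data.len(),
///     )
///     .await?;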
pub async fn put(&self, location: &ObjectStorePath, bytes: S, length: usize) -> Result<()> where @@ -309,7 +314,6 @@ impl Error { #[cfg(test)] mod tests { use crate::{ - path::ObjectStorePath, tests::{get_nonexistent_object, list_with_delimiter, put_get_delete_list}, AmazonS3, Error, ObjectStore, }; @@ -417,9 +421,10 @@ mod tests { let (_, bucket_name) = region_and_bucket_name()?; let region = rusoto_core::Region::UsWest1; let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, &bucket_name)); - let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME); + let mut location = integration.new_path(); + location.set_file_name(NON_EXISTENT_NAME); - let err = get_nonexistent_object(&integration, Some(location_name)) + let err = get_nonexistent_object(&integration, Some(location)) .await .unwrap_err(); if let Some(Error::UnableToListDataFromS3 { source, bucket }) = @@ -439,9 +444,10 @@ mod tests { maybe_skip_integration!(); let (region, bucket_name) = region_and_bucket_name()?; let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, &bucket_name)); - let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME); + let mut location = integration.new_path(); + location.set_file_name(NON_EXISTENT_NAME); - let err = get_nonexistent_object(&integration, Some(location_name)) + let err = get_nonexistent_object(&integration, Some(location)) .await .unwrap_err(); if let Some(Error::UnableToGetDataFromS3 { @@ -469,9 +475,10 @@ mod tests { let (region, _) = region_and_bucket_name()?; let bucket_name = NON_EXISTENT_NAME; let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, bucket_name)); - let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME); + let mut location = integration.new_path(); + location.set_file_name(NON_EXISTENT_NAME); - let err = get_nonexistent_object(&integration, Some(location_name)) + let err = get_nonexistent_object(&integration, Some(location)) .await .unwrap_err(); if let Some(Error::UnableToListDataFromS3 { source, bucket }) = @@ -496,13 +503,14 @@ mod tests { let (_, bucket_name) = region_and_bucket_name()?; let region = rusoto_core::Region::UsWest1; let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, &bucket_name)); - let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME); + let mut location = integration.new_path(); + location.set_file_name(NON_EXISTENT_NAME); let data = Bytes::from("arbitrary data"); let stream_data = std::io::Result::Ok(data.clone()); let err = integration .put( - &location_name, + &location, futures::stream::once(async move { stream_data }), data.len(), ) @@ -531,13 +539,14 @@ mod tests { let (region, _) = region_and_bucket_name()?; let bucket_name = NON_EXISTENT_NAME; let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, bucket_name)); - let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME); + let mut location = integration.new_path(); + location.set_file_name(NON_EXISTENT_NAME); let data = Bytes::from("arbitrary data"); let stream_data = std::io::Result::Ok(data.clone()); let err = integration .put( - &location_name, + &location, futures::stream::once(async move { stream_data }), data.len(), ) @@ -565,9 +574,10 @@ mod tests { maybe_skip_integration!(); let (region, bucket_name) = region_and_bucket_name()?; let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, &bucket_name)); - let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME); + let mut location = 
integration.new_path(); + location.set_file_name(NON_EXISTENT_NAME); - let result = integration.delete(&location_name).await; + let result = integration.delete(&location).await; assert!(result.is_ok()); @@ -581,9 +591,10 @@ mod tests { let (_, bucket_name) = region_and_bucket_name()?; let region = rusoto_core::Region::UsWest1; let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, &bucket_name)); - let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME); + let mut location = integration.new_path(); + location.set_file_name(NON_EXISTENT_NAME); - let err = integration.delete(&location_name).await.unwrap_err(); + let err = integration.delete(&location).await.unwrap_err(); if let Error::UnableToDeleteDataFromS3 { source, bucket, @@ -606,9 +617,10 @@ mod tests { let (region, _) = region_and_bucket_name()?; let bucket_name = NON_EXISTENT_NAME; let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, bucket_name)); - let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME); + let mut location = integration.new_path(); + location.set_file_name(NON_EXISTENT_NAME); - let err = integration.delete(&location_name).await.unwrap_err(); + let err = integration.delete(&location).await.unwrap_err(); if let Error::UnableToDeleteDataFromS3 { source, bucket, diff --git a/object_store/src/azure.rs b/object_store/src/azure.rs index e0c9e9d011..19129cd9b5 100644 --- a/object_store/src/azure.rs +++ b/object_store/src/azure.rs @@ -65,6 +65,11 @@ impl MicrosoftAzure { Self::new(account, master_key, container_name) } + /// Return a new location path appropriate for this object storage + pub fn new_path(&self) -> ObjectStorePath { + ObjectStorePath::default() + } + /// Save the provided bytes to the specified location. pub async fn put(&self, location: &ObjectStorePath, bytes: S, length: usize) -> Result<()> where diff --git a/object_store/src/disk.rs b/object_store/src/disk.rs index f28cf4012e..e106f1c6c5 100644 --- a/object_store/src/disk.rs +++ b/object_store/src/disk.rs @@ -34,6 +34,11 @@ impl File { FileConverter::convert(&path) } + /// Return a new location path appropriate for this object storage + pub fn new_path(&self) -> ObjectStorePath { + ObjectStorePath::default() + } + /// Save the provided bytes to the specified location. pub async fn put(&self, location: &ObjectStorePath, bytes: S, length: usize) -> Result<()> where @@ -161,7 +166,8 @@ mod tests { let integration = ObjectStore::new_file(File::new(root.path())); let bytes = stream::once(async { Ok(Bytes::from("hello world")) }); - let location = ObjectStorePath::from_path_buf_unchecked("junk"); + let mut location = integration.new_path(); + location.set_file_name("junk"); let res = integration.put(&location, bytes, 0).await; assert!(matches!( @@ -178,14 +184,14 @@ mod tests { #[tokio::test] async fn creates_dir_if_not_present() -> Result<()> { let root = TempDir::new()?; - let storage = ObjectStore::new_file(File::new(root.path())); + let integration = ObjectStore::new_file(File::new(root.path())); let data = Bytes::from("arbitrary data"); - let mut location = ObjectStorePath::default(); + let mut location = integration.new_path(); location.push_all_dirs(&["nested", "file", "test_file"]); let stream_data = std::io::Result::Ok(data.clone()); - storage + integration .put( &location, futures::stream::once(async move { stream_data }), @@ -193,7 +199,7 @@ mod tests { ) .await?; - let read_data = storage + let read_data = integration .get(&location) .await? 
.map_ok(|b| bytes::BytesMut::from(&b[..])) diff --git a/object_store/src/gcp.rs b/object_store/src/gcp.rs index 3a2c40f02c..8948bfaada 100644 --- a/object_store/src/gcp.rs +++ b/object_store/src/gcp.rs @@ -24,6 +24,11 @@ impl GoogleCloudStorage { } } + /// Return a new location path appropriate for this object storage + pub fn new_path(&self) -> ObjectStorePath { + ObjectStorePath::default() + } + /// Save the provided bytes to the specified location. pub async fn put(&self, location: &ObjectStorePath, bytes: S, length: usize) -> Result<()> where @@ -142,7 +147,6 @@ impl GoogleCloudStorage { #[cfg(test)] mod test { use crate::{ - path::ObjectStorePath, tests::{get_nonexistent_object, put_get_delete_list}, Error, GoogleCloudStorage, ObjectStore, }; @@ -196,11 +200,13 @@ mod test { async fn gcs_test_get_nonexistent_location() -> Result<()> { maybe_skip_integration!(); let bucket_name = bucket_name()?; - let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME); let integration = ObjectStore::new_google_cloud_storage(GoogleCloudStorage::new(&bucket_name)); - let result = get_nonexistent_object(&integration, Some(location_name)).await?; + let mut location = integration.new_path(); + location.set_file_name(NON_EXISTENT_NAME); + + let result = get_nonexistent_object(&integration, Some(location)).await?; assert_eq!( result, @@ -217,11 +223,13 @@ mod test { async fn gcs_test_get_nonexistent_bucket() -> Result<()> { maybe_skip_integration!(); let bucket_name = NON_EXISTENT_NAME; - let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME); let integration = ObjectStore::new_google_cloud_storage(GoogleCloudStorage::new(bucket_name)); - let result = get_nonexistent_object(&integration, Some(location_name)).await?; + let mut location = integration.new_path(); + location.set_file_name(NON_EXISTENT_NAME); + + let result = get_nonexistent_object(&integration, Some(location)).await?; assert_eq!(result, Bytes::from("Not Found")); @@ -232,11 +240,13 @@ mod test { async fn gcs_test_delete_nonexistent_location() -> Result<()> { maybe_skip_integration!(); let bucket_name = bucket_name()?; - let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME); let integration = ObjectStore::new_google_cloud_storage(GoogleCloudStorage::new(&bucket_name)); - let err = integration.delete(&location_name).await.unwrap_err(); + let mut location = integration.new_path(); + location.set_file_name(NON_EXISTENT_NAME); + + let err = integration.delete(&location).await.unwrap_err(); if let Error::UnableToDeleteDataFromGcs { source, @@ -258,11 +268,13 @@ mod test { async fn gcs_test_delete_nonexistent_bucket() -> Result<()> { maybe_skip_integration!(); let bucket_name = NON_EXISTENT_NAME; - let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME); let integration = ObjectStore::new_google_cloud_storage(GoogleCloudStorage::new(bucket_name)); - let err = integration.delete(&location_name).await.unwrap_err(); + let mut location = integration.new_path(); + location.set_file_name(NON_EXISTENT_NAME); + + let err = integration.delete(&location).await.unwrap_err(); if let Error::UnableToDeleteDataFromGcs { source, @@ -284,15 +296,18 @@ mod test { async fn gcs_test_put_nonexistent_bucket() -> Result<()> { maybe_skip_integration!(); let bucket_name = NON_EXISTENT_NAME; - let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME); let integration = ObjectStore::new_google_cloud_storage(GoogleCloudStorage::new(bucket_name)); + + let mut location = 
integration.new_path(); + location.set_file_name(NON_EXISTENT_NAME); + let data = Bytes::from("arbitrary data"); let stream_data = std::io::Result::Ok(data.clone()); let result = integration .put( - &location_name, + &location, futures::stream::once(async move { stream_data }), data.len(), ) diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs index 28fd6af497..78039664e0 100644 --- a/object_store/src/lib.rs +++ b/object_store/src/lib.rs @@ -65,6 +65,18 @@ impl ObjectStore { Self(ObjectStoreIntegration::MicrosoftAzure(Box::new(azure))) } + /// Return a new location path appropriate for this object storage + pub fn new_path(&self) -> ObjectStorePath { + use ObjectStoreIntegration::*; + match &self.0 { + AmazonS3(s3) => s3.new_path(), + GoogleCloudStorage(gcs) => gcs.new_path(), + InMemory(in_mem) => in_mem.new_path(), + File(file) => file.new_path(), + MicrosoftAzure(azure) => azure.new_path(), + } + } + /// Save the provided bytes to the specified location. pub async fn put(&self, location: &ObjectStorePath, bytes: S, length: usize) -> Result<()> where @@ -368,7 +380,7 @@ mod tests { ); let data = Bytes::from("arbitrary data"); - let mut location = ObjectStorePath::default(); + let mut location = storage.new_path(); location.push_dir("test_dir"); location.set_file_name("test_file.json"); @@ -386,13 +398,13 @@ mod tests { assert_eq!(content_list, &[location.clone()]); // List everything starting with a prefix that should return results - let mut prefix = ObjectStorePath::default(); + let mut prefix = storage.new_path(); prefix.push_dir("test_dir"); let content_list = flatten_list_stream(storage, Some(&prefix)).await?; assert_eq!(content_list, &[location.clone()]); // List everything starting with a prefix that shouldn't return results - let mut prefix = ObjectStorePath::default(); + let mut prefix = storage.new_path(); prefix.push_dir("something"); let content_list = flatten_list_stream(storage, Some(&prefix)).await?; assert!(content_list.is_empty()); @@ -447,7 +459,7 @@ mod tests { .unwrap(); } - let mut prefix = ObjectStorePath::default(); + let mut prefix = storage.new_path(); prefix.push_all_dirs(&["mydb", "wal"]); let mut expected_000 = prefix.clone(); @@ -469,11 +481,11 @@ mod tests { assert!(object.last_modified > time_before_creation); // List with a prefix containing a partial "file name" - let mut prefix = ObjectStorePath::default(); + let mut prefix = storage.new_path(); prefix.push_all_dirs(&["mydb", "wal", "000", "000"]); prefix.set_file_name("001"); - let mut expected_location = ObjectStorePath::default(); + let mut expected_location = storage.new_path(); expected_location.push_all_dirs(&["mydb", "wal", "000", "000"]); expected_location.set_file_name("001.segment"); @@ -499,8 +511,11 @@ mod tests { storage: &ObjectStore, location: Option, ) -> Result { - let location = location - .unwrap_or_else(|| ObjectStorePath::from_cloud_unchecked("this_file_should_not_exist")); + let location = location.unwrap_or_else(|| { + let mut loc = storage.new_path(); + loc.set_file_name("this_file_should_not_exist"); + loc + }); let content_list = flatten_list_stream(storage, Some(&location)).await?; assert!(content_list.is_empty()); diff --git a/object_store/src/memory.rs b/object_store/src/memory.rs index 0321d84614..f863db82e8 100644 --- a/object_store/src/memory.rs +++ b/object_store/src/memory.rs @@ -36,6 +36,11 @@ impl InMemory { } } + /// Return a new location path appropriate for this object storage + pub fn new_path(&self) -> ObjectStorePath { + ObjectStorePath::default() + } + 
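    // A hedged note on the intended call pattern (mirrored from the tests
    // in this patch, not new API): callers ask the store for a path and
    // build onto it, e.g.
    //     let mut location = store.new_path();
    //     location.push_all_dirs(&["nested", "file"]);
    //     location.set_file_name("test_file");
    // rather than constructing a path type directly.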
/// Save the provided bytes to the specified location. pub async fn put(&self, location: &ObjectStorePath, bytes: S, length: usize) -> Result<()> where @@ -186,7 +191,8 @@ mod tests { let integration = ObjectStore::new_in_memory(InMemory::new()); let bytes = stream::once(async { Ok(Bytes::from("hello world")) }); - let location = ObjectStorePath::from_cloud_unchecked("junk"); + let mut location = integration.new_path(); + location.set_file_name("junk"); let res = integration.put(&location, bytes, 0).await; assert!(matches!( diff --git a/server/src/buffer.rs b/server/src/buffer.rs index 97fe6a41a2..dd52ab9deb 100644 --- a/server/src/buffer.rs +++ b/server/src/buffer.rs @@ -362,7 +362,7 @@ impl Segment { store: Arc, ) -> Result<()> { let data = self.to_file_bytes(writer_id)?; - let location = database_object_store_path(writer_id, db_name); + let location = database_object_store_path(writer_id, db_name, &store); let location = object_store_path_for_segment(&location, self.id)?; let len = data.len(); @@ -534,8 +534,12 @@ fn object_store_path_for_segment( } // base location in object store for a given database name -fn database_object_store_path(writer_id: u32, database_name: &DatabaseName<'_>) -> ObjectStorePath { - let mut path = ObjectStorePath::default(); +fn database_object_store_path( + writer_id: u32, + database_name: &DatabaseName<'_>, + store: &ObjectStore, +) -> ObjectStorePath { + let mut path = store.new_path(); path.push_dir(format!("{}", writer_id)); path.push_dir(database_name.to_string()); path @@ -546,6 +550,7 @@ mod tests { use super::*; use data_types::{data::lines_to_replicated_write, database_rules::DatabaseRules}; use influxdb_line_protocol::parse_lines; + use object_store::memory::InMemory; #[test] fn append_increments_current_size_and_uses_existing_segment() { @@ -853,7 +858,8 @@ mod tests { #[test] fn valid_object_store_path_for_segment() { - let mut base_path = ObjectStorePath::default(); + let storage = ObjectStore::new_in_memory(InMemory::new()); + let mut base_path = storage.new_path(); base_path.push_all_dirs(&["1", "mydb"]); let segment_path = object_store_path_for_segment(&base_path, 23).unwrap(); @@ -877,7 +883,8 @@ mod tests { #[test] fn object_store_path_for_segment_out_of_bounds() { - let mut base_path = ObjectStorePath::default(); + let storage = ObjectStore::new_in_memory(InMemory::new()); + let mut base_path = storage.new_path(); base_path.push_all_dirs(&["1", "mydb"]); let segment_path = object_store_path_for_segment(&base_path, 0).err().unwrap(); diff --git a/server/src/config.rs b/server/src/config.rs index 0ce1e8d056..6e942b0f36 100644 --- a/server/src/config.rs +++ b/server/src/config.rs @@ -137,6 +137,7 @@ impl<'a> Drop for CreateDatabaseHandle<'a> { mod test { use super::*; use object_store::path::cloud::CloudConverter; + use object_store::{memory::InMemory, ObjectStore}; #[test] fn create_db() { @@ -157,7 +158,9 @@ mod test { #[test] fn object_store_path_for_database_config() { - let path = ObjectStorePath::from_cloud_unchecked("1"); + let storage = ObjectStore::new_in_memory(InMemory::new()); + let mut path = storage.new_path(); + path.push_dir("1"); let name = DatabaseName::new("foo").unwrap(); let rules_path = super::object_store_path_for_database_config(&path, &name); let rules_path = CloudConverter::convert(&rules_path); diff --git a/server/src/lib.rs b/server/src/lib.rs index 644be88326..9074cbe539 100644 --- a/server/src/lib.rs +++ b/server/src/lib.rs @@ -221,7 +221,7 @@ impl Server { fn root_path(&self) -> Result { let id = 
self.require_id()?; - let mut path = ObjectStorePath::default(); + let mut path = self.store.new_path(); path.push_dir(format!("{}", id)); Ok(path) } @@ -610,11 +610,13 @@ mod tests { .await .expect("failed to create database"); + let mut rules_path = store.new_path(); + rules_path.push_all_dirs(&["1", name]); + rules_path.set_file_name("rules.json"); + let read_data = server .store - .get(&ObjectStorePath::from_cloud_unchecked( - "1/bananas/rules.json", - )) + .get(&rules_path) .await .unwrap() .map_ok(|b| bytes::BytesMut::from(&b[..])) @@ -633,10 +635,7 @@ mod tests { .await .expect("failed to create 2nd db"); - store - .list_with_delimiter(&ObjectStorePath::from_cloud_unchecked("")) - .await - .unwrap(); + store.list_with_delimiter(&store.new_path()).await.unwrap(); let manager = TestConnectionManager::new(); let server2 = Server::new(manager, store); @@ -895,7 +894,10 @@ partition_key: // write lines should have caused a segment rollover and persist, wait tokio::task::yield_now().await; - let path = ObjectStorePath::from_cloud_unchecked("1/my_db/wal/000/000/001.segment"); + let mut path = store.new_path(); + path.push_all_dirs(&["1", "my_db", "wal", "000", "000"]); + path.set_file_name("001.segment"); + let data = store .get(&path) .await diff --git a/server/src/snapshot.rs b/server/src/snapshot.rs index ec7857c2a1..6eadf0fae3 100644 --- a/server/src/snapshot.rs +++ b/server/src/snapshot.rs @@ -365,10 +365,10 @@ mem,host=A,region=west used=45 1 let store = Arc::new(ObjectStore::new_in_memory(InMemory::new())); let chunk = Arc::new(chunk); let (tx, rx) = tokio::sync::oneshot::channel(); - let mut metadata_path = ObjectStorePath::default(); + let mut metadata_path = store.new_path(); metadata_path.push_dir("meta"); - let mut data_path = ObjectStorePath::default(); + let mut data_path = store.new_path(); data_path.push_dir("data"); let snapshot = snapshot_chunk( @@ -418,10 +418,10 @@ mem,host=A,region=west used=45 1 let store = Arc::new(ObjectStore::new_in_memory(InMemory::new())); let chunk = Arc::new(ChunkWB::new(11)); - let mut metadata_path = ObjectStorePath::default(); + let mut metadata_path = store.new_path(); metadata_path.push_dir("meta"); - let mut data_path = ObjectStorePath::default(); + let mut data_path = store.new_path(); data_path.push_dir("data"); let snapshot = Snapshot::new("testaroo", metadata_path, data_path, store, chunk, tables); diff --git a/src/influxdb_ioxd/http_routes.rs b/src/influxdb_ioxd/http_routes.rs index a416e390b5..60a2134df5 100644 --- a/src/influxdb_ioxd/http_routes.rs +++ b/src/influxdb_ioxd/http_routes.rs @@ -17,7 +17,6 @@ use data_types::{ DatabaseName, }; use influxdb_line_protocol::parse_lines; -use object_store::path::ObjectStorePath; use query::{frontend::sql::SQLQueryPlanner, Database, DatabaseStore}; use server::{ConnectionManager, Server as AppServer}; @@ -715,7 +714,9 @@ async fn snapshot_partition Date: Tue, 19 Jan 2021 14:56:44 -0500 Subject: [PATCH 03/10] fix: InMemory doesn't need pagination --- object_store/src/aws.rs | 40 +++--- object_store/src/azure.rs | 29 +++-- object_store/src/disk.rs | 34 +++--- object_store/src/gcp.rs | 34 +++--- object_store/src/lib.rs | 202 +++++++++++++++++++++---------- object_store/src/memory.rs | 56 ++++----- object_store/src/path.rs | 113 +++++++++++------ object_store/src/path/cloud.rs | 28 ++--- object_store/src/path/file.rs | 6 +- object_store/src/path/parsed.rs | 59 ++++++--- server/src/buffer.rs | 9 +- server/src/config.rs | 23 ++-- server/src/lib.rs | 10 +- server/src/snapshot.rs | 20 ++- 
src/influxdb_ioxd/http_routes.rs | 2 + 15 files changed, 392 insertions(+), 273 deletions(-) diff --git a/object_store/src/aws.rs b/object_store/src/aws.rs index db1010723a..324bdbdf4b 100644 --- a/object_store/src/aws.rs +++ b/object_store/src/aws.rs @@ -1,7 +1,7 @@ //! This module contains the IOx implementation for using S3 as the object //! store. use crate::{ - path::{cloud::CloudConverter, ObjectStorePath, DELIMITER}, + path::{cloud::CloudConverter, DELIMITER}, Error, ListResult, NoDataFromS3, ObjectMeta, Result, UnableToDeleteDataFromS3, UnableToGetDataFromS3, UnableToGetPieceOfDataFromS3, UnableToPutDataToS3, }; @@ -55,12 +55,12 @@ impl AmazonS3 { } /// Return a new location path appropriate for this object storage - pub fn new_path(&self) -> ObjectStorePath { - ObjectStorePath::default() + pub fn new_path(&self) -> crate::path::Path { + crate::path::Path::default() } /// Save the provided bytes to the specified location. - pub async fn put(&self, location: &ObjectStorePath, bytes: S, length: usize) -> Result<()> + pub async fn put(&self, location: &crate::path::Path, bytes: S, length: usize) -> Result<()> where S: Stream> + Send + Sync + 'static, { @@ -68,7 +68,7 @@ impl AmazonS3 { let put_request = rusoto_s3::PutObjectRequest { bucket: self.bucket_name.clone(), - key: CloudConverter::convert(&location), + key: CloudConverter::convert(&location.inner), body: Some(bytes), ..Default::default() }; @@ -78,7 +78,7 @@ impl AmazonS3 { .await .context(UnableToPutDataToS3 { bucket: &self.bucket_name, - location: CloudConverter::convert(&location), + location: CloudConverter::convert(&location.inner), })?; Ok(()) } @@ -86,9 +86,9 @@ impl AmazonS3 { /// Return the bytes that are stored at the specified location. pub async fn get( &self, - location: &ObjectStorePath, + location: &crate::path::Path, ) -> Result>> { - let key = CloudConverter::convert(&location); + let key = CloudConverter::convert(&location.inner); let get_request = rusoto_s3::GetObjectRequest { bucket: self.bucket_name.clone(), key: key.clone(), @@ -115,8 +115,8 @@ impl AmazonS3 { } /// Delete the object at the specified location. - pub async fn delete(&self, location: &ObjectStorePath) -> Result<()> { - let key = CloudConverter::convert(&location); + pub async fn delete(&self, location: &crate::path::Path) -> Result<()> { + let key = CloudConverter::convert(&location.inner); let delete_request = rusoto_s3::DeleteObjectRequest { bucket: self.bucket_name.clone(), key: key.clone(), @@ -136,8 +136,8 @@ impl AmazonS3 { /// List all the objects with the given prefix. pub async fn list<'a>( &'a self, - prefix: Option<&'a ObjectStorePath>, - ) -> Result>> + 'a> { + prefix: Option<&'a crate::path::Path>, + ) -> Result>> + 'a> { #[derive(Clone)] enum ListState { Start, @@ -149,7 +149,7 @@ impl AmazonS3 { Ok(stream::unfold(ListState::Start, move |state| async move { let mut list_request = rusoto_s3::ListObjectsV2Request { bucket: self.bucket_name.clone(), - prefix: prefix.map(CloudConverter::convert), + prefix: prefix.map(|p| CloudConverter::convert(&p.inner)), ..Default::default() }; @@ -181,7 +181,7 @@ impl AmazonS3 { let contents = resp.contents.unwrap_or_default(); let names = contents .into_iter() - .flat_map(|object| object.key.map(ObjectStorePath::from_cloud_unchecked)) + .flat_map(|object| object.key.map(crate::path::Path::from_cloud_unchecked)) .collect(); // The AWS response contains a field named `is_truncated` as well as @@ -202,10 +202,10 @@ impl AmazonS3 { /// common prefixes (directories) in addition to object metadata. 
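///
/// A hedged sketch of the pagination loop this signature supports
/// (an assumed call pattern, not code from this patch; `s3` and
/// `prefix` are illustrative bindings):
///
///     let mut token = None;
///     loop {
///         let page = s3.list_with_delimiter(&prefix, &token).await?;
///         // ...consume page.objects and page.common_prefixes...
///         match page.next_token {
///             Some(t) => token = Some(t),
///             None => break,
///         }
///     }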
pub async fn list_with_delimiter<'a>( &'a self, - prefix: &'a ObjectStorePath, + prefix: &'a crate::path::Path, next_token: &Option, - ) -> Result { - let converted_prefix = CloudConverter::convert(prefix); + ) -> Result> { + let converted_prefix = CloudConverter::convert(&prefix.inner); let mut list_request = rusoto_s3::ListObjectsV2Request { bucket: self.bucket_name.clone(), @@ -233,7 +233,7 @@ impl AmazonS3 { let objects: Vec<_> = contents .into_iter() .map(|object| { - let location = ObjectStorePath::from_cloud_unchecked( + let location = crate::path::Path::from_cloud_unchecked( object.key.expect("object doesn't exist without a key"), ); let last_modified = match object.last_modified { @@ -265,7 +265,7 @@ impl AmazonS3 { .unwrap_or_default() .into_iter() .map(|p| { - ObjectStorePath::from_cloud_unchecked( + crate::path::Path::from_cloud_unchecked( p.prefix.expect("can't have a prefix without a value"), ) }) @@ -315,7 +315,7 @@ impl Error { mod tests { use crate::{ tests::{get_nonexistent_object, list_with_delimiter, put_get_delete_list}, - AmazonS3, Error, ObjectStore, + AmazonS3, Error, ObjectStore, ObjectStorePath, }; use bytes::Bytes; use std::env; diff --git a/object_store/src/azure.rs b/object_store/src/azure.rs index 19129cd9b5..8b1dfd20a0 100644 --- a/object_store/src/azure.rs +++ b/object_store/src/azure.rs @@ -1,9 +1,8 @@ //! This module contains the IOx implementation for using Azure Blob storage as //! the object store. use crate::{ - path::{cloud::CloudConverter, ObjectStorePath}, - DataDoesNotMatchLength, Result, UnableToDeleteDataFromAzure, UnableToGetDataFromAzure, - UnableToListDataFromAzure, UnableToPutDataToAzure, + path::cloud::CloudConverter, DataDoesNotMatchLength, Result, UnableToDeleteDataFromAzure, + UnableToGetDataFromAzure, UnableToListDataFromAzure, UnableToPutDataToAzure, }; use azure_core::HttpClient; use azure_storage::{ @@ -66,16 +65,16 @@ impl MicrosoftAzure { } /// Return a new location path appropriate for this object storage - pub fn new_path(&self) -> ObjectStorePath { - ObjectStorePath::default() + pub fn new_path(&self) -> crate::path::Path { + crate::path::Path::default() } /// Save the provided bytes to the specified location. - pub async fn put(&self, location: &ObjectStorePath, bytes: S, length: usize) -> Result<()> + pub async fn put(&self, location: &crate::path::Path, bytes: S, length: usize) -> Result<()> where S: Stream> + Send + Sync + 'static, { - let location = CloudConverter::convert(&location); + let location = CloudConverter::convert(&location.inner); let temporary_non_streaming = bytes .map_ok(|b| bytes::BytesMut::from(&b[..])) .try_concat() @@ -105,10 +104,10 @@ impl MicrosoftAzure { /// Return the bytes that are stored at the specified location. pub async fn get( &self, - location: &ObjectStorePath, + location: &crate::path::Path, ) -> Result>> { let container_client = self.container_client.clone(); - let location = CloudConverter::convert(&location); + let location = CloudConverter::convert(&location.inner); Ok(async move { container_client .as_blob_client(&location) @@ -124,8 +123,8 @@ impl MicrosoftAzure { } /// Delete the object at the specified location. 
- pub async fn delete(&self, location: &ObjectStorePath) -> Result<()> { - let location = CloudConverter::convert(&location); + pub async fn delete(&self, location: &crate::path::Path) -> Result<()> { + let location = CloudConverter::convert(&location.inner); self.container_client .as_blob_client(&location) .delete() @@ -142,8 +141,8 @@ impl MicrosoftAzure { /// List all the objects with the given prefix. pub async fn list<'a>( &'a self, - prefix: Option<&'a ObjectStorePath>, - ) -> Result>> + 'a> { + prefix: Option<&'a crate::path::Path>, + ) -> Result>> + 'a> { #[derive(Clone)] enum ListState { Start, @@ -154,7 +153,7 @@ impl MicrosoftAzure { Ok(stream::unfold(ListState::Start, move |state| async move { let mut request = self.container_client.list_blobs(); - let prefix = prefix.map(CloudConverter::convert); + let prefix = prefix.map(|p| CloudConverter::convert(&p.inner)); if let Some(ref p) = prefix { request = request.prefix(p as &str); } @@ -184,7 +183,7 @@ impl MicrosoftAzure { .incomplete_vector .vector .into_iter() - .map(|blob| ObjectStorePath::from_cloud_unchecked(blob.name)) + .map(|blob| crate::path::Path::from_cloud_unchecked(blob.name)) .collect(); Some((Ok(names), next_state)) diff --git a/object_store/src/disk.rs b/object_store/src/disk.rs index e106f1c6c5..c8fd09caa1 100644 --- a/object_store/src/disk.rs +++ b/object_store/src/disk.rs @@ -1,9 +1,9 @@ //! This module contains the IOx implementation for using local disk as the //! object store. use crate::{ - path::{file::FileConverter, ObjectStorePath}, - DataDoesNotMatchLength, Result, UnableToCopyDataToFile, UnableToCreateDir, UnableToCreateFile, - UnableToDeleteFile, UnableToOpenFile, UnableToPutDataInMemory, UnableToReadBytes, + path::file::FileConverter, DataDoesNotMatchLength, Result, UnableToCopyDataToFile, + UnableToCreateDir, UnableToCreateFile, UnableToDeleteFile, UnableToOpenFile, + UnableToPutDataInMemory, UnableToReadBytes, }; use bytes::Bytes; use futures::{stream, Stream, TryStreamExt}; @@ -17,30 +17,30 @@ use walkdir::WalkDir; /// cloud storage provider. #[derive(Debug)] pub struct File { - root: ObjectStorePath, + root: crate::path::Path, } impl File { /// Create new filesystem storage. pub fn new(root: impl Into) -> Self { Self { - root: ObjectStorePath::from_path_buf_unchecked(root), + root: crate::path::Path::from_path_buf_unchecked(root), } } - fn path(&self, location: &ObjectStorePath) -> PathBuf { + fn path(&self, location: &crate::path::Path) -> PathBuf { let mut path = self.root.clone(); path.push_path(location); - FileConverter::convert(&path) + FileConverter::convert(&path.inner) } /// Return a new location path appropriate for this object storage - pub fn new_path(&self) -> ObjectStorePath { - ObjectStorePath::default() + pub fn new_path(&self) -> crate::path::Path { + crate::path::Path::default() } /// Save the provided bytes to the specified location. - pub async fn put(&self, location: &ObjectStorePath, bytes: S, length: usize) -> Result<()> + pub async fn put(&self, location: &crate::path::Path, bytes: S, length: usize) -> Result<()> where S: Stream> + Send + Sync + 'static, { @@ -88,7 +88,7 @@ impl File { /// Return the bytes that are stored at the specified location. pub async fn get( &self, - location: &ObjectStorePath, + location: &crate::path::Path, ) -> Result>> { let path = self.path(location); @@ -103,7 +103,7 @@ impl File { } /// Delete the object at the specified location. 
- pub async fn delete(&self, location: &ObjectStorePath) -> Result<()> { + pub async fn delete(&self, location: &crate::path::Path) -> Result<()> { let path = self.path(location); fs::remove_file(&path) .await @@ -114,9 +114,9 @@ impl File { /// List all the objects with the given prefix. pub async fn list<'a>( &'a self, - prefix: Option<&'a ObjectStorePath>, - ) -> Result>> + 'a> { - let root_path = FileConverter::convert(&self.root); + prefix: Option<&'a crate::path::Path>, + ) -> Result>> + 'a> { + let root_path = FileConverter::convert(&self.root.inner); let walkdir = WalkDir::new(&root_path) // Don't include the root directory itself .min_depth(1); @@ -129,7 +129,7 @@ impl File { let relative_path = file.path().strip_prefix(&root_path).expect( "Must start with root path because this came from walking the root", ); - ObjectStorePath::from_path_buf_unchecked(relative_path) + crate::path::Path::from_path_buf_unchecked(relative_path) }) .filter(|name| prefix.map_or(true, |p| name.prefix_matches(p))) .map(|name| Ok(vec![name])) @@ -148,7 +148,7 @@ mod tests { use tempfile::TempDir; - use crate::{tests::put_get_delete_list, Error, ObjectStore}; + use crate::{tests::put_get_delete_list, Error, ObjectStore, ObjectStorePath}; use futures::stream; #[tokio::test] diff --git a/object_store/src/gcp.rs b/object_store/src/gcp.rs index 8948bfaada..6d7eb8c65c 100644 --- a/object_store/src/gcp.rs +++ b/object_store/src/gcp.rs @@ -1,9 +1,9 @@ //! This module contains the IOx implementation for using Google Cloud Storage //! as the object store. use crate::{ - path::{cloud::CloudConverter, ObjectStorePath}, - DataDoesNotMatchLength, Result, UnableToDeleteDataFromGcs, UnableToGetDataFromGcs, - UnableToListDataFromGcs, UnableToListDataFromGcs2, UnableToPutDataToGcs, + path::cloud::CloudConverter, DataDoesNotMatchLength, Result, UnableToDeleteDataFromGcs, + UnableToGetDataFromGcs, UnableToListDataFromGcs, UnableToListDataFromGcs2, + UnableToPutDataToGcs, }; use bytes::Bytes; use futures::{stream, Stream, StreamExt, TryStreamExt}; @@ -25,12 +25,12 @@ impl GoogleCloudStorage { } /// Return a new location path appropriate for this object storage - pub fn new_path(&self) -> ObjectStorePath { - ObjectStorePath::default() + pub fn new_path(&self) -> crate::path::Path { + crate::path::Path::default() } /// Save the provided bytes to the specified location. - pub async fn put(&self, location: &ObjectStorePath, bytes: S, length: usize) -> Result<()> + pub async fn put(&self, location: &crate::path::Path, bytes: S, length: usize) -> Result<()> where S: Stream> + Send + Sync + 'static, { @@ -49,7 +49,7 @@ impl GoogleCloudStorage { } ); - let location = CloudConverter::convert(&location); + let location = CloudConverter::convert(&location.inner); let location_copy = location.clone(); let bucket_name = self.bucket_name.clone(); @@ -71,9 +71,9 @@ impl GoogleCloudStorage { /// Return the bytes that are stored at the specified location. pub async fn get( &self, - location: &ObjectStorePath, + location: &crate::path::Path, ) -> Result>> { - let location = CloudConverter::convert(&location); + let location = CloudConverter::convert(&location.inner); let location_copy = location.clone(); let bucket_name = self.bucket_name.clone(); @@ -88,8 +88,8 @@ impl GoogleCloudStorage { } /// Delete the object at the specified location. 
- pub async fn delete(&self, location: &ObjectStorePath) -> Result<()> { - let location = CloudConverter::convert(&location); + pub async fn delete(&self, location: &crate::path::Path) -> Result<()> { + let location = CloudConverter::convert(&location.inner); let location_copy = location.clone(); let bucket_name = self.bucket_name.clone(); @@ -106,11 +106,11 @@ impl GoogleCloudStorage { /// List all the objects with the given prefix. pub async fn list<'a>( &'a self, - prefix: Option<&'a ObjectStorePath>, - ) -> Result>> + 'a> { + prefix: Option<&'a crate::path::Path>, + ) -> Result>> + 'a> { let objects = match prefix { Some(prefix) => { - let cloud_prefix = CloudConverter::convert(prefix); + let cloud_prefix = CloudConverter::convert(&prefix.inner); let list = cloud_storage::Object::list_prefix(&self.bucket_name, &cloud_prefix) .await .context(UnableToListDataFromGcs { @@ -133,8 +133,8 @@ impl GoogleCloudStorage { let objects = objects .map_ok(|list| { list.into_iter() - .map(|o| ObjectStorePath::from_cloud_unchecked(o.name)) - .collect::>() + .map(|o| crate::path::Path::from_cloud_unchecked(o.name)) + .collect::>() }) .context(UnableToListDataFromGcs2 { bucket: &self.bucket_name, @@ -148,7 +148,7 @@ impl GoogleCloudStorage { mod test { use crate::{ tests::{get_nonexistent_object, put_get_delete_list}, - Error, GoogleCloudStorage, ObjectStore, + Error, GoogleCloudStorage, ObjectStore, ObjectStorePath, }; use bytes::Bytes; use std::env; diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs index 78039664e0..64527b69dc 100644 --- a/object_store/src/lib.rs +++ b/object_store/src/lib.rs @@ -31,7 +31,7 @@ use path::ObjectStorePath; use bytes::Bytes; use chrono::{DateTime, Utc}; -use futures::{Stream, StreamExt, TryStreamExt}; +use futures::{Stream, StreamExt, TryFutureExt, TryStreamExt}; use snafu::Snafu; use std::{io, path::PathBuf}; @@ -66,59 +66,76 @@ impl ObjectStore { } /// Return a new location path appropriate for this object storage - pub fn new_path(&self) -> ObjectStorePath { + pub fn new_path(&self) -> path::Path { use ObjectStoreIntegration::*; match &self.0 { AmazonS3(s3) => s3.new_path(), GoogleCloudStorage(gcs) => gcs.new_path(), - InMemory(in_mem) => in_mem.new_path(), + InMemory(in_mem) => path::Path { + inner: path::PathRepresentation::Parts(in_mem.new_path()), + }, File(file) => file.new_path(), MicrosoftAzure(azure) => azure.new_path(), } } /// Save the provided bytes to the specified location. 
- pub async fn put(&self, location: &ObjectStorePath, bytes: S, length: usize) -> Result<()> + pub async fn put(&self, location: &path::Path, bytes: S, length: usize) -> Result<()> where S: Stream> + Send + Sync + 'static, { use ObjectStoreIntegration::*; - match &self.0 { - AmazonS3(s3) => s3.put(location, bytes, length).await?, - GoogleCloudStorage(gcs) => gcs.put(location, bytes, length).await?, - InMemory(in_mem) => in_mem.put(location, bytes, length).await?, - File(file) => file.put(location, bytes, length).await?, - MicrosoftAzure(azure) => azure.put(location, bytes, length).await?, + match (&self.0, location) { + (AmazonS3(s3), _) => s3.put(location, bytes, length).await?, + (GoogleCloudStorage(gcs), _) => gcs.put(location, bytes, length).await?, + ( + InMemory(in_mem), + path::Path { + inner: path::PathRepresentation::Parts(location), + }, + ) => in_mem.put(location, bytes, length).await?, + (File(file), _) => file.put(location, bytes, length).await?, + (MicrosoftAzure(azure), _) => azure.put(location, bytes, length).await?, + _ => unreachable!(), } Ok(()) } /// Return the bytes that are stored at the specified location. - pub async fn get( - &self, - location: &ObjectStorePath, - ) -> Result>> { + pub async fn get(&self, location: &path::Path) -> Result>> { use ObjectStoreIntegration::*; - Ok(match &self.0 { - AmazonS3(s3) => s3.get(location).await?.boxed(), - GoogleCloudStorage(gcs) => gcs.get(location).await?.boxed(), - InMemory(in_mem) => in_mem.get(location).await?.boxed(), - File(file) => file.get(location).await?.boxed(), - MicrosoftAzure(azure) => azure.get(location).await?.boxed(), + Ok(match (&self.0, location) { + (AmazonS3(s3), _) => s3.get(location).await?.boxed(), + (GoogleCloudStorage(gcs), _) => gcs.get(location).await?.boxed(), + ( + InMemory(in_mem), + path::Path { + inner: path::PathRepresentation::Parts(location), + }, + ) => in_mem.get(location).await?.boxed(), + (File(file), _) => file.get(location).await?.boxed(), + (MicrosoftAzure(azure), _) => azure.get(location).await?.boxed(), + _ => unreachable!(), } .err_into()) } /// Delete the object at the specified location. - pub async fn delete(&self, location: &ObjectStorePath) -> Result<()> { + pub async fn delete(&self, location: &path::Path) -> Result<()> { use ObjectStoreIntegration::*; - match &self.0 { - AmazonS3(s3) => s3.delete(location).await?, - GoogleCloudStorage(gcs) => gcs.delete(location).await?, - InMemory(in_mem) => in_mem.delete(location).await?, - File(file) => file.delete(location).await?, - MicrosoftAzure(azure) => azure.delete(location).await?, + match (&self.0, location) { + (AmazonS3(s3), _) => s3.delete(location).await?, + (GoogleCloudStorage(gcs), _) => gcs.delete(location).await?, + ( + InMemory(in_mem), + path::Path { + inner: path::PathRepresentation::Parts(location), + }, + ) => in_mem.delete(location).await?, + (File(file), _) => file.delete(location).await?, + (MicrosoftAzure(azure), _) => azure.delete(location).await?, + _ => unreachable!(), } Ok(()) @@ -127,15 +144,32 @@ impl ObjectStore { /// List all the objects with the given prefix. 
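    /// (As with `put`, `get`, and `delete` above, the match below pairs
    /// each backend with the path representation it expects; reaching an
    /// `InMemory` store with anything but a `PathRepresentation::Parts`
    /// path is a programmer error and lands on `unreachable!()`.)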
pub async fn list<'a>( &'a self, - prefix: Option<&'a ObjectStorePath>, - ) -> Result>> + 'a> { + prefix: Option<&'a path::Path>, + ) -> Result>> + 'a> { use ObjectStoreIntegration::*; - Ok(match &self.0 { - AmazonS3(s3) => s3.list(prefix).await?.boxed(), - GoogleCloudStorage(gcs) => gcs.list(prefix).await?.boxed(), - InMemory(in_mem) => in_mem.list(prefix).await?.boxed(), - File(file) => file.list(prefix).await?.boxed(), - MicrosoftAzure(azure) => azure.list(prefix).await?.boxed(), + Ok(match (&self.0, prefix) { + (AmazonS3(s3), _) => s3.list(prefix).await?.boxed(), + (GoogleCloudStorage(gcs), _) => gcs.list(prefix).await?.boxed(), + + ( + InMemory(in_mem), + Some(path::Path { + inner: path::PathRepresentation::Parts(dirs_and_file_name), + }), + ) => in_mem + .list(Some(dirs_and_file_name)) + .await? + .map_ok(|s| s.into_iter().map(Into::into).collect()) + .boxed(), + (InMemory(in_mem), None) => in_mem + .list(None) + .await? + .map_ok(|s| s.into_iter().map(Into::into).collect()) + .boxed(), + + (File(file), _) => file.list(prefix).await?.boxed(), + (MicrosoftAzure(azure), _) => azure.list(prefix).await?.boxed(), + _ => unreachable!(), } .err_into()) } @@ -145,30 +179,26 @@ impl ObjectStore { /// metadata. pub async fn list_with_delimiter<'a>( &'a self, - prefix: &'a ObjectStorePath, - ) -> Result { + prefix: &'a path::Path, + ) -> Result> { use ObjectStoreIntegration::*; - match &self.0 { - AmazonS3(s3) => s3.list_with_delimiter(prefix, &None).await, - GoogleCloudStorage(_gcs) => unimplemented!(), - InMemory(in_mem) => in_mem.list_with_delimiter(prefix, &None).await, - File(_file) => unimplemented!(), - MicrosoftAzure(_azure) => unimplemented!(), - } - } - - /// Convert an `ObjectStorePath` to a `String` according to the appropriate - /// implementation. Suitable for printing; not suitable for sending to - /// APIs - pub fn convert_path(&self, path: &ObjectStorePath) -> String { - use ObjectStoreIntegration::*; - match &self.0 { - AmazonS3(_) | GoogleCloudStorage(_) | InMemory(_) | MicrosoftAzure(_) => { - path::cloud::CloudConverter::convert(path) + match (&self.0, prefix) { + (AmazonS3(s3), _) => s3.list_with_delimiter(prefix, &None).await, + (GoogleCloudStorage(_gcs), _) => unimplemented!(), + ( + InMemory(in_mem), + path::Path { + inner: path::PathRepresentation::Parts(dirs_and_file_name), + }, + ) => { + in_mem + .list_with_delimiter(dirs_and_file_name) + .map_ok(|list_result| list_result.map_paths(Into::into)) + .await } - File(_) => path::file::FileConverter::convert(path) - .display() - .to_string(), + (InMemory(_), _) => unreachable!(), + (File(_file), _) => unimplemented!(), + (MicrosoftAzure(_azure), _) => unimplemented!(), } } } @@ -192,26 +222,64 @@ pub enum ObjectStoreIntegration { /// token for the next set of results. Individual results sets are limited to /// 1,000 objects. #[derive(Debug)] -pub struct ListResult { +pub struct ListResult { /// Token passed to the API for the next page of list results. pub next_token: Option, /// Prefixes that are common (like directories) - pub common_prefixes: Vec, + pub common_prefixes: Vec
<P>,
     /// Object metadata for the listing
-    pub objects: Vec<ObjectMeta>,
+    pub objects: Vec<ObjectMeta<P>>,
+}
+
+impl<P: ObjectStorePath> ListResult<P> {
+    fn map_paths<Q: ObjectStorePath, C>(self, c: C) -> ListResult<Q>
+    where
+        C: Fn(P) -> Q,
+    {
+        let Self {
+            next_token,
+            common_prefixes,
+            objects,
+        } = self;
+
+        ListResult {
+            next_token,
+            common_prefixes: common_prefixes.into_iter().map(&c).collect(),
+            objects: objects.into_iter().map(|o| o.map_paths(&c)).collect(),
+        }
+    }
+}

 /// The metadata that describes an object.
 #[derive(Debug)]
-pub struct ObjectMeta {
+pub struct ObjectMeta<P: ObjectStorePath> {
     /// The full path to the object
-    pub location: ObjectStorePath,
+    pub location: P,
     /// The last modified time
     pub last_modified: DateTime<Utc>,
     /// The size in bytes of the object
     pub size: usize,
 }

+impl<P: ObjectStorePath> ObjectMeta<P>
{ + fn map_paths(self, c: C) -> ObjectMeta + where + C: Fn(P) -> Q, + { + let Self { + location, + last_modified, + size, + } = self; + + ObjectMeta { + location: c(location), + last_modified, + size, + } + } +} + /// A specialized `Result` for object store-related errors pub type Result = std::result::Result; @@ -358,8 +426,8 @@ mod tests { async fn flatten_list_stream( storage: &ObjectStore, - prefix: Option<&ObjectStorePath>, - ) -> Result> { + prefix: Option<&path::Path>, + ) -> Result> { storage .list(prefix) .await? @@ -442,7 +510,7 @@ mod tests { "mydb/data/whatevs", ] .iter() - .map(|&s| ObjectStorePath::from_cloud_unchecked(s)) + .map(|&s| path::Path::from_cloud_unchecked(s)) .collect(); let time_before_creation = Utc::now(); @@ -509,7 +577,7 @@ mod tests { pub(crate) async fn get_nonexistent_object( storage: &ObjectStore, - location: Option, + location: Option, ) -> Result { let location = location.unwrap_or_else(|| { let mut loc = storage.new_path(); @@ -540,7 +608,7 @@ mod tests { "mydb/data/whatevs", ] .iter() - .map(|&s| ObjectStorePath::from_cloud_unchecked(s)) + .map(|&s| path::Path::from_cloud_unchecked(s)) .collect(); for f in &files { diff --git a/object_store/src/memory.rs b/object_store/src/memory.rs index f863db82e8..84c7b081c4 100644 --- a/object_store/src/memory.rs +++ b/object_store/src/memory.rs @@ -1,9 +1,8 @@ //! This module contains the IOx implementation for using memory as the object //! store. use crate::{ - path::{parsed::DirsAndFileName, ObjectStorePath}, - DataDoesNotMatchLength, ListResult, NoDataInMemory, ObjectMeta, Result, - UnableToPutDataInMemory, + path::parsed::DirsAndFileName, DataDoesNotMatchLength, ListResult, NoDataInMemory, ObjectMeta, + Result, UnableToPutDataInMemory, }; use bytes::Bytes; use chrono::Utc; @@ -37,12 +36,12 @@ impl InMemory { } /// Return a new location path appropriate for this object storage - pub fn new_path(&self) -> ObjectStorePath { - ObjectStorePath::default() + pub fn new_path(&self) -> DirsAndFileName { + DirsAndFileName::default() } /// Save the provided bytes to the specified location. - pub async fn put(&self, location: &ObjectStorePath, bytes: S, length: usize) -> Result<()> + pub async fn put(&self, location: &DirsAndFileName, bytes: S, length: usize) -> Result<()> where S: Stream> + Send + Sync + 'static, { @@ -62,21 +61,23 @@ impl InMemory { let content = content.freeze(); - self.storage.write().await.insert(location.into(), content); + self.storage + .write() + .await + .insert(location.to_owned(), content); Ok(()) } /// Return the bytes that are stored at the specified location. pub async fn get( &self, - location: &ObjectStorePath, + location: &DirsAndFileName, ) -> Result>> { - let location = location.into(); let data = self .storage .read() .await - .get(&location) + .get(location) .cloned() .context(NoDataInMemory)?; @@ -84,28 +85,26 @@ impl InMemory { } /// Delete the object at the specified location. - pub async fn delete(&self, location: &ObjectStorePath) -> Result<()> { - self.storage.write().await.remove(&location.into()); + pub async fn delete(&self, location: &DirsAndFileName) -> Result<()> { + self.storage.write().await.remove(location); Ok(()) } /// List all the objects with the given prefix. 
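    /// (Matching is a plain `prefix_matches` scan over the `BTreeMap`
    /// keys; as the lib.rs tests above show, a prefix whose file name is
    /// `001` matches a stored file name of `001.segment`.)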
pub async fn list<'a>( &'a self, - prefix: Option<&'a ObjectStorePath>, - ) -> Result>> + 'a> { - let prefix = prefix.map(Into::into); - + prefix: Option<&'a DirsAndFileName>, + ) -> Result>> + 'a> { let list = if let Some(prefix) = &prefix { self.storage .read() .await .keys() .filter(|k| k.prefix_matches(prefix)) - .map(Into::into) + .cloned() .collect() } else { - self.storage.read().await.keys().map(Into::into).collect() + self.storage.read().await.keys().cloned().collect() }; Ok(futures::stream::once(async move { Ok(list) })) @@ -118,14 +117,11 @@ impl InMemory { /// limitations. pub async fn list_with_delimiter<'a>( &'a self, - prefix: &'a ObjectStorePath, - _next_token: &Option, - ) -> Result { + prefix: &'a DirsAndFileName, + ) -> Result> { let mut common_prefixes = BTreeSet::new(); let last_modified = Utc::now(); - let prefix: DirsAndFileName = prefix.into(); - // Only objects in this base level should be returned in the // response. Otherwise, we just collect the common prefixes. let mut objects = vec![]; @@ -133,20 +129,20 @@ impl InMemory { .storage .read() .await - .range((&prefix)..) - .take_while(|(k, _)| k.prefix_matches(&prefix)) + .range((prefix)..) + .take_while(|(k, _)| k.prefix_matches(prefix)) { let parts = k - .parts_after_prefix(&prefix) + .parts_after_prefix(prefix) .expect("must have prefix if in range"); if parts.len() >= 2 { - let mut full_prefix = prefix.clone(); + let mut full_prefix = prefix.to_owned(); full_prefix.push_part_as_dir(&parts[0]); common_prefixes.insert(full_prefix); } else { let object = ObjectMeta { - location: k.into(), + location: k.to_owned(), last_modified, size: v.len(), }; @@ -156,7 +152,7 @@ impl InMemory { Ok(ListResult { objects, - common_prefixes: common_prefixes.into_iter().map(Into::into).collect(), + common_prefixes: common_prefixes.into_iter().collect(), next_token: None, }) } @@ -171,7 +167,7 @@ mod tests { use crate::{ tests::{list_with_delimiter, put_get_delete_list}, - Error, ObjectStore, + Error, ObjectStore, ObjectStorePath, }; use futures::stream; diff --git a/object_store/src/path.rs b/object_store/src/path.rs index 785cac55af..1769d33670 100644 --- a/object_store/src/path.rs +++ b/object_store/src/path.rs @@ -22,14 +22,52 @@ use parts::PathPart; /// It allows IOx to be completely decoupled from the underlying object store /// implementations. /// -/// Deliberately does not implement `Display` or `ToString`! Use one of the -/// converters. -#[derive(Default, Clone, PartialEq, Eq, Debug)] -pub struct ObjectStorePath { - inner: PathRepresentation, +/// Deliberately does not implement `Display` or `ToString`! +pub trait ObjectStorePath: + std::fmt::Debug + Clone + PartialEq + Eq + Send + Sync + 'static +{ + /// Set the file name of this path + fn set_file_name(&mut self, part: impl Into); + + /// Add a part to the end of the path's directories, encoding any restricted + /// characters. + fn push_dir(&mut self, part: impl Into); + + /// Push a bunch of parts as directories in one go. + fn push_all_dirs<'a>(&mut self, parts: impl AsRef<[&'a str]>); + + /// Like `std::path::Path::display, converts an `ObjectStorePath` to a + /// `String` suitable for printing; not suitable for sending to + /// APIs. 
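+    ///
+    /// For example (a sketch using the `Path` implementation in this
+    /// crate):
+    ///
+    /// ```ignore
+    /// let mut path = Path::default();
+    /// path.push_all_dirs(&["1", "mydb"]);
+    /// path.set_file_name("rules.json");
+    /// assert_eq!(path.display(), "1/mydb/rules.json");
+    /// ```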
+ fn display(&self) -> String; } -impl ObjectStorePath { +/// Temporary +#[derive(Default, Clone, PartialEq, Eq, Debug)] +pub struct Path { + /// Temporary + pub inner: PathRepresentation, +} + +impl ObjectStorePath for Path { + fn set_file_name(&mut self, part: impl Into) { + self.inner = mem::take(&mut self.inner).set_file_name(part); + } + + fn push_dir(&mut self, part: impl Into) { + self.inner = mem::take(&mut self.inner).push_dir(part); + } + + fn push_all_dirs<'a>(&mut self, parts: impl AsRef<[&'a str]>) { + self.inner = mem::take(&mut self.inner).push_all_dirs(parts); + } + + fn display(&self) -> String { + self.inner.display() + } +} + +impl Path { /// For use when receiving a path from an object store API directly, not /// when building a path. Assumes DELIMITER is the separator. /// @@ -53,21 +91,11 @@ impl ObjectStorePath { } } - /// Add a part to the end of the path, encoding any restricted characters. - pub fn push_dir(&mut self, part: impl Into) { - self.inner = mem::take(&mut self.inner).push_dir(part); - } - /// Add a `PathPart` to the end of the path. pub fn push_part_as_dir(&mut self, part: &PathPart) { self.inner = mem::take(&mut self.inner).push_part_as_dir(part); } - /// Set the file name of this path - pub fn set_file_name(&mut self, part: impl Into) { - self.inner = mem::take(&mut self.inner).set_file_name(part); - } - /// Add the parts of `ObjectStorePath` to the end of the path. Notably does /// *not* behave as `PathBuf::push` does: there is no way to replace the /// root. If `self` has a file name, that will be removed, then the @@ -77,11 +105,6 @@ impl ObjectStorePath { self.inner = mem::take(&mut self.inner).push_path(path) } - /// Push a bunch of parts as directories in one go. - pub fn push_all_dirs<'a>(&mut self, parts: impl AsRef<[&'a str]>) { - self.inner = mem::take(&mut self.inner).push_all_dirs(parts); - } - /// Pops a part from the path and returns it, or `None` if it's empty. pub fn pop(&mut self) -> Option<&PathPart> { unimplemented!() @@ -110,13 +133,13 @@ impl ObjectStorePath { } } -impl From<&'_ DirsAndFileName> for ObjectStorePath { +impl From<&'_ DirsAndFileName> for Path { fn from(other: &'_ DirsAndFileName) -> Self { other.clone().into() } } -impl From for ObjectStorePath { +impl From for Path { fn from(other: DirsAndFileName) -> Self { Self { inner: PathRepresentation::Parts(other), @@ -124,10 +147,14 @@ impl From for ObjectStorePath { } } +/// Temporary #[derive(Clone, Eq, Debug)] -enum PathRepresentation { +pub enum PathRepresentation { + /// Will be transformed into a CloudPath type RawCloud(String), + /// Will be transformed into a FilePath type RawPathBuf(PathBuf), + /// Will be able to be used directly Parts(DirsAndFileName), } @@ -169,7 +196,7 @@ impl PathRepresentation { /// root. If `self` has a file name, that will be removed, then the /// directories of `path` will be appended, then any file name of `path` /// will be assigned to `self`. 
- fn push_path(self, path: &ObjectStorePath) -> Self { + fn push_path(self, path: &Path) -> Self { let DirsAndFileName { directories: path_dirs, file_name: path_file_name, @@ -190,6 +217,14 @@ impl PathRepresentation { dirs_and_file_name.file_name = Some((&*part).into()); Self::Parts(dirs_and_file_name) } + + fn display(&self) -> String { + match self { + Self::Parts(dirs_and_file_name) => dirs_and_file_name.display(), + Self::RawCloud(path) => path.to_owned(), + Self::RawPathBuf(path_buf) => path_buf.display().to_string(), + } + } } impl PartialEq for PathRepresentation { @@ -235,7 +270,7 @@ mod tests { #[test] fn prefix_matches() { - let mut haystack = ObjectStorePath::default(); + let mut haystack = Path::default(); haystack.push_all_dirs(&["foo/bar", "baz%2Ftest", "something"]); // self starts with self @@ -257,7 +292,7 @@ mod tests { ); // one dir prefix matches - let mut needle = ObjectStorePath::default(); + let mut needle = Path::default(); needle.push_dir("foo/bar"); assert!( haystack.prefix_matches(&needle), @@ -276,7 +311,7 @@ mod tests { ); // partial dir prefix matches - let mut needle = ObjectStorePath::default(); + let mut needle = Path::default(); needle.push_dir("f"); assert!( haystack.prefix_matches(&needle), @@ -286,7 +321,7 @@ mod tests { ); // one dir and one partial dir matches - let mut needle = ObjectStorePath::default(); + let mut needle = Path::default(); needle.push_all_dirs(&["foo/bar", "baz"]); assert!( haystack.prefix_matches(&needle), @@ -298,7 +333,7 @@ mod tests { #[test] fn prefix_matches_with_file_name() { - let mut haystack = ObjectStorePath::default(); + let mut haystack = Path::default(); haystack.push_all_dirs(&["foo/bar", "baz%2Ftest", "something"]); let mut needle = haystack.clone(); @@ -326,7 +361,7 @@ mod tests { // Not all directories match; file name is a prefix of the next directory; this // matches - let mut needle = ObjectStorePath::default(); + let mut needle = Path::default(); needle.push_all_dirs(&["foo/bar", "baz%2Ftest"]); needle.set_file_name("s"); @@ -352,30 +387,30 @@ mod tests { #[test] fn convert_raw_before_partial_eq() { // dir and file_name - let cloud = ObjectStorePath::from_cloud_unchecked("test_dir/test_file.json"); - let mut built = ObjectStorePath::default(); + let cloud = Path::from_cloud_unchecked("test_dir/test_file.json"); + let mut built = Path::default(); built.push_dir("test_dir"); built.set_file_name("test_file.json"); assert_eq!(built, cloud); // dir, no file_name - let cloud = ObjectStorePath::from_cloud_unchecked("test_dir"); - let mut built = ObjectStorePath::default(); + let cloud = Path::from_cloud_unchecked("test_dir"); + let mut built = Path::default(); built.push_dir("test_dir"); assert_eq!(built, cloud); // file_name, no dir - let cloud = ObjectStorePath::from_cloud_unchecked("test_file.json"); - let mut built = ObjectStorePath::default(); + let cloud = Path::from_cloud_unchecked("test_file.json"); + let mut built = Path::default(); built.set_file_name("test_file.json"); assert_eq!(built, cloud); // empty - let cloud = ObjectStorePath::from_cloud_unchecked(""); - let built = ObjectStorePath::default(); + let cloud = Path::from_cloud_unchecked(""); + let built = Path::default(); assert_eq!(built, cloud); } diff --git a/object_store/src/path/cloud.rs b/object_store/src/path/cloud.rs index 2f7755ff50..c2d545e4d3 100644 --- a/object_store/src/path/cloud.rs +++ b/object_store/src/path/cloud.rs @@ -1,17 +1,17 @@ -use super::{ObjectStorePath, PathPart, PathRepresentation, DELIMITER}; +use super::{PathPart, 
PathRepresentation, DELIMITER}; use itertools::Itertools; -/// Converts `ObjectStorePath`s to `String`s that are appropriate for use as +/// Converts `PathRepresentation`s to `String`s that are appropriate for use as /// locations in cloud storage. #[derive(Debug, Clone, Copy)] pub struct CloudConverter {} impl CloudConverter { - /// Creates a cloud storage location by joining this `ObjectStorePath`'s + /// Creates a cloud storage location by joining this `PathRepresentation`'s /// parts with `DELIMITER` - pub fn convert(object_store_path: &ObjectStorePath) -> String { - match &object_store_path.inner { + pub fn convert(object_store_path: &PathRepresentation) -> String { + match object_store_path { PathRepresentation::RawCloud(path) => path.to_owned(), PathRepresentation::RawPathBuf(_path) => { todo!("convert"); @@ -44,8 +44,8 @@ mod tests { // Use case: a file named `test_file.json` exists in object storage and it // should be returned for a search on prefix `test`, so the prefix path // should not get a trailing delimiter automatically added - let mut prefix = ObjectStorePath::default(); - prefix.set_file_name("test"); + let prefix = PathRepresentation::default(); + let prefix = prefix.set_file_name("test"); let converted = CloudConverter::convert(&prefix); assert_eq!(converted, "test"); @@ -56,8 +56,8 @@ mod tests { // Use case: files exist in object storage named `foo/bar.json` and // `foo_test.json`. A search for the prefix `foo/` should return // `foo/bar.json` but not `foo_test.json'. - let mut prefix = ObjectStorePath::default(); - prefix.push_dir("test"); + let prefix = PathRepresentation::default(); + let prefix = prefix.push_dir("test"); let converted = CloudConverter::convert(&prefix); assert_eq!(converted, "test/"); @@ -65,9 +65,9 @@ mod tests { #[test] fn push_encodes() { - let mut location = ObjectStorePath::default(); - location.push_dir("foo/bar"); - location.push_dir("baz%2Ftest"); + let location = PathRepresentation::default(); + let location = location.push_dir("foo/bar"); + let location = location.push_dir("baz%2Ftest"); let converted = CloudConverter::convert(&location); assert_eq!(converted, "foo%2Fbar/baz%252Ftest/"); @@ -75,8 +75,8 @@ mod tests { #[test] fn push_all_encodes() { - let mut location = ObjectStorePath::default(); - location.push_all_dirs(&["foo/bar", "baz%2Ftest"]); + let location = PathRepresentation::default(); + let location = location.push_all_dirs(&["foo/bar", "baz%2Ftest"]); let converted = CloudConverter::convert(&location); assert_eq!(converted, "foo%2Fbar/baz%252Ftest/"); diff --git a/object_store/src/path/file.rs b/object_store/src/path/file.rs index d086d4d4cf..3551f1f9dc 100644 --- a/object_store/src/path/file.rs +++ b/object_store/src/path/file.rs @@ -1,4 +1,4 @@ -use super::{ObjectStorePath, PathPart, PathRepresentation}; +use super::{PathPart, PathRepresentation}; use std::path::PathBuf; @@ -11,8 +11,8 @@ impl FileConverter { /// Creates a filesystem `PathBuf` location by using the standard library's /// `PathBuf` building implementation appropriate for the current /// platform. 
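     ///
     /// For example (a sketch): directory parts `foo` and `bar` with file
     /// name `blah.json` become the `PathBuf` `foo/bar/blah.json` (or
     /// `foo\bar\blah.json` on Windows).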
- pub fn convert(object_store_path: &ObjectStorePath) -> PathBuf { - match &object_store_path.inner { + pub fn convert(object_store_path: &PathRepresentation) -> PathBuf { + match object_store_path { PathRepresentation::RawCloud(_path) => { todo!("convert"); } diff --git a/object_store/src/path/parsed.rs b/object_store/src/path/parsed.rs index 2a517bb6bb..6914779b2a 100644 --- a/object_store/src/path/parsed.rs +++ b/object_store/src/path/parsed.rs @@ -1,11 +1,47 @@ use super::{ObjectStorePath, PathPart, PathRepresentation, DELIMITER}; +use itertools::Itertools; + +/// A path stored as a collection of 0 or more directories and 0 or 1 file name #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Default)] -pub(crate) struct DirsAndFileName { +pub struct DirsAndFileName { pub(crate) directories: Vec, pub(crate) file_name: Option, } +impl ObjectStorePath for DirsAndFileName { + fn set_file_name(&mut self, part: impl Into) { + let part = part.into(); + self.file_name = Some((&*part).into()); + } + + fn push_dir(&mut self, part: impl Into) { + let part = part.into(); + self.directories.push((&*part).into()); + } + + fn push_all_dirs<'a>(&mut self, parts: impl AsRef<[&'a str]>) { + self.directories + .extend(parts.as_ref().iter().map(|&v| v.into())); + } + + fn display(&self) -> String { + let mut s = self + .directories + .iter() + .map(PathPart::encoded) + .join(DELIMITER); + + if !s.is_empty() { + s.push_str(DELIMITER); + } + if let Some(file_name) = &self.file_name { + s.push_str(file_name.encoded()); + } + s + } +} + impl DirsAndFileName { pub(crate) fn prefix_matches(&self, prefix: &Self) -> bool { let diff = itertools::diff_with( @@ -82,19 +118,6 @@ impl DirsAndFileName { Some(parts) } - /// Add a part to the end of the path's directories, encoding any restricted - /// characters. - pub(crate) fn push_dir(&mut self, part: impl Into) { - let part = part.into(); - self.directories.push((&*part).into()); - } - - /// Push a bunch of parts as directories in one go. - pub(crate) fn push_all_dirs<'a>(&mut self, parts: impl AsRef<[&'a str]>) { - self.directories - .extend(parts.as_ref().iter().map(|&v| v.into())); - } - /// Add a `PathPart` to the end of the path's directories. pub(crate) fn push_part_as_dir(&mut self, part: &PathPart) { self.directories.push(part.to_owned()); @@ -153,14 +176,14 @@ impl From for DirsAndFileName { } } -impl From<&'_ ObjectStorePath> for DirsAndFileName { - fn from(other: &'_ ObjectStorePath) -> Self { +impl From<&'_ crate::path::Path> for DirsAndFileName { + fn from(other: &'_ crate::path::Path) -> Self { other.clone().into() } } -impl From for DirsAndFileName { - fn from(other: ObjectStorePath) -> Self { +impl From for DirsAndFileName { + fn from(other: crate::path::Path) -> Self { other.inner.into() } } diff --git a/server/src/buffer.rs b/server/src/buffer.rs index dd52ab9deb..f142881f84 100644 --- a/server/src/buffer.rs +++ b/server/src/buffer.rs @@ -385,7 +385,7 @@ impl Segment { stream_data = std::io::Result::Ok(data.clone()); } - info!("persisted data to {}", store.convert_path(&location)); + info!("persisted data to {}", location.display()); }); Ok(()) @@ -507,10 +507,7 @@ const SEGMENT_FILE_EXTENSION: &str = ".segment"; /// Builds the path for a given segment id, given the root object store path. /// The path should be where the root of the database is (e.g. 1/my_db/). -fn object_store_path_for_segment( - root_path: &ObjectStorePath, - segment_id: u64, -) -> Result { +fn object_store_path_for_segment(root_path: &P, segment_id: u64) -> Result

{ ensure!( segment_id < MAX_SEGMENT_ID && segment_id > 0, SegmentIdOutOfBounds @@ -538,7 +535,7 @@ fn database_object_store_path( writer_id: u32, database_name: &DatabaseName<'_>, store: &ObjectStore, -) -> ObjectStorePath { +) -> object_store::path::Path { let mut path = store.new_path(); path.push_dir(format!("{}", writer_id)); path.push_dir(database_name.to_string()); diff --git a/server/src/config.rs b/server/src/config.rs index 6e942b0f36..92e24e25c7 100644 --- a/server/src/config.rs +++ b/server/src/config.rs @@ -87,10 +87,10 @@ impl Config { } } -pub fn object_store_path_for_database_config( - root: &ObjectStorePath, +pub fn object_store_path_for_database_config( + root: &P, name: &DatabaseName<'_>, -) -> ObjectStorePath { +) -> P { let mut path = root.clone(); path.push_dir(name.to_string()); path.set_file_name(DB_RULES_FILE_NAME); @@ -136,7 +136,6 @@ impl<'a> Drop for CreateDatabaseHandle<'a> { #[cfg(test)] mod test { use super::*; - use object_store::path::cloud::CloudConverter; use object_store::{memory::InMemory, ObjectStore}; #[test] @@ -159,12 +158,16 @@ mod test { #[test] fn object_store_path_for_database_config() { let storage = ObjectStore::new_in_memory(InMemory::new()); - let mut path = storage.new_path(); - path.push_dir("1"); - let name = DatabaseName::new("foo").unwrap(); - let rules_path = super::object_store_path_for_database_config(&path, &name); - let rules_path = CloudConverter::convert(&rules_path); + let mut base_path = storage.new_path(); + base_path.push_dir("1"); - assert_eq!(rules_path, "1/foo/rules.json"); + let name = DatabaseName::new("foo").unwrap(); + let rules_path = super::object_store_path_for_database_config(&base_path, &name); + + let mut expected_path = base_path.clone(); + expected_path.push_dir("foo"); + expected_path.set_file_name("rules.json"); + + assert_eq!(rules_path, expected_path); } } diff --git a/server/src/lib.rs b/server/src/lib.rs index 9074cbe539..d321c8debd 100644 --- a/server/src/lib.rs +++ b/server/src/lib.rs @@ -218,7 +218,7 @@ impl Server { } // base location in object store for this writer - fn root_path(&self) -> Result { + fn root_path(&self) -> Result { let id = self.require_id()?; let mut path = self.store.new_path(); @@ -528,11 +528,11 @@ impl RemoteServer for RemoteServerImpl { // get bytes from the location in object store async fn get_store_bytes( - location: &ObjectStorePath, + location: &object_store::path::Path, store: &ObjectStore, ) -> Result { let b = store - .get(&location) + .get(location) .await .context(StoreError)? 
.map_ok(|b| bytes::BytesMut::from(&b[..])) @@ -555,7 +555,7 @@ mod tests { }; use futures::TryStreamExt; use influxdb_line_protocol::parse_lines; - use object_store::memory::InMemory; + use object_store::{memory::InMemory, path::ObjectStorePath}; use query::frontend::sql::SQLQueryPlanner; use snafu::Snafu; use std::collections::BTreeMap; @@ -610,7 +610,7 @@ mod tests { .await .expect("failed to create database"); - let mut rules_path = store.new_path(); + let mut rules_path = server.store.new_path(); rules_path.push_all_dirs(&["1", name]); rules_path.set_file_name("rules.json"); diff --git a/server/src/snapshot.rs b/server/src/snapshot.rs index 6eadf0fae3..ecb40204fc 100644 --- a/server/src/snapshot.rs +++ b/server/src/snapshot.rs @@ -64,8 +64,8 @@ where { pub id: Uuid, pub partition_meta: PartitionMeta, - pub metadata_path: ObjectStorePath, - pub data_path: ObjectStorePath, + pub metadata_path: object_store::path::Path, + pub data_path: object_store::path::Path, store: Arc, partition: Arc, status: Mutex, @@ -77,8 +77,8 @@ where { fn new( partition_key: impl Into, - metadata_path: ObjectStorePath, - data_path: ObjectStorePath, + metadata_path: object_store::path::Path, + data_path: object_store::path::Path, store: Arc, partition: Arc, tables: Vec, @@ -104,10 +104,6 @@ where } } - fn data_path(&self) -> String { - self.store.convert_path(&self.data_path) - } - // returns the position of the next table fn next_table(&self) -> Option<(usize, &str)> { let mut status = self.status.lock().expect("mutex poisoned"); @@ -198,7 +194,7 @@ where async fn write_batches( &self, batches: Vec, - file_name: &ObjectStorePath, + file_name: &object_store::path::Path, ) -> Result<()> { let mem_writer = MemWriter::default(); { @@ -250,8 +246,8 @@ pub struct Status { } pub fn snapshot_chunk( - metadata_path: ObjectStorePath, - data_path: ObjectStorePath, + metadata_path: object_store::path::Path, + data_path: object_store::path::Path, store: Arc, partition_key: &str, chunk: Arc, @@ -281,7 +277,7 @@ where info!( "starting snapshot of {} to {}", &snapshot.partition_meta.key, - &snapshot.data_path() + &snapshot.data_path.display() ); if let Err(e) = snapshot.run(notify).await { error!("error running snapshot: {:?}", e); diff --git a/src/influxdb_ioxd/http_routes.rs b/src/influxdb_ioxd/http_routes.rs index 60a2134df5..e1f0e3f54f 100644 --- a/src/influxdb_ioxd/http_routes.rs +++ b/src/influxdb_ioxd/http_routes.rs @@ -694,6 +694,8 @@ where async fn snapshot_partition( req: Request, ) -> Result, ApplicationError> { + use object_store::path::ObjectStorePath; + let server = req .data::>>() .expect("server state") From d39131ab49c7e6064891f46b648f45e07a4c87c2 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Tue, 19 Jan 2021 20:08:13 -0500 Subject: [PATCH 04/10] refactor: Extract a CloudPath type for use in cloud storage This is the start of using the type system to enforce that only CloudPaths will be used with S3, GCS, and Azure. Still some mess in here, cleanup coming. 
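As a sketch of the intended usage, using only the types and methods
added in this patch:

    let mut path = CloudPath::default();
    path.push_all_dirs(&["1", "mydb"]);
    path.set_file_name("rules.json");
    assert_eq!(path.to_raw(), "1/mydb/rules.json");

    // A raw path from a cloud API is stored as-is and only parsed when
    // needed, e.g. for comparison:
    assert_eq!(CloudPath::raw("1/mydb/rules.json"), path);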
--- object_store/src/aws.rs | 48 +++--- object_store/src/azure.rs | 29 ++-- object_store/src/gcp.rs | 29 ++-- object_store/src/lib.rs | 214 ++++++++++++++++++++++++--- object_store/src/path.rs | 78 ++-------- object_store/src/path/cloud.rs | 251 ++++++++++++++++++++++++++++---- object_store/src/path/file.rs | 4 +- object_store/src/path/parsed.rs | 21 +-- 8 files changed, 477 insertions(+), 197 deletions(-) diff --git a/object_store/src/aws.rs b/object_store/src/aws.rs index 324bdbdf4b..86a4a1eafd 100644 --- a/object_store/src/aws.rs +++ b/object_store/src/aws.rs @@ -1,7 +1,7 @@ //! This module contains the IOx implementation for using S3 as the object //! store. use crate::{ - path::{cloud::CloudConverter, DELIMITER}, + path::{cloud::CloudPath, DELIMITER}, Error, ListResult, NoDataFromS3, ObjectMeta, Result, UnableToDeleteDataFromS3, UnableToGetDataFromS3, UnableToGetPieceOfDataFromS3, UnableToPutDataToS3, }; @@ -55,12 +55,12 @@ impl AmazonS3 { } /// Return a new location path appropriate for this object storage - pub fn new_path(&self) -> crate::path::Path { - crate::path::Path::default() + pub fn new_path(&self) -> CloudPath { + CloudPath::default() } /// Save the provided bytes to the specified location. - pub async fn put(&self, location: &crate::path::Path, bytes: S, length: usize) -> Result<()> + pub async fn put(&self, location: &CloudPath, bytes: S, length: usize) -> Result<()> where S: Stream> + Send + Sync + 'static, { @@ -68,7 +68,7 @@ impl AmazonS3 { let put_request = rusoto_s3::PutObjectRequest { bucket: self.bucket_name.clone(), - key: CloudConverter::convert(&location.inner), + key: location.to_raw(), body: Some(bytes), ..Default::default() }; @@ -78,17 +78,14 @@ impl AmazonS3 { .await .context(UnableToPutDataToS3 { bucket: &self.bucket_name, - location: CloudConverter::convert(&location.inner), + location: location.to_raw(), })?; Ok(()) } /// Return the bytes that are stored at the specified location. - pub async fn get( - &self, - location: &crate::path::Path, - ) -> Result>> { - let key = CloudConverter::convert(&location.inner); + pub async fn get(&self, location: &CloudPath) -> Result>> { + let key = location.to_raw(); let get_request = rusoto_s3::GetObjectRequest { bucket: self.bucket_name.clone(), key: key.clone(), @@ -115,8 +112,8 @@ impl AmazonS3 { } /// Delete the object at the specified location. - pub async fn delete(&self, location: &crate::path::Path) -> Result<()> { - let key = CloudConverter::convert(&location.inner); + pub async fn delete(&self, location: &CloudPath) -> Result<()> { + let key = location.to_raw(); let delete_request = rusoto_s3::DeleteObjectRequest { bucket: self.bucket_name.clone(), key: key.clone(), @@ -136,8 +133,8 @@ impl AmazonS3 { /// List all the objects with the given prefix. 
pub async fn list<'a>( &'a self, - prefix: Option<&'a crate::path::Path>, - ) -> Result>> + 'a> { + prefix: Option<&'a CloudPath>, + ) -> Result>> + 'a> { #[derive(Clone)] enum ListState { Start, @@ -149,7 +146,7 @@ impl AmazonS3 { Ok(stream::unfold(ListState::Start, move |state| async move { let mut list_request = rusoto_s3::ListObjectsV2Request { bucket: self.bucket_name.clone(), - prefix: prefix.map(|p| CloudConverter::convert(&p.inner)), + prefix: prefix.map(|p| p.to_raw()), ..Default::default() }; @@ -181,7 +178,7 @@ impl AmazonS3 { let contents = resp.contents.unwrap_or_default(); let names = contents .into_iter() - .flat_map(|object| object.key.map(crate::path::Path::from_cloud_unchecked)) + .flat_map(|object| object.key.map(CloudPath::raw)) .collect(); // The AWS response contains a field named `is_truncated` as well as @@ -202,10 +199,10 @@ impl AmazonS3 { /// common prefixes (directories) in addition to object metadata. pub async fn list_with_delimiter<'a>( &'a self, - prefix: &'a crate::path::Path, + prefix: &'a CloudPath, next_token: &Option, - ) -> Result> { - let converted_prefix = CloudConverter::convert(&prefix.inner); + ) -> Result> { + let converted_prefix = prefix.to_raw(); let mut list_request = rusoto_s3::ListObjectsV2Request { bucket: self.bucket_name.clone(), @@ -233,9 +230,8 @@ impl AmazonS3 { let objects: Vec<_> = contents .into_iter() .map(|object| { - let location = crate::path::Path::from_cloud_unchecked( - object.key.expect("object doesn't exist without a key"), - ); + let location = + CloudPath::raw(object.key.expect("object doesn't exist without a key")); let last_modified = match object.last_modified { Some(lm) => { DateTime::parse_from_rfc3339(&lm) @@ -264,11 +260,7 @@ impl AmazonS3 { .common_prefixes .unwrap_or_default() .into_iter() - .map(|p| { - crate::path::Path::from_cloud_unchecked( - p.prefix.expect("can't have a prefix without a value"), - ) - }) + .map(|p| CloudPath::raw(p.prefix.expect("can't have a prefix without a value"))) .collect(); let result = ListResult { diff --git a/object_store/src/azure.rs b/object_store/src/azure.rs index 8b1dfd20a0..a1731a92b9 100644 --- a/object_store/src/azure.rs +++ b/object_store/src/azure.rs @@ -1,7 +1,7 @@ //! This module contains the IOx implementation for using Azure Blob storage as //! the object store. use crate::{ - path::cloud::CloudConverter, DataDoesNotMatchLength, Result, UnableToDeleteDataFromAzure, + path::cloud::CloudPath, DataDoesNotMatchLength, Result, UnableToDeleteDataFromAzure, UnableToGetDataFromAzure, UnableToListDataFromAzure, UnableToPutDataToAzure, }; use azure_core::HttpClient; @@ -65,16 +65,16 @@ impl MicrosoftAzure { } /// Return a new location path appropriate for this object storage - pub fn new_path(&self) -> crate::path::Path { - crate::path::Path::default() + pub fn new_path(&self) -> CloudPath { + CloudPath::default() } /// Save the provided bytes to the specified location. - pub async fn put(&self, location: &crate::path::Path, bytes: S, length: usize) -> Result<()> + pub async fn put(&self, location: &CloudPath, bytes: S, length: usize) -> Result<()> where S: Stream> + Send + Sync + 'static, { - let location = CloudConverter::convert(&location.inner); + let location = location.to_raw(); let temporary_non_streaming = bytes .map_ok(|b| bytes::BytesMut::from(&b[..])) .try_concat() @@ -102,12 +102,9 @@ impl MicrosoftAzure { } /// Return the bytes that are stored at the specified location. 
- pub async fn get( - &self, - location: &crate::path::Path, - ) -> Result>> { + pub async fn get(&self, location: &CloudPath) -> Result>> { let container_client = self.container_client.clone(); - let location = CloudConverter::convert(&location.inner); + let location = location.to_raw(); Ok(async move { container_client .as_blob_client(&location) @@ -123,8 +120,8 @@ impl MicrosoftAzure { } /// Delete the object at the specified location. - pub async fn delete(&self, location: &crate::path::Path) -> Result<()> { - let location = CloudConverter::convert(&location.inner); + pub async fn delete(&self, location: &CloudPath) -> Result<()> { + let location = location.to_raw(); self.container_client .as_blob_client(&location) .delete() @@ -141,8 +138,8 @@ impl MicrosoftAzure { /// List all the objects with the given prefix. pub async fn list<'a>( &'a self, - prefix: Option<&'a crate::path::Path>, - ) -> Result>> + 'a> { + prefix: Option<&'a CloudPath>, + ) -> Result>> + 'a> { #[derive(Clone)] enum ListState { Start, @@ -153,7 +150,7 @@ impl MicrosoftAzure { Ok(stream::unfold(ListState::Start, move |state| async move { let mut request = self.container_client.list_blobs(); - let prefix = prefix.map(|p| CloudConverter::convert(&p.inner)); + let prefix = prefix.map(|p| p.to_raw()); if let Some(ref p) = prefix { request = request.prefix(p as &str); } @@ -183,7 +180,7 @@ impl MicrosoftAzure { .incomplete_vector .vector .into_iter() - .map(|blob| crate::path::Path::from_cloud_unchecked(blob.name)) + .map(|blob| CloudPath::raw(blob.name)) .collect(); Some((Ok(names), next_state)) diff --git a/object_store/src/gcp.rs b/object_store/src/gcp.rs index 6d7eb8c65c..2d19edefbf 100644 --- a/object_store/src/gcp.rs +++ b/object_store/src/gcp.rs @@ -1,7 +1,7 @@ //! This module contains the IOx implementation for using Google Cloud Storage //! as the object store. use crate::{ - path::cloud::CloudConverter, DataDoesNotMatchLength, Result, UnableToDeleteDataFromGcs, + path::cloud::CloudPath, DataDoesNotMatchLength, Result, UnableToDeleteDataFromGcs, UnableToGetDataFromGcs, UnableToListDataFromGcs, UnableToListDataFromGcs2, UnableToPutDataToGcs, }; @@ -25,12 +25,12 @@ impl GoogleCloudStorage { } /// Return a new location path appropriate for this object storage - pub fn new_path(&self) -> crate::path::Path { - crate::path::Path::default() + pub fn new_path(&self) -> CloudPath { + CloudPath::default() } /// Save the provided bytes to the specified location. - pub async fn put(&self, location: &crate::path::Path, bytes: S, length: usize) -> Result<()> + pub async fn put(&self, location: &CloudPath, bytes: S, length: usize) -> Result<()> where S: Stream> + Send + Sync + 'static, { @@ -49,7 +49,7 @@ impl GoogleCloudStorage { } ); - let location = CloudConverter::convert(&location.inner); + let location = location.to_raw(); let location_copy = location.clone(); let bucket_name = self.bucket_name.clone(); @@ -69,11 +69,8 @@ impl GoogleCloudStorage { } /// Return the bytes that are stored at the specified location. - pub async fn get( - &self, - location: &crate::path::Path, - ) -> Result>> { - let location = CloudConverter::convert(&location.inner); + pub async fn get(&self, location: &CloudPath) -> Result>> { + let location = location.to_raw(); let location_copy = location.clone(); let bucket_name = self.bucket_name.clone(); @@ -88,8 +85,8 @@ impl GoogleCloudStorage { } /// Delete the object at the specified location. 
- pub async fn delete(&self, location: &crate::path::Path) -> Result<()> { - let location = CloudConverter::convert(&location.inner); + pub async fn delete(&self, location: &CloudPath) -> Result<()> { + let location = location.to_raw(); let location_copy = location.clone(); let bucket_name = self.bucket_name.clone(); @@ -106,11 +103,11 @@ impl GoogleCloudStorage { /// List all the objects with the given prefix. pub async fn list<'a>( &'a self, - prefix: Option<&'a crate::path::Path>, - ) -> Result>> + 'a> { + prefix: Option<&'a CloudPath>, + ) -> Result>> + 'a> { let objects = match prefix { Some(prefix) => { - let cloud_prefix = CloudConverter::convert(&prefix.inner); + let cloud_prefix = prefix.to_raw(); let list = cloud_storage::Object::list_prefix(&self.bucket_name, &cloud_prefix) .await .context(UnableToListDataFromGcs { @@ -133,7 +130,7 @@ impl GoogleCloudStorage { let objects = objects .map_ok(|list| { list.into_iter() - .map(|o| crate::path::Path::from_cloud_unchecked(o.name)) + .map(|o| CloudPath::raw(o.name)) .collect::>() }) .context(UnableToListDataFromGcs2 { diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs index 64527b69dc..1e6708cc7d 100644 --- a/object_store/src/lib.rs +++ b/object_store/src/lib.rs @@ -69,13 +69,19 @@ impl ObjectStore { pub fn new_path(&self) -> path::Path { use ObjectStoreIntegration::*; match &self.0 { - AmazonS3(s3) => s3.new_path(), - GoogleCloudStorage(gcs) => gcs.new_path(), + AmazonS3(s3) => path::Path { + inner: path::PathRepresentation::AmazonS3(s3.new_path()), + }, + GoogleCloudStorage(gcs) => path::Path { + inner: path::PathRepresentation::GoogleCloudStorage(gcs.new_path()), + }, InMemory(in_mem) => path::Path { inner: path::PathRepresentation::Parts(in_mem.new_path()), }, File(file) => file.new_path(), - MicrosoftAzure(azure) => azure.new_path(), + MicrosoftAzure(azure) => path::Path { + inner: path::PathRepresentation::MicrosoftAzure(azure.new_path()), + }, } } @@ -84,18 +90,34 @@ impl ObjectStore { where S: Stream> + Send + Sync + 'static, { + use path::PathRepresentation; use ObjectStoreIntegration::*; match (&self.0, location) { - (AmazonS3(s3), _) => s3.put(location, bytes, length).await?, - (GoogleCloudStorage(gcs), _) => gcs.put(location, bytes, length).await?, + ( + AmazonS3(s3), + path::Path { + inner: PathRepresentation::AmazonS3(location), + }, + ) => s3.put(location, bytes, length).await?, + ( + GoogleCloudStorage(gcs), + path::Path { + inner: PathRepresentation::GoogleCloudStorage(location), + }, + ) => gcs.put(location, bytes, length).await?, ( InMemory(in_mem), path::Path { - inner: path::PathRepresentation::Parts(location), + inner: PathRepresentation::Parts(location), }, ) => in_mem.put(location, bytes, length).await?, (File(file), _) => file.put(location, bytes, length).await?, - (MicrosoftAzure(azure), _) => azure.put(location, bytes, length).await?, + ( + MicrosoftAzure(azure), + path::Path { + inner: PathRepresentation::MicrosoftAzure(location), + }, + ) => azure.put(location, bytes, length).await?, _ => unreachable!(), } @@ -104,18 +126,34 @@ impl ObjectStore { /// Return the bytes that are stored at the specified location. 
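     ///
     /// The `location` must come from `new_path` on the same kind of store,
     /// so that its `PathRepresentation` variant matches the integration;
     /// the `_ => unreachable!()` arm only guards against mixing paths
     /// across store kinds.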
pub async fn get(&self, location: &path::Path) -> Result>> { + use path::PathRepresentation; use ObjectStoreIntegration::*; Ok(match (&self.0, location) { - (AmazonS3(s3), _) => s3.get(location).await?.boxed(), - (GoogleCloudStorage(gcs), _) => gcs.get(location).await?.boxed(), + ( + AmazonS3(s3), + path::Path { + inner: PathRepresentation::AmazonS3(location), + }, + ) => s3.get(location).await?.boxed(), + ( + GoogleCloudStorage(gcs), + path::Path { + inner: PathRepresentation::GoogleCloudStorage(location), + }, + ) => gcs.get(location).await?.boxed(), ( InMemory(in_mem), path::Path { - inner: path::PathRepresentation::Parts(location), + inner: PathRepresentation::Parts(location), }, ) => in_mem.get(location).await?.boxed(), (File(file), _) => file.get(location).await?.boxed(), - (MicrosoftAzure(azure), _) => azure.get(location).await?.boxed(), + ( + MicrosoftAzure(azure), + path::Path { + inner: PathRepresentation::MicrosoftAzure(location), + }, + ) => azure.get(location).await?.boxed(), _ => unreachable!(), } .err_into()) @@ -123,18 +161,34 @@ impl ObjectStore { /// Delete the object at the specified location. pub async fn delete(&self, location: &path::Path) -> Result<()> { + use path::PathRepresentation; use ObjectStoreIntegration::*; match (&self.0, location) { - (AmazonS3(s3), _) => s3.delete(location).await?, - (GoogleCloudStorage(gcs), _) => gcs.delete(location).await?, + ( + AmazonS3(s3), + path::Path { + inner: PathRepresentation::AmazonS3(location), + }, + ) => s3.delete(location).await?, + ( + GoogleCloudStorage(gcs), + path::Path { + inner: PathRepresentation::GoogleCloudStorage(location), + }, + ) => gcs.delete(location).await?, ( InMemory(in_mem), path::Path { - inner: path::PathRepresentation::Parts(location), + inner: PathRepresentation::Parts(location), }, ) => in_mem.delete(location).await?, (File(file), _) => file.delete(location).await?, - (MicrosoftAzure(azure), _) => azure.delete(location).await?, + ( + MicrosoftAzure(azure), + path::Path { + inner: PathRepresentation::MicrosoftAzure(location), + }, + ) => azure.delete(location).await?, _ => unreachable!(), } @@ -146,15 +200,68 @@ impl ObjectStore { &'a self, prefix: Option<&'a path::Path>, ) -> Result>> + 'a> { + use path::PathRepresentation; use ObjectStoreIntegration::*; Ok(match (&self.0, prefix) { - (AmazonS3(s3), _) => s3.list(prefix).await?.boxed(), - (GoogleCloudStorage(gcs), _) => gcs.list(prefix).await?.boxed(), + ( + AmazonS3(s3), + Some(path::Path { + inner: PathRepresentation::AmazonS3(prefix), + }), + ) => s3 + .list(Some(prefix)) + .await? + .map_ok(|s| { + s.into_iter() + .map(|p| path::Path { + inner: PathRepresentation::AmazonS3(p), + }) + .collect() + }) + .boxed(), + (AmazonS3(s3), None) => s3 + .list(None) + .await? + .map_ok(|s| { + s.into_iter() + .map(|p| path::Path { + inner: PathRepresentation::AmazonS3(p), + }) + .collect() + }) + .boxed(), + ( + GoogleCloudStorage(gcs), + Some(path::Path { + inner: PathRepresentation::GoogleCloudStorage(prefix), + }), + ) => gcs + .list(Some(prefix)) + .await? + .map_ok(|s| { + s.into_iter() + .map(|p| path::Path { + inner: PathRepresentation::GoogleCloudStorage(p), + }) + .collect() + }) + .boxed(), + (GoogleCloudStorage(gcs), None) => gcs + .list(None) + .await? 
+ .map_ok(|s| { + s.into_iter() + .map(|p| path::Path { + inner: PathRepresentation::GoogleCloudStorage(p), + }) + .collect() + }) + .boxed(), ( InMemory(in_mem), Some(path::Path { - inner: path::PathRepresentation::Parts(dirs_and_file_name), + inner: PathRepresentation::Parts(dirs_and_file_name), }), ) => in_mem .list(Some(dirs_and_file_name)) @@ -168,7 +275,33 @@ impl ObjectStore { .boxed(), (File(file), _) => file.list(prefix).await?.boxed(), - (MicrosoftAzure(azure), _) => azure.list(prefix).await?.boxed(), + ( + MicrosoftAzure(azure), + Some(path::Path { + inner: PathRepresentation::MicrosoftAzure(prefix), + }), + ) => azure + .list(Some(prefix)) + .await? + .map_ok(|s| { + s.into_iter() + .map(|p| path::Path { + inner: PathRepresentation::MicrosoftAzure(p), + }) + .collect() + }) + .boxed(), + (MicrosoftAzure(azure), None) => azure + .list(None) + .await? + .map_ok(|s| { + s.into_iter() + .map(|p| path::Path { + inner: PathRepresentation::MicrosoftAzure(p), + }) + .collect() + }) + .boxed(), _ => unreachable!(), } .err_into()) @@ -181,9 +314,23 @@ impl ObjectStore { &'a self, prefix: &'a path::Path, ) -> Result> { + use path::PathRepresentation; use ObjectStoreIntegration::*; match (&self.0, prefix) { - (AmazonS3(s3), _) => s3.list_with_delimiter(prefix, &None).await, + ( + AmazonS3(s3), + path::Path { + inner: PathRepresentation::AmazonS3(prefix), + }, + ) => { + s3.list_with_delimiter(prefix, &None) + .map_ok(|list_result| { + list_result.map_paths(|p| path::Path { + inner: PathRepresentation::AmazonS3(p), + }) + }) + .await + } (GoogleCloudStorage(_gcs), _) => unimplemented!(), ( InMemory(in_mem), @@ -196,9 +343,9 @@ impl ObjectStore { .map_ok(|list_result| list_result.map_paths(Into::into)) .await } - (InMemory(_), _) => unreachable!(), (File(_file), _) => unimplemented!(), (MicrosoftAzure(_azure), _) => unimplemented!(), + _ => unreachable!(), } } } @@ -419,6 +566,8 @@ pub enum Error { #[cfg(test)] mod tests { use super::*; + use crate::path::{cloud::CloudPath, parsed::DirsAndFileName, ObjectStorePath}; + use futures::stream; type Error = Box; @@ -502,6 +651,7 @@ mod tests { let data = Bytes::from("arbitrary data"); let files: Vec<_> = [ + "test_file", "mydb/wal/000/000/000.segment", "mydb/wal/000/000/001.segment", "mydb/wal/000/000/002.segment", @@ -510,7 +660,7 @@ mod tests { "mydb/data/whatevs", ] .iter() - .map(|&s| path::Path::from_cloud_unchecked(s)) + .map(|&s| str_to_path(s, storage)) .collect(); let time_before_creation = Utc::now(); @@ -597,6 +747,24 @@ mod tests { .freeze()) } + /// Parse a str as a `CloudPath` into a `DirAndFileName` even though the + /// associated storage might not be cloud storage to reuse the cloud + /// path parsing logic. Then convert into the correct type of path for + /// the given storage. 
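+    ///
+    /// For example, `str_to_path("mydb/wal/000/000/001.segment", &storage)`
+    /// parses the string into directories `mydb/wal/000/000` plus file name
+    /// `001.segment`, then rebuilds those parts as whatever path type
+    /// `storage` hands out.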
+ fn str_to_path(val: &str, storage: &ObjectStore) -> path::Path { + let cloud_path = CloudPath::raw(val); + let parsed: DirsAndFileName = cloud_path.into(); + + let mut path = storage.new_path(); + for dir in parsed.directories { + path.push_dir(dir.to_string()) + } + if let Some(file_name) = parsed.file_name { + path.set_file_name(file_name.to_string()); + } + path + } + async fn delete_fixtures(storage: &ObjectStore) { let files: Vec<_> = [ "test_file", @@ -608,7 +776,7 @@ mod tests { "mydb/data/whatevs", ] .iter() - .map(|&s| path::Path::from_cloud_unchecked(s)) + .map(|&s| str_to_path(s, storage)) .collect(); for f in &files { diff --git a/object_store/src/path.rs b/object_store/src/path.rs index 1769d33670..e1a875f100 100644 --- a/object_store/src/path.rs +++ b/object_store/src/path.rs @@ -5,6 +5,7 @@ use std::{mem, path::PathBuf}; /// Paths that came from or are to be used in cloud-based object storage pub mod cloud; +use cloud::CloudPath; /// Paths that come from or are to be used in file-based object storage pub mod file; @@ -68,17 +69,6 @@ impl ObjectStorePath for Path { } impl Path { - /// For use when receiving a path from an object store API directly, not - /// when building a path. Assumes DELIMITER is the separator. - /// - /// TODO: This should only be available to cloud storage - pub fn from_cloud_unchecked(path: impl Into) -> Self { - let path = path.into(); - Self { - inner: PathRepresentation::RawCloud(path), - } - } - /// For use when receiving a path from a filesystem directly, not /// when building a path. Uses the standard library's path splitting /// implementation to separate into parts. @@ -147,11 +137,16 @@ impl From for Path { } } -/// Temporary +/// Defines which object stores use which path logic. #[derive(Clone, Eq, Debug)] pub enum PathRepresentation { - /// Will be transformed into a CloudPath type - RawCloud(String), + /// Amazon storage + AmazonS3(CloudPath), + /// GCP storage + GoogleCloudStorage(CloudPath), + /// Microsoft Azure Blob storage + MicrosoftAzure(CloudPath), + /// Will be transformed into a FilePath type RawPathBuf(PathBuf), /// Will be able to be used directly @@ -221,7 +216,9 @@ impl PathRepresentation { fn display(&self) -> String { match self { Self::Parts(dirs_and_file_name) => dirs_and_file_name.display(), - Self::RawCloud(path) => path.to_owned(), + Self::AmazonS3(path) | Self::GoogleCloudStorage(path) | Self::MicrosoftAzure(path) => { + path.display().to_string() + } Self::RawPathBuf(path_buf) => path_buf.display().to_string(), } } @@ -384,43 +381,9 @@ mod tests { ); } - #[test] - fn convert_raw_before_partial_eq() { - // dir and file_name - let cloud = Path::from_cloud_unchecked("test_dir/test_file.json"); - let mut built = Path::default(); - built.push_dir("test_dir"); - built.set_file_name("test_file.json"); - - assert_eq!(built, cloud); - - // dir, no file_name - let cloud = Path::from_cloud_unchecked("test_dir"); - let mut built = Path::default(); - built.push_dir("test_dir"); - - assert_eq!(built, cloud); - - // file_name, no dir - let cloud = Path::from_cloud_unchecked("test_file.json"); - let mut built = Path::default(); - built.set_file_name("test_file.json"); - - assert_eq!(built, cloud); - - // empty - let cloud = Path::from_cloud_unchecked(""); - let built = Path::default(); - - assert_eq!(built, cloud); - } - #[test] fn path_rep_conversions() { // dir and file name - let cloud = PathRepresentation::RawCloud("foo/bar/blah.json".into()); - let cloud_parts: DirsAndFileName = cloud.into(); - let path_buf = 
PathRepresentation::RawPathBuf("foo/bar/blah.json".into()); let path_buf_parts: DirsAndFileName = path_buf.into(); @@ -429,44 +392,27 @@ mod tests { expected_parts.push_dir("bar"); expected_parts.file_name = Some("blah.json".into()); - assert_eq!(cloud_parts, expected_parts); assert_eq!(path_buf_parts, expected_parts); // dir, no file name - let cloud = PathRepresentation::RawCloud("foo/bar".into()); - let cloud_parts: DirsAndFileName = cloud.into(); - let path_buf = PathRepresentation::RawPathBuf("foo/bar".into()); let path_buf_parts: DirsAndFileName = path_buf.into(); expected_parts.file_name = None; - assert_eq!(cloud_parts, expected_parts); assert_eq!(path_buf_parts, expected_parts); // no dir, file name - let cloud = PathRepresentation::RawCloud("blah.json".into()); - let cloud_parts: DirsAndFileName = cloud.into(); - let path_buf = PathRepresentation::RawPathBuf("blah.json".into()); let path_buf_parts: DirsAndFileName = path_buf.into(); - assert!(cloud_parts.directories.is_empty()); - assert_eq!(cloud_parts.file_name.unwrap().encoded(), "blah.json"); - assert!(path_buf_parts.directories.is_empty()); assert_eq!(path_buf_parts.file_name.unwrap().encoded(), "blah.json"); // empty - let cloud = PathRepresentation::RawCloud("".into()); - let cloud_parts: DirsAndFileName = cloud.into(); - let path_buf = PathRepresentation::RawPathBuf("".into()); let path_buf_parts: DirsAndFileName = path_buf.into(); - assert!(cloud_parts.directories.is_empty()); - assert!(cloud_parts.file_name.is_none()); - assert!(path_buf_parts.directories.is_empty()); assert!(path_buf_parts.file_name.is_none()); } diff --git a/object_store/src/path/cloud.rs b/object_store/src/path/cloud.rs index c2d545e4d3..4d346b4fe7 100644 --- a/object_store/src/path/cloud.rs +++ b/object_store/src/path/cloud.rs @@ -1,22 +1,53 @@ -use super::{PathPart, PathRepresentation, DELIMITER}; +use super::{DirsAndFileName, ObjectStorePath, PathPart, DELIMITER}; + +use std::mem; use itertools::Itertools; -/// Converts `PathRepresentation`s to `String`s that are appropriate for use as -/// locations in cloud storage. -#[derive(Debug, Clone, Copy)] -pub struct CloudConverter {} +/// An object storage location suitable for passing to cloud storage APIs such +/// as AWS, GCS, and Azure. 
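+///
+/// A `CloudPath` received from a cloud API starts out raw and is only
+/// parsed when a method needs the parsed form, for example (a sketch):
+///
+/// ```ignore
+/// let mut path = CloudPath::raw("foo/bar");
+/// path.set_file_name("blah.json"); // forces parsing into parts
+/// assert_eq!(path.to_raw(), "foo/bar/blah.json");
+/// ```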
+#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct CloudPath { + inner: CloudPathRepresentation, +} -impl CloudConverter { - /// Creates a cloud storage location by joining this `PathRepresentation`'s +impl ObjectStorePath for CloudPath { + fn set_file_name(&mut self, part: impl Into) { + self.inner = mem::take(&mut self.inner).set_file_name(part); + } + + fn push_dir(&mut self, part: impl Into) { + self.inner = mem::take(&mut self.inner).push_dir(part); + } + + fn push_all_dirs<'a>(&mut self, parts: impl AsRef<[&'a str]>) { + self.inner = mem::take(&mut self.inner).push_all_dirs(parts); + } + + fn display(&self) -> String { + self.to_raw() + } +} + +impl CloudPath { + /// Creates a cloud storage location from a string received from a cloud + /// storage API without parsing or allocating unless other methods are + /// called on this instance that need it + pub fn raw(path: impl Into) -> Self { + let path = path.into(); + Self { + inner: CloudPathRepresentation::Raw(path), + } + } + + /// Creates a cloud storage location by joining this `CloudPath`'s /// parts with `DELIMITER` - pub fn convert(object_store_path: &PathRepresentation) -> String { - match object_store_path { - PathRepresentation::RawCloud(path) => path.to_owned(), - PathRepresentation::RawPathBuf(_path) => { - todo!("convert"); - } - PathRepresentation::Parts(dirs_and_file_name) => { + pub fn to_raw(&self) -> String { + use CloudPathRepresentation::*; + + match &self.inner { + Raw(path) => path.to_owned(), + Parsed(dirs_and_file_name) => { let mut path = dirs_and_file_name .directories .iter() @@ -35,6 +66,105 @@ impl CloudConverter { } } +impl From for DirsAndFileName { + fn from(cloud_path: CloudPath) -> Self { + cloud_path.inner.into() + } +} + +impl From for CloudPath { + fn from(dirs_and_file_name: DirsAndFileName) -> Self { + Self { + inner: CloudPathRepresentation::Parsed(dirs_and_file_name), + } + } +} + +#[derive(Debug, Clone, Eq)] +enum CloudPathRepresentation { + Raw(String), + Parsed(DirsAndFileName), +} + +impl Default for CloudPathRepresentation { + fn default() -> Self { + Self::Parsed(DirsAndFileName::default()) + } +} + +impl PartialEq for CloudPathRepresentation { + fn eq(&self, other: &Self) -> bool { + use CloudPathRepresentation::*; + match (self, other) { + (Parsed(self_parts), Parsed(other_parts)) => self_parts == other_parts, + (Parsed(self_parts), _) => { + let other_parts: DirsAndFileName = other.to_owned().into(); + *self_parts == other_parts + } + (_, Parsed(other_parts)) => { + let self_parts: DirsAndFileName = self.to_owned().into(); + self_parts == *other_parts + } + _ => { + let self_parts: DirsAndFileName = self.to_owned().into(); + let other_parts: DirsAndFileName = other.to_owned().into(); + self_parts == other_parts + } + } + } +} + +impl CloudPathRepresentation { + fn push_dir(self, part: impl Into) -> Self { + let mut dirs_and_file_name: DirsAndFileName = self.into(); + + dirs_and_file_name.push_dir(part); + Self::Parsed(dirs_and_file_name) + } + + fn push_all_dirs<'a>(self, parts: impl AsRef<[&'a str]>) -> Self { + let mut dirs_and_file_name: DirsAndFileName = self.into(); + + dirs_and_file_name.push_all_dirs(parts); + Self::Parsed(dirs_and_file_name) + } + + fn set_file_name(self, part: impl Into) -> Self { + let mut dirs_and_file_name: DirsAndFileName = self.into(); + + dirs_and_file_name.set_file_name(part); + Self::Parsed(dirs_and_file_name) + } +} + +impl From for DirsAndFileName { + fn from(cloud_path_rep: CloudPathRepresentation) -> Self { + use 
CloudPathRepresentation::*; + + match cloud_path_rep { + Raw(path) => { + let mut parts: Vec = path + .split_terminator(DELIMITER) + .map(|s| PathPart(s.to_string())) + .collect(); + let maybe_file_name = match parts.pop() { + Some(file) if file.encoded().contains('.') => Some(file), + Some(dir) => { + parts.push(dir); + None + } + None => None, + }; + Self { + directories: parts, + file_name: maybe_file_name, + } + } + Parsed(dirs_and_file_name) => dirs_and_file_name, + } + } +} + #[cfg(test)] mod tests { use super::*; @@ -44,10 +174,10 @@ mod tests { // Use case: a file named `test_file.json` exists in object storage and it // should be returned for a search on prefix `test`, so the prefix path // should not get a trailing delimiter automatically added - let prefix = PathRepresentation::default(); - let prefix = prefix.set_file_name("test"); + let mut prefix = CloudPath::default(); + prefix.set_file_name("test"); - let converted = CloudConverter::convert(&prefix); + let converted = prefix.to_raw(); assert_eq!(converted, "test"); } @@ -56,29 +186,96 @@ mod tests { // Use case: files exist in object storage named `foo/bar.json` and // `foo_test.json`. A search for the prefix `foo/` should return // `foo/bar.json` but not `foo_test.json'. - let prefix = PathRepresentation::default(); - let prefix = prefix.push_dir("test"); + let mut prefix = CloudPath::default(); + prefix.push_dir("test"); - let converted = CloudConverter::convert(&prefix); + let converted = prefix.to_raw(); assert_eq!(converted, "test/"); } #[test] fn push_encodes() { - let location = PathRepresentation::default(); - let location = location.push_dir("foo/bar"); - let location = location.push_dir("baz%2Ftest"); + let mut location = CloudPath::default(); + location.push_dir("foo/bar"); + location.push_dir("baz%2Ftest"); - let converted = CloudConverter::convert(&location); + let converted = location.to_raw(); assert_eq!(converted, "foo%2Fbar/baz%252Ftest/"); } #[test] fn push_all_encodes() { - let location = PathRepresentation::default(); - let location = location.push_all_dirs(&["foo/bar", "baz%2Ftest"]); + let mut location = CloudPath::default(); + location.push_all_dirs(&["foo/bar", "baz%2Ftest"]); - let converted = CloudConverter::convert(&location); + let converted = location.to_raw(); assert_eq!(converted, "foo%2Fbar/baz%252Ftest/"); } + + #[test] + fn convert_raw_before_partial_eq() { + // dir and file_name + let cloud = CloudPath::raw("test_dir/test_file.json"); + let mut built = CloudPath::default(); + built.push_dir("test_dir"); + built.set_file_name("test_file.json"); + + assert_eq!(built, cloud); + + // dir, no file_name + let cloud = CloudPath::raw("test_dir"); + let mut built = CloudPath::default(); + built.push_dir("test_dir"); + + assert_eq!(built, cloud); + + // file_name, no dir + let cloud = CloudPath::raw("test_file.json"); + let mut built = CloudPath::default(); + built.set_file_name("test_file.json"); + + assert_eq!(built, cloud); + + // empty + let cloud = CloudPath::raw(""); + let built = CloudPath::default(); + + assert_eq!(built, cloud); + } + + #[test] + fn conversions() { + // dir and file name + let cloud = CloudPath::raw("foo/bar/blah.json"); + let cloud_parts: DirsAndFileName = cloud.into(); + + let mut expected_parts = DirsAndFileName::default(); + expected_parts.push_dir("foo"); + expected_parts.push_dir("bar"); + expected_parts.file_name = Some("blah.json".into()); + + assert_eq!(cloud_parts, expected_parts); + + // dir, no file name + let cloud = CloudPath::raw("foo/bar"); + let 
cloud_parts: DirsAndFileName = cloud.into(); + + expected_parts.file_name = None; + + assert_eq!(cloud_parts, expected_parts); + + // no dir, file name + let cloud = CloudPath::raw("blah.json"); + let cloud_parts: DirsAndFileName = cloud.into(); + + assert!(cloud_parts.directories.is_empty()); + assert_eq!(cloud_parts.file_name.unwrap().encoded(), "blah.json"); + + // empty + let cloud = CloudPath::raw(""); + let cloud_parts: DirsAndFileName = cloud.into(); + + assert!(cloud_parts.directories.is_empty()); + assert!(cloud_parts.file_name.is_none()); + } } diff --git a/object_store/src/path/file.rs b/object_store/src/path/file.rs index 3551f1f9dc..fc4922bc86 100644 --- a/object_store/src/path/file.rs +++ b/object_store/src/path/file.rs @@ -13,9 +13,6 @@ impl FileConverter { /// platform. pub fn convert(object_store_path: &PathRepresentation) -> PathBuf { match object_store_path { - PathRepresentation::RawCloud(_path) => { - todo!("convert"); - } PathRepresentation::RawPathBuf(path) => path.to_owned(), PathRepresentation::Parts(dirs_and_file_name) => { let mut path: PathBuf = dirs_and_file_name @@ -28,6 +25,7 @@ impl FileConverter { } path } + _ => unreachable!(), } } } diff --git a/object_store/src/path/parsed.rs b/object_store/src/path/parsed.rs index 6914779b2a..73bd4f1569 100644 --- a/object_store/src/path/parsed.rs +++ b/object_store/src/path/parsed.rs @@ -127,24 +127,9 @@ impl DirsAndFileName { impl From for DirsAndFileName { fn from(path_rep: PathRepresentation) -> Self { match path_rep { - PathRepresentation::RawCloud(path) => { - let mut parts: Vec = path - .split_terminator(DELIMITER) - .map(|s| PathPart(s.to_string())) - .collect(); - let maybe_file_name = match parts.pop() { - Some(file) if file.encoded().contains('.') => Some(file), - Some(dir) => { - parts.push(dir); - None - } - None => None, - }; - Self { - directories: parts, - file_name: maybe_file_name, - } - } + PathRepresentation::AmazonS3(path) + | PathRepresentation::GoogleCloudStorage(path) + | PathRepresentation::MicrosoftAzure(path) => path.into(), PathRepresentation::RawPathBuf(path) => { let mut parts: Vec = path .iter() From 596a73f56ae4dfbd8b33e3bd491b33ebc2fe07a0 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Tue, 19 Jan 2021 20:32:05 -0500 Subject: [PATCH 05/10] refactor: Extract a FilePath type for use in file storage Enforces that on-disk storage will only ever use file paths. More cleanup coming! --- object_store/src/disk.rs | 31 ++-- object_store/src/lib.rs | 53 +++++- object_store/src/path.rs | 99 +---------- object_store/src/path/file.rs | 304 ++++++++++++++++++++++++++++++-- object_store/src/path/parsed.rs | 27 +-- 5 files changed, 358 insertions(+), 156 deletions(-) diff --git a/object_store/src/disk.rs b/object_store/src/disk.rs index c8fd09caa1..1f015d1ff7 100644 --- a/object_store/src/disk.rs +++ b/object_store/src/disk.rs @@ -1,7 +1,7 @@ //! This module contains the IOx implementation for using local disk as the //! object store. use crate::{ - path::file::FileConverter, DataDoesNotMatchLength, Result, UnableToCopyDataToFile, + path::file::FilePath, DataDoesNotMatchLength, Result, UnableToCopyDataToFile, UnableToCreateDir, UnableToCreateFile, UnableToDeleteFile, UnableToOpenFile, UnableToPutDataInMemory, UnableToReadBytes, }; @@ -17,30 +17,30 @@ use walkdir::WalkDir; /// cloud storage provider. #[derive(Debug)] pub struct File { - root: crate::path::Path, + root: FilePath, } impl File { /// Create new filesystem storage. 
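     ///
     /// For example (a sketch): `File::new("/tmp/iox")` keeps all objects
     /// under `/tmp/iox`, with each `FilePath` interpreted relative to that
     /// root.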
pub fn new(root: impl Into) -> Self { Self { - root: crate::path::Path::from_path_buf_unchecked(root), + root: FilePath::raw(root), } } - fn path(&self, location: &crate::path::Path) -> PathBuf { + fn path(&self, location: &FilePath) -> PathBuf { let mut path = self.root.clone(); path.push_path(location); - FileConverter::convert(&path.inner) + path.to_raw() } /// Return a new location path appropriate for this object storage - pub fn new_path(&self) -> crate::path::Path { - crate::path::Path::default() + pub fn new_path(&self) -> FilePath { + FilePath::default() } /// Save the provided bytes to the specified location. - pub async fn put(&self, location: &crate::path::Path, bytes: S, length: usize) -> Result<()> + pub async fn put(&self, location: &FilePath, bytes: S, length: usize) -> Result<()> where S: Stream> + Send + Sync + 'static, { @@ -86,10 +86,7 @@ impl File { } /// Return the bytes that are stored at the specified location. - pub async fn get( - &self, - location: &crate::path::Path, - ) -> Result>> { + pub async fn get(&self, location: &FilePath) -> Result>> { let path = self.path(location); let file = fs::File::open(&path) @@ -103,7 +100,7 @@ impl File { } /// Delete the object at the specified location. - pub async fn delete(&self, location: &crate::path::Path) -> Result<()> { + pub async fn delete(&self, location: &FilePath) -> Result<()> { let path = self.path(location); fs::remove_file(&path) .await @@ -114,9 +111,9 @@ impl File { /// List all the objects with the given prefix. pub async fn list<'a>( &'a self, - prefix: Option<&'a crate::path::Path>, - ) -> Result>> + 'a> { - let root_path = FileConverter::convert(&self.root.inner); + prefix: Option<&'a FilePath>, + ) -> Result>> + 'a> { + let root_path = self.root.to_raw(); let walkdir = WalkDir::new(&root_path) // Don't include the root directory itself .min_depth(1); @@ -129,7 +126,7 @@ impl File { let relative_path = file.path().strip_prefix(&root_path).expect( "Must start with root path because this came from walking the root", ); - crate::path::Path::from_path_buf_unchecked(relative_path) + FilePath::raw(relative_path) }) .filter(|name| prefix.map_or(true, |p| name.prefix_matches(p))) .map(|name| Ok(vec![name])) diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs index 1e6708cc7d..797de1bdec 100644 --- a/object_store/src/lib.rs +++ b/object_store/src/lib.rs @@ -78,7 +78,9 @@ impl ObjectStore { InMemory(in_mem) => path::Path { inner: path::PathRepresentation::Parts(in_mem.new_path()), }, - File(file) => file.new_path(), + File(file) => path::Path { + inner: path::PathRepresentation::File(file.new_path()), + }, MicrosoftAzure(azure) => path::Path { inner: path::PathRepresentation::MicrosoftAzure(azure.new_path()), }, @@ -111,7 +113,12 @@ impl ObjectStore { inner: PathRepresentation::Parts(location), }, ) => in_mem.put(location, bytes, length).await?, - (File(file), _) => file.put(location, bytes, length).await?, + ( + File(file), + path::Path { + inner: PathRepresentation::File(location), + }, + ) => file.put(location, bytes, length).await?, ( MicrosoftAzure(azure), path::Path { @@ -147,7 +154,12 @@ impl ObjectStore { inner: PathRepresentation::Parts(location), }, ) => in_mem.get(location).await?.boxed(), - (File(file), _) => file.get(location).await?.boxed(), + ( + File(file), + path::Path { + inner: PathRepresentation::File(location), + }, + ) => file.get(location).await?.boxed(), ( MicrosoftAzure(azure), path::Path { @@ -182,7 +194,12 @@ impl ObjectStore { inner: PathRepresentation::Parts(location), }, ) 
=> in_mem.delete(location).await?, - (File(file), _) => file.delete(location).await?, + ( + File(file), + path::Path { + inner: PathRepresentation::File(location), + }, + ) => file.delete(location).await?, ( MicrosoftAzure(azure), path::Path { @@ -274,7 +291,33 @@ impl ObjectStore { .map_ok(|s| s.into_iter().map(Into::into).collect()) .boxed(), - (File(file), _) => file.list(prefix).await?.boxed(), + ( + File(file), + Some(path::Path { + inner: PathRepresentation::File(prefix), + }), + ) => file + .list(Some(prefix)) + .await? + .map_ok(|s| { + s.into_iter() + .map(|p| path::Path { + inner: PathRepresentation::File(p), + }) + .collect() + }) + .boxed(), + (File(file), None) => file + .list(None) + .await? + .map_ok(|s| { + s.into_iter() + .map(|p| path::Path { + inner: PathRepresentation::File(p), + }) + .collect() + }) + .boxed(), ( MicrosoftAzure(azure), Some(path::Path { diff --git a/object_store/src/path.rs b/object_store/src/path.rs index e1a875f100..4ba3b9dfc0 100644 --- a/object_store/src/path.rs +++ b/object_store/src/path.rs @@ -1,7 +1,7 @@ //! This module contains code for abstracting object locations that work //! across different backing implementations and platforms. -use std::{mem, path::PathBuf}; +use std::mem; /// Paths that came from or are to be used in cloud-based object storage pub mod cloud; @@ -9,6 +9,7 @@ use cloud::CloudPath; /// Paths that come from or are to be used in file-based object storage pub mod file; +use file::FilePath; /// Maximally processed storage-independent paths. pub mod parsed; @@ -69,18 +70,6 @@ impl ObjectStorePath for Path { } impl Path { - /// For use when receiving a path from a filesystem directly, not - /// when building a path. Uses the standard library's path splitting - /// implementation to separate into parts. - /// - /// TODO: This should only be available to file storage - pub fn from_path_buf_unchecked(path: impl Into) -> Self { - let path = path.into(); - Self { - inner: PathRepresentation::RawPathBuf(path), - } - } - /// Add a `PathPart` to the end of the path. 
pub fn push_part_as_dir(&mut self, part: &PathPart) { self.inner = mem::take(&mut self.inner).push_part_as_dir(part); @@ -146,9 +135,9 @@ pub enum PathRepresentation { GoogleCloudStorage(CloudPath), /// Microsoft Azure Blob storage MicrosoftAzure(CloudPath), + /// Local file system storage + File(FilePath), - /// Will be transformed into a FilePath type - RawPathBuf(PathBuf), /// Will be able to be used directly Parts(DirsAndFileName), } @@ -217,9 +206,9 @@ impl PathRepresentation { match self { Self::Parts(dirs_and_file_name) => dirs_and_file_name.display(), Self::AmazonS3(path) | Self::GoogleCloudStorage(path) | Self::MicrosoftAzure(path) => { - path.display().to_string() + path.display() } - Self::RawPathBuf(path_buf) => path_buf.display().to_string(), + Self::File(path) => path.display(), } } } @@ -381,82 +370,6 @@ mod tests { ); } - #[test] - fn path_rep_conversions() { - // dir and file name - let path_buf = PathRepresentation::RawPathBuf("foo/bar/blah.json".into()); - let path_buf_parts: DirsAndFileName = path_buf.into(); - - let mut expected_parts = DirsAndFileName::default(); - expected_parts.push_dir("foo"); - expected_parts.push_dir("bar"); - expected_parts.file_name = Some("blah.json".into()); - - assert_eq!(path_buf_parts, expected_parts); - - // dir, no file name - let path_buf = PathRepresentation::RawPathBuf("foo/bar".into()); - let path_buf_parts: DirsAndFileName = path_buf.into(); - - expected_parts.file_name = None; - - assert_eq!(path_buf_parts, expected_parts); - - // no dir, file name - let path_buf = PathRepresentation::RawPathBuf("blah.json".into()); - let path_buf_parts: DirsAndFileName = path_buf.into(); - - assert!(path_buf_parts.directories.is_empty()); - assert_eq!(path_buf_parts.file_name.unwrap().encoded(), "blah.json"); - - // empty - let path_buf = PathRepresentation::RawPathBuf("".into()); - let path_buf_parts: DirsAndFileName = path_buf.into(); - - assert!(path_buf_parts.directories.is_empty()); - assert!(path_buf_parts.file_name.is_none()); - } - - #[test] - fn path_buf_to_dirs_and_file_name_conversion() { - // Last section ending in `.json` is a file name - let path_buf = PathRepresentation::RawPathBuf("/one/two/blah.json".into()); - let path_buf_parts: DirsAndFileName = path_buf.into(); - assert_eq!(path_buf_parts.directories.len(), 3); - assert_eq!(path_buf_parts.file_name.unwrap().0, "blah.json"); - - // Last section ending in `.segment` is a file name - let path_buf = PathRepresentation::RawPathBuf("/one/two/blah.segment".into()); - let path_buf_parts: DirsAndFileName = path_buf.into(); - assert_eq!(path_buf_parts.directories.len(), 3); - assert_eq!(path_buf_parts.file_name.unwrap().0, "blah.segment"); - - // Last section ending in `.parquet` is a file name - let path_buf = PathRepresentation::RawPathBuf("/one/two/blah.parquet".into()); - let path_buf_parts: DirsAndFileName = path_buf.into(); - assert_eq!(path_buf_parts.directories.len(), 3); - assert_eq!(path_buf_parts.file_name.unwrap().0, "blah.parquet"); - - // Last section ending in `.txt` is NOT a file name; we don't recognize that - // extension - let path_buf = PathRepresentation::RawPathBuf("/one/two/blah.txt".into()); - let path_buf_parts: DirsAndFileName = path_buf.into(); - assert_eq!(path_buf_parts.directories.len(), 4); - assert!(path_buf_parts.file_name.is_none()); - - // Last section containing a `.` isn't a file name - let path_buf = PathRepresentation::RawPathBuf("/one/two/blah.blah".into()); - let path_buf_parts: DirsAndFileName = path_buf.into(); - 
assert_eq!(path_buf_parts.directories.len(), 4); - assert!(path_buf_parts.file_name.is_none()); - - // Last section starting with a `.` isn't a file name (macos temp dirs do this) - let path_buf = PathRepresentation::RawPathBuf("/one/two/.blah".into()); - let path_buf_parts: DirsAndFileName = path_buf.into(); - assert_eq!(path_buf_parts.directories.len(), 4); - assert!(path_buf_parts.file_name.is_none()); - } - #[test] fn parts_after_prefix_behavior() { let mut existing_path = DirsAndFileName::default(); diff --git a/object_store/src/path/file.rs b/object_store/src/path/file.rs index fc4922bc86..d431175542 100644 --- a/object_store/src/path/file.rs +++ b/object_store/src/path/file.rs @@ -1,20 +1,50 @@ -use super::{PathPart, PathRepresentation}; +use super::{DirsAndFileName, ObjectStorePath, PathPart}; -use std::path::PathBuf; +use std::{mem, path::PathBuf}; -/// Converts `ObjectStorePath`s to `String`s that are appropriate for use as -/// locations in filesystem storage. -#[derive(Debug, Clone, Copy)] -pub struct FileConverter {} +/// An object storage location suitable for passing to disk based object +/// storage. +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct FilePath { + inner: FilePathRepresentation, +} -impl FileConverter { - /// Creates a filesystem `PathBuf` location by using the standard library's - /// `PathBuf` building implementation appropriate for the current - /// platform. - pub fn convert(object_store_path: &PathRepresentation) -> PathBuf { - match object_store_path { - PathRepresentation::RawPathBuf(path) => path.to_owned(), - PathRepresentation::Parts(dirs_and_file_name) => { +impl ObjectStorePath for FilePath { + fn set_file_name(&mut self, part: impl Into) { + self.inner = mem::take(&mut self.inner).set_file_name(part); + } + + fn push_dir(&mut self, part: impl Into) { + self.inner = mem::take(&mut self.inner).push_dir(part); + } + + fn push_all_dirs<'a>(&mut self, parts: impl AsRef<[&'a str]>) { + self.inner = mem::take(&mut self.inner).push_all_dirs(parts); + } + + fn display(&self) -> String { + todo!() + } +} + +impl FilePath { + /// Creates a file storage location from a `PathBuf` without parsing or + /// allocating unless other methods are called on this instance that + /// need it + pub fn raw(path: impl Into) -> Self { + let path = path.into(); + Self { + inner: FilePathRepresentation::Raw(path), + } + } + + /// Creates a file location by converting into a `PathBuf` + pub fn to_raw(&self) -> PathBuf { + use FilePathRepresentation::*; + + match &self.inner { + Raw(path) => path.to_owned(), + Parsed(dirs_and_file_name) => { let mut path: PathBuf = dirs_and_file_name .directories .iter() @@ -25,7 +55,251 @@ impl FileConverter { } path } - _ => unreachable!(), + } + } + /// Add the parts of `path` to the end of this path. Notably does + /// *not* behave as `PathBuf::push` does: there is no way to replace the + /// root. If `self` has a file name, that will be removed, then the + /// directories of `path` will be appended, then any file name of `path` + /// will be assigned to `self`. + pub fn push_path(&mut self, path: &Self) { + self.inner = mem::take(&mut self.inner).push_path(path) + } + + /// Whether the prefix is the start of this path or not. 
+ pub fn prefix_matches(&self, prefix: &Self) -> bool { + self.inner.prefix_matches(&prefix.inner) + } +} + +impl From for DirsAndFileName { + fn from(file_path: FilePath) -> Self { + file_path.inner.into() + } +} + +impl From for FilePath { + fn from(dirs_and_file_name: DirsAndFileName) -> Self { + Self { + inner: FilePathRepresentation::Parsed(dirs_and_file_name), } } } + +#[derive(Debug, Clone, Eq)] +enum FilePathRepresentation { + Raw(PathBuf), + Parsed(DirsAndFileName), +} + +impl Default for FilePathRepresentation { + fn default() -> Self { + Self::Parsed(DirsAndFileName::default()) + } +} + +impl PartialEq for FilePathRepresentation { + fn eq(&self, other: &Self) -> bool { + use FilePathRepresentation::*; + match (self, other) { + (Parsed(self_parts), Parsed(other_parts)) => self_parts == other_parts, + (Parsed(self_parts), _) => { + let other_parts: DirsAndFileName = other.to_owned().into(); + *self_parts == other_parts + } + (_, Parsed(other_parts)) => { + let self_parts: DirsAndFileName = self.to_owned().into(); + self_parts == *other_parts + } + _ => { + let self_parts: DirsAndFileName = self.to_owned().into(); + let other_parts: DirsAndFileName = other.to_owned().into(); + self_parts == other_parts + } + } + } +} + +impl FilePathRepresentation { + fn push_dir(self, part: impl Into) -> Self { + let mut dirs_and_file_name: DirsAndFileName = self.into(); + + dirs_and_file_name.push_dir(part); + Self::Parsed(dirs_and_file_name) + } + + fn push_all_dirs<'a>(self, parts: impl AsRef<[&'a str]>) -> Self { + let mut dirs_and_file_name: DirsAndFileName = self.into(); + + dirs_and_file_name.push_all_dirs(parts); + Self::Parsed(dirs_and_file_name) + } + + fn set_file_name(self, part: impl Into) -> Self { + let mut dirs_and_file_name: DirsAndFileName = self.into(); + + dirs_and_file_name.set_file_name(part); + Self::Parsed(dirs_and_file_name) + } + + /// Add the parts of `path` to the end of this path. Notably does + /// *not* behave as `PathBuf::push` does: there is no way to replace the + /// root. If `self` has a file name, that will be removed, then the + /// directories of `path` will be appended, then any file name of `path` + /// will be assigned to `self`. 
+ fn push_path(self, path: &FilePath) -> Self { + let DirsAndFileName { + directories: path_dirs, + file_name: path_file_name, + } = path.inner.to_owned().into(); + let mut dirs_and_file_name: DirsAndFileName = self.into(); + + dirs_and_file_name.directories.extend(path_dirs); + dirs_and_file_name.file_name = path_file_name; + + Self::Parsed(dirs_and_file_name) + } + fn prefix_matches(&self, prefix: &Self) -> bool { + use FilePathRepresentation::*; + match (self, prefix) { + (Parsed(self_parts), Parsed(prefix_parts)) => self_parts.prefix_matches(prefix_parts), + (Parsed(self_parts), _) => { + let prefix_parts: DirsAndFileName = prefix.to_owned().into(); + self_parts.prefix_matches(&prefix_parts) + } + (_, Parsed(prefix_parts)) => { + let self_parts: DirsAndFileName = self.to_owned().into(); + self_parts.prefix_matches(prefix_parts) + } + _ => { + let self_parts: DirsAndFileName = self.to_owned().into(); + let prefix_parts: DirsAndFileName = prefix.to_owned().into(); + self_parts.prefix_matches(&prefix_parts) + } + } + } +} + +impl From for DirsAndFileName { + fn from(file_path_rep: FilePathRepresentation) -> Self { + use FilePathRepresentation::*; + + match file_path_rep { + Raw(path) => { + let mut parts: Vec = path + .iter() + .flat_map(|s| s.to_os_string().into_string().map(PathPart)) + .collect(); + + let maybe_file_name = match parts.pop() { + Some(file) + if !file.encoded().starts_with('.') + && (file.encoded().ends_with(".json") + || file.encoded().ends_with(".parquet") + || file.encoded().ends_with(".segment")) => + { + Some(file) + } + Some(dir) => { + parts.push(dir); + None + } + None => None, + }; + Self { + directories: parts, + file_name: maybe_file_name, + } + } + Parsed(dirs_and_file_name) => dirs_and_file_name, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn path_buf_to_dirs_and_file_name_conversion() { + // Last section ending in `.json` is a file name + let path_buf: PathBuf = "/one/two/blah.json".into(); + let file_path = FilePath::raw(path_buf); + let parts: DirsAndFileName = file_path.into(); + assert_eq!(parts.directories.len(), 3); + assert_eq!(parts.file_name.unwrap().0, "blah.json"); + + // Last section ending in `.segment` is a file name + let path_buf: PathBuf = "/one/two/blah.segment".into(); + let file_path = FilePath::raw(path_buf); + let parts: DirsAndFileName = file_path.into(); + assert_eq!(parts.directories.len(), 3); + assert_eq!(parts.file_name.unwrap().0, "blah.segment"); + + // Last section ending in `.parquet` is a file name + let path_buf: PathBuf = "/one/two/blah.parquet".into(); + let file_path = FilePath::raw(path_buf); + let parts: DirsAndFileName = file_path.into(); + assert_eq!(parts.directories.len(), 3); + assert_eq!(parts.file_name.unwrap().0, "blah.parquet"); + + // Last section ending in `.txt` is NOT a file name; we don't recognize that + // extension + let path_buf: PathBuf = "/one/two/blah.txt".into(); + let file_path = FilePath::raw(path_buf); + let parts: DirsAndFileName = file_path.into(); + assert_eq!(parts.directories.len(), 4); + assert!(parts.file_name.is_none()); + // Last section containing a `.` isn't a file name + let path_buf: PathBuf = "/one/two/blah.blah".into(); + let file_path = FilePath::raw(path_buf); + let parts: DirsAndFileName = file_path.into(); + assert_eq!(parts.directories.len(), 4); + assert!(parts.file_name.is_none()); + + // Last section starting with a `.` isn't a file name (macos temp dirs do this) + let path_buf: PathBuf = "/one/two/.blah".into(); + let file_path = 
FilePath::raw(path_buf); + let parts: DirsAndFileName = file_path.into(); + assert_eq!(parts.directories.len(), 4); + assert!(parts.file_name.is_none()); + } + #[test] + fn conversions() { + // dir and file name + let path_buf: PathBuf = "foo/bar/blah.json".into(); + let file_path = FilePath::raw(path_buf); + let parts: DirsAndFileName = file_path.into(); + + let mut expected_parts = DirsAndFileName::default(); + expected_parts.push_dir("foo"); + expected_parts.push_dir("bar"); + expected_parts.file_name = Some("blah.json".into()); + + assert_eq!(parts, expected_parts); + + // dir, no file name + let path_buf: PathBuf = "foo/bar".into(); + let file_path = FilePath::raw(path_buf); + let parts: DirsAndFileName = file_path.into(); + + expected_parts.file_name = None; + + assert_eq!(parts, expected_parts); + + // no dir, file name + let path_buf: PathBuf = "blah.json".into(); + let file_path = FilePath::raw(path_buf); + let parts: DirsAndFileName = file_path.into(); + + assert!(parts.directories.is_empty()); + assert_eq!(parts.file_name.unwrap().encoded(), "blah.json"); + + // empty + let path_buf: PathBuf = "".into(); + let file_path = FilePath::raw(path_buf); + let parts: DirsAndFileName = file_path.into(); + assert!(parts.directories.is_empty()); + assert!(parts.file_name.is_none()); + } +} diff --git a/object_store/src/path/parsed.rs b/object_store/src/path/parsed.rs index 73bd4f1569..3ed062acc6 100644 --- a/object_store/src/path/parsed.rs +++ b/object_store/src/path/parsed.rs @@ -130,32 +130,7 @@ impl From for DirsAndFileName { PathRepresentation::AmazonS3(path) | PathRepresentation::GoogleCloudStorage(path) | PathRepresentation::MicrosoftAzure(path) => path.into(), - PathRepresentation::RawPathBuf(path) => { - let mut parts: Vec = path - .iter() - .flat_map(|s| s.to_os_string().into_string().map(PathPart)) - .collect(); - - let maybe_file_name = match parts.pop() { - Some(file) - if !file.encoded().starts_with('.') - && (file.encoded().ends_with(".json") - || file.encoded().ends_with(".parquet") - || file.encoded().ends_with(".segment")) => - { - Some(file) - } - Some(dir) => { - parts.push(dir); - None - } - None => None, - }; - Self { - directories: parts, - file_name: maybe_file_name, - } - } + PathRepresentation::File(path) => path.into(), PathRepresentation::Parts(dirs_and_file_name) => dirs_and_file_name, } } From c40205b37e5f95d3d4a49166aafea70e1ee5e641 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Tue, 19 Jan 2021 20:37:19 -0500 Subject: [PATCH 06/10] test: Move DirsAndFileName functionality tests with the definition --- object_store/src/path.rs | 177 -------------------------------- object_store/src/path/parsed.rs | 165 +++++++++++++++++++++++++++++ 2 files changed, 165 insertions(+), 177 deletions(-) diff --git a/object_store/src/path.rs b/object_store/src/path.rs index 4ba3b9dfc0..a00f23f9c8 100644 --- a/object_store/src/path.rs +++ b/object_store/src/path.rs @@ -237,180 +237,3 @@ impl PartialEq for PathRepresentation { /// The delimiter to separate object namespaces, creating a directory structure. pub const DELIMITER: &str = "/"; - -#[cfg(test)] -mod tests { - use super::*; - - // Invariants to maintain/document/test: - // - // - always ends in DELIMITER if it's a directory. 
If it's the end object, it - // should have some sort of file extension like .parquet, .json, or .segment - // - does not contain unencoded DELIMITER - // - for file paths: does not escape root dir - // - for object storage: looks like directories - // - Paths that come from object stores directly don't need to be - // parsed/validated - // - Within a process, the same backing store will always be used - // - - #[test] - fn prefix_matches() { - let mut haystack = Path::default(); - haystack.push_all_dirs(&["foo/bar", "baz%2Ftest", "something"]); - - // self starts with self - assert!( - haystack.prefix_matches(&haystack), - "{:?} should have started with {:?}", - haystack, - haystack - ); - - // a longer prefix doesn't match - let mut needle = haystack.clone(); - needle.push_dir("longer now"); - assert!( - !haystack.prefix_matches(&needle), - "{:?} shouldn't have started with {:?}", - haystack, - needle - ); - - // one dir prefix matches - let mut needle = Path::default(); - needle.push_dir("foo/bar"); - assert!( - haystack.prefix_matches(&needle), - "{:?} should have started with {:?}", - haystack, - needle - ); - - // two dir prefix matches - needle.push_dir("baz%2Ftest"); - assert!( - haystack.prefix_matches(&needle), - "{:?} should have started with {:?}", - haystack, - needle - ); - - // partial dir prefix matches - let mut needle = Path::default(); - needle.push_dir("f"); - assert!( - haystack.prefix_matches(&needle), - "{:?} should have started with {:?}", - haystack, - needle - ); - - // one dir and one partial dir matches - let mut needle = Path::default(); - needle.push_all_dirs(&["foo/bar", "baz"]); - assert!( - haystack.prefix_matches(&needle), - "{:?} should have started with {:?}", - haystack, - needle - ); - } - - #[test] - fn prefix_matches_with_file_name() { - let mut haystack = Path::default(); - haystack.push_all_dirs(&["foo/bar", "baz%2Ftest", "something"]); - - let mut needle = haystack.clone(); - - // All directories match and file name is a prefix - haystack.set_file_name("foo.segment"); - needle.set_file_name("foo"); - - assert!( - haystack.prefix_matches(&needle), - "{:?} should have started with {:?}", - haystack, - needle - ); - - // All directories match but file name is not a prefix - needle.set_file_name("e"); - - assert!( - !haystack.prefix_matches(&needle), - "{:?} should not have started with {:?}", - haystack, - needle - ); - - // Not all directories match; file name is a prefix of the next directory; this - // matches - let mut needle = Path::default(); - needle.push_all_dirs(&["foo/bar", "baz%2Ftest"]); - needle.set_file_name("s"); - - assert!( - haystack.prefix_matches(&needle), - "{:?} should have started with {:?}", - haystack, - needle - ); - - // Not all directories match; file name is NOT a prefix of the next directory; - // no match - needle.set_file_name("p"); - - assert!( - !haystack.prefix_matches(&needle), - "{:?} should not have started with {:?}", - haystack, - needle - ); - } - - #[test] - fn parts_after_prefix_behavior() { - let mut existing_path = DirsAndFileName::default(); - existing_path.push_all_dirs(&["apple", "bear", "cow", "dog"]); - existing_path.file_name = Some("egg.json".into()); - - // Prefix with one directory - let mut prefix = DirsAndFileName::default(); - prefix.push_dir("apple"); - let expected_parts: Vec = vec!["bear", "cow", "dog", "egg.json"] - .into_iter() - .map(Into::into) - .collect(); - let parts = existing_path.parts_after_prefix(&prefix).unwrap(); - assert_eq!(parts, expected_parts); - - // Prefix with two 
directories - let mut prefix = DirsAndFileName::default(); - prefix.push_all_dirs(&["apple", "bear"]); - let expected_parts: Vec = vec!["cow", "dog", "egg.json"] - .into_iter() - .map(Into::into) - .collect(); - let parts = existing_path.parts_after_prefix(&prefix).unwrap(); - assert_eq!(parts, expected_parts); - - // Not a prefix - let mut prefix = DirsAndFileName::default(); - prefix.push_dir("cow"); - assert!(existing_path.parts_after_prefix(&prefix).is_none()); - - // Prefix with a partial directory - let mut prefix = DirsAndFileName::default(); - prefix.push_dir("ap"); - assert!(existing_path.parts_after_prefix(&prefix).is_none()); - - // Prefix matches but there aren't any parts after it - let mut existing_path = DirsAndFileName::default(); - existing_path.push_all_dirs(&["apple", "bear", "cow", "dog"]); - let prefix = existing_path.clone(); - let parts = existing_path.parts_after_prefix(&prefix).unwrap(); - assert!(parts.is_empty()); - } -} diff --git a/object_store/src/path/parsed.rs b/object_store/src/path/parsed.rs index 3ed062acc6..cd13217e95 100644 --- a/object_store/src/path/parsed.rs +++ b/object_store/src/path/parsed.rs @@ -147,3 +147,168 @@ impl From for DirsAndFileName { other.inner.into() } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn prefix_matches() { + let mut haystack = DirsAndFileName::default(); + haystack.push_all_dirs(&["foo/bar", "baz%2Ftest", "something"]); + + // self starts with self + assert!( + haystack.prefix_matches(&haystack), + "{:?} should have started with {:?}", + haystack, + haystack + ); + + // a longer prefix doesn't match + let mut needle = haystack.clone(); + needle.push_dir("longer now"); + assert!( + !haystack.prefix_matches(&needle), + "{:?} shouldn't have started with {:?}", + haystack, + needle + ); + + // one dir prefix matches + let mut needle = DirsAndFileName::default(); + needle.push_dir("foo/bar"); + assert!( + haystack.prefix_matches(&needle), + "{:?} should have started with {:?}", + haystack, + needle + ); + + // two dir prefix matches + needle.push_dir("baz%2Ftest"); + assert!( + haystack.prefix_matches(&needle), + "{:?} should have started with {:?}", + haystack, + needle + ); + + // partial dir prefix matches + let mut needle = DirsAndFileName::default(); + needle.push_dir("f"); + assert!( + haystack.prefix_matches(&needle), + "{:?} should have started with {:?}", + haystack, + needle + ); + + // one dir and one partial dir matches + let mut needle = DirsAndFileName::default(); + needle.push_all_dirs(&["foo/bar", "baz"]); + assert!( + haystack.prefix_matches(&needle), + "{:?} should have started with {:?}", + haystack, + needle + ); + } + + #[test] + fn prefix_matches_with_file_name() { + let mut haystack = DirsAndFileName::default(); + haystack.push_all_dirs(&["foo/bar", "baz%2Ftest", "something"]); + + let mut needle = haystack.clone(); + + // All directories match and file name is a prefix + haystack.set_file_name("foo.segment"); + needle.set_file_name("foo"); + + assert!( + haystack.prefix_matches(&needle), + "{:?} should have started with {:?}", + haystack, + needle + ); + + // All directories match but file name is not a prefix + needle.set_file_name("e"); + + assert!( + !haystack.prefix_matches(&needle), + "{:?} should not have started with {:?}", + haystack, + needle + ); + + // Not all directories match; file name is a prefix of the next directory; this + // matches + let mut needle = DirsAndFileName::default(); + needle.push_all_dirs(&["foo/bar", "baz%2Ftest"]); + needle.set_file_name("s"); + + 
assert!( + haystack.prefix_matches(&needle), + "{:?} should have started with {:?}", + haystack, + needle + ); + + // Not all directories match; file name is NOT a prefix of the next directory; + // no match + needle.set_file_name("p"); + + assert!( + !haystack.prefix_matches(&needle), + "{:?} should not have started with {:?}", + haystack, + needle + ); + } + + #[test] + fn parts_after_prefix_behavior() { + let mut existing_path = DirsAndFileName::default(); + existing_path.push_all_dirs(&["apple", "bear", "cow", "dog"]); + existing_path.file_name = Some("egg.json".into()); + + // Prefix with one directory + let mut prefix = DirsAndFileName::default(); + prefix.push_dir("apple"); + let expected_parts: Vec = vec!["bear", "cow", "dog", "egg.json"] + .into_iter() + .map(Into::into) + .collect(); + let parts = existing_path.parts_after_prefix(&prefix).unwrap(); + assert_eq!(parts, expected_parts); + + // Prefix with two directories + let mut prefix = DirsAndFileName::default(); + prefix.push_all_dirs(&["apple", "bear"]); + let expected_parts: Vec = vec!["cow", "dog", "egg.json"] + .into_iter() + .map(Into::into) + .collect(); + let parts = existing_path.parts_after_prefix(&prefix).unwrap(); + assert_eq!(parts, expected_parts); + + // Not a prefix + let mut prefix = DirsAndFileName::default(); + prefix.push_dir("cow"); + assert!(existing_path.parts_after_prefix(&prefix).is_none()); + + // Prefix with a partial directory + let mut prefix = DirsAndFileName::default(); + prefix.push_dir("ap"); + assert!(existing_path.parts_after_prefix(&prefix).is_none()); + + // Prefix matches but there aren't any parts after it + let mut existing_path = DirsAndFileName::default(); + existing_path.push_all_dirs(&["apple", "bear", "cow", "dog"]); + let prefix = existing_path.clone(); + let parts = existing_path.parts_after_prefix(&prefix).unwrap(); + assert!(parts.is_empty()); + } +} From ff6955a43392a97ee161c21453be468b827abf7d Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Tue, 19 Jan 2021 22:36:52 -0500 Subject: [PATCH 07/10] refactor: Extract a trait for ObjectStoreApi with associated path This is the promised cleanup. This structure gets rid of a lot of intermediate structures and encodes through associated types how the object stores and path types are related. The enums are still necessary to avoid having generics leak all over the place, but the object store variants and path variants should always match because they'll always come from the object store trait implementations that use the associated types. 
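To make the shape of the change concrete, the pattern is roughly the following (a minimal, self-contained sketch for illustration only: `TinyStoreApi`, `TinyStore`, `TinyPath`, and the in-memory stand-ins are invented names, and the real `ObjectStoreApi` is async, streaming, and fallible):

    // Each backend names its own path type through an associated type,
    // so a store can only be handed paths it created itself.
    trait TinyStoreApi {
        type Path;
        fn new_path(&self) -> Self::Path;
        fn put(&self, location: &Self::Path, bytes: &[u8]);
    }

    struct InMemory;           // stand-in backend
    struct MemoryPath(String); // stand-in path type

    impl TinyStoreApi for InMemory {
        type Path = MemoryPath;

        fn new_path(&self) -> MemoryPath {
            MemoryPath(String::new())
        }

        fn put(&self, location: &MemoryPath, bytes: &[u8]) {
            println!("writing {} bytes to {}", bytes.len(), location.0);
        }
    }

    // Top-level enums keep the generics from leaking into callers. The
    // store variant and the path variant always line up because both
    // come from the same trait implementation.
    enum TinyPath {
        InMemory(MemoryPath),
    }

    enum TinyStore {
        InMemory(InMemory),
    }

    impl TinyStore {
        fn put(&self, location: &TinyPath, bytes: &[u8]) {
            match (self, location) {
                (Self::InMemory(store), TinyPath::InMemory(path)) => store.put(path, bytes),
            }
        }
    }

    fn main() {
        // Each store hands out locations of its own path type.
        let _scratch = InMemory.new_path();

        let store = TinyStore::InMemory(InMemory);
        let path = TinyPath::InMemory(MemoryPath("1/mydb/wal/000.segment".into()));
        store.put(&path, b"segment bytes");
    }

With more than one backend, the enum-level match gains a `_ => unreachable!()` arm for mismatched (store, path) pairs, as in the diff below; pairs obtained through `new_path` can never hit it.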
--- Cargo.lock | 1 + object_store/Cargo.toml | 1 + object_store/src/aws.rs | 113 ++++---- object_store/src/azure.rs | 128 +++++---- object_store/src/disk.rs | 78 +++--- object_store/src/gcp.rs | 80 +++--- object_store/src/lib.rs | 458 +++++++++++++------------------ object_store/src/memory.rs | 83 +++--- object_store/src/path.rs | 216 +++------------ object_store/src/path/file.rs | 9 +- object_store/src/path/parsed.rs | 114 +++----- server/src/buffer.rs | 2 +- server/src/config.rs | 2 +- server/src/lib.rs | 2 +- server/src/snapshot.rs | 2 +- src/influxdb_ioxd/http_routes.rs | 1 + 16 files changed, 544 insertions(+), 746 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 223d803f6b..b37cd0e125 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2084,6 +2084,7 @@ checksum = "a9a7ab5d64814df0fe4a4b5ead45ed6c5f181ee3ff04ba344313a6c80446c5d4" name = "object_store" version = "0.1.0" dependencies = [ + "async-trait", "azure_core", "azure_storage", "bytes", diff --git a/object_store/Cargo.toml b/object_store/Cargo.toml index d17ee40617..febbf4a521 100644 --- a/object_store/Cargo.toml +++ b/object_store/Cargo.toml @@ -5,6 +5,7 @@ authors = ["Paul Dix "] edition = "2018" [dependencies] +async-trait = "0.1.42" bytes = "1.0" chrono = "0.4" futures = "0.3.5" diff --git a/object_store/src/aws.rs b/object_store/src/aws.rs index 86a4a1eafd..50166dbf03 100644 --- a/object_store/src/aws.rs +++ b/object_store/src/aws.rs @@ -2,12 +2,16 @@ //! store. use crate::{ path::{cloud::CloudPath, DELIMITER}, - Error, ListResult, NoDataFromS3, ObjectMeta, Result, UnableToDeleteDataFromS3, + Error, ListResult, NoDataFromS3, ObjectMeta, ObjectStoreApi, Result, UnableToDeleteDataFromS3, UnableToGetDataFromS3, UnableToGetPieceOfDataFromS3, UnableToPutDataToS3, }; +use async_trait::async_trait; use bytes::Bytes; use chrono::{DateTime, Utc}; -use futures::{stream, Stream, TryStreamExt}; +use futures::{ + stream::{self, BoxStream}, + Stream, StreamExt, TryStreamExt, +}; use rusoto_core::ByteStream; use rusoto_credential::ChainProvider; use rusoto_s3::S3; @@ -30,37 +34,15 @@ impl fmt::Debug for AmazonS3 { } } -impl AmazonS3 { - /// Configure a connection to Amazon S3 in the specified Amazon region and - /// bucket. Uses [`rusoto_credential::ChainProvider`][cp] to check for - /// credentials in: - /// - /// 1. Environment variables: `AWS_ACCESS_KEY_ID` and - /// `AWS_SECRET_ACCESS_KEY` - /// 2. `credential_process` command in the AWS config file, usually located - /// at `~/.aws/config`. - /// 3. AWS credentials file. Usually located at `~/.aws/credentials`. - /// 4. IAM instance profile. Will only work if running on an EC2 instance - /// with an instance profile/role. - /// - /// [cp]: https://docs.rs/rusoto_credential/0.43.0/rusoto_credential/struct.ChainProvider.html - pub fn new(region: rusoto_core::Region, bucket_name: impl Into) -> Self { - let http_client = rusoto_core::request::HttpClient::new() - .expect("Current implementation of rusoto_core has no way for this to fail"); - let credentials_provider = ChainProvider::new(); - Self { - client: rusoto_s3::S3Client::new_with(http_client, credentials_provider, region), - bucket_name: bucket_name.into(), - } - } +#[async_trait] +impl ObjectStoreApi for AmazonS3 { + type Path = CloudPath; - /// Return a new location path appropriate for this object storage - pub fn new_path(&self) -> CloudPath { + fn new_path(&self) -> Self::Path { CloudPath::default() } - /// Save the provided bytes to the specified location. 
-    pub async fn put<S>(&self, location: &CloudPath, bytes: S, length: usize) -> Result<()>
+    async fn put<S>(&self, location: &Self::Path, bytes: S, length: usize) -> Result<()>
     where
         S: Stream<Item = io::Result<Bytes>> + Send + Sync + 'static,
     {
@@ -83,8 +65,7 @@ impl AmazonS3 {
         Ok(())
     }

-    /// Return the bytes that are stored at the specified location.
-    pub async fn get(&self, location: &CloudPath) -> Result<impl Stream<Item = Result<Bytes>>> {
+    async fn get(&self, location: &Self::Path) -> Result<BoxStream<'static, Result<Bytes>>> {
         let key = location.to_raw();
         let get_request = rusoto_s3::GetObjectRequest {
             bucket: self.bucket_name.clone(),
@@ -108,11 +89,11 @@
                 bucket: self.bucket_name.to_owned(),
                 location: key,
             })
-            .err_into())
+            .err_into()
+            .boxed())
     }

-    /// Delete the object at the specified location.
-    pub async fn delete(&self, location: &CloudPath) -> Result<()> {
+    async fn delete(&self, location: &Self::Path) -> Result<()> {
         let key = location.to_raw();
         let delete_request = rusoto_s3::DeleteObjectRequest {
             bucket: self.bucket_name.clone(),
@@ -130,11 +111,10 @@
         Ok(())
     }

-    /// List all the objects with the given prefix.
-    pub async fn list<'a>(
+    async fn list<'a>(
         &'a self,
-        prefix: Option<&'a CloudPath>,
-    ) -> Result<impl Stream<Item = Result<Vec<CloudPath>>> + 'a> {
+        prefix: Option<&'a Self::Path>,
+    ) -> Result<BoxStream<'a, Result<Vec<Self::Path>>>> {
         #[derive(Clone)]
         enum ListState {
             Start,
@@ -192,12 +172,43 @@
             };

             Some((Ok(names), next_state))
-        }))
+        })
+        .boxed())
+    }
+
+    async fn list_with_delimiter(&self, prefix: &Self::Path) -> Result<ListResult<Self::Path>> {
+        self.list_with_delimiter_and_token(prefix, &None).await
+    }
+}
+
+impl AmazonS3 {
+    /// Configure a connection to Amazon S3 in the specified Amazon region and
+    /// bucket. Uses [`rusoto_credential::ChainProvider`][cp] to check for
+    /// credentials in:
+    ///
+    /// 1. Environment variables: `AWS_ACCESS_KEY_ID` and
+    ///    `AWS_SECRET_ACCESS_KEY`
+    /// 2. `credential_process` command in the AWS config file, usually located
+    ///    at `~/.aws/config`.
+    /// 3. AWS credentials file. Usually located at `~/.aws/credentials`.
+    /// 4. IAM instance profile. Will only work if running on an EC2 instance
+    ///    with an instance profile/role.
+    ///
+    /// [cp]: https://docs.rs/rusoto_credential/0.43.0/rusoto_credential/struct.ChainProvider.html
+    pub fn new(region: rusoto_core::Region, bucket_name: impl Into<String>) -> Self {
+        let http_client = rusoto_core::request::HttpClient::new()
+            .expect("Current implementation of rusoto_core has no way for this to fail");
+        let credentials_provider = ChainProvider::new();
+        Self {
+            client: rusoto_s3::S3Client::new_with(http_client, credentials_provider, region),
+            bucket_name: bucket_name.into(),
+        }
+    }

     /// List objects with the given prefix and a set delimiter of `/`. Returns
-    /// common prefixes (directories) in addition to object metadata.
-    pub async fn list_with_delimiter<'a>(
+    /// common prefixes (directories) in addition to object metadata. Optionally
+    /// takes a continuation token for paging.
+ pub async fn list_with_delimiter_and_token<'a>( &'a self, prefix: &'a CloudPath, next_token: &Option, @@ -307,7 +318,7 @@ impl Error { mod tests { use crate::{ tests::{get_nonexistent_object, list_with_delimiter, put_get_delete_list}, - AmazonS3, Error, ObjectStore, ObjectStorePath, + AmazonS3, Error, ObjectStoreApi, ObjectStorePath, }; use bytes::Bytes; use std::env; @@ -398,7 +409,7 @@ mod tests { maybe_skip_integration!(); let (region, bucket_name) = region_and_bucket_name()?; - let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, &bucket_name)); + let integration = AmazonS3::new(region, &bucket_name); check_credentials(put_get_delete_list(&integration).await)?; check_credentials(list_with_delimiter(&integration).await).unwrap(); @@ -412,7 +423,7 @@ mod tests { // Assumes environment variables do not provide credentials to AWS US West 1 let (_, bucket_name) = region_and_bucket_name()?; let region = rusoto_core::Region::UsWest1; - let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, &bucket_name)); + let integration = AmazonS3::new(region, &bucket_name); let mut location = integration.new_path(); location.set_file_name(NON_EXISTENT_NAME); @@ -435,7 +446,7 @@ mod tests { async fn s3_test_get_nonexistent_location() -> Result<()> { maybe_skip_integration!(); let (region, bucket_name) = region_and_bucket_name()?; - let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, &bucket_name)); + let integration = AmazonS3::new(region, &bucket_name); let mut location = integration.new_path(); location.set_file_name(NON_EXISTENT_NAME); @@ -466,7 +477,7 @@ mod tests { maybe_skip_integration!(); let (region, _) = region_and_bucket_name()?; let bucket_name = NON_EXISTENT_NAME; - let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, bucket_name)); + let integration = AmazonS3::new(region, bucket_name); let mut location = integration.new_path(); location.set_file_name(NON_EXISTENT_NAME); @@ -494,7 +505,7 @@ mod tests { // Assumes environment variables do not provide credentials to AWS US West 1 let (_, bucket_name) = region_and_bucket_name()?; let region = rusoto_core::Region::UsWest1; - let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, &bucket_name)); + let integration = AmazonS3::new(region, &bucket_name); let mut location = integration.new_path(); location.set_file_name(NON_EXISTENT_NAME); let data = Bytes::from("arbitrary data"); @@ -530,7 +541,7 @@ mod tests { maybe_skip_integration!(); let (region, _) = region_and_bucket_name()?; let bucket_name = NON_EXISTENT_NAME; - let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, bucket_name)); + let integration = AmazonS3::new(region, bucket_name); let mut location = integration.new_path(); location.set_file_name(NON_EXISTENT_NAME); let data = Bytes::from("arbitrary data"); @@ -565,7 +576,7 @@ mod tests { async fn s3_test_delete_nonexistent_location() -> Result<()> { maybe_skip_integration!(); let (region, bucket_name) = region_and_bucket_name()?; - let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, &bucket_name)); + let integration = AmazonS3::new(region, &bucket_name); let mut location = integration.new_path(); location.set_file_name(NON_EXISTENT_NAME); @@ -582,7 +593,7 @@ mod tests { // Assumes environment variables do not provide credentials to AWS US West 1 let (_, bucket_name) = region_and_bucket_name()?; let region = rusoto_core::Region::UsWest1; - let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, &bucket_name)); + let integration = 
AmazonS3::new(region, &bucket_name); let mut location = integration.new_path(); location.set_file_name(NON_EXISTENT_NAME); @@ -608,7 +619,7 @@ mod tests { maybe_skip_integration!(); let (region, _) = region_and_bucket_name()?; let bucket_name = NON_EXISTENT_NAME; - let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, bucket_name)); + let integration = AmazonS3::new(region, bucket_name); let mut location = integration.new_path(); location.set_file_name(NON_EXISTENT_NAME); diff --git a/object_store/src/azure.rs b/object_store/src/azure.rs index a1731a92b9..473e7f11fa 100644 --- a/object_store/src/azure.rs +++ b/object_store/src/azure.rs @@ -1,9 +1,11 @@ //! This module contains the IOx implementation for using Azure Blob storage as //! the object store. use crate::{ - path::cloud::CloudPath, DataDoesNotMatchLength, Result, UnableToDeleteDataFromAzure, - UnableToGetDataFromAzure, UnableToListDataFromAzure, UnableToPutDataToAzure, + path::cloud::CloudPath, DataDoesNotMatchLength, ListResult, ObjectStoreApi, Result, + UnableToDeleteDataFromAzure, UnableToGetDataFromAzure, UnableToListDataFromAzure, + UnableToPutDataToAzure, }; +use async_trait::async_trait; use azure_core::HttpClient; use azure_storage::{ clients::{ @@ -12,7 +14,10 @@ use azure_storage::{ DeleteSnapshotsMethod, }; use bytes::Bytes; -use futures::{stream, FutureExt, Stream, TryStreamExt}; +use futures::{ + stream::{self, BoxStream}, + FutureExt, Stream, StreamExt, TryStreamExt, +}; use snafu::{ensure, ResultExt}; use std::io; use std::sync::Arc; @@ -24,53 +29,15 @@ pub struct MicrosoftAzure { container_name: String, } -impl MicrosoftAzure { - /// Configure a connection to container with given name on Microsoft Azure - /// Blob store. - /// - /// The credentials `account` and `master_key` must provide access to the - /// store. - pub fn new(account: String, master_key: String, container_name: impl Into) -> Self { - // From https://github.com/Azure/azure-sdk-for-rust/blob/master/sdk/storage/examples/blob_00.rs#L29 - let http_client: Arc> = Arc::new(Box::new(reqwest::Client::new())); +#[async_trait] +impl ObjectStoreApi for MicrosoftAzure { + type Path = CloudPath; - let storage_account_client = - StorageAccountClient::new_access_key(http_client.clone(), &account, &master_key); - - let storage_client = storage_account_client.as_storage_client(); - - let container_name = container_name.into(); - - let container_client = storage_client.as_container_client(&container_name); - - Self { - container_client, - container_name, - } - } - - /// Configure a connection to container with given name on Microsoft Azure - /// Blob store. - /// - /// The credentials `account` and `master_key` must be set via the - /// environment variables `AZURE_STORAGE_ACCOUNT` and - /// `AZURE_STORAGE_MASTER_KEY` respectively. - pub fn new_from_env(container_name: impl Into) -> Self { - let account = std::env::var("AZURE_STORAGE_ACCOUNT") - .expect("Set env variable AZURE_STORAGE_ACCOUNT first!"); - let master_key = std::env::var("AZURE_STORAGE_MASTER_KEY") - .expect("Set env variable AZURE_STORAGE_MASTER_KEY first!"); - - Self::new(account, master_key, container_name) - } - - /// Return a new location path appropriate for this object storage - pub fn new_path(&self) -> CloudPath { + fn new_path(&self) -> Self::Path { CloudPath::default() } - /// Save the provided bytes to the specified location. 
- pub async fn put(&self, location: &CloudPath, bytes: S, length: usize) -> Result<()> + async fn put(&self, location: &Self::Path, bytes: S, length: usize) -> Result<()> where S: Stream> + Send + Sync + 'static, { @@ -101,8 +68,7 @@ impl MicrosoftAzure { Ok(()) } - /// Return the bytes that are stored at the specified location. - pub async fn get(&self, location: &CloudPath) -> Result>> { + async fn get(&self, location: &Self::Path) -> Result>> { let container_client = self.container_client.clone(); let location = location.to_raw(); Ok(async move { @@ -116,11 +82,11 @@ impl MicrosoftAzure { location: location.to_owned(), }) } - .into_stream()) + .into_stream() + .boxed()) } - /// Delete the object at the specified location. - pub async fn delete(&self, location: &CloudPath) -> Result<()> { + async fn delete(&self, location: &Self::Path) -> Result<()> { let location = location.to_raw(); self.container_client .as_blob_client(&location) @@ -135,11 +101,10 @@ impl MicrosoftAzure { Ok(()) } - /// List all the objects with the given prefix. - pub async fn list<'a>( + async fn list<'a>( &'a self, - prefix: Option<&'a CloudPath>, - ) -> Result>> + 'a> { + prefix: Option<&'a Self::Path>, + ) -> Result>>> { #[derive(Clone)] enum ListState { Start, @@ -184,14 +149,60 @@ impl MicrosoftAzure { .collect(); Some((Ok(names), next_state)) - })) + }) + .boxed()) + } + + async fn list_with_delimiter(&self, _prefix: &Self::Path) -> Result> { + unimplemented!(); + } +} + +impl MicrosoftAzure { + /// Configure a connection to container with given name on Microsoft Azure + /// Blob store. + /// + /// The credentials `account` and `master_key` must provide access to the + /// store. + pub fn new(account: String, master_key: String, container_name: impl Into) -> Self { + // From https://github.com/Azure/azure-sdk-for-rust/blob/master/sdk/storage/examples/blob_00.rs#L29 + let http_client: Arc> = Arc::new(Box::new(reqwest::Client::new())); + + let storage_account_client = + StorageAccountClient::new_access_key(http_client.clone(), &account, &master_key); + + let storage_client = storage_account_client.as_storage_client(); + + let container_name = container_name.into(); + + let container_client = storage_client.as_container_client(&container_name); + + Self { + container_client, + container_name, + } + } + + /// Configure a connection to container with given name on Microsoft Azure + /// Blob store. + /// + /// The credentials `account` and `master_key` must be set via the + /// environment variables `AZURE_STORAGE_ACCOUNT` and + /// `AZURE_STORAGE_MASTER_KEY` respectively. 
+ pub fn new_from_env(container_name: impl Into) -> Self { + let account = std::env::var("AZURE_STORAGE_ACCOUNT") + .expect("Set env variable AZURE_STORAGE_ACCOUNT first!"); + let master_key = std::env::var("AZURE_STORAGE_MASTER_KEY") + .expect("Set env variable AZURE_STORAGE_MASTER_KEY first!"); + + Self::new(account, master_key, container_name) } } #[cfg(test)] mod tests { use super::*; - use crate::{tests::put_get_delete_list, ObjectStore}; + use crate::tests::put_get_delete_list; use std::env; type Error = Box; @@ -246,9 +257,8 @@ mod tests { let container_name = env::var("AZURE_STORAGE_CONTAINER") .map_err(|_| "The environment variable AZURE_STORAGE_CONTAINER must be set")?; - let azure = MicrosoftAzure::new_from_env(container_name); + let integration = MicrosoftAzure::new_from_env(container_name); - let integration = ObjectStore::new_microsoft_azure(azure); put_get_delete_list(&integration).await?; Ok(()) diff --git a/object_store/src/disk.rs b/object_store/src/disk.rs index 1f015d1ff7..b5a9f19fec 100644 --- a/object_store/src/disk.rs +++ b/object_store/src/disk.rs @@ -1,12 +1,16 @@ //! This module contains the IOx implementation for using local disk as the //! object store. use crate::{ - path::file::FilePath, DataDoesNotMatchLength, Result, UnableToCopyDataToFile, - UnableToCreateDir, UnableToCreateFile, UnableToDeleteFile, UnableToOpenFile, - UnableToPutDataInMemory, UnableToReadBytes, + path::file::FilePath, DataDoesNotMatchLength, ListResult, ObjectStoreApi, Result, + UnableToCopyDataToFile, UnableToCreateDir, UnableToCreateFile, UnableToDeleteFile, + UnableToOpenFile, UnableToPutDataInMemory, UnableToReadBytes, }; +use async_trait::async_trait; use bytes::Bytes; -use futures::{stream, Stream, TryStreamExt}; +use futures::{ + stream::{self, BoxStream}, + Stream, StreamExt, TryStreamExt, +}; use snafu::{ensure, futures::TryStreamExt as _, OptionExt, ResultExt}; use std::{io, path::PathBuf}; use tokio::fs; @@ -20,27 +24,15 @@ pub struct File { root: FilePath, } -impl File { - /// Create new filesystem storage. - pub fn new(root: impl Into) -> Self { - Self { - root: FilePath::raw(root), - } - } +#[async_trait] +impl ObjectStoreApi for File { + type Path = FilePath; - fn path(&self, location: &FilePath) -> PathBuf { - let mut path = self.root.clone(); - path.push_path(location); - path.to_raw() - } - - /// Return a new location path appropriate for this object storage - pub fn new_path(&self) -> FilePath { + fn new_path(&self) -> Self::Path { FilePath::default() } - /// Save the provided bytes to the specified location. - pub async fn put(&self, location: &FilePath, bytes: S, length: usize) -> Result<()> + async fn put(&self, location: &Self::Path, bytes: S, length: usize) -> Result<()> where S: Stream> + Send + Sync + 'static, { @@ -85,8 +77,7 @@ impl File { Ok(()) } - /// Return the bytes that are stored at the specified location. - pub async fn get(&self, location: &FilePath) -> Result>> { + async fn get(&self, location: &Self::Path) -> Result>> { let path = self.path(location); let file = fs::File::open(&path) @@ -96,11 +87,10 @@ impl File { let s = FramedRead::new(file, BytesCodec::new()) .map_ok(|b| b.freeze()) .context(UnableToReadBytes { path }); - Ok(s) + Ok(s.boxed()) } - /// Delete the object at the specified location. 
- pub async fn delete(&self, location: &FilePath) -> Result<()> { + async fn delete(&self, location: &Self::Path) -> Result<()> { let path = self.path(location); fs::remove_file(&path) .await @@ -108,11 +98,10 @@ impl File { Ok(()) } - /// List all the objects with the given prefix. - pub async fn list<'a>( + async fn list<'a>( &'a self, - prefix: Option<&'a FilePath>, - ) -> Result>> + 'a> { + prefix: Option<&'a Self::Path>, + ) -> Result>>> { let root_path = self.root.to_raw(); let walkdir = WalkDir::new(&root_path) // Don't include the root directory itself @@ -132,7 +121,26 @@ impl File { .map(|name| Ok(vec![name])) }); - Ok(stream::iter(s)) + Ok(stream::iter(s).boxed()) + } + + async fn list_with_delimiter(&self, _prefix: &Self::Path) -> Result> { + unimplemented!() + } +} + +impl File { + /// Create new filesystem storage. + pub fn new(root: impl Into) -> Self { + Self { + root: FilePath::raw(root), + } + } + + fn path(&self, location: &FilePath) -> PathBuf { + let mut path = self.root.clone(); + path.push_path(location); + path.to_raw() } } @@ -145,13 +153,13 @@ mod tests { use tempfile::TempDir; - use crate::{tests::put_get_delete_list, Error, ObjectStore, ObjectStorePath}; + use crate::{tests::put_get_delete_list, Error, ObjectStoreApi, ObjectStorePath}; use futures::stream; #[tokio::test] async fn file_test() -> Result<()> { let root = TempDir::new()?; - let integration = ObjectStore::new_file(File::new(root.path())); + let integration = File::new(root.path()); put_get_delete_list(&integration).await?; Ok(()) @@ -160,7 +168,7 @@ mod tests { #[tokio::test] async fn length_mismatch_is_an_error() -> Result<()> { let root = TempDir::new()?; - let integration = ObjectStore::new_file(File::new(root.path())); + let integration = File::new(root.path()); let bytes = stream::once(async { Ok(Bytes::from("hello world")) }); let mut location = integration.new_path(); @@ -181,7 +189,7 @@ mod tests { #[tokio::test] async fn creates_dir_if_not_present() -> Result<()> { let root = TempDir::new()?; - let integration = ObjectStore::new_file(File::new(root.path())); + let integration = File::new(root.path()); let data = Bytes::from("arbitrary data"); let mut location = integration.new_path(); diff --git a/object_store/src/gcp.rs b/object_store/src/gcp.rs index 2d19edefbf..9e7510cba0 100644 --- a/object_store/src/gcp.rs +++ b/object_store/src/gcp.rs @@ -1,12 +1,16 @@ //! This module contains the IOx implementation for using Google Cloud Storage //! as the object store. use crate::{ - path::cloud::CloudPath, DataDoesNotMatchLength, Result, UnableToDeleteDataFromGcs, - UnableToGetDataFromGcs, UnableToListDataFromGcs, UnableToListDataFromGcs2, - UnableToPutDataToGcs, + path::cloud::CloudPath, DataDoesNotMatchLength, ListResult, ObjectStoreApi, Result, + UnableToDeleteDataFromGcs, UnableToGetDataFromGcs, UnableToListDataFromGcs, + UnableToListDataFromGcs2, UnableToPutDataToGcs, }; +use async_trait::async_trait; use bytes::Bytes; -use futures::{stream, Stream, StreamExt, TryStreamExt}; +use futures::{ + stream::{self, BoxStream}, + Stream, StreamExt, TryStreamExt, +}; use snafu::{ensure, futures::TryStreamExt as _, ResultExt}; use std::io; @@ -16,21 +20,15 @@ pub struct GoogleCloudStorage { bucket_name: String, } -impl GoogleCloudStorage { - /// Configure a connection to Google Cloud Storage. 
- pub fn new(bucket_name: impl Into) -> Self { - Self { - bucket_name: bucket_name.into(), - } - } +#[async_trait] +impl ObjectStoreApi for GoogleCloudStorage { + type Path = CloudPath; - /// Return a new location path appropriate for this object storage - pub fn new_path(&self) -> CloudPath { + fn new_path(&self) -> Self::Path { CloudPath::default() } - /// Save the provided bytes to the specified location. - pub async fn put(&self, location: &CloudPath, bytes: S, length: usize) -> Result<()> + async fn put(&self, location: &Self::Path, bytes: S, length: usize) -> Result<()> where S: Stream> + Send + Sync + 'static, { @@ -68,8 +66,7 @@ impl GoogleCloudStorage { Ok(()) } - /// Return the bytes that are stored at the specified location. - pub async fn get(&self, location: &CloudPath) -> Result>> { + async fn get(&self, location: &Self::Path) -> Result>> { let location = location.to_raw(); let location_copy = location.clone(); let bucket_name = self.bucket_name.clone(); @@ -81,11 +78,10 @@ impl GoogleCloudStorage { location, })?; - Ok(futures::stream::once(async move { Ok(bytes.into()) })) + Ok(futures::stream::once(async move { Ok(bytes.into()) }).boxed()) } - /// Delete the object at the specified location. - pub async fn delete(&self, location: &CloudPath) -> Result<()> { + async fn delete(&self, location: &Self::Path) -> Result<()> { let location = location.to_raw(); let location_copy = location.clone(); let bucket_name = self.bucket_name.clone(); @@ -100,11 +96,10 @@ impl GoogleCloudStorage { Ok(()) } - /// List all the objects with the given prefix. - pub async fn list<'a>( + async fn list<'a>( &'a self, - prefix: Option<&'a CloudPath>, - ) -> Result>> + 'a> { + prefix: Option<&'a Self::Path>, + ) -> Result>>> { let objects = match prefix { Some(prefix) => { let cloud_prefix = prefix.to_raw(); @@ -137,7 +132,20 @@ impl GoogleCloudStorage { bucket: &self.bucket_name, }); - Ok(objects) + Ok(objects.boxed()) + } + + async fn list_with_delimiter(&self, _prefix: &Self::Path) -> Result> { + unimplemented!(); + } +} + +impl GoogleCloudStorage { + /// Configure a connection to Google Cloud Storage. 
+ pub fn new(bucket_name: impl Into) -> Self { + Self { + bucket_name: bucket_name.into(), + } } } @@ -145,7 +153,7 @@ impl GoogleCloudStorage { mod test { use crate::{ tests::{get_nonexistent_object, put_get_delete_list}, - Error, GoogleCloudStorage, ObjectStore, ObjectStorePath, + Error, GoogleCloudStorage, ObjectStoreApi, ObjectStorePath, }; use bytes::Bytes; use std::env; @@ -187,8 +195,7 @@ mod test { maybe_skip_integration!(); let bucket_name = bucket_name()?; - let integration = - ObjectStore::new_google_cloud_storage(GoogleCloudStorage::new(&bucket_name)); + let integration = GoogleCloudStorage::new(&bucket_name); put_get_delete_list(&integration).await?; Ok(()) } @@ -197,8 +204,7 @@ mod test { async fn gcs_test_get_nonexistent_location() -> Result<()> { maybe_skip_integration!(); let bucket_name = bucket_name()?; - let integration = - ObjectStore::new_google_cloud_storage(GoogleCloudStorage::new(&bucket_name)); + let integration = GoogleCloudStorage::new(&bucket_name); let mut location = integration.new_path(); location.set_file_name(NON_EXISTENT_NAME); @@ -220,9 +226,7 @@ mod test { async fn gcs_test_get_nonexistent_bucket() -> Result<()> { maybe_skip_integration!(); let bucket_name = NON_EXISTENT_NAME; - let integration = - ObjectStore::new_google_cloud_storage(GoogleCloudStorage::new(bucket_name)); - + let integration = GoogleCloudStorage::new(bucket_name); let mut location = integration.new_path(); location.set_file_name(NON_EXISTENT_NAME); @@ -237,8 +241,7 @@ mod test { async fn gcs_test_delete_nonexistent_location() -> Result<()> { maybe_skip_integration!(); let bucket_name = bucket_name()?; - let integration = - ObjectStore::new_google_cloud_storage(GoogleCloudStorage::new(&bucket_name)); + let integration = GoogleCloudStorage::new(&bucket_name); let mut location = integration.new_path(); location.set_file_name(NON_EXISTENT_NAME); @@ -265,8 +268,7 @@ mod test { async fn gcs_test_delete_nonexistent_bucket() -> Result<()> { maybe_skip_integration!(); let bucket_name = NON_EXISTENT_NAME; - let integration = - ObjectStore::new_google_cloud_storage(GoogleCloudStorage::new(bucket_name)); + let integration = GoogleCloudStorage::new(bucket_name); let mut location = integration.new_path(); location.set_file_name(NON_EXISTENT_NAME); @@ -293,9 +295,7 @@ mod test { async fn gcs_test_put_nonexistent_bucket() -> Result<()> { maybe_skip_integration!(); let bucket_name = NON_EXISTENT_NAME; - let integration = - ObjectStore::new_google_cloud_storage(GoogleCloudStorage::new(bucket_name)); - + let integration = GoogleCloudStorage::new(bucket_name); let mut location = integration.new_path(); location.set_file_name(NON_EXISTENT_NAME); diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs index 797de1bdec..f0a8b45ad9 100644 --- a/object_store/src/lib.rs +++ b/object_store/src/lib.rs @@ -29,12 +29,84 @@ use gcp::GoogleCloudStorage; use memory::InMemory; use path::ObjectStorePath; +use async_trait::async_trait; use bytes::Bytes; use chrono::{DateTime, Utc}; -use futures::{Stream, StreamExt, TryFutureExt, TryStreamExt}; +use futures::{stream::BoxStream, Stream, StreamExt, TryFutureExt, TryStreamExt}; use snafu::Snafu; use std::{io, path::PathBuf}; +/// Universal API to multiple object store services. +#[async_trait] +pub trait ObjectStoreApi: Send + Sync + 'static { + /// The type of the locations used in interacting with this object store. 
+ type Path: path::ObjectStorePath; + + /// Return a new location path appropriate for this object storage + fn new_path(&self) -> Self::Path; + + /// Save the provided bytes to the specified location. + async fn put(&self, location: &Self::Path, bytes: S, length: usize) -> Result<()> + where + S: Stream> + Send + Sync + 'static; + + /// Return the bytes that are stored at the specified location. + async fn get(&self, location: &Self::Path) -> Result>>; + + /// Delete the object at the specified location. + async fn delete(&self, location: &Self::Path) -> Result<()>; + + /// List all the objects with the given prefix. + async fn list<'a>( + &'a self, + prefix: Option<&'a Self::Path>, + ) -> Result>>>; + + /// List objects with the given prefix and an implementation specific + /// delimiter. Returns common prefixes (directories) in addition to object + /// metadata. + async fn list_with_delimiter(&self, prefix: &Self::Path) -> Result>; +} + +#[async_trait] +impl ObjectStoreApi for T +where + T: std::ops::Deref + Send + Sync + 'static, + T::Target: ObjectStoreApi, +{ + type Path = ::Path; + + fn new_path(&self) -> Self::Path { + unimplemented!() + } + + async fn put(&self, location: &Self::Path, bytes: S, length: usize) -> Result<()> + where + S: Stream> + Send + Sync + 'static, + { + T::put(self, location, bytes, length).await + } + + async fn get(&self, location: &Self::Path) -> Result>> { + T::get(self, location).await + } + + async fn delete(&self, location: &Self::Path) -> Result<()> { + T::delete(self, location).await + } + + async fn list<'a>( + &'a self, + prefix: Option<&'a Self::Path>, + ) -> Result>>> { + T::list(self, prefix).await + } + + async fn list_with_delimiter(&self, prefix: &Self::Path) -> Result> { + T::list_with_delimiter(self, prefix).await + } +} + /// Universal interface to multiple object store services. #[derive(Debug)] pub struct ObjectStore(pub ObjectStoreIntegration); @@ -64,326 +136,175 @@ impl ObjectStore { pub fn new_microsoft_azure(azure: MicrosoftAzure) -> Self { Self(ObjectStoreIntegration::MicrosoftAzure(Box::new(azure))) } +} - /// Return a new location path appropriate for this object storage - pub fn new_path(&self) -> path::Path { +#[async_trait] +impl ObjectStoreApi for ObjectStore { + type Path = path::Path; + + fn new_path(&self) -> Self::Path { use ObjectStoreIntegration::*; match &self.0 { - AmazonS3(s3) => path::Path { - inner: path::PathRepresentation::AmazonS3(s3.new_path()), - }, - GoogleCloudStorage(gcs) => path::Path { - inner: path::PathRepresentation::GoogleCloudStorage(gcs.new_path()), - }, - InMemory(in_mem) => path::Path { - inner: path::PathRepresentation::Parts(in_mem.new_path()), - }, - File(file) => path::Path { - inner: path::PathRepresentation::File(file.new_path()), - }, - MicrosoftAzure(azure) => path::Path { - inner: path::PathRepresentation::MicrosoftAzure(azure.new_path()), - }, + AmazonS3(s3) => path::Path::AmazonS3(s3.new_path()), + GoogleCloudStorage(gcs) => path::Path::GoogleCloudStorage(gcs.new_path()), + InMemory(in_mem) => path::Path::InMemory(in_mem.new_path()), + File(file) => path::Path::File(file.new_path()), + MicrosoftAzure(azure) => path::Path::MicrosoftAzure(azure.new_path()), } } - /// Save the provided bytes to the specified location. 
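The `ObjectStoreApi` trait above is the heart of this refactor: each backend picks its own associated `Path` type and supplies the same handful of operations. As a rough, synchronous sketch of the shape a backend implementation takes (with `Vec<u8>` standing in for the real byte streams, and the `ToyApi` and `KvStore` names invented here for illustration):

    use std::collections::BTreeMap;

    // Stripped-down analogue of ObjectStoreApi: no async, no streams.
    trait ToyApi {
        type Path: Ord + Clone;
        fn new_path(&self) -> Self::Path;
        fn put(&mut self, location: &Self::Path, bytes: Vec<u8>) -> Result<(), String>;
        fn get(&self, location: &Self::Path) -> Result<Vec<u8>, String>;
        fn delete(&mut self, location: &Self::Path) -> Result<(), String>;
        fn list(&self, prefix: Option<&Self::Path>) -> Vec<Self::Path>;
    }

    #[derive(Default)]
    struct KvStore {
        objects: BTreeMap<String, Vec<u8>>,
    }

    impl ToyApi for KvStore {
        // Each backend chooses the path representation that suits it.
        type Path = String;

        fn new_path(&self) -> String {
            String::new()
        }

        fn put(&mut self, location: &String, bytes: Vec<u8>) -> Result<(), String> {
            self.objects.insert(location.clone(), bytes);
            Ok(())
        }

        fn get(&self, location: &String) -> Result<Vec<u8>, String> {
            self.objects
                .get(location)
                .cloned()
                .ok_or_else(|| format!("no object at {}", location))
        }

        fn delete(&mut self, location: &String) -> Result<(), String> {
            self.objects.remove(location);
            Ok(())
        }

        fn list(&self, prefix: Option<&String>) -> Vec<String> {
            self.objects
                .keys()
                .filter(|key| prefix.map_or(true, |p| key.starts_with(p.as_str())))
                .cloned()
                .collect()
        }
    }

The real trait is `async` and stream-based and takes `&self` (the in-memory backend wraps its map in an `RwLock`), but the division of labor per backend is the same.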
- pub async fn put(&self, location: &path::Path, bytes: S, length: usize) -> Result<()> + async fn put(&self, location: &Self::Path, bytes: S, length: usize) -> Result<()> where S: Stream> + Send + Sync + 'static, { - use path::PathRepresentation; use ObjectStoreIntegration::*; match (&self.0, location) { - ( - AmazonS3(s3), - path::Path { - inner: PathRepresentation::AmazonS3(location), - }, - ) => s3.put(location, bytes, length).await?, - ( - GoogleCloudStorage(gcs), - path::Path { - inner: PathRepresentation::GoogleCloudStorage(location), - }, - ) => gcs.put(location, bytes, length).await?, - ( - InMemory(in_mem), - path::Path { - inner: PathRepresentation::Parts(location), - }, - ) => in_mem.put(location, bytes, length).await?, - ( - File(file), - path::Path { - inner: PathRepresentation::File(location), - }, - ) => file.put(location, bytes, length).await?, - ( - MicrosoftAzure(azure), - path::Path { - inner: PathRepresentation::MicrosoftAzure(location), - }, - ) => azure.put(location, bytes, length).await?, + (AmazonS3(s3), path::Path::AmazonS3(location)) => { + s3.put(location, bytes, length).await? + } + (GoogleCloudStorage(gcs), path::Path::GoogleCloudStorage(location)) => { + gcs.put(location, bytes, length).await? + } + (InMemory(in_mem), path::Path::InMemory(location)) => { + in_mem.put(location, bytes, length).await? + } + (File(file), path::Path::File(location)) => file.put(location, bytes, length).await?, + (MicrosoftAzure(azure), path::Path::MicrosoftAzure(location)) => { + azure.put(location, bytes, length).await? + } _ => unreachable!(), } Ok(()) } - /// Return the bytes that are stored at the specified location. - pub async fn get(&self, location: &path::Path) -> Result>> { - use path::PathRepresentation; + async fn get(&self, location: &Self::Path) -> Result>> { use ObjectStoreIntegration::*; Ok(match (&self.0, location) { - ( - AmazonS3(s3), - path::Path { - inner: PathRepresentation::AmazonS3(location), - }, - ) => s3.get(location).await?.boxed(), - ( - GoogleCloudStorage(gcs), - path::Path { - inner: PathRepresentation::GoogleCloudStorage(location), - }, - ) => gcs.get(location).await?.boxed(), - ( - InMemory(in_mem), - path::Path { - inner: PathRepresentation::Parts(location), - }, - ) => in_mem.get(location).await?.boxed(), - ( - File(file), - path::Path { - inner: PathRepresentation::File(location), - }, - ) => file.get(location).await?.boxed(), - ( - MicrosoftAzure(azure), - path::Path { - inner: PathRepresentation::MicrosoftAzure(location), - }, - ) => azure.get(location).await?.boxed(), + (AmazonS3(s3), path::Path::AmazonS3(location)) => { + s3.get(location).await?.err_into().boxed() + } + (GoogleCloudStorage(gcs), path::Path::GoogleCloudStorage(location)) => { + gcs.get(location).await?.err_into().boxed() + } + (InMemory(in_mem), path::Path::InMemory(location)) => { + in_mem.get(location).await?.err_into().boxed() + } + (File(file), path::Path::File(location)) => { + file.get(location).await?.err_into().boxed() + } + (MicrosoftAzure(azure), path::Path::MicrosoftAzure(location)) => { + azure.get(location).await?.err_into().boxed() + } _ => unreachable!(), - } - .err_into()) + }) } - /// Delete the object at the specified location. 
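One detail worth noting in the `get` arms above: each backend returns a differently typed concrete stream, so every arm calls `.err_into().boxed()` to convert the error type and erase the stream type before the arms are unified. A minimal sketch of the boxing half of that pattern; the `Backend` enum and error type are invented for illustration:

    use futures::stream::{self, BoxStream, StreamExt};

    enum Backend {
        A,
        B,
    }

    // Each arm builds a stream with a different concrete type; `.boxed()`
    // erases those types so the arms (and the return type) agree.
    fn get(backend: &Backend) -> BoxStream<'static, Result<u8, String>> {
        match backend {
            Backend::A => stream::iter(vec![Ok(1), Ok(2)]).boxed(),
            Backend::B => stream::once(async { Ok(3) }).boxed(),
        }
    }

`err_into()`, from `futures::TryStreamExt`, plays the same role for the error half, converting each backend's error into the crate-wide error type via its `From` impl.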
- pub async fn delete(&self, location: &path::Path) -> Result<()> { - use path::PathRepresentation; + async fn delete(&self, location: &Self::Path) -> Result<()> { use ObjectStoreIntegration::*; match (&self.0, location) { - ( - AmazonS3(s3), - path::Path { - inner: PathRepresentation::AmazonS3(location), - }, - ) => s3.delete(location).await?, - ( - GoogleCloudStorage(gcs), - path::Path { - inner: PathRepresentation::GoogleCloudStorage(location), - }, - ) => gcs.delete(location).await?, - ( - InMemory(in_mem), - path::Path { - inner: PathRepresentation::Parts(location), - }, - ) => in_mem.delete(location).await?, - ( - File(file), - path::Path { - inner: PathRepresentation::File(location), - }, - ) => file.delete(location).await?, - ( - MicrosoftAzure(azure), - path::Path { - inner: PathRepresentation::MicrosoftAzure(location), - }, - ) => azure.delete(location).await?, + (AmazonS3(s3), path::Path::AmazonS3(location)) => s3.delete(location).await?, + (GoogleCloudStorage(gcs), path::Path::GoogleCloudStorage(location)) => { + gcs.delete(location).await? + } + (InMemory(in_mem), path::Path::InMemory(location)) => in_mem.delete(location).await?, + (File(file), path::Path::File(location)) => file.delete(location).await?, + (MicrosoftAzure(azure), path::Path::MicrosoftAzure(location)) => { + azure.delete(location).await? + } _ => unreachable!(), } Ok(()) } - /// List all the objects with the given prefix. - pub async fn list<'a>( + async fn list<'a>( &'a self, - prefix: Option<&'a path::Path>, - ) -> Result>> + 'a> { - use path::PathRepresentation; + prefix: Option<&'a Self::Path>, + ) -> Result>>> { use ObjectStoreIntegration::*; Ok(match (&self.0, prefix) { - ( - AmazonS3(s3), - Some(path::Path { - inner: PathRepresentation::AmazonS3(prefix), - }), - ) => s3 + (AmazonS3(s3), Some(path::Path::AmazonS3(prefix))) => s3 .list(Some(prefix)) .await? - .map_ok(|s| { - s.into_iter() - .map(|p| path::Path { - inner: PathRepresentation::AmazonS3(p), - }) - .collect() - }) + .map_ok(|s| s.into_iter().map(path::Path::AmazonS3).collect()) + .err_into() .boxed(), (AmazonS3(s3), None) => s3 .list(None) .await? - .map_ok(|s| { - s.into_iter() - .map(|p| path::Path { - inner: PathRepresentation::AmazonS3(p), - }) - .collect() - }) + .map_ok(|s| s.into_iter().map(path::Path::AmazonS3).collect()) + .err_into() .boxed(), - ( - GoogleCloudStorage(gcs), - Some(path::Path { - inner: PathRepresentation::GoogleCloudStorage(prefix), - }), - ) => gcs + + (GoogleCloudStorage(gcs), Some(path::Path::GoogleCloudStorage(prefix))) => gcs .list(Some(prefix)) .await? - .map_ok(|s| { - s.into_iter() - .map(|p| path::Path { - inner: PathRepresentation::GoogleCloudStorage(p), - }) - .collect() - }) + .map_ok(|s| s.into_iter().map(path::Path::GoogleCloudStorage).collect()) + .err_into() .boxed(), (GoogleCloudStorage(gcs), None) => gcs .list(None) .await? - .map_ok(|s| { - s.into_iter() - .map(|p| path::Path { - inner: PathRepresentation::GoogleCloudStorage(p), - }) - .collect() - }) + .map_ok(|s| s.into_iter().map(path::Path::GoogleCloudStorage).collect()) + .err_into() .boxed(), - ( - InMemory(in_mem), - Some(path::Path { - inner: PathRepresentation::Parts(dirs_and_file_name), - }), - ) => in_mem - .list(Some(dirs_and_file_name)) + (InMemory(in_mem), Some(path::Path::InMemory(prefix))) => in_mem + .list(Some(prefix)) .await? - .map_ok(|s| s.into_iter().map(Into::into).collect()) + .map_ok(|s| s.into_iter().map(path::Path::InMemory).collect()) + .err_into() .boxed(), (InMemory(in_mem), None) => in_mem .list(None) .await? 
- .map_ok(|s| s.into_iter().map(Into::into).collect()) + .map_ok(|s| s.into_iter().map(path::Path::InMemory).collect()) + .err_into() .boxed(), - ( - File(file), - Some(path::Path { - inner: PathRepresentation::File(prefix), - }), - ) => file + (File(file), Some(path::Path::File(prefix))) => file .list(Some(prefix)) .await? - .map_ok(|s| { - s.into_iter() - .map(|p| path::Path { - inner: PathRepresentation::File(p), - }) - .collect() - }) + .map_ok(|s| s.into_iter().map(path::Path::File).collect()) + .err_into() .boxed(), (File(file), None) => file .list(None) .await? - .map_ok(|s| { - s.into_iter() - .map(|p| path::Path { - inner: PathRepresentation::File(p), - }) - .collect() - }) + .map_ok(|s| s.into_iter().map(path::Path::File).collect()) + .err_into() .boxed(), - ( - MicrosoftAzure(azure), - Some(path::Path { - inner: PathRepresentation::MicrosoftAzure(prefix), - }), - ) => azure + + (MicrosoftAzure(azure), Some(path::Path::MicrosoftAzure(prefix))) => azure .list(Some(prefix)) .await? - .map_ok(|s| { - s.into_iter() - .map(|p| path::Path { - inner: PathRepresentation::MicrosoftAzure(p), - }) - .collect() - }) + .map_ok(|s| s.into_iter().map(path::Path::MicrosoftAzure).collect()) + .err_into() .boxed(), (MicrosoftAzure(azure), None) => azure .list(None) .await? - .map_ok(|s| { - s.into_iter() - .map(|p| path::Path { - inner: PathRepresentation::MicrosoftAzure(p), - }) - .collect() - }) + .map_ok(|s| s.into_iter().map(path::Path::MicrosoftAzure).collect()) + .err_into() .boxed(), _ => unreachable!(), - } - .err_into()) + }) } - /// List objects with the given prefix and an implementation specific - /// delimiter. Returns common prefixes (directories) in addition to object - /// metadata. - pub async fn list_with_delimiter<'a>( - &'a self, - prefix: &'a path::Path, - ) -> Result> { - use path::PathRepresentation; + async fn list_with_delimiter(&self, prefix: &Self::Path) -> Result> { use ObjectStoreIntegration::*; match (&self.0, prefix) { - ( - AmazonS3(s3), - path::Path { - inner: PathRepresentation::AmazonS3(prefix), - }, - ) => { - s3.list_with_delimiter(prefix, &None) - .map_ok(|list_result| { - list_result.map_paths(|p| path::Path { - inner: PathRepresentation::AmazonS3(p), - }) - }) + (AmazonS3(s3), path::Path::AmazonS3(prefix)) => { + s3.list_with_delimiter(prefix) + .map_ok(|list_result| list_result.map_paths(path::Path::AmazonS3)) .await } (GoogleCloudStorage(_gcs), _) => unimplemented!(), - ( - InMemory(in_mem), - path::Path { - inner: path::PathRepresentation::Parts(dirs_and_file_name), - }, - ) => { + (InMemory(in_mem), path::Path::InMemory(prefix)) => { in_mem - .list_with_delimiter(dirs_and_file_name) - .map_ok(|list_result| list_result.map_paths(Into::into)) + .list_with_delimiter(prefix) + .map_ok(|list_result| list_result.map_paths(path::Path::InMemory)) .await } (File(_file), _) => unimplemented!(), @@ -616,10 +537,10 @@ mod tests { type Error = Box; type Result = std::result::Result; - async fn flatten_list_stream( - storage: &ObjectStore, - prefix: Option<&path::Path>, - ) -> Result> { + async fn flatten_list_stream( + storage: &T, + prefix: Option<&T::Path>, + ) -> Result> { storage .list(prefix) .await? 
@@ -629,7 +550,10 @@ mod tests { .await } - pub(crate) async fn put_get_delete_list(storage: &ObjectStore) -> Result<()> { + pub(crate) async fn put_get_delete_list(storage: &T) -> Result<()> + where + T::Path: From, + { delete_fixtures(storage).await; let content_list = flatten_list_stream(storage, None).await?; @@ -685,7 +609,10 @@ mod tests { Ok(()) } - pub(crate) async fn list_with_delimiter(storage: &ObjectStore) -> Result<()> { + pub(crate) async fn list_with_delimiter(storage: &T) -> Result<()> + where + T::Path: From, + { delete_fixtures(storage).await; let content_list = flatten_list_stream(storage, None).await?; @@ -703,7 +630,7 @@ mod tests { "mydb/data/whatevs", ] .iter() - .map(|&s| str_to_path(s, storage)) + .map(|&s| str_to_path(s)) .collect(); let time_before_creation = Utc::now(); @@ -768,9 +695,9 @@ mod tests { Ok(()) } - pub(crate) async fn get_nonexistent_object( - storage: &ObjectStore, - location: Option, + pub(crate) async fn get_nonexistent_object( + storage: &T, + location: Option, ) -> Result { let location = location.unwrap_or_else(|| { let mut loc = storage.new_path(); @@ -790,25 +717,24 @@ mod tests { .freeze()) } - /// Parse a str as a `CloudPath` into a `DirAndFileName` even though the - /// associated storage might not be cloud storage to reuse the cloud + /// Parse a str as a `CloudPath` into a `DirAndFileName`, even though the + /// associated storage might not be cloud storage, to reuse the cloud /// path parsing logic. Then convert into the correct type of path for /// the given storage. - fn str_to_path(val: &str, storage: &ObjectStore) -> path::Path { + fn str_to_path
<P>
(val: &str) -> P + where + P: From + ObjectStorePath, + { let cloud_path = CloudPath::raw(val); let parsed: DirsAndFileName = cloud_path.into(); - let mut path = storage.new_path(); - for dir in parsed.directories { - path.push_dir(dir.to_string()) - } - if let Some(file_name) = parsed.file_name { - path.set_file_name(file_name.to_string()); - } - path + parsed.into() } - async fn delete_fixtures(storage: &ObjectStore) { + async fn delete_fixtures(storage: &T) + where + T::Path: From, + { let files: Vec<_> = [ "test_file", "mydb/wal/000/000/000.segment", @@ -819,7 +745,7 @@ mod tests { "mydb/data/whatevs", ] .iter() - .map(|&s| str_to_path(s, storage)) + .map(|&s| str_to_path(s)) .collect(); for f in &files { diff --git a/object_store/src/memory.rs b/object_store/src/memory.rs index 84c7b081c4..a3145999a5 100644 --- a/object_store/src/memory.rs +++ b/object_store/src/memory.rs @@ -2,11 +2,12 @@ //! store. use crate::{ path::parsed::DirsAndFileName, DataDoesNotMatchLength, ListResult, NoDataInMemory, ObjectMeta, - Result, UnableToPutDataInMemory, + ObjectStoreApi, Result, UnableToPutDataInMemory, }; +use async_trait::async_trait; use bytes::Bytes; use chrono::Utc; -use futures::{Stream, TryStreamExt}; +use futures::{stream::BoxStream, Stream, StreamExt, TryStreamExt}; use snafu::{ensure, OptionExt, ResultExt}; use std::collections::BTreeSet; use std::{collections::BTreeMap, io}; @@ -19,29 +20,15 @@ pub struct InMemory { storage: RwLock>, } -impl InMemory { - /// Create new in-memory storage. - pub fn new() -> Self { - Self::default() - } +#[async_trait] +impl ObjectStoreApi for InMemory { + type Path = DirsAndFileName; - /// Creates a clone of the store - pub async fn clone(&self) -> Self { - let storage = self.storage.read().await; - let storage = storage.clone(); - - Self { - storage: RwLock::new(storage), - } - } - - /// Return a new location path appropriate for this object storage - pub fn new_path(&self) -> DirsAndFileName { + fn new_path(&self) -> Self::Path { DirsAndFileName::default() } - /// Save the provided bytes to the specified location. - pub async fn put(&self, location: &DirsAndFileName, bytes: S, length: usize) -> Result<()> + async fn put(&self, location: &Self::Path, bytes: S, length: usize) -> Result<()> where S: Stream> + Send + Sync + 'static, { @@ -68,11 +55,7 @@ impl InMemory { Ok(()) } - /// Return the bytes that are stored at the specified location. - pub async fn get( - &self, - location: &DirsAndFileName, - ) -> Result>> { + async fn get(&self, location: &Self::Path) -> Result>> { let data = self .storage .read() @@ -81,20 +64,18 @@ impl InMemory { .cloned() .context(NoDataInMemory)?; - Ok(futures::stream::once(async move { Ok(data) })) + Ok(futures::stream::once(async move { Ok(data) }).boxed()) } - /// Delete the object at the specified location. - pub async fn delete(&self, location: &DirsAndFileName) -> Result<()> { - self.storage.write().await.remove(location); + async fn delete(&self, location: &Self::Path) -> Result<()> { + self.storage.write().await.remove(&location); Ok(()) } - /// List all the objects with the given prefix. 
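The reworked test helpers above hinge on a conversion bound rather than on a concrete store type: parse a string once into the canonical `DirsAndFileName` form, then let `From` produce whichever path type the store under test expects. A self-contained sketch of that pattern, with invented `Parsed` and `StorePath` types standing in for `DirsAndFileName` and a store's path type:

    // `Parsed` plays the role of the canonical parsed form; `StorePath`
    // plays the role of one store's concrete path type.
    #[derive(Debug, PartialEq)]
    struct Parsed {
        parts: Vec<String>,
    }

    fn parse(val: &str) -> Parsed {
        Parsed {
            parts: val.split('/').map(str::to_string).collect(),
        }
    }

    #[derive(Debug, PartialEq)]
    struct StorePath(String);

    impl From<Parsed> for StorePath {
        fn from(p: Parsed) -> Self {
            StorePath(p.parts.join("/"))
        }
    }

    // The helper stays store-agnostic: it only asks for `From<Parsed>`.
    fn str_to_path<P: From<Parsed>>(val: &str) -> P {
        parse(val).into()
    }

    fn main() {
        let path: StorePath = str_to_path("mydb/wal/000/000/000.segment");
        assert_eq!(path, StorePath("mydb/wal/000/000/000.segment".into()));
    }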
- pub async fn list<'a>( + async fn list<'a>( &'a self, - prefix: Option<&'a DirsAndFileName>, - ) -> Result>> + 'a> { + prefix: Option<&'a Self::Path>, + ) -> Result>>> { let list = if let Some(prefix) = &prefix { self.storage .read() @@ -107,18 +88,13 @@ impl InMemory { self.storage.read().await.keys().cloned().collect() }; - Ok(futures::stream::once(async move { Ok(list) })) + Ok(futures::stream::once(async move { Ok(list) }).boxed()) } - /// List objects with the given prefix and a set delimiter of `/`. Returns - /// common prefixes (directories) in addition to object metadata. The - /// memory implementation returns all results, as opposed to the cloud + /// The memory implementation returns all results, as opposed to the cloud /// versions which limit their results to 1k or more because of API /// limitations. - pub async fn list_with_delimiter<'a>( - &'a self, - prefix: &'a DirsAndFileName, - ) -> Result> { + async fn list_with_delimiter(&self, prefix: &Self::Path) -> Result> { let mut common_prefixes = BTreeSet::new(); let last_modified = Utc::now(); @@ -158,6 +134,23 @@ impl InMemory { } } +impl InMemory { + /// Create new in-memory storage. + pub fn new() -> Self { + Self::default() + } + + /// Creates a clone of the store + pub async fn clone(&self) -> Self { + let storage = self.storage.read().await; + let storage = storage.clone(); + + Self { + storage: RwLock::new(storage), + } + } +} + #[cfg(test)] mod tests { use super::*; @@ -167,13 +160,13 @@ mod tests { use crate::{ tests::{list_with_delimiter, put_get_delete_list}, - Error, ObjectStore, ObjectStorePath, + Error, ObjectStoreApi, ObjectStorePath, }; use futures::stream; #[tokio::test] async fn in_memory_test() -> Result<()> { - let integration = ObjectStore::new_in_memory(InMemory::new()); + let integration = InMemory::new(); put_get_delete_list(&integration).await?; @@ -184,7 +177,7 @@ mod tests { #[tokio::test] async fn length_mismatch_is_an_error() -> Result<()> { - let integration = ObjectStore::new_in_memory(InMemory::new()); + let integration = InMemory::new(); let bytes = stream::once(async { Ok(Bytes::from("hello world")) }); let mut location = integration.new_path(); diff --git a/object_store/src/path.rs b/object_store/src/path.rs index a00f23f9c8..74a0508326 100644 --- a/object_store/src/path.rs +++ b/object_store/src/path.rs @@ -1,8 +1,6 @@ //! This module contains code for abstracting object locations that work //! across different backing implementations and platforms. -use std::mem; - /// Paths that came from or are to be used in cloud-based object storage pub mod cloud; use cloud::CloudPath; @@ -18,6 +16,9 @@ use parsed::DirsAndFileName; mod parts; use parts::PathPart; +/// The delimiter to separate object namespaces, creating a directory structure. +pub const DELIMITER: &str = "/"; + /// Universal interface for handling paths and locations for objects and /// directories in the object store. /// @@ -44,196 +45,59 @@ pub trait ObjectStorePath: fn display(&self) -> String; } -/// Temporary -#[derive(Default, Clone, PartialEq, Eq, Debug)] -pub struct Path { - /// Temporary - pub inner: PathRepresentation, +/// Defines which object stores use which path logic. 
+#[derive(Clone, PartialEq, Eq, Debug)] +pub enum Path { + /// Amazon storage + AmazonS3(CloudPath), + /// Local file system storage + File(FilePath), + /// GCP storage + GoogleCloudStorage(CloudPath), + /// In memory storage for testing + InMemory(DirsAndFileName), + /// Microsoft Azure Blob storage + MicrosoftAzure(CloudPath), } impl ObjectStorePath for Path { fn set_file_name(&mut self, part: impl Into) { - self.inner = mem::take(&mut self.inner).set_file_name(part); + match self { + Self::AmazonS3(path) => path.set_file_name(part), + Self::File(path) => path.set_file_name(part), + Self::GoogleCloudStorage(path) => path.set_file_name(part), + Self::InMemory(path) => path.set_file_name(part), + Self::MicrosoftAzure(path) => path.set_file_name(part), + } } fn push_dir(&mut self, part: impl Into) { - self.inner = mem::take(&mut self.inner).push_dir(part); + match self { + Self::AmazonS3(path) => path.push_dir(part), + Self::File(path) => path.push_dir(part), + Self::GoogleCloudStorage(path) => path.push_dir(part), + Self::InMemory(path) => path.push_dir(part), + Self::MicrosoftAzure(path) => path.push_dir(part), + } } fn push_all_dirs<'a>(&mut self, parts: impl AsRef<[&'a str]>) { - self.inner = mem::take(&mut self.inner).push_all_dirs(parts); - } - - fn display(&self) -> String { - self.inner.display() - } -} - -impl Path { - /// Add a `PathPart` to the end of the path. - pub fn push_part_as_dir(&mut self, part: &PathPart) { - self.inner = mem::take(&mut self.inner).push_part_as_dir(part); - } - - /// Add the parts of `ObjectStorePath` to the end of the path. Notably does - /// *not* behave as `PathBuf::push` does: there is no way to replace the - /// root. If `self` has a file name, that will be removed, then the - /// directories of `path` will be appended, then any file name of `path` - /// will be assigned to `self`. - pub fn push_path(&mut self, path: &Self) { - self.inner = mem::take(&mut self.inner).push_path(path) - } - - /// Pops a part from the path and returns it, or `None` if it's empty. - pub fn pop(&mut self) -> Option<&PathPart> { - unimplemented!() - } - - /// Returns true if the directories in `prefix` are the same as the starting - /// directories of `self`. - pub fn prefix_matches(&self, prefix: &Self) -> bool { - use PathRepresentation::*; - match (&self.inner, &prefix.inner) { - (Parts(self_parts), Parts(other_parts)) => self_parts.prefix_matches(&other_parts), - (Parts(self_parts), _) => { - let prefix_parts: DirsAndFileName = prefix.into(); - self_parts.prefix_matches(&prefix_parts) - } - (_, Parts(prefix_parts)) => { - let self_parts: DirsAndFileName = self.into(); - self_parts.prefix_matches(&prefix_parts) - } - _ => { - let self_parts: DirsAndFileName = self.into(); - let prefix_parts: DirsAndFileName = prefix.into(); - self_parts.prefix_matches(&prefix_parts) - } + match self { + Self::AmazonS3(path) => path.push_all_dirs(parts), + Self::File(path) => path.push_all_dirs(parts), + Self::GoogleCloudStorage(path) => path.push_all_dirs(parts), + Self::InMemory(path) => path.push_all_dirs(parts), + Self::MicrosoftAzure(path) => path.push_all_dirs(parts), } } -} - -impl From<&'_ DirsAndFileName> for Path { - fn from(other: &'_ DirsAndFileName) -> Self { - other.clone().into() - } -} - -impl From for Path { - fn from(other: DirsAndFileName) -> Self { - Self { - inner: PathRepresentation::Parts(other), - } - } -} - -/// Defines which object stores use which path logic. 
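The removed `PathRepresentation` below funneled every operation through a `DirsAndFileName` conversion; the new `Path` enum above instead keeps each variant in its backend-native representation and forwards each `ObjectStorePath` method to it. A sketch of that forwarding pattern, with all type names invented for illustration:

    trait PathLike {
        fn push_dir(&mut self, part: &str);
        fn display(&self) -> String;
    }

    struct CloudStylePath(Vec<String>);

    impl PathLike for CloudStylePath {
        fn push_dir(&mut self, part: &str) {
            self.0.push(part.to_string());
        }
        fn display(&self) -> String {
            self.0.join("/")
        }
    }

    struct FileStylePath(std::path::PathBuf);

    impl PathLike for FileStylePath {
        fn push_dir(&mut self, part: &str) {
            self.0.push(part);
        }
        fn display(&self) -> String {
            self.0.display().to_string()
        }
    }

    // The wrapper enum implements the same trait by matching on the live
    // variant and forwarding, so callers never see the backend-specific types.
    enum AnyPath {
        Cloud(CloudStylePath),
        File(FileStylePath),
    }

    impl PathLike for AnyPath {
        fn push_dir(&mut self, part: &str) {
            match self {
                Self::Cloud(path) => path.push_dir(part),
                Self::File(path) => path.push_dir(part),
            }
        }
        fn display(&self) -> String {
            match self {
                Self::Cloud(path) => path.display(),
                Self::File(path) => path.display(),
            }
        }
    }

Each method pays one `match`, and no variant is forced through an intermediate representation.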
-#[derive(Clone, Eq, Debug)] -pub enum PathRepresentation { - /// Amazon storage - AmazonS3(CloudPath), - /// GCP storage - GoogleCloudStorage(CloudPath), - /// Microsoft Azure Blob storage - MicrosoftAzure(CloudPath), - /// Local file system storage - File(FilePath), - - /// Will be able to be used directly - Parts(DirsAndFileName), -} - -impl Default for PathRepresentation { - fn default() -> Self { - Self::Parts(DirsAndFileName::default()) - } -} - -impl PathRepresentation { - /// Add a part to the end of the path's directories, encoding any restricted - /// characters. - fn push_dir(self, part: impl Into) -> Self { - let mut dirs_and_file_name: DirsAndFileName = self.into(); - - dirs_and_file_name.push_dir(part); - Self::Parts(dirs_and_file_name) - } - - /// Push a bunch of parts as directories in one go. - fn push_all_dirs<'a>(self, parts: impl AsRef<[&'a str]>) -> Self { - let mut dirs_and_file_name: DirsAndFileName = self.into(); - - dirs_and_file_name.push_all_dirs(parts); - - Self::Parts(dirs_and_file_name) - } - - /// Add a `PathPart` to the end of the path's directories. - fn push_part_as_dir(self, part: &PathPart) -> Self { - let mut dirs_and_file_name: DirsAndFileName = self.into(); - - dirs_and_file_name.push_part_as_dir(part); - Self::Parts(dirs_and_file_name) - } - - /// Add the parts of `ObjectStorePath` to the end of the path. Notably does - /// *not* behave as `PathBuf::push` does: there is no way to replace the - /// root. If `self` has a file name, that will be removed, then the - /// directories of `path` will be appended, then any file name of `path` - /// will be assigned to `self`. - fn push_path(self, path: &Path) -> Self { - let DirsAndFileName { - directories: path_dirs, - file_name: path_file_name, - } = path.inner.to_owned().into(); - let mut dirs_and_file_name: DirsAndFileName = self.into(); - - dirs_and_file_name.directories.extend(path_dirs); - dirs_and_file_name.file_name = path_file_name; - - Self::Parts(dirs_and_file_name) - } - - /// Set the file name of this path - fn set_file_name(self, part: impl Into) -> Self { - let part = part.into(); - let mut dirs_and_file_name: DirsAndFileName = self.into(); - - dirs_and_file_name.file_name = Some((&*part).into()); - Self::Parts(dirs_and_file_name) - } fn display(&self) -> String { match self { - Self::Parts(dirs_and_file_name) => dirs_and_file_name.display(), - Self::AmazonS3(path) | Self::GoogleCloudStorage(path) | Self::MicrosoftAzure(path) => { - path.display() - } + Self::AmazonS3(path) => path.display(), Self::File(path) => path.display(), + Self::GoogleCloudStorage(path) => path.display(), + Self::InMemory(path) => path.display(), + Self::MicrosoftAzure(path) => path.display(), } } } - -impl PartialEq for PathRepresentation { - fn eq(&self, other: &Self) -> bool { - use PathRepresentation::*; - match (self, other) { - (Parts(self_parts), Parts(other_parts)) => self_parts == other_parts, - (Parts(self_parts), _) => { - let other_parts: DirsAndFileName = other.to_owned().into(); - *self_parts == other_parts - } - (_, Parts(other_parts)) => { - let self_parts: DirsAndFileName = self.to_owned().into(); - self_parts == *other_parts - } - _ => { - let self_parts: DirsAndFileName = self.to_owned().into(); - let other_parts: DirsAndFileName = other.to_owned().into(); - self_parts == other_parts - } - } - } -} - -/// The delimiter to separate object namespaces, creating a directory structure. 
-pub const DELIMITER: &str = "/"; diff --git a/object_store/src/path/file.rs b/object_store/src/path/file.rs index d431175542..bd9be76c55 100644 --- a/object_store/src/path/file.rs +++ b/object_store/src/path/file.rs @@ -38,7 +38,9 @@ impl FilePath { } } - /// Creates a file location by converting into a `PathBuf` + /// Creates a filesystem `PathBuf` location by using the standard library's + /// `PathBuf` building implementation appropriate for the current + /// platform. pub fn to_raw(&self) -> PathBuf { use FilePathRepresentation::*; @@ -57,6 +59,7 @@ impl FilePath { } } } + /// Add the parts of `path` to the end of this path. Notably does /// *not* behave as `PathBuf::push` does: there is no way to replace the /// root. If `self` has a file name, that will be removed, then the @@ -159,6 +162,7 @@ impl FilePathRepresentation { Self::Parsed(dirs_and_file_name) } + fn prefix_matches(&self, prefix: &Self) -> bool { use FilePathRepresentation::*; match (self, prefix) { @@ -250,6 +254,7 @@ mod tests { let parts: DirsAndFileName = file_path.into(); assert_eq!(parts.directories.len(), 4); assert!(parts.file_name.is_none()); + // Last section containing a `.` isn't a file name let path_buf: PathBuf = "/one/two/blah.blah".into(); let file_path = FilePath::raw(path_buf); @@ -264,6 +269,7 @@ mod tests { assert_eq!(parts.directories.len(), 4); assert!(parts.file_name.is_none()); } + #[test] fn conversions() { // dir and file name @@ -299,6 +305,7 @@ mod tests { let path_buf: PathBuf = "".into(); let file_path = FilePath::raw(path_buf); let parts: DirsAndFileName = file_path.into(); + assert!(parts.directories.is_empty()); assert!(parts.file_name.is_none()); } diff --git a/object_store/src/path/parsed.rs b/object_store/src/path/parsed.rs index cd13217e95..2d140f1c32 100644 --- a/object_store/src/path/parsed.rs +++ b/object_store/src/path/parsed.rs @@ -1,4 +1,4 @@ -use super::{ObjectStorePath, PathPart, PathRepresentation, DELIMITER}; +use super::{ObjectStorePath, PathPart, DELIMITER}; use itertools::Itertools; @@ -124,34 +124,54 @@ impl DirsAndFileName { } } -impl From for DirsAndFileName { - fn from(path_rep: PathRepresentation) -> Self { - match path_rep { - PathRepresentation::AmazonS3(path) - | PathRepresentation::GoogleCloudStorage(path) - | PathRepresentation::MicrosoftAzure(path) => path.into(), - PathRepresentation::File(path) => path.into(), - PathRepresentation::Parts(dirs_and_file_name) => dirs_and_file_name, - } - } -} - -impl From<&'_ crate::path::Path> for DirsAndFileName { - fn from(other: &'_ crate::path::Path) -> Self { - other.clone().into() - } -} - -impl From for DirsAndFileName { - fn from(other: crate::path::Path) -> Self { - other.inner.into() - } -} - #[cfg(test)] mod tests { use super::*; + #[test] + fn parts_after_prefix_behavior() { + let mut existing_path = DirsAndFileName::default(); + existing_path.push_all_dirs(&["apple", "bear", "cow", "dog"]); + existing_path.file_name = Some("egg.json".into()); + + // Prefix with one directory + let mut prefix = DirsAndFileName::default(); + prefix.push_dir("apple"); + let expected_parts: Vec = vec!["bear", "cow", "dog", "egg.json"] + .into_iter() + .map(Into::into) + .collect(); + let parts = existing_path.parts_after_prefix(&prefix).unwrap(); + assert_eq!(parts, expected_parts); + + // Prefix with two directories + let mut prefix = DirsAndFileName::default(); + prefix.push_all_dirs(&["apple", "bear"]); + let expected_parts: Vec = vec!["cow", "dog", "egg.json"] + .into_iter() + .map(Into::into) + .collect(); + let parts = 
existing_path.parts_after_prefix(&prefix).unwrap(); + assert_eq!(parts, expected_parts); + + // Not a prefix + let mut prefix = DirsAndFileName::default(); + prefix.push_dir("cow"); + assert!(existing_path.parts_after_prefix(&prefix).is_none()); + + // Prefix with a partial directory + let mut prefix = DirsAndFileName::default(); + prefix.push_dir("ap"); + assert!(existing_path.parts_after_prefix(&prefix).is_none()); + + // Prefix matches but there aren't any parts after it + let mut existing_path = DirsAndFileName::default(); + existing_path.push_all_dirs(&["apple", "bear", "cow", "dog"]); + let prefix = existing_path.clone(); + let parts = existing_path.parts_after_prefix(&prefix).unwrap(); + assert!(parts.is_empty()); + } + #[test] fn prefix_matches() { let mut haystack = DirsAndFileName::default(); @@ -267,48 +287,4 @@ mod tests { needle ); } - - #[test] - fn parts_after_prefix_behavior() { - let mut existing_path = DirsAndFileName::default(); - existing_path.push_all_dirs(&["apple", "bear", "cow", "dog"]); - existing_path.file_name = Some("egg.json".into()); - - // Prefix with one directory - let mut prefix = DirsAndFileName::default(); - prefix.push_dir("apple"); - let expected_parts: Vec = vec!["bear", "cow", "dog", "egg.json"] - .into_iter() - .map(Into::into) - .collect(); - let parts = existing_path.parts_after_prefix(&prefix).unwrap(); - assert_eq!(parts, expected_parts); - - // Prefix with two directories - let mut prefix = DirsAndFileName::default(); - prefix.push_all_dirs(&["apple", "bear"]); - let expected_parts: Vec = vec!["cow", "dog", "egg.json"] - .into_iter() - .map(Into::into) - .collect(); - let parts = existing_path.parts_after_prefix(&prefix).unwrap(); - assert_eq!(parts, expected_parts); - - // Not a prefix - let mut prefix = DirsAndFileName::default(); - prefix.push_dir("cow"); - assert!(existing_path.parts_after_prefix(&prefix).is_none()); - - // Prefix with a partial directory - let mut prefix = DirsAndFileName::default(); - prefix.push_dir("ap"); - assert!(existing_path.parts_after_prefix(&prefix).is_none()); - - // Prefix matches but there aren't any parts after it - let mut existing_path = DirsAndFileName::default(); - existing_path.push_all_dirs(&["apple", "bear", "cow", "dog"]); - let prefix = existing_path.clone(); - let parts = existing_path.parts_after_prefix(&prefix).unwrap(); - assert!(parts.is_empty()); - } } diff --git a/server/src/buffer.rs b/server/src/buffer.rs index f142881f84..52d8eb468b 100644 --- a/server/src/buffer.rs +++ b/server/src/buffer.rs @@ -6,7 +6,7 @@ use data_types::{ DatabaseName, }; use generated_types::wal; -use object_store::{path::ObjectStorePath, ObjectStore}; +use object_store::{path::ObjectStorePath, ObjectStore, ObjectStoreApi}; use std::{ collections::BTreeMap, diff --git a/server/src/config.rs b/server/src/config.rs index 92e24e25c7..ad9a61abd7 100644 --- a/server/src/config.rs +++ b/server/src/config.rs @@ -136,7 +136,7 @@ impl<'a> Drop for CreateDatabaseHandle<'a> { #[cfg(test)] mod test { use super::*; - use object_store::{memory::InMemory, ObjectStore}; + use object_store::{memory::InMemory, ObjectStore, ObjectStoreApi}; #[test] fn create_db() { diff --git a/server/src/lib.rs b/server/src/lib.rs index d321c8debd..5037a07169 100644 --- a/server/src/lib.rs +++ b/server/src/lib.rs @@ -86,7 +86,7 @@ use data_types::{ {DatabaseName, DatabaseNameError}, }; use influxdb_line_protocol::ParsedLine; -use object_store::{path::ObjectStorePath, ObjectStore}; +use object_store::{path::ObjectStorePath, ObjectStore, ObjectStoreApi}; 
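A side effect of moving these methods onto a trait: every caller must have `ObjectStoreApi` in scope, which is why the server-side `use` lines above change even though no call sites do. Rust only resolves trait methods when the trait itself is imported; a minimal illustration, with invented module and type names:

    mod store {
        pub trait Api {
            fn ping(&self) -> &'static str;
        }

        pub struct Memory;

        impl Api for Memory {
            fn ping(&self) -> &'static str {
                "pong"
            }
        }
    }

    // Removing `Api` from this import makes `s.ping()` fail to compile with
    // "no method named `ping`", even though `Memory` implements it.
    use store::{Api, Memory};

    fn main() {
        let s = Memory;
        assert_eq!(s.ping(), "pong");
    }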
use query::{exec::Executor, Database, DatabaseStore}; use async_trait::async_trait; diff --git a/server/src/snapshot.rs b/server/src/snapshot.rs index ecb40204fc..7b679c7fd1 100644 --- a/server/src/snapshot.rs +++ b/server/src/snapshot.rs @@ -8,7 +8,7 @@ use data_types::{ partition_metadata::{Partition as PartitionMeta, Table}, selection::Selection, }; -use object_store::{path::ObjectStorePath, ObjectStore}; +use object_store::{path::ObjectStorePath, ObjectStore, ObjectStoreApi}; use query::PartitionChunk; use std::io::{Cursor, Seek, SeekFrom, Write}; diff --git a/src/influxdb_ioxd/http_routes.rs b/src/influxdb_ioxd/http_routes.rs index e1f0e3f54f..5f46f634ff 100644 --- a/src/influxdb_ioxd/http_routes.rs +++ b/src/influxdb_ioxd/http_routes.rs @@ -17,6 +17,7 @@ use data_types::{ DatabaseName, }; use influxdb_line_protocol::parse_lines; +use object_store::ObjectStoreApi; use query::{frontend::sql::SQLQueryPlanner, Database, DatabaseStore}; use server::{ConnectionManager, Server as AppServer}; From f9539f2b748fe46d435dc6c2f8f60dd54120b68e Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Wed, 20 Jan 2021 13:57:10 -0500 Subject: [PATCH 08/10] fix: Remove blanket trait impl now causing a stack overflow --- object_store/src/lib.rs | 39 --------------------------------------- 1 file changed, 39 deletions(-) diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs index f0a8b45ad9..781b214178 100644 --- a/object_store/src/lib.rs +++ b/object_store/src/lib.rs @@ -68,45 +68,6 @@ pub trait ObjectStoreApi: Send + Sync + 'static { async fn list_with_delimiter(&self, prefix: &Self::Path) -> Result>; } -#[async_trait] -impl ObjectStoreApi for T -where - T: std::ops::Deref + Send + Sync + 'static, - T::Target: ObjectStoreApi, -{ - type Path = ::Path; - - fn new_path(&self) -> Self::Path { - unimplemented!() - } - - async fn put(&self, location: &Self::Path, bytes: S, length: usize) -> Result<()> - where - S: Stream> + Send + Sync + 'static, - { - T::put(self, location, bytes, length).await - } - - async fn get(&self, location: &Self::Path) -> Result>> { - T::get(self, location).await - } - - async fn delete(&self, location: &Self::Path) -> Result<()> { - T::delete(self, location).await - } - - async fn list<'a>( - &'a self, - prefix: Option<&'a Self::Path>, - ) -> Result>>> { - T::list(self, prefix).await - } - - async fn list_with_delimiter(&self, prefix: &Self::Path) -> Result> { - T::list_with_delimiter(self, prefix).await - } -} - /// Universal interface to multiple object store services. #[derive(Debug)] pub struct ObjectStore(pub ObjectStoreIntegration); From 5c8b351f574c43160b30aada33aed3db9fbf9a6f Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Wed, 20 Jan 2021 14:22:02 -0500 Subject: [PATCH 09/10] fix: Address clippy suggestions --- object_store/src/lib.rs | 2 ++ server/src/buffer.rs | 2 +- server/src/config.rs | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs index 781b214178..48543f6b6f 100644 --- a/object_store/src/lib.rs +++ b/object_store/src/lib.rs @@ -303,6 +303,7 @@ pub struct ListResult { pub objects: Vec>, } +#[allow(clippy::use_self)] // https://github.com/rust-lang/rust-clippy/issues/3410 impl ListResult
<P: ObjectStorePath>
{ fn map_paths(self, c: C) -> ListResult where @@ -333,6 +334,7 @@ pub struct ObjectMeta { pub size: usize, } +#[allow(clippy::use_self)] // https://github.com/rust-lang/rust-clippy/issues/3410 impl ObjectMeta
<P: ObjectStorePath>
{ fn map_paths(self, c: C) -> ObjectMeta where diff --git a/server/src/buffer.rs b/server/src/buffer.rs index 52d8eb468b..970c80d032 100644 --- a/server/src/buffer.rs +++ b/server/src/buffer.rs @@ -872,7 +872,7 @@ mod tests { assert_eq!(segment_path, expected_segment_path); let segment_path = object_store_path_for_segment(&base_path, 45_010_105).unwrap(); - let mut expected_segment_path = base_path.clone(); + let mut expected_segment_path = base_path; expected_segment_path.push_all_dirs(&["wal", "045", "010"]); expected_segment_path.set_file_name("105.segment"); assert_eq!(segment_path, expected_segment_path); diff --git a/server/src/config.rs b/server/src/config.rs index ad9a61abd7..980d504265 100644 --- a/server/src/config.rs +++ b/server/src/config.rs @@ -164,7 +164,7 @@ mod test { let name = DatabaseName::new("foo").unwrap(); let rules_path = super::object_store_path_for_database_config(&base_path, &name); - let mut expected_path = base_path.clone(); + let mut expected_path = base_path; expected_path.push_dir("foo"); expected_path.set_file_name("rules.json"); From 5d1c7dfe822570c57f6c230e2d4c826690a9414a Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Wed, 20 Jan 2021 14:23:18 -0500 Subject: [PATCH 10/10] docs: Improve descriptive code comments as suggested in review --- object_store/src/lib.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs index 48543f6b6f..b84f34d9f1 100644 --- a/object_store/src/lib.rs +++ b/object_store/src/lib.rs @@ -291,8 +291,8 @@ pub enum ObjectStoreIntegration { } /// Result of a list call that includes objects, prefixes (directories) and a -/// token for the next set of results. Individual results sets are limited to -/// 1,000 objects. +/// token for the next set of results. Individual result sets may be limited to +/// 1,000 objects based on the underlying object storage's limitations. #[derive(Debug)] pub struct ListResult { /// Token passed to the API for the next page of list results. @@ -305,6 +305,9 @@ pub struct ListResult { #[allow(clippy::use_self)] // https://github.com/rust-lang/rust-clippy/issues/3410 impl ListResult
<P: ObjectStorePath>
{ + /// `c` is a function that can turn one type that implements an + /// `ObjectStorePath` to another type that also implements + /// `ObjectStorePath`. fn map_paths(self, c: C) -> ListResult where C: Fn(P) -> Q, @@ -336,6 +339,9 @@ pub struct ObjectMeta { #[allow(clippy::use_self)] // https://github.com/rust-lang/rust-clippy/issues/3410 impl ObjectMeta
<P: ObjectStorePath>
{ + /// `c` is a function that can turn one type that implements an + /// `ObjectStorePath` to another type that also implements + /// `ObjectStorePath`. fn map_paths(self, c: C) -> ObjectMeta where C: Fn(P) -> Q,
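Both `map_paths` implementations follow the same shape: the result types are generic over the path type, and the caller-supplied closure `c` rewrites each contained path into a new path type (this is how `ObjectStore` wraps a backend's paths into the matching `path::Path` variant in the `list_with_delimiter` arms above). A self-contained sketch with an invented `Meta` type:

    // Invented stand-in for ListResult/ObjectMeta: generic over the path
    // type, with a closure that converts every contained path.
    struct Meta<P> {
        location: P,
        size: usize,
    }

    impl<P> Meta<P> {
        fn map_paths<Q, C>(self, c: C) -> Meta<Q>
        where
            C: Fn(P) -> Q,
        {
            Meta {
                location: c(self.location),
                size: self.size,
            }
        }
    }

    fn main() {
        let raw = Meta {
            location: "1/mydb/rules.json".to_string(),
            size: 42,
        };
        // Convert the plain string path into a parts-based path type.
        let parts: Meta<Vec<String>> =
            raw.map_paths(|s| s.split('/').map(str::to_string).collect());
        assert_eq!(parts.location.len(), 3);
        assert_eq!(parts.size, 42);
    }

The `#[allow(clippy::use_self)]` is needed because the method returns `ListResult<Q>` and `ObjectMeta<Q>` with a different type parameter than `Self`, a case the `use_self` lint handled poorly at the time (see the linked rust-clippy issue 3410).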