Merge pull request #675 from influxdata/cn+jg/osp-types

commit 4954cef08d
@@ -2095,6 +2095,7 @@ checksum = "a9a7ab5d64814df0fe4a4b5ead45ed6c5f181ee3ff04ba344313a6c80446c5d4"
 name = "object_store"
 version = "0.1.0"
 dependencies = [
+ "async-trait",
  "azure_core",
  "azure_storage",
  "bytes",
@@ -5,6 +5,7 @@ authors = ["Paul Dix <paul@pauldix.net>"]
 edition = "2018"

 [dependencies]
+async-trait = "0.1.42"
 bytes = "1.0"
 chrono = "0.4"
 futures = "0.3.5"

@@ -1,13 +1,17 @@
 //! This module contains the IOx implementation for using S3 as the object
 //! store.
 use crate::{
-    path::{cloud::CloudConverter, ObjectStorePath, DELIMITER},
-    Error, ListResult, NoDataFromS3, ObjectMeta, Result, UnableToDeleteDataFromS3,
+    path::{cloud::CloudPath, DELIMITER},
+    Error, ListResult, NoDataFromS3, ObjectMeta, ObjectStoreApi, Result, UnableToDeleteDataFromS3,
     UnableToGetDataFromS3, UnableToGetPieceOfDataFromS3, UnableToPutDataToS3,
 };
+use async_trait::async_trait;
 use bytes::Bytes;
 use chrono::{DateTime, Utc};
-use futures::{stream, Stream, TryStreamExt};
+use futures::{
+    stream::{self, BoxStream},
+    Stream, StreamExt, TryStreamExt,
+};
 use rusoto_core::ByteStream;
 use rusoto_credential::ChainProvider;
 use rusoto_s3::S3;
@@ -30,32 +34,15 @@ impl fmt::Debug for AmazonS3 {
     }
 }

-impl AmazonS3 {
-    /// Configure a connection to Amazon S3 in the specified Amazon region and
-    /// bucket. Uses [`rusoto_credential::ChainProvider`][cp] to check for
-    /// credentials in:
-    ///
-    /// 1. Environment variables: `AWS_ACCESS_KEY_ID` and
-    ///    `AWS_SECRET_ACCESS_KEY`
-    /// 2. `credential_process` command in the AWS config file, usually located
-    ///    at `~/.aws/config`.
-    /// 3. AWS credentials file. Usually located at `~/.aws/credentials`.
-    /// 4. IAM instance profile. Will only work if running on an EC2 instance
-    ///    with an instance profile/role.
-    ///
-    /// [cp]: https://docs.rs/rusoto_credential/0.43.0/rusoto_credential/struct.ChainProvider.html
-    pub fn new(region: rusoto_core::Region, bucket_name: impl Into<String>) -> Self {
-        let http_client = rusoto_core::request::HttpClient::new()
-            .expect("Current implementation of rusoto_core has no way for this to fail");
-        let credentials_provider = ChainProvider::new();
-        Self {
-            client: rusoto_s3::S3Client::new_with(http_client, credentials_provider, region),
-            bucket_name: bucket_name.into(),
-        }
-    }
-
-    /// Save the provided bytes to the specified location.
-    pub async fn put<S>(&self, location: &ObjectStorePath, bytes: S, length: usize) -> Result<()>
+#[async_trait]
+impl ObjectStoreApi for AmazonS3 {
+    type Path = CloudPath;
+
+    fn new_path(&self) -> Self::Path {
+        CloudPath::default()
+    }
+
+    async fn put<S>(&self, location: &Self::Path, bytes: S, length: usize) -> Result<()>
     where
         S: Stream<Item = io::Result<Bytes>> + Send + Sync + 'static,
     {
@@ -63,7 +50,7 @@ impl AmazonS3 {

         let put_request = rusoto_s3::PutObjectRequest {
             bucket: self.bucket_name.clone(),
-            key: CloudConverter::convert(&location),
+            key: location.to_raw(),
             body: Some(bytes),
             ..Default::default()
         };
@@ -73,17 +60,13 @@ impl AmazonS3 {
             .await
             .context(UnableToPutDataToS3 {
                 bucket: &self.bucket_name,
-                location: CloudConverter::convert(&location),
+                location: location.to_raw(),
             })?;
         Ok(())
     }

-    /// Return the bytes that are stored at the specified location.
-    pub async fn get(
-        &self,
-        location: &ObjectStorePath,
-    ) -> Result<impl Stream<Item = Result<Bytes>>> {
-        let key = CloudConverter::convert(&location);
+    async fn get(&self, location: &Self::Path) -> Result<BoxStream<'static, Result<Bytes>>> {
+        let key = location.to_raw();
         let get_request = rusoto_s3::GetObjectRequest {
             bucket: self.bucket_name.clone(),
             key: key.clone(),
@@ -106,12 +89,12 @@ impl AmazonS3 {
                 bucket: self.bucket_name.to_owned(),
                 location: key,
             })
-            .err_into())
+            .err_into()
+            .boxed())
     }

-    /// Delete the object at the specified location.
-    pub async fn delete(&self, location: &ObjectStorePath) -> Result<()> {
-        let key = CloudConverter::convert(&location);
+    async fn delete(&self, location: &Self::Path) -> Result<()> {
+        let key = location.to_raw();
         let delete_request = rusoto_s3::DeleteObjectRequest {
             bucket: self.bucket_name.clone(),
             key: key.clone(),
|
@ -128,11 +111,10 @@ impl AmazonS3 {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// List all the objects with the given prefix.
|
||||
pub async fn list<'a>(
|
||||
async fn list<'a>(
|
||||
&'a self,
|
||||
prefix: Option<&'a ObjectStorePath>,
|
||||
) -> Result<impl Stream<Item = Result<Vec<ObjectStorePath>>> + 'a> {
|
||||
prefix: Option<&'a Self::Path>,
|
||||
) -> Result<BoxStream<'a, Result<Vec<Self::Path>>>> {
|
||||
#[derive(Clone)]
|
||||
enum ListState {
|
||||
Start,
|
||||
|
@@ -144,7 +126,7 @@ impl AmazonS3 {
         Ok(stream::unfold(ListState::Start, move |state| async move {
             let mut list_request = rusoto_s3::ListObjectsV2Request {
                 bucket: self.bucket_name.clone(),
-                prefix: prefix.map(CloudConverter::convert),
+                prefix: prefix.map(|p| p.to_raw()),
                 ..Default::default()
             };

@@ -176,7 +158,7 @@ impl AmazonS3 {
             let contents = resp.contents.unwrap_or_default();
             let names = contents
                 .into_iter()
-                .flat_map(|object| object.key.map(ObjectStorePath::from_cloud_unchecked))
+                .flat_map(|object| object.key.map(CloudPath::raw))
                 .collect();

             // The AWS response contains a field named `is_truncated` as well as
@@ -190,17 +172,48 @@ impl AmazonS3 {
             };

             Some((Ok(names), next_state))
-        }))
+        })
+        .boxed())
     }

+    async fn list_with_delimiter(&self, prefix: &Self::Path) -> Result<ListResult<Self::Path>> {
+        self.list_with_delimiter_and_token(prefix, &None).await
+    }
+}
+
+impl AmazonS3 {
+    /// Configure a connection to Amazon S3 in the specified Amazon region and
+    /// bucket. Uses [`rusoto_credential::ChainProvider`][cp] to check for
+    /// credentials in:
+    ///
+    /// 1. Environment variables: `AWS_ACCESS_KEY_ID` and
+    ///    `AWS_SECRET_ACCESS_KEY`
+    /// 2. `credential_process` command in the AWS config file, usually located
+    ///    at `~/.aws/config`.
+    /// 3. AWS credentials file. Usually located at `~/.aws/credentials`.
+    /// 4. IAM instance profile. Will only work if running on an EC2 instance
+    ///    with an instance profile/role.
+    ///
+    /// [cp]: https://docs.rs/rusoto_credential/0.43.0/rusoto_credential/struct.ChainProvider.html
+    pub fn new(region: rusoto_core::Region, bucket_name: impl Into<String>) -> Self {
+        let http_client = rusoto_core::request::HttpClient::new()
+            .expect("Current implementation of rusoto_core has no way for this to fail");
+        let credentials_provider = ChainProvider::new();
+        Self {
+            client: rusoto_s3::S3Client::new_with(http_client, credentials_provider, region),
+            bucket_name: bucket_name.into(),
+        }
+    }
+
     /// List objects with the given prefix and a set delimiter of `/`. Returns
-    /// common prefixes (directories) in addition to object metadata.
-    pub async fn list_with_delimiter<'a>(
+    /// common prefixes (directories) in addition to object metadata. Optionally
+    /// takes a continuation token for paging.
+    pub async fn list_with_delimiter_and_token<'a>(
         &'a self,
-        prefix: &'a ObjectStorePath,
+        prefix: &'a CloudPath,
         next_token: &Option<String>,
-    ) -> Result<ListResult> {
-        let converted_prefix = CloudConverter::convert(prefix);
+    ) -> Result<ListResult<CloudPath>> {
+        let converted_prefix = prefix.to_raw();

         let mut list_request = rusoto_s3::ListObjectsV2Request {
             bucket: self.bucket_name.clone(),
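
The trait-level `list_with_delimiter` above returns only the first page (it forwards a `None` token), so a caller that wants every page drives `list_with_delimiter_and_token` in a loop. A minimal sketch of that loop, assuming bindings `s3: AmazonS3` and a `prefix` built with `new_path()` (both are illustrative, not part of this change):

    let mut token: Option<String> = None;
    loop {
        // Each call returns one page of results plus an optional
        // continuation token for the following page.
        let page = s3.list_with_delimiter_and_token(&prefix, &token).await?;
        for object in &page.objects {
            println!("{} bytes at {:?}", object.size, object.location);
        }
        match page.next_token {
            Some(t) => token = Some(t),
            None => break, // all pages consumed
        }
    }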
@@ -228,9 +241,8 @@ impl AmazonS3 {
         let objects: Vec<_> = contents
             .into_iter()
             .map(|object| {
-                let location = ObjectStorePath::from_cloud_unchecked(
-                    object.key.expect("object doesn't exist without a key"),
-                );
+                let location =
+                    CloudPath::raw(object.key.expect("object doesn't exist without a key"));
                 let last_modified = match object.last_modified {
                     Some(lm) => {
                         DateTime::parse_from_rfc3339(&lm)
@@ -259,11 +271,7 @@ impl AmazonS3 {
             .common_prefixes
             .unwrap_or_default()
             .into_iter()
-            .map(|p| {
-                ObjectStorePath::from_cloud_unchecked(
-                    p.prefix.expect("can't have a prefix without a value"),
-                )
-            })
+            .map(|p| CloudPath::raw(p.prefix.expect("can't have a prefix without a value")))
             .collect();

         let result = ListResult {
@@ -309,9 +317,8 @@ impl Error {
 #[cfg(test)]
 mod tests {
     use crate::{
-        path::ObjectStorePath,
         tests::{get_nonexistent_object, list_with_delimiter, put_get_delete_list},
-        AmazonS3, Error, ObjectStore,
+        AmazonS3, Error, ObjectStoreApi, ObjectStorePath,
     };
     use bytes::Bytes;
     use std::env;
@@ -402,7 +409,7 @@ mod tests {
         maybe_skip_integration!();
         let (region, bucket_name) = region_and_bucket_name()?;

-        let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, &bucket_name));
+        let integration = AmazonS3::new(region, &bucket_name);
         check_credentials(put_get_delete_list(&integration).await)?;

         check_credentials(list_with_delimiter(&integration).await).unwrap();
@@ -416,10 +423,11 @@ mod tests {
         // Assumes environment variables do not provide credentials to AWS US West 1
         let (_, bucket_name) = region_and_bucket_name()?;
         let region = rusoto_core::Region::UsWest1;
-        let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, &bucket_name));
-        let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME);
+        let integration = AmazonS3::new(region, &bucket_name);
+        let mut location = integration.new_path();
+        location.set_file_name(NON_EXISTENT_NAME);

-        let err = get_nonexistent_object(&integration, Some(location_name))
+        let err = get_nonexistent_object(&integration, Some(location))
             .await
             .unwrap_err();
         if let Some(Error::UnableToListDataFromS3 { source, bucket }) =
@@ -438,10 +446,11 @@ mod tests {
     async fn s3_test_get_nonexistent_location() -> Result<()> {
         maybe_skip_integration!();
         let (region, bucket_name) = region_and_bucket_name()?;
-        let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, &bucket_name));
-        let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME);
+        let integration = AmazonS3::new(region, &bucket_name);
+        let mut location = integration.new_path();
+        location.set_file_name(NON_EXISTENT_NAME);

-        let err = get_nonexistent_object(&integration, Some(location_name))
+        let err = get_nonexistent_object(&integration, Some(location))
             .await
             .unwrap_err();
         if let Some(Error::UnableToGetDataFromS3 {
@@ -468,10 +477,11 @@ mod tests {
         maybe_skip_integration!();
         let (region, _) = region_and_bucket_name()?;
         let bucket_name = NON_EXISTENT_NAME;
-        let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, bucket_name));
-        let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME);
+        let integration = AmazonS3::new(region, bucket_name);
+        let mut location = integration.new_path();
+        location.set_file_name(NON_EXISTENT_NAME);

-        let err = get_nonexistent_object(&integration, Some(location_name))
+        let err = get_nonexistent_object(&integration, Some(location))
             .await
             .unwrap_err();
         if let Some(Error::UnableToListDataFromS3 { source, bucket }) =
@@ -495,14 +505,15 @@ mod tests {
         // Assumes environment variables do not provide credentials to AWS US West 1
         let (_, bucket_name) = region_and_bucket_name()?;
         let region = rusoto_core::Region::UsWest1;
-        let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, &bucket_name));
-        let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME);
+        let integration = AmazonS3::new(region, &bucket_name);
+        let mut location = integration.new_path();
+        location.set_file_name(NON_EXISTENT_NAME);
         let data = Bytes::from("arbitrary data");
         let stream_data = std::io::Result::Ok(data.clone());

         let err = integration
             .put(
-                &location_name,
+                &location,
                 futures::stream::once(async move { stream_data }),
                 data.len(),
             )
@@ -530,14 +541,15 @@ mod tests {
         maybe_skip_integration!();
         let (region, _) = region_and_bucket_name()?;
         let bucket_name = NON_EXISTENT_NAME;
-        let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, bucket_name));
-        let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME);
+        let integration = AmazonS3::new(region, bucket_name);
+        let mut location = integration.new_path();
+        location.set_file_name(NON_EXISTENT_NAME);
         let data = Bytes::from("arbitrary data");
         let stream_data = std::io::Result::Ok(data.clone());

         let err = integration
             .put(
-                &location_name,
+                &location,
                 futures::stream::once(async move { stream_data }),
                 data.len(),
             )
@@ -564,10 +576,11 @@ mod tests {
     async fn s3_test_delete_nonexistent_location() -> Result<()> {
         maybe_skip_integration!();
         let (region, bucket_name) = region_and_bucket_name()?;
-        let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, &bucket_name));
-        let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME);
+        let integration = AmazonS3::new(region, &bucket_name);
+        let mut location = integration.new_path();
+        location.set_file_name(NON_EXISTENT_NAME);

-        let result = integration.delete(&location_name).await;
+        let result = integration.delete(&location).await;

         assert!(result.is_ok());

@@ -580,10 +593,11 @@ mod tests {
         // Assumes environment variables do not provide credentials to AWS US West 1
         let (_, bucket_name) = region_and_bucket_name()?;
         let region = rusoto_core::Region::UsWest1;
-        let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, &bucket_name));
-        let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME);
+        let integration = AmazonS3::new(region, &bucket_name);
+        let mut location = integration.new_path();
+        location.set_file_name(NON_EXISTENT_NAME);

-        let err = integration.delete(&location_name).await.unwrap_err();
+        let err = integration.delete(&location).await.unwrap_err();
         if let Error::UnableToDeleteDataFromS3 {
             source,
             bucket,
@@ -605,10 +619,11 @@ mod tests {
         maybe_skip_integration!();
         let (region, _) = region_and_bucket_name()?;
         let bucket_name = NON_EXISTENT_NAME;
-        let integration = ObjectStore::new_amazon_s3(AmazonS3::new(region, bucket_name));
-        let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME);
+        let integration = AmazonS3::new(region, bucket_name);
+        let mut location = integration.new_path();
+        location.set_file_name(NON_EXISTENT_NAME);

-        let err = integration.delete(&location_name).await.unwrap_err();
+        let err = integration.delete(&location).await.unwrap_err();
         if let Error::UnableToDeleteDataFromS3 {
             source,
             bucket,

@@ -1,10 +1,11 @@
 //! This module contains the IOx implementation for using Azure Blob storage as
 //! the object store.
 use crate::{
-    path::{cloud::CloudConverter, ObjectStorePath},
-    DataDoesNotMatchLength, Result, UnableToDeleteDataFromAzure, UnableToGetDataFromAzure,
-    UnableToListDataFromAzure, UnableToPutDataToAzure,
+    path::cloud::CloudPath, DataDoesNotMatchLength, ListResult, ObjectStoreApi, Result,
+    UnableToDeleteDataFromAzure, UnableToGetDataFromAzure, UnableToListDataFromAzure,
+    UnableToPutDataToAzure,
 };
+use async_trait::async_trait;
 use azure_core::HttpClient;
 use azure_storage::{
     clients::{
@@ -13,7 +14,10 @@ use azure_storage::{
     DeleteSnapshotsMethod,
 };
 use bytes::Bytes;
-use futures::{stream, FutureExt, Stream, TryStreamExt};
+use futures::{
+    stream::{self, BoxStream},
+    FutureExt, Stream, StreamExt, TryStreamExt,
+};
 use snafu::{ensure, ResultExt};
 use std::io;
 use std::sync::Arc;
@@ -25,6 +29,135 @@ pub struct MicrosoftAzure {
     container_name: String,
 }

+#[async_trait]
+impl ObjectStoreApi for MicrosoftAzure {
+    type Path = CloudPath;
+
+    fn new_path(&self) -> Self::Path {
+        CloudPath::default()
+    }
+
+    async fn put<S>(&self, location: &Self::Path, bytes: S, length: usize) -> Result<()>
+    where
+        S: Stream<Item = io::Result<Bytes>> + Send + Sync + 'static,
+    {
+        let location = location.to_raw();
+        let temporary_non_streaming = bytes
+            .map_ok(|b| bytes::BytesMut::from(&b[..]))
+            .try_concat()
+            .await
+            .expect("Should have been able to collect streaming data");
+
+        ensure!(
+            temporary_non_streaming.len() == length,
+            DataDoesNotMatchLength {
+                actual: temporary_non_streaming.len(),
+                expected: length,
+            }
+        );
+
+        self.container_client
+            .as_blob_client(&location)
+            .put_block_blob(&temporary_non_streaming)
+            .execute()
+            .await
+            .context(UnableToPutDataToAzure {
+                location: location.to_owned(),
+            })?;
+
+        Ok(())
+    }
+
+    async fn get(&self, location: &Self::Path) -> Result<BoxStream<'static, Result<Bytes>>> {
+        let container_client = self.container_client.clone();
+        let location = location.to_raw();
+        Ok(async move {
+            container_client
+                .as_blob_client(&location)
+                .get()
+                .execute()
+                .await
+                .map(|blob| blob.data.into())
+                .context(UnableToGetDataFromAzure {
+                    location: location.to_owned(),
+                })
+        }
+        .into_stream()
+        .boxed())
+    }
+
+    async fn delete(&self, location: &Self::Path) -> Result<()> {
+        let location = location.to_raw();
+        self.container_client
+            .as_blob_client(&location)
+            .delete()
+            .delete_snapshots_method(DeleteSnapshotsMethod::Include)
+            .execute()
+            .await
+            .context(UnableToDeleteDataFromAzure {
+                location: location.to_owned(),
+            })?;
+
+        Ok(())
+    }
+
+    async fn list<'a>(
+        &'a self,
+        prefix: Option<&'a Self::Path>,
+    ) -> Result<BoxStream<'a, Result<Vec<Self::Path>>>> {
+        #[derive(Clone)]
+        enum ListState {
+            Start,
+            HasMore(String),
+            Done,
+        }
+
+        Ok(stream::unfold(ListState::Start, move |state| async move {
+            let mut request = self.container_client.list_blobs();
+
+            let prefix = prefix.map(|p| p.to_raw());
+            if let Some(ref p) = prefix {
+                request = request.prefix(p as &str);
+            }
+
+            match state {
+                ListState::HasMore(ref marker) => {
+                    request = request.next_marker(marker as &str);
+                }
+                ListState::Done => {
+                    return None;
+                }
+                ListState::Start => {}
+            }
+
+            let resp = match request.execute().await.context(UnableToListDataFromAzure) {
+                Ok(resp) => resp,
+                Err(err) => return Some((Err(err), state)),
+            };
+
+            let next_state = if let Some(marker) = resp.incomplete_vector.next_marker() {
+                ListState::HasMore(marker.as_str().to_string())
+            } else {
+                ListState::Done
+            };
+
+            let names = resp
+                .incomplete_vector
+                .vector
+                .into_iter()
+                .map(|blob| CloudPath::raw(blob.name))
+                .collect();
+
+            Some((Ok(names), next_state))
+        })
+        .boxed())
+    }
+
+    async fn list_with_delimiter(&self, _prefix: &Self::Path) -> Result<ListResult<Self::Path>> {
+        unimplemented!();
+    }
+}
+
 impl MicrosoftAzure {
     /// Configure a connection to container with given name on Microsoft Azure
     /// Blob store.
|
@ -64,133 +197,12 @@ impl MicrosoftAzure {
|
|||
|
||||
Self::new(account, master_key, container_name)
|
||||
}
|
||||
|
||||
/// Save the provided bytes to the specified location.
|
||||
pub async fn put<S>(&self, location: &ObjectStorePath, bytes: S, length: usize) -> Result<()>
|
||||
where
|
||||
S: Stream<Item = io::Result<Bytes>> + Send + Sync + 'static,
|
||||
{
|
||||
let location = CloudConverter::convert(&location);
|
||||
let temporary_non_streaming = bytes
|
||||
.map_ok(|b| bytes::BytesMut::from(&b[..]))
|
||||
.try_concat()
|
||||
.await
|
||||
.expect("Should have been able to collect streaming data");
|
||||
|
||||
ensure!(
|
||||
temporary_non_streaming.len() == length,
|
||||
DataDoesNotMatchLength {
|
||||
actual: temporary_non_streaming.len(),
|
||||
expected: length,
|
||||
}
|
||||
);
|
||||
|
||||
self.container_client
|
||||
.as_blob_client(&location)
|
||||
.put_block_blob(&temporary_non_streaming)
|
||||
.execute()
|
||||
.await
|
||||
.context(UnableToPutDataToAzure {
|
||||
location: location.to_owned(),
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Return the bytes that are stored at the specified location.
|
||||
pub async fn get(
|
||||
&self,
|
||||
location: &ObjectStorePath,
|
||||
) -> Result<impl Stream<Item = Result<Bytes>>> {
|
||||
let container_client = self.container_client.clone();
|
||||
let location = CloudConverter::convert(&location);
|
||||
Ok(async move {
|
||||
container_client
|
||||
.as_blob_client(&location)
|
||||
.get()
|
||||
.execute()
|
||||
.await
|
||||
.map(|blob| blob.data.into())
|
||||
.context(UnableToGetDataFromAzure {
|
||||
location: location.to_owned(),
|
||||
})
|
||||
}
|
||||
.into_stream())
|
||||
}
|
||||
|
||||
/// Delete the object at the specified location.
|
||||
pub async fn delete(&self, location: &ObjectStorePath) -> Result<()> {
|
||||
let location = CloudConverter::convert(&location);
|
||||
self.container_client
|
||||
.as_blob_client(&location)
|
||||
.delete()
|
||||
.delete_snapshots_method(DeleteSnapshotsMethod::Include)
|
||||
.execute()
|
||||
.await
|
||||
.context(UnableToDeleteDataFromAzure {
|
||||
location: location.to_owned(),
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// List all the objects with the given prefix.
|
||||
pub async fn list<'a>(
|
||||
&'a self,
|
||||
prefix: Option<&'a ObjectStorePath>,
|
||||
) -> Result<impl Stream<Item = Result<Vec<ObjectStorePath>>> + 'a> {
|
||||
#[derive(Clone)]
|
||||
enum ListState {
|
||||
Start,
|
||||
HasMore(String),
|
||||
Done,
|
||||
}
|
||||
|
||||
Ok(stream::unfold(ListState::Start, move |state| async move {
|
||||
let mut request = self.container_client.list_blobs();
|
||||
|
||||
let prefix = prefix.map(CloudConverter::convert);
|
||||
if let Some(ref p) = prefix {
|
||||
request = request.prefix(p as &str);
|
||||
}
|
||||
|
||||
match state {
|
||||
ListState::HasMore(ref marker) => {
|
||||
request = request.next_marker(marker as &str);
|
||||
}
|
||||
ListState::Done => {
|
||||
return None;
|
||||
}
|
||||
ListState::Start => {}
|
||||
}
|
||||
|
||||
let resp = match request.execute().await.context(UnableToListDataFromAzure) {
|
||||
Ok(resp) => resp,
|
||||
Err(err) => return Some((Err(err), state)),
|
||||
};
|
||||
|
||||
let next_state = if let Some(marker) = resp.incomplete_vector.next_marker() {
|
||||
ListState::HasMore(marker.as_str().to_string())
|
||||
} else {
|
||||
ListState::Done
|
||||
};
|
||||
|
||||
let names = resp
|
||||
.incomplete_vector
|
||||
.vector
|
||||
.into_iter()
|
||||
.map(|blob| ObjectStorePath::from_cloud_unchecked(blob.name))
|
||||
.collect();
|
||||
|
||||
Some((Ok(names), next_state))
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::{tests::put_get_delete_list, ObjectStore};
|
||||
use crate::tests::put_get_delete_list;
|
||||
use std::env;
|
||||
|
||||
type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
|
||||
|
@@ -245,9 +257,8 @@ mod tests {

         let container_name = env::var("AZURE_STORAGE_CONTAINER")
             .map_err(|_| "The environment variable AZURE_STORAGE_CONTAINER must be set")?;
-        let azure = MicrosoftAzure::new_from_env(container_name);
+        let integration = MicrosoftAzure::new_from_env(container_name);

-        let integration = ObjectStore::new_microsoft_azure(azure);
         put_get_delete_list(&integration).await?;

         Ok(())

@@ -1,12 +1,16 @@
 //! This module contains the IOx implementation for using local disk as the
 //! object store.
 use crate::{
-    path::{file::FileConverter, ObjectStorePath},
-    DataDoesNotMatchLength, Result, UnableToCopyDataToFile, UnableToCreateDir, UnableToCreateFile,
-    UnableToDeleteFile, UnableToOpenFile, UnableToPutDataInMemory, UnableToReadBytes,
+    path::file::FilePath, DataDoesNotMatchLength, ListResult, ObjectStoreApi, Result,
+    UnableToCopyDataToFile, UnableToCreateDir, UnableToCreateFile, UnableToDeleteFile,
+    UnableToOpenFile, UnableToPutDataInMemory, UnableToReadBytes,
 };
+use async_trait::async_trait;
 use bytes::Bytes;
-use futures::{stream, Stream, TryStreamExt};
+use futures::{
+    stream::{self, BoxStream},
+    Stream, StreamExt, TryStreamExt,
+};
 use snafu::{ensure, futures::TryStreamExt as _, OptionExt, ResultExt};
 use std::{io, path::PathBuf};
 use tokio::fs;
@@ -17,25 +21,18 @@ use walkdir::WalkDir;
 /// cloud storage provider.
 #[derive(Debug)]
 pub struct File {
-    root: ObjectStorePath,
+    root: FilePath,
 }

-impl File {
-    /// Create new filesystem storage.
-    pub fn new(root: impl Into<PathBuf>) -> Self {
-        Self {
-            root: ObjectStorePath::from_path_buf_unchecked(root),
-        }
-    }
-
-    fn path(&self, location: &ObjectStorePath) -> PathBuf {
-        let mut path = self.root.clone();
-        path.push_path(location);
-        FileConverter::convert(&path)
-    }
-
-    /// Save the provided bytes to the specified location.
-    pub async fn put<S>(&self, location: &ObjectStorePath, bytes: S, length: usize) -> Result<()>
+#[async_trait]
+impl ObjectStoreApi for File {
+    type Path = FilePath;
+
+    fn new_path(&self) -> Self::Path {
+        FilePath::default()
+    }
+
+    async fn put<S>(&self, location: &Self::Path, bytes: S, length: usize) -> Result<()>
     where
         S: Stream<Item = io::Result<Bytes>> + Send + Sync + 'static,
     {
@@ -80,11 +77,7 @@ impl File {
         Ok(())
     }

-    /// Return the bytes that are stored at the specified location.
-    pub async fn get(
-        &self,
-        location: &ObjectStorePath,
-    ) -> Result<impl Stream<Item = Result<Bytes>>> {
+    async fn get(&self, location: &Self::Path) -> Result<BoxStream<'static, Result<Bytes>>> {
         let path = self.path(location);

         let file = fs::File::open(&path)
@@ -94,11 +87,10 @@ impl File {
         let s = FramedRead::new(file, BytesCodec::new())
             .map_ok(|b| b.freeze())
             .context(UnableToReadBytes { path });
-        Ok(s)
+        Ok(s.boxed())
     }

-    /// Delete the object at the specified location.
-    pub async fn delete(&self, location: &ObjectStorePath) -> Result<()> {
+    async fn delete(&self, location: &Self::Path) -> Result<()> {
         let path = self.path(location);
         fs::remove_file(&path)
             .await
@@ -106,12 +98,11 @@ impl File {
         Ok(())
     }

-    /// List all the objects with the given prefix.
-    pub async fn list<'a>(
+    async fn list<'a>(
         &'a self,
-        prefix: Option<&'a ObjectStorePath>,
-    ) -> Result<impl Stream<Item = Result<Vec<ObjectStorePath>>> + 'a> {
-        let root_path = FileConverter::convert(&self.root);
+        prefix: Option<&'a Self::Path>,
+    ) -> Result<BoxStream<'a, Result<Vec<Self::Path>>>> {
+        let root_path = self.root.to_raw();
         let walkdir = WalkDir::new(&root_path)
             // Don't include the root directory itself
             .min_depth(1);
@@ -124,13 +115,32 @@ impl File {
                     let relative_path = file.path().strip_prefix(&root_path).expect(
                         "Must start with root path because this came from walking the root",
                     );
-                    ObjectStorePath::from_path_buf_unchecked(relative_path)
+                    FilePath::raw(relative_path)
                 })
                 .filter(|name| prefix.map_or(true, |p| name.prefix_matches(p)))
                 .map(|name| Ok(vec![name]))
         });

-        Ok(stream::iter(s))
+        Ok(stream::iter(s).boxed())
     }
+
+    async fn list_with_delimiter(&self, _prefix: &Self::Path) -> Result<ListResult<Self::Path>> {
+        unimplemented!()
+    }
+}
+
+impl File {
+    /// Create new filesystem storage.
+    pub fn new(root: impl Into<PathBuf>) -> Self {
+        Self {
+            root: FilePath::raw(root),
+        }
+    }
+
+    fn path(&self, location: &FilePath) -> PathBuf {
+        let mut path = self.root.clone();
+        path.push_path(location);
+        path.to_raw()
+    }
 }

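With these changes the local-disk store is driven entirely through the trait, the same way the cloud backends are. A rough usage sketch (the directory, segment names, and the `roundtrip` wrapper are made up for illustration; assumes a tokio runtime):

    async fn roundtrip(store: &File) -> Result<()> {
        // Build a location with the store's own path type.
        let mut location = store.new_path();
        location.push_all_dirs(&["wal", "segments"]);
        location.set_file_name("000.segment");

        // `put` takes a stream of byte chunks plus the expected total length.
        let data = bytes::Bytes::from("arbitrary data");
        let len = data.len();
        let stream = futures::stream::once(async move { std::io::Result::Ok(data) });
        store.put(&location, stream, len).await?;
        Ok(())
    }

called with something like `roundtrip(&File::new("/tmp/iox-data")).await`.
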
@@ -143,13 +153,13 @@ mod tests {

     use tempfile::TempDir;

-    use crate::{tests::put_get_delete_list, Error, ObjectStore};
+    use crate::{tests::put_get_delete_list, Error, ObjectStoreApi, ObjectStorePath};
     use futures::stream;

     #[tokio::test]
     async fn file_test() -> Result<()> {
         let root = TempDir::new()?;
-        let integration = ObjectStore::new_file(File::new(root.path()));
+        let integration = File::new(root.path());

         put_get_delete_list(&integration).await?;
         Ok(())
@@ -158,10 +168,11 @@ mod tests {
     #[tokio::test]
     async fn length_mismatch_is_an_error() -> Result<()> {
         let root = TempDir::new()?;
-        let integration = ObjectStore::new_file(File::new(root.path()));
+        let integration = File::new(root.path());

         let bytes = stream::once(async { Ok(Bytes::from("hello world")) });
-        let location = ObjectStorePath::from_path_buf_unchecked("junk");
+        let mut location = integration.new_path();
+        location.set_file_name("junk");
         let res = integration.put(&location, bytes, 0).await;

         assert!(matches!(
@@ -178,14 +189,14 @@ mod tests {
     #[tokio::test]
     async fn creates_dir_if_not_present() -> Result<()> {
         let root = TempDir::new()?;
-        let storage = ObjectStore::new_file(File::new(root.path()));
+        let integration = File::new(root.path());

         let data = Bytes::from("arbitrary data");
-        let mut location = ObjectStorePath::default();
+        let mut location = integration.new_path();
         location.push_all_dirs(&["nested", "file", "test_file"]);

         let stream_data = std::io::Result::Ok(data.clone());
-        storage
+        integration
             .put(
                 &location,
                 futures::stream::once(async move { stream_data }),
@@ -193,7 +204,7 @@ mod tests {
             )
             .await?;

-        let read_data = storage
+        let read_data = integration
             .get(&location)
             .await?
             .map_ok(|b| bytes::BytesMut::from(&b[..]))

@@ -1,12 +1,16 @@
 //! This module contains the IOx implementation for using Google Cloud Storage
 //! as the object store.
 use crate::{
-    path::{cloud::CloudConverter, ObjectStorePath},
-    DataDoesNotMatchLength, Result, UnableToDeleteDataFromGcs, UnableToGetDataFromGcs,
-    UnableToListDataFromGcs, UnableToListDataFromGcs2, UnableToPutDataToGcs,
+    path::cloud::CloudPath, DataDoesNotMatchLength, ListResult, ObjectStoreApi, Result,
+    UnableToDeleteDataFromGcs, UnableToGetDataFromGcs, UnableToListDataFromGcs,
+    UnableToListDataFromGcs2, UnableToPutDataToGcs,
 };
+use async_trait::async_trait;
 use bytes::Bytes;
-use futures::{stream, Stream, StreamExt, TryStreamExt};
+use futures::{
+    stream::{self, BoxStream},
+    Stream, StreamExt, TryStreamExt,
+};
 use snafu::{ensure, futures::TryStreamExt as _, ResultExt};
 use std::io;

@@ -16,16 +20,15 @@ pub struct GoogleCloudStorage {
     bucket_name: String,
 }

-impl GoogleCloudStorage {
-    /// Configure a connection to Google Cloud Storage.
-    pub fn new(bucket_name: impl Into<String>) -> Self {
-        Self {
-            bucket_name: bucket_name.into(),
-        }
-    }
-
-    /// Save the provided bytes to the specified location.
-    pub async fn put<S>(&self, location: &ObjectStorePath, bytes: S, length: usize) -> Result<()>
+#[async_trait]
+impl ObjectStoreApi for GoogleCloudStorage {
+    type Path = CloudPath;
+
+    fn new_path(&self) -> Self::Path {
+        CloudPath::default()
+    }
+
+    async fn put<S>(&self, location: &Self::Path, bytes: S, length: usize) -> Result<()>
     where
         S: Stream<Item = io::Result<Bytes>> + Send + Sync + 'static,
     {
@@ -44,7 +47,7 @@ impl GoogleCloudStorage {
             }
         );

-        let location = CloudConverter::convert(&location);
+        let location = location.to_raw();
         let location_copy = location.clone();
         let bucket_name = self.bucket_name.clone();

@@ -63,12 +66,8 @@ impl GoogleCloudStorage {
         Ok(())
     }

-    /// Return the bytes that are stored at the specified location.
-    pub async fn get(
-        &self,
-        location: &ObjectStorePath,
-    ) -> Result<impl Stream<Item = Result<Bytes>>> {
-        let location = CloudConverter::convert(&location);
+    async fn get(&self, location: &Self::Path) -> Result<BoxStream<'static, Result<Bytes>>> {
+        let location = location.to_raw();
         let location_copy = location.clone();
         let bucket_name = self.bucket_name.clone();

@@ -79,12 +78,11 @@ impl GoogleCloudStorage {
                 location,
             })?;

-        Ok(futures::stream::once(async move { Ok(bytes.into()) }))
+        Ok(futures::stream::once(async move { Ok(bytes.into()) }).boxed())
     }

-    /// Delete the object at the specified location.
-    pub async fn delete(&self, location: &ObjectStorePath) -> Result<()> {
-        let location = CloudConverter::convert(&location);
+    async fn delete(&self, location: &Self::Path) -> Result<()> {
+        let location = location.to_raw();
         let location_copy = location.clone();
         let bucket_name = self.bucket_name.clone();

|
@ -98,14 +96,13 @@ impl GoogleCloudStorage {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// List all the objects with the given prefix.
|
||||
pub async fn list<'a>(
|
||||
async fn list<'a>(
|
||||
&'a self,
|
||||
prefix: Option<&'a ObjectStorePath>,
|
||||
) -> Result<impl Stream<Item = Result<Vec<ObjectStorePath>>> + 'a> {
|
||||
prefix: Option<&'a Self::Path>,
|
||||
) -> Result<BoxStream<'a, Result<Vec<Self::Path>>>> {
|
||||
let objects = match prefix {
|
||||
Some(prefix) => {
|
||||
let cloud_prefix = CloudConverter::convert(prefix);
|
||||
let cloud_prefix = prefix.to_raw();
|
||||
let list = cloud_storage::Object::list_prefix(&self.bucket_name, &cloud_prefix)
|
||||
.await
|
||||
.context(UnableToListDataFromGcs {
|
||||
|
@@ -128,23 +125,35 @@ impl GoogleCloudStorage {
         let objects = objects
             .map_ok(|list| {
                 list.into_iter()
-                    .map(|o| ObjectStorePath::from_cloud_unchecked(o.name))
-                    .collect::<Vec<ObjectStorePath>>()
+                    .map(|o| CloudPath::raw(o.name))
+                    .collect::<Vec<_>>()
             })
             .context(UnableToListDataFromGcs2 {
                 bucket: &self.bucket_name,
             });

-        Ok(objects)
+        Ok(objects.boxed())
     }
+
+    async fn list_with_delimiter(&self, _prefix: &Self::Path) -> Result<ListResult<Self::Path>> {
+        unimplemented!();
+    }
+}
+
+impl GoogleCloudStorage {
+    /// Configure a connection to Google Cloud Storage.
+    pub fn new(bucket_name: impl Into<String>) -> Self {
+        Self {
+            bucket_name: bucket_name.into(),
+        }
+    }
 }

 #[cfg(test)]
 mod test {
     use crate::{
-        path::ObjectStorePath,
         tests::{get_nonexistent_object, put_get_delete_list},
-        Error, GoogleCloudStorage, ObjectStore,
+        Error, GoogleCloudStorage, ObjectStoreApi, ObjectStorePath,
     };
     use bytes::Bytes;
     use std::env;
@@ -186,8 +195,7 @@ mod test {
         maybe_skip_integration!();
         let bucket_name = bucket_name()?;

-        let integration =
-            ObjectStore::new_google_cloud_storage(GoogleCloudStorage::new(&bucket_name));
+        let integration = GoogleCloudStorage::new(&bucket_name);
         put_get_delete_list(&integration).await?;
         Ok(())
     }
@@ -196,11 +204,12 @@ mod test {
     async fn gcs_test_get_nonexistent_location() -> Result<()> {
         maybe_skip_integration!();
         let bucket_name = bucket_name()?;
-        let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME);
-        let integration =
-            ObjectStore::new_google_cloud_storage(GoogleCloudStorage::new(&bucket_name));
+        let integration = GoogleCloudStorage::new(&bucket_name);

-        let result = get_nonexistent_object(&integration, Some(location_name)).await?;
+        let mut location = integration.new_path();
+        location.set_file_name(NON_EXISTENT_NAME);
+
+        let result = get_nonexistent_object(&integration, Some(location)).await?;

         assert_eq!(
             result,
@@ -217,11 +226,11 @@ mod test {
     async fn gcs_test_get_nonexistent_bucket() -> Result<()> {
         maybe_skip_integration!();
         let bucket_name = NON_EXISTENT_NAME;
-        let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME);
-        let integration =
-            ObjectStore::new_google_cloud_storage(GoogleCloudStorage::new(bucket_name));
+        let integration = GoogleCloudStorage::new(bucket_name);
+        let mut location = integration.new_path();
+        location.set_file_name(NON_EXISTENT_NAME);

-        let result = get_nonexistent_object(&integration, Some(location_name)).await?;
+        let result = get_nonexistent_object(&integration, Some(location)).await?;

         assert_eq!(result, Bytes::from("Not Found"));

@@ -232,11 +241,12 @@ mod test {
    async fn gcs_test_delete_nonexistent_location() -> Result<()> {
        maybe_skip_integration!();
        let bucket_name = bucket_name()?;
-        let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME);
-        let integration =
-            ObjectStore::new_google_cloud_storage(GoogleCloudStorage::new(&bucket_name));
+        let integration = GoogleCloudStorage::new(&bucket_name);

-        let err = integration.delete(&location_name).await.unwrap_err();
+        let mut location = integration.new_path();
+        location.set_file_name(NON_EXISTENT_NAME);
+
+        let err = integration.delete(&location).await.unwrap_err();

         if let Error::UnableToDeleteDataFromGcs {
             source,
@@ -258,11 +268,12 @@ mod test {
     async fn gcs_test_delete_nonexistent_bucket() -> Result<()> {
         maybe_skip_integration!();
         let bucket_name = NON_EXISTENT_NAME;
-        let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME);
-        let integration =
-            ObjectStore::new_google_cloud_storage(GoogleCloudStorage::new(bucket_name));
+        let integration = GoogleCloudStorage::new(bucket_name);

-        let err = integration.delete(&location_name).await.unwrap_err();
+        let mut location = integration.new_path();
+        location.set_file_name(NON_EXISTENT_NAME);
+
+        let err = integration.delete(&location).await.unwrap_err();

         if let Error::UnableToDeleteDataFromGcs {
             source,
@@ -284,15 +295,16 @@ mod test {
     async fn gcs_test_put_nonexistent_bucket() -> Result<()> {
         maybe_skip_integration!();
         let bucket_name = NON_EXISTENT_NAME;
-        let location_name = ObjectStorePath::from_cloud_unchecked(NON_EXISTENT_NAME);
-        let integration =
-            ObjectStore::new_google_cloud_storage(GoogleCloudStorage::new(bucket_name));
+        let integration = GoogleCloudStorage::new(bucket_name);
+        let mut location = integration.new_path();
+        location.set_file_name(NON_EXISTENT_NAME);
+
         let data = Bytes::from("arbitrary data");
         let stream_data = std::io::Result::Ok(data.clone());

         let result = integration
             .put(
-                &location_name,
+                &location,
                 futures::stream::once(async move { stream_data }),
                 data.len(),
             )

@@ -29,12 +29,45 @@ use gcp::GoogleCloudStorage;
 use memory::InMemory;
 use path::ObjectStorePath;

+use async_trait::async_trait;
 use bytes::Bytes;
 use chrono::{DateTime, Utc};
-use futures::{Stream, StreamExt, TryStreamExt};
+use futures::{stream::BoxStream, Stream, StreamExt, TryFutureExt, TryStreamExt};
 use snafu::Snafu;
 use std::{io, path::PathBuf};

+/// Universal API to multiple object store services.
+#[async_trait]
+pub trait ObjectStoreApi: Send + Sync + 'static {
+    /// The type of the locations used in interacting with this object store.
+    type Path: path::ObjectStorePath;
+
+    /// Return a new location path appropriate for this object storage
+    fn new_path(&self) -> Self::Path;
+
+    /// Save the provided bytes to the specified location.
+    async fn put<S>(&self, location: &Self::Path, bytes: S, length: usize) -> Result<()>
+    where
+        S: Stream<Item = io::Result<Bytes>> + Send + Sync + 'static;
+
+    /// Return the bytes that are stored at the specified location.
+    async fn get(&self, location: &Self::Path) -> Result<BoxStream<'static, Result<Bytes>>>;
+
+    /// Delete the object at the specified location.
+    async fn delete(&self, location: &Self::Path) -> Result<()>;
+
+    /// List all the objects with the given prefix.
+    async fn list<'a>(
+        &'a self,
+        prefix: Option<&'a Self::Path>,
+    ) -> Result<BoxStream<'a, Result<Vec<Self::Path>>>>;
+
+    /// List objects with the given prefix and an implementation specific
+    /// delimiter. Returns common prefixes (directories) in addition to object
+    /// metadata.
+    async fn list_with_delimiter(&self, prefix: &Self::Path) -> Result<ListResult<Self::Path>>;
+}
+
 /// Universal interface to multiple object store services.
 #[derive(Debug)]
 pub struct ObjectStore(pub ObjectStoreIntegration);
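
The point of the trait is that storage-agnostic code can now be written once against `ObjectStoreApi` instead of against the `ObjectStore` wrapper, which is exactly how the tests below are reworked. A minimal sketch of such a generic caller (the function itself is illustrative, not part of this change):

    async fn count_objects<T: ObjectStoreApi>(store: &T) -> Result<usize> {
        let mut total = 0;
        // `list` yields pages of paths (Vec<T::Path>), not single entries.
        let mut pages = store.list(None).await?;
        while let Some(page) = pages.try_next().await? {
            total += page.len();
        }
        Ok(total)
    }

This compiles for `AmazonS3`, `GoogleCloudStorage`, `MicrosoftAzure`, `File`, the in-memory store, or the `ObjectStore` wrapper itself, since all of them implement the trait.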
@@ -64,99 +97,180 @@ impl ObjectStore {
     pub fn new_microsoft_azure(azure: MicrosoftAzure) -> Self {
         Self(ObjectStoreIntegration::MicrosoftAzure(Box::new(azure)))
     }
+}

-    /// Save the provided bytes to the specified location.
-    pub async fn put<S>(&self, location: &ObjectStorePath, bytes: S, length: usize) -> Result<()>
+#[async_trait]
+impl ObjectStoreApi for ObjectStore {
+    type Path = path::Path;
+
+    fn new_path(&self) -> Self::Path {
+        use ObjectStoreIntegration::*;
+        match &self.0 {
+            AmazonS3(s3) => path::Path::AmazonS3(s3.new_path()),
+            GoogleCloudStorage(gcs) => path::Path::GoogleCloudStorage(gcs.new_path()),
+            InMemory(in_mem) => path::Path::InMemory(in_mem.new_path()),
+            File(file) => path::Path::File(file.new_path()),
+            MicrosoftAzure(azure) => path::Path::MicrosoftAzure(azure.new_path()),
+        }
+    }
+
+    async fn put<S>(&self, location: &Self::Path, bytes: S, length: usize) -> Result<()>
     where
         S: Stream<Item = io::Result<Bytes>> + Send + Sync + 'static,
     {
         use ObjectStoreIntegration::*;
-        match &self.0 {
-            AmazonS3(s3) => s3.put(location, bytes, length).await?,
-            GoogleCloudStorage(gcs) => gcs.put(location, bytes, length).await?,
-            InMemory(in_mem) => in_mem.put(location, bytes, length).await?,
-            File(file) => file.put(location, bytes, length).await?,
-            MicrosoftAzure(azure) => azure.put(location, bytes, length).await?,
-        }
-
-        Ok(())
-    }
-
-    /// Return the bytes that are stored at the specified location.
-    pub async fn get(
-        &self,
-        location: &ObjectStorePath,
-    ) -> Result<impl Stream<Item = Result<Bytes>>> {
-        use ObjectStoreIntegration::*;
-        Ok(match &self.0 {
-            AmazonS3(s3) => s3.get(location).await?.boxed(),
-            GoogleCloudStorage(gcs) => gcs.get(location).await?.boxed(),
-            InMemory(in_mem) => in_mem.get(location).await?.boxed(),
-            File(file) => file.get(location).await?.boxed(),
-            MicrosoftAzure(azure) => azure.get(location).await?.boxed(),
-        }
-        .err_into())
-    }
-
-    /// Delete the object at the specified location.
-    pub async fn delete(&self, location: &ObjectStorePath) -> Result<()> {
-        use ObjectStoreIntegration::*;
-        match &self.0 {
-            AmazonS3(s3) => s3.delete(location).await?,
-            GoogleCloudStorage(gcs) => gcs.delete(location).await?,
-            InMemory(in_mem) => in_mem.delete(location).await?,
-            File(file) => file.delete(location).await?,
-            MicrosoftAzure(azure) => azure.delete(location).await?,
-        }
-
-        Ok(())
-    }
-
-    /// List all the objects with the given prefix.
-    pub async fn list<'a>(
-        &'a self,
-        prefix: Option<&'a ObjectStorePath>,
-    ) -> Result<impl Stream<Item = Result<Vec<ObjectStorePath>>> + 'a> {
-        use ObjectStoreIntegration::*;
-        Ok(match &self.0 {
-            AmazonS3(s3) => s3.list(prefix).await?.boxed(),
-            GoogleCloudStorage(gcs) => gcs.list(prefix).await?.boxed(),
-            InMemory(in_mem) => in_mem.list(prefix).await?.boxed(),
-            File(file) => file.list(prefix).await?.boxed(),
-            MicrosoftAzure(azure) => azure.list(prefix).await?.boxed(),
-        }
-        .err_into())
-    }
-
-    /// List objects with the given prefix and an implementation specific
-    /// delimiter. Returns common prefixes (directories) in addition to object
-    /// metadata.
-    pub async fn list_with_delimiter<'a>(
-        &'a self,
-        prefix: &'a ObjectStorePath,
-    ) -> Result<ListResult> {
-        use ObjectStoreIntegration::*;
-        match &self.0 {
-            AmazonS3(s3) => s3.list_with_delimiter(prefix, &None).await,
-            GoogleCloudStorage(_gcs) => unimplemented!(),
-            InMemory(in_mem) => in_mem.list_with_delimiter(prefix, &None).await,
-            File(_file) => unimplemented!(),
-            MicrosoftAzure(_azure) => unimplemented!(),
-        }
-    }
-
-    /// Convert an `ObjectStorePath` to a `String` according to the appropriate
-    /// implementation. Suitable for printing; not suitable for sending to
-    /// APIs
-    pub fn convert_path(&self, path: &ObjectStorePath) -> String {
-        use ObjectStoreIntegration::*;
-        match &self.0 {
-            AmazonS3(_) | GoogleCloudStorage(_) | InMemory(_) | MicrosoftAzure(_) => {
-                path::cloud::CloudConverter::convert(path)
-            }
-            File(_) => path::file::FileConverter::convert(path)
-                .display()
-                .to_string(),
-        }
-    }
+        match (&self.0, location) {
+            (AmazonS3(s3), path::Path::AmazonS3(location)) => {
+                s3.put(location, bytes, length).await?
+            }
+            (GoogleCloudStorage(gcs), path::Path::GoogleCloudStorage(location)) => {
+                gcs.put(location, bytes, length).await?
+            }
+            (InMemory(in_mem), path::Path::InMemory(location)) => {
+                in_mem.put(location, bytes, length).await?
+            }
+            (File(file), path::Path::File(location)) => file.put(location, bytes, length).await?,
+            (MicrosoftAzure(azure), path::Path::MicrosoftAzure(location)) => {
+                azure.put(location, bytes, length).await?
+            }
+            _ => unreachable!(),
+        }
+
+        Ok(())
+    }
+
+    async fn get(&self, location: &Self::Path) -> Result<BoxStream<'static, Result<Bytes>>> {
+        use ObjectStoreIntegration::*;
+        Ok(match (&self.0, location) {
+            (AmazonS3(s3), path::Path::AmazonS3(location)) => {
+                s3.get(location).await?.err_into().boxed()
+            }
+            (GoogleCloudStorage(gcs), path::Path::GoogleCloudStorage(location)) => {
+                gcs.get(location).await?.err_into().boxed()
+            }
+            (InMemory(in_mem), path::Path::InMemory(location)) => {
+                in_mem.get(location).await?.err_into().boxed()
+            }
+            (File(file), path::Path::File(location)) => {
+                file.get(location).await?.err_into().boxed()
+            }
+            (MicrosoftAzure(azure), path::Path::MicrosoftAzure(location)) => {
+                azure.get(location).await?.err_into().boxed()
+            }
+            _ => unreachable!(),
+        })
+    }
+
+    async fn delete(&self, location: &Self::Path) -> Result<()> {
+        use ObjectStoreIntegration::*;
+        match (&self.0, location) {
+            (AmazonS3(s3), path::Path::AmazonS3(location)) => s3.delete(location).await?,
+            (GoogleCloudStorage(gcs), path::Path::GoogleCloudStorage(location)) => {
+                gcs.delete(location).await?
+            }
+            (InMemory(in_mem), path::Path::InMemory(location)) => in_mem.delete(location).await?,
+            (File(file), path::Path::File(location)) => file.delete(location).await?,
+            (MicrosoftAzure(azure), path::Path::MicrosoftAzure(location)) => {
+                azure.delete(location).await?
+            }
+            _ => unreachable!(),
+        }
+
+        Ok(())
+    }
+
+    async fn list<'a>(
+        &'a self,
+        prefix: Option<&'a Self::Path>,
+    ) -> Result<BoxStream<'a, Result<Vec<Self::Path>>>> {
+        use ObjectStoreIntegration::*;
+        Ok(match (&self.0, prefix) {
+            (AmazonS3(s3), Some(path::Path::AmazonS3(prefix))) => s3
+                .list(Some(prefix))
+                .await?
+                .map_ok(|s| s.into_iter().map(path::Path::AmazonS3).collect())
+                .err_into()
+                .boxed(),
+            (AmazonS3(s3), None) => s3
+                .list(None)
+                .await?
+                .map_ok(|s| s.into_iter().map(path::Path::AmazonS3).collect())
+                .err_into()
+                .boxed(),
+
+            (GoogleCloudStorage(gcs), Some(path::Path::GoogleCloudStorage(prefix))) => gcs
+                .list(Some(prefix))
+                .await?
+                .map_ok(|s| s.into_iter().map(path::Path::GoogleCloudStorage).collect())
+                .err_into()
+                .boxed(),
+            (GoogleCloudStorage(gcs), None) => gcs
+                .list(None)
+                .await?
+                .map_ok(|s| s.into_iter().map(path::Path::GoogleCloudStorage).collect())
+                .err_into()
+                .boxed(),
+
+            (InMemory(in_mem), Some(path::Path::InMemory(prefix))) => in_mem
+                .list(Some(prefix))
+                .await?
+                .map_ok(|s| s.into_iter().map(path::Path::InMemory).collect())
+                .err_into()
+                .boxed(),
+            (InMemory(in_mem), None) => in_mem
+                .list(None)
+                .await?
+                .map_ok(|s| s.into_iter().map(path::Path::InMemory).collect())
+                .err_into()
+                .boxed(),
+
+            (File(file), Some(path::Path::File(prefix))) => file
+                .list(Some(prefix))
+                .await?
+                .map_ok(|s| s.into_iter().map(path::Path::File).collect())
+                .err_into()
+                .boxed(),
+            (File(file), None) => file
+                .list(None)
+                .await?
+                .map_ok(|s| s.into_iter().map(path::Path::File).collect())
+                .err_into()
+                .boxed(),
+
+            (MicrosoftAzure(azure), Some(path::Path::MicrosoftAzure(prefix))) => azure
+                .list(Some(prefix))
+                .await?
+                .map_ok(|s| s.into_iter().map(path::Path::MicrosoftAzure).collect())
+                .err_into()
+                .boxed(),
+            (MicrosoftAzure(azure), None) => azure
+                .list(None)
+                .await?
+                .map_ok(|s| s.into_iter().map(path::Path::MicrosoftAzure).collect())
+                .err_into()
+                .boxed(),
+            _ => unreachable!(),
+        })
+    }
+
+    async fn list_with_delimiter(&self, prefix: &Self::Path) -> Result<ListResult<Self::Path>> {
+        use ObjectStoreIntegration::*;
+        match (&self.0, prefix) {
+            (AmazonS3(s3), path::Path::AmazonS3(prefix)) => {
+                s3.list_with_delimiter(prefix)
+                    .map_ok(|list_result| list_result.map_paths(path::Path::AmazonS3))
+                    .await
+            }
+            (GoogleCloudStorage(_gcs), _) => unimplemented!(),
+            (InMemory(in_mem), path::Path::InMemory(prefix)) => {
+                in_mem
+                    .list_with_delimiter(prefix)
+                    .map_ok(|list_result| list_result.map_paths(path::Path::InMemory))
+                    .await
+            }
+            (File(_file), _) => unimplemented!(),
+            (MicrosoftAzure(_azure), _) => unimplemented!(),
+            _ => unreachable!(),
+        }
+    }
 }
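
Note the shape of the dispatch above: every backend is paired with its own `path::Path` variant, and any mismatched combination falls through to `unreachable!()`, i.e. a panic. The implicit contract is that locations are only ever created by `new_path()` on the same store they are later used with. Schematically (the `store` binding is illustrative):

    // Correct: the path variant matches the store's integration,
    // so put/get/delete dispatch to the matching arm.
    let mut location = store.new_path();
    location.set_file_name("data.json");
    store.delete(&location).await?;
    // By contrast, handing a path::Path::File to an S3-backed
    // ObjectStore would hit the `_ => unreachable!()` arm and panic.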
@@ -177,29 +291,75 @@ pub enum ObjectStoreIntegration {
}

/// Result of a list call that includes objects, prefixes (directories) and a
/// token for the next set of results. Individual results sets are limited to
/// 1,000 objects.
/// token for the next set of results. Individual result sets may be limited to
/// 1,000 objects based on the underlying object storage's limitations.
#[derive(Debug)]
pub struct ListResult {
pub struct ListResult<P: ObjectStorePath> {
/// Token passed to the API for the next page of list results.
pub next_token: Option<String>,
/// Prefixes that are common (like directories)
pub common_prefixes: Vec<ObjectStorePath>,
pub common_prefixes: Vec<P>,
/// Object metadata for the listing
pub objects: Vec<ObjectMeta>,
pub objects: Vec<ObjectMeta<P>>,
}

#[allow(clippy::use_self)] // https://github.com/rust-lang/rust-clippy/issues/3410
impl<P: ObjectStorePath> ListResult<P> {
/// `c` is a function that can turn one type that implements an
/// `ObjectStorePath` into another type that also implements
/// `ObjectStorePath`.
fn map_paths<Q: ObjectStorePath, C>(self, c: C) -> ListResult<Q>
where
C: Fn(P) -> Q,
{
let Self {
next_token,
common_prefixes,
objects,
} = self;

ListResult {
next_token,
common_prefixes: common_prefixes.into_iter().map(&c).collect(),
objects: objects.into_iter().map(|o| o.map_paths(&c)).collect(),
}
}
}

/// The metadata that describes an object.
#[derive(Debug)]
pub struct ObjectMeta {
pub struct ObjectMeta<P: ObjectStorePath> {
/// The full path to the object
pub location: ObjectStorePath,
pub location: P,
/// The last modified time
pub last_modified: DateTime<Utc>,
/// The size in bytes of the object
pub size: usize,
}

#[allow(clippy::use_self)] // https://github.com/rust-lang/rust-clippy/issues/3410
impl<P: ObjectStorePath> ObjectMeta<P> {
/// `c` is a function that can turn one type that implements an
/// `ObjectStorePath` into another type that also implements
/// `ObjectStorePath`.
fn map_paths<Q: ObjectStorePath, C>(self, c: C) -> ObjectMeta<Q>
where
C: Fn(P) -> Q,
{
let Self {
location,
last_modified,
size,
} = self;

ObjectMeta {
location: c(location),
last_modified,
size,
}
}
}

/// A specialized `Result` for object store-related errors
pub type Result<T, E = Error> = std::result::Result<T, E>;

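`map_paths` is what lets the outer facade re-wrap a concrete backend's results, e.g. lifting every path in a `ListResult` into the matching `path::Path` variant via `list_result.map_paths(path::Path::AmazonS3)`. A self-contained miniature of the same shape, with a hypothetical `Listing` container in place of `ListResult`:

    // Re-wrap a generic container from one path type to another by applying
    // a conversion function to every path it holds.
    #[derive(Debug)]
    struct Listing<P> {
        prefixes: Vec<P>,
    }

    impl<P> Listing<P> {
        fn map_paths<Q>(self, c: impl Fn(P) -> Q) -> Listing<Q> {
            Listing {
                prefixes: self.prefixes.into_iter().map(c).collect(),
            }
        }
    }

    #[derive(Debug)]
    enum AnyPath {
        Cloud(String),
    }

    fn main() {
        // A backend produced plain `String` paths; the facade wraps each one
        // in its enum variant, mirroring `map_paths(path::Path::AmazonS3)`.
        let raw = Listing { prefixes: vec!["mydb/wal/".to_string()] };
        let wrapped: Listing<AnyPath> = raw.map_paths(AnyPath::Cloud);
        println!("{:?}", wrapped);
    }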
@@ -339,15 +499,17 @@ pub enum Error {
#[cfg(test)]
mod tests {
use super::*;
use crate::path::{cloud::CloudPath, parsed::DirsAndFileName, ObjectStorePath};

use futures::stream;

type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
type Result<T, E = Error> = std::result::Result<T, E>;

async fn flatten_list_stream(
storage: &ObjectStore,
prefix: Option<&ObjectStorePath>,
) -> Result<Vec<ObjectStorePath>> {
async fn flatten_list_stream<T: ObjectStoreApi>(
storage: &T,
prefix: Option<&T::Path>,
) -> Result<Vec<T::Path>> {
storage
.list(prefix)
.await?

@@ -357,7 +519,10 @@ mod tests {
.await
}

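A helper like `flatten_list_stream` has to collapse a stream of `Result<Vec<_>>` pages into one flat `Vec`. The body is elided by the hunk above, so the exact combinator is not shown; one plausible way to do it with the `futures` crate already in use here is `TryStreamExt::try_concat`, sketched standalone below:

    // `try_concat` concatenates the `Vec` pages and stops at the first error.
    use futures::{executor::block_on, stream, TryStreamExt};

    fn main() -> Result<(), std::io::Error> {
        let pages = stream::iter(vec![
            Ok::<_, std::io::Error>(vec!["a", "b"]),
            Ok(vec!["c"]),
        ]);

        let all: Vec<&str> = block_on(pages.try_concat())?;
        assert_eq!(all, ["a", "b", "c"]);
        Ok(())
    }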
pub(crate) async fn put_get_delete_list(storage: &ObjectStore) -> Result<()> {
pub(crate) async fn put_get_delete_list<T: ObjectStoreApi>(storage: &T) -> Result<()>
where
T::Path: From<DirsAndFileName>,
{
delete_fixtures(storage).await;

let content_list = flatten_list_stream(storage, None).await?;

@@ -368,7 +533,7 @@ mod tests {
);

let data = Bytes::from("arbitrary data");
let mut location = ObjectStorePath::default();
let mut location = storage.new_path();
location.push_dir("test_dir");
location.set_file_name("test_file.json");

@@ -386,13 +551,13 @@ mod tests {
assert_eq!(content_list, &[location.clone()]);

// List everything starting with a prefix that should return results
let mut prefix = ObjectStorePath::default();
let mut prefix = storage.new_path();
prefix.push_dir("test_dir");
let content_list = flatten_list_stream(storage, Some(&prefix)).await?;
assert_eq!(content_list, &[location.clone()]);

// List everything starting with a prefix that shouldn't return results
let mut prefix = ObjectStorePath::default();
let mut prefix = storage.new_path();
prefix.push_dir("something");
let content_list = flatten_list_stream(storage, Some(&prefix)).await?;
assert!(content_list.is_empty());

@@ -413,7 +578,10 @@ mod tests {
Ok(())
}

pub(crate) async fn list_with_delimiter(storage: &ObjectStore) -> Result<()> {
pub(crate) async fn list_with_delimiter<T: ObjectStoreApi>(storage: &T) -> Result<()>
where
T::Path: From<DirsAndFileName>,
{
delete_fixtures(storage).await;

let content_list = flatten_list_stream(storage, None).await?;

@@ -422,6 +590,7 @@ mod tests {
let data = Bytes::from("arbitrary data");

let files: Vec<_> = [
"test_file",
"mydb/wal/000/000/000.segment",
"mydb/wal/000/000/001.segment",
"mydb/wal/000/000/002.segment",

@@ -430,7 +599,7 @@ mod tests {
"mydb/data/whatevs",
]
.iter()
.map(|&s| ObjectStorePath::from_cloud_unchecked(s))
.map(|&s| str_to_path(s))
.collect();

let time_before_creation = Utc::now();

@@ -447,7 +616,7 @@ mod tests {
.unwrap();
}

let mut prefix = ObjectStorePath::default();
let mut prefix = storage.new_path();
prefix.push_all_dirs(&["mydb", "wal"]);

let mut expected_000 = prefix.clone();

@@ -469,11 +638,11 @@ mod tests {
assert!(object.last_modified > time_before_creation);

// List with a prefix containing a partial "file name"
let mut prefix = ObjectStorePath::default();
let mut prefix = storage.new_path();
prefix.push_all_dirs(&["mydb", "wal", "000", "000"]);
prefix.set_file_name("001");

let mut expected_location = ObjectStorePath::default();
let mut expected_location = storage.new_path();
expected_location.push_all_dirs(&["mydb", "wal", "000", "000"]);
expected_location.set_file_name("001.segment");

@@ -495,12 +664,15 @@ mod tests {
Ok(())
}

pub(crate) async fn get_nonexistent_object(
storage: &ObjectStore,
location: Option<ObjectStorePath>,
pub(crate) async fn get_nonexistent_object<T: ObjectStoreApi>(
storage: &T,
location: Option<T::Path>,
) -> Result<Bytes> {
let location = location
.unwrap_or_else(|| ObjectStorePath::from_cloud_unchecked("this_file_should_not_exist"));
let location = location.unwrap_or_else(|| {
let mut loc = storage.new_path();
loc.set_file_name("this_file_should_not_exist");
loc
});

let content_list = flatten_list_stream(storage, Some(&location)).await?;
assert!(content_list.is_empty());

@@ -514,7 +686,24 @@ mod tests {
.freeze())
}

async fn delete_fixtures(storage: &ObjectStore) {
/// Parse a str as a `CloudPath` into a `DirsAndFileName`, even though the
/// associated storage might not be cloud storage, to reuse the cloud
/// path parsing logic. Then convert into the correct type of path for
/// the given storage.
fn str_to_path<P>(val: &str) -> P
where
P: From<DirsAndFileName> + ObjectStorePath,
{
let cloud_path = CloudPath::raw(val);
let parsed: DirsAndFileName = cloud_path.into();

parsed.into()
}

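`str_to_path` reuses the cloud parser for every backend, so a fixture string such as "mydb/wal/000/000/000.segment" is split on `/` no matter which storage is under test. A rough standalone sketch of that split-then-convert step, using a hypothetical `Parsed` struct in place of `DirsAndFileName`:

    // Split on `/` once, then the result can convert into whatever path type
    // the storage under test uses via `From`.
    #[derive(Debug, Default)]
    struct Parsed {
        directories: Vec<String>,
        file_name: Option<String>,
    }

    fn parse(val: &str) -> Parsed {
        let mut parts: Vec<String> = val.split_terminator('/').map(String::from).collect();
        // Same heuristic as the cloud parser below: a final part containing
        // `.` is treated as a file name.
        let file_name = match parts.pop() {
            Some(p) if p.contains('.') => Some(p),
            Some(p) => {
                parts.push(p);
                None
            }
            None => None,
        };
        Parsed { directories: parts, file_name }
    }

    fn main() {
        let p = parse("mydb/wal/000/000/000.segment");
        assert_eq!(p.directories.len(), 4);
        assert_eq!(p.file_name.as_deref(), Some("000.segment"));
        println!("{:?}", p);
    }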
async fn delete_fixtures<T: ObjectStoreApi>(storage: &T)
where
T::Path: From<DirsAndFileName>,
{
let files: Vec<_> = [
"test_file",
"mydb/wal/000/000/000.segment",

@@ -525,7 +714,7 @@ mod tests {
"mydb/data/whatevs",
]
.iter()
.map(|&s| ObjectStorePath::from_cloud_unchecked(s))
.map(|&s| str_to_path(s))
.collect();

for f in &files {

@@ -1,13 +1,13 @@
//! This module contains the IOx implementation for using memory as the object
//! store.
use crate::{
path::{parsed::DirsAndFileName, ObjectStorePath},
DataDoesNotMatchLength, ListResult, NoDataInMemory, ObjectMeta, Result,
UnableToPutDataInMemory,
path::parsed::DirsAndFileName, DataDoesNotMatchLength, ListResult, NoDataInMemory, ObjectMeta,
ObjectStoreApi, Result, UnableToPutDataInMemory,
};
use async_trait::async_trait;
use bytes::Bytes;
use chrono::Utc;
use futures::{Stream, TryStreamExt};
use futures::{stream::BoxStream, Stream, StreamExt, TryStreamExt};
use snafu::{ensure, OptionExt, ResultExt};
use std::collections::BTreeSet;
use std::{collections::BTreeMap, io};

@@ -20,24 +20,15 @@ pub struct InMemory {
storage: RwLock<BTreeMap<DirsAndFileName, Bytes>>,
}

impl InMemory {
/// Create new in-memory storage.
pub fn new() -> Self {
Self::default()
#[async_trait]
impl ObjectStoreApi for InMemory {
type Path = DirsAndFileName;

fn new_path(&self) -> Self::Path {
DirsAndFileName::default()
}

/// Creates a clone of the store
pub async fn clone(&self) -> Self {
let storage = self.storage.read().await;
let storage = storage.clone();

Self {
storage: RwLock::new(storage),
}
}

/// Save the provided bytes to the specified location.
pub async fn put<S>(&self, location: &ObjectStorePath, bytes: S, length: usize) -> Result<()>
async fn put<S>(&self, location: &Self::Path, bytes: S, length: usize) -> Result<()>
where
S: Stream<Item = io::Result<Bytes>> + Send + Sync + 'static,
{

@@ -57,70 +48,56 @@ impl InMemory {

let content = content.freeze();

self.storage.write().await.insert(location.into(), content);
self.storage
.write()
.await
.insert(location.to_owned(), content);
Ok(())
}

/// Return the bytes that are stored at the specified location.
pub async fn get(
&self,
location: &ObjectStorePath,
) -> Result<impl Stream<Item = Result<Bytes>>> {
let location = location.into();
async fn get(&self, location: &Self::Path) -> Result<BoxStream<'static, Result<Bytes>>> {
let data = self
.storage
.read()
.await
.get(&location)
.get(location)
.cloned()
.context(NoDataInMemory)?;

Ok(futures::stream::once(async move { Ok(data) }))
Ok(futures::stream::once(async move { Ok(data) }).boxed())
}

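Note the return type change from `impl Stream` to `BoxStream`: trait methods, including those rewritten with `#[async_trait]`, cannot return an opaque `impl Trait` type per implementation, so each implementation boxes its concrete stream instead. A minimal sketch of that move, independent of the IOx types:

    // A trait method that must return a stream boxes it; `.boxed()` pins and
    // boxes the concrete stream type behind `dyn Stream`.
    use futures::stream::{self, BoxStream, StreamExt};

    trait Source {
        fn values(&self) -> BoxStream<'static, i32>;
    }

    struct Fixed;

    impl Source for Fixed {
        fn values(&self) -> BoxStream<'static, i32> {
            stream::iter(vec![1, 2, 3]).boxed()
        }
    }

    fn main() {
        let src = Fixed;
        let sum = futures::executor::block_on(src.values().fold(0, |a, b| async move { a + b }));
        assert_eq!(sum, 6);
    }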
/// Delete the object at the specified location.
pub async fn delete(&self, location: &ObjectStorePath) -> Result<()> {
self.storage.write().await.remove(&location.into());
async fn delete(&self, location: &Self::Path) -> Result<()> {
self.storage.write().await.remove(&location);
Ok(())
}

/// List all the objects with the given prefix.
pub async fn list<'a>(
async fn list<'a>(
&'a self,
prefix: Option<&'a ObjectStorePath>,
) -> Result<impl Stream<Item = Result<Vec<ObjectStorePath>>> + 'a> {
let prefix = prefix.map(Into::into);

prefix: Option<&'a Self::Path>,
) -> Result<BoxStream<'a, Result<Vec<Self::Path>>>> {
let list = if let Some(prefix) = &prefix {
self.storage
.read()
.await
.keys()
.filter(|k| k.prefix_matches(prefix))
.map(Into::into)
.cloned()
.collect()
} else {
self.storage.read().await.keys().map(Into::into).collect()
self.storage.read().await.keys().cloned().collect()
};

Ok(futures::stream::once(async move { Ok(list) }))
Ok(futures::stream::once(async move { Ok(list) }).boxed())
}

/// List objects with the given prefix and a set delimiter of `/`. Returns
/// common prefixes (directories) in addition to object metadata. The
/// memory implementation returns all results, as opposed to the cloud
/// The memory implementation returns all results, as opposed to the cloud
/// versions which limit their results to 1k or more because of API
/// limitations.
pub async fn list_with_delimiter<'a>(
&'a self,
prefix: &'a ObjectStorePath,
_next_token: &Option<String>,
) -> Result<ListResult> {
async fn list_with_delimiter(&self, prefix: &Self::Path) -> Result<ListResult<Self::Path>> {
let mut common_prefixes = BTreeSet::new();
let last_modified = Utc::now();

let prefix: DirsAndFileName = prefix.into();

// Only objects in this base level should be returned in the
// response. Otherwise, we just collect the common prefixes.
let mut objects = vec![];

@@ -128,20 +105,20 @@ impl InMemory {
.storage
.read()
.await
.range((&prefix)..)
.take_while(|(k, _)| k.prefix_matches(&prefix))
.range((prefix)..)
.take_while(|(k, _)| k.prefix_matches(prefix))
{
let parts = k
.parts_after_prefix(&prefix)
.parts_after_prefix(prefix)
.expect("must have prefix if in range");

if parts.len() >= 2 {
let mut full_prefix = prefix.clone();
let mut full_prefix = prefix.to_owned();
full_prefix.push_part_as_dir(&parts[0]);
common_prefixes.insert(full_prefix);
} else {
let object = ObjectMeta {
location: k.into(),
location: k.to_owned(),
last_modified,
size: v.len(),
};

@@ -151,12 +128,29 @@ impl InMemory {

Ok(ListResult {
objects,
common_prefixes: common_prefixes.into_iter().map(Into::into).collect(),
common_prefixes: common_prefixes.into_iter().collect(),
next_token: None,
})
}
}

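The in-memory `list_with_delimiter` leans on the `BTreeMap`'s sorted keys: `range(prefix..)` seeks straight to the first candidate, `take_while` stops at the first key that no longer shares the prefix, and anything nested more than one level down collapses into a common prefix. A rough standalone sketch of that scan over plain string keys, simplified relative to the real `DirsAndFileName` logic:

    use std::collections::{BTreeMap, BTreeSet};

    fn main() {
        let mut storage = BTreeMap::new();
        for k in ["mydb/data/whatevs", "mydb/wal/000.segment", "mydb/wal/1/000.segment"] {
            storage.insert(k.to_string(), ());
        }

        let prefix = "mydb/wal/";
        let mut objects = Vec::new();
        let mut common_prefixes = BTreeSet::new();

        // Sorted-map trick: start at the prefix and stop as soon as keys no
        // longer begin with it, instead of scanning the whole map.
        for (k, _) in storage
            .range(prefix.to_string()..)
            .take_while(|(k, _)| k.starts_with(prefix))
        {
            let rest = &k[prefix.len()..];
            match rest.split_once('/') {
                // Deeper than one level: record only the next directory.
                Some((dir, _)) => {
                    common_prefixes.insert(format!("{}{}/", prefix, dir));
                }
                // Exactly one level down: a real object in this listing.
                None => objects.push(k.clone()),
            }
        }

        assert_eq!(objects, ["mydb/wal/000.segment"]);
        assert!(common_prefixes.contains("mydb/wal/1/"));
    }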
impl InMemory {
/// Create new in-memory storage.
pub fn new() -> Self {
Self::default()
}

/// Creates a clone of the store
pub async fn clone(&self) -> Self {
let storage = self.storage.read().await;
let storage = storage.clone();

Self {
storage: RwLock::new(storage),
}
}
}

#[cfg(test)]
mod tests {
use super::*;

@@ -166,13 +160,13 @@ mod tests {

use crate::{
tests::{list_with_delimiter, put_get_delete_list},
Error, ObjectStore,
Error, ObjectStoreApi, ObjectStorePath,
};
use futures::stream;

#[tokio::test]
async fn in_memory_test() -> Result<()> {
let integration = ObjectStore::new_in_memory(InMemory::new());
let integration = InMemory::new();

put_get_delete_list(&integration).await?;

@@ -183,10 +177,11 @@ mod tests {

#[tokio::test]
async fn length_mismatch_is_an_error() -> Result<()> {
let integration = ObjectStore::new_in_memory(InMemory::new());
let integration = InMemory::new();

let bytes = stream::once(async { Ok(Bytes::from("hello world")) });
let location = ObjectStorePath::from_cloud_unchecked("junk");
let mut location = integration.new_path();
location.set_file_name("junk");
let res = integration.put(&location, bytes, 0).await;

assert!(matches!(

@@ -1,13 +1,13 @@
//! This module contains code for abstracting object locations that work
//! across different backing implementations and platforms.

use std::{mem, path::PathBuf};

/// Paths that came from or are to be used in cloud-based object storage
pub mod cloud;
use cloud::CloudPath;

/// Paths that come from or are to be used in file-based object storage
pub mod file;
use file::FilePath;

/// Maximally processed storage-independent paths.
pub mod parsed;

@@ -16,507 +16,88 @@ use parsed::DirsAndFileName;
mod parts;
use parts::PathPart;

/// The delimiter to separate object namespaces, creating a directory structure.
pub const DELIMITER: &str = "/";

/// Universal interface for handling paths and locations for objects and
/// directories in the object store.
///
/// It allows IOx to be completely decoupled from the underlying object store
/// implementations.
///
/// Deliberately does not implement `Display` or `ToString`! Use one of the
/// converters.
#[derive(Default, Clone, PartialEq, Eq, Debug)]
pub struct ObjectStorePath {
inner: PathRepresentation,
}

impl ObjectStorePath {
/// For use when receiving a path from an object store API directly, not
/// when building a path. Assumes DELIMITER is the separator.
///
/// TODO: This should only be available to cloud storage
pub fn from_cloud_unchecked(path: impl Into<String>) -> Self {
let path = path.into();
Self {
inner: PathRepresentation::RawCloud(path),
}
}

/// For use when receiving a path from a filesystem directly, not
/// when building a path. Uses the standard library's path splitting
/// implementation to separate into parts.
///
/// TODO: This should only be available to file storage
pub fn from_path_buf_unchecked(path: impl Into<PathBuf>) -> Self {
let path = path.into();
Self {
inner: PathRepresentation::RawPathBuf(path),
}
}

/// Add a part to the end of the path, encoding any restricted characters.
pub fn push_dir(&mut self, part: impl Into<String>) {
self.inner = mem::take(&mut self.inner).push_dir(part);
}

/// Add a `PathPart` to the end of the path.
pub fn push_part_as_dir(&mut self, part: &PathPart) {
self.inner = mem::take(&mut self.inner).push_part_as_dir(part);
}

/// Deliberately does not implement `Display` or `ToString`!
pub trait ObjectStorePath:
std::fmt::Debug + Clone + PartialEq + Eq + Send + Sync + 'static
{
/// Set the file name of this path
pub fn set_file_name(&mut self, part: impl Into<String>) {
self.inner = mem::take(&mut self.inner).set_file_name(part);
}
fn set_file_name(&mut self, part: impl Into<String>);

/// Add the parts of `ObjectStorePath` to the end of the path. Notably does
/// *not* behave as `PathBuf::push` does: there is no way to replace the
/// root. If `self` has a file name, that will be removed, then the
/// directories of `path` will be appended, then any file name of `path`
/// will be assigned to `self`.
pub fn push_path(&mut self, path: &Self) {
self.inner = mem::take(&mut self.inner).push_path(path)
}

/// Push a bunch of parts as directories in one go.
pub fn push_all_dirs<'a>(&mut self, parts: impl AsRef<[&'a str]>) {
self.inner = mem::take(&mut self.inner).push_all_dirs(parts);
}

/// Pops a part from the path and returns it, or `None` if it's empty.
pub fn pop(&mut self) -> Option<&PathPart> {
unimplemented!()
}

/// Returns true if the directories in `prefix` are the same as the starting
/// directories of `self`.
pub fn prefix_matches(&self, prefix: &Self) -> bool {
use PathRepresentation::*;
match (&self.inner, &prefix.inner) {
(Parts(self_parts), Parts(other_parts)) => self_parts.prefix_matches(&other_parts),
(Parts(self_parts), _) => {
let prefix_parts: DirsAndFileName = prefix.into();
self_parts.prefix_matches(&prefix_parts)
}
(_, Parts(prefix_parts)) => {
let self_parts: DirsAndFileName = self.into();
self_parts.prefix_matches(&prefix_parts)
}
_ => {
let self_parts: DirsAndFileName = self.into();
let prefix_parts: DirsAndFileName = prefix.into();
self_parts.prefix_matches(&prefix_parts)
}
}
}
}

impl From<&'_ DirsAndFileName> for ObjectStorePath {
fn from(other: &'_ DirsAndFileName) -> Self {
other.clone().into()
}
}

impl From<DirsAndFileName> for ObjectStorePath {
fn from(other: DirsAndFileName) -> Self {
Self {
inner: PathRepresentation::Parts(other),
}
}
}

#[derive(Clone, Eq, Debug)]
enum PathRepresentation {
RawCloud(String),
RawPathBuf(PathBuf),
Parts(DirsAndFileName),
}

impl Default for PathRepresentation {
fn default() -> Self {
Self::Parts(DirsAndFileName::default())
}
}

impl PathRepresentation {
/// Add a part to the end of the path's directories, encoding any restricted
/// characters.
fn push_dir(self, part: impl Into<String>) -> Self {
let mut dirs_and_file_name: DirsAndFileName = self.into();

dirs_and_file_name.push_dir(part);
Self::Parts(dirs_and_file_name)
}
fn push_dir(&mut self, part: impl Into<String>);

/// Push a bunch of parts as directories in one go.
fn push_all_dirs<'a>(self, parts: impl AsRef<[&'a str]>) -> Self {
let mut dirs_and_file_name: DirsAndFileName = self.into();
fn push_all_dirs<'a>(&mut self, parts: impl AsRef<[&'a str]>);

dirs_and_file_name.push_all_dirs(parts);

Self::Parts(dirs_and_file_name)
}

/// Add a `PathPart` to the end of the path's directories.
fn push_part_as_dir(self, part: &PathPart) -> Self {
let mut dirs_and_file_name: DirsAndFileName = self.into();

dirs_and_file_name.push_part_as_dir(part);
Self::Parts(dirs_and_file_name)
}

/// Add the parts of `ObjectStorePath` to the end of the path. Notably does
/// *not* behave as `PathBuf::push` does: there is no way to replace the
/// root. If `self` has a file name, that will be removed, then the
/// directories of `path` will be appended, then any file name of `path`
/// will be assigned to `self`.
fn push_path(self, path: &ObjectStorePath) -> Self {
let DirsAndFileName {
directories: path_dirs,
file_name: path_file_name,
} = path.inner.to_owned().into();
let mut dirs_and_file_name: DirsAndFileName = self.into();

dirs_and_file_name.directories.extend(path_dirs);
dirs_and_file_name.file_name = path_file_name;

Self::Parts(dirs_and_file_name)
}

/// Set the file name of this path
fn set_file_name(self, part: impl Into<String>) -> Self {
let part = part.into();
let mut dirs_and_file_name: DirsAndFileName = self.into();

dirs_and_file_name.file_name = Some((&*part).into());
Self::Parts(dirs_and_file_name)
}
/// Like `std::path::Path::display`, converts an `ObjectStorePath` to a
/// `String` suitable for printing; not suitable for sending to
/// APIs.
fn display(&self) -> String;
}

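With `ObjectStorePath` now a trait, code like the rewritten tests can build locations generically and let each backend pick its own concrete path type. A self-contained miniature of that style; the `MiniPath`/`Plain` names are hypothetical stand-ins, not the IOx types:

    // Program against a path trait rather than a concrete path struct.
    trait MiniPath: std::fmt::Debug + Clone + PartialEq + Eq {
        fn push_dir(&mut self, part: impl Into<String>);
        fn set_file_name(&mut self, part: impl Into<String>);
        fn display(&self) -> String;
    }

    #[derive(Debug, Clone, PartialEq, Eq, Default)]
    struct Plain {
        dirs: Vec<String>,
        file: Option<String>,
    }

    impl MiniPath for Plain {
        fn push_dir(&mut self, part: impl Into<String>) {
            self.dirs.push(part.into());
        }
        fn set_file_name(&mut self, part: impl Into<String>) {
            self.file = Some(part.into());
        }
        fn display(&self) -> String {
            let mut s = self.dirs.join("/");
            if !s.is_empty() {
                s.push('/');
            }
            if let Some(f) = &self.file {
                s.push_str(f);
            }
            s
        }
    }

    // Generic code, like the rewritten tests, never names a concrete path type.
    fn wal_segment<P: MiniPath + Default>(n: u32) -> P {
        let mut p = P::default();
        p.push_dir("mydb");
        p.push_dir("wal");
        p.set_file_name(format!("{:03}.segment", n));
        p
    }

    fn main() {
        let p: Plain = wal_segment(1);
        assert_eq!(p.display(), "mydb/wal/001.segment");
    }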
impl PartialEq for PathRepresentation {
fn eq(&self, other: &Self) -> bool {
use PathRepresentation::*;
match (self, other) {
(Parts(self_parts), Parts(other_parts)) => self_parts == other_parts,
(Parts(self_parts), _) => {
let other_parts: DirsAndFileName = other.to_owned().into();
*self_parts == other_parts
}
(_, Parts(other_parts)) => {
let self_parts: DirsAndFileName = self.to_owned().into();
self_parts == *other_parts
}
_ => {
let self_parts: DirsAndFileName = self.to_owned().into();
let other_parts: DirsAndFileName = other.to_owned().into();
self_parts == other_parts
}
/// Defines which object stores use which path logic.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Path {
/// Amazon storage
AmazonS3(CloudPath),
/// Local file system storage
File(FilePath),
/// GCP storage
GoogleCloudStorage(CloudPath),
/// In memory storage for testing
InMemory(DirsAndFileName),
/// Microsoft Azure Blob storage
MicrosoftAzure(CloudPath),
}

impl ObjectStorePath for Path {
fn set_file_name(&mut self, part: impl Into<String>) {
match self {
Self::AmazonS3(path) => path.set_file_name(part),
Self::File(path) => path.set_file_name(part),
Self::GoogleCloudStorage(path) => path.set_file_name(part),
Self::InMemory(path) => path.set_file_name(part),
Self::MicrosoftAzure(path) => path.set_file_name(part),
}
}

fn push_dir(&mut self, part: impl Into<String>) {
match self {
Self::AmazonS3(path) => path.push_dir(part),
Self::File(path) => path.push_dir(part),
Self::GoogleCloudStorage(path) => path.push_dir(part),
Self::InMemory(path) => path.push_dir(part),
Self::MicrosoftAzure(path) => path.push_dir(part),
}
}

fn push_all_dirs<'a>(&mut self, parts: impl AsRef<[&'a str]>) {
match self {
Self::AmazonS3(path) => path.push_all_dirs(parts),
Self::File(path) => path.push_all_dirs(parts),
Self::GoogleCloudStorage(path) => path.push_all_dirs(parts),
Self::InMemory(path) => path.push_all_dirs(parts),
Self::MicrosoftAzure(path) => path.push_all_dirs(parts),
}
}

fn display(&self) -> String {
match self {
Self::AmazonS3(path) => path.display(),
Self::File(path) => path.display(),
Self::GoogleCloudStorage(path) => path.display(),
Self::InMemory(path) => path.display(),
Self::MicrosoftAzure(path) => path.display(),
}
}
}

/// The delimiter to separate object namespaces, creating a directory structure.
pub const DELIMITER: &str = "/";

#[cfg(test)]
mod tests {
use super::*;

// Invariants to maintain/document/test:
//
// - always ends in DELIMITER if it's a directory. If it's the end object, it
// should have some sort of file extension like .parquet, .json, or .segment
// - does not contain unencoded DELIMITER
// - for file paths: does not escape root dir
// - for object storage: looks like directories
// - Paths that come from object stores directly don't need to be
// parsed/validated
// - Within a process, the same backing store will always be used
//

#[test]
fn prefix_matches() {
let mut haystack = ObjectStorePath::default();
haystack.push_all_dirs(&["foo/bar", "baz%2Ftest", "something"]);

// self starts with self
assert!(
haystack.prefix_matches(&haystack),
"{:?} should have started with {:?}",
haystack,
haystack
);

// a longer prefix doesn't match
let mut needle = haystack.clone();
needle.push_dir("longer now");
assert!(
!haystack.prefix_matches(&needle),
"{:?} shouldn't have started with {:?}",
haystack,
needle
);

// one dir prefix matches
let mut needle = ObjectStorePath::default();
needle.push_dir("foo/bar");
assert!(
haystack.prefix_matches(&needle),
"{:?} should have started with {:?}",
haystack,
needle
);

// two dir prefix matches
needle.push_dir("baz%2Ftest");
assert!(
haystack.prefix_matches(&needle),
"{:?} should have started with {:?}",
haystack,
needle
);

// partial dir prefix matches
let mut needle = ObjectStorePath::default();
needle.push_dir("f");
assert!(
haystack.prefix_matches(&needle),
"{:?} should have started with {:?}",
haystack,
needle
);

// one dir and one partial dir matches
let mut needle = ObjectStorePath::default();
needle.push_all_dirs(&["foo/bar", "baz"]);
assert!(
haystack.prefix_matches(&needle),
"{:?} should have started with {:?}",
haystack,
needle
);
}

#[test]
fn prefix_matches_with_file_name() {
let mut haystack = ObjectStorePath::default();
haystack.push_all_dirs(&["foo/bar", "baz%2Ftest", "something"]);

let mut needle = haystack.clone();

// All directories match and file name is a prefix
haystack.set_file_name("foo.segment");
needle.set_file_name("foo");

assert!(
haystack.prefix_matches(&needle),
"{:?} should have started with {:?}",
haystack,
needle
);

// All directories match but file name is not a prefix
needle.set_file_name("e");

assert!(
!haystack.prefix_matches(&needle),
"{:?} should not have started with {:?}",
haystack,
needle
);

// Not all directories match; file name is a prefix of the next directory; this
// matches
let mut needle = ObjectStorePath::default();
needle.push_all_dirs(&["foo/bar", "baz%2Ftest"]);
needle.set_file_name("s");

assert!(
haystack.prefix_matches(&needle),
"{:?} should have started with {:?}",
haystack,
needle
);

// Not all directories match; file name is NOT a prefix of the next directory;
// no match
needle.set_file_name("p");

assert!(
!haystack.prefix_matches(&needle),
"{:?} should not have started with {:?}",
haystack,
needle
);
}

#[test]
fn convert_raw_before_partial_eq() {
// dir and file_name
let cloud = ObjectStorePath::from_cloud_unchecked("test_dir/test_file.json");
let mut built = ObjectStorePath::default();
built.push_dir("test_dir");
built.set_file_name("test_file.json");

assert_eq!(built, cloud);

// dir, no file_name
let cloud = ObjectStorePath::from_cloud_unchecked("test_dir");
let mut built = ObjectStorePath::default();
built.push_dir("test_dir");

assert_eq!(built, cloud);

// file_name, no dir
let cloud = ObjectStorePath::from_cloud_unchecked("test_file.json");
let mut built = ObjectStorePath::default();
built.set_file_name("test_file.json");

assert_eq!(built, cloud);

// empty
let cloud = ObjectStorePath::from_cloud_unchecked("");
let built = ObjectStorePath::default();

assert_eq!(built, cloud);
}

#[test]
fn path_rep_conversions() {
// dir and file name
let cloud = PathRepresentation::RawCloud("foo/bar/blah.json".into());
let cloud_parts: DirsAndFileName = cloud.into();

let path_buf = PathRepresentation::RawPathBuf("foo/bar/blah.json".into());
let path_buf_parts: DirsAndFileName = path_buf.into();

let mut expected_parts = DirsAndFileName::default();
expected_parts.push_dir("foo");
expected_parts.push_dir("bar");
expected_parts.file_name = Some("blah.json".into());

assert_eq!(cloud_parts, expected_parts);
assert_eq!(path_buf_parts, expected_parts);

// dir, no file name
let cloud = PathRepresentation::RawCloud("foo/bar".into());
let cloud_parts: DirsAndFileName = cloud.into();

let path_buf = PathRepresentation::RawPathBuf("foo/bar".into());
let path_buf_parts: DirsAndFileName = path_buf.into();

expected_parts.file_name = None;

assert_eq!(cloud_parts, expected_parts);
assert_eq!(path_buf_parts, expected_parts);

// no dir, file name
let cloud = PathRepresentation::RawCloud("blah.json".into());
let cloud_parts: DirsAndFileName = cloud.into();

let path_buf = PathRepresentation::RawPathBuf("blah.json".into());
let path_buf_parts: DirsAndFileName = path_buf.into();

assert!(cloud_parts.directories.is_empty());
assert_eq!(cloud_parts.file_name.unwrap().encoded(), "blah.json");

assert!(path_buf_parts.directories.is_empty());
assert_eq!(path_buf_parts.file_name.unwrap().encoded(), "blah.json");

// empty
let cloud = PathRepresentation::RawCloud("".into());
let cloud_parts: DirsAndFileName = cloud.into();

let path_buf = PathRepresentation::RawPathBuf("".into());
let path_buf_parts: DirsAndFileName = path_buf.into();

assert!(cloud_parts.directories.is_empty());
assert!(cloud_parts.file_name.is_none());

assert!(path_buf_parts.directories.is_empty());
assert!(path_buf_parts.file_name.is_none());
}

#[test]
fn path_buf_to_dirs_and_file_name_conversion() {
// Last section ending in `.json` is a file name
let path_buf = PathRepresentation::RawPathBuf("/one/two/blah.json".into());
let path_buf_parts: DirsAndFileName = path_buf.into();
assert_eq!(path_buf_parts.directories.len(), 3);
assert_eq!(path_buf_parts.file_name.unwrap().0, "blah.json");

// Last section ending in `.segment` is a file name
let path_buf = PathRepresentation::RawPathBuf("/one/two/blah.segment".into());
let path_buf_parts: DirsAndFileName = path_buf.into();
assert_eq!(path_buf_parts.directories.len(), 3);
assert_eq!(path_buf_parts.file_name.unwrap().0, "blah.segment");

// Last section ending in `.parquet` is a file name
let path_buf = PathRepresentation::RawPathBuf("/one/two/blah.parquet".into());
let path_buf_parts: DirsAndFileName = path_buf.into();
assert_eq!(path_buf_parts.directories.len(), 3);
assert_eq!(path_buf_parts.file_name.unwrap().0, "blah.parquet");

// Last section ending in `.txt` is NOT a file name; we don't recognize that
// extension
let path_buf = PathRepresentation::RawPathBuf("/one/two/blah.txt".into());
let path_buf_parts: DirsAndFileName = path_buf.into();
assert_eq!(path_buf_parts.directories.len(), 4);
assert!(path_buf_parts.file_name.is_none());

// Last section containing a `.` isn't a file name
let path_buf = PathRepresentation::RawPathBuf("/one/two/blah.blah".into());
let path_buf_parts: DirsAndFileName = path_buf.into();
assert_eq!(path_buf_parts.directories.len(), 4);
assert!(path_buf_parts.file_name.is_none());

// Last section starting with a `.` isn't a file name (macos temp dirs do this)
let path_buf = PathRepresentation::RawPathBuf("/one/two/.blah".into());
let path_buf_parts: DirsAndFileName = path_buf.into();
assert_eq!(path_buf_parts.directories.len(), 4);
assert!(path_buf_parts.file_name.is_none());
}

#[test]
fn parts_after_prefix_behavior() {
let mut existing_path = DirsAndFileName::default();
existing_path.push_all_dirs(&["apple", "bear", "cow", "dog"]);
existing_path.file_name = Some("egg.json".into());

// Prefix with one directory
let mut prefix = DirsAndFileName::default();
prefix.push_dir("apple");
let expected_parts: Vec<PathPart> = vec!["bear", "cow", "dog", "egg.json"]
.into_iter()
.map(Into::into)
.collect();
let parts = existing_path.parts_after_prefix(&prefix).unwrap();
assert_eq!(parts, expected_parts);

// Prefix with two directories
let mut prefix = DirsAndFileName::default();
prefix.push_all_dirs(&["apple", "bear"]);
let expected_parts: Vec<PathPart> = vec!["cow", "dog", "egg.json"]
.into_iter()
.map(Into::into)
.collect();
let parts = existing_path.parts_after_prefix(&prefix).unwrap();
assert_eq!(parts, expected_parts);

// Not a prefix
let mut prefix = DirsAndFileName::default();
prefix.push_dir("cow");
assert!(existing_path.parts_after_prefix(&prefix).is_none());

// Prefix with a partial directory
let mut prefix = DirsAndFileName::default();
prefix.push_dir("ap");
assert!(existing_path.parts_after_prefix(&prefix).is_none());

// Prefix matches but there aren't any parts after it
let mut existing_path = DirsAndFileName::default();
existing_path.push_all_dirs(&["apple", "bear", "cow", "dog"]);
let prefix = existing_path.clone();
let parts = existing_path.parts_after_prefix(&prefix).unwrap();
assert!(parts.is_empty());
}
}

@@ -1,22 +1,53 @@
use super::{ObjectStorePath, PathPart, PathRepresentation, DELIMITER};
use super::{DirsAndFileName, ObjectStorePath, PathPart, DELIMITER};

use std::mem;

use itertools::Itertools;

/// Converts `ObjectStorePath`s to `String`s that are appropriate for use as
/// locations in cloud storage.
#[derive(Debug, Clone, Copy)]
pub struct CloudConverter {}
/// An object storage location suitable for passing to cloud storage APIs such
/// as AWS, GCS, and Azure.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CloudPath {
inner: CloudPathRepresentation,
}

impl CloudConverter {
/// Creates a cloud storage location by joining this `ObjectStorePath`'s
impl ObjectStorePath for CloudPath {
fn set_file_name(&mut self, part: impl Into<String>) {
self.inner = mem::take(&mut self.inner).set_file_name(part);
}

fn push_dir(&mut self, part: impl Into<String>) {
self.inner = mem::take(&mut self.inner).push_dir(part);
}

fn push_all_dirs<'a>(&mut self, parts: impl AsRef<[&'a str]>) {
self.inner = mem::take(&mut self.inner).push_all_dirs(parts);
}

fn display(&self) -> String {
self.to_raw()
}
}

impl CloudPath {
/// Creates a cloud storage location from a string received from a cloud
/// storage API without parsing or allocating unless other methods are
/// called on this instance that need it
pub fn raw(path: impl Into<String>) -> Self {
let path = path.into();
Self {
inner: CloudPathRepresentation::Raw(path),
}
}

/// Creates a cloud storage location by joining this `CloudPath`'s
/// parts with `DELIMITER`
pub fn convert(object_store_path: &ObjectStorePath) -> String {
match &object_store_path.inner {
PathRepresentation::RawCloud(path) => path.to_owned(),
PathRepresentation::RawPathBuf(_path) => {
todo!("convert");
}
PathRepresentation::Parts(dirs_and_file_name) => {
pub fn to_raw(&self) -> String {
use CloudPathRepresentation::*;

match &self.inner {
Raw(path) => path.to_owned(),
Parsed(dirs_and_file_name) => {
let mut path = dirs_and_file_name
.directories
.iter()

@@ -35,6 +66,105 @@ impl CloudConverter {
}
}

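`CloudPath::raw` keeps the string exactly as the cloud API returned it, and `to_raw` hands it back untouched; parsing into parts happens only when a structural method forces it. A sketch of that lazy raw/parsed split using a hypothetical miniature type, not the real `CloudPathRepresentation`:

    // Keep the raw string until a structural operation forces a parse.
    #[derive(Debug)]
    enum Repr {
        Raw(String),
        Parsed(Vec<String>),
    }

    impl Repr {
        fn to_raw(&self) -> String {
            match self {
                // Round-trip without touching the contents.
                Self::Raw(s) => s.clone(),
                Self::Parsed(parts) => parts.join("/"),
            }
        }

        fn push_dir(self, dir: &str) -> Self {
            // The first mutation pays the parsing cost, once.
            let mut parts: Vec<String> = match self {
                Self::Raw(s) => s.split_terminator('/').map(String::from).collect(),
                Self::Parsed(parts) => parts,
            };
            parts.push(dir.to_string());
            Self::Parsed(parts)
        }
    }

    fn main() {
        let p = Repr::Raw("mydb/wal".to_string());
        assert_eq!(p.to_raw(), "mydb/wal");
        let p = p.push_dir("000");
        assert_eq!(p.to_raw(), "mydb/wal/000");
    }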
impl From<CloudPath> for DirsAndFileName {
fn from(cloud_path: CloudPath) -> Self {
cloud_path.inner.into()
}
}

impl From<DirsAndFileName> for CloudPath {
fn from(dirs_and_file_name: DirsAndFileName) -> Self {
Self {
inner: CloudPathRepresentation::Parsed(dirs_and_file_name),
}
}
}

#[derive(Debug, Clone, Eq)]
enum CloudPathRepresentation {
Raw(String),
Parsed(DirsAndFileName),
}

impl Default for CloudPathRepresentation {
fn default() -> Self {
Self::Parsed(DirsAndFileName::default())
}
}

impl PartialEq for CloudPathRepresentation {
fn eq(&self, other: &Self) -> bool {
use CloudPathRepresentation::*;
match (self, other) {
(Parsed(self_parts), Parsed(other_parts)) => self_parts == other_parts,
(Parsed(self_parts), _) => {
let other_parts: DirsAndFileName = other.to_owned().into();
*self_parts == other_parts
}
(_, Parsed(other_parts)) => {
let self_parts: DirsAndFileName = self.to_owned().into();
self_parts == *other_parts
}
_ => {
let self_parts: DirsAndFileName = self.to_owned().into();
let other_parts: DirsAndFileName = other.to_owned().into();
self_parts == other_parts
}
}
}
}

impl CloudPathRepresentation {
fn push_dir(self, part: impl Into<String>) -> Self {
let mut dirs_and_file_name: DirsAndFileName = self.into();

dirs_and_file_name.push_dir(part);
Self::Parsed(dirs_and_file_name)
}

fn push_all_dirs<'a>(self, parts: impl AsRef<[&'a str]>) -> Self {
let mut dirs_and_file_name: DirsAndFileName = self.into();

dirs_and_file_name.push_all_dirs(parts);
Self::Parsed(dirs_and_file_name)
}

fn set_file_name(self, part: impl Into<String>) -> Self {
let mut dirs_and_file_name: DirsAndFileName = self.into();

dirs_and_file_name.set_file_name(part);
Self::Parsed(dirs_and_file_name)
}
}

impl From<CloudPathRepresentation> for DirsAndFileName {
fn from(cloud_path_rep: CloudPathRepresentation) -> Self {
use CloudPathRepresentation::*;

match cloud_path_rep {
Raw(path) => {
let mut parts: Vec<PathPart> = path
.split_terminator(DELIMITER)
.map(|s| PathPart(s.to_string()))
.collect();
let maybe_file_name = match parts.pop() {
Some(file) if file.encoded().contains('.') => Some(file),
Some(dir) => {
parts.push(dir);
None
}
None => None,
};
Self {
directories: parts,
file_name: maybe_file_name,
}
}
Parsed(dirs_and_file_name) => dirs_and_file_name,
}
}
}

#[cfg(test)]
mod tests {
use super::*;

@@ -44,10 +174,10 @@ mod tests {
// Use case: a file named `test_file.json` exists in object storage and it
// should be returned for a search on prefix `test`, so the prefix path
// should not get a trailing delimiter automatically added
let mut prefix = ObjectStorePath::default();
let mut prefix = CloudPath::default();
prefix.set_file_name("test");

let converted = CloudConverter::convert(&prefix);
let converted = prefix.to_raw();
assert_eq!(converted, "test");
}

@@ -56,29 +186,96 @@ mod tests {
// Use case: files exist in object storage named `foo/bar.json` and
// `foo_test.json`. A search for the prefix `foo/` should return
// `foo/bar.json` but not `foo_test.json`.
let mut prefix = ObjectStorePath::default();
let mut prefix = CloudPath::default();
prefix.push_dir("test");

let converted = CloudConverter::convert(&prefix);
let converted = prefix.to_raw();
assert_eq!(converted, "test/");
}

#[test]
fn push_encodes() {
let mut location = ObjectStorePath::default();
let mut location = CloudPath::default();
location.push_dir("foo/bar");
location.push_dir("baz%2Ftest");

let converted = CloudConverter::convert(&location);
let converted = location.to_raw();
assert_eq!(converted, "foo%2Fbar/baz%252Ftest/");
}

#[test]
fn push_all_encodes() {
let mut location = ObjectStorePath::default();
let mut location = CloudPath::default();
location.push_all_dirs(&["foo/bar", "baz%2Ftest"]);

let converted = CloudConverter::convert(&location);
let converted = location.to_raw();
assert_eq!(converted, "foo%2Fbar/baz%252Ftest/");
}

#[test]
fn convert_raw_before_partial_eq() {
// dir and file_name
let cloud = CloudPath::raw("test_dir/test_file.json");
let mut built = CloudPath::default();
built.push_dir("test_dir");
built.set_file_name("test_file.json");

assert_eq!(built, cloud);

// dir, no file_name
let cloud = CloudPath::raw("test_dir");
let mut built = CloudPath::default();
built.push_dir("test_dir");

assert_eq!(built, cloud);

// file_name, no dir
let cloud = CloudPath::raw("test_file.json");
let mut built = CloudPath::default();
built.set_file_name("test_file.json");

assert_eq!(built, cloud);

// empty
let cloud = CloudPath::raw("");
let built = CloudPath::default();

assert_eq!(built, cloud);
}

#[test]
fn conversions() {
// dir and file name
let cloud = CloudPath::raw("foo/bar/blah.json");
let cloud_parts: DirsAndFileName = cloud.into();

let mut expected_parts = DirsAndFileName::default();
expected_parts.push_dir("foo");
expected_parts.push_dir("bar");
expected_parts.file_name = Some("blah.json".into());

assert_eq!(cloud_parts, expected_parts);

// dir, no file name
let cloud = CloudPath::raw("foo/bar");
let cloud_parts: DirsAndFileName = cloud.into();

expected_parts.file_name = None;

assert_eq!(cloud_parts, expected_parts);

// no dir, file name
let cloud = CloudPath::raw("blah.json");
let cloud_parts: DirsAndFileName = cloud.into();

assert!(cloud_parts.directories.is_empty());
assert_eq!(cloud_parts.file_name.unwrap().encoded(), "blah.json");

// empty
let cloud = CloudPath::raw("");
let cloud_parts: DirsAndFileName = cloud.into();

assert!(cloud_parts.directories.is_empty());
assert!(cloud_parts.file_name.is_none());
}
}

@@ -1,23 +1,52 @@
use super::{ObjectStorePath, PathPart, PathRepresentation};
use super::{DirsAndFileName, ObjectStorePath, PathPart};

use std::path::PathBuf;
use std::{mem, path::PathBuf};

/// Converts `ObjectStorePath`s to `String`s that are appropriate for use as
/// locations in filesystem storage.
#[derive(Debug, Clone, Copy)]
pub struct FileConverter {}
/// An object storage location suitable for passing to disk based object
/// storage.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct FilePath {
inner: FilePathRepresentation,
}

impl ObjectStorePath for FilePath {
fn set_file_name(&mut self, part: impl Into<String>) {
self.inner = mem::take(&mut self.inner).set_file_name(part);
}

fn push_dir(&mut self, part: impl Into<String>) {
self.inner = mem::take(&mut self.inner).push_dir(part);
}

fn push_all_dirs<'a>(&mut self, parts: impl AsRef<[&'a str]>) {
self.inner = mem::take(&mut self.inner).push_all_dirs(parts);
}

fn display(&self) -> String {
todo!()
}
}

impl FilePath {
/// Creates a file storage location from a `PathBuf` without parsing or
/// allocating unless other methods are called on this instance that
/// need it
pub fn raw(path: impl Into<PathBuf>) -> Self {
let path = path.into();
Self {
inner: FilePathRepresentation::Raw(path),
}
}

impl FileConverter {
/// Creates a filesystem `PathBuf` location by using the standard library's
/// `PathBuf` building implementation appropriate for the current
/// platform.
pub fn convert(object_store_path: &ObjectStorePath) -> PathBuf {
match &object_store_path.inner {
PathRepresentation::RawCloud(_path) => {
todo!("convert");
}
PathRepresentation::RawPathBuf(path) => path.to_owned(),
PathRepresentation::Parts(dirs_and_file_name) => {
pub fn to_raw(&self) -> PathBuf {
use FilePathRepresentation::*;

match &self.inner {
Raw(path) => path.to_owned(),
Parsed(dirs_and_file_name) => {
let mut path: PathBuf = dirs_and_file_name
.directories
.iter()

@@ -30,4 +59,254 @@ impl FileConverter {
}
}
}

/// Add the parts of `path` to the end of this path. Notably does
/// *not* behave as `PathBuf::push` does: there is no way to replace the
/// root. If `self` has a file name, that will be removed, then the
/// directories of `path` will be appended, then any file name of `path`
/// will be assigned to `self`.
pub fn push_path(&mut self, path: &Self) {
self.inner = mem::take(&mut self.inner).push_path(path)
}

/// Whether the prefix is the start of this path or not.
pub fn prefix_matches(&self, prefix: &Self) -> bool {
self.inner.prefix_matches(&prefix.inner)
}
}

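The doc comment's warning is worth spelling out: `PathBuf::push` throws away the existing path when handed an absolute path, which is exactly the behavior `push_path` avoids. A small demonstration of the standard library behavior being contrasted:

    use std::path::PathBuf;

    fn main() {
        let mut p = PathBuf::from("/one/two");
        p.push("/etc/passwd");
        // The original path is gone; an absolute argument replaced the root.
        assert_eq!(p, PathBuf::from("/etc/passwd"));
    }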
impl From<FilePath> for DirsAndFileName {
fn from(file_path: FilePath) -> Self {
file_path.inner.into()
}
}

impl From<DirsAndFileName> for FilePath {
fn from(dirs_and_file_name: DirsAndFileName) -> Self {
Self {
inner: FilePathRepresentation::Parsed(dirs_and_file_name),
}
}
}

#[derive(Debug, Clone, Eq)]
enum FilePathRepresentation {
Raw(PathBuf),
Parsed(DirsAndFileName),
}

impl Default for FilePathRepresentation {
fn default() -> Self {
Self::Parsed(DirsAndFileName::default())
}
}

impl PartialEq for FilePathRepresentation {
fn eq(&self, other: &Self) -> bool {
use FilePathRepresentation::*;
match (self, other) {
(Parsed(self_parts), Parsed(other_parts)) => self_parts == other_parts,
(Parsed(self_parts), _) => {
let other_parts: DirsAndFileName = other.to_owned().into();
*self_parts == other_parts
}
(_, Parsed(other_parts)) => {
let self_parts: DirsAndFileName = self.to_owned().into();
self_parts == *other_parts
}
_ => {
let self_parts: DirsAndFileName = self.to_owned().into();
let other_parts: DirsAndFileName = other.to_owned().into();
self_parts == other_parts
}
}
}
}

impl FilePathRepresentation {
fn push_dir(self, part: impl Into<String>) -> Self {
let mut dirs_and_file_name: DirsAndFileName = self.into();

dirs_and_file_name.push_dir(part);
Self::Parsed(dirs_and_file_name)
}

fn push_all_dirs<'a>(self, parts: impl AsRef<[&'a str]>) -> Self {
let mut dirs_and_file_name: DirsAndFileName = self.into();

dirs_and_file_name.push_all_dirs(parts);
Self::Parsed(dirs_and_file_name)
}

fn set_file_name(self, part: impl Into<String>) -> Self {
let mut dirs_and_file_name: DirsAndFileName = self.into();

dirs_and_file_name.set_file_name(part);
Self::Parsed(dirs_and_file_name)
}

/// Add the parts of `path` to the end of this path. Notably does
/// *not* behave as `PathBuf::push` does: there is no way to replace the
/// root. If `self` has a file name, that will be removed, then the
/// directories of `path` will be appended, then any file name of `path`
/// will be assigned to `self`.
fn push_path(self, path: &FilePath) -> Self {
let DirsAndFileName {
directories: path_dirs,
file_name: path_file_name,
} = path.inner.to_owned().into();
let mut dirs_and_file_name: DirsAndFileName = self.into();

dirs_and_file_name.directories.extend(path_dirs);
dirs_and_file_name.file_name = path_file_name;

Self::Parsed(dirs_and_file_name)
}

fn prefix_matches(&self, prefix: &Self) -> bool {
use FilePathRepresentation::*;
match (self, prefix) {
(Parsed(self_parts), Parsed(prefix_parts)) => self_parts.prefix_matches(prefix_parts),
(Parsed(self_parts), _) => {
let prefix_parts: DirsAndFileName = prefix.to_owned().into();
self_parts.prefix_matches(&prefix_parts)
}
(_, Parsed(prefix_parts)) => {
let self_parts: DirsAndFileName = self.to_owned().into();
self_parts.prefix_matches(prefix_parts)
}
_ => {
let self_parts: DirsAndFileName = self.to_owned().into();
let prefix_parts: DirsAndFileName = prefix.to_owned().into();
self_parts.prefix_matches(&prefix_parts)
}
}
}
}

impl From<FilePathRepresentation> for DirsAndFileName {
fn from(file_path_rep: FilePathRepresentation) -> Self {
use FilePathRepresentation::*;

match file_path_rep {
Raw(path) => {
let mut parts: Vec<PathPart> = path
.iter()
.flat_map(|s| s.to_os_string().into_string().map(PathPart))
.collect();

let maybe_file_name = match parts.pop() {
Some(file)
if !file.encoded().starts_with('.')
&& (file.encoded().ends_with(".json")
|| file.encoded().ends_with(".parquet")
|| file.encoded().ends_with(".segment")) =>
{
Some(file)
}
Some(dir) => {
parts.push(dir);
None
}
None => None,
};
Self {
directories: parts,
file_name: maybe_file_name,
}
}
Parsed(dirs_and_file_name) => dirs_and_file_name,
}
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn path_buf_to_dirs_and_file_name_conversion() {
// Last section ending in `.json` is a file name
let path_buf: PathBuf = "/one/two/blah.json".into();
let file_path = FilePath::raw(path_buf);
let parts: DirsAndFileName = file_path.into();
assert_eq!(parts.directories.len(), 3);
assert_eq!(parts.file_name.unwrap().0, "blah.json");

// Last section ending in `.segment` is a file name
let path_buf: PathBuf = "/one/two/blah.segment".into();
let file_path = FilePath::raw(path_buf);
let parts: DirsAndFileName = file_path.into();
assert_eq!(parts.directories.len(), 3);
assert_eq!(parts.file_name.unwrap().0, "blah.segment");

// Last section ending in `.parquet` is a file name
let path_buf: PathBuf = "/one/two/blah.parquet".into();
let file_path = FilePath::raw(path_buf);
let parts: DirsAndFileName = file_path.into();
assert_eq!(parts.directories.len(), 3);
assert_eq!(parts.file_name.unwrap().0, "blah.parquet");

// Last section ending in `.txt` is NOT a file name; we don't recognize that
// extension
let path_buf: PathBuf = "/one/two/blah.txt".into();
let file_path = FilePath::raw(path_buf);
let parts: DirsAndFileName = file_path.into();
assert_eq!(parts.directories.len(), 4);
assert!(parts.file_name.is_none());

// Last section containing a `.` isn't a file name
let path_buf: PathBuf = "/one/two/blah.blah".into();
let file_path = FilePath::raw(path_buf);
let parts: DirsAndFileName = file_path.into();
assert_eq!(parts.directories.len(), 4);
assert!(parts.file_name.is_none());

// Last section starting with a `.` isn't a file name (macos temp dirs do this)
let path_buf: PathBuf = "/one/two/.blah".into();
let file_path = FilePath::raw(path_buf);
let parts: DirsAndFileName = file_path.into();
assert_eq!(parts.directories.len(), 4);
assert!(parts.file_name.is_none());
}

#[test]
fn conversions() {
// dir and file name
let path_buf: PathBuf = "foo/bar/blah.json".into();
let file_path = FilePath::raw(path_buf);
let parts: DirsAndFileName = file_path.into();

let mut expected_parts = DirsAndFileName::default();
expected_parts.push_dir("foo");
expected_parts.push_dir("bar");
expected_parts.file_name = Some("blah.json".into());

assert_eq!(parts, expected_parts);

// dir, no file name
let path_buf: PathBuf = "foo/bar".into();
let file_path = FilePath::raw(path_buf);
let parts: DirsAndFileName = file_path.into();

expected_parts.file_name = None;

assert_eq!(parts, expected_parts);

// no dir, file name
let path_buf: PathBuf = "blah.json".into();
let file_path = FilePath::raw(path_buf);
let parts: DirsAndFileName = file_path.into();

assert!(parts.directories.is_empty());
assert_eq!(parts.file_name.unwrap().encoded(), "blah.json");

// empty
let path_buf: PathBuf = "".into();
let file_path = FilePath::raw(path_buf);
let parts: DirsAndFileName = file_path.into();

assert!(parts.directories.is_empty());
assert!(parts.file_name.is_none());
}
}

@ -1,11 +1,47 @@
|
|||
use super::{ObjectStorePath, PathPart, PathRepresentation, DELIMITER};
|
||||
use super::{ObjectStorePath, PathPart, DELIMITER};
|
||||
|
||||
use itertools::Itertools;
|
||||
|
||||
/// A path stored as a collection of 0 or more directories and 0 or 1 file name
|
||||
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Default)]
|
||||
pub(crate) struct DirsAndFileName {
|
||||
pub struct DirsAndFileName {
|
||||
pub(crate) directories: Vec<PathPart>,
|
||||
pub(crate) file_name: Option<PathPart>,
|
||||
}
|
||||
|
||||
impl ObjectStorePath for DirsAndFileName {
|
||||
fn set_file_name(&mut self, part: impl Into<String>) {
|
||||
let part = part.into();
|
||||
self.file_name = Some((&*part).into());
|
||||
}
|
||||
|
||||
fn push_dir(&mut self, part: impl Into<String>) {
|
||||
let part = part.into();
|
||||
self.directories.push((&*part).into());
|
||||
}
|
||||
|
||||
fn push_all_dirs<'a>(&mut self, parts: impl AsRef<[&'a str]>) {
|
||||
self.directories
|
||||
.extend(parts.as_ref().iter().map(|&v| v.into()));
|
||||
}
|
||||
|
||||
fn display(&self) -> String {
|
||||
let mut s = self
|
||||
.directories
|
||||
.iter()
|
||||
.map(PathPart::encoded)
|
||||
.join(DELIMITER);
|
||||
|
||||
if !s.is_empty() {
|
||||
s.push_str(DELIMITER);
|
||||
}
|
||||
if let Some(file_name) = &self.file_name {
|
||||
s.push_str(file_name.encoded());
|
||||
}
|
||||
s
|
||||
}
|
||||
}
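
For orientation, a minimal sketch (not part of this diff) of how the trait methods above compose, assuming DELIMITER is "/":

// Hypothetical usage, not from the PR.
let mut path = DirsAndFileName::default();
path.push_all_dirs(&["1", "mydb", "wal"]);
path.set_file_name("000.segment");
assert_eq!(path.display(), "1/mydb/wal/000.segment");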

impl DirsAndFileName {
    pub(crate) fn prefix_matches(&self, prefix: &Self) -> bool {
        let diff = itertools::diff_with(
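
The hunk is cut off here, but prefix_matches is built on itertools::diff_with, which walks two sequences and reports where they first diverge. A rough sketch of that building block under simplified assumptions (exact-element matching only; the real method also allows the last needle part to match partially, per the tests below):

// Sketch only; simpler than the real prefix_matches.
use itertools::{diff_with, Diff};

fn is_exact_prefix(haystack: &[&str], needle: &[&str]) -> bool {
    match diff_with(haystack.iter(), needle.iter(), |a, b| a == b) {
        // `needle` ran out first (Shorter) or both matched fully (None):
        // every element of `needle` lined up with `haystack`.
        Some(Diff::Shorter(..)) | None => true,
        _ => false,
    }
}

#[test]
fn exact_prefix_sketch() {
    assert!(is_exact_prefix(&["foo", "bar", "baz"], &["foo", "bar"]));
    assert!(!is_exact_prefix(&["foo"], &["foo", "bar"]));
}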
@ -82,85 +118,173 @@ impl DirsAndFileName {
        Some(parts)
    }

    /// Add a part to the end of the path's directories, encoding any restricted
    /// characters.
    pub(crate) fn push_dir(&mut self, part: impl Into<String>) {
        let part = part.into();
        self.directories.push((&*part).into());
    }

    /// Push a bunch of parts as directories in one go.
    pub(crate) fn push_all_dirs<'a>(&mut self, parts: impl AsRef<[&'a str]>) {
        self.directories
            .extend(parts.as_ref().iter().map(|&v| v.into()));
    }

    /// Add a `PathPart` to the end of the path's directories.
    pub(crate) fn push_part_as_dir(&mut self, part: &PathPart) {
        self.directories.push(part.to_owned());
    }
}
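
And a minimal sketch (not from the diff) of parts_after_prefix, which strips a matching prefix and returns the remaining parts (the parts_after_prefix_behavior test below pins down the full behavior):

// Hypothetical usage, mirroring the tests further down.
let mut path = DirsAndFileName::default();
path.push_all_dirs(&["apple", "bear", "cow"]);

let mut prefix = DirsAndFileName::default();
prefix.push_dir("apple");

// Some(remaining parts) on a match, None otherwise.
let rest: Vec<PathPart> = path.parts_after_prefix(&prefix).unwrap();
assert_eq!(rest.len(), 2); // "bear", "cow"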

impl From<PathRepresentation> for DirsAndFileName {
    fn from(path_rep: PathRepresentation) -> Self {
        match path_rep {
            PathRepresentation::RawCloud(path) => {
                let mut parts: Vec<PathPart> = path
                    .split_terminator(DELIMITER)
                    .map(|s| PathPart(s.to_string()))
                    .collect();
                let maybe_file_name = match parts.pop() {
                    Some(file) if file.encoded().contains('.') => Some(file),
                    Some(dir) => {
                        parts.push(dir);
                        None
                    }
                    None => None,
                };
                Self {
                    directories: parts,
                    file_name: maybe_file_name,
                }
            }
            PathRepresentation::RawPathBuf(path) => {
                let mut parts: Vec<PathPart> = path
                    .iter()
                    .flat_map(|s| s.to_os_string().into_string().map(PathPart))
                    .collect();
#[cfg(test)]
mod tests {
    use super::*;

                let maybe_file_name = match parts.pop() {
                    Some(file)
                        if !file.encoded().starts_with('.')
                            && (file.encoded().ends_with(".json")
                                || file.encoded().ends_with(".parquet")
                                || file.encoded().ends_with(".segment")) =>
                    {
                        Some(file)
                    }
                    Some(dir) => {
                        parts.push(dir);
                        None
                    }
                    None => None,
                };
                Self {
                    directories: parts,
                    file_name: maybe_file_name,
                }
            }
            PathRepresentation::Parts(dirs_and_file_name) => dirs_and_file_name,
        }
    }
}

impl From<&'_ ObjectStorePath> for DirsAndFileName {
    fn from(other: &'_ ObjectStorePath) -> Self {
        other.clone().into()
    }
}

impl From<ObjectStorePath> for DirsAndFileName {
    fn from(other: ObjectStorePath) -> Self {
        other.inner.into()

    #[test]
    fn parts_after_prefix_behavior() {
        let mut existing_path = DirsAndFileName::default();
        existing_path.push_all_dirs(&["apple", "bear", "cow", "dog"]);
        existing_path.file_name = Some("egg.json".into());

        // Prefix with one directory
        let mut prefix = DirsAndFileName::default();
        prefix.push_dir("apple");
        let expected_parts: Vec<PathPart> = vec!["bear", "cow", "dog", "egg.json"]
            .into_iter()
            .map(Into::into)
            .collect();
        let parts = existing_path.parts_after_prefix(&prefix).unwrap();
        assert_eq!(parts, expected_parts);

        // Prefix with two directories
        let mut prefix = DirsAndFileName::default();
        prefix.push_all_dirs(&["apple", "bear"]);
        let expected_parts: Vec<PathPart> = vec!["cow", "dog", "egg.json"]
            .into_iter()
            .map(Into::into)
            .collect();
        let parts = existing_path.parts_after_prefix(&prefix).unwrap();
        assert_eq!(parts, expected_parts);

        // Not a prefix
        let mut prefix = DirsAndFileName::default();
        prefix.push_dir("cow");
        assert!(existing_path.parts_after_prefix(&prefix).is_none());

        // Prefix with a partial directory
        let mut prefix = DirsAndFileName::default();
        prefix.push_dir("ap");
        assert!(existing_path.parts_after_prefix(&prefix).is_none());

        // Prefix matches but there aren't any parts after it
        let mut existing_path = DirsAndFileName::default();
        existing_path.push_all_dirs(&["apple", "bear", "cow", "dog"]);
        let prefix = existing_path.clone();
        let parts = existing_path.parts_after_prefix(&prefix).unwrap();
        assert!(parts.is_empty());
    }

    #[test]
    fn prefix_matches() {
        let mut haystack = DirsAndFileName::default();
        haystack.push_all_dirs(&["foo/bar", "baz%2Ftest", "something"]);

        // self starts with self
        assert!(
            haystack.prefix_matches(&haystack),
            "{:?} should have started with {:?}",
            haystack,
            haystack
        );

        // a longer prefix doesn't match
        let mut needle = haystack.clone();
        needle.push_dir("longer now");
        assert!(
            !haystack.prefix_matches(&needle),
            "{:?} shouldn't have started with {:?}",
            haystack,
            needle
        );

        // one dir prefix matches
        let mut needle = DirsAndFileName::default();
        needle.push_dir("foo/bar");
        assert!(
            haystack.prefix_matches(&needle),
            "{:?} should have started with {:?}",
            haystack,
            needle
        );

        // two dir prefix matches
        needle.push_dir("baz%2Ftest");
        assert!(
            haystack.prefix_matches(&needle),
            "{:?} should have started with {:?}",
            haystack,
            needle
        );

        // partial dir prefix matches
        let mut needle = DirsAndFileName::default();
        needle.push_dir("f");
        assert!(
            haystack.prefix_matches(&needle),
            "{:?} should have started with {:?}",
            haystack,
            needle
        );

        // one dir and one partial dir matches
        let mut needle = DirsAndFileName::default();
        needle.push_all_dirs(&["foo/bar", "baz"]);
        assert!(
            haystack.prefix_matches(&needle),
            "{:?} should have started with {:?}",
            haystack,
            needle
        );
    }

    #[test]
    fn prefix_matches_with_file_name() {
        let mut haystack = DirsAndFileName::default();
        haystack.push_all_dirs(&["foo/bar", "baz%2Ftest", "something"]);

        let mut needle = haystack.clone();

        // All directories match and file name is a prefix
        haystack.set_file_name("foo.segment");
        needle.set_file_name("foo");

        assert!(
            haystack.prefix_matches(&needle),
            "{:?} should have started with {:?}",
            haystack,
            needle
        );

        // All directories match but file name is not a prefix
        needle.set_file_name("e");

        assert!(
            !haystack.prefix_matches(&needle),
            "{:?} should not have started with {:?}",
            haystack,
            needle
        );

        // Not all directories match; file name is a prefix of the next directory; this
        // matches
        let mut needle = DirsAndFileName::default();
        needle.push_all_dirs(&["foo/bar", "baz%2Ftest"]);
        needle.set_file_name("s");

        assert!(
            haystack.prefix_matches(&needle),
            "{:?} should have started with {:?}",
            haystack,
            needle
        );

        // Not all directories match; file name is NOT a prefix of the next directory;
        // no match
        needle.set_file_name("p");

        assert!(
            !haystack.prefix_matches(&needle),
            "{:?} should not have started with {:?}",
            haystack,
            needle
        );
    }
}

@ -3,9 +3,10 @@
use data_types::{
    data::ReplicatedWrite,
    database_rules::{WalBufferRollover, WriterId},
    DatabaseName,
};
use generated_types::wal;
use object_store::path::ObjectStorePath;
use object_store::{path::ObjectStorePath, ObjectStore, ObjectStoreApi};

use std::{
    collections::BTreeMap,

@ -20,7 +21,7 @@ use chrono::{DateTime, Utc};
use crc32fast::Hasher;
use data_types::database_rules::WalBufferConfig;
use snafu::{ensure, OptionExt, ResultExt, Snafu};
use tracing::warn;
use tracing::{error, info, warn};

#[derive(Debug, Snafu)]
pub enum Error {

@ -352,6 +353,44 @@ impl Segment {
        *persisted
    }

    /// Spawns a tokio task that will continuously try to persist the bytes to
    /// the given object store location.
    pub fn persist_bytes_in_background(
        &self,
        writer_id: u32,
        db_name: &DatabaseName<'_>,
        store: Arc<ObjectStore>,
    ) -> Result<()> {
        let data = self.to_file_bytes(writer_id)?;
        let location = database_object_store_path(writer_id, db_name, &store);
        let location = object_store_path_for_segment(&location, self.id)?;

        let len = data.len();
        let mut stream_data = std::io::Result::Ok(data.clone());

        tokio::task::spawn(async move {
            while let Err(err) = store
                .put(
                    &location,
                    futures::stream::once(async move { stream_data }),
                    len,
                )
                .await
            {
                error!("error writing bytes to store: {}", err);
                tokio::time::sleep(tokio::time::Duration::from_secs(
                    super::STORE_ERROR_PAUSE_SECONDS,
                ))
                .await;
                stream_data = std::io::Result::Ok(data.clone());
            }

            info!("persisted data to {}", location.display());
        });

        Ok(())
    }
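
A hypothetical call site for the new method (the real one appears in the server/src/lib.rs hunk later in this diff); on failure the spawned task retries forever, pausing STORE_ERROR_PAUSE_SECONDS between attempts:

// Sketch, assuming `segment`, `writer_id`, `db_name`, and `store` are in scope.
segment.persist_bytes_in_background(writer_id, db_name, store)?;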

    // converts the segment to its flatbuffer bytes
    fn fb_bytes(&self, writer_id: u32) -> Vec<u8> {
        let mut fbb = flatbuffers::FlatBufferBuilder::new_with_capacity(

@ -468,10 +507,7 @@ const SEGMENT_FILE_EXTENSION: &str = ".segment";

/// Builds the path for a given segment id, given the root object store path.
/// The path should be where the root of the database is (e.g. 1/my_db/).
pub fn object_store_path_for_segment(
    root_path: &ObjectStorePath,
    segment_id: u64,
) -> Result<ObjectStorePath> {
fn object_store_path_for_segment<P: ObjectStorePath>(root_path: &P, segment_id: u64) -> Result<P> {
    ensure!(
        segment_id < MAX_SEGMENT_ID && segment_id > 0,
        SegmentIdOutOfBounds

@ -494,12 +530,24 @@ pub fn object_store_path_for_segment(
    Ok(path)
}

// base location in object store for a given database name
fn database_object_store_path(
    writer_id: u32,
    database_name: &DatabaseName<'_>,
    store: &ObjectStore,
) -> object_store::path::Path {
    let mut path = store.new_path();
    path.push_dir(format!("{}", writer_id));
    path.push_dir(database_name.to_string());
    path
}

#[cfg(test)]
mod tests {
    use super::*;
    use data_types::{data::lines_to_replicated_write, database_rules::DatabaseRules};
    use influxdb_line_protocol::parse_lines;
    use object_store::path::cloud::CloudConverter;
    use object_store::memory::InMemory;

    #[test]
    fn append_increments_current_size_and_uses_existing_segment() {

@ -806,33 +854,40 @@ mod tests {
    }

    #[test]
    fn object_store_path_for_segment() {
        let path = ObjectStorePath::from_cloud_unchecked("1/mydb");
        let segment_path = super::object_store_path_for_segment(&path, 23).unwrap();
        let segment_path = CloudConverter::convert(&segment_path);
    fn valid_object_store_path_for_segment() {
        let storage = ObjectStore::new_in_memory(InMemory::new());
        let mut base_path = storage.new_path();
        base_path.push_all_dirs(&["1", "mydb"]);

        assert_eq!(segment_path, "1/mydb/wal/000/000/023.segment");
        let segment_path = object_store_path_for_segment(&base_path, 23).unwrap();
        let mut expected_segment_path = base_path.clone();
        expected_segment_path.push_all_dirs(&["wal", "000", "000"]);
        expected_segment_path.set_file_name("023.segment");
        assert_eq!(segment_path, expected_segment_path);

        let segment_path = super::object_store_path_for_segment(&path, 20_003).unwrap();
        let segment_path = CloudConverter::convert(&segment_path);
        let segment_path = object_store_path_for_segment(&base_path, 20_003).unwrap();
        let mut expected_segment_path = base_path.clone();
        expected_segment_path.push_all_dirs(&["wal", "000", "020"]);
        expected_segment_path.set_file_name("003.segment");
        assert_eq!(segment_path, expected_segment_path);

        assert_eq!(segment_path, "1/mydb/wal/000/020/003.segment");

        let segment_path = super::object_store_path_for_segment(&path, 45_010_105).unwrap();
        let segment_path = CloudConverter::convert(&segment_path);

        assert_eq!(segment_path, "1/mydb/wal/045/010/105.segment");
        let segment_path = object_store_path_for_segment(&base_path, 45_010_105).unwrap();
        let mut expected_segment_path = base_path;
        expected_segment_path.push_all_dirs(&["wal", "045", "010"]);
        expected_segment_path.set_file_name("105.segment");
        assert_eq!(segment_path, expected_segment_path);
    }
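
The naming scheme the test above pins down: the segment id is zero-padded to nine digits and split into three-digit groups, the first two becoming directories under wal/ and the last the file stem. A standalone sketch of just that math (the crate's own implementation is cut off above, so this is an illustration, not the actual code):

// Sketch of the naming math only.
fn segment_relative_path(segment_id: u64) -> String {
    let padded = format!("{:09}", segment_id); // e.g. 45_010_105 -> "045010105"
    let (dir1, rest) = padded.split_at(3);     // "045"
    let (dir2, file) = rest.split_at(3);       // "010", "105"
    format!("wal/{}/{}/{}.segment", dir1, dir2, file)
}

#[test]
fn segment_naming_sketch() {
    assert_eq!(segment_relative_path(23), "wal/000/000/023.segment");
    assert_eq!(segment_relative_path(20_003), "wal/000/020/003.segment");
    assert_eq!(segment_relative_path(45_010_105), "wal/045/010/105.segment");
}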

    #[test]
    fn object_store_path_for_segment_out_of_bounds() {
        let path = ObjectStorePath::from_cloud_unchecked("1/mydb");
        let segment_path = super::object_store_path_for_segment(&path, 0)
            .err()
            .unwrap();
        let storage = ObjectStore::new_in_memory(InMemory::new());
        let mut base_path = storage.new_path();
        base_path.push_all_dirs(&["1", "mydb"]);

        let segment_path = object_store_path_for_segment(&base_path, 0).err().unwrap();
        matches!(segment_path, Error::SegmentIdOutOfBounds);

        let segment_path = super::object_store_path_for_segment(&path, 23_000_000_000)
        let segment_path = object_store_path_for_segment(&base_path, 23_000_000_000)
            .err()
            .unwrap();
        matches!(segment_path, Error::SegmentIdOutOfBounds);

@ -87,10 +87,10 @@ impl Config {
    }
}

pub fn object_store_path_for_database_config(
    root: &ObjectStorePath,
pub fn object_store_path_for_database_config<P: ObjectStorePath>(
    root: &P,
    name: &DatabaseName<'_>,
) -> ObjectStorePath {
) -> P {
    let mut path = root.clone();
    path.push_dir(name.to_string());
    path.set_file_name(DB_RULES_FILE_NAME);

@ -136,7 +136,7 @@ impl<'a> Drop for CreateDatabaseHandle<'a> {
#[cfg(test)]
mod test {
    use super::*;
    use object_store::path::cloud::CloudConverter;
    use object_store::{memory::InMemory, ObjectStore, ObjectStoreApi};

    #[test]
    fn create_db() {

@ -157,11 +157,17 @@ mod test {

    #[test]
    fn object_store_path_for_database_config() {
        let path = ObjectStorePath::from_cloud_unchecked("1");
        let name = DatabaseName::new("foo").unwrap();
        let rules_path = super::object_store_path_for_database_config(&path, &name);
        let rules_path = CloudConverter::convert(&rules_path);
        let storage = ObjectStore::new_in_memory(InMemory::new());
        let mut base_path = storage.new_path();
        base_path.push_dir("1");

        assert_eq!(rules_path, "1/foo/rules.json");
        let name = DatabaseName::new("foo").unwrap();
        let rules_path = super::object_store_path_for_database_config(&base_path, &name);

        let mut expected_path = base_path;
        expected_path.push_dir("foo");
        expected_path.set_file_name("rules.json");

        assert_eq!(rules_path, expected_path);
    }
}
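
Since object_store_path_for_database_config is now generic over P: ObjectStorePath, the same helper serves cloud, file, and in-memory paths alike. A sketch of the idea using DirsAndFileName (which implements the trait per the parts.rs hunk above and whose display joins parts with "/"; the Clone the function body needs is assumed to come with the trait):

// Hypothetical caller; any P: ObjectStorePath works the same way.
let mut root = DirsAndFileName::default();
root.push_dir("1");
let name = DatabaseName::new("foo").unwrap();
let rules_path = object_store_path_for_database_config(&root, &name);
assert_eq!(rules_path.display(), "1/foo/rules.json");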

@ -86,14 +86,14 @@ use data_types::{
    {DatabaseName, DatabaseNameError},
};
use influxdb_line_protocol::ParsedLine;
use object_store::{path::ObjectStorePath, ObjectStore};
use object_store::{path::ObjectStorePath, ObjectStore, ObjectStoreApi};
use query::{exec::Executor, Database, DatabaseStore};

use async_trait::async_trait;
use bytes::Bytes;
use futures::stream::TryStreamExt;
use snafu::{OptionExt, ResultExt, Snafu};
use tracing::{error, info};
use tracing::error;

type DatabaseError = Box<dyn std::error::Error + Send + Sync + 'static>;

@ -139,6 +139,8 @@ pub enum Error {

pub type Result<T, E = Error> = std::result::Result<T, E>;

const STORE_ERROR_PAUSE_SECONDS: u64 = 100;

/// `Server` is the container struct for how servers store data internally, as
/// well as how they communicate with other servers. Each server will have one
/// of these structs, which keeps track of all replication and query rules.

@ -186,7 +188,7 @@ impl<M: ConnectionManager> Server<M> {
        mut rules: DatabaseRules,
    ) -> Result<()> {
        // Return an error if this server hasn't yet been set up with an id
        let id = self.require_id()?;
        self.require_id()?;

        let name = db_name.into();
        let db_name = DatabaseName::new(name.clone()).context(InvalidDatabaseName)?;

@ -197,10 +199,8 @@ impl<M: ConnectionManager> Server<M> {
        let data =
            Bytes::from(serde_json::to_vec(&db_reservation.db.rules).context(ErrorSerializing)?);
        let len = data.len();
        let location = object_store_path_for_database_config(
            &server_object_store_path(id),
            &db_reservation.name,
        );
        let location =
            object_store_path_for_database_config(&self.root_path()?, &db_reservation.name);

        let stream_data = std::io::Result::Ok(data);
        self.store

@ -217,19 +217,25 @@ impl<M: ConnectionManager> Server<M> {
        Ok(())
    }

    // base location in object store for this writer
    fn root_path(&self) -> Result<object_store::path::Path> {
        let id = self.require_id()?;

        let mut path = self.store.new_path();
        path.push_dir(format!("{}", id));
        Ok(path)
    }

    /// Loads the database configurations based on the databases in the
    /// object store. Any databases already in the config won't be
    /// replaced.
    pub async fn load_database_configs(&self) -> Result<()> {
        let id = self.require_id()?;
        let root_path = server_object_store_path(id);

        // get the database names from the object store prefixes
        // TODO: update object store to pull back all common prefixes by
        // following the next tokens.
        let list_result = self
            .store
            .list_with_delimiter(&root_path)
            .list_with_delimiter(&self.root_path()?)
            .await
            .context(StoreError)?;

@ -345,12 +351,10 @@ impl<M: ConnectionManager> Server<M> {
        if let Some(segment) = segment {
            if persist {
                let writer_id = self.require_id()?;
                let data = segment.to_file_bytes(writer_id).context(WalError)?;
                let store = self.store.clone();
                let location = database_object_store_path(writer_id, db_name);
                let location = buffer::object_store_path_for_segment(&location, segment.id)
                segment
                    .persist_bytes_in_background(writer_id, db_name, store)
                    .context(WalError)?;
                persist_bytes_in_background(data, store, location);
            }
        }
    }

@ -522,51 +526,13 @@ impl RemoteServer for RemoteServerImpl {
    }
}

// base location in object store for a given database name
fn database_object_store_path(writer_id: u32, database_name: &DatabaseName<'_>) -> ObjectStorePath {
    let mut path = ObjectStorePath::default();
    path.push_dir(format!("{}", writer_id));
    path.push_dir(database_name.to_string());
    path
}

fn server_object_store_path(writer_id: u32) -> ObjectStorePath {
    ObjectStorePath::from_cloud_unchecked(format!("{}", writer_id))
}

const STORE_ERROR_PAUSE_SECONDS: u64 = 100;

/// Spawns a tokio task that will continuously try to persist the bytes to the
/// given object store location.
fn persist_bytes_in_background(data: Bytes, store: Arc<ObjectStore>, location: ObjectStorePath) {
    let len = data.len();
    let mut stream_data = std::io::Result::Ok(data.clone());

    tokio::task::spawn(async move {
        while let Err(err) = store
            .put(
                &location,
                futures::stream::once(async move { stream_data }),
                len,
            )
            .await
        {
            error!("error writing bytes to store: {}", err);
            tokio::time::sleep(tokio::time::Duration::from_secs(STORE_ERROR_PAUSE_SECONDS)).await;
            stream_data = std::io::Result::Ok(data.clone());
        }

        info!("persisted data to {}", store.convert_path(&location));
    });
}

// get bytes from the location in object store
async fn get_store_bytes(
    location: &ObjectStorePath,
    location: &object_store::path::Path,
    store: &ObjectStore,
) -> Result<bytes::BytesMut> {
    let b = store
        .get(&location)
        .get(location)
        .await
        .context(StoreError)?
        .map_ok(|b| bytes::BytesMut::from(&b[..]))

@ -589,7 +555,7 @@ mod tests {
    };
    use futures::TryStreamExt;
    use influxdb_line_protocol::parse_lines;
    use object_store::memory::InMemory;
    use object_store::{memory::InMemory, path::ObjectStorePath};
    use query::frontend::sql::SQLQueryPlanner;
    use snafu::Snafu;
    use std::collections::BTreeMap;

@ -644,11 +610,13 @@ mod tests {
            .await
            .expect("failed to create database");

        let mut rules_path = server.store.new_path();
        rules_path.push_all_dirs(&["1", name]);
        rules_path.set_file_name("rules.json");

        let read_data = server
            .store
            .get(&ObjectStorePath::from_cloud_unchecked(
                "1/bananas/rules.json",
            ))
            .get(&rules_path)
            .await
            .unwrap()
            .map_ok(|b| bytes::BytesMut::from(&b[..]))

@ -667,10 +635,7 @@ mod tests {
            .await
            .expect("failed to create 2nd db");

        store
            .list_with_delimiter(&ObjectStorePath::from_cloud_unchecked(""))
            .await
            .unwrap();
        store.list_with_delimiter(&store.new_path()).await.unwrap();

        let manager = TestConnectionManager::new();
        let server2 = Server::new(manager, store);

@ -929,7 +894,10 @@ partition_key:
        // write lines should have caused a segment rollover and persist, wait
        tokio::task::yield_now().await;

        let path = ObjectStorePath::from_cloud_unchecked("1/my_db/wal/000/000/001.segment");
        let mut path = store.new_path();
        path.push_all_dirs(&["1", "my_db", "wal", "000", "000"]);
        path.set_file_name("001.segment");

        let data = store
            .get(&path)
            .await

@ -8,7 +8,7 @@ use data_types::{
    partition_metadata::{Partition as PartitionMeta, Table},
    selection::Selection,
};
use object_store::{path::ObjectStorePath, ObjectStore};
use object_store::{path::ObjectStorePath, ObjectStore, ObjectStoreApi};
use query::PartitionChunk;

use std::io::{Cursor, Seek, SeekFrom, Write};

@ -64,8 +64,8 @@ where
{
    pub id: Uuid,
    pub partition_meta: PartitionMeta,
    pub metadata_path: ObjectStorePath,
    pub data_path: ObjectStorePath,
    pub metadata_path: object_store::path::Path,
    pub data_path: object_store::path::Path,
    store: Arc<ObjectStore>,
    partition: Arc<T>,
    status: Mutex<Status>,

@ -77,8 +77,8 @@ where
{
    fn new(
        partition_key: impl Into<String>,
        metadata_path: ObjectStorePath,
        data_path: ObjectStorePath,
        metadata_path: object_store::path::Path,
        data_path: object_store::path::Path,
        store: Arc<ObjectStore>,
        partition: Arc<T>,
        tables: Vec<Table>,

@ -104,10 +104,6 @@ where
        }
    }

    fn data_path(&self) -> String {
        self.store.convert_path(&self.data_path)
    }

    // returns the position of the next table
    fn next_table(&self) -> Option<(usize, &str)> {
        let mut status = self.status.lock().expect("mutex poisoned");

@ -198,7 +194,7 @@ where
    async fn write_batches(
        &self,
        batches: Vec<RecordBatch>,
        file_name: &ObjectStorePath,
        file_name: &object_store::path::Path,
    ) -> Result<()> {
        let mem_writer = MemWriter::default();
        {

@ -250,8 +246,8 @@ pub struct Status {
}

pub fn snapshot_chunk<T>(
    metadata_path: ObjectStorePath,
    data_path: ObjectStorePath,
    metadata_path: object_store::path::Path,
    data_path: object_store::path::Path,
    store: Arc<ObjectStore>,
    partition_key: &str,
    chunk: Arc<T>,

@ -281,7 +277,7 @@ where
    info!(
        "starting snapshot of {} to {}",
        &snapshot.partition_meta.key,
        &snapshot.data_path()
        &snapshot.data_path.display()
    );
    if let Err(e) = snapshot.run(notify).await {
        error!("error running snapshot: {:?}", e);

@ -365,10 +361,10 @@ mem,host=A,region=west used=45 1
        let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
        let chunk = Arc::new(chunk);
        let (tx, rx) = tokio::sync::oneshot::channel();
        let mut metadata_path = ObjectStorePath::default();
        let mut metadata_path = store.new_path();
        metadata_path.push_dir("meta");

        let mut data_path = ObjectStorePath::default();
        let mut data_path = store.new_path();
        data_path.push_dir("data");

        let snapshot = snapshot_chunk(

@ -418,10 +414,10 @@ mem,host=A,region=west used=45 1

        let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
        let chunk = Arc::new(ChunkWB::new(11));
        let mut metadata_path = ObjectStorePath::default();
        let mut metadata_path = store.new_path();
        metadata_path.push_dir("meta");

        let mut data_path = ObjectStorePath::default();
        let mut data_path = store.new_path();
        data_path.push_dir("data");

        let snapshot = Snapshot::new("testaroo", metadata_path, data_path, store, chunk, tables);

@ -17,7 +17,7 @@ use data_types::{
    DatabaseName,
};
use influxdb_line_protocol::parse_lines;
use object_store::path::ObjectStorePath;
use object_store::ObjectStoreApi;
use query::{frontend::sql::SQLQueryPlanner, Database, DatabaseStore};
use server::{ConnectionManager, Server as AppServer};

@ -695,6 +695,8 @@ where
async fn snapshot_partition<M: ConnectionManager + Send + Sync + Debug + 'static>(
    req: Request<Body>,
) -> Result<Response<Body>, ApplicationError> {
    use object_store::path::ObjectStorePath;

    let server = req
        .data::<Arc<AppServer<M>>>()
        .expect("server state")

@ -715,7 +717,9 @@ async fn snapshot_partition<M: ConnectionManager + Send + Sync + Debug + 'static
        bucket: &snapshot.bucket,
    })?;

    let mut metadata_path = ObjectStorePath::default();
    let store = server.store.clone();

    let mut metadata_path = store.new_path();
    metadata_path.push_dir(&db_name.to_string());
    let mut data_path = metadata_path.clone();
    metadata_path.push_dir("meta");

@ -726,7 +730,7 @@ async fn snapshot_partition<M: ConnectionManager + Send + Sync + Debug + 'static
    let snapshot = server::snapshot::snapshot_chunk(
        metadata_path,
        data_path,
        server.store.clone(),
        store,
        partition_key,
        chunk,
        None,