From ccd094dfcfca6e27fec97835f233a86dadd54272 Mon Sep 17 00:00:00 2001 From: Marco Neumann Date: Wed, 19 May 2021 16:23:25 +0200 Subject: [PATCH] refactor: streamline directory handling in path parsing --- object_store/src/disk.rs | 8 +- object_store/src/path/file.rs | 152 +++++++++++++++++++++------------- 2 files changed, 97 insertions(+), 63 deletions(-) diff --git a/object_store/src/disk.rs b/object_store/src/disk.rs index 4a6f203f00..e53a28dd06 100644 --- a/object_store/src/disk.rs +++ b/object_store/src/disk.rs @@ -161,7 +161,7 @@ impl ObjectStoreApi for File { let relative_path = file.path().strip_prefix(&root_path).expect( "Must start with root path because this came from walking the root", ); - FilePath::raw(relative_path) + FilePath::raw(relative_path, false) }) .filter(|name| prefix.map_or(true, |p| name.prefix_matches(p))) .map(|name| Ok(vec![name])) @@ -198,7 +198,7 @@ impl ObjectStoreApi for File { let root_path = self.root.to_raw(); for entry in walkdir { let entry = entry.context(UnableToProcessEntry)?; - let entry_location = FilePath::raw(entry.path()); + let entry_location = FilePath::raw(entry.path(), false); if entry_location.prefix_matches(&resolved_prefix) { let metadata = entry @@ -218,7 +218,7 @@ impl ObjectStoreApi for File { .path() .strip_prefix(&root_path) .expect("must have prefix because of the if prefix_matches condition"); - let location = FilePath::raw(path); + let location = FilePath::raw(path, false); let last_modified = metadata .modified() @@ -248,7 +248,7 @@ impl File { /// Create new filesystem storage. pub fn new(root: impl Into) -> Self { Self { - root: FilePath::raw(root), + root: FilePath::raw(root, true), } } diff --git a/object_store/src/path/file.rs b/object_store/src/path/file.rs index f7ee9f0aeb..2c499222d9 100644 --- a/object_store/src/path/file.rs +++ b/object_store/src/path/file.rs @@ -1,6 +1,9 @@ use super::{DirsAndFileName, ObjectStorePath, PathPart}; -use std::{mem, path::PathBuf}; +use std::{ + mem, + path::{is_separator, PathBuf}, +}; /// An object storage location suitable for passing to disk based object /// storage. @@ -43,10 +46,10 @@ impl FilePath { /// Creates a file storage location from a `PathBuf` without parsing or /// allocating unless other methods are called on this instance that /// need it - pub fn raw(path: impl Into) -> Self { + pub fn raw(path: impl Into, is_directory: bool) -> Self { let path = path.into(); Self { - inner: FilePathRepresentation::Raw(path), + inner: FilePathRepresentation::Raw(path, is_directory), } } @@ -57,7 +60,7 @@ impl FilePath { use FilePathRepresentation::*; match &self.inner { - Raw(path) => path.to_owned(), + Raw(path, _) => path.to_owned(), Parsed(dirs_and_file_name) => { let mut path: PathBuf = dirs_and_file_name .directories @@ -120,7 +123,8 @@ impl From for FilePath { #[derive(Debug, Clone, Eq)] enum FilePathRepresentation { - Raw(PathBuf), + // raw: native path representation and also remember if it is a directory + Raw(PathBuf, bool), Parsed(DirsAndFileName), } @@ -287,30 +291,23 @@ impl From for DirsAndFileName { use FilePathRepresentation::*; match file_path_rep { - Raw(path) => { + Raw(path, is_directory) => { let mut parts: Vec = path .iter() .flat_map(|s| s.to_os_string().into_string().map(PathPart)) .collect(); - let maybe_file_name = match parts.pop() { - Some(file) - if !file.encoded().starts_with('.') - && (file.encoded().ends_with(".json") - || file.encoded().ends_with(".parquet") - || file.encoded().ends_with(".segment")) => - { - Some(file) + if !is_directory && !parts.is_empty() && !is_a_directory(&path) { + let file_name = Some(parts.pop().expect("cannot be empty")); + Self { + directories: parts, + file_name, } - Some(dir) => { - parts.push(dir); - None + } else { + Self { + directories: parts, + file_name: None, } - None => None, - }; - Self { - directories: parts, - file_name: maybe_file_name, } } Parsed(dirs_and_file_name) => dirs_and_file_name, @@ -318,100 +315,137 @@ impl From for DirsAndFileName { } } +/// Checks if the path is for sure an directory (i.e. ends with a separator). +fn is_a_directory(path: &std::path::Path) -> bool { + if let Some(s) = path.to_str() { + if let Some(c) = s.chars().last() { + return is_separator(c); + } + } + false +} + #[cfg(test)] mod tests { + use crate::parsed_path; + use super::*; #[test] fn path_buf_to_dirs_and_file_name_conversion() { // Last section ending in `.json` is a file name let path_buf: PathBuf = "/one/two/blah.json".into(); - let file_path = FilePath::raw(path_buf); + let file_path = FilePath::raw(path_buf, false); let parts: DirsAndFileName = file_path.into(); - assert_eq!(parts.directories.len(), 3); - assert_eq!(parts.file_name.unwrap().0, "blah.json"); + let mut expected_parts = parsed_path!(["/", "one", "two"], "blah.json"); + expected_parts.directories[0] = PathPart("/".to_string()); // not escaped + assert_eq!(parts, expected_parts); // Last section ending in `.segment` is a file name let path_buf: PathBuf = "/one/two/blah.segment".into(); - let file_path = FilePath::raw(path_buf); + let file_path = FilePath::raw(path_buf, false); let parts: DirsAndFileName = file_path.into(); - assert_eq!(parts.directories.len(), 3); - assert_eq!(parts.file_name.unwrap().0, "blah.segment"); + let mut expected_parts = parsed_path!(["/", "one", "two"], "blah.segment"); + expected_parts.directories[0] = PathPart("/".to_string()); // not escaped + assert_eq!(parts, expected_parts); // Last section ending in `.parquet` is a file name let path_buf: PathBuf = "/one/two/blah.parquet".into(); - let file_path = FilePath::raw(path_buf); + let file_path = FilePath::raw(path_buf, false); let parts: DirsAndFileName = file_path.into(); - assert_eq!(parts.directories.len(), 3); - assert_eq!(parts.file_name.unwrap().0, "blah.parquet"); + let mut expected_parts = parsed_path!(["/", "one", "two"], "blah.parquet"); + expected_parts.directories[0] = PathPart("/".to_string()); // not escaped + assert_eq!(parts, expected_parts); // Last section ending in `.txt` is NOT a file name; we don't recognize that // extension let path_buf: PathBuf = "/one/two/blah.txt".into(); - let file_path = FilePath::raw(path_buf); + let file_path = FilePath::raw(path_buf, false); let parts: DirsAndFileName = file_path.into(); - assert_eq!(parts.directories.len(), 4); - assert!(parts.file_name.is_none()); + let mut expected_parts = parsed_path!(["/", "one", "two"], "blah.txt"); + expected_parts.directories[0] = PathPart("/".to_string()); // not escaped + assert_eq!(parts, expected_parts); // Last section containing a `.` isn't a file name let path_buf: PathBuf = "/one/two/blah.blah".into(); - let file_path = FilePath::raw(path_buf); + let file_path = FilePath::raw(path_buf, false); let parts: DirsAndFileName = file_path.into(); - assert_eq!(parts.directories.len(), 4); - assert!(parts.file_name.is_none()); + let mut expected_parts = parsed_path!(["/", "one", "two"], "blah.blah"); + expected_parts.directories[0] = PathPart("/".to_string()); // not escaped + assert_eq!(parts, expected_parts); // Last section starting with a `.` isn't a file name (macos temp dirs do this) let path_buf: PathBuf = "/one/two/.blah".into(); - let file_path = FilePath::raw(path_buf); + let file_path = FilePath::raw(path_buf, false); let parts: DirsAndFileName = file_path.into(); - assert_eq!(parts.directories.len(), 4); - assert!(parts.file_name.is_none()); + let mut expected_parts = parsed_path!(["/", "one", "two"], ".blah"); + expected_parts.directories[0] = PathPart("/".to_string()); // not escaped + assert_eq!(parts, expected_parts); } #[test] fn conversions() { // dir and file name let path_buf: PathBuf = "foo/bar/blah.json".into(); - let file_path = FilePath::raw(path_buf); + let file_path = FilePath::raw(path_buf, false); let parts: DirsAndFileName = file_path.into(); - let mut expected_parts = DirsAndFileName::default(); - expected_parts.push_dir("foo"); - expected_parts.push_dir("bar"); - expected_parts.file_name = Some("blah.json".into()); - + let expected_parts = parsed_path!(["foo", "bar"], "blah.json"); assert_eq!(parts, expected_parts); // dir, no file name - let path_buf: PathBuf = "foo/bar".into(); - let file_path = FilePath::raw(path_buf); + let path_buf: PathBuf = "foo/bar/".into(); + let file_path = FilePath::raw(path_buf, false); let parts: DirsAndFileName = file_path.into(); - expected_parts.file_name = None; + let expected_parts = parsed_path!(["foo", "bar"]); + assert_eq!(parts, expected_parts); + // same but w/o the final marker + let path_buf: PathBuf = "foo/bar".into(); + let file_path = FilePath::raw(path_buf, false); + let parts: DirsAndFileName = file_path.into(); + + let expected_parts = parsed_path!(["foo"], "bar"); + assert_eq!(parts, expected_parts); + + // same but w/o the final marker, but forced to be a directory + let path_buf: PathBuf = "foo/bar".into(); + let file_path = FilePath::raw(path_buf, true); + let parts: DirsAndFileName = file_path.into(); + + let expected_parts = parsed_path!(["foo", "bar"]); assert_eq!(parts, expected_parts); // no dir, file name let path_buf: PathBuf = "blah.json".into(); - let file_path = FilePath::raw(path_buf); + let file_path = FilePath::raw(path_buf, false); let parts: DirsAndFileName = file_path.into(); - assert!(parts.directories.is_empty()); - assert_eq!(parts.file_name.unwrap().encoded(), "blah.json"); + let expected_parts = parsed_path!([], "blah.json"); + assert_eq!(parts, expected_parts); // empty let path_buf: PathBuf = "".into(); - let file_path = FilePath::raw(path_buf); + let file_path = FilePath::raw(path_buf, false); let parts: DirsAndFileName = file_path.into(); - assert!(parts.directories.is_empty()); - assert!(parts.file_name.is_none()); + let expected_parts = parsed_path!(); + assert_eq!(parts, expected_parts); + + // weird file name + let path_buf: PathBuf = "blah.x".into(); + let file_path = FilePath::raw(path_buf, false); + let parts: DirsAndFileName = file_path.into(); + + let expected_parts = parsed_path!("blah.x"); + assert_eq!(parts, expected_parts); } #[test] fn equality() { let path_buf: PathBuf = "foo/bar/blah.json".into(); - let file_path = FilePath::raw(path_buf); + let file_path = FilePath::raw(path_buf, false); let parts: DirsAndFileName = file_path.clone().into(); let parsed: FilePath = parts.into(); @@ -421,12 +455,12 @@ mod tests { #[test] fn ordering() { let a_path_buf: PathBuf = "foo/bar/a.json".into(); - let a_file_path = FilePath::raw(&a_path_buf); + let a_file_path = FilePath::raw(&a_path_buf, false); let a_parts: DirsAndFileName = a_file_path.into(); let a_parsed: FilePath = a_parts.into(); let b_path_buf: PathBuf = "foo/bar/b.json".into(); - let b_file_path = FilePath::raw(&b_path_buf); + let b_file_path = FilePath::raw(&b_path_buf, false); assert!(a_path_buf < b_path_buf); assert!( @@ -441,7 +475,7 @@ mod tests { fn path_display() { let a_path_buf: PathBuf = "foo/bar/a.json".into(); let expected_display = a_path_buf.display().to_string(); - let a_file_path = FilePath::raw(&a_path_buf); + let a_file_path = FilePath::raw(&a_path_buf, false); assert_eq!(a_file_path.display(), expected_display);