refactor: Extract PathPart to its own module
And be more explicit about the privacy of the inner Stringpull/24376/head
parent
004fb8f803
commit
979458034a
|
@ -1,7 +1,6 @@
|
|||
//! This module contains code for abstracting object locations that work
|
||||
//! across different backing implementations and platforms.
|
||||
|
||||
use percent_encoding::{percent_decode_str, percent_encode, AsciiSet, CONTROLS};
|
||||
use std::{mem, path::PathBuf};
|
||||
|
||||
/// Paths that came from or are to be used in cloud-based object storage
|
||||
|
@ -14,6 +13,9 @@ pub mod file;
|
|||
pub mod parsed;
|
||||
use parsed::DirsAndFileName;
|
||||
|
||||
mod parts;
|
||||
use parts::PathPart;
|
||||
|
||||
/// Universal interface for handling paths and locations for objects and
|
||||
/// directories in the object store.
|
||||
///
|
||||
|
@ -214,103 +216,11 @@ impl PartialEq for PathRepresentation {
|
|||
|
||||
/// The delimiter to separate object namespaces, creating a directory structure.
|
||||
pub const DELIMITER: &str = "/";
|
||||
// percent_encode's API needs this as a byte
|
||||
const DELIMITER_BYTE: u8 = DELIMITER.as_bytes()[0];
|
||||
|
||||
/// The PathPart type exists to validate the directory/file names that form part
|
||||
/// of a path.
|
||||
///
|
||||
/// A PathPart instance is guaranteed to contain no `/` characters as it can
|
||||
/// only be constructed by going through the `try_from` impl.
|
||||
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Default)]
|
||||
pub struct PathPart(String);
|
||||
|
||||
/// Characters we want to encode.
|
||||
const INVALID: &AsciiSet = &CONTROLS
|
||||
// The delimiter we are reserving for internal hierarchy
|
||||
.add(DELIMITER_BYTE)
|
||||
// Characters AWS recommends avoiding for object keys
|
||||
// https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingMetadata.html
|
||||
.add(b'\\')
|
||||
.add(b'{')
|
||||
// TODO: Non-printable ASCII characters (128–255 decimal characters)
|
||||
.add(b'^')
|
||||
.add(b'}')
|
||||
.add(b'%')
|
||||
.add(b'`')
|
||||
.add(b']')
|
||||
.add(b'"') // " <-- my editor is confused about double quotes within single quotes
|
||||
.add(b'>')
|
||||
.add(b'[')
|
||||
.add(b'~')
|
||||
.add(b'<')
|
||||
.add(b'#')
|
||||
.add(b'|')
|
||||
// Characters Google Cloud Storage recommends avoiding for object names
|
||||
// https://cloud.google.com/storage/docs/naming-objects
|
||||
.add(b'\r')
|
||||
.add(b'\n')
|
||||
.add(b'*')
|
||||
.add(b'?');
|
||||
|
||||
impl From<&str> for PathPart {
|
||||
fn from(v: &str) -> Self {
|
||||
match v {
|
||||
// We don't want to encode `.` generally, but we do want to disallow parts of paths
|
||||
// to be equal to `.` or `..` to prevent file system traversal shenanigans.
|
||||
"." => Self(String::from("%2E")),
|
||||
".." => Self(String::from("%2E%2E")),
|
||||
other => Self(percent_encode(other.as_bytes(), INVALID).to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for PathPart {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
percent_decode_str(&self.0)
|
||||
.decode_utf8()
|
||||
.expect("Valid UTF-8 that came from String")
|
||||
.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn path_part_delimiter_gets_encoded() {
|
||||
let part: PathPart = "foo/bar".into();
|
||||
assert_eq!(part, PathPart(String::from("foo%2Fbar")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn path_part_gets_decoded_for_display() {
|
||||
let part: PathPart = "foo/bar".into();
|
||||
assert_eq!(part.to_string(), "foo/bar");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn path_part_given_already_encoded_string() {
|
||||
let part: PathPart = "foo%2Fbar".into();
|
||||
assert_eq!(part, PathPart(String::from("foo%252Fbar")));
|
||||
assert_eq!(part.to_string(), "foo%2Fbar");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn path_part_cant_be_one_dot() {
|
||||
let part: PathPart = ".".into();
|
||||
assert_eq!(part, PathPart(String::from("%2E")));
|
||||
assert_eq!(part.to_string(), ".");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn path_part_cant_be_two_dots() {
|
||||
let part: PathPart = "..".into();
|
||||
assert_eq!(part, PathPart(String::from("%2E%2E")));
|
||||
assert_eq!(part.to_string(), "..");
|
||||
}
|
||||
|
||||
// Invariants to maintain/document/test:
|
||||
//
|
||||
// - always ends in DELIMITER if it's a directory. If it's the end object, it
|
||||
|
@ -507,10 +417,10 @@ mod tests {
|
|||
let path_buf_parts: DirsAndFileName = path_buf.into();
|
||||
|
||||
assert!(cloud_parts.directories.is_empty());
|
||||
assert_eq!(cloud_parts.file_name.unwrap().0, "blah.json");
|
||||
assert_eq!(cloud_parts.file_name.unwrap().encoded(), "blah.json");
|
||||
|
||||
assert!(path_buf_parts.directories.is_empty());
|
||||
assert_eq!(path_buf_parts.file_name.unwrap().0, "blah.json");
|
||||
assert_eq!(path_buf_parts.file_name.unwrap().encoded(), "blah.json");
|
||||
|
||||
// empty
|
||||
let cloud = PathRepresentation::RawCloud("".into());
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
use super::{ObjectStorePath, PathRepresentation, DELIMITER};
|
||||
use super::{ObjectStorePath, PathPart, PathRepresentation, DELIMITER};
|
||||
|
||||
use itertools::Itertools;
|
||||
|
||||
|
@ -20,14 +20,14 @@ impl CloudConverter {
|
|||
let mut path = dirs_and_file_name
|
||||
.directories
|
||||
.iter()
|
||||
.map(|p| &p.0)
|
||||
.map(PathPart::encoded)
|
||||
.join(DELIMITER);
|
||||
|
||||
if !path.is_empty() {
|
||||
path.push_str(DELIMITER);
|
||||
}
|
||||
if let Some(file_name) = &dirs_and_file_name.file_name {
|
||||
path.push_str(&file_name.0);
|
||||
path.push_str(file_name.encoded());
|
||||
}
|
||||
path
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
use super::{ObjectStorePath, PathRepresentation};
|
||||
use super::{ObjectStorePath, PathPart, PathRepresentation};
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
||||
|
@ -21,10 +21,10 @@ impl FileConverter {
|
|||
let mut path: PathBuf = dirs_and_file_name
|
||||
.directories
|
||||
.iter()
|
||||
.map(|p| &p.0)
|
||||
.map(PathPart::encoded)
|
||||
.collect();
|
||||
if let Some(file_name) = &dirs_and_file_name.file_name {
|
||||
path.push(&file_name.0);
|
||||
path.push(file_name.encoded());
|
||||
}
|
||||
path
|
||||
}
|
||||
|
|
|
@ -17,7 +17,9 @@ impl DirsAndFileName {
|
|||
use itertools::Diff;
|
||||
match diff {
|
||||
None => match (self.file_name.as_ref(), prefix.file_name.as_ref()) {
|
||||
(Some(self_file), Some(prefix_file)) => self_file.0.starts_with(&prefix_file.0),
|
||||
(Some(self_file), Some(prefix_file)) => {
|
||||
self_file.encoded().starts_with(prefix_file.encoded())
|
||||
}
|
||||
(Some(_self_file), None) => true,
|
||||
(None, Some(_prefix_file)) => false,
|
||||
(None, None) => true,
|
||||
|
@ -27,7 +29,7 @@ impl DirsAndFileName {
|
|||
.next()
|
||||
.expect("must have at least one mismatch to be in this case");
|
||||
match prefix.file_name.as_ref() {
|
||||
Some(prefix_file) => next_dir.0.starts_with(&prefix_file.0),
|
||||
Some(prefix_file) => next_dir.encoded().starts_with(prefix_file.encoded()),
|
||||
None => true,
|
||||
}
|
||||
}
|
||||
|
@ -42,8 +44,8 @@ impl DirsAndFileName {
|
|||
&& remaining_self
|
||||
.next()
|
||||
.expect("must be at least one value")
|
||||
.0
|
||||
.starts_with(&first_prefix.0)
|
||||
.encoded()
|
||||
.starts_with(first_prefix.encoded())
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
|
@ -103,12 +105,12 @@ impl From<PathRepresentation> for DirsAndFileName {
|
|||
fn from(path_rep: PathRepresentation) -> Self {
|
||||
match path_rep {
|
||||
PathRepresentation::RawCloud(path) => {
|
||||
let mut parts: Vec<_> = path
|
||||
let mut parts: Vec<PathPart> = path
|
||||
.split_terminator(DELIMITER)
|
||||
.map(|s| PathPart(s.to_string()))
|
||||
.collect();
|
||||
let maybe_file_name = match parts.pop() {
|
||||
Some(file) if file.0.contains('.') => Some(file),
|
||||
Some(file) if file.encoded().contains('.') => Some(file),
|
||||
Some(dir) => {
|
||||
parts.push(dir);
|
||||
None
|
||||
|
@ -121,17 +123,17 @@ impl From<PathRepresentation> for DirsAndFileName {
|
|||
}
|
||||
}
|
||||
PathRepresentation::RawPathBuf(path) => {
|
||||
let mut parts: Vec<_> = path
|
||||
let mut parts: Vec<PathPart> = path
|
||||
.iter()
|
||||
.flat_map(|s| s.to_os_string().into_string().map(PathPart))
|
||||
.collect();
|
||||
|
||||
let maybe_file_name = match parts.pop() {
|
||||
Some(file)
|
||||
if !file.0.starts_with('.')
|
||||
&& (file.0.ends_with(".json")
|
||||
|| file.0.ends_with(".parquet")
|
||||
|| file.0.ends_with(".segment")) =>
|
||||
if !file.encoded().starts_with('.')
|
||||
&& (file.encoded().ends_with(".json")
|
||||
|| file.encoded().ends_with(".parquet")
|
||||
|| file.encoded().ends_with(".segment")) =>
|
||||
{
|
||||
Some(file)
|
||||
}
|
||||
|
|
|
@ -0,0 +1,107 @@
|
|||
use percent_encoding::{percent_decode_str, percent_encode, AsciiSet, CONTROLS};
|
||||
|
||||
use super::DELIMITER;
|
||||
|
||||
// percent_encode's API needs this as a byte
|
||||
const DELIMITER_BYTE: u8 = DELIMITER.as_bytes()[0];
|
||||
|
||||
/// The PathPart type exists to validate the directory/file names that form part
|
||||
/// of a path.
|
||||
///
|
||||
/// A PathPart instance is guaranteed to contain no `/` characters as it can
|
||||
/// only be constructed by going through the `try_from` impl.
|
||||
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Default)]
|
||||
pub struct PathPart(pub(super) String);
|
||||
|
||||
/// Characters we want to encode.
|
||||
const INVALID: &AsciiSet = &CONTROLS
|
||||
// The delimiter we are reserving for internal hierarchy
|
||||
.add(DELIMITER_BYTE)
|
||||
// Characters AWS recommends avoiding for object keys
|
||||
// https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingMetadata.html
|
||||
.add(b'\\')
|
||||
.add(b'{')
|
||||
// TODO: Non-printable ASCII characters (128–255 decimal characters)
|
||||
.add(b'^')
|
||||
.add(b'}')
|
||||
.add(b'%')
|
||||
.add(b'`')
|
||||
.add(b']')
|
||||
.add(b'"') // " <-- my editor is confused about double quotes within single quotes
|
||||
.add(b'>')
|
||||
.add(b'[')
|
||||
.add(b'~')
|
||||
.add(b'<')
|
||||
.add(b'#')
|
||||
.add(b'|')
|
||||
// Characters Google Cloud Storage recommends avoiding for object names
|
||||
// https://cloud.google.com/storage/docs/naming-objects
|
||||
.add(b'\r')
|
||||
.add(b'\n')
|
||||
.add(b'*')
|
||||
.add(b'?');
|
||||
|
||||
impl From<&str> for PathPart {
|
||||
fn from(v: &str) -> Self {
|
||||
match v {
|
||||
// We don't want to encode `.` generally, but we do want to disallow parts of paths
|
||||
// to be equal to `.` or `..` to prevent file system traversal shenanigans.
|
||||
"." => Self(String::from("%2E")),
|
||||
".." => Self(String::from("%2E%2E")),
|
||||
other => Self(percent_encode(other.as_bytes(), INVALID).to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for PathPart {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
percent_decode_str(&self.0)
|
||||
.decode_utf8()
|
||||
.expect("Valid UTF-8 that came from String")
|
||||
.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl PathPart {
|
||||
pub fn encoded(&self) -> &str {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn path_part_delimiter_gets_encoded() {
|
||||
let part: PathPart = "foo/bar".into();
|
||||
assert_eq!(part, PathPart(String::from("foo%2Fbar")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn path_part_gets_decoded_for_display() {
|
||||
let part: PathPart = "foo/bar".into();
|
||||
assert_eq!(part.to_string(), "foo/bar");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn path_part_given_already_encoded_string() {
|
||||
let part: PathPart = "foo%2Fbar".into();
|
||||
assert_eq!(part, PathPart(String::from("foo%252Fbar")));
|
||||
assert_eq!(part.to_string(), "foo%2Fbar");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn path_part_cant_be_one_dot() {
|
||||
let part: PathPart = ".".into();
|
||||
assert_eq!(part, PathPart(String::from("%2E")));
|
||||
assert_eq!(part.to_string(), ".");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn path_part_cant_be_two_dots() {
|
||||
let part: PathPart = "..".into();
|
||||
assert_eq!(part, PathPart(String::from("%2E%2E")));
|
||||
assert_eq!(part.to_string(), "..");
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue