diff --git a/delorean_ingest/src/lib.rs b/delorean_ingest/src/lib.rs index 7e99d92522..6d07ec3ade 100644 --- a/delorean_ingest/src/lib.rs +++ b/delorean_ingest/src/lib.rs @@ -597,7 +597,7 @@ impl TSMFileConverter { })?; // this will create a column of repeated None values. - let col: Vec> = vec![None; col_len]; + let col: Vec>> = vec![None; col_len]; packed_columns[*idx] = Packers::from(col); } @@ -650,7 +650,7 @@ impl TSMFileConverter { packed_columns[*idx] = Packers::from(col); } BlockType::Str => { - let col: Vec> = vec![None; col_len]; + let col: Vec>> = vec![None; col_len]; packed_columns[*idx] = Packers::from(col); } BlockType::Unsigned => { diff --git a/delorean_table/src/packers.rs b/delorean_table/src/packers.rs index b24e8958a6..f3ea6a4bd9 100644 --- a/delorean_table/src/packers.rs +++ b/delorean_table/src/packers.rs @@ -152,13 +152,13 @@ impl std::convert::From for Packers { } } -impl std::convert::From>> for Packers { - fn from(values: Vec>) -> Self { +impl std::convert::From>>> for Packers { + fn from(values: Vec>>) -> Self { // TODO(edd): convert this with an iterator? let mut as_byte_array: Vec> = Vec::with_capacity(values.len()); for v in values { match v { - Some(v) => as_byte_array.push(Some(ByteArray::from(v.as_str()))), + Some(v) => as_byte_array.push(Some(ByteArray::from(v))), None => as_byte_array.push(None), } } diff --git a/delorean_tsm/src/encoders/string.rs b/delorean_tsm/src/encoders/string.rs index af9d266e87..66c29c9d48 100644 --- a/delorean_tsm/src/encoders/string.rs +++ b/delorean_tsm/src/encoders/string.rs @@ -9,8 +9,9 @@ const HEADER_LEN: usize = 1; /// Store `i32::MAX` as a `usize` for comparing with lengths in assertions const MAX_I32: usize = i32::MAX as usize; -/// Encodes a slice of string slices into a vector of bytes. Currently uses Snappy compression. -pub fn encode>(src: &[T], dst: &mut Vec) -> Result<(), Box> { +/// Encodes a slice of byte slices representing string data into a vector of bytes. Currently uses +/// Snappy compression. +pub fn encode(src: &[&[u8]], dst: &mut Vec) -> Result<(), Box> { dst.truncate(0); // reset buffer if src.is_empty() { return Ok(()); @@ -21,7 +22,7 @@ pub fn encode>(src: &[T], dst: &mut Vec) -> Result<(), Box>(src: &[T], dst: &mut Vec) -> Result<(), Box>(src: &[T], dst: &mut Vec) -> Result<(), Box) -> Result<(), Box> { +/// Decodes a slice of bytes representing Snappy-compressed data into a vector of vectors of bytes +/// representing string data, which may or may not be valid UTF-8. +pub fn decode(src: &[u8], dst: &mut Vec>) -> Result<(), Box> { if src.is_empty() { return Ok(()); } @@ -104,7 +105,7 @@ pub fn decode(src: &[u8], dst: &mut Vec) -> Result<(), Box> { return Err("short buffer".into()); } - dst.push(std::str::from_utf8(&decoded_bytes[lower..upper])?.to_string()); + dst.push(decoded_bytes[lower..upper].to_vec()); // The length of this string plus the length of the variable byte encoded length i += length + num_bytes_read; @@ -119,7 +120,7 @@ mod tests { #[test] fn encode_no_values() { - let src: Vec<&str> = vec![]; + let src: Vec<&[u8]> = vec![]; let mut dst = vec![]; // check for error @@ -131,7 +132,8 @@ mod tests { #[test] fn encode_single() { - let src = vec!["v1"]; + let v1_bytes = b"v1"; + let src = vec![&v1_bytes[..]]; let mut dst = vec![]; encode(&src, &mut dst).expect("failed to encode src"); @@ -140,7 +142,8 @@ mod tests { #[test] fn encode_multi_compressed() { - let src: Vec<_> = (0..10).map(|i| format!("value {}", i)).collect(); + let src_strings: Vec<_> = (0..10).map(|i| format!("value {}", i)).collect(); + let src: Vec<_> = src_strings.iter().map(|s| s.as_bytes()).collect(); let mut dst = vec![]; encode(&src, &mut dst).expect("failed to encode src"); @@ -172,7 +175,12 @@ mod tests { let mut dst = vec![]; decode(&src, &mut dst).expect("failed to decode src"); - assert_eq!(dst, vec!["v1"]); + + let dst_as_strings: Vec<_> = dst + .iter() + .map(|s| std::str::from_utf8(s).unwrap()) + .collect(); + assert_eq!(dst_as_strings, vec!["v1"]); } #[test] @@ -186,7 +194,11 @@ mod tests { decode(&src, &mut dst).expect("failed to decode src"); + let dst_as_strings: Vec<_> = dst + .iter() + .map(|s| std::str::from_utf8(s).unwrap()) + .collect(); let expected: Vec<_> = (0..10).map(|i| format!("value {}", i)).collect(); - assert_eq!(dst, expected); + assert_eq!(dst_as_strings, expected); } } diff --git a/delorean_tsm/src/lib.rs b/delorean_tsm/src/lib.rs index d5044e6a3c..ba09d8dd36 100644 --- a/delorean_tsm/src/lib.rs +++ b/delorean_tsm/src/lib.rs @@ -129,7 +129,7 @@ pub enum BlockData { Float { ts: Vec, values: Vec }, Integer { ts: Vec, values: Vec }, Bool { ts: Vec, values: Vec }, - Str { ts: Vec, values: Vec }, + Str { ts: Vec, values: Vec> }, Unsigned { ts: Vec, values: Vec }, } diff --git a/delorean_tsm/src/mapper.rs b/delorean_tsm/src/mapper.rs index 7577bb04f4..b37aa904a6 100644 --- a/delorean_tsm/src/mapper.rs +++ b/delorean_tsm/src/mapper.rs @@ -15,7 +15,7 @@ use std::iter::Peekable; /// The main purpose of the `TSMMeasurementMapper` is to provide a /// transformation step that allows one to convert per-series/per-field data /// into measurement-oriented table data. -/// +/// #[derive(Debug)] pub struct TSMMeasurementMapper where @@ -199,7 +199,7 @@ pub enum ColumnData { Float(Vec>), Integer(Vec>), Bool(Vec>), - Str(Vec>), + Str(Vec>>), Unsigned(Vec>), } @@ -209,7 +209,7 @@ enum ValuePair { F64((i64, f64)), I64((i64, i64)), Bool((i64, bool)), - Str((i64, String)), + Str((i64, Vec)), U64((i64, u64)), } diff --git a/src/storage/block.rs b/src/storage/block.rs index 634deecde6..b7cfdcd6e2 100644 --- a/src/storage/block.rs +++ b/src/storage/block.rs @@ -240,7 +240,8 @@ impl Encoder for Vec { impl Encoder for Vec<&str> { fn encode(&self, dst: &mut Vec) -> Result<(), StorageError> { - string::encode(&self, dst).map_err(|e| StorageError { + let bytes: Vec<_> = self.iter().map(|s| s.as_bytes()).collect(); + string::encode(&bytes, dst).map_err(|e| StorageError { description: e.to_string(), }) }