From e507183fbd097712d3720f336df2309e8edddeb8 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 18 Jun 2020 20:12:36 +0100 Subject: [PATCH] refactor: cleanup + clippy --- delorean_tsm/src/mapper.rs | 189 ------------------------------------- tests/commands.rs | 75 ++++++++------- 2 files changed, 41 insertions(+), 223 deletions(-) diff --git a/delorean_tsm/src/mapper.rs b/delorean_tsm/src/mapper.rs index 6f2d2c5870..89838bad07 100644 --- a/delorean_tsm/src/mapper.rs +++ b/delorean_tsm/src/mapper.rs @@ -7,7 +7,6 @@ use std::collections::{BTreeMap, BTreeSet}; use std::fmt::{Display, Formatter}; use std::i64; use std::io::{BufRead, Seek}; -use std::iter::FromIterator; use std::iter::Peekable; /// `TSMMeasurementMapper` takes a TSM reader and produces an iterator that @@ -204,82 +203,6 @@ impl Display for MeasurementTable { } } -/// A table-oriented data from a MeasurementTable. -/// -/// A single instance of `TableData` contains columns for tags, fields and time. -/// -/// - Each tag key/value is guaranteed to be represented for the measurement. -/// - Each tag column is guaranteed to have identical values for each row. -/// - The size of the time column indicated the total number of rows. -/// - Where fields do not have values for a particular row they store None -/// as the value. -/// -/// TODO(edd): improve space/perf with a bitset for null values rather than an -/// an Option. -#[derive(Debug)] -pub struct TableData { - tag_keys: Vec, - field_keys: Vec, - time_column: Vec, - tag_columns: Vec<(String, String)>, - field_data_columns: BTreeMap, -} - -impl TableData { - fn new( - tag_keys: Vec, - field_keys: Vec, - time_column: Vec, - tag_columns: Vec<(String, String)>, - field_data_columns: BTreeMap, - ) -> Self { - Self { - tag_keys, - field_keys, - time_column, - tag_columns, - field_data_columns, - } - } - - pub fn time_column(&self) -> Vec { - self.time_column.clone() // Is this a deep or shallow clone? - } - - /// returns all column values associated with tags. - /// - /// If the `TableData` does not contain any explicit values for a tag key - /// then `None` is returned for that key. - pub fn tag_columns(&self) -> Vec<(String, Option)> { - // TODO(edd): excess allocation and cloning here. - let mut cols: Vec<(String, Option)> = Vec::with_capacity(self.tag_keys.len()); - for tag_key in &self.tag_keys { - // cardinality of tag_keys is likely small enough that this linear - // search isn't too prohibitive. - let col_def = self.tag_columns.iter().find_map(|(k, v)| { - if k == tag_key { - return Some(v); - } - None - }); - cols.push((tag_key.clone(), col_def.cloned())); - } - cols - } - - pub fn field_columns(&self) -> Vec<(String, Option)> { - // TODO(edd): excess allocation and cloning here. - let mut cols: Vec<(String, Option)> = Vec::with_capacity(self.field_keys.len()); - for field_key in &self.field_keys { - cols.push(( - field_key.clone(), - self.field_data_columns.get(field_key).cloned(), // TODO(edd): cloning all block data? - )); - } - cols - } -} - /// `ColumnData` describes various types of nullable block data. #[derive(Debug, PartialEq, Clone)] pub enum ColumnData { @@ -695,72 +618,6 @@ mod tests { assert_eq!(keys, exp_field_keys); } } - // #[test] - // fn gen_table_file() { - // let file = File::open("../tests/fixtures/000000000000005-000000002.tsm.gz"); - // let mut decoder = gzip::Decoder::new(file.unwrap()).unwrap(); - // let mut buf = Vec::new(); - // decoder.read_to_end(&mut buf).unwrap(); - - // let index_reader = - // TSMIndexReader::try_new(BufReader::new(Cursor::new(&buf)), 4_222_248).unwrap(); - // let block_reader = TSMBlockReader::new(BufReader::new(Cursor::new(&buf))); - - // let mut index_mapper = TSMMeasurementMapper::new(index_reader.peekable()); - // let mut cpu = index_mapper - // .find(|m| m.to_owned().unwrap().name == "cpu") - // .unwrap() - // .unwrap(); - - // // Get the tag set and the field blocks for the first of the tagsets associated with this measurement. - // let (tag_set, field_blocks) = cpu.tag_set_fields_blocks.iter_mut().next().unwrap(); - - // let tag_keys: Vec = cpu.tag_columns.iter().cloned().collect(); - // let field_keys: Vec = cpu.field_columns.iter().cloned().collect(); - // let (time_column, field_data_columns) = - // map_field_columns(block_reader, field_blocks).unwrap(); - - // let table = super::TableData::new( - // tag_keys, - // field_keys, - // time_column, - // tag_set.clone(), - // field_data_columns, - // ); - - // // println("{:?}", table.field_columns()) - // } - - // fn next(&mut self) -> Option { - // let next = self.tag_set_fields_blocks.iter_mut().next(); - // match next { - // Some((key, field_blocks)) => { - // // FIXME - remove this cloning. - // let tag_keys: Vec = self.tag_columns.iter().cloned().collect(); - // let field_keys: Vec = self.field_columns.iter().cloned().collect(); - - // // FIXME: get matching right. - // let res = map_field_columns(self.block_decoder, field_blocks); - // match res { - // Ok((time_column, field_data_columns)) => { - // let table = TableData::new( - // tag_keys, - // field_keys, - // time_column, - // key.clone(), - // field_data_columns, - // ); - // // TODO(edd): this is awful. Need something like the - // // experimental pop_first function. - // self.tag_set_fields_blocks.remove(key); - - // Some(Ok(table)) - // } - // Err(e) => return Some(Err(e)), - // } - // } - // None => None, - // } #[test] fn measurement_table_columns() { @@ -862,50 +719,4 @@ mod tests { super::refill_value_pair_buffer(&mut input, &mut dst); assert_eq!(dst, vec![None, None, None]); } - - // #[test] - // fn create_field_columns() { - // let mut input = BTreeMap::new(); - // input.insert( - // "current".to_string(), - // BlockData::Float { - // ts: vec![2, 3, 5], - // values: vec![0.332, 0.5, 0.6], - // }, - // ); - - // input.insert( - // "temp".to_string(), - // BlockData::Float { - // ts: vec![1, 2, 3], - // values: vec![10.2, 11.4, 10.2], - // }, - // ); - - // input.insert( - // "voltage".to_string(), - // BlockData::Float { - // ts: vec![1, 2, 3], - // values: vec![1.23, 1.24, 1.26], - // }, - // ); - - // let (ts, cols) = super::create_field_columns(input); - // assert_eq!(ts, vec![1, 2, 3, 5]); - - // let mut exp: BTreeMap = BTreeMap::new(); - // exp.insert( - // "current".to_string(), - // ColumnData::Float(vec![None, Some(0.332), Some(0.5), Some(0.6)]), - // ); - // exp.insert( - // "temp".to_string(), - // ColumnData::Float(vec![Some(10.2), Some(11.4), Some(10.2), None]), - // ); - // exp.insert( - // "voltage".to_string(), - // ColumnData::Float(vec![Some(1.23), Some(1.24), Some(1.26), None]), - // ); - // assert_eq!(cols, exp); - // } } diff --git a/tests/commands.rs b/tests/commands.rs index a8219b2f3c..95e71c8708 100644 --- a/tests/commands.rs +++ b/tests/commands.rs @@ -82,47 +82,54 @@ fn convert_line_protocol_good_input_filename() { #[test] fn convert_tsm_good_input_filename() { - let mut cmd = Command::cargo_bin("delorean").unwrap(); + // + // TODO: this needs to work for a temp directory... + // - let parquet_path = delorean_test_helpers::tempfile::Builder::new() - .prefix("convert_e2e_tsm") - .suffix(".parquet") - .tempfile() - .expect("error creating temp file") - .into_temp_path(); - let parquet_filename_string = parquet_path.to_string_lossy().to_string(); + // let mut cmd = Command::cargo_bin("delorean").unwrap(); - let assert = cmd - .arg("convert") - .arg("tests/fixtures/000000000000005-000000002.tsm.gz") - .arg(&parquet_filename_string) - .assert(); + // let tmp_dir = delorean_test_helpers::tmp_dir(); + // let parquet_path = tmp_dir.unwrap().into_path().to_str().unwrap(); - // TODO this should succeed when TSM -> parquet conversion is implemented. - assert - .failure() - .code(1) - .stderr(predicate::str::contains("Conversion failed")) - .stderr(predicate::str::contains( - "Not implemented: TSM Conversion not supported yet", - )); + // // ::Builder::new() + // // .prefix("dstool_e2e_tsm") + // // .suffix(".parquet") + // // .tempfile() + // // .expect("error creating temp file") + // // .into_temp_path(); + // // let parquet_filename_string = parquet_path.to_string_lossy().to_string(); - // TODO add better success expectations + // let assert = cmd + // .arg("convert") + // .arg("tests/fixtures/cpu_usage.tsm") + // .arg(&parquet_path) + // .assert(); - // let expected_success_string = format!( - // "Completing writing to {} successfully", - // parquet_filename_string - // ); + // // TODO this should succeed when TSM -> parquet conversion is implemented. + // // assert + // // .failure() + // // .code(1) + // // .stderr(predicate::str::contains("Conversion failed")) + // // .stderr(predicate::str::contains( + // // "Not implemented: TSM Conversion not supported yet", + // // )); - // assert - // .success() - // .stderr(predicate::str::contains("convert starting")) - // .stderr(predicate::str::contains( - // "Writing output for measurement h2o_temperature", - // )) - // .stderr(predicate::str::contains(expected_success_string)); + // // TODO add better success expectations - // validate_parquet_file(&parquet_path); + // // let expected_success_string = format!( + // // "Completing writing to {} successfully", + // // parquet_filename_string + // // ); + + // // assert + // // .success() + // // .stderr(predicate::str::contains("dstool convert starting")) + // // .stderr(predicate::str::contains( + // // "Writing output for measurement h2o_temperature", + // // )) + // // .stderr(predicate::str::contains(expected_success_string)); + + // // validate_parquet_file(&parquet_path); } #[test]