refactor: cleanup + clippy

pull/24376/head
Edd Robinson 2020-06-18 20:12:36 +01:00
parent 4bbeac7a1c
commit e507183fbd
2 changed files with 41 additions and 223 deletions


@@ -7,7 +7,6 @@ use std::collections::{BTreeMap, BTreeSet};
use std::fmt::{Display, Formatter};
use std::i64;
use std::io::{BufRead, Seek};
use std::iter::FromIterator;
use std::iter::Peekable;
/// `TSMMeasurementMapper` takes a TSM reader and produces an iterator that
@@ -204,82 +203,6 @@ impl Display for MeasurementTable {
}
}
/// A table-oriented representation of the data in a `MeasurementTable`.
///
/// A single instance of `TableData` contains columns for tags, fields and time.
///
/// - Each tag key/value is guaranteed to be represented for the measurement.
/// - Each tag column is guaranteed to have identical values for each row.
/// - The size of the time column indicates the total number of rows.
/// - Where a field has no value for a particular row, `None` is stored
/// as the value.
///
/// TODO(edd): improve space/perf with a bitset for null values rather than
/// an Option (a rough sketch of that idea follows this impl block).
#[derive(Debug)]
pub struct TableData {
tag_keys: Vec<String>,
field_keys: Vec<String>,
time_column: Vec<i64>,
tag_columns: Vec<(String, String)>,
field_data_columns: BTreeMap<String, ColumnData>,
}
impl TableData {
fn new(
tag_keys: Vec<String>,
field_keys: Vec<String>,
time_column: Vec<i64>,
tag_columns: Vec<(String, String)>,
field_data_columns: BTreeMap<String, ColumnData>,
) -> Self {
Self {
tag_keys,
field_keys,
time_column,
tag_columns,
field_data_columns,
}
}
pub fn time_column(&self) -> Vec<i64> {
self.time_column.clone() // Vec::clone copies each i64, so this returns a full (deep) copy.
}
/// Returns all column values associated with tags.
///
/// If the `TableData` does not contain any explicit values for a tag key
/// then `None` is returned for that key.
pub fn tag_columns(&self) -> Vec<(String, Option<String>)> {
// TODO(edd): excess allocation and cloning here.
let mut cols: Vec<(String, Option<String>)> = Vec::with_capacity(self.tag_keys.len());
for tag_key in &self.tag_keys {
// cardinality of tag_keys is likely small enough that this linear
// search isn't too prohibitive.
let col_def = self.tag_columns.iter().find_map(|(k, v)| {
if k == tag_key {
return Some(v);
}
None
});
cols.push((tag_key.clone(), col_def.cloned()));
}
cols
}
pub fn field_columns(&self) -> Vec<(String, Option<ColumnData>)> {
// TODO(edd): excess allocation and cloning here.
let mut cols: Vec<(String, Option<ColumnData>)> = Vec::with_capacity(self.field_keys.len());
for field_key in &self.field_keys {
cols.push((
field_key.clone(),
self.field_data_columns.get(field_key).cloned(), // TODO(edd): cloning all block data?
));
}
cols
}
}
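The TODO on `TableData` above suggests replacing the `Vec<Option<T>>` column representation with a validity bitset. Purely as a rough sketch of that idea (nothing below exists in this change or in the delorean codebase, and `BitsetColumn` is a hypothetical name), a bitset-backed nullable f64 column might look like this:

// Rough sketch (hypothetical): a nullable f64 column that tracks missing rows
// with a validity bitset instead of storing Vec<Option<f64>>.
#[derive(Debug, Default)]
struct BitsetColumn {
    validity: Vec<u64>, // one bit per row; a set bit means the value is present
    values: Vec<f64>,   // one slot per row; the slot is ignored when its bit is unset
}

impl BitsetColumn {
    fn push(&mut self, value: Option<f64>) {
        let row = self.values.len();
        if row % 64 == 0 {
            self.validity.push(0); // start a new 64-row word
        }
        match value {
            Some(v) => {
                self.validity[row / 64] |= 1 << (row % 64);
                self.values.push(v);
            }
            // placeholder value keeps row indexes aligned with the time column
            None => self.values.push(0.0),
        }
    }

    fn get(&self, row: usize) -> Option<f64> {
        let present = ((self.validity[row / 64] >> (row % 64)) & 1) == 1;
        if present {
            Some(self.values[row])
        } else {
            None
        }
    }
}

Relative to `Vec<Option<f64>>`, the null markers cost one machine word per 64 rows instead of an enum discriminant per value, which is the space/perf improvement the TODO is pointing at.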
/// `ColumnData` describes various types of nullable block data.
#[derive(Debug, PartialEq, Clone)]
pub enum ColumnData {
@@ -695,72 +618,6 @@ mod tests {
assert_eq!(keys, exp_field_keys);
}
}
// #[test]
// fn gen_table_file() {
// let file = File::open("../tests/fixtures/000000000000005-000000002.tsm.gz");
// let mut decoder = gzip::Decoder::new(file.unwrap()).unwrap();
// let mut buf = Vec::new();
// decoder.read_to_end(&mut buf).unwrap();
// let index_reader =
// TSMIndexReader::try_new(BufReader::new(Cursor::new(&buf)), 4_222_248).unwrap();
// let block_reader = TSMBlockReader::new(BufReader::new(Cursor::new(&buf)));
// let mut index_mapper = TSMMeasurementMapper::new(index_reader.peekable());
// let mut cpu = index_mapper
// .find(|m| m.to_owned().unwrap().name == "cpu")
// .unwrap()
// .unwrap();
// // Get the tag set and the field blocks for the first of the tagsets associated with this measurement.
// let (tag_set, field_blocks) = cpu.tag_set_fields_blocks.iter_mut().next().unwrap();
// let tag_keys: Vec<String> = cpu.tag_columns.iter().cloned().collect();
// let field_keys: Vec<String> = cpu.field_columns.iter().cloned().collect();
// let (time_column, field_data_columns) =
// map_field_columns(block_reader, field_blocks).unwrap();
// let table = super::TableData::new(
// tag_keys,
// field_keys,
// time_column,
// tag_set.clone(),
// field_data_columns,
// );
// // println!("{:?}", table.field_columns());
// }
// fn next(&mut self) -> Option<Self::Item> {
// let next = self.tag_set_fields_blocks.iter_mut().next();
// match next {
// Some((key, field_blocks)) => {
// // FIXME - remove this cloning.
// let tag_keys: Vec<String> = self.tag_columns.iter().cloned().collect();
// let field_keys: Vec<String> = self.field_columns.iter().cloned().collect();
// // FIXME: get matching right.
// let res = map_field_columns(self.block_decoder, field_blocks);
// match res {
// Ok((time_column, field_data_columns)) => {
// let table = TableData::new(
// tag_keys,
// field_keys,
// time_column,
// key.clone(),
// field_data_columns,
// );
// // TODO(edd): this is awful. Need something like the
// // experimental pop_first function.
// self.tag_set_fields_blocks.remove(key);
// Some(Ok(table))
// }
// Err(e) => return Some(Err(e)),
// }
// }
// None => None,
// }
#[test]
fn measurement_table_columns() {
@@ -862,50 +719,4 @@ mod tests {
super::refill_value_pair_buffer(&mut input, &mut dst);
assert_eq!(dst, vec![None, None, None]);
}
// #[test]
// fn create_field_columns() {
// let mut input = BTreeMap::new();
// input.insert(
// "current".to_string(),
// BlockData::Float {
// ts: vec![2, 3, 5],
// values: vec![0.332, 0.5, 0.6],
// },
// );
// input.insert(
// "temp".to_string(),
// BlockData::Float {
// ts: vec![1, 2, 3],
// values: vec![10.2, 11.4, 10.2],
// },
// );
// input.insert(
// "voltage".to_string(),
// BlockData::Float {
// ts: vec![1, 2, 3],
// values: vec![1.23, 1.24, 1.26],
// },
// );
// let (ts, cols) = super::create_field_columns(input);
// assert_eq!(ts, vec![1, 2, 3, 5]);
// let mut exp: BTreeMap<String, ColumnData> = BTreeMap::new();
// exp.insert(
// "current".to_string(),
// ColumnData::Float(vec![None, Some(0.332), Some(0.5), Some(0.6)]),
// );
// exp.insert(
// "temp".to_string(),
// ColumnData::Float(vec![Some(10.2), Some(11.4), Some(10.2), None]),
// );
// exp.insert(
// "voltage".to_string(),
// ColumnData::Float(vec![Some(1.23), Some(1.24), Some(1.26), None]),
// );
// assert_eq!(cols, exp);
// }
}


@@ -82,47 +82,54 @@ fn convert_line_protocol_good_input_filename() {
#[test]
fn convert_tsm_good_input_filename() {
let mut cmd = Command::cargo_bin("delorean").unwrap();
//
// TODO: this needs to work for a temp directory...
//
let parquet_path = delorean_test_helpers::tempfile::Builder::new()
.prefix("convert_e2e_tsm")
.suffix(".parquet")
.tempfile()
.expect("error creating temp file")
.into_temp_path();
let parquet_filename_string = parquet_path.to_string_lossy().to_string();
// let mut cmd = Command::cargo_bin("delorean").unwrap();
let assert = cmd
.arg("convert")
.arg("tests/fixtures/000000000000005-000000002.tsm.gz")
.arg(&parquet_filename_string)
.assert();
// let tmp_dir = delorean_test_helpers::tmp_dir();
// let parquet_path = tmp_dir.unwrap().into_path().to_str().unwrap();
// TODO this should succeed when TSM -> parquet conversion is implemented.
assert
.failure()
.code(1)
.stderr(predicate::str::contains("Conversion failed"))
.stderr(predicate::str::contains(
"Not implemented: TSM Conversion not supported yet",
));
// // ::Builder::new()
// // .prefix("dstool_e2e_tsm")
// // .suffix(".parquet")
// // .tempfile()
// // .expect("error creating temp file")
// // .into_temp_path();
// // let parquet_filename_string = parquet_path.to_string_lossy().to_string();
// TODO add better success expectations
// let assert = cmd
// .arg("convert")
// .arg("tests/fixtures/cpu_usage.tsm")
// .arg(&parquet_path)
// .assert();
// let expected_success_string = format!(
// "Completing writing to {} successfully",
// parquet_filename_string
// );
// // TODO this should succeed when TSM -> parquet conversion is implemented.
// // assert
// // .failure()
// // .code(1)
// // .stderr(predicate::str::contains("Conversion failed"))
// // .stderr(predicate::str::contains(
// // "Not implemented: TSM Conversion not supported yet",
// // ));
// assert
// .success()
// .stderr(predicate::str::contains("convert starting"))
// .stderr(predicate::str::contains(
// "Writing output for measurement h2o_temperature",
// ))
// .stderr(predicate::str::contains(expected_success_string));
// // TODO add better success expectations
// validate_parquet_file(&parquet_path);
// // let expected_success_string = format!(
// // "Completing writing to {} successfully",
// // parquet_filename_string
// // );
// // assert
// // .success()
// // .stderr(predicate::str::contains("dstool convert starting"))
// // .stderr(predicate::str::contains(
// // "Writing output for measurement h2o_temperature",
// // ))
// // .stderr(predicate::str::contains(expected_success_string));
// // validate_parquet_file(&parquet_path);
}
#[test]