test: exercise TSM converter

pull/24376/head
Edd Robinson 2020-06-29 18:21:20 +01:00
parent 414029b96d
commit 55bf2a44be
4 changed files with 288 additions and 11 deletions

View File

@ -589,11 +589,9 @@ impl TSMFileConverter {
.map(|c| (c.name.clone(), c.index as usize)) .map(|c| (c.name.clone(), c.index as usize))
.collect::<BTreeMap<String, usize>>(); .collect::<BTreeMap<String, usize>>();
// For each tagset combination in the measurement I need // For each tagset combination in the measurement build out a table.
// to build out the table. Then for each column in the // Then, for each column in that table convert it to a Packer<T> and
// table I need to convert to a Packer<T> and append it // append it to the relevant packer_column.
// to the packer_column.
for (i, (tag_set_pair, blocks)) in m.tag_set_fields_blocks().iter_mut().enumerate() { for (i, (tag_set_pair, blocks)) in m.tag_set_fields_blocks().iter_mut().enumerate() {
let (ts, field_cols) = let (ts, field_cols) =
map_field_columns(&mut block_reader, blocks).context(TSMProcessing)?; map_field_columns(&mut block_reader, blocks).context(TSMProcessing)?;
@ -780,7 +778,9 @@ impl std::fmt::Debug for TSMFileConverter {
#[cfg(test)] #[cfg(test)]
mod delorean_ingest_tests { mod delorean_ingest_tests {
use super::*; use super::*;
use delorean_table::{DeloreanTableWriter, DeloreanTableWriterSource, Error as TableError}; use delorean_table::{
DeloreanTableWriter, DeloreanTableWriterSource, Error as TableError, Packers,
};
use delorean_table_schema::ColumnDefinition; use delorean_table_schema::ColumnDefinition;
use delorean_test_helpers::approximately_equal; use delorean_test_helpers::approximately_equal;
@ -1444,7 +1444,284 @@ mod delorean_ingest_tests {
// ----- Tests for TSM Data ----- // ----- Tests for TSM Data -----
// TODO(edd): create a smaller TSM file for this test... // MockBlockDecoder implements the BlockDecoder trait. It uses the `min_time`
// value in a provided `Block` definition as a key to a map of block data,
// which should be provided on initialisation.
struct MockBlockDecoder {
// Canned block data, looked up in `decode` by the `min_time` of the requested `Block`.
blocks: BTreeMap<i64, delorean_tsm::BlockData>,
}
impl BlockDecoder for MockBlockDecoder {
    /// Returns a clone of the canned `BlockData` stored under `block.min_time`.
    ///
    /// Only `min_time` is read from `block`; it acts purely as the lookup key
    /// into `self.blocks`.
    ///
    /// # Errors
    ///
    /// Returns a `TSMError` whose description is "block not found" when no
    /// data is stored under that `min_time`.
    fn decode(
        &mut self,
        block: &delorean_tsm::Block,
    ) -> std::result::Result<delorean_tsm::BlockData, delorean_tsm::TSMError> {
        self.blocks
            .get(&block.min_time)
            .cloned()
            // Build the error lazily so its `String` is only allocated on a miss.
            .ok_or_else(|| delorean_tsm::TSMError {
                description: "block not found".to_string(),
            })
    }
}
#[test]
fn process_measurement_table() -> Result<(), Error> {
    use delorean_tsm::{Block, BlockData};

    // Input data - in line protocol format
    //
    // cpu,region=east temp=1.2 0
    // cpu,region=east voltage=10.2 0
    //
    // cpu,region=east temp=1.2 1000
    // cpu,region=east voltage=10.2 1000
    //
    // cpu,region=east temp=1.4 2000
    // cpu,region=east voltage=10.4 2000
    // cpu,region=west,server=a temp=100.2 2000
    //
    // cpu,az=b watts=1000 3000
    // cpu,region=west,server=a temp=99.5 3000
    //
    // cpu,az=b watts=2000 4000
    // cpu,region=west,server=a temp=100.3 4000
    //
    // cpu,az=b watts=3000 5000

    // Expected output table
    //
    // |  az  | region | server | temp  | voltage |  watts  | time |
    // |------|--------|--------|-------|---------|---------|------|
    // | b    | NULL   | NULL   | NULL  | NULL    | 1000    | 3000 |
    // | b    | NULL   | NULL   | NULL  | NULL    | 2000    | 4000 |
    // | b    | NULL   | NULL   | NULL  | NULL    | 3000    | 5000 |
    // | NULL | east   | NULL   | 1.2   | 10.2    | NULL    | 0    | <-- series joined on ts column
    // | NULL | east   | NULL   | 1.2   | 10.2    | NULL    | 1000 | <-- series joined on ts column
    // | NULL | east   | NULL   | 1.4   | 10.4    | NULL    | 2000 | <-- series joined on ts column
    // | NULL | west   | a      | 100.2 | NULL    | NULL    | 2000 |
    // | NULL | west   | a      | 99.5  | NULL    | NULL    | 3000 |
    // | NULL | west   | a      | 100.3 | NULL    | NULL    | 4000 |

    // `MockBlockDecoder` resolves a block purely by its `min_time`, so every
    // series gets a placeholder block whose `min_time` is its key into the
    // canned block data below.
    let keyed_block = |key: i64| Block {
        min_time: key,
        max_time: 0,
        offset: 0,
        size: 0,
    };

    // (tagset, field key, block type, block key) for each series in the table.
    let series = vec![
        // cpu,region=east temp=<all the block data for this key>
        (vec![("region", "east")], "temp", BlockType::Float, 0),
        // cpu,region=east voltage=<all the block data for this key>
        (vec![("region", "east")], "voltage", BlockType::Float, 1),
        // cpu,region=west,server=a temp=<all the block data for this key>
        (
            vec![("region", "west"), ("server", "a")],
            "temp",
            BlockType::Float,
            2,
        ),
        // cpu,az=b watts=<all the block data for this key>
        (vec![("az", "b")], "watts", BlockType::Unsigned, 3),
    ];

    let mut table = MeasurementTable::new("cpu".to_string());
    for (tags, field_key, block_type, key) in series {
        let tagset = tags
            .into_iter()
            .map(|(k, v)| (k.to_string(), v.to_string()))
            .collect::<Vec<(String, String)>>();

        table
            .add_series_data(tagset, field_key.to_string(), block_type, keyed_block(key))
            .map_err(|source| Error::TSMProcessing { source })?;
    }

    // Canned block contents, keyed by the `min_time` of the placeholder blocks above.
    let block_map: BTreeMap<i64, BlockData> = vec![
        (
            0,
            BlockData::Float {
                ts: vec![0, 1000, 2000],
                values: vec![1.2, 1.2, 1.4],
            },
        ),
        (
            1,
            BlockData::Float {
                ts: vec![0, 1000, 2000],
                values: vec![10.2, 10.2, 10.4],
            },
        ),
        (
            2,
            BlockData::Float {
                ts: vec![2000, 3000, 4000],
                values: vec![100.2, 99.5, 100.3],
            },
        ),
        (
            3,
            BlockData::Unsigned {
                ts: vec![3000, 4000, 5000],
                values: vec![1000, 2000, 3000],
            },
        ),
    ]
    .into_iter()
    .collect();

    let (schema, packers) = super::TSMFileConverter::process_measurement_table(
        MockBlockDecoder { blocks: block_map },
        &mut table,
    )?;

    // NOTE(review): `watts` is added as `BlockType::Unsigned` but is expected to
    // surface as `DataType::Integer` — presumably the converter maps unsigned
    // blocks onto integer columns; confirm.
    let expected_defs = vec![
        ("az", 0, DataType::String),
        ("region", 1, DataType::String),
        ("server", 2, DataType::String),
        ("temp", 3, DataType::Float),
        ("voltage", 4, DataType::Float),
        ("watts", 5, DataType::Integer),
        ("time", 6, DataType::Timestamp),
    ]
    .into_iter()
    .map(|(name, index, data_type)| ColumnDefinition::new(name, index, data_type))
    .collect::<Vec<_>>();
    assert_eq!(schema.get_col_defs(), expected_defs);

    // Builds the expected packer for a tag (string) column.
    let tag_column = |values: Vec<Option<&str>>| {
        Packers::String(Packer::from(
            values
                .into_iter()
                .map(|value| value.map(ByteArray::from))
                .collect::<Vec<_>>(),
        ))
    };

    // Each expected column below is laid out as three rows per tagset, in the
    // order: az=b, region=east, region=west/server=a.

    // az column
    assert_eq!(
        packers[0],
        tag_column(vec![
            Some("b"), Some("b"), Some("b"),
            None, None, None,
            None, None, None,
        ])
    );

    // region column
    assert_eq!(
        packers[1],
        tag_column(vec![
            None, None, None,
            Some("east"), Some("east"), Some("east"),
            Some("west"), Some("west"), Some("west"),
        ])
    );

    // server column
    assert_eq!(
        packers[2],
        tag_column(vec![
            None, None, None,
            None, None, None,
            Some("a"), Some("a"), Some("a"),
        ])
    );

    // temp column
    let expected_temp = Packers::Float(Packer::from(vec![
        None, None, None,
        Some(1.2), Some(1.2), Some(1.4),
        Some(100.2), Some(99.5), Some(100.3),
    ]));
    assert_eq!(packers[3], expected_temp);

    // voltage column
    let expected_voltage = Packers::Float(Packer::from(vec![
        None, None, None,
        Some(10.2), Some(10.2), Some(10.4),
        None, None, None,
    ]));
    assert_eq!(packers[4], expected_voltage);

    // watts column
    let expected_watts = Packers::Integer(Packer::from(vec![
        Some(1000), Some(2000), Some(3000),
        None, None, None,
        None, None, None,
    ]));
    assert_eq!(packers[5], expected_watts);

    // timestamp column
    //
    // NOTE(review): these are the block timestamps divided by 1000 (e.g. 3000
    // is expected as 3) — presumably the converter rescales timestamps on the
    // way out; confirm against `process_measurement_table`.
    let expected_time = Packers::Integer(Packer::from(vec![
        Some(3), Some(4), Some(5),
        Some(0), Some(1), Some(2),
        Some(2), Some(3), Some(4),
    ]));
    assert_eq!(packers[6], expected_time);

    Ok(())
}
#[test] #[test]
fn conversion_tsm_files() -> Result<(), Error> { fn conversion_tsm_files() -> Result<(), Error> {
let file = File::open("../tests/fixtures/000000000000462-000000002.tsm.gz"); let file = File::open("../tests/fixtures/000000000000462-000000002.tsm.gz");

View File

@ -13,7 +13,7 @@ use std::default::Default;
// NOTE: See https://blog.twitter.com/engineering/en_us/a/2013/dremel-made-simple-with-parquet.html // NOTE: See https://blog.twitter.com/engineering/en_us/a/2013/dremel-made-simple-with-parquet.html
// for an explanation of nesting levels // for an explanation of nesting levels
#[derive(Debug)] #[derive(Debug, PartialEq)]
pub enum Packers { pub enum Packers {
Float(Packer<f64>), Float(Packer<f64>),
Integer(Packer<i64>), Integer(Packer<i64>),
@ -194,7 +194,7 @@ impl std::convert::From<Vec<Option<Vec<u8>>>> for Packers {
} }
} }
#[derive(Debug, Default)] #[derive(Debug, Default, PartialEq)]
pub struct Packer<T> pub struct Packer<T>
where where
T: Default + Clone, T: Default + Clone,

View File

@ -133,7 +133,7 @@ const MAX_BLOCK_VALUES: usize = 1000;
/// `BlockData` describes the various types of block data that can be held within /// `BlockData` describes the various types of block data that can be held within
/// a TSM file. /// a TSM file.
#[derive(Debug)] #[derive(Debug, Clone)]
pub enum BlockData { pub enum BlockData {
Float { ts: Vec<i64>, values: Vec<f64> }, Float { ts: Vec<i64>, values: Vec<f64> },
Integer { ts: Vec<i64>, values: Vec<i64> }, Integer { ts: Vec<i64>, values: Vec<i64> },

View File

@ -142,7 +142,7 @@ impl MeasurementTable {
} }
// updates the table with data from a single TSM index entry's block. // updates the table with data from a single TSM index entry's block.
fn add_series_data( pub fn add_series_data(
&mut self, &mut self,
tagset: Vec<(String, String)>, tagset: Vec<(String, String)>,
field_key: String, field_key: String,