From 85e0b4ec165640fc4db18745e44afd91e4f41eb6 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Wed, 17 Jun 2020 09:49:21 +0100 Subject: [PATCH] refactor: hoist tsm reader into own crate --- Cargo.lock | 12 ++++ Cargo.toml | 4 +- delorean_tsm/Cargo.toml | 16 +++++ {src => delorean_tsm/src}/encoders.rs | 0 {src => delorean_tsm/src}/encoders/float.rs | 0 {src => delorean_tsm/src}/encoders/integer.rs | 0 .../src}/encoders/simple8b.rs | 0 .../src}/encoders/timestamp.rs | 0 src/storage/tsm.rs => delorean_tsm/src/lib.rs | 65 ++++++++++++++++--- src/lib.rs | 1 - src/storage.rs | 1 - src/storage/block.rs | 2 +- 12 files changed, 87 insertions(+), 14 deletions(-) create mode 100644 delorean_tsm/Cargo.toml rename {src => delorean_tsm/src}/encoders.rs (100%) rename {src => delorean_tsm/src}/encoders/float.rs (100%) rename {src => delorean_tsm/src}/encoders/integer.rs (100%) rename {src => delorean_tsm/src}/encoders/simple8b.rs (100%) rename {src => delorean_tsm/src}/encoders/timestamp.rs (100%) rename src/storage/tsm.rs => delorean_tsm/src/lib.rs (92%) diff --git a/Cargo.lock b/Cargo.lock index 78a26eed9c..f5f5b5855a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -634,6 +634,7 @@ dependencies = [ "delorean_table", "delorean_table_schema", "delorean_test_helpers", + "delorean_tsm", "delorean_wal", "dirs 2.0.2", "dotenv", @@ -755,6 +756,17 @@ dependencies = [ "tempfile", ] +[[package]] +name = "delorean_tsm" +version = "0.1.0" +dependencies = [ + "delorean_test_helpers", + "hex", + "integer-encoding", + "libflate", + "rand 0.7.3", +] + [[package]] name = "delorean_utilities" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 3b632fd006..a22b182ad6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ default-run = "delorean" [workspace] members = [ + "delorean_generated_types", "delorean_ingest", "delorean_line_parser", "delorean_object_store", @@ -14,7 +15,7 @@ members = [ "delorean_table", "delorean_table_schema", "delorean_test_helpers", - "delorean_generated_types", + "delorean_tsm", "delorean_utilities", "delorean_wal", ] @@ -31,6 +32,7 @@ delorean_table = { path = "delorean_table" } delorean_table_schema = { path = "delorean_table_schema" } delorean_wal = { path = "delorean_wal" } delorean_object_store = { path = "delorean_object_store" } +delorean_tsm = { path = "delorean_tsm" } bytes = "0.5.4" integer-encoding = "1.0.7" diff --git a/delorean_tsm/Cargo.toml b/delorean_tsm/Cargo.toml new file mode 100644 index 0000000000..9ce1c4b1d5 --- /dev/null +++ b/delorean_tsm/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "delorean_tsm" +version = "0.1.0" +authors = ["Edd Robinson "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +integer-encoding = "1.0.7" + +[dev-dependencies] +hex = "0.4.2" +libflate = "1.0.0" +rand = "0.7.2" +delorean_test_helpers = { path = "../delorean_test_helpers" } diff --git a/src/encoders.rs b/delorean_tsm/src/encoders.rs similarity index 100% rename from src/encoders.rs rename to delorean_tsm/src/encoders.rs diff --git a/src/encoders/float.rs b/delorean_tsm/src/encoders/float.rs similarity index 100% rename from src/encoders/float.rs rename to delorean_tsm/src/encoders/float.rs diff --git a/src/encoders/integer.rs b/delorean_tsm/src/encoders/integer.rs similarity index 100% rename from src/encoders/integer.rs rename to delorean_tsm/src/encoders/integer.rs diff --git a/src/encoders/simple8b.rs b/delorean_tsm/src/encoders/simple8b.rs similarity index 100% rename from src/encoders/simple8b.rs rename to delorean_tsm/src/encoders/simple8b.rs diff --git a/src/encoders/timestamp.rs b/delorean_tsm/src/encoders/timestamp.rs similarity index 100% rename from src/encoders/timestamp.rs rename to delorean_tsm/src/encoders/timestamp.rs diff --git a/src/storage/tsm.rs b/delorean_tsm/src/lib.rs similarity index 92% rename from src/storage/tsm.rs rename to delorean_tsm/src/lib.rs index f31f888d6b..6afff835fb 100644 --- a/src/storage/tsm.rs +++ b/delorean_tsm/src/lib.rs @@ -1,9 +1,14 @@ //! Types for reading and writing TSM files produced by InfluxDB >= 2.x -use crate::encoders::*; -use crate::storage::block::*; -use crate::storage::StorageError; +pub mod encoders; + +// use crate::storage::block::*; +use encoders::*; use integer_encoding::VarInt; +use std::convert::TryFrom; +use std::error; +use std::fmt; +use std::io; use std::io::{BufRead, Seek, SeekFrom}; use std::u64; @@ -14,13 +19,13 @@ use std::u64; /// Iterating over the TSM index. /// /// ``` -/// # use delorean::storage::tsm::*; +/// # use delorean_tsm::*; /// # use libflate::gzip; /// # use std::fs::File; /// # use std::io::BufReader; /// # use std::io::Cursor; /// # use std::io::Read; -/// # let file = File::open("tests/fixtures/000000000000005-000000002.tsm.gz"); +/// # let file = File::open("../tests/fixtures/000000000000005-000000002.tsm.gz"); /// # let mut decoder = gzip::Decoder::new(file.unwrap()).unwrap(); /// # let mut buf = Vec::new(); /// # decoder.read_to_end(&mut buf).unwrap(); @@ -300,6 +305,12 @@ fn parse_tsm_key(mut key: Vec) -> Result { }) } +pub const F64_BLOCKTYPE_MARKER: u8 = 0; +pub const I64_BLOCKTYPE_MARKER: u8 = 1; +pub const BOOL_BLOCKTYPE_MARKER: u8 = 2; +pub const STRING_BLOCKTYPE_MARKER: u8 = 3; +pub const U64_BLOCKTYPE_MARKER: u8 = 4; + /// `TSMBlockReader` allows you to read and decode TSM blocks from within a TSM /// file. /// @@ -442,6 +453,40 @@ impl std::fmt::Display for InfluxID { } } +#[derive(Debug, Clone)] +pub struct StorageError { + pub description: String, +} + +impl fmt::Display for StorageError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.description) + } +} + +impl error::Error for StorageError { + fn source(&self) -> Option<&(dyn error::Error + 'static)> { + // Generic error, underlying cause isn't tracked. + None + } +} + +impl From for StorageError { + fn from(e: io::Error) -> Self { + Self { + description: format!("TODO - io error: {} ({:?})", e, e), + } + } +} + +impl From for StorageError { + fn from(e: std::str::Utf8Error) -> Self { + Self { + description: format!("TODO - utf8 error: {} ({:?})", e, e), + } + } +} + #[cfg(test)] mod tests { use super::*; @@ -454,7 +499,7 @@ mod tests { #[test] fn read_tsm_index() { - let file = File::open("tests/fixtures/000000000000005-000000002.tsm.gz"); + let file = File::open("../tests/fixtures/000000000000005-000000002.tsm.gz"); let mut decoder = gzip::Decoder::new(file.unwrap()).unwrap(); let mut buf = Vec::new(); decoder.read_to_end(&mut buf).unwrap(); @@ -467,7 +512,7 @@ mod tests { #[test] fn read_tsm_block() { - let file = File::open("tests/fixtures/000000000000005-000000002.tsm.gz"); + let file = File::open("../tests/fixtures/000000000000005-000000002.tsm.gz"); let mut decoder = gzip::Decoder::new(file.unwrap()).unwrap(); let mut buf = Vec::new(); decoder.read_to_end(&mut buf).unwrap(); @@ -521,7 +566,7 @@ mod tests { #[test] fn decode_tsm_blocks() { - let file = File::open("tests/fixtures/000000000000005-000000002.tsm.gz"); + let file = File::open("../tests/fixtures/000000000000005-000000002.tsm.gz"); let mut decoder = gzip::Decoder::new(file.unwrap()).unwrap(); let mut buf = Vec::new(); decoder.read_to_end(&mut buf).unwrap(); @@ -650,11 +695,11 @@ mod tests { #[test] fn check_tsm_cpu_usage() { - walk_index_and_check_for_errors("tests/fixtures/cpu_usage.tsm.gz"); + walk_index_and_check_for_errors("../tests/fixtures/cpu_usage.tsm.gz"); } #[test] fn check_tsm_000000000000005_000000002() { - walk_index_and_check_for_errors("tests/fixtures/000000000000005-000000002.tsm.gz"); + walk_index_and_check_for_errors("../tests/fixtures/000000000000005-000000002.tsm.gz"); } } diff --git a/src/lib.rs b/src/lib.rs index 84d87f5cce..41dedeb5e4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,7 +3,6 @@ use std::{error, fmt}; -pub mod encoders; pub mod id; pub mod line_parser; pub mod storage; diff --git a/src/storage.rs b/src/storage.rs index 4bb086ea76..1e6500f74c 100644 --- a/src/storage.rs +++ b/src/storage.rs @@ -10,7 +10,6 @@ pub mod partitioned_store; pub mod predicate; pub mod remote_partition; pub mod s3_partition; -pub mod tsm; pub mod tsm_mapper; #[derive(Debug, Eq, PartialEq, Clone)] diff --git a/src/storage/block.rs b/src/storage/block.rs index 214de05fc1..e523c4c0bb 100644 --- a/src/storage/block.rs +++ b/src/storage/block.rs @@ -160,8 +160,8 @@ //! ╚═════════════════════════════════════╝ //! ``` -use crate::encoders::{float, integer, timestamp}; use crate::storage::StorageError; +use delorean_tsm::encoders::{float, integer, timestamp}; use integer_encoding::*; use num::bigint::{BigInt, BigUint};