Merge pull request #173 from influxdata/idiomatic-snafu
commit f5d73ee580
@@ -9,21 +9,23 @@
     clippy::use_self
 )]
 
-use std::collections::{BTreeMap, BTreeSet};
-use std::io::{BufRead, Seek};
-
-use log::debug;
-use snafu::{ResultExt, Snafu};
-
 use delorean_line_parser::{FieldValue, ParsedLine};
 use delorean_table::{
     packers::{Packer, Packers},
     ByteArray, DeloreanTableWriter, DeloreanTableWriterSource, Error as TableError,
 };
 use delorean_table_schema::{DataType, Schema, SchemaBuilder};
-use delorean_tsm::mapper::{map_field_columns, ColumnData, MeasurementTable, TSMMeasurementMapper};
-use delorean_tsm::reader::{TSMBlockReader, TSMIndexReader};
-use delorean_tsm::{BlockType, TSMError};
+use delorean_tsm::{
+    mapper::{map_field_columns, ColumnData, MeasurementTable, TSMMeasurementMapper},
+    reader::{TSMBlockReader, TSMIndexReader},
+    BlockType, TSMError,
+};
+use log::debug;
+use snafu::{ensure, OptionExt, ResultExt, Snafu};
+use std::{
+    collections::{BTreeMap, BTreeSet},
+    io::{BufRead, Seek},
+};
 
 #[derive(Debug, Clone, Copy)]
 pub struct ConversionSettings {
@@ -66,8 +68,8 @@ pub enum Error {
     NeedsAtLeastOneLine,
 
     // Only a single line protocol measurement field is currently supported
-    #[snafu(display(r#"More than one measurement not yet supported: {}"#, message))]
-    OnlyOneMeasurementSupported { message: String },
+    #[snafu(display(r#"More than one measurement not yet supported: Saw new measurement {}, had been using measurement {}"#, actual, expected))]
+    OnlyOneMeasurementSupported { expected: String, actual: String },
 
     #[snafu(display(r#"Error writing to TableWriter: {}"#, source))]
     Writing { source: TableError },

@@ -77,6 +79,14 @@ pub enum Error {
     #[snafu(display(r#"Error processing TSM File: {}"#, source))]
     TSMProcessing { source: TSMError },
 
+    // TODO clean this error up
+    #[snafu(display(r#"could not find ts column"#))]
+    CouldNotFindTsColumn,
+
+    // TODO clean this error up
+    #[snafu(display(r#"could not find column"#))]
+    CouldNotFindColumn,
 }
 
 /// Handles buffering `ParsedLine` objects and deducing a schema from that sample
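Note (illustrative, not part of the diff): the variants above rely on snafu's derive, which generates a "context selector" per variant (named the same as the variant with the pre-0.7 snafu used here). A minimal, self-contained sketch of the pattern; the `demo` function and its arguments are hypothetical:

    use snafu::Snafu;

    #[derive(Debug, Snafu)]
    pub enum Error {
        #[snafu(display(
            "More than one measurement not yet supported: Saw new measurement {}, had been using measurement {}",
            actual,
            expected
        ))]
        OnlyOneMeasurementSupported { expected: String, actual: String },
    }

    fn demo(expected: &str, actual: &str) -> Result<(), Error> {
        // The generated `OnlyOneMeasurementSupported` selector accepts anything
        // `Into<String>` for its fields; `.fail()` builds the error as an `Err`.
        OnlyOneMeasurementSupported { expected, actual }.fail()
    }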
@@ -269,23 +279,22 @@ impl<'a> MeasurementSampler<'a> {
     /// Use the contents of self.schema_sample to deduce the Schema of
     /// `ParsedLine`s and return the deduced schema
     fn deduce_schema_from_sample(&mut self) -> Result<Schema, Error> {
-        if self.schema_sample.is_empty() {
-            return Err(Error::NeedsAtLeastOneLine {});
-        }
+        ensure!(!self.schema_sample.is_empty(), NeedsAtLeastOneLine);
 
         let mut builder = SchemaBuilder::new(self.schema_sample[0].series.measurement.as_str());
 
         for line in &self.schema_sample {
             let series = &line.series;
-            if &series.measurement != builder.get_measurement_name() {
-                return Err(Error::OnlyOneMeasurementSupported {
-                    message: format!(
-                        "Saw new measurement {}, had been using measurement {}",
-                        builder.get_measurement_name(),
-                        series.measurement
-                    ),
-                });
-            }
+
+            let measurement_name = builder.get_measurement_name();
+            ensure!(
+                series.measurement == measurement_name,
+                OnlyOneMeasurementSupported {
+                    actual: measurement_name,
+                    expected: &series.measurement,
+                }
+            );
 
             if let Some(tag_set) = &series.tag_set {
                 for (tag_name, _) in tag_set {
                     builder = builder.tag(tag_name.as_str());
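Note (illustrative, not from the PR): `ensure!` is snafu's early-return guard. The sketch below shows roughly what the call above does, using a hypothetical `require_nonempty` helper:

    use snafu::{ensure, Snafu};

    #[derive(Debug, Snafu)]
    enum Error {
        #[snafu(display("Needs at least one line"))]
        NeedsAtLeastOneLine,
    }

    // ensure!(cond, Selector) is shorthand for: if !cond { return Selector.fail(); }
    fn require_nonempty(sample_len: usize) -> Result<(), Error> {
        ensure!(sample_len > 0, NeedsAtLeastOneLine);
        Ok(())
    }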
@@ -521,32 +530,26 @@ impl TSMFileConverter {
         index_stream_size: usize,
         block_stream: impl BufRead + Seek,
     ) -> Result<(), Error> {
-        let index_reader = TSMIndexReader::try_new(index_stream, index_stream_size)
-            .map_err(|e| Error::TSMProcessing { source: e })?;
+        let index_reader =
+            TSMIndexReader::try_new(index_stream, index_stream_size).context(TSMProcessing)?;
         let mut block_reader = TSMBlockReader::new(block_stream);
 
         let mapper = TSMMeasurementMapper::new(index_reader.peekable());
 
         for measurement in mapper {
-            match measurement {
-                Ok(mut m) => {
-                    let (schema, packed_columns) =
-                        Self::process_measurement_table(&mut block_reader, &mut m)?;
+            let mut m = measurement.context(TSMProcessing)?;
+            let (schema, packed_columns) =
+                Self::process_measurement_table(&mut block_reader, &mut m)?;
 
-                    let mut table_writer = self
-                        .table_writer_source
-                        .next_writer(&schema)
-                        .context(WriterCreation)?;
+            let mut table_writer = self
+                .table_writer_source
+                .next_writer(&schema)
+                .context(WriterCreation)?;
 
-                    table_writer
-                        .write_batch(&packed_columns)
-                        .map_err(|e| Error::WriterCreation { source: e })?;
-                    table_writer
-                        .close()
-                        .map_err(|e| Error::WriterCreation { source: e })?;
-                }
-                Err(e) => return Err(Error::TSMProcessing { source: e }),
-            }
+            table_writer
+                .write_batch(&packed_columns)
+                .context(WriterCreation)?;
+            table_writer.close().context(WriterCreation)?;
         }
         Ok(())
     }
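Note (illustrative, not from the PR): for variants that carry a `source`, `ResultExt::context` replaces the hand-written `map_err` closures seen on the left-hand side. A minimal sketch, with `std::io::Error` standing in for `TSMError` and a hypothetical `read_index` function:

    use snafu::{ResultExt, Snafu};

    #[derive(Debug, Snafu)]
    enum Error {
        #[snafu(display("Error processing TSM File: {}", source))]
        TSMProcessing { source: std::io::Error },
    }

    // `.context(TSMProcessing)` fills the variant's `source` field from the Err value,
    // replacing `.map_err(|e| Error::TSMProcessing { source: e })`.
    fn read_index(path: &str) -> Result<Vec<u8>, Error> {
        std::fs::read(path).context(TSMProcessing)
    }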
@@ -592,19 +595,14 @@ impl TSMFileConverter {
         // to the packer_column.
 
         for (i, (tag_set_pair, blocks)) in m.tag_set_fields_blocks().iter_mut().enumerate() {
-            let (ts, field_cols) = map_field_columns(&mut block_reader, blocks)
-                .map_err(|e| Error::TSMProcessing { source: e })?;
+            let (ts, field_cols) =
+                map_field_columns(&mut block_reader, blocks).context(TSMProcessing)?;
 
             // Start with the timestamp column.
             let col_len = ts.len();
             let ts_idx = name_packer
                 .get(schema.timestamp())
-                .ok_or(Error::TSMProcessing {
-                    // TODO clean this error up
-                    source: TSMError {
-                        description: "could not find ts column".to_string(),
-                    },
-                })?;
+                .context(CouldNotFindTsColumn)?;
 
             if i == 0 {
                 packed_columns[*ts_idx] = Packers::from(ts);
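Note (illustrative, not from the PR): the `ok_or(...)` chains on `Option` values become `OptionExt::context` with the new dedicated variants. A sketch under the assumption that the column lookup is a plain `BTreeMap`; the names here are hypothetical:

    use snafu::{OptionExt, Snafu};
    use std::collections::BTreeMap;

    #[derive(Debug, Snafu)]
    enum Error {
        #[snafu(display("could not find ts column"))]
        CouldNotFindTsColumn,
    }

    // On an Option, `.context(Selector)` maps None to the selector's error.
    fn ts_index(name_packer: &BTreeMap<String, usize>, ts_name: &str) -> Result<usize, Error> {
        name_packer.get(ts_name).copied().context(CouldNotFindTsColumn)
    }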
@@ -617,12 +615,7 @@ impl TSMFileConverter {
             // Next let's pad out all of the tag columns we know have
             // repeated values.
             for (tag_key, tag_value) in tag_set_pair {
-                let idx = name_packer.get(tag_key).ok_or(Error::TSMProcessing {
-                    // TODO clean this error up
-                    source: TSMError {
-                        description: "could not find column".to_string(),
-                    },
-                })?;
+                let idx = name_packer.get(tag_key).context(CouldNotFindColumn)?;
 
                 // this will create a column of repeated values.
                 if i == 0 {
@@ -646,12 +639,7 @@ impl TSMFileConverter {
                     continue;
                 }
 
-                let idx = name_packer.get(key).ok_or(Error::TSMProcessing {
-                    // TODO clean this error up
-                    source: TSMError {
-                        description: "could not find column".to_string(),
-                    },
-                })?;
+                let idx = name_packer.get(key).context(CouldNotFindColumn)?;
 
                 if i == 0 {
                     // creates a column of repeated None values.
@@ -669,12 +657,7 @@ impl TSMFileConverter {
             // Next let's write out all of the field column data.
             let mut got_field_cols = Vec::new();
             for (field_key, field_values) in field_cols {
-                let idx = name_packer.get(&field_key).ok_or(Error::TSMProcessing {
-                    // TODO clean this error up
-                    source: TSMError {
-                        description: "could not find column".to_string(),
-                    },
-                })?;
+                let idx = name_packer.get(&field_key).context(CouldNotFindColumn)?;
 
                 if i == 0 {
                     match field_values {
@@ -725,12 +708,7 @@ impl TSMFileConverter {
                     continue;
                 }
 
-                let idx = name_packer.get(key).ok_or(Error::TSMProcessing {
-                    // TODO clean this error up
-                    source: TSMError {
-                        description: "could not find column".to_string(),
-                    },
-                })?;
+                let idx = name_packer.get(key).context(CouldNotFindColumn)?;
 
                 // this will create a column of repeated None values.
                 if i == 0 {
@@ -1160,7 +1138,7 @@ mod delorean_ingest_tests {
         // Then the converter does not support it
         assert!(matches!(
            schema_result,
-            Err(Error::OnlyOneMeasurementSupported { message: _ })
+            Err(Error::OnlyOneMeasurementSupported { .. })
        ));
    }
@@ -326,6 +326,21 @@ impl<'a> From<&'a str> for EscapedStr<'a> {
     }
 }
 
+impl From<EscapedStr<'_>> for String {
+    fn from(other: EscapedStr<'_>) -> Self {
+        match other {
+            EscapedStr::SingleSlice(s) => s.into(),
+            EscapedStr::CopiedValue(s) => s,
+        }
+    }
+}
+
+impl From<&EscapedStr<'_>> for String {
+    fn from(other: &EscapedStr<'_>) -> Self {
+        other.to_string()
+    }
+}
+
 impl PartialEq<&str> for EscapedStr<'_> {
     fn eq(&self, other: &&str) -> bool {
         self.as_str() == *other
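Note (illustrative, not from the PR): how the two `From` impls above are used at a call site. The enum definition below is an assumed stand-in mirroring `delorean_line_parser::EscapedStr`, reduced to just enough to compile the example:

    #[derive(Debug)]
    enum EscapedStr<'a> {
        SingleSlice(&'a str),
        CopiedValue(String),
    }

    impl From<EscapedStr<'_>> for String {
        fn from(other: EscapedStr<'_>) -> Self {
            match other {
                EscapedStr::SingleSlice(s) => s.into(),
                EscapedStr::CopiedValue(s) => s,
            }
        }
    }

    fn main() {
        // A consuming conversion avoids an extra copy for the already-owned variant.
        let owned: String = EscapedStr::SingleSlice("cpu,region=west").into();
        println!("{}", owned);
    }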
@@ -5,6 +5,7 @@ use parquet::errors::ParquetError;
 #[derive(Debug, Snafu)]
 pub enum Error {
     #[snafu(display(r#"{}, underlying parquet error {}"#, message, source))]
+    #[snafu(visibility(pub(crate)))]
     ParquetLibraryError {
         message: String,
         source: ParquetError,
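Note (illustrative, not from the PR): by default the generated context selector is only as visible as its module, so `#[snafu(visibility(pub(crate)))]` is what lets sibling modules write `.context(crate::error::ParquetLibraryError { ... })` as the later hunks do. A self-contained sketch using pre-0.7 selector naming, with assumed names (`LibraryError`, `open_file`) and `std::io::Error` in place of `ParquetError`:

    use snafu::ResultExt;

    mod error {
        use snafu::Snafu;

        #[derive(Debug, Snafu)]
        pub enum Error {
            #[snafu(display("{}, underlying io error {}", message, source))]
            #[snafu(visibility(pub(crate)))] // selector usable outside this module
            LibraryError {
                message: String,
                source: std::io::Error,
            },
        }
    }

    fn open_file(path: &str) -> Result<std::fs::File, error::Error> {
        // Without the visibility attribute this selector would be private to `error`.
        std::fs::File::open(path).context(error::LibraryError {
            message: "Creating reader",
        })
    }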
@@ -1,17 +1,13 @@
 //! Provide storage statistics for parquet files
-use std::io::{Read, Seek};
-
+use delorean_table_schema::DataType;
 use parquet::{
     file::reader::{FileReader, SerializedFileReader},
     schema,
 };
+use snafu::ResultExt;
+use std::io::{Read, Seek};
 
-use delorean_table_schema::DataType;
-
-use crate::{
-    error::{Error, Result},
-    Length, TryClone,
-};
+use crate::{error::Result, Length, TryClone};
 
 pub fn parquet_schema_as_string(parquet_schema: &schema::types::Type) -> String {
     let mut parquet_schema_string = Vec::new();
@@ -42,9 +38,8 @@ where
 {
     let input_len = input.len();
 
-    let reader = SerializedFileReader::new(input).map_err(|e| Error::ParquetLibraryError {
-        message: String::from("Creating parquet reader"),
-        source: e,
+    let reader = SerializedFileReader::new(input).context(crate::error::ParquetLibraryError {
+        message: "Creating parquet reader",
     })?;
 
     let parquet_metadata = reader.metadata();
@@ -1,19 +1,18 @@
 //! Provide storage statistics for parquet files
-use std::collections::BTreeMap;
-use std::convert::TryInto;
-use std::io::{Read, Seek};
-
-use log::debug;
-use parquet::basic::{Compression, Encoding};
-use parquet::file::reader::{FileReader, SerializedFileReader};
-
 use delorean_table::stats::{ColumnStats, ColumnStatsBuilder};
-
-use crate::{
-    error::{Error, Result},
-    metadata::data_type_from_parquet_type,
-    Length, TryClone,
+use log::debug;
+use parquet::{
+    basic::{Compression, Encoding},
+    file::reader::{FileReader, SerializedFileReader},
 };
+use snafu::ResultExt;
+use std::{
+    collections::BTreeMap,
+    convert::TryInto,
+    io::{Read, Seek},
+};
+
+use crate::{error::Result, metadata::data_type_from_parquet_type, Length, TryClone};
 
 /// Calculate storage statistics for a particular parquet file that can
 /// be read from `input`, with a total size of `input_size` byes
@@ -23,9 +22,8 @@ pub fn col_stats<R: 'static>(input: R) -> Result<Vec<ColumnStats>>
 where
     R: Read + Seek + TryClone + Length,
 {
-    let reader = SerializedFileReader::new(input).map_err(|e| Error::ParquetLibraryError {
-        message: String::from("Creating parquet reader"),
-        source: e,
+    let reader = SerializedFileReader::new(input).context(crate::error::ParquetLibraryError {
+        message: "Creating parquet reader",
     })?;
 
     let mut stats_builders = BTreeMap::new();
@@ -1,10 +1,4 @@
 //! This module contains the code to write delorean table data to parquet
-use std::{
-    fmt,
-    io::{Seek, Write},
-    rc::Rc,
-};
-
 use log::debug;
 use parquet::{
     basic::{Compression, Encoding, LogicalType, Repetition, Type as PhysicalType},
@@ -16,7 +10,12 @@ use parquet::{
     },
     schema::types::{ColumnPath, Type},
 };
-use snafu::{ResultExt, Snafu};
+use snafu::{OptionExt, ResultExt, Snafu};
+use std::{
+    fmt,
+    io::{Seek, Write},
+    rc::Rc,
+};
 
 use crate::metadata::parquet_schema_as_string;
 use delorean_table::{DeloreanTableWriter, Error as TableError, Packers};
@@ -28,8 +27,8 @@ pub enum Error {
         message: String,
         source: ParquetError,
     },
-    #[snafu(display(r#"{}"#, message))]
-    MismatchedColumns { message: String },
+    #[snafu(display(r#"Could not get packer for column {}"#, column_number))]
+    MismatchedColumns { column_number: usize },
 }
 
 impl From<Error> for TableError {
@@ -142,16 +141,11 @@ where
                 message: String::from("Can't create the next row_group_writer"),
             })?
         {
-            let packer = match packers.get(column_number) {
-                Some(packer) => packer,
-                None => {
-                    return Err(TableError::Other {
-                        source: Box::new(Error::MismatchedColumns {
-                            message: format!("Could not get packer for column {}", column_number),
-                        }),
-                    });
-                }
-            };
+            let packer = packers
+                .get(column_number)
+                .context(MismatchedColumns { column_number })
+                .map_err(TableError::from_other)?;
 
             // TODO(edd) This seems super awkward and not the right way to do it...
             // We know we have a direct mapping between a col_writer (ColumnWriter)
             // type and a Packers variant. We also know that we do exactly the same
@@ -44,6 +44,21 @@ pub enum Error {
     },
 }
 
+impl Error {
+    pub fn from_io(source: std::io::Error, message: impl Into<String>) -> Self {
+        Self::IO {
+            source,
+            message: message.into(),
+        }
+    }
+
+    pub fn from_other(source: impl std::error::Error + 'static) -> Self {
+        Self::Other {
+            source: Box::new(source),
+        }
+    }
+}
+
 /// Something that knows how to write a set of columns somewhere
 pub trait DeloreanTableWriter {
     /// Writes a batch of packed data to the underlying output
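Note (illustrative, not from the PR): a sketch of how the new helpers shorten call sites that still have to produce the table error type. The `Error` enum below is a trimmed stand-in for `delorean_table::Error`, and `create_output` is hypothetical:

    use std::fs::File;

    #[derive(Debug)]
    pub enum Error {
        IO { message: String, source: std::io::Error },
    }

    impl Error {
        pub fn from_io(source: std::io::Error, message: impl Into<String>) -> Self {
            Self::IO { source, message: message.into() }
        }
    }

    fn create_output(path: &str) -> Result<File, Error> {
        // One helper call instead of spelling out the struct literal at every call site.
        File::create(path)
            .map_err(|e| Error::from_io(e, format!("Error creating output file {}", path)))
    }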
@@ -196,7 +196,7 @@ impl SchemaBuilder {
         }
     }
 
-    pub fn get_measurement_name(&self) -> &String {
+    pub fn get_measurement_name(&self) -> &str {
         &self.measurement_name
     }
@@ -1,16 +1,24 @@
-use delorean_ingest::{ConversionSettings, LineProtocolConverter, TSMFileConverter};
+use delorean_ingest::{
+    ConversionSettings, Error as IngestError, LineProtocolConverter, TSMFileConverter,
+};
 use delorean_line_parser::parse_lines;
 use delorean_parquet::writer::DeloreanParquetTableWriter;
+use delorean_parquet::writer::Error as ParquetWriterError;
 use delorean_table::{DeloreanTableWriter, DeloreanTableWriterSource, Error as TableError};
 use delorean_table_schema::Schema;
 use log::{debug, info, warn};
-use std::convert::TryInto;
-use std::fs;
-use std::io::Read;
-use std::path::{Path, PathBuf};
+use snafu::{ResultExt, Snafu};
+use std::{
+    convert::TryInto,
+    fs,
+    io::Read,
+    path::{Path, PathBuf},
+};
 
-use crate::commands::error::{Error, Result};
 use crate::commands::input::{FileType, InputReader};
 
+pub type Result<T, E = Error> = std::result::Result<T, E>;
+
 /// Creates `DeloreanParquetTableWriter` suitable for writing to a single file
 #[derive(Debug)]
 struct ParquetFileWriterSource {
@@ -24,16 +32,18 @@ impl DeloreanTableWriterSource for ParquetFileWriterSource {
     // Returns a `DeloreanTableWriter suitable for writing data from packers.
     fn next_writer(&mut self, schema: &Schema) -> Result<Box<dyn DeloreanTableWriter>, TableError> {
         if self.made_file {
-            return Err(TableError::Other {
-                source: Box::new(Error::MultipleMeasurementsToSingleFile {
-                    new_measurement_name: String::from(schema.measurement()),
-                }),
-            });
+            return MultipleMeasurementsToSingleFile {
+                new_measurement_name: schema.measurement(),
+            }
+            .fail()
+            .map_err(TableError::from_other);
         }
 
-        let output_file = fs::File::create(&self.output_filename).map_err(|e| TableError::IO {
-            message: format!("Error creating output file {}", self.output_filename),
-            source: e,
+        let output_file = fs::File::create(&self.output_filename).map_err(|e| {
+            TableError::from_io(
+                e,
+                format!("Error creating output file {}", self.output_filename),
+            )
         })?;
         info!(
             "Writing output for measurement {} to {} ...",
@@ -41,11 +51,9 @@ impl DeloreanTableWriterSource for ParquetFileWriterSource {
             self.output_filename
         );
 
-        let writer = DeloreanParquetTableWriter::new(schema, output_file).map_err(|e| {
-            TableError::Other {
-                source: Box::new(Error::UnableToCreateParquetTableWriter { source: e }),
-            }
-        })?;
+        let writer = DeloreanParquetTableWriter::new(schema, output_file)
+            .context(UnableToCreateParquetTableWriter)
+            .map_err(TableError::from_other)?;
         self.made_file = true;
         Ok(Box::new(writer))
     }
@@ -68,9 +76,11 @@ impl DeloreanTableWriterSource for ParquetDirectoryWriterSource {
         output_file_path.push(schema.measurement());
         output_file_path.set_extension("parquet");
 
-        let output_file = fs::File::create(&output_file_path).map_err(|e| TableError::IO {
-            message: format!("Error creating output file {:?}", output_file_path),
-            source: e,
+        let output_file = fs::File::create(&output_file_path).map_err(|e| {
+            TableError::from_io(
+                e,
+                format!("Error creating output file {:?}", output_file_path),
+            )
         })?;
         info!(
             "Writing output for measurement {} to {:?} ...",
@@ -78,11 +88,9 @@ impl DeloreanTableWriterSource for ParquetDirectoryWriterSource {
             output_file_path
         );
 
-        let writer = DeloreanParquetTableWriter::new(schema, output_file).map_err(|e| {
-            TableError::Other {
-                source: Box::new(Error::UnableToCreateParquetTableWriter { source: e }),
-            }
-        })?;
+        let writer = DeloreanParquetTableWriter::new(schema, output_file)
+            .context(UnableToCreateParquetTableWriter)
+            .map_err(TableError::from_other)?;
         Ok(Box::new(writer))
     }
 }
@@ -97,7 +105,7 @@ pub fn convert(input_filename: &str, output_name: &str) -> Result<()> {
     info!("convert starting");
     debug!("Reading from input file {}", input_filename);
 
-    let input_reader = InputReader::new(input_filename)?;
+    let input_reader = InputReader::new(input_filename).context(OpenInput)?;
     info!(
         "Preparing to convert {} bytes from {}",
         input_reader.len(),
@@ -111,13 +119,14 @@ pub fn convert(input_filename: &str, output_name: &str) -> Result<()> {
         FileType::TSM => {
             // TODO(edd): we can remove this when I figure out the best way to share
             // the reader between the TSM index reader and the Block decoder.
-            let input_block_reader = InputReader::new(input_filename)?;
+            let input_block_reader = InputReader::new(input_filename).context(OpenInput)?;
             let len = input_reader.len() as usize;
             convert_tsm_to_parquet(input_reader, len, input_block_reader, output_name)
         }
-        FileType::Parquet => Err(Error::NotImplemented {
-            operation_name: String::from("Parquet format conversion"),
-        }),
+        FileType::Parquet => NotImplemented {
+            operation_name: "Parquet format conversion",
+        }
+        .fail(),
     }
 }
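Note (illustrative, not from the PR): `.fail()` on a context selector builds the error (running `Into` conversions on its fields, so a `&str` can fill a `String` field) and wraps it in `Err`, which is why the rewritten arms drop the explicit `Err(Error::NotImplemented { ... })`. Sketch with a hypothetical `convert_parquet`:

    use snafu::Snafu;

    #[derive(Debug, Snafu)]
    enum Error {
        #[snafu(display("Not implemented: {}", operation_name))]
        NotImplemented { operation_name: String },
    }

    fn convert_parquet() -> Result<(), Error> {
        NotImplemented {
            operation_name: "Parquet format conversion",
        }
        .fail()
    }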
||||
|
@ -132,13 +141,12 @@ fn convert_line_protocol_to_parquet(
|
|||
input_reader
|
||||
.len()
|
||||
.try_into()
|
||||
.expect("Can not allocate buffer"),
|
||||
.expect("Cannot allocate buffer"),
|
||||
);
|
||||
input_reader
|
||||
.read_to_string(&mut buf)
|
||||
.map_err(|e| Error::UnableToReadInput {
|
||||
name: String::from(input_filename),
|
||||
source: e,
|
||||
.context(UnableToReadInput {
|
||||
name: input_filename,
|
||||
})?;
|
||||
|
||||
// FIXME: Design something sensible to do with lines that don't
|
||||
|
@@ -169,10 +177,8 @@ fn convert_line_protocol_to_parquet(
     let mut converter = LineProtocolConverter::new(settings, writer_source);
     converter
         .convert(only_good_lines)
-        .map_err(|e| Error::UnableToWriteGoodLines { source: e })?;
-    converter
-        .finalize()
-        .map_err(|e| Error::UnableToCloseTableWriter { source: e })?;
+        .context(UnableToWriteGoodLines)?;
+    converter.finalize().context(UnableToCloseTableWriter)?;
     info!("Completing writing to {} successfully", output_name);
     Ok(())
 }
@@ -200,5 +206,35 @@ fn convert_tsm_to_parquet(
     let mut converter = TSMFileConverter::new(writer_source);
     converter
         .convert(index_stream, index_stream_size, block_stream)
-        .map_err(|e| Error::UnableToCloseTableWriter { source: e })
+        .context(UnableToCloseTableWriter)
 }
+
+#[derive(Debug, Snafu)]
+pub enum Error {
+    #[snafu(display("Error reading {} ({})", name.display(), source))]
+    UnableToReadInput {
+        name: PathBuf,
+        source: std::io::Error,
+    },
+
+    #[snafu(display(
+        "Cannot write multiple measurements to a single file. Saw new measurement named {}",
+        new_measurement_name
+    ))]
+    MultipleMeasurementsToSingleFile { new_measurement_name: String },
+
+    #[snafu(display("Error creating a parquet table writer {}", source))]
+    UnableToCreateParquetTableWriter { source: ParquetWriterError },
+
+    #[snafu(display("Not implemented: {}", operation_name))]
+    NotImplemented { operation_name: String },
+
+    #[snafu(display("Error writing remaining lines {}", source))]
+    UnableToWriteGoodLines { source: IngestError },
+
+    #[snafu(display("Error opening input {}", source))]
+    OpenInput { source: super::input::Error },
+
+    #[snafu(display("Error while closing the table writer {}", source))]
+    UnableToCloseTableWriter { source: IngestError },
+}
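Note (illustrative, not from the PR): each command module now carries its own error enum plus a `Result` alias with a default error type, instead of sharing one `commands::error` module. A minimal sketch of that shape; `run` is hypothetical:

    use snafu::Snafu;

    #[derive(Debug, Snafu)]
    pub enum Error {
        #[snafu(display("Not implemented: {}", operation_name))]
        NotImplemented { operation_name: String },
    }

    // `Result<T>` defaults to this module's Error, while callers can still
    // spell out a different error type as the second parameter.
    pub type Result<T, E = Error> = std::result::Result<T, E>;

    pub fn run() -> Result<()> {
        Ok(())
    }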
@@ -20,7 +20,7 @@ pub enum Error {
     },
 
     #[snafu(display(
-        "Can not write multiple measurements to a single file. Saw new measurement named {}",
+        "Cannot write multiple measurements to a single file. Saw new measurement named {}",
         new_measurement_name
     ))]
     MultipleMeasurementsToSingleFile { new_measurement_name: String },
@@ -1,45 +1,45 @@
-use std::collections::{BTreeMap, BTreeSet};
-use std::convert::TryInto;
-
-use delorean_tsm::reader::{IndexEntry, TSMIndexReader};
-use delorean_tsm::{InfluxID, TSMError};
-
-use delorean_parquet::metadata::print_parquet_metadata;
+use delorean_parquet::{error::Error as DeloreanParquetError, metadata::print_parquet_metadata};
+use delorean_tsm::{reader::IndexEntry, reader::TSMIndexReader, InfluxID, TSMError};
 use log::{debug, info};
+use snafu::{ResultExt, Snafu};
+use std::{
+    collections::{BTreeMap, BTreeSet},
+    convert::TryInto,
+};
 
-use crate::commands::error::{Error, Result};
 use crate::commands::input::{FileType, InputReader};
 
+pub type Result<T, E = Error> = std::result::Result<T, E>;
+
 pub fn dump_meta(input_filename: &str) -> Result<()> {
     info!("meta starting");
     debug!("Reading from input file {}", input_filename);
 
-    let input_reader = InputReader::new(input_filename)?;
+    let input_reader = InputReader::new(input_filename).context(OpenInput)?;
 
     match input_reader.file_type() {
-        FileType::LineProtocol => Err(Error::NotImplemented {
-            operation_name: String::from("Line protocol metadata dump"),
-        }),
+        FileType::LineProtocol => NotImplemented {
+            operation_name: "Line protocol metadata dump",
+        }
+        .fail(),
         FileType::TSM => {
             let len = input_reader
                 .len()
                 .try_into()
                 .expect("File size more than usize");
-            let reader =
-                TSMIndexReader::try_new(input_reader, len).map_err(|e| Error::TSM { source: e })?;
+            let reader = TSMIndexReader::try_new(input_reader, len).context(TSM)?;
 
             let mut stats_builder = TSMMetadataBuilder::new();
 
-            for mut entry in reader {
-                stats_builder.process_entry(&mut entry)?;
+            for entry in reader {
+                let entry = entry.context(TSM)?;
+                stats_builder.process_entry(entry)?;
             }
             stats_builder.print_report();
             Ok(())
         }
         FileType::Parquet => {
-            print_parquet_metadata(input_reader)
-                .map_err(|e| Error::UnableDumpToParquetMetadata { source: e })?;
-            Ok(())
+            print_parquet_metadata(input_reader).context(UnableDumpToParquetMetadata)
         }
     }
 }
@@ -54,9 +54,7 @@ struct MeasurementMetadata {
 
 impl MeasurementMetadata {
     fn update_for_entry(&mut self, index_entry: &mut IndexEntry) -> Result<()> {
-        let key = index_entry
-            .parse_key()
-            .map_err(|e| Error::TSM { source: e })?;
+        let key = index_entry.parse_key().context(TSM)?;
 
         for (tag_name, tag_value) in key.tagset {
             let tag_entry = self.tags.entry(tag_name).or_default();
@@ -93,9 +91,7 @@ impl BucketMetadata {
     fn update_for_entry(&mut self, index_entry: &mut IndexEntry) -> Result<()> {
         self.count += 1;
         self.total_records += u64::from(index_entry.count);
-        let key = index_entry
-            .parse_key()
-            .map_err(|e| Error::TSM { source: e })?;
+        let key = index_entry.parse_key().context(TSM)?;
 
         let meta = self.measurements.entry(key.measurement).or_default();
         meta.update_for_entry(index_entry)?;
@@ -124,17 +120,12 @@ impl TSMMetadataBuilder {
         Self::default()
     }
 
-    fn process_entry(&mut self, entry: &mut Result<IndexEntry, TSMError>) -> Result<()> {
-        match entry {
-            Ok(index_entry) => {
-                self.num_entries += 1;
-                let key = (index_entry.org_id(), index_entry.bucket_id());
-                let stats = self.bucket_stats.entry(key).or_default();
-                stats.update_for_entry(index_entry)?;
-                Ok(())
-            }
-            Err(e) => Err(Error::TSM { source: e.clone() }),
-        }
+    fn process_entry(&mut self, mut index_entry: IndexEntry) -> Result<()> {
+        self.num_entries += 1;
+        let key = (index_entry.org_id(), index_entry.bucket_id());
+        let stats = self.bucket_stats.entry(key).or_default();
+        stats.update_for_entry(&mut index_entry)?;
+        Ok(())
     }
 
     fn print_report(&self) {
@@ -152,3 +143,18 @@ impl TSMMetadataBuilder {
         }
     }
 }
+
+#[derive(Debug, Snafu)]
+pub enum Error {
+    #[snafu(display("Error opening input {}", source))]
+    OpenInput { source: super::input::Error },
+
+    #[snafu(display("Not implemented: {}", operation_name))]
+    NotImplemented { operation_name: String },
+
+    #[snafu(display("Unable to dump parquet file metadata: {}", source))]
+    UnableDumpToParquetMetadata { source: DeloreanParquetError },
+
+    #[snafu(display(r#"Error reading TSM data: {}"#, source))]
+    TSM { source: TSMError },
+}
@@ -1,12 +1,15 @@
-use delorean_parquet::ParquetError;
 /// Module to handle input files (and maybe urls?)
 use libflate::gzip;
-use std::fs::File;
-use std::io;
-use std::io::{BufRead, BufReader, Cursor, Read, Seek, SeekFrom};
-use std::path::Path;
+use snafu::{OptionExt, ResultExt, Snafu};
+use std::{
+    fs::File,
+    io,
+    io::{BufRead, BufReader, Cursor, Read, Seek, SeekFrom},
+    path::{Path, PathBuf},
+};
 
-use crate::commands::error::{Error, Result};
+use delorean_parquet::ParquetError;
+pub type Result<T, E = Error> = std::result::Result<T, E>;
 
 #[derive(Debug)]
 pub enum FileType {
@@ -40,17 +43,11 @@ pub struct MemoryInputReader {
 
 impl FileInputReader {
     fn new(file_type: FileType, input_name: &str) -> Result<Self> {
-        let file = File::open(input_name).map_err(|e| Error::UnableToReadInput {
-            name: String::from(input_name),
-            source: e,
-        })?;
+        let file = File::open(input_name).context(UnableToReadInput { input_name })?;
 
         let file_size = file
             .metadata()
-            .map_err(|e| Error::UnableToReadInput {
-                name: String::from(input_name),
-                source: e,
-            })?
+            .context(UnableToReadInput { input_name })?
             .len();
 
         Ok(Self {
@@ -146,14 +143,12 @@ impl InputReader {
         // inspect contents.
         let ext = path
             .extension()
-            .ok_or(Error::UnknownInputType {
+            .context(UnknownInputType {
                 details: String::from("No extension"),
                 input_name: path.display().to_string(),
             })?
             .to_str()
-            .ok_or(Error::FileNameDecode {
-                input_name: path.display().to_string(),
-            })?;
+            .context(FileNameDecode { input_name: path })?;
 
         match ext {
             "tsm" => Ok(Self::FileInputType(FileInputReader::new(
@@ -173,31 +168,20 @@ impl InputReader {
 
             let stem_ext = stem
                 .extension()
-                .ok_or(Error::UnknownInputType {
+                .context(UnknownInputType {
                     details: String::from("No extension before .gz"),
-                    input_name: path.display().to_string(),
+                    input_name: path,
                 })?
                 .to_str()
-                .ok_or(Error::FileNameDecode {
-                    input_name: path.display().to_string(),
-                })?;
+                .context(FileNameDecode { input_name: path })?;
 
-            let file = File::open(input_name).map_err(|e| Error::UnableToReadInput {
-                name: input_name.to_string(),
-                source: e,
-            })?;
+            let file = File::open(input_name).context(UnableToReadInput { input_name })?;
             let mut decoder =
-                gzip::Decoder::new(file).map_err(|gzip_err| Error::UnableToReadInput {
-                    name: input_name.to_string(),
-                    source: gzip_err,
-                })?;
+                gzip::Decoder::new(file).context(UnableToReadInput { input_name })?;
             let mut buffer = Vec::new();
             decoder
                 .read_to_end(&mut buffer)
-                .map_err(|e| Error::ReadingGzip {
-                    input_name: input_name.to_string(),
-                    source: e,
-                })?;
+                .context(ReadingGzip { input_name })?;
 
             match stem_ext {
                 "tsm" => Ok(Self::MemoryInputType(MemoryInputReader::new(
@@ -212,16 +196,42 @@ impl InputReader {
                     FileType::Parquet,
                     buffer,
                 ))),
-                _ => Err(Error::UnknownInputType {
-                    details: String::from("Unknown input extension before .gz"),
-                    input_name: input_name.to_string(),
-                }),
+                _ => UnknownInputType {
+                    details: "Unknown input extension before .gz",
+                    input_name,
+                }
+                .fail(),
             }
         }
-        _ => Err(Error::UnknownInputType {
-            details: String::from("Unknown input extension"),
-            input_name: input_name.to_string(),
-        }),
+        _ => UnknownInputType {
+            details: "Unknown input extension",
+            input_name,
+        }
+        .fail(),
         }
     }
 }
+
+#[derive(Debug, Snafu)]
+pub enum Error {
+    #[snafu(display("Error reading {} ({})", input_name.display(), source))]
+    UnableToReadInput {
+        input_name: PathBuf,
+        source: std::io::Error,
+    },
+
+    #[snafu(display("Unknown input type: {} for {}", details, input_name.display()))]
+    UnknownInputType {
+        details: String,
+        input_name: PathBuf,
+    },
+
+    #[snafu(display("Can't convert filename to utf-8, : {}", input_name.display()))]
+    FileNameDecode { input_name: PathBuf },
+
+    #[snafu(display("Can't read gzip data : {}", input_name.display()))]
+    ReadingGzip {
+        input_name: PathBuf,
+        source: std::io::Error,
+    },
+}
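Note (illustrative, not from the PR): selector fields are generic over `Into<FieldType>`, which is what lets call sites above pass a `&str` `input_name` straight into the new `PathBuf` fields. A compilable sketch with a hypothetical `read_stem` function and pre-0.7 selector naming:

    use snafu::{OptionExt, ResultExt, Snafu};
    use std::path::{Path, PathBuf};

    #[derive(Debug, Snafu)]
    enum Error {
        #[snafu(display("Error reading {} ({})", input_name.display(), source))]
        UnableToReadInput {
            input_name: PathBuf,
            source: std::io::Error,
        },

        #[snafu(display("Can't convert filename to utf-8: {}", input_name.display()))]
        FileNameDecode { input_name: PathBuf },
    }

    fn read_stem(input_name: &str) -> Result<(Vec<u8>, String), Error> {
        // `&str` satisfies `Into<PathBuf>`, so the selector field takes it directly.
        let bytes = std::fs::read(input_name).context(UnableToReadInput { input_name })?;
        let stem = Path::new(input_name)
            .file_stem()
            .and_then(|s| s.to_str())
            .map(String::from)
            .context(FileNameDecode { input_name })?;
        Ok((bytes, stem))
    }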
@@ -1,37 +1,37 @@
 //! This module contains code to report compression statistics for storage files
 
+use delorean_parquet::{error::Error as DeloreanParquetError, stats::col_stats};
 use log::info;
+use snafu::{ResultExt, Snafu};
 
-use crate::{
-    commands::error::{Error, Result},
-    commands::input::{FileType, InputReader},
-};
+use crate::commands::input::{FileType, InputReader};
 
-use delorean_parquet::stats::col_stats;
+pub type Result<T, E = Error> = std::result::Result<T, E>;
 
 /// Print statistics about the file name in input_filename to stdout
 pub fn stats(input_filename: &str) -> Result<()> {
     info!("stats starting");
 
-    let input_reader = InputReader::new(input_filename)?;
+    let input_reader = InputReader::new(input_filename).context(OpenInput)?;
 
     let (input_len, col_stats) = match input_reader.file_type() {
         FileType::LineProtocol => {
-            return Err(Error::NotImplemented {
-                operation_name: String::from("Line protocol storage statistics"),
-            });
+            return NotImplemented {
+                operation_name: "Line protocol storage statistics",
+            }
+            .fail()
        }
        FileType::TSM => {
-            return Err(Error::NotImplemented {
-                operation_name: String::from("TSM storage statistics"),
-            })
+            return NotImplemented {
+                operation_name: "TSM storage statistics",
+            }
+            .fail()
        }
        FileType::Parquet => {
            let input_len = input_reader.len();
            (
                input_len,
-                col_stats(input_reader)
-                    .map_err(|e| Error::UnableDumpToParquetMetadata { source: e })?,
+                col_stats(input_reader).context(UnableDumpToParquetMetadata)?,
            )
        }
    };
@@ -76,3 +76,15 @@ pub fn stats(input_filename: &str) -> Result<()> {
 
     Ok(())
 }
+
+#[derive(Debug, Snafu)]
+pub enum Error {
+    #[snafu(display("Not implemented: {}", operation_name))]
+    NotImplemented { operation_name: String },
+
+    #[snafu(display("Error opening input {}", source))]
+    OpenInput { source: super::input::Error },
+
+    #[snafu(display("Unable to dump parquet file metadata: {}", source))]
+    UnableDumpToParquetMetadata { source: DeloreanParquetError },
+}
@@ -11,7 +11,6 @@ use log::{debug, error, warn};
 
 mod commands {
     pub mod convert;
-    mod error;
    pub mod file_meta;
    mod input;
    pub mod stats;
@@ -195,7 +195,7 @@ fn meta_bad_input_filename() {
         .code(2)
         .stderr(predicate::str::contains("Metadata dump failed"))
         .stderr(predicate::str::contains(
-            "Metadata dump failed: Unknown input type: No extension for non_existent_input",
+            "Metadata dump failed: Error opening input Unknown input type: No extension for non_existent_input",
         ));
 }
@@ -209,7 +209,7 @@ fn meta_non_existent_input_filename() {
         .code(2)
         .stderr(predicate::str::contains("Metadata dump failed"))
         .stderr(predicate::str::contains(
-            "Metadata dump failed: Error reading non_existent_input.tsm",
+            "Metadata dump failed: Error opening input Error reading non_existent_input.tsm",
         ));
 }
@@ -223,7 +223,7 @@ fn meta_bad_input_filename_gz() {
         .code(2)
         .stderr(predicate::str::contains("Metadata dump failed"))
         .stderr(predicate::str::contains(
-            "Metadata dump failed: Unknown input type: No extension before .gz for non_existent_input.gz",
+            "Metadata dump failed: Error opening input Unknown input type: No extension before .gz for non_existent_input.gz",
         ));
 }