refactor: inline `Table` into `parquet_file::chunk::Chunk`

Note that the resulting size estimations differ because we were
double-counting `Table`: `mem::size_of::<Self>()` already includes
non-boxed children, since the child struct is stored inline as part of
the parent structure.
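
For illustration, here is a minimal, self-contained sketch of the double-counting (the `Inner`/`Outer` types are hypothetical stand-ins, not the actual `Table`/`Chunk`): adding a child's own size on top of `mem::size_of::<Self>()` counts an inline field twice.

```rust
use std::mem;

struct Inner {
    a: u64,
    b: u64,
}

struct Outer {
    // Stored inline (not boxed), so Inner is already part of Outer's own footprint.
    inner: Inner,
    c: u64,
}

fn main() {
    // size_of::<Outer>() already includes the 16 bytes of the inline Inner field.
    assert_eq!(mem::size_of::<Inner>(), 16);
    assert_eq!(mem::size_of::<Outer>(), 24);

    let o = Outer {
        inner: Inner { a: 1, b: 2 },
        c: 3,
    };

    // Old-style accounting: adding the child's size on top of the parent's
    // size_of counts the inline Inner twice (24 + 16 = 40 for a 24-byte struct).
    let double_counted = mem::size_of::<Outer>() + mem::size_of_val(&o.inner);
    assert_eq!(double_counted, 40);
}
```

The reworked `Chunk::size()` below therefore starts from `mem::size_of::<Self>()` once and only adds the sizes of the parts it references (partition key bytes, `TableSummary`, schema, object store path) rather than re-adding a nested struct's own size.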

Issue: #1295.
Branch: pull/24376/head
Author: Marco Neumann, 2021-06-11 11:51:51 +02:00
Parent: 13dd4b23fd
Commit: f8a518bbed
7 changed files with 119 additions and 237 deletions


@@ -1,10 +1,16 @@
 use snafu::{ResultExt, Snafu};
 use std::{collections::BTreeSet, sync::Arc};
 
-use crate::table::Table;
-use data_types::{partition_metadata::TableSummary, timestamp::TimestampRange};
+use crate::storage::Storage;
+use data_types::{
+    partition_metadata::{Statistics, TableSummary},
+    timestamp::TimestampRange,
+};
 use datafusion::physical_plan::SendableRecordBatchStream;
-use internal_types::{schema::Schema, selection::Selection};
+use internal_types::{
+    schema::{Schema, TIME_COLUMN_NAME},
+    selection::Selection,
+};
 use object_store::{path::Path, ObjectStore};
 use query::predicate::Predicate;
@@ -13,25 +19,15 @@ use std::mem;
 #[derive(Debug, Snafu)]
 pub enum Error {
-    #[snafu(display("Error writing table '{}': {}", table_name, source))]
-    TableWrite {
-        table_name: String,
-        source: crate::table::Error,
-    },
-
-    #[snafu(display("Table Error in '{}': {}", table_name, source))]
-    NamedTableError {
-        table_name: String,
-        source: crate::table::Error,
-    },
-
     #[snafu(display("Table '{}' not found in chunk", table_name))]
     NamedTableNotFoundInChunk { table_name: String },
 
-    #[snafu(display("Error read parquet file for table '{}'", table_name,))]
-    ReadParquet {
-        table_name: String,
-        source: crate::table::Error,
+    #[snafu(display("Failed to read parquet: {}", source))]
+    ReadParquet { source: crate::storage::Error },
+
+    #[snafu(display("Failed to select columns: {}", source))]
+    SelectColumns {
+        source: internal_types::schema::Error,
     },
 }
@@ -64,8 +60,23 @@ pub struct Chunk {
     /// Partition this chunk belongs to
     partition_key: String,
 
-    /// The table in chunk
-    table: Table,
+    /// Meta data of the table
+    table_summary: Arc<TableSummary>,
+
+    /// Schema that goes with this table's parquet file
+    schema: Arc<Schema>,
+
+    /// Timestamp range of this table's parquet file
+    /// (extracted from TableSummary)
+    timestamp_range: Option<TimestampRange>,
+
+    /// Object store of the above relative path to open and read the file
+    object_store: Arc<ObjectStore>,
+
+    /// Path in the object store. Format:
+    /// <writer id>/<database>/data/<partition key>/<chunk
+    /// id>/<tablename>.parquet
+    object_store_path: Path,
 
     metrics: ChunkMetrics,
 }
@@ -79,11 +90,15 @@ impl Chunk {
         schema: Schema,
         metrics: ChunkMetrics,
     ) -> Self {
-        let table = Table::new(table_summary, file_location, store, schema);
+        let timestamp_range = extract_range(&table_summary);
 
         let mut chunk = Self {
             partition_key: part_key.into(),
-            table,
+            table_summary: Arc::new(table_summary),
+            schema: Arc::new(schema),
+            timestamp_range,
+            object_store: store,
+            object_store_path: file_location,
             metrics,
         };
@@ -97,64 +112,109 @@ impl Chunk {
     }
 
     /// Return object store path for this chunk
-    pub fn table_path(&self) -> Path {
-        self.table.path()
+    pub fn path(&self) -> Path {
+        self.object_store_path.clone()
     }
 
     /// Returns the summary statistics for this chunk
     pub fn table_summary(&self) -> &Arc<TableSummary> {
-        self.table.table_summary()
+        &self.table_summary
     }
 
     /// Returns the name of the table this chunk holds
     pub fn table_name(&self) -> &str {
-        self.table.name()
+        &self.table_summary.name
     }
 
     /// Return the approximate memory size of the chunk, in bytes including the
     /// dictionary, tables, and their rows.
     pub fn size(&self) -> usize {
-        self.table.size() + self.partition_key.len() + mem::size_of::<Self>()
+        mem::size_of::<Self>()
+            + self.partition_key.len()
+            + self.table_summary.size()
+            + mem::size_of_val(&self.schema.as_ref())
+            + mem::size_of_val(&self.object_store_path)
     }
 
-    /// Return possibly restricted Schema for the table in this chunk
-    pub fn table_schema(&self, selection: Selection<'_>) -> Result<Schema> {
-        self.table.schema(selection).context(NamedTableError {
-            table_name: self.table_name(),
+    /// Return possibly restricted Schema for this chunk
+    pub fn schema(&self, selection: Selection<'_>) -> Result<Schema> {
+        Ok(match selection {
+            Selection::All => self.schema.as_ref().clone(),
+            Selection::Some(columns) => {
+                let columns = self.schema.select(columns).context(SelectColumns)?;
+                self.schema.project(&columns)
+            }
         })
     }
 
     /// Infallably return the full schema (for all columns) for this chunk
     pub fn full_schema(&self) -> Arc<Schema> {
-        self.table.full_schema()
+        Arc::clone(&self.schema)
     }
 
-    // Return true if the table in this chunk contains values within the time range
+    // Return true if this chunk contains values within the time range
    pub fn has_timerange(&self, timestamp_range: Option<&TimestampRange>) -> bool {
-        self.table.matches_predicate(timestamp_range)
+        match (self.timestamp_range, timestamp_range) {
+            (Some(a), Some(b)) => !a.disjoint(b),
+            (None, Some(_)) => false, /* If this chunk doesn't have a time column it can't match */
+            // the predicate
+            (_, None) => true,
+        }
     }
 
     // Return the columns names that belong to the given column
     // selection
     pub fn column_names(&self, selection: Selection<'_>) -> Option<BTreeSet<String>> {
-        self.table.column_names(selection)
+        let fields = self.schema.inner().fields().iter();
+
+        Some(match selection {
+            Selection::Some(cols) => fields
+                .filter_map(|x| {
+                    if cols.contains(&x.name().as_str()) {
+                        Some(x.name().clone())
+                    } else {
+                        None
+                    }
+                })
+                .collect(),
+            Selection::All => fields.map(|x| x.name().clone()).collect(),
+        })
     }
 
-    /// Return stream of data read from parquet file of the given table
+    /// Return stream of data read from parquet file
     pub fn read_filter(
         &self,
         predicate: &Predicate,
         selection: Selection<'_>,
     ) -> Result<SendableRecordBatchStream> {
-        self.table
-            .read_filter(predicate, selection)
-            .context(ReadParquet {
-                table_name: self.table_name(),
-            })
+        Storage::read_filter(
+            predicate,
+            selection,
+            Arc::clone(&self.schema.as_arrow()),
+            self.object_store_path.clone(),
+            Arc::clone(&self.object_store),
+        )
+        .context(ReadParquet)
     }
 
-    /// The total number of rows in all row groups in all tables in this chunk.
+    /// The total number of rows in all row groups in this chunk.
     pub fn rows(&self) -> usize {
-        self.table.rows()
+        // All columns have the same rows, so return get row count of the first column
+        self.table_summary.columns[0].count() as usize
     }
 }
+
+/// Extracts min/max values of the timestamp column, from the TableSummary, if possible
+fn extract_range(table_summary: &TableSummary) -> Option<TimestampRange> {
+    table_summary
+        .column(TIME_COLUMN_NAME)
+        .map(|c| {
+            if let Statistics::I64(s) = &c.stats {
+                if let (Some(min), Some(max)) = (s.min, s.max) {
+                    return Some(TimestampRange::new(min, max));
+                }
+            }
+            None
+        })
+        .flatten()
+}


@@ -14,7 +14,6 @@ pub mod cleanup;
 pub mod metadata;
 pub mod rebuild;
 pub mod storage;
-pub mod table;
 pub mod test_utils;
 
 mod storage_testing;


@@ -551,7 +551,7 @@ mod tests {
 
         // step 1: read back schema
         let schema_actual = read_schema_from_parquet_metadata(&parquet_metadata).unwrap();
-        let schema_expected = chunk.table_schema(Selection::All).unwrap();
+        let schema_expected = chunk.schema(Selection::All).unwrap();
         assert_eq!(schema_actual, schema_expected);
 
         // step 2: read back statistics
@@ -574,7 +574,7 @@ mod tests {
 
         // step 1: read back schema
         let schema_actual = read_schema_from_parquet_metadata(&parquet_metadata).unwrap();
-        let schema_expected = chunk.table_schema(Selection::All).unwrap();
+        let schema_expected = chunk.schema(Selection::All).unwrap();
         assert_eq!(schema_actual, schema_expected);
 
         // step 2: read back statistics
@@ -595,7 +595,7 @@ mod tests {
 
         // step 1: read back schema
         let schema_actual = read_schema_from_parquet_metadata(&parquet_metadata).unwrap();
-        let schema_expected = chunk.table_schema(Selection::All).unwrap();
+        let schema_expected = chunk.schema(Selection::All).unwrap();
         assert_eq!(schema_actual, schema_expected);
 
         // step 2: reading back statistics fails
@@ -618,7 +618,7 @@ mod tests {
 
         // step 1: read back schema
         let schema_actual = read_schema_from_parquet_metadata(&parquet_metadata).unwrap();
-        let schema_expected = chunk.table_schema(Selection::All).unwrap();
+        let schema_expected = chunk.schema(Selection::All).unwrap();
         assert_eq!(schema_actual, schema_expected);
 
         // step 2: reading back statistics fails


@@ -1,177 +0,0 @@
use snafu::{ResultExt, Snafu};
use std::{collections::BTreeSet, mem, sync::Arc};
use crate::storage::{self, Storage};
use data_types::{
partition_metadata::{Statistics, TableSummary},
timestamp::TimestampRange,
};
use datafusion::physical_plan::SendableRecordBatchStream;
use internal_types::{
schema::{Schema, TIME_COLUMN_NAME},
selection::Selection,
};
use object_store::{path::Path, ObjectStore};
use query::predicate::Predicate;
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("Failed to select columns: {}", source))]
SelectColumns {
source: internal_types::schema::Error,
},
#[snafu(display("Failed to read parquet: {}", source))]
ReadParquet { source: storage::Error },
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
/// Table that belongs to a chunk persisted in a parquet file in object store
#[derive(Debug, Clone)]
pub struct Table {
/// Meta data of the table
table_summary: Arc<TableSummary>,
/// Path in the object store. Format:
/// <writer id>/<database>/data/<partition key>/<chunk
/// id>/<tablename>.parquet
object_store_path: Path,
/// Object store of the above relative path to open and read the file
object_store: Arc<ObjectStore>,
/// Schema that goes with this table's parquet file
table_schema: Arc<Schema>,
/// Timestamp range of this table's parquet file
/// (extracted from TableSummary)
timestamp_range: Option<TimestampRange>,
}
impl Table {
pub fn new(
table_summary: TableSummary,
path: Path,
store: Arc<ObjectStore>,
schema: Schema,
) -> Self {
let timestamp_range = extract_range(&table_summary);
Self {
table_summary: Arc::new(table_summary),
object_store_path: path,
object_store: store,
table_schema: Arc::new(schema),
timestamp_range,
}
}
pub fn table_summary(&self) -> &Arc<TableSummary> {
&self.table_summary
}
pub fn has_table(&self, table_name: &str) -> bool {
self.table_summary.has_table(table_name)
}
/// Return the approximate memory size of the table
pub fn size(&self) -> usize {
mem::size_of::<Self>()
+ self.table_summary.size()
+ mem::size_of_val(&self.object_store_path)
+ mem::size_of_val(&self.table_schema.as_ref())
}
/// Return name of this table
pub fn name(&self) -> &str {
&self.table_summary.name
}
/// Return the object store path of this table
pub fn path(&self) -> Path {
self.object_store_path.clone()
}
/// Return schema of this table for specified selection columns
pub fn schema(&self, selection: Selection<'_>) -> Result<Schema> {
Ok(match selection {
Selection::All => self.table_schema.as_ref().clone(),
Selection::Some(columns) => {
let columns = self.table_schema.select(columns).context(SelectColumns)?;
self.table_schema.project(&columns)
}
})
}
/// Infallably return the full schema (for all columns) for this chunk
pub fn full_schema(&self) -> Arc<Schema> {
Arc::clone(&self.table_schema)
}
// Check if 2 time ranges overlap
pub fn matches_predicate(&self, timestamp_range: Option<&TimestampRange>) -> bool {
match (self.timestamp_range, timestamp_range) {
(Some(a), Some(b)) => !a.disjoint(b),
(None, Some(_)) => false, /* If this chunk doesn't have a time column it can't match */
// the predicate
(_, None) => true,
}
}
// Return columns names of this table that belong to the given column selection
pub fn column_names(&self, selection: Selection<'_>) -> Option<BTreeSet<String>> {
let fields = self.table_schema.inner().fields().iter();
Some(match selection {
Selection::Some(cols) => fields
.filter_map(|x| {
if cols.contains(&x.name().as_str()) {
Some(x.name().clone())
} else {
None
}
})
.collect(),
Selection::All => fields.map(|x| x.name().clone()).collect(),
})
}
/// Return stream of data read from parquet file for given predicate and
/// column selection
pub fn read_filter(
&self,
predicate: &Predicate,
selection: Selection<'_>,
) -> Result<SendableRecordBatchStream> {
Storage::read_filter(
predicate,
selection,
Arc::clone(&self.table_schema.as_arrow()),
self.object_store_path.clone(),
Arc::clone(&self.object_store),
)
.context(ReadParquet)
}
/// The number of rows of this table
pub fn rows(&self) -> usize {
// All columns have the same rows, so return get row count of the first column
self.table_summary.columns[0].count() as usize
}
}
/// Extracts min/max values of the timestamp column, from the TableSummary, if possible
fn extract_range(table_summary: &TableSummary) -> Option<TimestampRange> {
table_summary
.column(TIME_COLUMN_NAME)
.map(|c| {
if let Statistics::I64(s) = &c.stats {
if let (Some(min), Some(max)) = (s.min, s.max) {
return Some(TimestampRange::new(min, max));
}
}
None
})
.flatten()
}


@@ -66,7 +66,7 @@ pub async fn load_parquet_from_store_for_chunk(
     chunk: &Chunk,
     store: Arc<ObjectStore>,
 ) -> Result<(String, Vec<u8>)> {
-    let path = chunk.table_path();
+    let path = chunk.path();
     let table_name = chunk.table_name().to_string();
     Ok((
         table_name,
@@ -584,7 +584,7 @@ pub async fn make_metadata(
         .await
         .unwrap();
     (
-        chunk.table_path(),
+        chunk.path(),
         read_parquet_metadata_from_file(parquet_data).unwrap(),
     )
 }


@@ -1461,7 +1461,7 @@ mod tests {
             .eq(1.0)
             .unwrap();
 
-        let expected_parquet_size = 759;
+        let expected_parquet_size = 647;
         catalog_chunk_size_bytes_metric_eq(&test_db.metric_registry, "read_buffer", 1616).unwrap();
         // now also in OS
         catalog_chunk_size_bytes_metric_eq(
@@ -1817,7 +1817,7 @@ mod tests {
                 ("svr_id", "10"),
             ])
             .histogram()
-            .sample_sum_eq(2375.0)
+            .sample_sum_eq(2263.0)
             .unwrap();
 
         // it should be the same chunk!
@@ -1925,7 +1925,7 @@ mod tests {
                 ("svr_id", "10"),
             ])
             .histogram()
-            .sample_sum_eq(2375.0)
+            .sample_sum_eq(2263.0)
             .unwrap();
 
         // Unload RB chunk but keep it in OS
@@ -1953,7 +1953,7 @@ mod tests {
                 ("svr_id", "10"),
             ])
             .histogram()
-            .sample_sum_eq(759.0)
+            .sample_sum_eq(647.0)
             .unwrap();
 
         // Verify data written to the parquet file in object store
@@ -2342,7 +2342,7 @@ mod tests {
                 Arc::from("cpu"),
                 0,
                 ChunkStorage::ReadBufferAndObjectStore,
-                2373, // size of RB and OS chunks
+                2261, // size of RB and OS chunks
                 1,
             ),
             ChunkSummary::new_without_timestamps(
@@ -2402,7 +2402,7 @@ mod tests {
                 .memory()
                 .parquet()
                 .get_total(),
-            759
+            647
         );
     }
 
@@ -2864,7 +2864,7 @@ mod tests {
             let chunk = db.chunk(table_name, partition_key, *chunk_id).unwrap();
             let chunk = chunk.read();
             if let ChunkStage::Persisted { parquet, .. } = chunk.stage() {
-                paths_expected.push(parquet.table_path().display());
+                paths_expected.push(parquet.path().display());
             } else {
                 panic!("Wrong chunk state.");
             }
@@ -2944,7 +2944,7 @@ mod tests {
             let chunk = db.chunk(&table_name, &partition_key, chunk_id).unwrap();
             let chunk = chunk.read();
             if let ChunkStage::Persisted { parquet, .. } = chunk.stage() {
-                paths_keep.push(parquet.table_path());
+                paths_keep.push(parquet.path());
             } else {
                 panic!("Wrong chunk state.");
             }


@@ -195,7 +195,7 @@ impl DbChunk {
     /// persisted, if any
     pub fn object_store_path(&self) -> Option<Path> {
         match &self.state {
-            State::ParquetFile { chunk } => Some(chunk.table_path()),
+            State::ParquetFile { chunk } => Some(chunk.path()),
             _ => None,
         }
     }