From 35d93f9475794deb9a6f2aa88e80684bce65a3df Mon Sep 17 00:00:00 2001 From: Marco Neumann Date: Wed, 5 Jul 2023 12:42:39 +0200 Subject: [PATCH] fix: include `PartitionHashId` in size estimations (#8153) As for the other types: size estimations are conservative, so we assume the value behind the `Arc` is owned by the estimating party. --- data_types/src/lib.rs | 8 +++++++- data_types/src/partition.rs | 17 +++++++++++++++++ querier/src/cache/parquet_file.rs | 4 ++-- 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/data_types/src/lib.rs b/data_types/src/lib.rs index ae73ebeb09..3317dc8a1a 100644 --- a/data_types/src/lib.rs +++ b/data_types/src/lib.rs @@ -614,7 +614,13 @@ impl ParquetFile { /// Estimate the memory consumption of this object and its contents pub fn size(&self) -> usize { - std::mem::size_of_val(self) + self.column_set.size() + std::mem::size_of_val(self) + + self + .partition_hash_id + .as_ref() + .map(|id| id.size() - std::mem::size_of_val(id)) + .unwrap_or_default() + + self.column_set.size() - std::mem::size_of_val(&self.column_set) } diff --git a/data_types/src/partition.rs b/data_types/src/partition.rs index 342552f79e..0f85addb65 100644 --- a/data_types/src/partition.rs +++ b/data_types/src/partition.rs @@ -19,6 +19,18 @@ pub enum TransitionPartitionId { Deterministic(PartitionHashId), } +impl TransitionPartitionId { + /// Size in bytes including `self`. + pub fn size(&self) -> usize { + match self { + Self::Deprecated(_) => std::mem::size_of::<Self>(), + Self::Deterministic(id) => { + std::mem::size_of::<Self>() + id.size() - std::mem::size_of_val(id) + } + } + } +} + impl std::fmt::Display for TransitionPartitionId { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { @@ -216,6 +228,11 @@ impl PartitionHashId { pub fn as_bytes(&self) -> &[u8] { self.0.as_ref() } + + /// Size in bytes including `Self`.
+ pub fn size(&self) -> usize { + std::mem::size_of::<Self>() + self.0.len() + } } impl<'q> sqlx::encode::Encode<'q, sqlx::Postgres> for &'q PartitionHashId { diff --git a/querier/src/cache/parquet_file.rs b/querier/src/cache/parquet_file.rs index ea48f3c8f7..0cca211320 100644 --- a/querier/src/cache/parquet_file.rs +++ b/querier/src/cache/parquet_file.rs @@ -361,8 +361,8 @@ mod tests { partition.create_parquet_file(builder).await; let table_id = table.table.id; - let single_file_size = 208; - let two_file_size = 384; + let single_file_size = 240; + let two_file_size = 448; assert!(single_file_size < two_file_size); let cache = make_cache(&catalog);