fix: include `PartitionHashId` in size estimations (#8153)

As with the other types, size estimations are conservative: we assume
the value behind the `Arc` is owned by the estimating party.
pull/24376/head
Marco Neumann 2023-07-05 12:42:39 +02:00 committed by GitHub
parent 3827257f94
commit 35d93f9475
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 26 additions and 3 deletions

View File

@ -614,7 +614,13 @@ impl ParquetFile {
/// Estimate the memory consumption of this object and its contents
///
/// The multi-line expression below is the inline size of `self`, plus the
/// additional bytes reported by the optional partition hash ID, plus the
/// additional bytes reported by the column set.
pub fn size(&self) -> usize {
// NOTE(review): the next line and the multi-line expression after it look
// like two versions of the same computation shown together — confirm which
// one is current.
std::mem::size_of_val(self) + self.column_set.size()
std::mem::size_of_val(self)
+ self
.partition_hash_id
.as_ref()
// `id.size()` includes the ID's own inline size, which is already part of
// `size_of_val(self)`; subtract it so only the extra bytes are added.
.map(|id| id.size() - std::mem::size_of_val(id))
// No hash ID present: contributes zero bytes.
.unwrap_or_default()
+ self.column_set.size()
// Presumably `column_set.size()` also includes its own inline size — TODO
// confirm; it is subtracted here for the same double-counting reason.
- std::mem::size_of_val(&self.column_set)
}

View File

@ -19,6 +19,18 @@ pub enum TransitionPartitionId {
Deterministic(PartitionHashId),
}
impl TransitionPartitionId {
    /// Estimated size of this identifier in bytes, counting the inline
    /// footprint of `Self` exactly once.
    ///
    /// The `Deterministic` variant additionally contributes whatever its
    /// [`PartitionHashId`] reports beyond its own inline size; the
    /// `Deprecated` variant contributes nothing extra.
    pub fn size(&self) -> usize {
        let extra_bytes = match self {
            Self::Deprecated(_) => 0,
            // `hash_id.size()` already includes the hash ID's inline bytes,
            // which live inside `Self`; remove them to avoid double counting.
            Self::Deterministic(hash_id) => hash_id.size() - std::mem::size_of_val(hash_id),
        };

        std::mem::size_of::<Self>() + extra_bytes
    }
}
impl std::fmt::Display for TransitionPartitionId {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
@ -216,6 +228,11 @@ impl PartitionHashId {
pub fn as_bytes(&self) -> &[u8] {
self.0.as_ref()
}
/// Estimated size in bytes: the inline footprint of `Self` plus the length
/// of the wrapped value.
pub fn size(&self) -> usize {
    let inline_bytes = std::mem::size_of::<Self>();
    let payload_bytes = self.0.len();
    inline_bytes + payload_bytes
}
}
impl<'q> sqlx::encode::Encode<'q, sqlx::Postgres> for &'q PartitionHashId {

View File

@ -361,8 +361,8 @@ mod tests {
partition.create_parquet_file(builder).await;
let table_id = table.table.id;
let single_file_size = 208;
let two_file_size = 384;
let single_file_size = 240;
let two_file_size = 448;
assert!(single_file_size < two_file_size);
let cache = make_cache(&catalog);