diff --git a/Cargo.lock b/Cargo.lock index b687e4a234..206bf75d72 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3314,6 +3314,7 @@ dependencies = [ "hashbrown 0.13.1", "iox_time", "itertools", + "mutable_batch_lp", "rand", "schema", "snafu", diff --git a/mutable_batch/Cargo.toml b/mutable_batch/Cargo.toml index 0187c7d7d8..09554b8039 100644 --- a/mutable_batch/Cargo.toml +++ b/mutable_batch/Cargo.toml @@ -19,4 +19,5 @@ itertools = "0.10" workspace-hack = { path = "../workspace-hack"} [dev-dependencies] +mutable_batch_lp = { path = "../mutable_batch_lp" } rand = "0.8" diff --git a/mutable_batch/src/column.rs b/mutable_batch/src/column.rs index 3b9b7efa45..b85ff4948d 100644 --- a/mutable_batch/src/column.rs +++ b/mutable_batch/src/column.rs @@ -243,6 +243,21 @@ impl Column { mem::size_of::() + data_size + self.valid.byte_len() } + /// The approximate memory size, in bytes, of the data in the column. Excludes stats, `self`, + /// and whatever extra space has been allocated for the vecs. + pub fn size_data(&self) -> usize { + match &self.data { + ColumnData::F64(_, _) => mem::size_of::() * self.len(), + ColumnData::I64(_, _) => mem::size_of::() * self.len(), + ColumnData::U64(_, _) => mem::size_of::() * self.len(), + ColumnData::Bool(_, _) => mem::size_of::() * self.len(), + ColumnData::Tag(_, dictionary, _) => { + mem::size_of::() * self.len() + dictionary.size() + } + ColumnData::String(v, _) => v.size(), + } + } + /// Converts this column to an arrow [`ArrayRef`] pub fn to_arrow(&self) -> Result { let nulls = self.valid.to_arrow(); diff --git a/mutable_batch/src/lib.rs b/mutable_batch/src/lib.rs index 27cc37d5dc..db574a6e14 100644 --- a/mutable_batch/src/lib.rs +++ b/mutable_batch/src/lib.rs @@ -212,6 +212,11 @@ impl MutableBatch { .sum::() + self.columns.iter().map(|c| c.size()).sum::() } + + /// Return the approximate memory size of the data in the batch, in bytes. 
+ pub fn size_data(&self) -> usize { + self.columns.iter().map(|c| c.size_data()).sum::() + } } /// A description of the distribution of timestamps in a @@ -249,3 +254,40 @@ impl TimestampSummary { self.record(Time::from_timestamp_nanos(timestamp_nanos)) } } + +#[cfg(test)] +mod tests { + use mutable_batch_lp::lines_to_batches; + + #[test] + fn size_data_without_nulls() { + let batches = lines_to_batches( + "cpu,t1=hello,t2=world f1=1.1,f2=1i 1234\ncpu,t1=h,t2=w f1=2.2,f2=2i 1234", + 0, + ) + .unwrap(); + let batch = batches.get("cpu").unwrap(); + + assert_eq!(batch.size_data(), 128); + + let batches = lines_to_batches( + "cpu,t1=hellomore,t2=world f1=1.1,f2=1i 1234\ncpu,t1=h,t2=w f1=2.2,f2=2i 1234", + 0, + ) + .unwrap(); + let batch = batches.get("cpu").unwrap(); + assert_eq!(batch.size_data(), 138); + } + + #[test] + fn size_data_with_nulls() { + let batches = lines_to_batches( + "cpu,t1=hello,t2=world f1=1.1 1234\ncpu,t2=w f1=2.2,f2=2i 1234", + 0, + ) + .unwrap(); + let batch = batches.get("cpu").unwrap(); + + assert_eq!(batch.size_data(), 124); + } +}