feat: implement raw column size on integer columns
parent
850db3f6c2
commit
301df03e72
|
@ -14,6 +14,7 @@
|
|||
//! consumer of these encodings.
|
||||
use std::cmp::Ordering;
|
||||
use std::fmt::Debug;
|
||||
use std::mem::size_of;
|
||||
|
||||
use arrow::{
|
||||
array::{Array, PrimitiveArray},
|
||||
|
@ -74,7 +75,21 @@ where
|
|||
/// Returns an estimation of the total size in bytes used by this column
|
||||
/// encoding.
|
||||
pub fn size(&self) -> usize {
|
||||
std::mem::size_of::<PrimitiveArray<T>>() + self.arr.get_array_memory_size()
|
||||
size_of::<PrimitiveArray<T>>() + self.arr.get_array_memory_size()
|
||||
}
|
||||
|
||||
/// The estimated total size in bytes of the underlying values in the
|
||||
/// column if they were stored contiguously and uncompressed. `include_nulls`
|
||||
/// will effectively size each NULL value as 8b if `true` because the logical
|
||||
/// size of all types of `T` is 8b
|
||||
pub fn size_raw(&self, include_nulls: bool) -> usize {
|
||||
// hmmm whilst Vec<i64> is probably accurate it's not really correct if
|
||||
// T is not i64.
|
||||
let base_size = size_of::<Vec<i64>>();
|
||||
if !self.contains_null() || include_nulls {
|
||||
return base_size + (self.num_rows() as usize * 8);
|
||||
}
|
||||
base_size + ((self.num_rows() as usize - self.arr.null_count()) * 8)
|
||||
}
|
||||
|
||||
//
|
||||
|
@ -607,6 +622,23 @@ mod test {
|
|||
assert_eq!(v.size(), 344);
|
||||
}
|
||||
|
||||
#[test]
fn size_raw() {
    // Raw size = Vec header (24b) + 8b for every counted row.
    let v = FixedNull::<UInt64Type>::from(vec![None, None, Some(100), Some(2222)].as_slice());
    // all four rows counted:     24 + (4 * 8) = 56b
    // only the two non-NULLs:    24 + (2 * 8) = 40b
    assert_eq!(v.size_raw(true), 56);
    assert_eq!(v.size_raw(false), 40);

    let v = FixedNull::<Int64Type>::from(vec![None, None].as_slice());
    // both NULL rows counted:    24 + (2 * 8) = 40b
    // no non-NULL rows at all:   24 + 0       = 24b
    assert_eq!(v.size_raw(true), 40);
    assert_eq!(v.size_raw(false), 24);

    let v = FixedNull::<Float64Type>::from(vec![None, None, Some(22.3)].as_slice());
    // all three rows counted:    24 + (3 * 8) = 48b
    // only the single non-NULL:  24 + (1 * 8) = 32b
    assert_eq!(v.size_raw(true), 48);
    assert_eq!(v.size_raw(false), 32);
}
|
||||
|
||||
#[test]
|
||||
fn first_row_id_eq_value() {
|
||||
let v = super::FixedNull::<Int64Type>::from(vec![22, 33, 18].as_slice());
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
use std::mem::size_of;
|
||||
|
||||
use arrow::{self, array::Array};
|
||||
|
||||
use super::encoding::{scalar::Fixed, scalar::FixedNull};
|
||||
|
@ -43,6 +45,33 @@ impl IntegerEncoding {
|
|||
}
|
||||
}
|
||||
|
||||
/// The estimated total size in bytes of the underlying integer values in
|
||||
/// the column if they were stored contiguously and uncompressed (natively
|
||||
/// as i64/u64). `include_nulls` will effectively size each NULL value as 8b if
|
||||
/// `true`.
|
||||
pub fn size_raw(&self, include_nulls: bool) -> usize {
|
||||
match &self {
|
||||
Self::I64I64(_)
|
||||
| Self::I64I32(_)
|
||||
| Self::I64U32(_)
|
||||
| Self::I64I16(_)
|
||||
| Self::I64U16(_)
|
||||
| Self::I64I8(_)
|
||||
| Self::I64U8(_)
|
||||
| Self::U64U64(_)
|
||||
| Self::U64U32(_)
|
||||
| Self::U64U16(_)
|
||||
| Self::U64U8(_) => {
|
||||
// really one should do the correct i64/u64 in each arm but this
|
||||
// is terser and still correct.
|
||||
size_of::<Vec<i64>>() + (size_of::<i64>() * self.num_rows() as usize)
|
||||
}
|
||||
|
||||
Self::I64I64N(enc) => enc.size_raw(include_nulls),
|
||||
Self::U64U64N(enc) => enc.size_raw(include_nulls),
|
||||
}
|
||||
}
|
||||
|
||||
/// The total number of rows in the column.
|
||||
pub fn num_rows(&self) -> u32 {
|
||||
match self {
|
||||
|
@ -585,6 +614,8 @@ impl From<arrow::array::UInt64Array> for IntegerEncoding {
|
|||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use arrow::datatypes::Int64Type;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
|
@ -614,4 +645,29 @@ mod test {
|
|||
//assert_eq!(IntegerEncoding::from(&case), exp);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn size_raw() {
    // Expected sizes are (rows counted * 8) + 24 throughout.

    // Physically u8, but sized as four 8-byte values: (4 * 8) + 24.
    let enc = IntegerEncoding::I64U8(Fixed::<u8>::from(&[2, 22, 12, 31][..]));
    for &include_nulls in &[true, false] {
        assert_eq!(enc.size_raw(include_nulls), 56);
    }

    // Native u64 values: (4 * 8) + 24.
    let enc = IntegerEncoding::U64U64(Fixed::<u64>::from(&[2, 22, 12, 31][..]));
    for &include_nulls in &[true, false] {
        assert_eq!(enc.size_raw(include_nulls), 56);
    }

    // Nullable encoding holding no NULLs: the flag makes no difference.
    let enc = IntegerEncoding::I64I64N(FixedNull::<Int64Type>::from(&[2, 22, 12, 31][..]));
    for &include_nulls in &[true, false] {
        assert_eq!(enc.size_raw(include_nulls), 56);
    }

    // Nullable encoding with two NULLs among six rows.
    let values = [Some(2), Some(22), Some(12), None, None, Some(31)];
    let enc = IntegerEncoding::I64I64N(FixedNull::<Int64Type>::from(&values[..]));
    // all rows counted:       (6 * 8) + 24
    assert_eq!(enc.size_raw(true), 72);
    // non-NULL rows only:     (4 * 8) + 24
    assert_eq!(enc.size_raw(false), 56);
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue