feat: implement raw column size on integer columns

2021-05-07 12:33:52 +01:00 · 2021-05-07 12:33:52 +01:00 · 301df03e72
parent 850db3f6c2
commit 301df03e72
2 changed files with 89 additions and 1 deletions
--- a/read_buffer/src/column/encoding/scalar/fixed_null.rs
+++ b/read_buffer/src/column/encoding/scalar/fixed_null.rs
@ -14,6 +14,7 @@
 //! consumer of these encodings.
 use std::cmp::Ordering;
 use std::fmt::Debug;
 use std::mem::size_of;
 use arrow::{
    array::{Array, PrimitiveArray},
@ -74,7 +75,21 @@ where
    /// Returns an estimation of the total size in bytes used by this column
    /// encoding.
    ///
    /// The estimate is the size of a `PrimitiveArray<T>` value plus whatever
    /// `self.arr.get_array_memory_size()` reports for the array.
    pub fn size(&self) -> usize {
-        std::mem::size_of::<PrimitiveArray<T>>() + self.arr.get_array_memory_size()
+        size_of::<PrimitiveArray<T>>() + self.arr.get_array_memory_size()
    }
    /// Estimates how many bytes the column's underlying values would occupy
    /// if they were stored contiguously and uncompressed.
    ///
    /// Every counted value is sized as 8b, because the logical size of all
    /// types of `T` is 8b. When `include_nulls` is `true` each NULL value is
    /// counted as 8b as well; when it is `false` NULL values add nothing.
    pub fn size_raw(&self, include_nulls: bool) -> usize {
        // `Vec<i64>` stands in for the container overhead. It is probably
        // accurate, but it is not strictly correct when `T` is not i64.
        let container_size = size_of::<Vec<i64>>();

        // Count every row unless the caller asked to leave NULL values out
        // and the column actually contains some.
        let counted_rows = if !self.contains_null() || include_nulls {
            self.num_rows() as usize
        } else {
            self.num_rows() as usize - self.arr.null_count()
        };

        container_size + (counted_rows * 8)
    }
    //
@ -607,6 +622,23 @@ mod test {
        assert_eq!(v.size(), 344);
    }
    #[test]
    fn size_raw() {
        // Every expectation is 24b for the `Vec` itself plus 8b per counted
        // value. NULL values are only counted when `include_nulls` is `true`.

        // 4 rows, 2 of them NULL.
        let v = FixedNull::<UInt64Type>::from(vec![None, None, Some(100), Some(2222)].as_slice());
        assert_eq!(v.size_raw(true), 56); // 24 + (4 * 8)
        assert_eq!(v.size_raw(false), 40); // 24 + (2 * 8)

        // 2 rows, both NULL.
        let v = FixedNull::<Int64Type>::from(vec![None, None].as_slice());
        assert_eq!(v.size_raw(true), 40); // 24 + (2 * 8)
        assert_eq!(v.size_raw(false), 24); // 24 + (0 * 8)

        // 3 rows, 2 of them NULL.
        let v = FixedNull::<Float64Type>::from(vec![None, None, Some(22.3)].as_slice());
        assert_eq!(v.size_raw(true), 48); // 24 + (3 * 8)
        assert_eq!(v.size_raw(false), 32); // 24 + (1 * 8)
    }
    #[test]
    fn first_row_id_eq_value() {
        let v = super::FixedNull::<Int64Type>::from(vec![22, 33, 18].as_slice());
--- a/read_buffer/src/column/integer.rs
+++ b/read_buffer/src/column/integer.rs
@ -1,3 +1,5 @@
 use std::mem::size_of;
 use arrow::{self, array::Array};
 use super::encoding::{scalar::Fixed, scalar::FixedNull};
@ -43,6 +45,33 @@ impl IntegerEncoding {
        }
    }
    /// Estimates the number of bytes the column's integer values would take
    /// up if they were stored contiguously and uncompressed in their native
    /// i64/u64 form.
    ///
    /// When `include_nulls` is `true` each NULL value is effectively sized as
    /// 8b.
    pub fn size_raw(&self, include_nulls: bool) -> usize {
        match self {
            // These encodings compute their own raw size and are the only
            // ones that consult `include_nulls`.
            Self::I64I64N(enc) => enc.size_raw(include_nulls),
            Self::U64U64N(enc) => enc.size_raw(include_nulls),

            Self::I64I64(_)
            | Self::I64I32(_)
            | Self::I64U32(_)
            | Self::I64I16(_)
            | Self::I64U16(_)
            | Self::I64I8(_)
            | Self::I64U8(_)
            | Self::U64U64(_)
            | Self::U64U32(_)
            | Self::U64U16(_)
            | Self::U64U8(_) => {
                // Strictly each arm should use its own i64/u64 type, but
                // sizing everything as i64 is terser and still correct.
                let row_count = self.num_rows() as usize;
                size_of::<Vec<i64>>() + (size_of::<i64>() * row_count)
            }
        }
    }
    /// The total number of rows in the column.
    pub fn num_rows(&self) -> u32 {
        match self {
@ -585,6 +614,8 @@ impl From<arrow::array::UInt64Array> for IntegerEncoding {
 #[cfg(test)]
 mod test {
    use arrow::datatypes::Int64Type;
    use super::*;
    #[test]
@ -614,4 +645,29 @@ mod test {
            //assert_eq!(IntegerEncoding::from(&case), exp);
        }
    }
    #[test]
    fn size_raw() {
        // Columns without NULL values report the same raw size whether or not
        // NULL values are included: (4 * 8) + 24.
        let narrow = IntegerEncoding::I64U8(Fixed::<u8>::from(&[2, 22, 12, 31][..]));
        for &include_nulls in &[true, false] {
            assert_eq!(narrow.size_raw(include_nulls), 56);
        }

        let unsigned = IntegerEncoding::U64U64(Fixed::<u64>::from(&[2, 22, 12, 31][..]));
        for &include_nulls in &[true, false] {
            assert_eq!(unsigned.size_raw(include_nulls), 56);
        }

        let nullable_no_nulls =
            IntegerEncoding::I64I64N(FixedNull::<Int64Type>::from(&[2, 22, 12, 31][..]));
        for &include_nulls in &[true, false] {
            assert_eq!(nullable_no_nulls.size_raw(include_nulls), 56);
        }

        // Six rows, two of which are NULL.
        let nullable_with_nulls = IntegerEncoding::I64I64N(FixedNull::<Int64Type>::from(
            &[Some(2), Some(22), Some(12), None, None, Some(31)][..],
        ));
        // Including NULL values: (6 * 8) + 24
        assert_eq!(nullable_with_nulls.size_raw(true), 72);
        // Excluding NULL values: (4 * 8) + 24
        assert_eq!(nullable_with_nulls.size_raw(false), 56);
    }
 }