feat: implement byte trimming on nullable encodings

2021-05-17 12:25:54 +01:00 · 2021-05-17 12:25:54 +01:00 · 2963d63b5e
parent 6a72274517
commit 2963d63b5e
1 changed files with 624 additions and 64 deletions
--- a/read_buffer/src/column/integer.rs
+++ b/read_buffer/src/column/integer.rs
@ -1,12 +1,19 @@
 use std::mem::size_of;
-use arrow::{self, array::Array};
+use arrow::{
    self, array::Array, datatypes::Int16Type as ArrowInt16Type,
    datatypes::Int32Type as ArrowInt32Type, datatypes::Int64Type as ArrowInt64Type,
    datatypes::Int8Type as ArrowInt8Type, datatypes::UInt16Type as ArrowUInt16Type,
    datatypes::UInt32Type as ArrowUInt32Type, datatypes::UInt64Type as ArrowUInt64Type,
    datatypes::UInt8Type as ArrowUInt8Type,
 };
 use super::encoding::{scalar::Fixed, scalar::FixedNull};
 use super::{cmp, Statistics};
 use crate::column::{EncodedValues, RowIDs, Scalar, Value, Values};
 pub enum IntegerEncoding {
    // non-null encodings. These are backed by `Vec<T>`
    I64I64(Fixed<i64>),
    I64I32(Fixed<i32>),
    I64U32(Fixed<u32>),
@ -14,15 +21,23 @@ pub enum IntegerEncoding {
    I64U16(Fixed<u16>),
    I64I8(Fixed<i8>),
    I64U8(Fixed<u8>),
    U64U64(Fixed<u64>),
    U64U32(Fixed<u32>),
    U64U16(Fixed<u16>),
    U64U8(Fixed<u8>),
-    // Nullable encodings - TODO, add variants for smaller physical types.
+    // Nullable encodings. These are backed by an Arrow array.
-    I64I64N(FixedNull<arrow::datatypes::Int64Type>),
+    I64I64N(FixedNull<ArrowInt64Type>),
-    U64U64N(FixedNull<arrow::datatypes::UInt64Type>),
+    I64I32N(FixedNull<ArrowInt32Type>),
    I64U32N(FixedNull<ArrowUInt32Type>),
    I64I16N(FixedNull<ArrowInt16Type>),
    I64U16N(FixedNull<ArrowUInt16Type>),
    I64I8N(FixedNull<ArrowInt8Type>),
    I64U8N(FixedNull<ArrowUInt8Type>),
    U64U64N(FixedNull<ArrowUInt64Type>),
    U64U32N(FixedNull<ArrowUInt32Type>),
    U64U16N(FixedNull<ArrowUInt16Type>),
    U64U8N(FixedNull<ArrowUInt8Type>),
 }
 impl PartialEq for IntegerEncoding {
@ -40,12 +55,57 @@ impl PartialEq for IntegerEncoding {
            (Self::U64U16(a), Self::U64U16(b)) => a == b,
            (Self::U64U8(a), Self::U64U8(b)) => a == b,
            (Self::I64I64N(a), Self::I64I64N(b)) => {
-                let a = a.all_values(vec![]);
+                let a = a.all_values::<i64>(vec![]);
                let b = b.all_values(vec![]);
                a == b
            }
            (Self::I64I32N(a), Self::I64I32N(b)) => {
                let a = a.all_values::<i64>(vec![]);
                let b = b.all_values(vec![]);
                a == b
            }
            (Self::I64U32N(a), Self::I64U32N(b)) => {
                let a = a.all_values::<i64>(vec![]);
                let b = b.all_values(vec![]);
                a == b
            }
            (Self::I64I16N(a), Self::I64I16N(b)) => {
                let a = a.all_values::<i64>(vec![]);
                let b = b.all_values(vec![]);
                a == b
            }
            (Self::I64U16N(a), Self::I64U16N(b)) => {
                let a = a.all_values::<i64>(vec![]);
                let b = b.all_values(vec![]);
                a == b
            }
            (Self::I64I8N(a), Self::I64I8N(b)) => {
                let a = a.all_values::<i64>(vec![]);
                let b = b.all_values(vec![]);
                a == b
            }
            (Self::I64U8N(a), Self::I64U8N(b)) => {
                let a = a.all_values::<i64>(vec![]);
                let b = b.all_values(vec![]);
                a == b
            }
            (Self::U64U64N(a), Self::U64U64N(b)) => {
-                let a = a.all_values(vec![]);
+                let a = a.all_values::<u64>(vec![]);
                let b = b.all_values(vec![]);
                a == b
            }
            (Self::U64U32N(a), Self::U64U32N(b)) => {
                let a = a.all_values::<u64>(vec![]);
                let b = b.all_values(vec![]);
                a == b
            }
            (Self::U64U16N(a), Self::U64U16N(b)) => {
                let a = a.all_values::<u64>(vec![]);
                let b = b.all_values(vec![]);
                a == b
            }
            (Self::U64U8N(a), Self::U64U8N(b)) => {
                let a = a.all_values::<u64>(vec![]);
                let b = b.all_values(vec![]);
                a == b
            }
@ -70,7 +130,16 @@ impl IntegerEncoding {
            Self::U64U16(enc) => enc.size(),
            Self::U64U8(enc) => enc.size(),
            Self::I64I64N(enc) => enc.size(),
            Self::I64I32N(enc) => enc.size(),
            Self::I64U32N(enc) => enc.size(),
            Self::I64I16N(enc) => enc.size(),
            Self::I64U16N(enc) => enc.size(),
            Self::I64I8N(enc) => enc.size(),
            Self::I64U8N(enc) => enc.size(),
            Self::U64U64N(enc) => enc.size(),
            Self::U64U32N(enc) => enc.size(),
            Self::U64U16N(enc) => enc.size(),
            Self::U64U8N(enc) => enc.size(),
        }
    }
@ -97,7 +166,16 @@ impl IntegerEncoding {
            }
            Self::I64I64N(enc) => enc.size_raw(include_nulls),
            Self::I64I32N(enc) => enc.size_raw(include_nulls),
            Self::I64U32N(enc) => enc.size_raw(include_nulls),
            Self::I64I16N(enc) => enc.size_raw(include_nulls),
            Self::I64U16N(enc) => enc.size_raw(include_nulls),
            Self::I64I8N(enc) => enc.size_raw(include_nulls),
            Self::I64U8N(enc) => enc.size_raw(include_nulls),
            Self::U64U64N(enc) => enc.size_raw(include_nulls),
            Self::U64U32N(enc) => enc.size_raw(include_nulls),
            Self::U64U16N(enc) => enc.size_raw(include_nulls),
            Self::U64U8N(enc) => enc.size_raw(include_nulls),
        }
    }
@ -116,7 +194,16 @@ impl IntegerEncoding {
            Self::U64U16(enc) => enc.num_rows(),
            Self::U64U8(enc) => enc.num_rows(),
            Self::I64I64N(enc) => enc.num_rows(),
            Self::I64I32N(enc) => enc.num_rows(),
            Self::I64U32N(enc) => enc.num_rows(),
            Self::I64I16N(enc) => enc.num_rows(),
            Self::I64U16N(enc) => enc.num_rows(),
            Self::I64I8N(enc) => enc.num_rows(),
            Self::I64U8N(enc) => enc.num_rows(),
            Self::U64U64N(enc) => enc.num_rows(),
            Self::U64U32N(enc) => enc.num_rows(),
            Self::U64U16N(enc) => enc.num_rows(),
            Self::U64U8N(enc) => enc.num_rows(),
        }
    }
@ -137,7 +224,16 @@ impl IntegerEncoding {
    pub fn contains_null(&self) -> bool {
        // Only the nullable (`*N`) variants are asked whether they hold a
        // NULL; every other variant falls through to `false`.
        match self {
            // unsigned 64-bit logical type, narrowest physical type first
            Self::U64U8N(arr) => arr.contains_null(),
            Self::U64U16N(arr) => arr.contains_null(),
            Self::U64U32N(arr) => arr.contains_null(),
            Self::U64U64N(arr) => arr.contains_null(),
            // signed 64-bit logical type, narrowest physical type first
            Self::I64U8N(arr) => arr.contains_null(),
            Self::I64I8N(arr) => arr.contains_null(),
            Self::I64U16N(arr) => arr.contains_null(),
            Self::I64I16N(arr) => arr.contains_null(),
            Self::I64U32N(arr) => arr.contains_null(),
            Self::I64I32N(arr) => arr.contains_null(),
            Self::I64I64N(arr) => arr.contains_null(),
            _ => false,
        }
    }
@ -157,7 +253,16 @@ impl IntegerEncoding {
            Self::U64U16(_) => 0,
            Self::U64U8(_) => 0,
            Self::I64I64N(enc) => enc.null_count(),
            Self::I64I32N(enc) => enc.null_count(),
            Self::I64U32N(enc) => enc.null_count(),
            Self::I64I16N(enc) => enc.null_count(),
            Self::I64U16N(enc) => enc.null_count(),
            Self::I64I8N(enc) => enc.null_count(),
            Self::I64U8N(enc) => enc.null_count(),
            Self::U64U64N(enc) => enc.null_count(),
            Self::U64U32N(enc) => enc.null_count(),
            Self::U64U16N(enc) => enc.null_count(),
            Self::U64U8N(enc) => enc.null_count(),
        }
    }
@ -165,7 +270,16 @@ impl IntegerEncoding {
    pub fn has_any_non_null_value(&self) -> bool {
        // The nullable (`*N`) variants delegate to the wrapped encoding; all
        // remaining variants report `true` unconditionally.
        match self {
            // unsigned 64-bit logical type, narrowest physical type first
            Self::U64U8N(arr) => arr.has_any_non_null_value(),
            Self::U64U16N(arr) => arr.has_any_non_null_value(),
            Self::U64U32N(arr) => arr.has_any_non_null_value(),
            Self::U64U64N(arr) => arr.has_any_non_null_value(),
            // signed 64-bit logical type, narrowest physical type first
            Self::I64U8N(arr) => arr.has_any_non_null_value(),
            Self::I64I8N(arr) => arr.has_any_non_null_value(),
            Self::I64U16N(arr) => arr.has_any_non_null_value(),
            Self::I64I16N(arr) => arr.has_any_non_null_value(),
            Self::I64U32N(arr) => arr.has_any_non_null_value(),
            Self::I64I32N(arr) => arr.has_any_non_null_value(),
            Self::I64I64N(arr) => arr.has_any_non_null_value(),
            _ => true,
        }
    }
@ -175,7 +289,16 @@ impl IntegerEncoding {
    pub fn has_non_null_value(&self, row_ids: &[u32]) -> bool {
        // The nullable (`*N`) variants delegate to the wrapped encoding. For
        // every other variant the answer depends only on whether any row ids
        // were supplied at all.
        match self {
            // unsigned 64-bit logical type, narrowest physical type first
            Self::U64U8N(arr) => arr.has_non_null_value(row_ids),
            Self::U64U16N(arr) => arr.has_non_null_value(row_ids),
            Self::U64U32N(arr) => arr.has_non_null_value(row_ids),
            Self::U64U64N(arr) => arr.has_non_null_value(row_ids),
            // signed 64-bit logical type, narrowest physical type first
            Self::I64U8N(arr) => arr.has_non_null_value(row_ids),
            Self::I64I8N(arr) => arr.has_non_null_value(row_ids),
            Self::I64U16N(arr) => arr.has_non_null_value(row_ids),
            Self::I64I16N(arr) => arr.has_non_null_value(row_ids),
            Self::I64U32N(arr) => arr.has_non_null_value(row_ids),
            Self::I64I32N(arr) => arr.has_non_null_value(row_ids),
            Self::I64I64N(arr) => arr.has_non_null_value(row_ids),
            // non-nullable encodings: any requested row is non-null
            _ => !row_ids.is_empty(),
        }
    }
@ -187,25 +310,64 @@ impl IntegerEncoding {
            // `c.value` should return as the logical type
            // signed 64-bit variants - logical type is i64 for all these
-            Self::I64I64(c) => Value::Scalar(Scalar::I64(c.value(row_id))),
+            Self::I64I64(enc) => Value::Scalar(Scalar::I64(enc.value(row_id))),
-            Self::I64I32(c) => Value::Scalar(Scalar::I64(c.value(row_id))),
+            Self::I64I32(enc) => Value::Scalar(Scalar::I64(enc.value(row_id))),
-            Self::I64U32(c) => Value::Scalar(Scalar::I64(c.value(row_id))),
+            Self::I64U32(enc) => Value::Scalar(Scalar::I64(enc.value(row_id))),
-            Self::I64I16(c) => Value::Scalar(Scalar::I64(c.value(row_id))),
+            Self::I64I16(enc) => Value::Scalar(Scalar::I64(enc.value(row_id))),
-            Self::I64U16(c) => Value::Scalar(Scalar::I64(c.value(row_id))),
+            Self::I64U16(enc) => Value::Scalar(Scalar::I64(enc.value(row_id))),
-            Self::I64I8(c) => Value::Scalar(Scalar::I64(c.value(row_id))),
+            Self::I64I8(enc) => Value::Scalar(Scalar::I64(enc.value(row_id))),
-            Self::I64U8(c) => Value::Scalar(Scalar::I64(c.value(row_id))),
+            Self::I64U8(enc) => Value::Scalar(Scalar::I64(enc.value(row_id))),
            // unsigned 64-bit variants - logical type is u64 for all these
-            Self::U64U64(c) => Value::Scalar(Scalar::U64(c.value(row_id))),
+            Self::U64U64(enc) => Value::Scalar(Scalar::U64(enc.value(row_id))),
-            Self::U64U32(c) => Value::Scalar(Scalar::U64(c.value(row_id))),
+            Self::U64U32(enc) => Value::Scalar(Scalar::U64(enc.value(row_id))),
-            Self::U64U16(c) => Value::Scalar(Scalar::U64(c.value(row_id))),
+            Self::U64U16(enc) => Value::Scalar(Scalar::U64(enc.value(row_id))),
-            Self::U64U8(c) => Value::Scalar(Scalar::U64(c.value(row_id))),
+            Self::U64U8(enc) => Value::Scalar(Scalar::U64(enc.value(row_id))),
-            Self::I64I64N(c) => match c.value(row_id) {
+            // signed 64-bit variants
            Self::I64I64N(enc) => match enc.value(row_id) {
                Some(v) => Value::Scalar(Scalar::I64(v)),
                None => Value::Null,
            },
-            Self::U64U64N(c) => match c.value(row_id) {
+            Self::I64I32N(enc) => match enc.value(row_id) {
                Some(v) => Value::Scalar(Scalar::I64(v)),
                None => Value::Null,
            },
            Self::I64U32N(enc) => match enc.value(row_id) {
                Some(v) => Value::Scalar(Scalar::I64(v)),
                None => Value::Null,
            },
            Self::I64I16N(enc) => match enc.value(row_id) {
                Some(v) => Value::Scalar(Scalar::I64(v)),
                None => Value::Null,
            },
            Self::I64U16N(enc) => match enc.value(row_id) {
                Some(v) => Value::Scalar(Scalar::I64(v)),
                None => Value::Null,
            },
            Self::I64I8N(enc) => match enc.value(row_id) {
                Some(v) => Value::Scalar(Scalar::I64(v)),
                None => Value::Null,
            },
            Self::I64U8N(enc) => match enc.value(row_id) {
                Some(v) => Value::Scalar(Scalar::I64(v)),
                None => Value::Null,
            },
            // unsigned 64-bit variants
            Self::U64U64N(enc) => match enc.value(row_id) {
                Some(v) => Value::Scalar(Scalar::U64(v)),
                None => Value::Null,
            },
            Self::U64U32N(enc) => match enc.value(row_id) {
                Some(v) => Value::Scalar(Scalar::U64(v)),
                None => Value::Null,
            },
            Self::U64U16N(enc) => match enc.value(row_id) {
                Some(v) => Value::Scalar(Scalar::U64(v)),
                None => Value::Null,
            },
            Self::U64U8N(enc) => match enc.value(row_id) {
                Some(v) => Value::Scalar(Scalar::U64(v)),
                None => Value::Null,
            },
@ -219,22 +381,34 @@ impl IntegerEncoding {
    pub fn values(&self, row_ids: &[u32]) -> Values<'_> {
        // Delegates to the wrapped encoding's `values(row_ids, ..)` and wraps
        // the result in the `Values` variant of the column's logical type:
        // `I64*` variants yield `Values::I64` / `Values::I64N`, `U64*` variants
        // yield `Values::U64` / `Values::U64N`, whatever the physical type is.
        // Each arm passes a fresh empty `Vec` as the second argument
        // (presumably the output buffer - confirm against the encodings'
        // `values` implementations).
        match &self {
            // signed 64-bit variants - logical type is i64 for all these
-            Self::I64I64(c) => Values::I64(c.values::<i64>(row_ids, vec![])),
+            Self::I64I64(enc) => Values::I64(enc.values::<i64>(row_ids, vec![])),
-            Self::I64I32(c) => Values::I64(c.values::<i64>(row_ids, vec![])),
+            Self::I64I32(enc) => Values::I64(enc.values::<i64>(row_ids, vec![])),
-            Self::I64U32(c) => Values::I64(c.values::<i64>(row_ids, vec![])),
+            Self::I64U32(enc) => Values::I64(enc.values::<i64>(row_ids, vec![])),
-            Self::I64I16(c) => Values::I64(c.values::<i64>(row_ids, vec![])),
+            Self::I64I16(enc) => Values::I64(enc.values::<i64>(row_ids, vec![])),
-            Self::I64U16(c) => Values::I64(c.values::<i64>(row_ids, vec![])),
+            Self::I64U16(enc) => Values::I64(enc.values::<i64>(row_ids, vec![])),
-            Self::I64I8(c) => Values::I64(c.values::<i64>(row_ids, vec![])),
+            Self::I64I8(enc) => Values::I64(enc.values::<i64>(row_ids, vec![])),
-            Self::I64U8(c) => Values::I64(c.values::<i64>(row_ids, vec![])),
+            Self::I64U8(enc) => Values::I64(enc.values::<i64>(row_ids, vec![])),
            // unsigned 64-bit variants - logical type is u64 for all these
-            Self::U64U64(c) => Values::U64(c.values::<u64>(row_ids, vec![])),
+            Self::U64U64(enc) => Values::U64(enc.values::<u64>(row_ids, vec![])),
-            Self::U64U32(c) => Values::U64(c.values::<u64>(row_ids, vec![])),
+            Self::U64U32(enc) => Values::U64(enc.values::<u64>(row_ids, vec![])),
-            Self::U64U16(c) => Values::U64(c.values::<u64>(row_ids, vec![])),
+            Self::U64U16(enc) => Values::U64(enc.values::<u64>(row_ids, vec![])),
-            Self::U64U8(c) => Values::U64(c.values::<u64>(row_ids, vec![])),
+            Self::U64U8(enc) => Values::U64(enc.values::<u64>(row_ids, vec![])),
-            Self::I64I64N(c) => Values::I64N(c.values(row_ids, vec![])),
+            // signed 64-bit nullable variants - logical type is i64 for all these.
-            Self::U64U64N(c) => Values::U64N(c.values(row_ids, vec![])),
+            Self::I64I64N(enc) => Values::I64N(enc.values(row_ids, vec![])),
            Self::I64I32N(enc) => Values::I64N(enc.values(row_ids, vec![])),
            Self::I64U32N(enc) => Values::I64N(enc.values(row_ids, vec![])),
            Self::I64I16N(enc) => Values::I64N(enc.values(row_ids, vec![])),
            Self::I64U16N(enc) => Values::I64N(enc.values(row_ids, vec![])),
            Self::I64I8N(enc) => Values::I64N(enc.values(row_ids, vec![])),
            Self::I64U8N(enc) => Values::I64N(enc.values(row_ids, vec![])),
            // unsigned 64-bit nullable variants - logical type is u64 for all these.
            Self::U64U64N(enc) => Values::U64N(enc.values(row_ids, vec![])),
            Self::U64U32N(enc) => Values::U64N(enc.values(row_ids, vec![])),
            Self::U64U16N(enc) => Values::U64N(enc.values(row_ids, vec![])),
            Self::U64U8N(enc) => Values::U64N(enc.values(row_ids, vec![])),
        }
    }
@ -259,8 +433,20 @@ impl IntegerEncoding {
            Self::U64U16(c) => Values::U64(c.all_values::<u64>(vec![])),
            Self::U64U8(c) => Values::U64(c.all_values::<u64>(vec![])),
-            Self::I64I64N(c) => Values::I64N(c.all_values(vec![])),
+            // signed 64-bit nullable variants - logical type is i64 for all these.
-            Self::U64U64N(c) => Values::U64N(c.all_values(vec![])),
+            Self::I64I64N(enc) => Values::I64N(enc.all_values(vec![])),
            Self::I64I32N(enc) => Values::I64N(enc.all_values(vec![])),
            Self::I64U32N(enc) => Values::I64N(enc.all_values(vec![])),
            Self::I64I16N(enc) => Values::I64N(enc.all_values(vec![])),
            Self::I64U16N(enc) => Values::I64N(enc.all_values(vec![])),
            Self::I64I8N(enc) => Values::I64N(enc.all_values(vec![])),
            Self::I64U8N(enc) => Values::I64N(enc.all_values(vec![])),
            // unsigned 64-bit nullable variants - logical type is u64 for all these.
            Self::U64U64N(enc) => Values::U64N(enc.all_values(vec![])),
            Self::U64U32N(enc) => Values::U64N(enc.all_values(vec![])),
            Self::U64U16N(enc) => Values::U64N(enc.all_values(vec![])),
            Self::U64U8N(enc) => Values::U64N(enc.all_values(vec![])),
        }
    }
@ -326,8 +512,20 @@ impl IntegerEncoding {
            Self::U64U16(c) => c.row_ids_filter(value.as_u16(), op, dst),
            Self::U64U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
-            Self::I64I64N(c) => c.row_ids_filter(value.as_i64(), op, dst),
+            // signed 64-bit nullable variants - logical type is i64 for all these.
-            Self::U64U64N(c) => c.row_ids_filter(value.as_u64(), op, dst),
+            Self::I64I64N(enc) => enc.row_ids_filter(value.as_i64(), op, dst),
            Self::I64I32N(enc) => enc.row_ids_filter(value.as_i32(), op, dst),
            Self::I64U32N(enc) => enc.row_ids_filter(value.as_u32(), op, dst),
            Self::I64I16N(enc) => enc.row_ids_filter(value.as_i16(), op, dst),
            Self::I64U16N(enc) => enc.row_ids_filter(value.as_u16(), op, dst),
            Self::I64I8N(enc) => enc.row_ids_filter(value.as_i8(), op, dst),
            Self::I64U8N(enc) => enc.row_ids_filter(value.as_u8(), op, dst),
            // unsigned 64-bit nullable variants - logical type is u64 for all these.
            Self::U64U64N(enc) => enc.row_ids_filter(value.as_u64(), op, dst),
            Self::U64U32N(enc) => enc.row_ids_filter(value.as_u32(), op, dst),
            Self::U64U16N(enc) => enc.row_ids_filter(value.as_u16(), op, dst),
            Self::U64U8N(enc) => enc.row_ids_filter(value.as_u8(), op, dst),
        }
    }
@ -378,8 +576,41 @@ impl IntegerEncoding {
                c.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst)
            }
-            Self::I64I64N(_) => todo!(),
+            Self::I64I64N(enc) => {
-            Self::U64U64N(_) => todo!(),
+                enc.row_ids_filter_range((low.1.as_i64(), low.0), (high.1.as_i64(), high.0), dst)
            }
            Self::I64I32N(enc) => {
                enc.row_ids_filter_range((low.1.as_i32(), low.0), (high.1.as_i32(), high.0), dst)
            }
            Self::I64U32N(enc) => {
                enc.row_ids_filter_range((low.1.as_u32(), low.0), (high.1.as_u32(), high.0), dst)
            }
            Self::I64I16N(enc) => {
                enc.row_ids_filter_range((low.1.as_i16(), low.0), (high.1.as_i16(), high.0), dst)
            }
            Self::I64U16N(enc) => {
                enc.row_ids_filter_range((low.1.as_u16(), low.0), (high.1.as_u16(), high.0), dst)
            }
            Self::I64I8N(enc) => {
                enc.row_ids_filter_range((low.1.as_i8(), low.0), (high.1.as_i8(), high.0), dst)
            }
            Self::I64U8N(enc) => {
                enc.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst)
            }
            // unsigned 64-bit nullable variants - logical type is u64 for all these.
            Self::U64U64N(enc) => {
                enc.row_ids_filter_range((low.1.as_u64(), low.0), (high.1.as_u64(), high.0), dst)
            }
            Self::U64U32N(enc) => {
                enc.row_ids_filter_range((low.1.as_u32(), low.0), (high.1.as_u32(), high.0), dst)
            }
            Self::U64U16N(enc) => {
                enc.row_ids_filter_range((low.1.as_u16(), low.0), (high.1.as_u16(), high.0), dst)
            }
            Self::U64U8N(enc) => {
                enc.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst)
            }
        }
    }
@ -396,11 +627,49 @@ impl IntegerEncoding {
            Self::U64U32(c) => Value::Scalar(Scalar::U64(c.min(row_ids))),
            Self::U64U16(c) => Value::Scalar(Scalar::U64(c.min(row_ids))),
            Self::U64U8(c) => Value::Scalar(Scalar::U64(c.min(row_ids))),
-            Self::I64I64N(c) => match c.min(row_ids) {
+
            Self::I64I64N(enc) => match enc.min(row_ids) {
                Some(v) => Value::Scalar(Scalar::I64(v)),
                None => Value::Null,
            },
-            Self::U64U64N(c) => match c.min(row_ids) {
+            Self::I64I32N(enc) => match enc.min(row_ids) {
                Some(v) => Value::Scalar(Scalar::I64(v)),
                None => Value::Null,
            },
            Self::I64U32N(enc) => match enc.min(row_ids) {
                Some(v) => Value::Scalar(Scalar::I64(v)),
                None => Value::Null,
            },
            Self::I64I16N(enc) => match enc.min(row_ids) {
                Some(v) => Value::Scalar(Scalar::I64(v)),
                None => Value::Null,
            },
            Self::I64U16N(enc) => match enc.min(row_ids) {
                Some(v) => Value::Scalar(Scalar::I64(v)),
                None => Value::Null,
            },
            Self::I64I8N(enc) => match enc.min(row_ids) {
                Some(v) => Value::Scalar(Scalar::I64(v)),
                None => Value::Null,
            },
            Self::I64U8N(enc) => match enc.min(row_ids) {
                Some(v) => Value::Scalar(Scalar::I64(v)),
                None => Value::Null,
            },
            Self::U64U64N(enc) => match enc.min(row_ids) {
                Some(v) => Value::Scalar(Scalar::U64(v)),
                None => Value::Null,
            },
            Self::U64U32N(enc) => match enc.min(row_ids) {
                Some(v) => Value::Scalar(Scalar::U64(v)),
                None => Value::Null,
            },
            Self::U64U16N(enc) => match enc.min(row_ids) {
                Some(v) => Value::Scalar(Scalar::U64(v)),
                None => Value::Null,
            },
            Self::U64U8N(enc) => match enc.min(row_ids) {
                Some(v) => Value::Scalar(Scalar::U64(v)),
                None => Value::Null,
            },
@ -420,11 +689,48 @@ impl IntegerEncoding {
            Self::U64U32(c) => Value::Scalar(Scalar::U64(c.max(row_ids))),
            Self::U64U16(c) => Value::Scalar(Scalar::U64(c.max(row_ids))),
            Self::U64U8(c) => Value::Scalar(Scalar::U64(c.max(row_ids))),
-            Self::I64I64N(c) => match c.max(row_ids) {
+            Self::I64I64N(enc) => match enc.max(row_ids) {
                Some(v) => Value::Scalar(Scalar::I64(v)),
                None => Value::Null,
            },
-            Self::U64U64N(c) => match c.max(row_ids) {
+            Self::I64I32N(enc) => match enc.max(row_ids) {
                Some(v) => Value::Scalar(Scalar::I64(v)),
                None => Value::Null,
            },
            Self::I64U32N(enc) => match enc.max(row_ids) {
                Some(v) => Value::Scalar(Scalar::I64(v)),
                None => Value::Null,
            },
            Self::I64I16N(enc) => match enc.max(row_ids) {
                Some(v) => Value::Scalar(Scalar::I64(v)),
                None => Value::Null,
            },
            Self::I64U16N(enc) => match enc.max(row_ids) {
                Some(v) => Value::Scalar(Scalar::I64(v)),
                None => Value::Null,
            },
            Self::I64I8N(enc) => match enc.max(row_ids) {
                Some(v) => Value::Scalar(Scalar::I64(v)),
                None => Value::Null,
            },
            Self::I64U8N(enc) => match enc.max(row_ids) {
                Some(v) => Value::Scalar(Scalar::I64(v)),
                None => Value::Null,
            },
            Self::U64U64N(enc) => match enc.max(row_ids) {
                Some(v) => Value::Scalar(Scalar::U64(v)),
                None => Value::Null,
            },
            Self::U64U32N(enc) => match enc.max(row_ids) {
                Some(v) => Value::Scalar(Scalar::U64(v)),
                None => Value::Null,
            },
            Self::U64U16N(enc) => match enc.max(row_ids) {
                Some(v) => Value::Scalar(Scalar::U64(v)),
                None => Value::Null,
            },
            Self::U64U8N(enc) => match enc.max(row_ids) {
                Some(v) => Value::Scalar(Scalar::U64(v)),
                None => Value::Null,
            },
@ -444,11 +750,48 @@ impl IntegerEncoding {
            Self::U64U32(c) => Scalar::U64(c.sum(row_ids)),
            Self::U64U16(c) => Scalar::U64(c.sum(row_ids)),
            Self::U64U8(c) => Scalar::U64(c.sum(row_ids)),
-            Self::I64I64N(c) => match c.sum(row_ids) {
+            Self::I64I64N(enc) => match enc.sum(row_ids) {
                Some(v) => Scalar::I64(v),
                None => Scalar::Null,
            },
-            Self::U64U64N(c) => match c.sum(row_ids) {
+            Self::I64I32N(enc) => match enc.sum(row_ids) {
                Some(v) => Scalar::I64(v),
                None => Scalar::Null,
            },
            Self::I64U32N(enc) => match enc.sum(row_ids) {
                Some(v) => Scalar::I64(v),
                None => Scalar::Null,
            },
            Self::I64I16N(enc) => match enc.sum(row_ids) {
                Some(v) => Scalar::I64(v),
                None => Scalar::Null,
            },
            Self::I64U16N(enc) => match enc.sum(row_ids) {
                Some(v) => Scalar::I64(v),
                None => Scalar::Null,
            },
            Self::I64I8N(enc) => match enc.sum(row_ids) {
                Some(v) => Scalar::I64(v),
                None => Scalar::Null,
            },
            Self::I64U8N(enc) => match enc.sum(row_ids) {
                Some(v) => Scalar::I64(v),
                None => Scalar::Null,
            },
            Self::U64U64N(enc) => match enc.sum(row_ids) {
                Some(v) => Scalar::U64(v),
                None => Scalar::Null,
            },
            Self::U64U32N(enc) => match enc.sum(row_ids) {
                Some(v) => Scalar::U64(v),
                None => Scalar::Null,
            },
            Self::U64U16N(enc) => match enc.sum(row_ids) {
                Some(v) => Scalar::U64(v),
                None => Scalar::Null,
            },
            Self::U64U8N(enc) => match enc.sum(row_ids) {
                Some(v) => Scalar::U64(v),
                None => Scalar::Null,
            },
@ -468,8 +811,17 @@ impl IntegerEncoding {
            Self::U64U32(c) => c.count(row_ids),
            Self::U64U16(c) => c.count(row_ids),
            Self::U64U8(c) => c.count(row_ids),
-            Self::I64I64N(c) => c.count(row_ids),
+            Self::I64I64N(enc) => enc.count(row_ids),
-            Self::U64U64N(c) => c.count(row_ids),
+            Self::I64I32N(enc) => enc.count(row_ids),
            Self::I64U32N(enc) => enc.count(row_ids),
            Self::I64I16N(enc) => enc.count(row_ids),
            Self::I64U16N(enc) => enc.count(row_ids),
            Self::I64I8N(enc) => enc.count(row_ids),
            Self::I64U8N(enc) => enc.count(row_ids),
            Self::U64U64N(enc) => enc.count(row_ids),
            Self::U64U32N(enc) => enc.count(row_ids),
            Self::U64U16N(enc) => enc.count(row_ids),
            Self::U64U8N(enc) => enc.count(row_ids),
        }
    }
@ -488,7 +840,16 @@ impl IntegerEncoding {
            Self::U64U16(_) => "BT_U16",
            Self::U64U8(_) => "BT_U8",
            // Nullable variants: the label is the physical storage type with
            // an `N` suffix. Variants stored at the full 64-bit width are
            // labelled "None".
            Self::I64I64N(_) => "None",
            Self::I64I32N(_) => "BT_I32N",
            Self::I64U32N(_) => "BT_U32N",
            // Signed 16-bit storage. This arm previously returned "BT_U16N",
            // which made it indistinguishable from `I64U16N` below.
            Self::I64I16N(_) => "BT_I16N",
            Self::I64U16N(_) => "BT_U16N",
            Self::I64I8N(_) => "BT_I8N",
            Self::I64U8N(_) => "BT_U8N",
            Self::U64U64N(_) => "None",
            Self::U64U32N(_) => "BT_U32N",
            Self::U64U16N(_) => "BT_U16N",
            Self::U64U8N(_) => "BT_U8N",
        }
    }
@ -507,7 +868,16 @@ impl IntegerEncoding {
            Self::U64U16(_) => "u64",
            Self::U64U8(_) => "u64",
            Self::I64I64N(_) => "i64",
            Self::I64I32N(_) => "i64",
            Self::I64U32N(_) => "i64",
            Self::I64I16N(_) => "i64",
            Self::I64U16N(_) => "i64",
            Self::I64I8N(_) => "i64",
            Self::I64U8N(_) => "i64",
            Self::U64U64N(_) => "u64",
            Self::U64U32N(_) => "u64",
            Self::U64U16N(_) => "u64",
            Self::U64U8N(_) => "u64",
        }
    }
 }
@ -528,7 +898,16 @@ impl std::fmt::Display for IntegerEncoding {
            Self::U64U16(enc) => write!(f, "[{}]: {}", name, enc),
            Self::U64U8(enc) => write!(f, "[{}]: {}", name, enc),
            Self::I64I64N(enc) => write!(f, "[{}]: {}", name, enc),
            Self::I64I32N(enc) => write!(f, "[{}]: {}", name, enc),
            Self::I64U32N(enc) => write!(f, "[{}]: {}", name, enc),
            Self::I64I16N(enc) => write!(f, "[{}]: {}", name, enc),
            Self::I64U16N(enc) => write!(f, "[{}]: {}", name, enc),
            Self::I64I8N(enc) => write!(f, "[{}]: {}", name, enc),
            Self::I64U8N(enc) => write!(f, "[{}]: {}", name, enc),
            Self::U64U64N(enc) => write!(f, "[{}]: {}", name, enc),
            Self::U64U32N(enc) => write!(f, "[{}]: {}", name, enc),
            Self::U64U16N(enc) => write!(f, "[{}]: {}", name, enc),
            Self::U64U8N(enc) => write!(f, "[{}]: {}", name, enc),
        }
    }
 }
@ -549,7 +928,16 @@ impl std::fmt::Debug for IntegerEncoding {
            Self::U64U16(enc) => write!(f, "[{}]: {:?}", name, enc),
            Self::U64U8(enc) => write!(f, "[{}]: {:?}", name, enc),
            Self::I64I64N(enc) => write!(f, "[{}]: {:?}", name, enc),
            Self::I64I32N(enc) => write!(f, "[{}]: {:?}", name, enc),
            Self::I64U32N(enc) => write!(f, "[{}]: {:?}", name, enc),
            Self::I64I16N(enc) => write!(f, "[{}]: {:?}", name, enc),
            Self::I64U16N(enc) => write!(f, "[{}]: {:?}", name, enc),
            Self::I64I8N(enc) => write!(f, "[{}]: {:?}", name, enc),
            Self::I64U8N(enc) => write!(f, "[{}]: {:?}", name, enc),
            Self::U64U64N(enc) => write!(f, "[{}]: {:?}", name, enc),
            Self::U64U32N(enc) => write!(f, "[{}]: {:?}", name, enc),
            Self::U64U16N(enc) => write!(f, "[{}]: {:?}", name, enc),
            Self::U64U8N(enc) => write!(f, "[{}]: {:?}", name, enc),
        }
    }
 }
@ -603,17 +991,48 @@ impl From<&[i64]> for IntegerEncoding {
 /// Converts an Arrow array into an IntegerEncoding.
 ///
-/// TODO(edd): convert underlying type of Arrow data to smallest physical
+/// The most compact physical Arrow array type is used to store the column
-/// representation.
+/// within a `FixedNull` encoding.
 impl From<arrow::array::Int64Array> for IntegerEncoding {
    fn from(arr: arrow::array::Int64Array) -> Self {
        if arr.null_count() == 0 {
            return Self::from(arr.values());
        }
-        // TODO(edd): currently fixed null only supports 64-bit logical/physical
+        // determine min and max values.
-        // types. Need to add support for storing as smaller physical types.
+        let min = arrow::compute::kernels::aggregate::min(&arr);
-        Self::I64I64N(FixedNull::<arrow::datatypes::Int64Type>::from(arr))
+        let max = arrow::compute::kernels::aggregate::max(&arr);
        // This match is carefully ordered. It prioritises smaller physical
        // datatypes that can safely represent the provided logical data
        match (min, max) {
            // encode as u8 values
            (min, max) if min >= Some(0) && max <= Some(u8::MAX as i64) => {
                Self::I64U8N(FixedNull::<ArrowUInt8Type>::from(arr))
            }
            // encode as i8 values
            (min, max) if min >= Some(i8::MIN as i64) && max <= Some(i8::MAX as i64) => {
                Self::I64I8N(FixedNull::<ArrowInt8Type>::from(arr))
            }
            // encode as u16 values
            (min, max) if min >= Some(0) && max <= Some(u16::MAX as i64) => {
                Self::I64U16N(FixedNull::<ArrowUInt16Type>::from(arr))
            }
            // encode as i16 values
            (min, max) if min >= Some(i16::MIN as i64) && max <= Some(i16::MAX as i64) => {
                Self::I64I16N(FixedNull::<ArrowInt16Type>::from(arr))
            }
            // encode as u32 values
            (min, max) if min >= Some(0) && max <= Some(u32::MAX as i64) => {
                Self::I64U32N(FixedNull::<ArrowUInt32Type>::from(arr))
            }
            // encode as i32 values
            (min, max) if min >= Some(i32::MIN as i64) && max <= Some(i32::MAX as i64) => {
                Self::I64I32N(FixedNull::<ArrowInt32Type>::from(arr))
            }
            // otherwise, encode with the same physical type (i64)
            (_, _) => Self::I64I64N(FixedNull::<ArrowInt64Type>::from(arr)),
        }
    }
 }
@ -650,23 +1069,42 @@ impl From<&[u64]> for IntegerEncoding {
 /// Converts an Arrow array into an IntegerEncoding.
 ///
-/// TODO(edd): convert underlying type of Arrow data to smallest physical
+/// The most compact physical Arrow array type is used to store the column
-/// representation.
+/// within a `FixedNull` encoding.
 impl From<arrow::array::UInt64Array> for IntegerEncoding {
    fn from(arr: arrow::array::UInt64Array) -> Self {
        if arr.null_count() == 0 {
            return Self::from(arr.values());
        }
-        // TODO(edd): currently fixed null only supports 64-bit logical/physical
+        // determine max value.
-        // types. Need to add support for storing as smaller physical types.
+        let max = arrow::compute::kernels::aggregate::max(&arr);
-        Self::U64U64N(FixedNull::<arrow::datatypes::UInt64Type>::from(arr))
+
        // This match is carefully ordered. It prioritises smaller physical
        // datatypes that can safely represent the provided logical data
        match max {
            // encode as u8 values
            max if max <= Some(u8::MAX as u64) => {
                Self::U64U8N(FixedNull::<ArrowUInt8Type>::from(arr))
            }
            // encode as u16 values
            max if max <= Some(u16::MAX as u64) => {
                Self::U64U16N(FixedNull::<ArrowUInt16Type>::from(arr))
            }
            // encode as u32 values
            max if max <= Some(u32::MAX as u64) => {
                Self::U64U32N(FixedNull::<ArrowUInt32Type>::from(arr))
            }
            // otherwise, encode with the same physical type (u64)
            _ => Self::U64U64N(FixedNull::<ArrowUInt64Type>::from(arr)),
        }
    }
 }
 #[cfg(test)]
 mod test {
-    use arrow::datatypes::Int64Type;
+    use arrow::array::{Int64Array, UInt64Array};
    use std::iter;
    use super::*;
@ -718,6 +1156,133 @@ mod test {
        }
    }
    #[test]
    fn from_arrow_i64_array() {
        // One input per physical type: the value range of each input is
        // intended to select a different encoding width/signedness.
        let inputs = vec![
            vec![0_i64, 2, 245, 3],
            vec![0_i64, -120, 127, 3],
            vec![399_i64, 2, 2452, 3],
            vec![-399_i64, 2, 2452, 3],
            vec![u32::MAX as i64, 2, 245, 3],
            vec![i32::MIN as i64, 2, 245, 3],
            vec![0_i64, 2, 245, u32::MAX as i64 + 1],
        ];

        // Arrow arrays without NULLs are expected to be stored using the
        // non-nullable fixed encodings.
        let want_non_null = vec![
            IntegerEncoding::I64U8(Fixed::<u8>::from(inputs[0].as_slice())),
            IntegerEncoding::I64I8(Fixed::<i8>::from(inputs[1].as_slice())),
            IntegerEncoding::I64U16(Fixed::<u16>::from(inputs[2].as_slice())),
            IntegerEncoding::I64I16(Fixed::<i16>::from(inputs[3].as_slice())),
            IntegerEncoding::I64U32(Fixed::<u32>::from(inputs[4].as_slice())),
            IntegerEncoding::I64I32(Fixed::<i32>::from(inputs[5].as_slice())),
            IntegerEncoding::I64I64(Fixed::<i64>::from(inputs[6].as_slice())),
        ];
        for (input, want) in inputs.iter().zip(want_non_null) {
            let arr = Int64Array::from(input.clone());
            assert_eq!(IntegerEncoding::from(arr), want);
        }

        // Same inputs again, each with a single trailing NULL appended.
        let nullable_inputs: Vec<Vec<Option<i64>>> = inputs
            .iter()
            .map(|input| {
                input
                    .iter()
                    .copied()
                    .map(Some)
                    .chain(iter::once(None))
                    .collect()
            })
            .collect();

        // Builds a fresh Arrow array from the i-th nullable input.
        let nullable_arr = |i: usize| Int64Array::from(nullable_inputs[i].clone());

        // With a NULL present a nullable encoding of the matching width is
        // expected instead.
        let want_nullable = vec![
            IntegerEncoding::I64U8N(FixedNull::<ArrowUInt8Type>::from(nullable_arr(0))),
            IntegerEncoding::I64I8N(FixedNull::<ArrowInt8Type>::from(nullable_arr(1))),
            IntegerEncoding::I64U16N(FixedNull::<ArrowUInt16Type>::from(nullable_arr(2))),
            IntegerEncoding::I64I16N(FixedNull::<ArrowInt16Type>::from(nullable_arr(3))),
            IntegerEncoding::I64U32N(FixedNull::<ArrowUInt32Type>::from(nullable_arr(4))),
            IntegerEncoding::I64I32N(FixedNull::<ArrowInt32Type>::from(nullable_arr(5))),
            IntegerEncoding::I64I64N(FixedNull::<ArrowInt64Type>::from(nullable_arr(6))),
        ];
        for (input, want) in nullable_inputs.iter().zip(want_nullable) {
            let arr = Int64Array::from(input.clone());
            assert_eq!(IntegerEncoding::from(arr), want);
        }
    }
    #[test]
    fn from_arrow_u64_array() {
        // One input per physical type: the maximum of each input is intended
        // to select a different unsigned encoding width.
        let inputs = vec![
            vec![0_u64, 2, 245, 3],
            vec![399_u64, 2, 2452, 3],
            vec![u32::MAX as u64, 2, 245, 3],
            vec![0_u64, 2, 245, u32::MAX as u64 + 1],
        ];

        // Arrow arrays without NULLs are expected to be stored using the
        // non-nullable fixed encodings.
        let want_non_null = vec![
            IntegerEncoding::U64U8(Fixed::<u8>::from(inputs[0].as_slice())),
            IntegerEncoding::U64U16(Fixed::<u16>::from(inputs[1].as_slice())),
            IntegerEncoding::U64U32(Fixed::<u32>::from(inputs[2].as_slice())),
            IntegerEncoding::U64U64(Fixed::<u64>::from(inputs[3].as_slice())),
        ];
        for (input, want) in inputs.iter().zip(want_non_null) {
            let arr = UInt64Array::from(input.clone());
            assert_eq!(IntegerEncoding::from(arr), want);
        }

        // Same inputs again, each with a single trailing NULL appended.
        let nullable_inputs: Vec<Vec<Option<u64>>> = inputs
            .iter()
            .map(|input| {
                input
                    .iter()
                    .copied()
                    .map(Some)
                    .chain(iter::once(None))
                    .collect()
            })
            .collect();

        // Builds a fresh Arrow array from the i-th nullable input.
        let nullable_arr = |i: usize| UInt64Array::from(nullable_inputs[i].clone());

        // With a NULL present a nullable encoding of the matching width is
        // expected instead.
        let want_nullable = vec![
            IntegerEncoding::U64U8N(FixedNull::<ArrowUInt8Type>::from(nullable_arr(0))),
            IntegerEncoding::U64U16N(FixedNull::<ArrowUInt16Type>::from(nullable_arr(1))),
            IntegerEncoding::U64U32N(FixedNull::<ArrowUInt32Type>::from(nullable_arr(2))),
            IntegerEncoding::U64U64N(FixedNull::<ArrowUInt64Type>::from(nullable_arr(3))),
        ];
        for (input, want) in nullable_inputs.iter().zip(want_nullable) {
            let arr = UInt64Array::from(input.clone());
            assert_eq!(IntegerEncoding::from(arr), want);
        }
    }
    #[test]
    fn size_raw() {
        let enc = IntegerEncoding::I64U8(Fixed::<u8>::from(&[2, 22, 12, 31][..]));
@ -730,12 +1295,7 @@ mod test {
        assert_eq!(enc.size_raw(true), 56);
        assert_eq!(enc.size_raw(false), 56);
-        let enc = IntegerEncoding::I64I64N(FixedNull::<Int64Type>::from(&[2, 22, 12, 31][..]));
+        let enc = IntegerEncoding::I64I64N(FixedNull::<ArrowInt64Type>::from(
        // (4 * 8) + 24
        assert_eq!(enc.size_raw(true), 56);
        assert_eq!(enc.size_raw(false), 56);
        let enc = IntegerEncoding::I64I64N(FixedNull::<Int64Type>::from(
            &[Some(2), Some(22), Some(12), None, None, Some(31)][..],
        ));
        // (6 * 8) + 24