feat: implement byte trimming on nullable encodings
parent
6a72274517
commit
2963d63b5e
|
@ -1,12 +1,19 @@
|
|||
use std::mem::size_of;
|
||||
|
||||
use arrow::{self, array::Array};
|
||||
use arrow::{
|
||||
self, array::Array, datatypes::Int16Type as ArrowInt16Type,
|
||||
datatypes::Int32Type as ArrowInt32Type, datatypes::Int64Type as ArrowInt64Type,
|
||||
datatypes::Int8Type as ArrowInt8Type, datatypes::UInt16Type as ArrowUInt16Type,
|
||||
datatypes::UInt32Type as ArrowUInt32Type, datatypes::UInt64Type as ArrowUInt64Type,
|
||||
datatypes::UInt8Type as ArrowUInt8Type,
|
||||
};
|
||||
|
||||
use super::encoding::{scalar::Fixed, scalar::FixedNull};
|
||||
use super::{cmp, Statistics};
|
||||
use crate::column::{EncodedValues, RowIDs, Scalar, Value, Values};
|
||||
|
||||
pub enum IntegerEncoding {
|
||||
// non-null encodings. These are backed by `Vec<T>`
|
||||
I64I64(Fixed<i64>),
|
||||
I64I32(Fixed<i32>),
|
||||
I64U32(Fixed<u32>),
|
||||
|
@ -14,15 +21,23 @@ pub enum IntegerEncoding {
|
|||
I64U16(Fixed<u16>),
|
||||
I64I8(Fixed<i8>),
|
||||
I64U8(Fixed<u8>),
|
||||
|
||||
U64U64(Fixed<u64>),
|
||||
U64U32(Fixed<u32>),
|
||||
U64U16(Fixed<u16>),
|
||||
U64U8(Fixed<u8>),
|
||||
|
||||
// Nullable encodings - TODO, add variants for smaller physical types.
|
||||
I64I64N(FixedNull<arrow::datatypes::Int64Type>),
|
||||
U64U64N(FixedNull<arrow::datatypes::UInt64Type>),
|
||||
// Nullable encodings. These are backed by an Arrow array.
|
||||
I64I64N(FixedNull<ArrowInt64Type>),
|
||||
I64I32N(FixedNull<ArrowInt32Type>),
|
||||
I64U32N(FixedNull<ArrowUInt32Type>),
|
||||
I64I16N(FixedNull<ArrowInt16Type>),
|
||||
I64U16N(FixedNull<ArrowUInt16Type>),
|
||||
I64I8N(FixedNull<ArrowInt8Type>),
|
||||
I64U8N(FixedNull<ArrowUInt8Type>),
|
||||
U64U64N(FixedNull<ArrowUInt64Type>),
|
||||
U64U32N(FixedNull<ArrowUInt32Type>),
|
||||
U64U16N(FixedNull<ArrowUInt16Type>),
|
||||
U64U8N(FixedNull<ArrowUInt8Type>),
|
||||
}
|
||||
|
||||
impl PartialEq for IntegerEncoding {
|
||||
|
@ -40,12 +55,57 @@ impl PartialEq for IntegerEncoding {
|
|||
(Self::U64U16(a), Self::U64U16(b)) => a == b,
|
||||
(Self::U64U8(a), Self::U64U8(b)) => a == b,
|
||||
(Self::I64I64N(a), Self::I64I64N(b)) => {
|
||||
let a = a.all_values(vec![]);
|
||||
let a = a.all_values::<i64>(vec![]);
|
||||
let b = b.all_values(vec![]);
|
||||
a == b
|
||||
}
|
||||
(Self::I64I32N(a), Self::I64I32N(b)) => {
|
||||
let a = a.all_values::<i64>(vec![]);
|
||||
let b = b.all_values(vec![]);
|
||||
a == b
|
||||
}
|
||||
(Self::I64U32N(a), Self::I64U32N(b)) => {
|
||||
let a = a.all_values::<i64>(vec![]);
|
||||
let b = b.all_values(vec![]);
|
||||
a == b
|
||||
}
|
||||
(Self::I64I16N(a), Self::I64I16N(b)) => {
|
||||
let a = a.all_values::<i64>(vec![]);
|
||||
let b = b.all_values(vec![]);
|
||||
a == b
|
||||
}
|
||||
(Self::I64U16N(a), Self::I64U16N(b)) => {
|
||||
let a = a.all_values::<i64>(vec![]);
|
||||
let b = b.all_values(vec![]);
|
||||
a == b
|
||||
}
|
||||
(Self::I64I8N(a), Self::I64I8N(b)) => {
|
||||
let a = a.all_values::<i64>(vec![]);
|
||||
let b = b.all_values(vec![]);
|
||||
a == b
|
||||
}
|
||||
(Self::I64U8N(a), Self::I64U8N(b)) => {
|
||||
let a = a.all_values::<i64>(vec![]);
|
||||
let b = b.all_values(vec![]);
|
||||
a == b
|
||||
}
|
||||
(Self::U64U64N(a), Self::U64U64N(b)) => {
|
||||
let a = a.all_values(vec![]);
|
||||
let a = a.all_values::<u64>(vec![]);
|
||||
let b = b.all_values(vec![]);
|
||||
a == b
|
||||
}
|
||||
(Self::U64U32N(a), Self::U64U32N(b)) => {
|
||||
let a = a.all_values::<u64>(vec![]);
|
||||
let b = b.all_values(vec![]);
|
||||
a == b
|
||||
}
|
||||
(Self::U64U16N(a), Self::U64U16N(b)) => {
|
||||
let a = a.all_values::<u64>(vec![]);
|
||||
let b = b.all_values(vec![]);
|
||||
a == b
|
||||
}
|
||||
(Self::U64U8N(a), Self::U64U8N(b)) => {
|
||||
let a = a.all_values::<u64>(vec![]);
|
||||
let b = b.all_values(vec![]);
|
||||
a == b
|
||||
}
|
||||
|
@ -70,7 +130,16 @@ impl IntegerEncoding {
|
|||
Self::U64U16(enc) => enc.size(),
|
||||
Self::U64U8(enc) => enc.size(),
|
||||
Self::I64I64N(enc) => enc.size(),
|
||||
Self::I64I32N(enc) => enc.size(),
|
||||
Self::I64U32N(enc) => enc.size(),
|
||||
Self::I64I16N(enc) => enc.size(),
|
||||
Self::I64U16N(enc) => enc.size(),
|
||||
Self::I64I8N(enc) => enc.size(),
|
||||
Self::I64U8N(enc) => enc.size(),
|
||||
Self::U64U64N(enc) => enc.size(),
|
||||
Self::U64U32N(enc) => enc.size(),
|
||||
Self::U64U16N(enc) => enc.size(),
|
||||
Self::U64U8N(enc) => enc.size(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -97,7 +166,16 @@ impl IntegerEncoding {
|
|||
}
|
||||
|
||||
Self::I64I64N(enc) => enc.size_raw(include_nulls),
|
||||
Self::I64I32N(enc) => enc.size_raw(include_nulls),
|
||||
Self::I64U32N(enc) => enc.size_raw(include_nulls),
|
||||
Self::I64I16N(enc) => enc.size_raw(include_nulls),
|
||||
Self::I64U16N(enc) => enc.size_raw(include_nulls),
|
||||
Self::I64I8N(enc) => enc.size_raw(include_nulls),
|
||||
Self::I64U8N(enc) => enc.size_raw(include_nulls),
|
||||
Self::U64U64N(enc) => enc.size_raw(include_nulls),
|
||||
Self::U64U32N(enc) => enc.size_raw(include_nulls),
|
||||
Self::U64U16N(enc) => enc.size_raw(include_nulls),
|
||||
Self::U64U8N(enc) => enc.size_raw(include_nulls),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -116,7 +194,16 @@ impl IntegerEncoding {
|
|||
Self::U64U16(enc) => enc.num_rows(),
|
||||
Self::U64U8(enc) => enc.num_rows(),
|
||||
Self::I64I64N(enc) => enc.num_rows(),
|
||||
Self::I64I32N(enc) => enc.num_rows(),
|
||||
Self::I64U32N(enc) => enc.num_rows(),
|
||||
Self::I64I16N(enc) => enc.num_rows(),
|
||||
Self::I64U16N(enc) => enc.num_rows(),
|
||||
Self::I64I8N(enc) => enc.num_rows(),
|
||||
Self::I64U8N(enc) => enc.num_rows(),
|
||||
Self::U64U64N(enc) => enc.num_rows(),
|
||||
Self::U64U32N(enc) => enc.num_rows(),
|
||||
Self::U64U16N(enc) => enc.num_rows(),
|
||||
Self::U64U8N(enc) => enc.num_rows(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -137,7 +224,16 @@ impl IntegerEncoding {
|
|||
pub fn contains_null(&self) -> bool {
|
||||
match self {
|
||||
Self::I64I64N(enc) => enc.contains_null(),
|
||||
Self::I64I32N(enc) => enc.contains_null(),
|
||||
Self::I64U32N(enc) => enc.contains_null(),
|
||||
Self::I64I16N(enc) => enc.contains_null(),
|
||||
Self::I64U16N(enc) => enc.contains_null(),
|
||||
Self::I64I8N(enc) => enc.contains_null(),
|
||||
Self::I64U8N(enc) => enc.contains_null(),
|
||||
Self::U64U64N(enc) => enc.contains_null(),
|
||||
Self::U64U32N(enc) => enc.contains_null(),
|
||||
Self::U64U16N(enc) => enc.contains_null(),
|
||||
Self::U64U8N(enc) => enc.contains_null(),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
@ -157,7 +253,16 @@ impl IntegerEncoding {
|
|||
Self::U64U16(_) => 0,
|
||||
Self::U64U8(_) => 0,
|
||||
Self::I64I64N(enc) => enc.null_count(),
|
||||
Self::I64I32N(enc) => enc.null_count(),
|
||||
Self::I64U32N(enc) => enc.null_count(),
|
||||
Self::I64I16N(enc) => enc.null_count(),
|
||||
Self::I64U16N(enc) => enc.null_count(),
|
||||
Self::I64I8N(enc) => enc.null_count(),
|
||||
Self::I64U8N(enc) => enc.null_count(),
|
||||
Self::U64U64N(enc) => enc.null_count(),
|
||||
Self::U64U32N(enc) => enc.null_count(),
|
||||
Self::U64U16N(enc) => enc.null_count(),
|
||||
Self::U64U8N(enc) => enc.null_count(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -165,7 +270,16 @@ impl IntegerEncoding {
|
|||
pub fn has_any_non_null_value(&self) -> bool {
|
||||
match self {
|
||||
Self::I64I64N(enc) => enc.has_any_non_null_value(),
|
||||
Self::I64I32N(enc) => enc.has_any_non_null_value(),
|
||||
Self::I64U32N(enc) => enc.has_any_non_null_value(),
|
||||
Self::I64I16N(enc) => enc.has_any_non_null_value(),
|
||||
Self::I64U16N(enc) => enc.has_any_non_null_value(),
|
||||
Self::I64I8N(enc) => enc.has_any_non_null_value(),
|
||||
Self::I64U8N(enc) => enc.has_any_non_null_value(),
|
||||
Self::U64U64N(enc) => enc.has_any_non_null_value(),
|
||||
Self::U64U32N(enc) => enc.has_any_non_null_value(),
|
||||
Self::U64U16N(enc) => enc.has_any_non_null_value(),
|
||||
Self::U64U8N(enc) => enc.has_any_non_null_value(),
|
||||
_ => true,
|
||||
}
|
||||
}
|
||||
|
@ -175,7 +289,16 @@ impl IntegerEncoding {
|
|||
pub fn has_non_null_value(&self, row_ids: &[u32]) -> bool {
|
||||
match self {
|
||||
Self::I64I64N(enc) => enc.has_non_null_value(row_ids),
|
||||
Self::I64I32N(enc) => enc.has_non_null_value(row_ids),
|
||||
Self::I64U32N(enc) => enc.has_non_null_value(row_ids),
|
||||
Self::I64I16N(enc) => enc.has_non_null_value(row_ids),
|
||||
Self::I64U16N(enc) => enc.has_non_null_value(row_ids),
|
||||
Self::I64I8N(enc) => enc.has_non_null_value(row_ids),
|
||||
Self::I64U8N(enc) => enc.has_non_null_value(row_ids),
|
||||
Self::U64U64N(enc) => enc.has_non_null_value(row_ids),
|
||||
Self::U64U32N(enc) => enc.has_non_null_value(row_ids),
|
||||
Self::U64U16N(enc) => enc.has_non_null_value(row_ids),
|
||||
Self::U64U8N(enc) => enc.has_non_null_value(row_ids),
|
||||
_ => !row_ids.is_empty(), // all rows will be non-null
|
||||
}
|
||||
}
|
||||
|
@ -187,25 +310,64 @@ impl IntegerEncoding {
|
|||
// `c.value` should return as the logical type
|
||||
|
||||
// signed 64-bit variants - logical type is i64 for all these
|
||||
Self::I64I64(c) => Value::Scalar(Scalar::I64(c.value(row_id))),
|
||||
Self::I64I32(c) => Value::Scalar(Scalar::I64(c.value(row_id))),
|
||||
Self::I64U32(c) => Value::Scalar(Scalar::I64(c.value(row_id))),
|
||||
Self::I64I16(c) => Value::Scalar(Scalar::I64(c.value(row_id))),
|
||||
Self::I64U16(c) => Value::Scalar(Scalar::I64(c.value(row_id))),
|
||||
Self::I64I8(c) => Value::Scalar(Scalar::I64(c.value(row_id))),
|
||||
Self::I64U8(c) => Value::Scalar(Scalar::I64(c.value(row_id))),
|
||||
Self::I64I64(enc) => Value::Scalar(Scalar::I64(enc.value(row_id))),
|
||||
Self::I64I32(enc) => Value::Scalar(Scalar::I64(enc.value(row_id))),
|
||||
Self::I64U32(enc) => Value::Scalar(Scalar::I64(enc.value(row_id))),
|
||||
Self::I64I16(enc) => Value::Scalar(Scalar::I64(enc.value(row_id))),
|
||||
Self::I64U16(enc) => Value::Scalar(Scalar::I64(enc.value(row_id))),
|
||||
Self::I64I8(enc) => Value::Scalar(Scalar::I64(enc.value(row_id))),
|
||||
Self::I64U8(enc) => Value::Scalar(Scalar::I64(enc.value(row_id))),
|
||||
|
||||
// unsigned 64-bit variants - logical type is u64 for all these
|
||||
Self::U64U64(c) => Value::Scalar(Scalar::U64(c.value(row_id))),
|
||||
Self::U64U32(c) => Value::Scalar(Scalar::U64(c.value(row_id))),
|
||||
Self::U64U16(c) => Value::Scalar(Scalar::U64(c.value(row_id))),
|
||||
Self::U64U8(c) => Value::Scalar(Scalar::U64(c.value(row_id))),
|
||||
Self::U64U64(enc) => Value::Scalar(Scalar::U64(enc.value(row_id))),
|
||||
Self::U64U32(enc) => Value::Scalar(Scalar::U64(enc.value(row_id))),
|
||||
Self::U64U16(enc) => Value::Scalar(Scalar::U64(enc.value(row_id))),
|
||||
Self::U64U8(enc) => Value::Scalar(Scalar::U64(enc.value(row_id))),
|
||||
|
||||
Self::I64I64N(c) => match c.value(row_id) {
|
||||
// signed 64-bit variants
|
||||
Self::I64I64N(enc) => match enc.value(row_id) {
|
||||
Some(v) => Value::Scalar(Scalar::I64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::U64U64N(c) => match c.value(row_id) {
|
||||
Self::I64I32N(enc) => match enc.value(row_id) {
|
||||
Some(v) => Value::Scalar(Scalar::I64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::I64U32N(enc) => match enc.value(row_id) {
|
||||
Some(v) => Value::Scalar(Scalar::I64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::I64I16N(enc) => match enc.value(row_id) {
|
||||
Some(v) => Value::Scalar(Scalar::I64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::I64U16N(enc) => match enc.value(row_id) {
|
||||
Some(v) => Value::Scalar(Scalar::I64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::I64I8N(enc) => match enc.value(row_id) {
|
||||
Some(v) => Value::Scalar(Scalar::I64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::I64U8N(enc) => match enc.value(row_id) {
|
||||
Some(v) => Value::Scalar(Scalar::I64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
|
||||
// unsigned 64-bit variants
|
||||
Self::U64U64N(enc) => match enc.value(row_id) {
|
||||
Some(v) => Value::Scalar(Scalar::U64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::U64U32N(enc) => match enc.value(row_id) {
|
||||
Some(v) => Value::Scalar(Scalar::U64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::U64U16N(enc) => match enc.value(row_id) {
|
||||
Some(v) => Value::Scalar(Scalar::U64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::U64U8N(enc) => match enc.value(row_id) {
|
||||
Some(v) => Value::Scalar(Scalar::U64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
|
@ -219,22 +381,34 @@ impl IntegerEncoding {
|
|||
pub fn values(&self, row_ids: &[u32]) -> Values<'_> {
|
||||
match &self {
|
||||
// signed 64-bit variants - logical type is i64 for all these
|
||||
Self::I64I64(c) => Values::I64(c.values::<i64>(row_ids, vec![])),
|
||||
Self::I64I32(c) => Values::I64(c.values::<i64>(row_ids, vec![])),
|
||||
Self::I64U32(c) => Values::I64(c.values::<i64>(row_ids, vec![])),
|
||||
Self::I64I16(c) => Values::I64(c.values::<i64>(row_ids, vec![])),
|
||||
Self::I64U16(c) => Values::I64(c.values::<i64>(row_ids, vec![])),
|
||||
Self::I64I8(c) => Values::I64(c.values::<i64>(row_ids, vec![])),
|
||||
Self::I64U8(c) => Values::I64(c.values::<i64>(row_ids, vec![])),
|
||||
Self::I64I64(enc) => Values::I64(enc.values::<i64>(row_ids, vec![])),
|
||||
Self::I64I32(enc) => Values::I64(enc.values::<i64>(row_ids, vec![])),
|
||||
Self::I64U32(enc) => Values::I64(enc.values::<i64>(row_ids, vec![])),
|
||||
Self::I64I16(enc) => Values::I64(enc.values::<i64>(row_ids, vec![])),
|
||||
Self::I64U16(enc) => Values::I64(enc.values::<i64>(row_ids, vec![])),
|
||||
Self::I64I8(enc) => Values::I64(enc.values::<i64>(row_ids, vec![])),
|
||||
Self::I64U8(enc) => Values::I64(enc.values::<i64>(row_ids, vec![])),
|
||||
|
||||
// unsigned 64-bit variants - logical type is u64 for all these
|
||||
Self::U64U64(c) => Values::U64(c.values::<u64>(row_ids, vec![])),
|
||||
Self::U64U32(c) => Values::U64(c.values::<u64>(row_ids, vec![])),
|
||||
Self::U64U16(c) => Values::U64(c.values::<u64>(row_ids, vec![])),
|
||||
Self::U64U8(c) => Values::U64(c.values::<u64>(row_ids, vec![])),
|
||||
Self::U64U64(enc) => Values::U64(enc.values::<u64>(row_ids, vec![])),
|
||||
Self::U64U32(enc) => Values::U64(enc.values::<u64>(row_ids, vec![])),
|
||||
Self::U64U16(enc) => Values::U64(enc.values::<u64>(row_ids, vec![])),
|
||||
Self::U64U8(enc) => Values::U64(enc.values::<u64>(row_ids, vec![])),
|
||||
|
||||
Self::I64I64N(c) => Values::I64N(c.values(row_ids, vec![])),
|
||||
Self::U64U64N(c) => Values::U64N(c.values(row_ids, vec![])),
|
||||
// signed 64-bit nullable variants - logical type is i64 for all these.
|
||||
Self::I64I64N(enc) => Values::I64N(enc.values(row_ids, vec![])),
|
||||
Self::I64I32N(enc) => Values::I64N(enc.values(row_ids, vec![])),
|
||||
Self::I64U32N(enc) => Values::I64N(enc.values(row_ids, vec![])),
|
||||
Self::I64I16N(enc) => Values::I64N(enc.values(row_ids, vec![])),
|
||||
Self::I64U16N(enc) => Values::I64N(enc.values(row_ids, vec![])),
|
||||
Self::I64I8N(enc) => Values::I64N(enc.values(row_ids, vec![])),
|
||||
Self::I64U8N(enc) => Values::I64N(enc.values(row_ids, vec![])),
|
||||
|
||||
// unsigned 64-bit nullable variants - logical type is u64 for all these.
|
||||
Self::U64U64N(enc) => Values::U64N(enc.values(row_ids, vec![])),
|
||||
Self::U64U32N(enc) => Values::U64N(enc.values(row_ids, vec![])),
|
||||
Self::U64U16N(enc) => Values::U64N(enc.values(row_ids, vec![])),
|
||||
Self::U64U8N(enc) => Values::U64N(enc.values(row_ids, vec![])),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -259,8 +433,20 @@ impl IntegerEncoding {
|
|||
Self::U64U16(c) => Values::U64(c.all_values::<u64>(vec![])),
|
||||
Self::U64U8(c) => Values::U64(c.all_values::<u64>(vec![])),
|
||||
|
||||
Self::I64I64N(c) => Values::I64N(c.all_values(vec![])),
|
||||
Self::U64U64N(c) => Values::U64N(c.all_values(vec![])),
|
||||
// signed 64-bit nullable variants - logical type is i64 for all these.
|
||||
Self::I64I64N(enc) => Values::I64N(enc.all_values(vec![])),
|
||||
Self::I64I32N(enc) => Values::I64N(enc.all_values(vec![])),
|
||||
Self::I64U32N(enc) => Values::I64N(enc.all_values(vec![])),
|
||||
Self::I64I16N(enc) => Values::I64N(enc.all_values(vec![])),
|
||||
Self::I64U16N(enc) => Values::I64N(enc.all_values(vec![])),
|
||||
Self::I64I8N(enc) => Values::I64N(enc.all_values(vec![])),
|
||||
Self::I64U8N(enc) => Values::I64N(enc.all_values(vec![])),
|
||||
|
||||
// unsigned 64-bit nullable variants - logical type is u64 for all these.
|
||||
Self::U64U64N(enc) => Values::U64N(enc.all_values(vec![])),
|
||||
Self::U64U32N(enc) => Values::U64N(enc.all_values(vec![])),
|
||||
Self::U64U16N(enc) => Values::U64N(enc.all_values(vec![])),
|
||||
Self::U64U8N(enc) => Values::U64N(enc.all_values(vec![])),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -326,8 +512,20 @@ impl IntegerEncoding {
|
|||
Self::U64U16(c) => c.row_ids_filter(value.as_u16(), op, dst),
|
||||
Self::U64U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
||||
|
||||
Self::I64I64N(c) => c.row_ids_filter(value.as_i64(), op, dst),
|
||||
Self::U64U64N(c) => c.row_ids_filter(value.as_u64(), op, dst),
|
||||
// signed 64-bit nullable variants - logical type is i64 for all these.
|
||||
Self::I64I64N(enc) => enc.row_ids_filter(value.as_i64(), op, dst),
|
||||
Self::I64I32N(enc) => enc.row_ids_filter(value.as_i32(), op, dst),
|
||||
Self::I64U32N(enc) => enc.row_ids_filter(value.as_u32(), op, dst),
|
||||
Self::I64I16N(enc) => enc.row_ids_filter(value.as_i16(), op, dst),
|
||||
Self::I64U16N(enc) => enc.row_ids_filter(value.as_u16(), op, dst),
|
||||
Self::I64I8N(enc) => enc.row_ids_filter(value.as_i8(), op, dst),
|
||||
Self::I64U8N(enc) => enc.row_ids_filter(value.as_u8(), op, dst),
|
||||
|
||||
// unsigned 64-bit nullable variants - logical type is u64 for all these.
|
||||
Self::U64U64N(enc) => enc.row_ids_filter(value.as_u64(), op, dst),
|
||||
Self::U64U32N(enc) => enc.row_ids_filter(value.as_u32(), op, dst),
|
||||
Self::U64U16N(enc) => enc.row_ids_filter(value.as_u16(), op, dst),
|
||||
Self::U64U8N(enc) => enc.row_ids_filter(value.as_u8(), op, dst),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -378,8 +576,41 @@ impl IntegerEncoding {
|
|||
c.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst)
|
||||
}
|
||||
|
||||
Self::I64I64N(_) => todo!(),
|
||||
Self::U64U64N(_) => todo!(),
|
||||
Self::I64I64N(enc) => {
|
||||
enc.row_ids_filter_range((low.1.as_i64(), low.0), (high.1.as_i64(), high.0), dst)
|
||||
}
|
||||
Self::I64I32N(enc) => {
|
||||
enc.row_ids_filter_range((low.1.as_i32(), low.0), (high.1.as_i32(), high.0), dst)
|
||||
}
|
||||
Self::I64U32N(enc) => {
|
||||
enc.row_ids_filter_range((low.1.as_u32(), low.0), (high.1.as_u32(), high.0), dst)
|
||||
}
|
||||
Self::I64I16N(enc) => {
|
||||
enc.row_ids_filter_range((low.1.as_i16(), low.0), (high.1.as_i16(), high.0), dst)
|
||||
}
|
||||
Self::I64U16N(enc) => {
|
||||
enc.row_ids_filter_range((low.1.as_u16(), low.0), (high.1.as_u16(), high.0), dst)
|
||||
}
|
||||
Self::I64I8N(enc) => {
|
||||
enc.row_ids_filter_range((low.1.as_i8(), low.0), (high.1.as_i8(), high.0), dst)
|
||||
}
|
||||
Self::I64U8N(enc) => {
|
||||
enc.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst)
|
||||
}
|
||||
|
||||
// unsigned 64-bit nullable variants - logical type is u64 for all these.
|
||||
Self::U64U64N(enc) => {
|
||||
enc.row_ids_filter_range((low.1.as_u64(), low.0), (high.1.as_u64(), high.0), dst)
|
||||
}
|
||||
Self::U64U32N(enc) => {
|
||||
enc.row_ids_filter_range((low.1.as_u32(), low.0), (high.1.as_u32(), high.0), dst)
|
||||
}
|
||||
Self::U64U16N(enc) => {
|
||||
enc.row_ids_filter_range((low.1.as_u16(), low.0), (high.1.as_u16(), high.0), dst)
|
||||
}
|
||||
Self::U64U8N(enc) => {
|
||||
enc.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -396,11 +627,49 @@ impl IntegerEncoding {
|
|||
Self::U64U32(c) => Value::Scalar(Scalar::U64(c.min(row_ids))),
|
||||
Self::U64U16(c) => Value::Scalar(Scalar::U64(c.min(row_ids))),
|
||||
Self::U64U8(c) => Value::Scalar(Scalar::U64(c.min(row_ids))),
|
||||
Self::I64I64N(c) => match c.min(row_ids) {
|
||||
|
||||
Self::I64I64N(enc) => match enc.min(row_ids) {
|
||||
Some(v) => Value::Scalar(Scalar::I64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::U64U64N(c) => match c.min(row_ids) {
|
||||
Self::I64I32N(enc) => match enc.min(row_ids) {
|
||||
Some(v) => Value::Scalar(Scalar::I64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::I64U32N(enc) => match enc.min(row_ids) {
|
||||
Some(v) => Value::Scalar(Scalar::I64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::I64I16N(enc) => match enc.min(row_ids) {
|
||||
Some(v) => Value::Scalar(Scalar::I64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::I64U16N(enc) => match enc.min(row_ids) {
|
||||
Some(v) => Value::Scalar(Scalar::I64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::I64I8N(enc) => match enc.min(row_ids) {
|
||||
Some(v) => Value::Scalar(Scalar::I64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::I64U8N(enc) => match enc.min(row_ids) {
|
||||
Some(v) => Value::Scalar(Scalar::I64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
|
||||
Self::U64U64N(enc) => match enc.min(row_ids) {
|
||||
Some(v) => Value::Scalar(Scalar::U64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::U64U32N(enc) => match enc.min(row_ids) {
|
||||
Some(v) => Value::Scalar(Scalar::U64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::U64U16N(enc) => match enc.min(row_ids) {
|
||||
Some(v) => Value::Scalar(Scalar::U64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::U64U8N(enc) => match enc.min(row_ids) {
|
||||
Some(v) => Value::Scalar(Scalar::U64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
|
@ -420,11 +689,48 @@ impl IntegerEncoding {
|
|||
Self::U64U32(c) => Value::Scalar(Scalar::U64(c.max(row_ids))),
|
||||
Self::U64U16(c) => Value::Scalar(Scalar::U64(c.max(row_ids))),
|
||||
Self::U64U8(c) => Value::Scalar(Scalar::U64(c.max(row_ids))),
|
||||
Self::I64I64N(c) => match c.max(row_ids) {
|
||||
Self::I64I64N(enc) => match enc.max(row_ids) {
|
||||
Some(v) => Value::Scalar(Scalar::I64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::U64U64N(c) => match c.max(row_ids) {
|
||||
Self::I64I32N(enc) => match enc.max(row_ids) {
|
||||
Some(v) => Value::Scalar(Scalar::I64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::I64U32N(enc) => match enc.max(row_ids) {
|
||||
Some(v) => Value::Scalar(Scalar::I64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::I64I16N(enc) => match enc.max(row_ids) {
|
||||
Some(v) => Value::Scalar(Scalar::I64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::I64U16N(enc) => match enc.max(row_ids) {
|
||||
Some(v) => Value::Scalar(Scalar::I64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::I64I8N(enc) => match enc.max(row_ids) {
|
||||
Some(v) => Value::Scalar(Scalar::I64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::I64U8N(enc) => match enc.max(row_ids) {
|
||||
Some(v) => Value::Scalar(Scalar::I64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
|
||||
Self::U64U64N(enc) => match enc.max(row_ids) {
|
||||
Some(v) => Value::Scalar(Scalar::U64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::U64U32N(enc) => match enc.max(row_ids) {
|
||||
Some(v) => Value::Scalar(Scalar::U64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::U64U16N(enc) => match enc.max(row_ids) {
|
||||
Some(v) => Value::Scalar(Scalar::U64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
Self::U64U8N(enc) => match enc.max(row_ids) {
|
||||
Some(v) => Value::Scalar(Scalar::U64(v)),
|
||||
None => Value::Null,
|
||||
},
|
||||
|
@ -444,11 +750,48 @@ impl IntegerEncoding {
|
|||
Self::U64U32(c) => Scalar::U64(c.sum(row_ids)),
|
||||
Self::U64U16(c) => Scalar::U64(c.sum(row_ids)),
|
||||
Self::U64U8(c) => Scalar::U64(c.sum(row_ids)),
|
||||
Self::I64I64N(c) => match c.sum(row_ids) {
|
||||
Self::I64I64N(enc) => match enc.sum(row_ids) {
|
||||
Some(v) => Scalar::I64(v),
|
||||
None => Scalar::Null,
|
||||
},
|
||||
Self::U64U64N(c) => match c.sum(row_ids) {
|
||||
Self::I64I32N(enc) => match enc.sum(row_ids) {
|
||||
Some(v) => Scalar::I64(v),
|
||||
None => Scalar::Null,
|
||||
},
|
||||
Self::I64U32N(enc) => match enc.sum(row_ids) {
|
||||
Some(v) => Scalar::I64(v),
|
||||
None => Scalar::Null,
|
||||
},
|
||||
Self::I64I16N(enc) => match enc.sum(row_ids) {
|
||||
Some(v) => Scalar::I64(v),
|
||||
None => Scalar::Null,
|
||||
},
|
||||
Self::I64U16N(enc) => match enc.sum(row_ids) {
|
||||
Some(v) => Scalar::I64(v),
|
||||
None => Scalar::Null,
|
||||
},
|
||||
Self::I64I8N(enc) => match enc.sum(row_ids) {
|
||||
Some(v) => Scalar::I64(v),
|
||||
None => Scalar::Null,
|
||||
},
|
||||
Self::I64U8N(enc) => match enc.sum(row_ids) {
|
||||
Some(v) => Scalar::I64(v),
|
||||
None => Scalar::Null,
|
||||
},
|
||||
|
||||
Self::U64U64N(enc) => match enc.sum(row_ids) {
|
||||
Some(v) => Scalar::U64(v),
|
||||
None => Scalar::Null,
|
||||
},
|
||||
Self::U64U32N(enc) => match enc.sum(row_ids) {
|
||||
Some(v) => Scalar::U64(v),
|
||||
None => Scalar::Null,
|
||||
},
|
||||
Self::U64U16N(enc) => match enc.sum(row_ids) {
|
||||
Some(v) => Scalar::U64(v),
|
||||
None => Scalar::Null,
|
||||
},
|
||||
Self::U64U8N(enc) => match enc.sum(row_ids) {
|
||||
Some(v) => Scalar::U64(v),
|
||||
None => Scalar::Null,
|
||||
},
|
||||
|
@ -468,8 +811,17 @@ impl IntegerEncoding {
|
|||
Self::U64U32(c) => c.count(row_ids),
|
||||
Self::U64U16(c) => c.count(row_ids),
|
||||
Self::U64U8(c) => c.count(row_ids),
|
||||
Self::I64I64N(c) => c.count(row_ids),
|
||||
Self::U64U64N(c) => c.count(row_ids),
|
||||
Self::I64I64N(enc) => enc.count(row_ids),
|
||||
Self::I64I32N(enc) => enc.count(row_ids),
|
||||
Self::I64U32N(enc) => enc.count(row_ids),
|
||||
Self::I64I16N(enc) => enc.count(row_ids),
|
||||
Self::I64U16N(enc) => enc.count(row_ids),
|
||||
Self::I64I8N(enc) => enc.count(row_ids),
|
||||
Self::I64U8N(enc) => enc.count(row_ids),
|
||||
Self::U64U64N(enc) => enc.count(row_ids),
|
||||
Self::U64U32N(enc) => enc.count(row_ids),
|
||||
Self::U64U16N(enc) => enc.count(row_ids),
|
||||
Self::U64U8N(enc) => enc.count(row_ids),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -488,7 +840,16 @@ impl IntegerEncoding {
|
|||
Self::U64U16(_) => "BT_U16",
|
||||
Self::U64U8(_) => "BT_U8",
|
||||
Self::I64I64N(_) => "None",
|
||||
Self::I64I32N(_) => "BT_I32N",
|
||||
Self::I64U32N(_) => "BT_U32N",
|
||||
Self::I64I16N(_) => "BT_U16N",
|
||||
Self::I64U16N(_) => "BT_U16N",
|
||||
Self::I64I8N(_) => "BT_I8N",
|
||||
Self::I64U8N(_) => "BT_U8N",
|
||||
Self::U64U64N(_) => "None",
|
||||
Self::U64U32N(_) => "BT_U32N",
|
||||
Self::U64U16N(_) => "BT_U16N",
|
||||
Self::U64U8N(_) => "BT_U8N",
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -507,7 +868,16 @@ impl IntegerEncoding {
|
|||
Self::U64U16(_) => "u64",
|
||||
Self::U64U8(_) => "u64",
|
||||
Self::I64I64N(_) => "i64",
|
||||
Self::I64I32N(_) => "i64",
|
||||
Self::I64U32N(_) => "i64",
|
||||
Self::I64I16N(_) => "i64",
|
||||
Self::I64U16N(_) => "i64",
|
||||
Self::I64I8N(_) => "i64",
|
||||
Self::I64U8N(_) => "i64",
|
||||
Self::U64U64N(_) => "u64",
|
||||
Self::U64U32N(_) => "u64",
|
||||
Self::U64U16N(_) => "u64",
|
||||
Self::U64U8N(_) => "u64",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -528,7 +898,16 @@ impl std::fmt::Display for IntegerEncoding {
|
|||
Self::U64U16(enc) => write!(f, "[{}]: {}", name, enc),
|
||||
Self::U64U8(enc) => write!(f, "[{}]: {}", name, enc),
|
||||
Self::I64I64N(enc) => write!(f, "[{}]: {}", name, enc),
|
||||
Self::I64I32N(enc) => write!(f, "[{}]: {}", name, enc),
|
||||
Self::I64U32N(enc) => write!(f, "[{}]: {}", name, enc),
|
||||
Self::I64I16N(enc) => write!(f, "[{}]: {}", name, enc),
|
||||
Self::I64U16N(enc) => write!(f, "[{}]: {}", name, enc),
|
||||
Self::I64I8N(enc) => write!(f, "[{}]: {}", name, enc),
|
||||
Self::I64U8N(enc) => write!(f, "[{}]: {}", name, enc),
|
||||
Self::U64U64N(enc) => write!(f, "[{}]: {}", name, enc),
|
||||
Self::U64U32N(enc) => write!(f, "[{}]: {}", name, enc),
|
||||
Self::U64U16N(enc) => write!(f, "[{}]: {}", name, enc),
|
||||
Self::U64U8N(enc) => write!(f, "[{}]: {}", name, enc),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -549,7 +928,16 @@ impl std::fmt::Debug for IntegerEncoding {
|
|||
Self::U64U16(enc) => write!(f, "[{}]: {:?}", name, enc),
|
||||
Self::U64U8(enc) => write!(f, "[{}]: {:?}", name, enc),
|
||||
Self::I64I64N(enc) => write!(f, "[{}]: {:?}", name, enc),
|
||||
Self::I64I32N(enc) => write!(f, "[{}]: {:?}", name, enc),
|
||||
Self::I64U32N(enc) => write!(f, "[{}]: {:?}", name, enc),
|
||||
Self::I64I16N(enc) => write!(f, "[{}]: {:?}", name, enc),
|
||||
Self::I64U16N(enc) => write!(f, "[{}]: {:?}", name, enc),
|
||||
Self::I64I8N(enc) => write!(f, "[{}]: {:?}", name, enc),
|
||||
Self::I64U8N(enc) => write!(f, "[{}]: {:?}", name, enc),
|
||||
Self::U64U64N(enc) => write!(f, "[{}]: {:?}", name, enc),
|
||||
Self::U64U32N(enc) => write!(f, "[{}]: {:?}", name, enc),
|
||||
Self::U64U16N(enc) => write!(f, "[{}]: {:?}", name, enc),
|
||||
Self::U64U8N(enc) => write!(f, "[{}]: {:?}", name, enc),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -603,17 +991,48 @@ impl From<&[i64]> for IntegerEncoding {
|
|||
|
||||
/// Converts an Arrow array into an IntegerEncoding.
|
||||
///
|
||||
/// TODO(edd): convert underlying type of Arrow data to smallest physical
|
||||
/// representation.
|
||||
/// The most compact physical Arrow array type is used to store the column
|
||||
/// within a `FixedNull` encoding.
|
||||
impl From<arrow::array::Int64Array> for IntegerEncoding {
|
||||
fn from(arr: arrow::array::Int64Array) -> Self {
|
||||
if arr.null_count() == 0 {
|
||||
return Self::from(arr.values());
|
||||
}
|
||||
|
||||
// TODO(edd): currently fixed null only supports 64-bit logical/physical
|
||||
// types. Need to add support for storing as smaller physical types.
|
||||
Self::I64I64N(FixedNull::<arrow::datatypes::Int64Type>::from(arr))
|
||||
// determine min and max values.
|
||||
let min = arrow::compute::kernels::aggregate::min(&arr);
|
||||
let max = arrow::compute::kernels::aggregate::max(&arr);
|
||||
|
||||
// This match is carefully ordered. It prioritises smaller physical
|
||||
// datatypes that can safely represent the provided logical data
|
||||
match (min, max) {
|
||||
// encode as u8 values
|
||||
(min, max) if min >= Some(0) && max <= Some(u8::MAX as i64) => {
|
||||
Self::I64U8N(FixedNull::<ArrowUInt8Type>::from(arr))
|
||||
}
|
||||
// encode as i8 values
|
||||
(min, max) if min >= Some(i8::MIN as i64) && max <= Some(i8::MAX as i64) => {
|
||||
Self::I64I8N(FixedNull::<ArrowInt8Type>::from(arr))
|
||||
}
|
||||
// encode as u16 values
|
||||
(min, max) if min >= Some(0) && max <= Some(u16::MAX as i64) => {
|
||||
Self::I64U16N(FixedNull::<ArrowUInt16Type>::from(arr))
|
||||
}
|
||||
// encode as i16 values
|
||||
(min, max) if min >= Some(i16::MIN as i64) && max <= Some(i16::MAX as i64) => {
|
||||
Self::I64I16N(FixedNull::<ArrowInt16Type>::from(arr))
|
||||
}
|
||||
// encode as u32 values
|
||||
(min, max) if min >= Some(0) && max <= Some(u32::MAX as i64) => {
|
||||
Self::I64U32N(FixedNull::<ArrowUInt32Type>::from(arr))
|
||||
}
|
||||
// encode as i32 values
|
||||
(min, max) if min >= Some(i32::MIN as i64) && max <= Some(i32::MAX as i64) => {
|
||||
Self::I64I32N(FixedNull::<ArrowInt32Type>::from(arr))
|
||||
}
|
||||
// otherwise, encode with the same physical type (i64)
|
||||
(_, _) => Self::I64I64N(FixedNull::<ArrowInt64Type>::from(arr)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -650,23 +1069,42 @@ impl From<&[u64]> for IntegerEncoding {
|
|||
|
||||
/// Converts an Arrow array into an IntegerEncoding.
|
||||
///
|
||||
/// TODO(edd): convert underlying type of Arrow data to smallest physical
|
||||
/// representation.
|
||||
/// The most compact physical Arrow array type is used to store the column
|
||||
/// within a `FixedNull` encoding.
|
||||
impl From<arrow::array::UInt64Array> for IntegerEncoding {
|
||||
fn from(arr: arrow::array::UInt64Array) -> Self {
|
||||
if arr.null_count() == 0 {
|
||||
return Self::from(arr.values());
|
||||
}
|
||||
|
||||
// TODO(edd): currently fixed null only supports 64-bit logical/physical
|
||||
// types. Need to add support for storing as smaller physical types.
|
||||
Self::U64U64N(FixedNull::<arrow::datatypes::UInt64Type>::from(arr))
|
||||
// determine max value.
|
||||
let max = arrow::compute::kernels::aggregate::max(&arr);
|
||||
|
||||
// This match is carefully ordered. It prioritises smaller physical
|
||||
// datatypes that can safely represent the provided logical data
|
||||
match max {
|
||||
// encode as u8 values
|
||||
max if max <= Some(u8::MAX as u64) => {
|
||||
Self::U64U8N(FixedNull::<ArrowUInt8Type>::from(arr))
|
||||
}
|
||||
// encode as u16 values
|
||||
max if max <= Some(u16::MAX as u64) => {
|
||||
Self::U64U16N(FixedNull::<ArrowUInt16Type>::from(arr))
|
||||
}
|
||||
// encode as u32 values
|
||||
max if max <= Some(u32::MAX as u64) => {
|
||||
Self::U64U32N(FixedNull::<ArrowUInt32Type>::from(arr))
|
||||
}
|
||||
// otherwise, encode with the same physical type (u64)
|
||||
_ => Self::U64U64N(FixedNull::<ArrowUInt64Type>::from(arr)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use arrow::datatypes::Int64Type;
|
||||
use arrow::array::{Int64Array, UInt64Array};
|
||||
use std::iter;
|
||||
|
||||
use super::*;
|
||||
|
||||
|
@ -718,6 +1156,133 @@ mod test {
|
|||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn from_arrow_i64_array() {
|
||||
let cases = vec![
|
||||
vec![0_i64, 2, 245, 3],
|
||||
vec![0_i64, -120, 127, 3],
|
||||
vec![399_i64, 2, 2452, 3],
|
||||
vec![-399_i64, 2, 2452, 3],
|
||||
vec![u32::MAX as i64, 2, 245, 3],
|
||||
vec![i32::MIN as i64, 2, 245, 3],
|
||||
vec![0_i64, 2, 245, u32::MAX as i64 + 1],
|
||||
];
|
||||
|
||||
let exp = vec![
|
||||
IntegerEncoding::I64U8(Fixed::<u8>::from(cases[0].as_slice())),
|
||||
IntegerEncoding::I64I8(Fixed::<i8>::from(cases[1].as_slice())),
|
||||
IntegerEncoding::I64U16(Fixed::<u16>::from(cases[2].as_slice())),
|
||||
IntegerEncoding::I64I16(Fixed::<i16>::from(cases[3].as_slice())),
|
||||
IntegerEncoding::I64U32(Fixed::<u32>::from(cases[4].as_slice())),
|
||||
IntegerEncoding::I64I32(Fixed::<i32>::from(cases[5].as_slice())),
|
||||
IntegerEncoding::I64I64(Fixed::<i64>::from(cases[6].as_slice())),
|
||||
];
|
||||
|
||||
// for Arrow arrays with no nulls we can store the column using a
|
||||
// non-nullable fixed encoding
|
||||
for (case, exp) in cases.iter().cloned().zip(exp.into_iter()) {
|
||||
let arr = Int64Array::from(case);
|
||||
assert_eq!(IntegerEncoding::from(arr), exp);
|
||||
}
|
||||
|
||||
// Tack a NULL onto each of the input cases.
|
||||
let cases = cases
|
||||
.iter()
|
||||
.map(|case| {
|
||||
case.iter()
|
||||
.map(|x| Some(*x))
|
||||
.chain(iter::repeat(None).take(1))
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// when a NULL value is present then we need to use a nullable encoding.
|
||||
let exp = vec![
|
||||
IntegerEncoding::I64U8N(FixedNull::<ArrowUInt8Type>::from(Int64Array::from(
|
||||
cases[0].clone(),
|
||||
))),
|
||||
IntegerEncoding::I64I8N(FixedNull::<ArrowInt8Type>::from(Int64Array::from(
|
||||
cases[1].clone(),
|
||||
))),
|
||||
IntegerEncoding::I64U16N(FixedNull::<ArrowUInt16Type>::from(Int64Array::from(
|
||||
cases[2].clone(),
|
||||
))),
|
||||
IntegerEncoding::I64I16N(FixedNull::<ArrowInt16Type>::from(Int64Array::from(
|
||||
cases[3].clone(),
|
||||
))),
|
||||
IntegerEncoding::I64U32N(FixedNull::<ArrowUInt32Type>::from(Int64Array::from(
|
||||
cases[4].clone(),
|
||||
))),
|
||||
IntegerEncoding::I64I32N(FixedNull::<ArrowInt32Type>::from(Int64Array::from(
|
||||
cases[5].clone(),
|
||||
))),
|
||||
IntegerEncoding::I64I64N(FixedNull::<ArrowInt64Type>::from(Int64Array::from(
|
||||
cases[6].clone(),
|
||||
))),
|
||||
];
|
||||
|
||||
for (case, exp) in cases.into_iter().zip(exp.into_iter()) {
|
||||
let arr = Int64Array::from(case.clone());
|
||||
assert_eq!(IntegerEncoding::from(arr), exp);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn from_arrow_u64_array() {
|
||||
let cases = vec![
|
||||
vec![0_u64, 2, 245, 3],
|
||||
vec![399_u64, 2, 2452, 3],
|
||||
vec![u32::MAX as u64, 2, 245, 3],
|
||||
vec![0_u64, 2, 245, u32::MAX as u64 + 1],
|
||||
];
|
||||
|
||||
let exp = vec![
|
||||
IntegerEncoding::U64U8(Fixed::<u8>::from(cases[0].as_slice())),
|
||||
IntegerEncoding::U64U16(Fixed::<u16>::from(cases[1].as_slice())),
|
||||
IntegerEncoding::U64U32(Fixed::<u32>::from(cases[2].as_slice())),
|
||||
IntegerEncoding::U64U64(Fixed::<u64>::from(cases[3].as_slice())),
|
||||
];
|
||||
|
||||
// for Arrow arrays with no nulls we can store the column using a
|
||||
// non-nullable fixed encoding
|
||||
for (case, exp) in cases.iter().cloned().zip(exp.into_iter()) {
|
||||
let arr = UInt64Array::from(case);
|
||||
assert_eq!(IntegerEncoding::from(arr), exp);
|
||||
}
|
||||
|
||||
// Tack a NULL onto each of the input cases.
|
||||
let cases = cases
|
||||
.iter()
|
||||
.map(|case| {
|
||||
case.iter()
|
||||
.map(|x| Some(*x))
|
||||
.chain(iter::repeat(None).take(1))
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// when a NULL value is present then we need to use a nullable encoding.
|
||||
let exp = vec![
|
||||
IntegerEncoding::U64U8N(FixedNull::<ArrowUInt8Type>::from(UInt64Array::from(
|
||||
cases[0].clone(),
|
||||
))),
|
||||
IntegerEncoding::U64U16N(FixedNull::<ArrowUInt16Type>::from(UInt64Array::from(
|
||||
cases[1].clone(),
|
||||
))),
|
||||
IntegerEncoding::U64U32N(FixedNull::<ArrowUInt32Type>::from(UInt64Array::from(
|
||||
cases[2].clone(),
|
||||
))),
|
||||
IntegerEncoding::U64U64N(FixedNull::<ArrowUInt64Type>::from(UInt64Array::from(
|
||||
cases[3].clone(),
|
||||
))),
|
||||
];
|
||||
|
||||
for (case, exp) in cases.into_iter().zip(exp.into_iter()) {
|
||||
let arr = UInt64Array::from(case.clone());
|
||||
assert_eq!(IntegerEncoding::from(arr), exp);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn size_raw() {
|
||||
let enc = IntegerEncoding::I64U8(Fixed::<u8>::from(&[2, 22, 12, 31][..]));
|
||||
|
@ -730,12 +1295,7 @@ mod test {
|
|||
assert_eq!(enc.size_raw(true), 56);
|
||||
assert_eq!(enc.size_raw(false), 56);
|
||||
|
||||
let enc = IntegerEncoding::I64I64N(FixedNull::<Int64Type>::from(&[2, 22, 12, 31][..]));
|
||||
// (4 * 8) + 24
|
||||
assert_eq!(enc.size_raw(true), 56);
|
||||
assert_eq!(enc.size_raw(false), 56);
|
||||
|
||||
let enc = IntegerEncoding::I64I64N(FixedNull::<Int64Type>::from(
|
||||
let enc = IntegerEncoding::I64I64N(FixedNull::<ArrowInt64Type>::from(
|
||||
&[Some(2), Some(22), Some(12), None, None, Some(31)][..],
|
||||
));
|
||||
// (6 * 8) + 24
|
||||
|
|
Loading…
Reference in New Issue