use std::cmp::Ordering; use std::{convert::TryFrom, fmt::Formatter}; use std::{mem::size_of, sync::Arc}; use crate::{AggregateType, LogicalDataType}; #[derive(Clone, PartialEq, Debug)] /// A type that holds aggregates where each variant encodes the underlying data /// type and aggregate type for a vector of data. An `AggregateVec` can be /// updated on a value-by-value basis and new values can be appended. /// /// The type is structured this way to improve the performance of aggregations /// in the read buffer by reducing the number of matches (branches) needed per /// row. pub enum AggregateVec { Count(Vec>), SumI64(Vec>), SumU64(Vec>), SumF64(Vec>), MinU64(Vec>), MinI64(Vec>), MinF64(Vec>), MinString(Vec>), MinBytes(Vec>>), MinBool(Vec>), MaxU64(Vec>), MaxI64(Vec>), MaxF64(Vec>), MaxString(Vec>), MaxBytes(Vec>>), MaxBool(Vec>), FirstU64((Vec>, Vec>)), FirstI64((Vec>, Vec>)), FirstF64((Vec>, Vec>)), FirstString((Vec>, Vec>)), FirstBytes((Vec>>, Vec>)), FirstBool((Vec>, Vec>)), LastU64((Vec>, Vec>)), LastI64((Vec>, Vec>)), LastF64((Vec>, Vec>)), LastString((Vec>, Vec>)), LastBytes((Vec>>, Vec>)), LastBool((Vec>, Vec>)), } impl AggregateVec { pub fn len(&self) -> usize { match self { Self::Count(arr) => arr.len(), Self::SumI64(arr) => arr.len(), Self::SumU64(arr) => arr.len(), Self::SumF64(arr) => arr.len(), Self::MinU64(arr) => arr.len(), Self::MinI64(arr) => arr.len(), Self::MinF64(arr) => arr.len(), Self::MinString(arr) => arr.len(), Self::MinBytes(arr) => arr.len(), Self::MinBool(arr) => arr.len(), Self::MaxU64(arr) => arr.len(), Self::MaxI64(arr) => arr.len(), Self::MaxF64(arr) => arr.len(), Self::MaxString(arr) => arr.len(), Self::MaxBytes(arr) => arr.len(), Self::MaxBool(arr) => arr.len(), Self::FirstU64((arr, _)) => arr.len(), Self::FirstI64((arr, _)) => arr.len(), Self::FirstF64((arr, _)) => arr.len(), Self::FirstString((arr, _)) => arr.len(), Self::FirstBytes((arr, _)) => arr.len(), Self::FirstBool((arr, _)) => arr.len(), Self::LastU64((arr, _)) => 
arr.len(), Self::LastI64((arr, _)) => arr.len(), Self::LastF64((arr, _)) => arr.len(), Self::LastString((arr, _)) => arr.len(), Self::LastBytes((arr, _)) => arr.len(), Self::LastBool((arr, _)) => arr.len(), } } /// Returns the value specified by `offset`. pub fn value(&self, offset: usize) -> Value<'_> { match &self { Self::Count(arr) => Value::from(arr[offset]), Self::SumI64(arr) => Value::from(arr[offset]), Self::SumU64(arr) => Value::from(arr[offset]), Self::SumF64(arr) => Value::from(arr[offset]), Self::MinU64(arr) => Value::from(arr[offset]), Self::MinI64(arr) => Value::from(arr[offset]), Self::MinF64(arr) => Value::from(arr[offset]), Self::MinString(arr) => Value::from(arr[offset].as_deref()), Self::MinBytes(arr) => Value::from(arr[offset].as_deref()), Self::MinBool(arr) => Value::from(arr[offset]), Self::MaxU64(arr) => Value::from(arr[offset]), Self::MaxI64(arr) => Value::from(arr[offset]), Self::MaxF64(arr) => Value::from(arr[offset]), Self::MaxString(arr) => Value::from(arr[offset].as_deref()), Self::MaxBytes(arr) => Value::from(arr[offset].as_deref()), Self::MaxBool(arr) => Value::from(arr[offset]), _ => unimplemented!("first/last not yet implemented"), } } /// Updates with a new value located in the provided input column help in /// `Values`. /// /// Panics if the type of `Value` does not satisfy the aggregate type. 
pub fn update(&mut self, values: &Values<'_>, row_id: usize, offset: usize) { if values.is_null(row_id) { return; } match self { Self::Count(arr) => { if offset >= arr.len() { arr.resize(offset + 1, None); } *arr[offset].get_or_insert(0) += 1; } Self::SumI64(arr) => { if offset >= arr.len() { arr.resize(offset + 1, None); } match &mut arr[offset] { Some(v) => *v += values.value_i64(row_id), None => arr[offset] = Some(values.value_i64(row_id)), } } Self::SumU64(arr) => { if offset >= arr.len() { arr.resize(offset + 1, None); } match &mut arr[offset] { Some(v) => *v += values.value_u64(row_id), None => arr[offset] = Some(values.value_u64(row_id)), } } Self::SumF64(arr) => { if offset >= arr.len() { arr.resize(offset + 1, None); } match &mut arr[offset] { Some(v) => *v += values.value_f64(row_id), None => arr[offset] = Some(values.value_f64(row_id)), } } Self::MinU64(arr) => { if offset >= arr.len() { arr.resize(offset + 1, None); } match &mut arr[offset] { Some(v) => *v = (*v).min(values.value_u64(row_id)), None => arr[offset] = Some(values.value_u64(row_id)), } } Self::MinI64(arr) => { if offset >= arr.len() { arr.resize(offset + 1, None); } match &mut arr[offset] { Some(v) => *v = (*v).min(values.value_i64(row_id)), None => arr[offset] = Some(values.value_i64(row_id)), } } Self::MinF64(arr) => { if offset >= arr.len() { arr.resize(offset + 1, None); } match &mut arr[offset] { Some(v) => *v = (*v).min(values.value_f64(row_id)), None => arr[offset] = Some(values.value_f64(row_id)), } } Self::MinString(arr) => { if offset >= arr.len() { arr.resize(offset + 1, None); } match &mut arr[offset] { Some(v) => { let other = values.value_str(row_id); if other < v.as_str() { *v = other.to_owned(); } } None => arr[offset] = Some(values.value_str(row_id).to_owned()), } } Self::MinBytes(arr) => { if offset >= arr.len() { arr.resize(offset + 1, None); } match &mut arr[offset] { Some(v) => { let other = values.value_bytes(row_id); if other < v.as_slice() { *v = other.to_owned(); } 
} None => arr[offset] = Some(values.value_bytes(row_id).to_owned()), } } Self::MinBool(arr) => { if offset >= arr.len() { arr.resize(offset + 1, None); } match &mut arr[offset] { Some(v) => *v = (*v).min(values.value_bool(row_id)), None => arr[offset] = Some(values.value_bool(row_id)), } } Self::MaxU64(arr) => { if offset >= arr.len() { arr.resize(offset + 1, None); } match &mut arr[offset] { Some(v) => *v = (*v).max(values.value_u64(row_id)), None => arr[offset] = Some(values.value_u64(row_id)), } } Self::MaxI64(arr) => { if offset >= arr.len() { arr.resize(offset + 1, None); } match &mut arr[offset] { Some(v) => *v = (*v).max(values.value_i64(row_id)), None => arr[offset] = Some(values.value_i64(row_id)), } } Self::MaxF64(arr) => { if offset >= arr.len() { arr.resize(offset + 1, None); } match &mut arr[offset] { Some(v) => *v = (*v).max(values.value_f64(row_id)), None => arr[offset] = Some(values.value_f64(row_id)), } } Self::MaxString(arr) => { if offset >= arr.len() { arr.resize(offset + 1, None); } match &mut arr[offset] { Some(v) => { let other = values.value_str(row_id); if other > v.as_str() { *v = other.to_owned(); } } None => arr[offset] = Some(values.value_str(row_id).to_owned()), } } Self::MaxBytes(arr) => { if offset >= arr.len() { arr.resize(offset + 1, None); } match &mut arr[offset] { Some(v) => { let other = values.value_bytes(row_id); if other > v.as_slice() { *v = other.to_owned(); } } None => arr[offset] = Some(values.value_bytes(row_id).to_owned()), } } Self::MaxBool(arr) => { if offset >= arr.len() { arr.resize(offset + 1, None); } match &mut arr[offset] { Some(v) => *v = (*v).max(values.value_bool(row_id)), None => arr[offset] = Some(values.value_bool(row_id)), } } // TODO - implement first/last _ => unimplemented!("aggregate update not implemented"), } } /// Appends the provided value to the end of the aggregate vector. /// Panics if the type of `Value` does not satisfy the aggregate type. 
/// /// Note: updating pushed first/last variants is not currently a supported /// operation. pub fn push(&mut self, value: Value<'_>) { match self { Self::Count(arr) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.u64())); } } Self::SumI64(arr) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.i64())); } } Self::SumU64(arr) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.u64())); } } Self::SumF64(arr) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.f64())); } } Self::MinU64(arr) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.u64())); } } Self::MinI64(arr) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.i64())); } } Self::MinF64(arr) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.f64())); } } Self::MinString(arr) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.str().to_owned())); } } Self::MinBytes(arr) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.bytes().to_owned())); } } Self::MinBool(arr) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.bool())); } } Self::MaxU64(arr) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.u64())); } } Self::MaxI64(arr) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.i64())); } } Self::MaxF64(arr) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.f64())); } } Self::MaxString(arr) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.str().to_owned())); } } Self::MaxBytes(arr) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.bytes().to_owned())); } } Self::MaxBool(arr) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.bool())); } } Self::FirstU64((arr, _)) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.u64())); } } Self::FirstI64((arr, _)) => { if 
value.is_null() { arr.push(None); } else { arr.push(Some(value.i64())); } } Self::FirstF64((arr, _)) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.f64())); } } Self::FirstString((arr, _)) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.str().to_owned())); } } Self::FirstBytes((arr, _)) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.bytes().to_owned())); } } Self::FirstBool((arr, _)) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.bool())); } } Self::LastU64((arr, _)) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.u64())); } } Self::LastI64((arr, _)) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.i64())); } } Self::LastF64((arr, _)) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.f64())); } } Self::LastString((arr, _)) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.str().to_owned())); } } Self::LastBytes((arr, _)) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.bytes().to_owned())); } } Self::LastBool((arr, _)) => { if value.is_null() { arr.push(None); } else { arr.push(Some(value.bool())); } } } } /// Writes a textual representation of the value specified by `offset` to /// the provided formatter. 
pub fn write_value(&self, offset: usize, f: &mut Formatter<'_>) -> std::fmt::Result { match self { Self::Count(arr) => match arr[offset] { Some(v) => write!(f, "{}", v)?, None => write!(f, "NULL")?, }, Self::SumI64(arr) => match arr[offset] { Some(v) => write!(f, "{}", v)?, None => write!(f, "NULL")?, }, Self::SumU64(arr) => match arr[offset] { Some(v) => write!(f, "{}", v)?, None => write!(f, "NULL")?, }, Self::SumF64(arr) => match arr[offset] { Some(v) => write!(f, "{}", v)?, None => write!(f, "NULL")?, }, Self::MinU64(arr) => match arr[offset] { Some(v) => write!(f, "{}", v)?, None => write!(f, "NULL")?, }, Self::MinI64(arr) => match arr[offset] { Some(v) => write!(f, "{}", v)?, None => write!(f, "NULL")?, }, Self::MinF64(arr) => match arr[offset] { Some(v) => write!(f, "{}", v)?, None => write!(f, "NULL")?, }, Self::MinString(arr) => match &arr[offset] { Some(v) => write!(f, "{}", v)?, None => write!(f, "NULL")?, }, Self::MinBytes(arr) => match &arr[offset] { Some(v) => write!(f, "{:?}", v)?, None => write!(f, "NULL")?, }, Self::MinBool(arr) => match arr[offset] { Some(v) => write!(f, "{}", v)?, None => write!(f, "NULL")?, }, Self::MaxU64(arr) => match arr[offset] { Some(v) => write!(f, "{}", v)?, None => write!(f, "NULL")?, }, Self::MaxI64(arr) => match arr[offset] { Some(v) => write!(f, "{}", v)?, None => write!(f, "NULL")?, }, Self::MaxF64(arr) => match arr[offset] { Some(v) => write!(f, "{}", v)?, None => write!(f, "NULL")?, }, Self::MaxString(arr) => match &arr[offset] { Some(v) => write!(f, "{}", v)?, None => write!(f, "NULL")?, }, Self::MaxBytes(arr) => match &arr[offset] { Some(v) => write!(f, "{:?}", v)?, None => write!(f, "NULL")?, }, Self::MaxBool(arr) => match arr[offset] { Some(v) => write!(f, "{}", v)?, None => write!(f, "NULL")?, }, Self::FirstU64((arr, _)) => match arr[offset] { Some(v) => write!(f, "{}", v)?, None => write!(f, "NULL")?, }, Self::FirstI64((arr, _)) => match arr[offset] { Some(v) => write!(f, "{}", v)?, None => write!(f, 
"NULL")?, }, Self::FirstF64((arr, _)) => match arr[offset] { Some(v) => write!(f, "{}", v)?, None => write!(f, "NULL")?, }, Self::FirstString((arr, _)) => match &arr[offset] { Some(v) => write!(f, "{}", v)?, None => write!(f, "NULL")?, }, Self::FirstBytes((arr, _)) => match &arr[offset] { Some(v) => write!(f, "{:?}", v)?, None => write!(f, "NULL")?, }, Self::FirstBool((arr, _)) => match arr[offset] { Some(v) => write!(f, "{}", v)?, None => write!(f, "NULL")?, }, Self::LastU64((arr, _)) => match arr[offset] { Some(v) => write!(f, "{}", v)?, None => write!(f, "NULL")?, }, Self::LastI64((arr, _)) => match arr[offset] { Some(v) => write!(f, "{}", v)?, None => write!(f, "NULL")?, }, Self::LastF64((arr, _)) => match arr[offset] { Some(v) => write!(f, "{}", v)?, None => write!(f, "NULL")?, }, Self::LastString((arr, _)) => match &arr[offset] { Some(v) => write!(f, "{}", v)?, None => write!(f, "NULL")?, }, Self::LastBytes((arr, _)) => match &arr[offset] { Some(v) => write!(f, "{:?}", v)?, None => write!(f, "NULL")?, }, Self::LastBool((arr, _)) => match arr[offset] { Some(v) => write!(f, "{}", v)?, None => write!(f, "NULL")?, }, } Ok(()) } // Consumes self and returns the inner `Vec>`. pub fn take_as_i64(self) -> Vec> { match self { Self::SumI64(arr) => arr, Self::MinI64(arr) => arr, Self::MaxI64(arr) => arr, _ => panic!("cannot convert {} to Vec>", self), } } // Consumes self and returns the inner `Vec>`. pub fn take_as_u64(self) -> Vec> { match self { Self::Count(arr) => arr, Self::SumU64(arr) => arr, Self::MinU64(arr) => arr, Self::MaxU64(arr) => arr, _ => panic!("cannot convert {} to Vec>", self), } } // Consumes self and returns the inner `Vec>`. pub fn take_as_f64(self) -> Vec> { match self { Self::SumF64(arr) => arr, Self::MinF64(arr) => arr, Self::MaxF64(arr) => arr, _ => panic!("cannot convert {} to Vec>", self), } } // Consumes self and returns the inner `Vec>`. 
pub fn take_as_str(self) -> Vec> { match self { Self::MinString(arr) => arr, Self::MaxString(arr) => arr, _ => panic!("cannot convert {} to Vec>", self), } } // Consumes self and returns the inner `Vec>>`. pub fn take_as_bytes(self) -> Vec>> { match self { Self::MinBytes(arr) => arr, Self::MaxBytes(arr) => arr, _ => panic!("cannot convert {} to Vec>", self), } } // Consumes self and returns the inner `Vec>`. pub fn take_as_bool(self) -> Vec> { match self { Self::MinBool(arr) => arr, Self::MaxBool(arr) => arr, _ => panic!("cannot convert {} to Vec", self), } } /// Extends the `AggregateVec` with the provided `Option` iterator. pub fn extend_with_i64(&mut self, itr: impl Iterator>) { match self { Self::SumI64(arr) => { arr.extend(itr); } Self::MinI64(arr) => { arr.extend(itr); } Self::MaxI64(arr) => { arr.extend(itr); } _ => panic!("unsupported iterator"), } } /// Extends the `AggregateVec` with the provided `Option` iterator. pub fn extend_with_u64(&mut self, itr: impl Iterator>) { match self { Self::Count(arr) => { arr.extend(itr); } Self::SumU64(arr) => { arr.extend(itr); } Self::MinU64(arr) => { arr.extend(itr); } Self::MaxU64(arr) => { arr.extend(itr); } _ => panic!("unsupported iterator"), } } /// Extends the `AggregateVec` with the provided `Option` iterator. pub fn extend_with_f64(&mut self, itr: impl Iterator>) { match self { Self::SumF64(arr) => { arr.extend(itr); } Self::MinF64(arr) => { arr.extend(itr); } Self::MaxF64(arr) => { arr.extend(itr); } _ => panic!("unsupported iterator"), } } /// Extends the `AggregateVec` with the provided `Option` iterator. pub fn extend_with_str(&mut self, itr: impl Iterator>) { match self { Self::MinString(arr) => { arr.extend(itr); } Self::MaxString(arr) => { arr.extend(itr); } _ => panic!("unsupported iterator"), } } /// Extends the `AggregateVec` with the provided `Option>` iterator. 
///
/// # Panics
///
/// Panics unless `self` is `MinBytes` or `MaxBytes`.
pub fn extend_with_bytes(&mut self, itr: impl Iterator<Item = Option<Vec<u8>>>) {
    match self {
        Self::MinBytes(arr) | Self::MaxBytes(arr) => arr.extend(itr),
        _ => panic!("unsupported iterator"),
    }
}

/// Extends the `AggregateVec` with the provided `Option<bool>` iterator.
///
/// # Panics
///
/// Panics unless `self` is `MinBool` or `MaxBool`.
pub fn extend_with_bool(&mut self, itr: impl Iterator<Item = Option<bool>>) {
    match self {
        Self::MinBool(arr) | Self::MaxBool(arr) => arr.extend(itr),
        _ => panic!("unsupported iterator"),
    }
}

/// Replaces every vector held by `self` with the result of applying the
/// permutation `p` to it. For the first/last variants both vectors are
/// permuted with the same `p`.
pub fn sort_with_permutation(&mut self, p: &permutation::Permutation) {
    match self {
        Self::Count(arr) | Self::SumU64(arr) | Self::MinU64(arr) | Self::MaxU64(arr) => {
            *arr = p.apply_slice(arr.as_slice());
        }
        Self::SumI64(arr) | Self::MinI64(arr) | Self::MaxI64(arr) => {
            *arr = p.apply_slice(arr.as_slice());
        }
        Self::SumF64(arr) | Self::MinF64(arr) | Self::MaxF64(arr) => {
            *arr = p.apply_slice(arr.as_slice());
        }
        Self::MinString(arr) | Self::MaxString(arr) => {
            *arr = p.apply_slice(arr.as_slice());
        }
        Self::MinBytes(arr) | Self::MaxBytes(arr) => {
            *arr = p.apply_slice(arr.as_slice());
        }
        Self::MinBool(arr) | Self::MaxBool(arr) => {
            *arr = p.apply_slice(arr.as_slice());
        }
        Self::FirstU64((arr, time)) | Self::LastU64((arr, time)) => {
            *arr = p.apply_slice(arr.as_slice());
            *time = p.apply_slice(time.as_slice());
        }
        Self::FirstI64((arr, time)) | Self::LastI64((arr, time)) => {
            *arr = p.apply_slice(arr.as_slice());
            *time = p.apply_slice(time.as_slice());
        }
        Self::FirstF64((arr, time)) | Self::LastF64((arr, time)) => {
            *arr = p.apply_slice(arr.as_slice());
            *time = p.apply_slice(time.as_slice());
        }
        Self::FirstString((arr, time)) | Self::LastString((arr, time)) => {
            *arr = p.apply_slice(arr.as_slice());
            *time = p.apply_slice(time.as_slice());
        }
        Self::FirstBytes((arr, time)) | Self::LastBytes((arr, time)) => {
            *arr = p.apply_slice(arr.as_slice());
            *time = p.apply_slice(time.as_slice());
        }
        Self::FirstBool((arr, time)) | Self::LastBool((arr, time)) => {
            *arr = p.apply_slice(arr.as_slice());
            *time = p.apply_slice(time.as_slice());
        }
    }
}
}

/// Writes the aggregate kind together with the element type it aggregates,
/// e.g. `Sum<i64>`. The payload is not printed.
// NOTE(review): the `<…>` part of these names had been stripped (three variants
// printed "Sum", the byte variants printed "Min>"). The names are restored
// from each variant's element type -- confirm the exact spelling if anything
// depends on this output.
impl std::fmt::Display for AggregateVec {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Count(_) => write!(f, "Count"),
            Self::SumI64(_) => write!(f, "Sum<i64>"),
            Self::SumU64(_) => write!(f, "Sum<u64>"),
            Self::SumF64(_) => write!(f, "Sum<f64>"),
            Self::MinU64(_) => write!(f, "Min<u64>"),
            Self::MinI64(_) => write!(f, "Min<i64>"),
            Self::MinF64(_) => write!(f, "Min<f64>"),
            Self::MinString(_) => write!(f, "Min<String>"),
            Self::MinBytes(_) => write!(f, "Min<Vec<u8>>"),
            Self::MinBool(_) => write!(f, "Min<bool>"),
            Self::MaxU64(_) => write!(f, "Max<u64>"),
            Self::MaxI64(_) => write!(f, "Max<i64>"),
            Self::MaxF64(_) => write!(f, "Max<f64>"),
            Self::MaxString(_) => write!(f, "Max<String>"),
            Self::MaxBytes(_) => write!(f, "Max<Vec<u8>>"),
            Self::MaxBool(_) => write!(f, "Max<bool>"),
            Self::FirstU64(_) => write!(f, "First<u64>"),
            Self::FirstI64(_) => write!(f, "First<i64>"),
            Self::FirstF64(_) => write!(f, "First<f64>"),
            Self::FirstString(_) => write!(f, "First<String>"),
            Self::FirstBytes(_) => write!(f, "First<Vec<u8>>"),
            Self::FirstBool(_) => write!(f, "First<bool>"),
            Self::LastU64(_) => write!(f, "Last<u64>"),
            Self::LastI64(_) => write!(f, "Last<i64>"),
            Self::LastF64(_) => write!(f, "Last<f64>"),
            Self::LastString(_) => write!(f, "Last<String>"),
            Self::LastBytes(_) => write!(f, "Last<Vec<u8>>"),
            Self::LastBool(_) => write!(f, "Last<bool>"),
        }
    }
}

/// Builds the empty `AggregateVec` variant that matches an aggregate type and
/// a logical data type.
///
/// `Count` ignores the data type. `Sum` over anything other than integer,
/// unsigned or float hits `unreachable!`.
impl From<(&AggregateType, &LogicalDataType)> for AggregateVec {
    fn from(v: (&AggregateType, &LogicalDataType)) -> Self {
        match (v.0, v.1) {
            (AggregateType::Count, _) => Self::Count(vec![]),
            (AggregateType::First, LogicalDataType::Integer) => Self::FirstI64((vec![], vec![])),
            (AggregateType::First, LogicalDataType::Unsigned) => Self::FirstU64((vec![], vec![])),
            (AggregateType::First, LogicalDataType::Float) => Self::FirstF64((vec![], vec![])),
            (AggregateType::First, LogicalDataType::String) => {
                Self::FirstString((vec![], vec![]))
            }
            (AggregateType::First, LogicalDataType::Binary) => Self::FirstBytes((vec![], vec![])),
            (AggregateType::First, LogicalDataType::Boolean) => Self::FirstBool((vec![], vec![])),
            (AggregateType::Last, LogicalDataType::Integer) => Self::LastI64((vec![], vec![])),
            (AggregateType::Last, LogicalDataType::Unsigned) => Self::LastU64((vec![], vec![])),
            (AggregateType::Last, LogicalDataType::Float) => Self::LastF64((vec![], vec![])),
            (AggregateType::Last, LogicalDataType::String) => Self::LastString((vec![], vec![])),
            (AggregateType::Last, LogicalDataType::Binary) => Self::LastBytes((vec![], vec![])),
            (AggregateType::Last, LogicalDataType::Boolean) => Self::LastBool((vec![], vec![])),
            (AggregateType::Min, LogicalDataType::Integer) => Self::MinI64(vec![]),
            (AggregateType::Min, LogicalDataType::Unsigned) => Self::MinU64(vec![]),
            (AggregateType::Min, LogicalDataType::Float) => Self::MinF64(vec![]),
            (AggregateType::Min, LogicalDataType::String) => Self::MinString(vec![]),
            (AggregateType::Min, LogicalDataType::Binary) => Self::MinBytes(vec![]),
            (AggregateType::Min, LogicalDataType::Boolean) => Self::MinBool(vec![]),
            (AggregateType::Max, LogicalDataType::Integer) => Self::MaxI64(vec![]),
            (AggregateType::Max, LogicalDataType::Unsigned) => Self::MaxU64(vec![]),
            (AggregateType::Max, LogicalDataType::Float) => Self::MaxF64(vec![]),
            (AggregateType::Max, LogicalDataType::String) => Self::MaxString(vec![]),
            (AggregateType::Max, LogicalDataType::Binary) => Self::MaxBytes(vec![]),
            (AggregateType::Max, LogicalDataType::Boolean) => Self::MaxBool(vec![]),
            (AggregateType::Sum, LogicalDataType::Integer) => Self::SumI64(vec![]),
            (AggregateType::Sum, LogicalDataType::Unsigned) => Self::SumU64(vec![]),
            (AggregateType::Sum, LogicalDataType::Float) => Self::SumF64(vec![]),
            (AggregateType::Sum, _) => unreachable!("unsupported SUM aggregates"),
        }
    }
}

/// A scalar is a numerical value that can be aggregated.
#[derive(Debug, PartialEq, Copy, Clone)]
pub enum Scalar {
    Null,
    I64(i64),
    U64(u64),
    F64(f64),
}

// This `PartialOrd` implementation will attempt to compare all integer types
// to each other, floating point types to themselves, but does not attempt to
// compare floating point values to any other value.
impl PartialOrd for Scalar {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        match (self, other) {
            (Self::I64(a), Self::I64(b)) => a.partial_cmp(b),
            (Self::I64(a), Self::U64(b)) => {
                // Every i64 is smaller than a u64 beyond `i64::MAX`.
                if *b > i64::MAX as u64 {
                    return Some(std::cmp::Ordering::Less);
                }
                // `try_from` must succeed.
                let b = i64::try_from(*b).unwrap();
                a.partial_cmp(&b)
            }
            (Self::U64(a), Self::I64(b)) => {
                // Every u64 is greater than a negative i64.
                if *b < 0 {
                    return Some(std::cmp::Ordering::Greater);
                }
                // `try_from` must succeed.
                let b = u64::try_from(*b).unwrap();
                a.partial_cmp(&b)
            }
            (Self::U64(a), Self::U64(b)) => a.partial_cmp(b),
            (Self::F64(a), Self::F64(b)) => a.partial_cmp(b),
            // Can't compare NULL to anything.
            // Can't sanely compare floats to integers because not all integers
            // can be represented precisely as floats. We could perhaps revisit
            // this in the future.
            (_, Self::Null) | (Self::Null, _) | (Self::F64(_), _) | (_, Self::F64(_)) => None,
        }
    }
}

// Generates, per integer type, a panicking accessor (`$name`) and a fallible
// one (`$try_name`). Both convert from the `I64` and `U64` variants with
// `try_from`, and both panic on `F64`. `$name` also panics on `Null` and on a
// value that does not fit; `$try_name` returns `None` in those two cases.
macro_rules! typed_scalar_converters {
    ($(($name:ident, $try_name:ident, $type:ident),)*) => {
        $(
            pub fn $name(&self) -> $type {
                match &self {
                    Self::I64(v) => $type::try_from(*v).unwrap(),
                    Self::U64(v) => $type::try_from(*v).unwrap(),
                    Self::F64(_) => panic!("cannot convert Self::F64"),
                    Self::Null => panic!("cannot convert Scalar::Null"),
                }
            }

            pub fn $try_name(&self) -> Option<$type> {
                match &self {
                    Self::I64(v) => $type::try_from(*v).ok(),
                    Self::U64(v) => $type::try_from(*v).ok(),
                    Self::F64(_) => panic!("cannot convert Self::F64"),
                    Self::Null => None,
                }
            }
        )*
    };
}

impl Scalar {
    /// Returns `true` if this scalar is `Null`.
    pub fn is_null(&self) -> bool {
        matches!(self, Self::Null)
    }

    // Implementations of all the accessors for the variants of `Scalar`.
    typed_scalar_converters! {
        (as_i64, try_as_i64, i64),
        (as_i32, try_as_i32, i32),
        (as_i16, try_as_i16, i16),
        (as_i8, try_as_i8, i8),
        (as_u64, try_as_u64, u64),
        (as_u32, try_as_u32, u32),
        (as_u16, try_as_u16, u16),
        (as_u8, try_as_u8, u8),
    }

    /// Returns the wrapped `f64`.
    ///
    /// # Panics
    ///
    /// Panics for every variant other than `F64`.
    pub fn as_f64(&self) -> f64 {
        match &self {
            Self::F64(v) => *v,
            _ => unimplemented!("converting integer Scalar to f64 unsupported"),
        }
    }

    /// Returns the wrapped `f64`, or `None` for `Null`.
    ///
    /// # Panics
    ///
    /// Panics for the integer variants, whose conversion is not implemented.
    pub fn try_as_f64(&self) -> Option<f64> {
        match &self {
            Self::F64(v) => Some(*v),
            // Same NULL handling as the integer `try_as_*` accessors.
            Self::Null => None,
            _ => unimplemented!("converting integer Scalar to f64 unsupported"),
        }
    }
}

/// Adds `rhs` to `self` in place. A NULL `rhs` is ignored.
///
/// Panics when the two variants differ, or when `self` is `Null` and `rhs` is
/// not.
impl std::ops::AddAssign<&Self> for Scalar {
    fn add_assign(&mut self, rhs: &Self) {
        if rhs.is_null() {
            // Adding NULL does nothing.
            return;
        }

        match self {
            Self::F64(v) => {
                if let Self::F64(other) = rhs {
                    *v += *other;
                } else {
                    panic!("invalid AddAssign types");
                };
            }
            Self::I64(v) => {
                if let Self::I64(other) = rhs {
                    *v += *other;
                } else {
                    panic!("invalid AddAssign types");
                };
            }
            Self::U64(v) => {
                if let Self::U64(other) = rhs {
                    *v += *other;
                } else {
                    panic!("invalid AddAssign types");
                };
            }
            _ => unimplemented!("unsupported and to be removed"),
        }
    }
}

/// Same operation through a `&mut Scalar` left-hand side.
impl<'a> std::ops::AddAssign<&Scalar> for &mut Scalar {
    fn add_assign(&mut self, rhs: &Scalar) {
        if rhs.is_null() {
            // Adding NULL does nothing; this keeps the impl in line with the
            // by-value one, which would otherwise disagree on a NULL `rhs`.
            return;
        }

        match self {
            Scalar::F64(v) => {
                if let Scalar::F64(other) = rhs {
                    *v += *other;
                } else {
                    panic!("invalid AddAssign types");
                };
            }
            Scalar::I64(v) => {
                if let Scalar::I64(other) = rhs {
                    *v += *other;
                } else {
                    panic!("invalid AddAssign types");
                };
            }
            Scalar::U64(v) => {
                if let Scalar::U64(other) = rhs {
                    *v += *other;
                } else {
                    panic!("invalid AddAssign types");
                };
            }
            _ => unimplemented!("unsupported and to be removed"),
        }
    }
}

/// Adds two scalars. NULL is the identity; mixing numeric variants panics.
impl std::ops::Add for Scalar {
    type Output = Self;

    fn add(self, other: Self) -> Self {
        match (self, other) {
            (Self::Null, Self::Null) => Self::Null,
            (Self::Null, Self::I64(_)) => other,
            (Self::Null, Self::U64(_)) => other,
            (Self::Null, Self::F64(_)) => other,
            (Self::I64(_), Self::Null) => self,
            (Self::I64(a), Self::I64(b)) => Self::I64(a + b),
            (Self::U64(_), Self::Null) => self,
            (Self::U64(a), Self::U64(b)) => Self::U64(a + b),
            (Self::F64(_), Self::Null) => self,
            (Self::F64(a), Self::F64(b)) => Self::F64(a + b),
            (a, b) => panic!("{:?} + {:?} is an unsupported operation", a, b),
        }
    }
}

impl std::fmt::Display for &Scalar {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Scalar::Null => write!(f, "NULL"),
            Scalar::I64(v) => write!(f, "{}", v),
            Scalar::U64(v) => write!(f, "{}", v),
            Scalar::F64(v) => write!(f, "{}", v),
        }
    }
}

/// An owned counterpart of `Value`.
#[derive(Debug, PartialEq, Clone)]
pub enum OwnedValue {
    // Represents a NULL value in a column row.
    Null,

    /// A UTF-8 valid string.
    String(String),

    /// An arbitrary byte array.
    ByteArray(Vec<u8>),

    /// A boolean value.
    Boolean(bool),

    /// A numeric scalar value, or NULL
    Scalar(Scalar),
}

impl PartialOrd for OwnedValue {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        match (self, other) {
            (Self::String(s), Self::String(o)) => s.partial_cmp(o),
            (Self::ByteArray(s), Self::ByteArray(o)) => s.partial_cmp(o),
            (Self::Boolean(s), Self::Boolean(o)) => s.partial_cmp(o),
            (Self::Scalar(s), Self::Scalar(o)) => s.partial_cmp(o),
            // can not compare if not the same type
            _ => None,
        }
    }
}

impl OwnedValue {
    /// Returns the NULL value.
    pub fn new_null() -> Self {
        Self::Null
    }

    /// Returns `true` for `OwnedValue::Null`. A `Scalar(Scalar::Null)` is not
    /// matched by this check.
    pub fn is_null(&self) -> bool {
        matches!(self, Self::Null)
    }

    /// Returns a copy of the string, or `None` for NULL.
    ///
    /// Panics for every other variant.
    pub fn as_string(&self) -> Option<String> {
        match self {
            OwnedValue::Null => None,
            OwnedValue::String(s) => Some(s.clone()),
            v => panic!("{:?} cannot be unwrapped as string", v),
        }
    }

    /// Returns a copy of the byte array, or `None` for NULL.
    ///
    /// Panics for every other variant.
    pub fn as_byte_array(&self) -> Option<Vec<u8>> {
        match self {
            OwnedValue::Null => None,
            OwnedValue::ByteArray(arr) => Some(arr.clone()),
            v => panic!("{:?} cannot be unwrapped as byte array", v),
        }
    }

    /// Returns the boolean, or `None` for NULL.
    ///
    /// Panics for every other variant.
    pub fn as_bool(&self) -> Option<bool> {
        match self {
            OwnedValue::Null => None,
            OwnedValue::Boolean(b) => Some(*b),
            v => panic!("{:?} cannot be unwrapped as bool", v),
        }
    }

    /// Update self to the min of self and other, taking into
    /// account NULL
    pub fn update_min(&mut self, other: &Self) {
        if (self.is_null() && !other.is_null()) || other.partial_cmp(self) == Some(Ordering::Less)
        {
            *self = other.clone()
        }
    }

    /// Update self to the max of self and other, taking into
    /// account NULL
    pub fn update_max(&mut self, other: &Self) {
        if (self.is_null() && !other.is_null())
            || other.partial_cmp(self) == Some(Ordering::Greater)
        {
            *self = other.clone()
        }
    }

    /// The size in bytes of this value.
    ///
    /// This is the size of the enum itself plus, for strings and byte arrays,
    /// the length of the payload.
    pub fn size(&self) -> usize {
        let self_size = size_of::<Self>();
        match self {
            Self::String(s) => s.len() + self_size,
            Self::ByteArray(arr) => arr.len() + self_size,
            _ => self_size,
        }
    }
}

// Implementations of as_type for various scalar types.
macro_rules! owned_value_as_impls {
    ($(($type:ident, $name:ident),)*) => {
        $(
            impl OwnedValue {
                pub fn $name(&self) -> Option<$type> {
                    match self {
                        OwnedValue::Null => None,
                        OwnedValue::Scalar(s) => {
                            // A NULL scalar maps to `None` rather than
                            // reaching the panicking scalar accessor.
                            (!s.is_null()).then(|| s.$name())
                        }
                        v => panic!("{:?} cannot be unwrapped as {:?}", v, stringify!($type)),
                    }
                }
            }
        )*
    };
}

owned_value_as_impls! {
    (i64, as_i64),
    (f64, as_f64),
    (u64, as_u64),
}

impl std::fmt::Display for &OwnedValue {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            OwnedValue::Null => write!(f, "NULL"),
            OwnedValue::String(s) => s.fmt(f),
            OwnedValue::ByteArray(s) => write!(f, "{}", String::from_utf8_lossy(s)),
            OwnedValue::Boolean(b) => b.fmt(f),
            OwnedValue::Scalar(s) => s.fmt(f),
        }
    }
}

/// Equality against a borrowed `Value`; values of different variants, and any
/// pairing that involves NULL, compare unequal.
impl PartialEq<Value<'_>> for OwnedValue {
    fn eq(&self, other: &Value<'_>) -> bool {
        match (&self, other) {
            (Self::String(a), Value::String(b)) => a == b,
            (Self::Scalar(a), Value::Scalar(b)) => a == b,
            (Self::Boolean(a), Value::Boolean(b)) => a == b,
            (Self::ByteArray(a), Value::ByteArray(b)) => a == b,
            _ => false,
        }
    }
}

/// Ordering against a borrowed `Value`; only defined within one variant.
impl PartialOrd<Value<'_>> for OwnedValue {
    fn partial_cmp(&self, other: &Value<'_>) -> Option<Ordering> {
        match (&self, other) {
            (Self::String(a), Value::String(b)) => Some(a.as_str().cmp(b)),
            (Self::Scalar(a), Value::Scalar(b)) => a.partial_cmp(b),
            (Self::Boolean(a), Value::Boolean(b)) => a.partial_cmp(b),
            (Self::ByteArray(a), Value::ByteArray(b)) => a.as_slice().partial_cmp(*b),
            _ => None,
        }
    }
}

/// Each variant is a possible value type that can be returned from a column.
#[derive(Debug, PartialEq, PartialOrd, Copy, Clone)]
pub enum Value<'a> {
    // Represents a NULL value in a column row.
    Null,

    // A UTF-8 valid string.
    String(&'a str),

    // An arbitrary byte array.
    ByteArray(&'a [u8]),

    // A boolean value.
    Boolean(bool),

    // A numeric scalar value.
    Scalar(Scalar),
}

impl<'a> Value<'a> {
    /// Returns `true` for `Value::Null`.
    pub fn is_null(&self) -> bool {
        matches!(self, Self::Null)
    }

    /// Returns the wrapped scalar. Panics for every other variant.
    pub fn scalar(&self) -> &Scalar {
        if let Self::Scalar(s) = self {
            return s;
        }
        panic!("cannot unwrap Value to Scalar");
    }

    /// Returns the wrapped `i64`. Panics unless this is a `Scalar::I64`.
    pub fn i64(self) -> i64 {
        if let Self::Scalar(Scalar::I64(v)) = self {
            return v;
        }
        panic!("cannot unwrap Value to i64");
    }

    /// Returns the wrapped `u64`. Panics unless this is a `Scalar::U64`.
    pub fn u64(self) -> u64 {
        if let Self::Scalar(Scalar::U64(v)) = self {
            return v;
        }
        panic!("cannot unwrap Value to u64");
    }

    /// Returns the wrapped `f64`. Panics unless this is a `Scalar::F64`.
    pub fn f64(self) -> f64 {
        if let Self::Scalar(Scalar::F64(v)) = self {
            return v;
        }
        panic!("cannot unwrap Value to f64");
    }

    /// Returns the wrapped string slice. Panics for every other variant.
    pub fn str(self) -> &'a str {
        if let Self::String(s) = self {
            return s;
        }
        panic!("cannot unwrap Value to String");
    }

    /// Returns the wrapped byte slice. Panics for every other variant.
    pub fn bytes(self) -> &'a [u8] {
        if let Self::ByteArray(s) = self {
            return s;
        }
        panic!("cannot unwrap Value to byte array");
    }

    /// Returns the wrapped boolean. Panics for every other variant.
    pub fn bool(self) -> bool {
        if let Self::Boolean(b) = self {
            return b;
        }
        panic!("cannot unwrap Value to bool");
    }
}

impl std::fmt::Display for Value<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Value::Null => write!(f, "NULL"),
            Value::String(s) => write!(f, "{}", s),
            Value::ByteArray(arr) => write!(f, "{:?}", arr),
            Value::Boolean(b) => write!(f, "{}", b),
            Value::Scalar(s) => match s {
                Scalar::I64(v) => write!(f, "{}", v),
                Scalar::U64(v) => write!(f, "{}", v),
                Scalar::F64(v) => write!(f, "{}", v),
                Scalar::Null => write!(f, "NULL"),
            },
        }
    }
}

impl<'a> From<&'a str> for Value<'a> {
    fn from(v: &'a str) -> Self {
        Self::String(v)
    }
}

impl<'a> From<Option<&'a str>> for Value<'a> {
    fn from(v: Option<&'a str>) -> Self {
        match v {
            Some(s) => Self::String(s),
            None => Self::Null,
        }
    }
}

impl<'a> From<&'a [u8]> for Value<'a> {
    fn from(v: &'a [u8]) -> Self {
        Self::ByteArray(v)
    }
}

impl<'a> From<Option<&'a [u8]>> for Value<'a> {
    fn from(v: Option<&'a [u8]>) -> Self {
        match v {
            Some(s) => Self::ByteArray(s),
            None => Self::Null,
        }
    }
}

impl<'a> From<bool> for Value<'a> {
    fn from(v: bool) -> Self {
        Self::Boolean(v)
    }
}

impl<'a> From<Option<bool>> for Value<'a> {
    fn from(v: Option<bool>) -> Self {
        match v {
            Some(s) => Self::Boolean(s),
            None => Self::Null,
        }
    }
}

// Implementations of From trait for various concrete types.
macro_rules! scalar_from_impls {
    ($(($variant:ident, $type:ident),)*) => {
        $(
            impl From<$type> for Value<'_> {
                fn from(v: $type) -> Self {
                    Self::Scalar(Scalar::$variant(v))
                }
            }

            impl From<Option<$type>> for Value<'_> {
                fn from(v: Option<$type>) -> Self {
                    match v {
                        Some(v) => Self::Scalar(Scalar::$variant(v)),
                        None => Self::Null,
                    }
                }
            }
        )*
    };
}

scalar_from_impls! {
    (I64, i64),
    (U64, u64),
    (F64, f64),
}

/// Adds two scalar values; panics if either side is not a `Scalar`.
impl std::ops::Add for Value<'_> {
    type Output = Self;

    fn add(self, other: Self) -> Self {
        match (self, other) {
            (Self::Scalar(a), Self::Scalar(b)) => Self::Scalar(a + b),
            _ => panic!("unsupported operation on Value"),
        }
    }
}

/// Each variant is a typed vector of materialised values for a column.
// NOTE(review): the generic arguments of this definition had been stripped.
// The element types are restored from `len`, `is_null`, `value` and the
// `value_*` accessors. The integer type of the dictionary keys is not
// observable here (they are only ever cast with `as usize`); `u32` is assumed
// -- confirm against the code that builds `Values::Dictionary`.
#[derive(Debug, PartialEq)]
pub enum Values<'a> {
    // UTF-8 valid unicode strings
    String(Vec<Option<&'a str>>),

    // A dictionary mapping between a vector of dictionary integer keys and the
    // string values they refer to.
    // NOTE the strings are always sorted
    Dictionary(Vec<u32>, Vec<Option<&'a str>>),

    // Scalar types
    I64(Vec<i64>),
    U64(Vec<u64>),
    F64(Vec<f64>),
    I64N(Vec<Option<i64>>),
    U64N(Vec<Option<u64>>),
    F64N(Vec<Option<f64>>),

    // Boolean values
    Bool(Vec<Option<bool>>),

    // Arbitrary byte arrays
    ByteArray(Vec<Option<&'a [u8]>>),
}

impl<'a> Values<'a> {
    /// Returns the number of rows. For a dictionary this is the number of
    /// keys, not the number of distinct strings.
    pub fn len(&self) -> usize {
        match &self {
            Self::String(c) => c.len(),
            Self::Dictionary(c, _) => c.len(),
            Self::I64(c) => c.len(),
            Self::U64(c) => c.len(),
            Self::F64(c) => c.len(),
            Self::Bool(c) => c.len(),
            Self::ByteArray(c) => c.len(),
            Self::I64N(c) => c.len(),
            Self::U64N(c) => c.len(),
            Self::F64N(c) => c.len(),
        }
    }

    /// Returns `true` if there are no rows.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns `true` if row `i` is NULL. The non-nullable scalar variants
    /// never report NULL.
    ///
    /// Panics if `i` is out of bounds for a nullable variant.
    pub fn is_null(&self, i: usize) -> bool {
        match &self {
            Self::String(c) => c[i].is_none(),
            Self::Dictionary(keys, values) => values[keys[i] as usize].is_none(),
            Self::F64(_) => false,
            Self::I64(_) => false,
            Self::U64(_) => false,
            Self::Bool(c) => c[i].is_none(),
            Self::ByteArray(c) => c[i].is_none(),
            Self::I64N(c) => c[i].is_none(),
            Self::U64N(c) => c[i].is_none(),
            Self::F64N(c) => c[i].is_none(),
        }
    }

    /// Returns row `i` as a `Value`, with NULL rows mapped to `Value::Null`.
    ///
    /// Panics if `i` is out of bounds.
    pub fn value(&self, i: usize) -> Value<'a> {
        match &self {
            Self::String(c) => match c[i] {
                Some(v) => Value::String(v),
                None => Value::Null,
            },
            Self::Dictionary(keys, values) => match values[keys[i] as usize] {
                Some(v) => Value::String(v),
                None => Value::Null,
            },
            Self::F64(c) => Value::Scalar(Scalar::F64(c[i])),
            Self::I64(c) => Value::Scalar(Scalar::I64(c[i])),
            Self::U64(c) => Value::Scalar(Scalar::U64(c[i])),
            Self::Bool(c) => match c[i] {
                Some(v) => Value::Boolean(v),
                None => Value::Null,
            },
            Self::ByteArray(c) => match c[i] {
                Some(v) => Value::ByteArray(v),
                None => Value::Null,
            },
            Self::I64N(c) => match c[i] {
                Some(v) => Value::Scalar(Scalar::I64(v)),
                None => Value::Null,
            },
            Self::U64N(c) => match c[i] {
                Some(v) => Value::Scalar(Scalar::U64(v)),
                None => Value::Null,
            },
            Self::F64N(c) => match c[i] {
                Some(v) => Value::Scalar(Scalar::F64(v)),
                None => Value::Null,
            },
        }
    }

    // Returns a value as an i64. Panics if not possible.
fn value_i64(&self, i: usize) -> i64 { match &self { Values::I64(c) => c[i], Values::I64N(c) => c[i].unwrap(), _ => panic!("value cannot be returned as i64"), } } // Returns a value as an u64. Panics if not possible. fn value_u64(&self, i: usize) -> u64 { match &self { Values::U64(c) => c[i], Values::U64N(c) => c[i].unwrap(), _ => panic!("value cannot be returned as u64"), } } // Returns a value as an f64. Panics if not possible. fn value_f64(&self, i: usize) -> f64 { match &self { Values::F64(c) => c[i], Values::F64N(c) => c[i].unwrap(), _ => panic!("value cannot be returned as f64"), } } // Returns a value as a &str. Panics if not possible. fn value_str(&self, i: usize) -> &'a str { match &self { Values::String(c) => c[i].unwrap(), Values::Dictionary(keys, values) => values[keys[i] as usize].unwrap(), _ => panic!("value cannot be returned as &str"), } } // Returns a value as a binary array. Panics if not possible. fn value_bytes(&self, i: usize) -> &'a [u8] { match &self { Values::ByteArray(c) => c[i].unwrap(), _ => panic!("value cannot be returned as &str"), } } // Returns a value as a bool. Panics if not possible. fn value_bool(&self, i: usize) -> bool { match &self { Values::Bool(c) => c[i].unwrap(), _ => panic!("value cannot be returned as &str"), } } } use arrow::{ array::{Array, ArrayDataBuilder, DictionaryArray}, buffer::Buffer, datatypes::{DataType, Int32Type}, }; use arrow_util::bitset::BitSet; use std::iter::FromIterator; /// Moves ownership of Values into an arrow `ArrayRef`. impl From> for arrow::array::ArrayRef { fn from(values: Values<'_>) -> Self { match values { Values::String(values) => Arc::new(arrow::array::StringArray::from(values)), Values::Dictionary(mut keys, values) => { // check for NULL values, setting null positions // on the null bitmap if there is at least one NULL // value. 
let null_bitmap = if matches!(values.first(), Some(None)) { let mut bitset = BitSet::with_size(keys.len()); for (i, v) in keys.iter_mut().enumerate() { if *v as usize != 0 { bitset.set(i); // valid value } // because Arrow Dictionary arrays do not maintain a // None/NULL entry in the string values array we need to // shift the encoded key down so it maps correctly to // the values array. The encoded key for NULL entries is // never used (it's undefined) so we can keep those // encoded keys set to 0. if *v > 0 { *v -= 1; } } Some(bitset) } else { None }; // If there is a null bitmap we need to remove the None entry // from the string values array since Arrow doesn't maintain // NULL entries in a dictionary's value array. let values_arr = if null_bitmap.is_some() { // drop NULL value entry as this is not stored in Arrow's // dictionary values array. assert!(values[0].is_none()); values .into_iter() .skip(1) .collect::() } else { values.into_iter().collect::() }; let mut builder = ArrayDataBuilder::new(DataType::Dictionary( Box::new(DataType::Int32), Box::new(DataType::Utf8), )) .len(keys.len()) .add_buffer(Buffer::from_iter(keys)) .add_child_data(values_arr.data().clone()); if let Some(bm) = null_bitmap { builder = builder.null_bit_buffer(bm.to_arrow()); } let data = builder.build().unwrap(); Arc::new(DictionaryArray::::from(data)) } Values::I64(values) => Arc::new(arrow::array::Int64Array::from(values)), Values::U64(values) => Arc::new(arrow::array::UInt64Array::from(values)), Values::F64(values) => Arc::new(arrow::array::Float64Array::from(values)), Values::I64N(values) => Arc::new(arrow::array::Int64Array::from(values)), Values::U64N(values) => Arc::new(arrow::array::UInt64Array::from(values)), Values::F64N(values) => Arc::new(arrow::array::Float64Array::from(values)), Values::Bool(values) => Arc::new(arrow::array::BooleanArray::from(values)), Values::ByteArray(values) => Arc::new(arrow::array::BinaryArray::from(values)), } } } pub struct ValuesIterator<'a> { v: 
&'a Values<'a>, next_i: usize, } impl<'a> ValuesIterator<'a> { pub fn new(v: &'a Values<'a>) -> Self { Self { v, next_i: 0 } } } impl<'a> Iterator for ValuesIterator<'a> { type Item = Value<'a>; fn next(&mut self) -> Option { let curr_i = self.next_i; self.next_i += 1; if curr_i == self.v.len() { return None; } Some(self.v.value(curr_i)) } } #[derive(Debug, PartialEq)] /// A representation of encoded values for a column. pub enum EncodedValues { I64(Vec), U32(Vec), } impl EncodedValues { pub fn with_capacity_i64(capacity: usize) -> Self { Self::I64(Vec::with_capacity(capacity)) } pub fn with_capacity_u32(capacity: usize) -> Self { Self::U32(Vec::with_capacity(capacity)) } pub fn as_i64(&self) -> &Vec { if let Self::I64(arr) = self { return arr; } panic!("cannot borrow &Vec"); } pub fn as_u32(&self) -> &Vec { if let Self::U32(arr) = self { return arr; } panic!("cannot borrow &Vec"); } /// Takes a `Vec` out of the enum. pub fn take_u32(&mut self) -> Vec { std::mem::take(match self { Self::I64(_) => panic!("cannot take Vec out of I64 variant"), Self::U32(arr) => arr, }) } pub fn len(&self) -> usize { match self { Self::I64(v) => v.len(), Self::U32(v) => v.len(), } } pub fn is_empty(&self) -> bool { match self { Self::I64(v) => v.is_empty(), Self::U32(v) => v.is_empty(), } } pub fn clear(&mut self) { match self { Self::I64(v) => v.clear(), Self::U32(v) => v.clear(), } } pub fn reserve(&mut self, additional: usize) { match self { Self::I64(v) => v.reserve(additional), Self::U32(v) => v.reserve(additional), } } } #[cfg(test)] mod test { use super::*; use arrow::array::ArrayRef; use std::cmp::Ordering; #[test] fn aggregate_vec_update() { // i64 let values = Values::I64N(vec![Some(1), Some(2), Some(3), None, Some(-1), Some(2)]); let mut aggs = vec![ AggregateVec::Count(vec![]), AggregateVec::SumI64(vec![]), AggregateVec::MinI64(vec![]), AggregateVec::MaxI64(vec![]), ]; for i in 0..values.len() { for agg in &mut aggs { agg.update(&values, i, 0); } } assert_eq!( aggs, vec![ 
AggregateVec::Count(vec![Some(5)]), AggregateVec::SumI64(vec![Some(7)]), AggregateVec::MinI64(vec![Some(-1)]), AggregateVec::MaxI64(vec![Some(3)]), ] ); // u64 let values = Values::U64N(vec![Some(1), Some(2), Some(3), None, Some(0), Some(2)]); let mut aggs = vec![ AggregateVec::Count(vec![]), AggregateVec::SumU64(vec![]), AggregateVec::MinU64(vec![]), AggregateVec::MaxU64(vec![]), ]; for i in 0..values.len() { for agg in &mut aggs { agg.update(&values, i, 0); } } assert_eq!( aggs, vec![ AggregateVec::Count(vec![Some(5)]), AggregateVec::SumU64(vec![Some(8)]), AggregateVec::MinU64(vec![Some(0)]), AggregateVec::MaxU64(vec![Some(3)]), ] ); // f64 let values = Values::F64N(vec![ Some(1.0), Some(2.0), Some(3.0), None, Some(0.0), Some(2.0), ]); let mut aggs = vec![ AggregateVec::Count(vec![]), AggregateVec::SumF64(vec![]), AggregateVec::MinF64(vec![]), AggregateVec::MaxF64(vec![]), ]; for i in 0..values.len() { for agg in &mut aggs { agg.update(&values, i, 0); } } assert_eq!( aggs, vec![ AggregateVec::Count(vec![Some(5)]), AggregateVec::SumF64(vec![Some(8.0)]), AggregateVec::MinF64(vec![Some(0.0)]), AggregateVec::MaxF64(vec![Some(3.0)]), ] ); // string let values = Values::String(vec![ Some("Pop Song 89"), Some("Orange Crush"), Some("Stand"), None, ]); let mut aggs = vec![ AggregateVec::Count(vec![]), AggregateVec::MinString(vec![]), AggregateVec::MaxString(vec![]), ]; for i in 0..values.len() { for agg in &mut aggs { agg.update(&values, i, 0); } } assert_eq!( aggs, vec![ AggregateVec::Count(vec![Some(3)]), AggregateVec::MinString(vec![Some("Orange Crush".to_owned())]), AggregateVec::MaxString(vec![Some("Stand".to_owned())]), ] ); // bytes let arr = vec![ "Pop Song 89".to_string(), "Orange Crush".to_string(), "Stand".to_string(), ]; let values = Values::ByteArray(vec![ Some(arr[0].as_bytes()), Some(arr[1].as_bytes()), Some(arr[2].as_bytes()), None, ]); let mut aggs = vec![ AggregateVec::Count(vec![]), AggregateVec::MinBytes(vec![]), AggregateVec::MaxBytes(vec![]), ]; for 
i in 0..values.len() { for agg in &mut aggs { agg.update(&values, i, 0); } } assert_eq!( aggs, vec![ AggregateVec::Count(vec![Some(3)]), AggregateVec::MinBytes(vec![Some(arr[1].bytes().collect())]), AggregateVec::MaxBytes(vec![Some(arr[2].bytes().collect())]), ] ); // bool let values = Values::Bool(vec![Some(true), None, Some(false)]); let mut aggs = vec![ AggregateVec::Count(vec![]), AggregateVec::MinBool(vec![]), AggregateVec::MaxBool(vec![]), ]; for i in 0..values.len() { for agg in &mut aggs { agg.update(&values, i, 0); } } assert_eq!( aggs, vec![ AggregateVec::Count(vec![Some(2)]), AggregateVec::MinBool(vec![Some(false)]), AggregateVec::MaxBool(vec![Some(true)]), ] ); } #[test] fn size() { let v1 = OwnedValue::new_null(); assert_eq!(v1.size(), 32); let v1 = OwnedValue::Scalar(Scalar::I64(22)); assert_eq!(v1.size(), 32); let v1 = OwnedValue::String("11morebytes".to_owned()); assert_eq!(v1.size(), 43); let v1 = OwnedValue::ByteArray(vec![2, 44, 252]); assert_eq!(v1.size(), 35); } #[test] fn update_min() { let null = OwnedValue::new_null(); let eleven = OwnedValue::Scalar(Scalar::I64(11)); let twenty_two = OwnedValue::Scalar(Scalar::I64(22)); let thirty_three = OwnedValue::Scalar(Scalar::I64(33)); let string = OwnedValue::String("foo".to_string()); let mut v1 = OwnedValue::new_null(); v1.update_min(&null); assert_eq!(&v1, &null); v1.update_min(&twenty_two); assert_eq!(&v1, &twenty_two); v1.update_min(&thirty_three); assert_eq!(&v1, &twenty_two); v1.update_min(&eleven); assert_eq!(&v1, &eleven); v1.update_min(&null); assert_eq!(&v1, &eleven); // different type should not update either v1.update_min(&string); assert_eq!(&v1, &eleven); } #[test] fn update_max() { let null = OwnedValue::new_null(); let eleven = OwnedValue::Scalar(Scalar::I64(11)); let twenty_two = OwnedValue::Scalar(Scalar::I64(22)); let thirty_three = OwnedValue::Scalar(Scalar::I64(33)); let string = OwnedValue::String("foo".to_string()); let mut v1 = OwnedValue::new_null(); v1.update_max(&null); 
assert_eq!(&v1, &null); v1.update_max(&twenty_two); assert_eq!(&v1, &twenty_two); v1.update_max(&eleven); assert_eq!(&v1, &twenty_two); v1.update_max(&thirty_three); assert_eq!(&v1, &thirty_three); v1.update_max(&null); assert_eq!(&v1, &thirty_three); // different type should not update either v1.update_max(&string); assert_eq!(&v1, &thirty_three); } #[test] fn from_dictionary_arrow() { let values = Values::Dictionary( vec![0, 1, 2, 0, 1, 2, 2], vec![Some("bones"), Some("just"), Some("planet telex")], ); let arr = ArrayRef::from(values); // no null values in Arrow dictionary array assert_eq!(arr.null_count(), 0); assert!((0..7).into_iter().all(|i| !arr.is_null(i))); // Should produce the same the array as when created from an iterator // of strings. let exp_dict_arr = vec![ Some("bones"), Some("just"), Some("planet telex"), Some("bones"), Some("just"), Some("planet telex"), Some("planet telex"), ] .into_iter() .collect::>(); let as_dict_arr = arr .as_any() .downcast_ref::>() .unwrap(); assert_eq!(as_dict_arr.keys(), exp_dict_arr.keys()); // Now let's try with some NULL entries. let values = Values::Dictionary( vec![0, 1, 2, 0, 1, 2, 2], vec![None, Some("just"), Some("planet telex")], ); let arr = ArrayRef::from(values); assert_eq!(arr.null_count(), 2); for (i, exp) in vec![true, false, false, true, false, false, false] .iter() .enumerate() { assert_eq!(arr.is_null(i), *exp); } // Should produce the same the array as when created from an iterator // of strings. 
let exp_dict_arr = vec![ None, Some("just"), Some("planet telex"), None, Some("just"), Some("planet telex"), Some("planet telex"), ] .into_iter() .collect::>(); let as_dict_arr = arr .as_any() .downcast_ref::>() .unwrap(); assert_eq!(as_dict_arr.keys(), exp_dict_arr.keys()); } #[test] fn scalar_comparison() { let cases = vec![ (Scalar::I64(1), Scalar::I64(2), Some(Ordering::Less)), (Scalar::I64(3), Scalar::I64(3), Some(Ordering::Equal)), (Scalar::I64(4), Scalar::I64(3), Some(Ordering::Greater)), (Scalar::I64(1), Scalar::U64(2), Some(Ordering::Less)), (Scalar::I64(3), Scalar::U64(3), Some(Ordering::Equal)), (Scalar::I64(4), Scalar::U64(3), Some(Ordering::Greater)), (Scalar::U64(1), Scalar::I64(2), Some(Ordering::Less)), (Scalar::U64(3), Scalar::I64(3), Some(Ordering::Equal)), (Scalar::U64(4), Scalar::I64(3), Some(Ordering::Greater)), (Scalar::U64(1), Scalar::U64(2), Some(Ordering::Less)), (Scalar::U64(3), Scalar::U64(3), Some(Ordering::Equal)), (Scalar::U64(4), Scalar::U64(3), Some(Ordering::Greater)), (Scalar::U64(1), Scalar::I64(-20), Some(Ordering::Greater)), (Scalar::U64(3), Scalar::I64(-20), Some(Ordering::Greater)), (Scalar::U64(4), Scalar::I64(-20), Some(Ordering::Greater)), ( Scalar::U64(u64::MAX), Scalar::I64(-20), Some(Ordering::Greater), ), ( Scalar::U64(u64::MAX), Scalar::I64(1), Some(Ordering::Greater), ), ( Scalar::I64(-20), Scalar::U64(u64::MAX), Some(Ordering::Less), ), (Scalar::I64(1), Scalar::U64(u64::MAX), Some(Ordering::Less)), (Scalar::F64(1.0), Scalar::F64(1.0), Some(Ordering::Equal)), ]; for (a, b, exp) in cases { assert_eq!(a.partial_cmp(&b), exp, "Example {:?} <=> {:?} failed", a, b); } } }