feat: implement RLE methods for materialising
parent
c55dce3af5
commit
9a666fac00
|
@ -3,6 +3,7 @@ use crate::column::RowIDs;
|
||||||
use std::{
|
use std::{
|
||||||
cmp::Ordering,
|
cmp::Ordering,
|
||||||
fmt::{Debug, Display},
|
fmt::{Debug, Display},
|
||||||
|
iter,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub const ENCODING_NAME: &str = "RLE";
|
pub const ENCODING_NAME: &str = "RLE";
|
||||||
|
@ -270,34 +271,131 @@ impl<T: PartialOrd + Debug + Copy> RLE<T> {
|
||||||
|
|
||||||
/// Returns the logical value present at the provided row id.
|
/// Returns the logical value present at the provided row id.
|
||||||
///
|
///
|
||||||
/// N.B right now this doesn't discern between an invalid row id and a NULL
|
/// TODO(edd): a sparse index on this can help with materialisation cost by
|
||||||
/// value at a valid location.
|
/// providing starting indexes into the run length collection.
|
||||||
pub fn value(&self, _row_id: u32) -> Option<T> {
|
pub fn value(&self, row_id: u32) -> Option<T> {
|
||||||
todo!()
|
assert!(
|
||||||
|
row_id < self.num_rows(),
|
||||||
|
"row_id {:?} out of bounds for {:?} rows",
|
||||||
|
row_id,
|
||||||
|
self.num_rows()
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut ordinal_offset = 0;
|
||||||
|
for (rl, v) in &self.run_lengths {
|
||||||
|
if ordinal_offset + rl > row_id {
|
||||||
|
// this run-length overlaps desired row id
|
||||||
|
return *v;
|
||||||
|
}
|
||||||
|
ordinal_offset += rl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// we are guaranteed to find a value at the provided row_id because
|
||||||
|
// `row_id < num_rows`
|
||||||
|
unreachable!(
|
||||||
|
"could not find value at row ID {:?}. num_rows = {:?}",
|
||||||
|
row_id,
|
||||||
|
self.num_rows()
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn check_row_ids_ordered(&self, row_ids: &[u32]) -> bool {
|
||||||
|
if row_ids.is_empty() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut last = row_ids[0];
|
||||||
|
for &row_id in row_ids.iter().skip(1) {
|
||||||
|
if row_id <= last {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
last = row_id;
|
||||||
|
}
|
||||||
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Materialises a vector of references to the decoded values in the
|
/// Materialises a vector of references to the decoded values in the
|
||||||
/// provided row ids.
|
/// provided ordered set of row ids.
|
||||||
///
|
///
|
||||||
/// NULL values are represented by None. It is the caller's responsibility
|
/// NULL values are represented by None.
|
||||||
/// to ensure row ids are a monotonically increasing set.
|
///
|
||||||
pub fn values<'a>(
|
/// # Panics
|
||||||
&'a self,
|
///
|
||||||
_row_ids: &[u32],
|
/// The behaviour of providing row IDs that are not an ordered set is
|
||||||
mut _dst: Vec<Option<&'a str>>,
|
/// undefined. `values` may panic if the provided row IDs are not an
|
||||||
) -> Vec<Option<T>> {
|
/// ordered set in ascending order.
|
||||||
todo!()
|
///
|
||||||
|
/// Panics if the number of row IDs requested is more than the number of
|
||||||
|
/// rows in the column.
|
||||||
|
///
|
||||||
|
/// Panics if a requested row ID is out of bounds of the ordinal offset of
|
||||||
|
/// a logical value.
|
||||||
|
///
|
||||||
|
pub fn values(&self, row_ids: &[u32], mut dst: Vec<Option<T>>) -> Vec<Option<T>> {
|
||||||
|
assert!(
|
||||||
|
row_ids.len() < self.num_rows() as usize,
|
||||||
|
"more row_ids {:?} than rows {:?}",
|
||||||
|
row_ids.len(),
|
||||||
|
self.num_rows()
|
||||||
|
);
|
||||||
|
|
||||||
|
dst.clear();
|
||||||
|
dst.reserve(row_ids.len());
|
||||||
|
|
||||||
|
// Ensure row ids ordered
|
||||||
|
debug_assert!(self.check_row_ids_ordered(row_ids));
|
||||||
|
|
||||||
|
let mut curr_logical_row_id = 0;
|
||||||
|
let (mut curr_entry_rl, mut curr_value) = self.run_lengths[0];
|
||||||
|
|
||||||
|
let mut i = 1;
|
||||||
|
for &row_id in row_ids {
|
||||||
|
assert!(
|
||||||
|
row_id < self.num_rows(),
|
||||||
|
"row_id {:?} beyond max row {:?}",
|
||||||
|
row_id,
|
||||||
|
self.num_rows() - 1
|
||||||
|
);
|
||||||
|
|
||||||
|
while curr_logical_row_id + curr_entry_rl <= row_id {
|
||||||
|
// this encoded entry does not cover the row we need.
|
||||||
|
// move on to next entry
|
||||||
|
curr_logical_row_id += curr_entry_rl;
|
||||||
|
curr_entry_rl = self.run_lengths[i].0;
|
||||||
|
curr_value = self.run_lengths[i].1;
|
||||||
|
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// this encoded entry covers the row_id we want.
|
||||||
|
dst.push(curr_value);
|
||||||
|
|
||||||
|
curr_logical_row_id += 1; // move forwards a logical row
|
||||||
|
curr_entry_rl -= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert_eq!(row_ids.len(), dst.len());
|
||||||
|
dst
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns references to the logical (decoded) values for all the rows in
|
/// Returns references to the logical (decoded) values for all the rows in
|
||||||
/// the column.
|
/// the column.
|
||||||
///
|
///
|
||||||
/// NULL values are represented by None.
|
/// NULL values are represented by None.
|
||||||
pub fn all_values<'a>(&'a self, mut _dst: Vec<Option<&'a str>>) -> Vec<Option<T>> {
|
pub fn all_values(&self, mut dst: Vec<Option<T>>) -> Vec<Option<T>> {
|
||||||
todo!()
|
dst.clear();
|
||||||
|
dst.reserve(self.num_rows as usize);
|
||||||
|
|
||||||
|
for (rl, v) in &self.run_lengths {
|
||||||
|
dst.extend(iter::repeat(v).take(*rl as usize));
|
||||||
|
}
|
||||||
|
dst
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns true if a non-null value exists at any of the row ids.
|
/// Returns true if a non-null value exists at any of the row ids.
|
||||||
|
///
|
||||||
|
/// TODO(edd): this needs implementing when we push down NULL predicate
|
||||||
|
/// support.
|
||||||
pub fn has_non_null_value(&self, _row_ids: &[u32]) -> bool {
|
pub fn has_non_null_value(&self, _row_ids: &[u32]) -> bool {
|
||||||
todo!()
|
todo!()
|
||||||
}
|
}
|
||||||
|
@ -423,13 +521,93 @@ mod test {
|
||||||
fn size() {}
|
fn size() {}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn value() {}
|
fn value() {
|
||||||
|
let mut enc = RLE::default();
|
||||||
|
enc.push_none();
|
||||||
|
enc.push_additional(Some(45), 3);
|
||||||
|
enc.push_additional(Some(90), 2);
|
||||||
|
enc.push(21);
|
||||||
|
|
||||||
|
assert_eq!(enc.value(0), None);
|
||||||
|
assert_eq!(enc.value(1), Some(45));
|
||||||
|
assert_eq!(enc.value(3), Some(45));
|
||||||
|
assert_eq!(enc.value(4), Some(90));
|
||||||
|
assert_eq!(enc.value(6), Some(21));
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn values() {}
|
fn check_row_ids_ordered() {
|
||||||
|
let cases = vec![
|
||||||
|
(&[0, 1, 2][..], true),
|
||||||
|
(&[0], true),
|
||||||
|
(&[], true),
|
||||||
|
(&[0, 2], true),
|
||||||
|
(&[1, 2], true),
|
||||||
|
(&[0, 0, 2], false),
|
||||||
|
(&[0, 1, 0], false),
|
||||||
|
(&[2, 1, 0], false),
|
||||||
|
(&[1, 1], false),
|
||||||
|
(&[1, 2, 2], false),
|
||||||
|
];
|
||||||
|
|
||||||
|
let enc: RLE<i16> = RLE::default();
|
||||||
|
|
||||||
|
for (input, exp) in cases {
|
||||||
|
assert_eq!(enc.check_row_ids_ordered(input), exp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn all_values() {}
|
fn values() {
|
||||||
|
let mut enc = RLE::default();
|
||||||
|
enc.push_none();
|
||||||
|
enc.push_additional(Some(45), 3);
|
||||||
|
enc.push_additional(Some(90), 2);
|
||||||
|
enc.push(21);
|
||||||
|
|
||||||
|
// ensure buffer cleared by populating it
|
||||||
|
assert_eq!(
|
||||||
|
enc.values(&[0, 1, 2], vec![Some(33)]),
|
||||||
|
vec![None, Some(45), Some(45)]
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
enc.values(&[0, 1, 2, 3, 4], vec![]),
|
||||||
|
vec![None, Some(45), Some(45), Some(45), Some(90)]
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(enc.values(&[2, 5], vec![]), vec![Some(45), Some(90)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn all_values() {
|
||||||
|
let mut enc = RLE::default();
|
||||||
|
// ensure buffer cleared by populating it
|
||||||
|
assert!(enc.all_values(vec![Some(33)]).is_empty());
|
||||||
|
|
||||||
|
enc.push_additional(Some(45), 3);
|
||||||
|
enc.push_additional(Some(90), 2);
|
||||||
|
enc.push_additional(None, 2);
|
||||||
|
enc.push(21);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
enc.all_values(vec![None, Some(99)]),
|
||||||
|
vec![
|
||||||
|
Some(45),
|
||||||
|
Some(45),
|
||||||
|
Some(45),
|
||||||
|
Some(90),
|
||||||
|
Some(90),
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
Some(21)
|
||||||
|
]
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut enc: RLE<u8> = RLE::default();
|
||||||
|
enc.push_none();
|
||||||
|
assert_eq!(enc.all_values(vec![]), vec![None]);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn row_ids_filter_eq() {}
|
fn row_ids_filter_eq() {}
|
||||||
|
|
Loading…
Reference in New Issue