Merge pull request #371 from influxdata/er/feat/dict-encoding
feat: add dictionary RLE encoding to Segment Store
pull/24376/head
commit
daf89c7d22
|
@ -0,0 +1,269 @@
|
|||
use std::mem::size_of;
|
||||
|
||||
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
|
||||
use rand::prelude::*;
|
||||
|
||||
use delorean_arrow::arrow::datatypes::*;
|
||||
use delorean_segment_store::column::fixed::Fixed;
|
||||
use delorean_segment_store::column::fixed_null::FixedNull;
|
||||
|
||||
const ROWS: [usize; 5] = [10, 100, 1_000, 10_000, 60_000];
|
||||
const CHUNKS: [Chunks; 4] = [
|
||||
Chunks::All,
|
||||
Chunks::Even,
|
||||
Chunks::ManySmall,
|
||||
Chunks::RandomTenPercent,
|
||||
];
|
||||
|
||||
const PHYSICAL_TYPES: [PhysicalType; 3] = [PhysicalType::I64, PhysicalType::I32, PhysicalType::I16];
|
||||
|
||||
/// The pattern of row ids handed to the `sum` call being benchmarked.
#[derive(Debug)]
enum Chunks {
    /// Sum up the entire column.
    All,
    /// Sum up the even rows (about half of the column).
    Even,
    /// Sum up many small runs of consecutive rows (runs of 3 rows, as produced
    /// by `gen_many_small_chunk`; about a third of the column).
    ManySmall,
    /// Sum up a random selection of roughly 10% of the rows.
    RandomTenPercent,
}
|
||||
|
||||
/// Selects which column encoding a benchmark group exercises.
enum EncType {
    /// Benchmark the `Fixed` encoding.
    Fixed,
    /// Benchmark the arrow-backed `FixedNull` encoding.
    Arrow,
}
|
||||
|
||||
/// The integer width used to store the benchmarked column's values.
enum PhysicalType {
    /// 64-bit signed integers.
    I64,
    /// 32-bit signed integers.
    I32,
    /// 16-bit signed integers.
    I16,
}
|
||||
|
||||
fn encoding_sum(c: &mut Criterion) {
|
||||
benchmark_plain_sum(
|
||||
c,
|
||||
"encoding_fixed_sum",
|
||||
EncType::Fixed,
|
||||
&ROWS,
|
||||
&CHUNKS,
|
||||
&PHYSICAL_TYPES,
|
||||
);
|
||||
benchmark_plain_sum(
|
||||
c,
|
||||
"encoding_arrow_sum",
|
||||
EncType::Arrow,
|
||||
&ROWS,
|
||||
&CHUNKS,
|
||||
&PHYSICAL_TYPES,
|
||||
);
|
||||
}
|
||||
|
||||
fn benchmark_plain_sum(
|
||||
c: &mut Criterion,
|
||||
benchmark_group_name: &str,
|
||||
enc_type: EncType,
|
||||
row_size: &[usize],
|
||||
chunks: &[Chunks],
|
||||
physical_type: &[PhysicalType],
|
||||
) {
|
||||
let mut group = c.benchmark_group(benchmark_group_name);
|
||||
for &num_rows in row_size {
|
||||
for chunk in chunks {
|
||||
for pt in physical_type {
|
||||
// Encoded incrementing values.
|
||||
|
||||
let input: Vec<usize>;
|
||||
match chunk {
|
||||
Chunks::All => input = (0..num_rows).collect(),
|
||||
Chunks::Even => input = gen_even_chunk(num_rows),
|
||||
Chunks::ManySmall => input = gen_many_small_chunk(num_rows),
|
||||
Chunks::RandomTenPercent => input = gen_random_10_percent(num_rows),
|
||||
}
|
||||
|
||||
match pt {
|
||||
PhysicalType::I64 => {
|
||||
group
|
||||
.throughput(Throughput::Bytes((input.len() * size_of::<i64>()) as u64));
|
||||
|
||||
match enc_type {
|
||||
EncType::Fixed => {
|
||||
let encoding = Fixed::<i64>::from(
|
||||
(0..num_rows as i64).collect::<Vec<i64>>().as_slice(),
|
||||
);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::from_parameter(format!(
|
||||
"{:?}_{:?}_i64",
|
||||
num_rows, chunk
|
||||
)),
|
||||
&input,
|
||||
|b, input| {
|
||||
b.iter(|| {
|
||||
// do work
|
||||
let _ = encoding.sum::<i64>(&input);
|
||||
});
|
||||
},
|
||||
);
|
||||
}
|
||||
EncType::Arrow => {
|
||||
let encoding = FixedNull::<Int64Type>::from(
|
||||
(0..num_rows as i64).collect::<Vec<i64>>().as_slice(),
|
||||
);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::from_parameter(format!(
|
||||
"{:?}_{:?}_i64",
|
||||
num_rows, chunk
|
||||
)),
|
||||
&input,
|
||||
|b, input| {
|
||||
b.iter(|| {
|
||||
// do work
|
||||
let _ = encoding.sum(&input);
|
||||
});
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
PhysicalType::I32 => {
|
||||
group
|
||||
.throughput(Throughput::Bytes((input.len() * size_of::<i64>()) as u64));
|
||||
|
||||
match enc_type {
|
||||
EncType::Fixed => {
|
||||
let encoding = Fixed::<i32>::from(
|
||||
(0..num_rows as i32).collect::<Vec<i32>>().as_slice(),
|
||||
);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::from_parameter(format!(
|
||||
"{:?}_{:?}_i32",
|
||||
num_rows, chunk
|
||||
)),
|
||||
&input,
|
||||
|b, input| {
|
||||
b.iter(|| {
|
||||
// do work
|
||||
let _ = encoding.sum::<i32>(&input);
|
||||
});
|
||||
},
|
||||
);
|
||||
}
|
||||
EncType::Arrow => {
|
||||
let encoding = FixedNull::<Int32Type>::from(
|
||||
(0..num_rows as i32).collect::<Vec<i32>>().as_slice(),
|
||||
);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::from_parameter(format!(
|
||||
"{:?}_{:?}_i32",
|
||||
num_rows, chunk
|
||||
)),
|
||||
&input,
|
||||
|b, input| {
|
||||
b.iter(|| {
|
||||
// do work
|
||||
let _ = encoding.sum(&input);
|
||||
});
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
PhysicalType::I16 => {
|
||||
group
|
||||
.throughput(Throughput::Bytes((input.len() * size_of::<i64>()) as u64));
|
||||
|
||||
match enc_type {
|
||||
EncType::Fixed => {
|
||||
let encoding = Fixed::<i16>::from(
|
||||
(0..num_rows as i16).collect::<Vec<i16>>().as_slice(),
|
||||
);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::from_parameter(format!(
|
||||
"{:?}_{:?}_i16",
|
||||
num_rows, chunk
|
||||
)),
|
||||
&input,
|
||||
|b, input| {
|
||||
b.iter(|| {
|
||||
// do work
|
||||
let _ = encoding.sum::<i16>(&input);
|
||||
});
|
||||
},
|
||||
);
|
||||
}
|
||||
EncType::Arrow => {
|
||||
let encoding = FixedNull::<Int16Type>::from(
|
||||
(0..num_rows as i16).collect::<Vec<i16>>().as_slice(),
|
||||
);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::from_parameter(format!(
|
||||
"{:?}_{:?}_i16",
|
||||
num_rows, chunk
|
||||
)),
|
||||
&input,
|
||||
|b, input| {
|
||||
b.iter(|| {
|
||||
// do work
|
||||
let _ = encoding.sum(&input);
|
||||
});
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// Selects every even row id below `rows`, i.e. about 50% of the rows are
// requested.
fn gen_even_chunk(rows: usize) -> Vec<usize> {
    (0..rows).step_by(2).collect()
}
|
||||
|
||||
// Selects a run of 3 consecutive row ids starting at every multiple of 9, so
// roughly a third of the rows are requested.
fn gen_many_small_chunk(rows: usize) -> Vec<usize> {
    // A row belongs to a run exactly when it is one of the first three rows
    // following (and including) a multiple of 9.
    (0..rows).filter(|row| row % 9 < 3).collect()
}
|
||||
|
||||
// generate random 10% sequence.
|
||||
fn gen_random_10_percent(rows: usize) -> Vec<usize> {
|
||||
let mut rnd = thread_rng();
|
||||
let mut input = vec![];
|
||||
|
||||
for i in 0..rows {
|
||||
if rnd.gen::<f64>() < 0.1 {
|
||||
input.push(i);
|
||||
}
|
||||
}
|
||||
|
||||
input
|
||||
}
|
||||
|
||||
criterion_group!(benches, encoding_sum,);
|
||||
criterion_main!(benches);
|
|
@ -1,7 +1,10 @@
|
|||
pub mod cmp;
|
||||
pub mod dictionary;
|
||||
pub mod fixed;
|
||||
pub mod fixed_null;
|
||||
|
||||
use croaring::Bitmap;
|
||||
|
||||
use delorean_arrow::arrow;
|
||||
|
||||
/// The possible logical types that column values can have. All values in a
|
||||
|
@ -104,3 +107,41 @@ pub enum Values {
|
|||
// Arbitrary byte arrays
|
||||
ByteArray(arrow::array::UInt8Array),
|
||||
}
|
||||
|
||||
/// Represents vectors of row IDs, which are usually used for intermediate
|
||||
/// results as a method of late materialisation.
|
||||
#[derive(PartialEq, Debug)]
|
||||
pub enum RowIDs {
|
||||
Bitmap(Bitmap),
|
||||
Vector(Vec<u32>),
|
||||
}
|
||||
|
||||
impl RowIDs {
|
||||
pub fn len(&self) -> usize {
|
||||
match self {
|
||||
RowIDs::Bitmap(ids) => ids.cardinality() as usize,
|
||||
RowIDs::Vector(ids) => ids.len(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
match self {
|
||||
RowIDs::Bitmap(ids) => ids.is_empty(),
|
||||
RowIDs::Vector(ids) => ids.is_empty(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn clear(&mut self) {
|
||||
match self {
|
||||
RowIDs::Bitmap(ids) => ids.clear(),
|
||||
RowIDs::Vector(ids) => ids.clear(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_range(&mut self, from: u32, to: u32) {
|
||||
match self {
|
||||
RowIDs::Bitmap(ids) => ids.add_range(from as u64..to as u64),
|
||||
RowIDs::Vector(ids) => ids.extend(from..to),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,969 @@
|
|||
use std::collections::{BTreeMap, BTreeSet};
|
||||
use std::convert::From;
|
||||
use std::iter;
|
||||
|
||||
use croaring::Bitmap;
|
||||
|
||||
use delorean_arrow::arrow::array::{Array, StringArray};
|
||||
|
||||
use crate::column::{cmp, RowIDs};
|
||||
|
||||
// `RLE` is a run-length encoding for dictionary columns, where all dictionary
|
||||
// entries are utf-8 valid strings.
|
||||
#[derive(Default)]
|
||||
pub struct RLE {
|
||||
// TODO(edd): revisit choice of storing owned string versus references.
|
||||
|
||||
// The mapping between an entry and its assigned index.
|
||||
entry_index: BTreeMap<Option<String>, u32>,
|
||||
|
||||
// The mapping between an index and its entry.
|
||||
index_entries: Vec<Option<String>>,
|
||||
|
||||
// The set of rows that belong to each distinct value in the dictionary.
|
||||
// This allows essentially constant time grouping of rows on the column by
|
||||
// value.
|
||||
index_row_ids: BTreeMap<u32, Bitmap>,
|
||||
|
||||
// stores tuples where each pair refers to a dictionary entry and the number
|
||||
// of times the entry repeats.
|
||||
run_lengths: Vec<(u32, u32)>,
|
||||
|
||||
num_rows: u32,
|
||||
}
|
||||
|
||||
impl RLE {
|
||||
/// Adds the provided string value to the encoded data. It is the caller's
|
||||
/// responsibility to ensure that the dictionary encoded remains sorted.
|
||||
pub fn push(&mut self, v: String) {
|
||||
self.push_additional(Some(v), 1);
|
||||
}
|
||||
|
||||
/// Adds a NULL value to the encoded data. It is the caller's
|
||||
/// responsibility to ensure that the dictionary encoded remains sorted.
|
||||
pub fn push_none(&mut self) {
|
||||
self.push_additional(None, 1);
|
||||
}
|
||||
|
||||
/// Adds additional repetitions of the provided value to the encoded data.
|
||||
/// It is the caller's responsibility to ensure that the dictionary encoded
|
||||
/// remains sorted.
|
||||
pub fn push_additional(&mut self, v: Option<String>, additional: u32) {
|
||||
let idx = self.entry_index.get(&v);
|
||||
match idx {
|
||||
Some(idx) => {
|
||||
if let Some((last_idx, rl)) = self.run_lengths.last_mut() {
|
||||
if last_idx == idx {
|
||||
// update the existing run-length
|
||||
*rl += additional;
|
||||
} else {
|
||||
// start a new run-length
|
||||
self.run_lengths.push((*idx, additional));
|
||||
}
|
||||
self.index_row_ids
|
||||
.get_mut(&(*idx as u32))
|
||||
.unwrap()
|
||||
.add_range(self.num_rows as u64..self.num_rows as u64 + additional as u64);
|
||||
}
|
||||
}
|
||||
None => {
|
||||
// New dictionary entry.
|
||||
let idx = self.index_entries.len() as u32;
|
||||
if idx > 0 {
|
||||
match (&self.index_entries[idx as usize - 1], &v) {
|
||||
(None, Some(_)) => panic!("out of order dictionary insertion"),
|
||||
(Some(_), None) => {}
|
||||
(Some(a), Some(b)) => assert!(a < b),
|
||||
(_, _) => unreachable!("multiple None values"),
|
||||
}
|
||||
}
|
||||
self.index_entries.push(v.clone());
|
||||
|
||||
self.entry_index.insert(v, idx);
|
||||
self.index_row_ids.insert(idx, Bitmap::create());
|
||||
|
||||
self.run_lengths.push((idx, additional));
|
||||
self.index_row_ids
|
||||
.get_mut(&(idx as u32))
|
||||
.unwrap()
|
||||
.add_range(self.num_rows as u64..self.num_rows as u64 + additional as u64);
|
||||
}
|
||||
}
|
||||
self.num_rows += additional;
|
||||
}
|
||||
|
||||
//
|
||||
//
|
||||
// ---- Methods for getting row ids from values.
|
||||
//
|
||||
//
|
||||
|
||||
/// Populates the provided destination container with the row ids satisfying
|
||||
/// the provided predicate.
|
||||
pub fn row_ids_filter(&self, value: Option<String>, op: cmp::Operator, dst: RowIDs) -> RowIDs {
|
||||
match op {
|
||||
cmp::Operator::Equal | cmp::Operator::NotEqual => self.row_ids_equal(value, op, dst),
|
||||
cmp::Operator::LT | cmp::Operator::LTE | cmp::Operator::GT | cmp::Operator::GTE => {
|
||||
self.row_ids_cmp(value, op, dst)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Finds row ids based on = or != operator.
|
||||
fn row_ids_equal(&self, value: Option<String>, op: cmp::Operator, mut dst: RowIDs) -> RowIDs {
|
||||
dst.clear();
|
||||
let include = match op {
|
||||
cmp::Operator::Equal => true,
|
||||
cmp::Operator::NotEqual => false,
|
||||
_ => unreachable!("invalid operator"),
|
||||
};
|
||||
|
||||
if let Some(encoded_id) = self.entry_index.get(&value) {
|
||||
let mut index: u32 = 0;
|
||||
for (other_encoded_id, other_rl) in &self.run_lengths {
|
||||
let start = index;
|
||||
index += *other_rl;
|
||||
if (other_encoded_id == encoded_id) == include {
|
||||
dst.add_range(start, index)
|
||||
}
|
||||
}
|
||||
} else if let cmp::Operator::NotEqual = op {
|
||||
// special case - the column does not contain the provided
|
||||
// value and the operator is != so we need to return all
|
||||
// row ids.
|
||||
dst.add_range(0, self.num_rows)
|
||||
}
|
||||
|
||||
dst
|
||||
}
|
||||
|
||||
// Finds row ids based on <, <=, > or >= operator.
|
||||
fn row_ids_cmp(&self, value: Option<String>, op: cmp::Operator, mut dst: RowIDs) -> RowIDs {
|
||||
dst.clear();
|
||||
|
||||
// happy path - the value exists in the column
|
||||
if let Some(encoded_id) = self.entry_index.get(&value) {
|
||||
let cmp = match op {
|
||||
cmp::Operator::GT => PartialOrd::gt,
|
||||
cmp::Operator::GTE => PartialOrd::ge,
|
||||
cmp::Operator::LT => PartialOrd::lt,
|
||||
cmp::Operator::LTE => PartialOrd::le,
|
||||
_ => unreachable!("operator not supported"),
|
||||
};
|
||||
|
||||
let mut index: u32 = 0; // current position in the column.
|
||||
for (other_encoded_id, other_rl) in &self.run_lengths {
|
||||
let start = index;
|
||||
index += *other_rl;
|
||||
if cmp(other_encoded_id, encoded_id) {
|
||||
dst.add_range(start, index)
|
||||
}
|
||||
}
|
||||
return dst;
|
||||
}
|
||||
|
||||
match op {
|
||||
cmp::Operator::GT | cmp::Operator::GTE => {
|
||||
// find the first decoded value that satisfies the predicate.
|
||||
for (other, other_encoded_id) in &self.entry_index {
|
||||
if other > &value {
|
||||
// change filter from either `x > value` or `x >= value` to `x >= other`
|
||||
return self.row_ids_cmp(other.clone(), cmp::Operator::GTE, dst);
|
||||
}
|
||||
}
|
||||
}
|
||||
cmp::Operator::LT | cmp::Operator::LTE => {
|
||||
// find the first decoded value that satisfies the predicate.
|
||||
// Note iteration is in reverse
|
||||
for (other, other_encoded_id) in self.entry_index.iter().rev() {
|
||||
if other < &value {
|
||||
// change filter from either `x < value` or `x <= value` to `x <= other`
|
||||
return self.row_ids_cmp(other.clone(), cmp::Operator::LTE, dst);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => unreachable!("operator not supported"),
|
||||
}
|
||||
dst
|
||||
}
|
||||
|
||||
// The set of row ids for each distinct value in the column.
|
||||
pub fn group_row_ids(&self) -> &BTreeMap<u32, Bitmap> {
|
||||
&self.index_row_ids
|
||||
}
|
||||
|
||||
//
|
||||
//
|
||||
// ---- Methods for getting materialised values.
|
||||
//
|
||||
//
|
||||
|
||||
pub fn dictionary(&self) -> &[Option<String>] {
|
||||
&self.index_entries
|
||||
}
|
||||
|
||||
/// Returns the logical value present at the provided row id.
|
||||
///
|
||||
/// N.B right now this doesn't discern between an invalid row id and a NULL
|
||||
/// value at a valid location.
|
||||
pub fn value(&self, row_id: u32) -> &Option<String> {
|
||||
if row_id < self.num_rows {
|
||||
let mut total = 0;
|
||||
for (encoded_id, rl) in &self.run_lengths {
|
||||
if total + rl > row_id {
|
||||
// this run-length overlaps desired row id
|
||||
return &self.index_entries[*encoded_id as usize];
|
||||
}
|
||||
total += rl;
|
||||
}
|
||||
}
|
||||
&None
|
||||
}
|
||||
|
||||
/// Materialises the decoded value belonging to the provided encoded id.
|
||||
///
|
||||
/// Panics if there is no decoded value for the provided id
|
||||
pub fn decode_id(&self, encoded_id: u32) -> Option<String> {
|
||||
self.index_entries[encoded_id as usize].clone()
|
||||
}
|
||||
|
||||
/// Materialises a vector of references to the decoded values in the
|
||||
/// provided row ids.
|
||||
///
|
||||
/// NULL values are represented by None. It is the caller's responsibility
|
||||
/// to ensure row ids are a monotonically increasing set.
|
||||
pub fn values<'a>(
|
||||
&'a self,
|
||||
row_ids: &[u32],
|
||||
mut dst: Vec<&'a Option<String>>,
|
||||
) -> Vec<&'a Option<String>> {
|
||||
dst.clear();
|
||||
dst.reserve(row_ids.len());
|
||||
|
||||
let mut curr_logical_row_id = 0;
|
||||
|
||||
let (mut curr_entry_id, mut curr_entry_rl) = self.run_lengths[0];
|
||||
|
||||
let mut i = 1;
|
||||
for row_id in row_ids {
|
||||
if row_id >= &self.num_rows {
|
||||
return dst; // row ids beyond length of column
|
||||
}
|
||||
|
||||
while curr_logical_row_id + curr_entry_rl <= *row_id {
|
||||
// this encoded entry does not cover the row we need.
|
||||
// move on to next entry
|
||||
curr_logical_row_id += curr_entry_rl;
|
||||
curr_entry_id = self.run_lengths[i].0;
|
||||
curr_entry_rl = self.run_lengths[i].1;
|
||||
|
||||
i += 1;
|
||||
}
|
||||
|
||||
// this encoded entry covers the row_id we want.
|
||||
// let value = &self.index_entries[curr_entry_id as usize];
|
||||
dst.push(&self.index_entries[curr_entry_id as usize]);
|
||||
curr_logical_row_id += 1;
|
||||
curr_entry_rl -= 1;
|
||||
}
|
||||
|
||||
assert_eq!(row_ids.len(), dst.len());
|
||||
dst
|
||||
}
|
||||
|
||||
/// Returns references to the logical (decoded) values for all the rows in
|
||||
/// the column.
|
||||
///
|
||||
/// NULL values are represented by None.
|
||||
///
|
||||
pub fn all_values<'a>(
|
||||
&'a mut self,
|
||||
mut dst: Vec<&'a Option<String>>,
|
||||
) -> Vec<&'a Option<String>> {
|
||||
dst.clear();
|
||||
dst.reserve(self.num_rows as usize);
|
||||
|
||||
for (idx, rl) in &self.run_lengths {
|
||||
let v = &self.index_entries[*idx as usize];
|
||||
dst.extend(iter::repeat(v).take(*rl as usize));
|
||||
}
|
||||
dst
|
||||
}
|
||||
|
||||
/// Returns references to the unique set of values encoded at each of the
|
||||
/// provided ids.
|
||||
///
|
||||
/// It is the caller's responsibility to ensure row ids are a monotonically
|
||||
/// increasing set.
|
||||
pub fn distinct_values<'a>(
|
||||
&'a self,
|
||||
row_ids: &[u32],
|
||||
mut dst: BTreeSet<&'a String>,
|
||||
) -> BTreeSet<&'a String> {
|
||||
// TODO(edd): Perf... We can improve on this if we know the column is
|
||||
// totally ordered.
|
||||
dst.clear();
|
||||
|
||||
// Used to mark off when a decoded value has been added to the result
|
||||
// set. TODO(perf) - this might benefit from being pooled somehow.
|
||||
let mut encoded_values = Vec::with_capacity(self.index_entries.len());
|
||||
encoded_values.resize(self.index_entries.len(), false);
|
||||
|
||||
let mut found = 0;
|
||||
if let Some(i) = self.entry_index.get(&None) {
|
||||
// the encoding contains NULL values, but we don't return those as
|
||||
// distinct values. So we will mark them.
|
||||
encoded_values[*i as usize] = true;
|
||||
found += 1;
|
||||
}
|
||||
|
||||
let mut curr_logical_row_id = 0;
|
||||
let (mut curr_entry_id, mut curr_entry_rl) = self.run_lengths[0];
|
||||
|
||||
let mut i = 1;
|
||||
'by_row: for row_id in row_ids {
|
||||
if row_id >= &self.num_rows {
|
||||
return dst; // rows beyond the column size
|
||||
}
|
||||
|
||||
while curr_logical_row_id + curr_entry_rl <= *row_id {
|
||||
// this encoded entry does not cover the row we need.
|
||||
// move on to next entry
|
||||
curr_logical_row_id += curr_entry_rl;
|
||||
curr_entry_id = self.run_lengths[i].0;
|
||||
curr_entry_rl = self.run_lengths[i].1;
|
||||
|
||||
i += 1;
|
||||
}
|
||||
|
||||
// encoded value not already in result set.
|
||||
if !encoded_values[curr_entry_id as usize] {
|
||||
// annoying unwrap. We know that there can't be None here as
|
||||
// we removed that at the top of the method.
|
||||
dst.insert(self.index_entries[curr_entry_id as usize].as_ref().unwrap());
|
||||
encoded_values[curr_entry_id as usize] = true;
|
||||
found += 1;
|
||||
}
|
||||
|
||||
if found == encoded_values.len() {
|
||||
// all distinct values have been read
|
||||
break 'by_row;
|
||||
}
|
||||
|
||||
curr_logical_row_id += 1;
|
||||
curr_entry_rl -= 1;
|
||||
}
|
||||
|
||||
assert!(dst.len() <= self.index_entries.len());
|
||||
dst
|
||||
}
|
||||
|
||||
//
|
||||
//
|
||||
// ---- Methods for getting encoded values directly, typically to be used
|
||||
// as part of group keys.
|
||||
//
|
||||
//
|
||||
|
||||
/// Return the raw encoded values for the provided logical row ids.
|
||||
/// Encoded values for NULL values are included.
|
||||
///
|
||||
pub fn encoded_values(&self, row_ids: &[u32], mut dst: Vec<u32>) -> Vec<u32> {
|
||||
dst.clear();
|
||||
dst.reserve(row_ids.len());
|
||||
|
||||
let mut curr_logical_row_id = 0;
|
||||
|
||||
let (mut curr_entry_id, mut curr_entry_rl) = self.run_lengths[0];
|
||||
|
||||
let mut i = 1;
|
||||
for row_id in row_ids {
|
||||
while curr_logical_row_id + curr_entry_rl <= *row_id {
|
||||
// this encoded entry does not cover the row we need.
|
||||
// move on to next entry
|
||||
curr_logical_row_id += curr_entry_rl;
|
||||
curr_entry_id = self.run_lengths[i].0;
|
||||
curr_entry_rl = self.run_lengths[i].1;
|
||||
|
||||
i += 1;
|
||||
}
|
||||
|
||||
// this entry covers the row_id we want.
|
||||
dst.push(curr_entry_id);
|
||||
curr_logical_row_id += 1;
|
||||
curr_entry_rl -= 1;
|
||||
}
|
||||
|
||||
assert_eq!(row_ids.len(), dst.len());
|
||||
dst
|
||||
}
|
||||
|
||||
/// Returns all encoded values for the column including the encoded value
|
||||
/// for any NULL values.
|
||||
pub fn all_encoded_values(&self, mut dst: Vec<u32>) -> Vec<u32> {
|
||||
dst.clear();
|
||||
dst.reserve(self.num_rows as usize);
|
||||
|
||||
for (idx, rl) in &self.run_lengths {
|
||||
dst.extend(iter::repeat(*idx).take(*rl as usize));
|
||||
}
|
||||
dst
|
||||
}
|
||||
|
||||
//
|
||||
//
|
||||
// ---- Methods for optimising schema exploration.
|
||||
//
|
||||
//
|
||||
|
||||
/// Efficiently determines if this column contains non-null values that
|
||||
/// differ from the provided set of values.
|
||||
///
|
||||
/// Informally, this method provides an efficient way of answering "is it
|
||||
/// worth spending time reading this column for values or do I already have
|
||||
/// all the values in a set".
|
||||
///
|
||||
/// More formally, this method returns the relative complement of this
|
||||
/// column's values in the provided set of values.
|
||||
///
|
||||
/// This method would be useful when the same column is being read across
|
||||
/// many segments, and one wants to determine to the total distinct set of
|
||||
/// values. By exposing the current result set to each column (as an
|
||||
/// argument to `contains_other_values`) columns can be short-circuited when
|
||||
/// they only contain values that have already been discovered.
|
||||
///
|
||||
pub fn contains_other_values(&self, values: &BTreeSet<&String>) -> bool {
|
||||
let mut encoded_values = self.index_entries.len();
|
||||
if self.entry_index.contains_key(&None) {
|
||||
encoded_values -= 1;
|
||||
}
|
||||
|
||||
if encoded_values > values.len() {
|
||||
return true;
|
||||
}
|
||||
|
||||
for key in self.entry_index.keys() {
|
||||
if let Some(key) = key {
|
||||
if !values.contains(key) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// skip NULL entry
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Determines if the column contains at least one non-null value at
|
||||
/// any of the provided row ids.
|
||||
///
|
||||
/// It is the caller's responsibility to ensure row ids are a monotonically
|
||||
/// increasing set.
|
||||
pub fn has_non_null_value(&self, row_ids: &[u32]) -> bool {
|
||||
match self.entry_index.get(&None) {
|
||||
Some(&id) => self.find_non_null_value(id, row_ids),
|
||||
None => {
|
||||
// There are no NULL entries in this column so just find a row id
|
||||
// that falls on any row in the column.
|
||||
for &id in row_ids {
|
||||
if id < self.num_rows {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true if there exists an encoded non-null value at any of the row
|
||||
// ids.
|
||||
fn find_non_null_value(&self, null_encoded_id: u32, row_ids: &[u32]) -> bool {
|
||||
let mut curr_logical_row_id = 0;
|
||||
|
||||
let (mut curr_encoded_id, mut curr_entry_rl) = self.run_lengths[0];
|
||||
|
||||
let mut i = 1;
|
||||
for &row_id in row_ids {
|
||||
if row_id >= self.num_rows {
|
||||
return false; // all other row ids beyond column.
|
||||
}
|
||||
|
||||
while curr_logical_row_id + curr_entry_rl <= row_id {
|
||||
// this encoded entry does not cover the row we need.
|
||||
// move on to next encoded id
|
||||
curr_logical_row_id += curr_entry_rl;
|
||||
curr_encoded_id = self.run_lengths[i].0;
|
||||
curr_entry_rl = self.run_lengths[i].1;
|
||||
|
||||
i += 1;
|
||||
}
|
||||
|
||||
// this entry covers the row_id we want if it points to a non-null value.
|
||||
if curr_encoded_id != null_encoded_id {
|
||||
return true;
|
||||
}
|
||||
curr_logical_row_id += 1;
|
||||
curr_entry_rl -= 1;
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<Vec<&str>> for RLE {
|
||||
fn from(vec: Vec<&str>) -> Self {
|
||||
let mut drle = Self::default();
|
||||
for v in vec {
|
||||
drle.push(v.to_string());
|
||||
}
|
||||
drle
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<Vec<String>> for RLE {
|
||||
fn from(vec: Vec<String>) -> Self {
|
||||
let mut drle = Self::default();
|
||||
for v in vec {
|
||||
drle.push(v);
|
||||
}
|
||||
drle
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<Vec<Option<&str>>> for RLE {
|
||||
fn from(vec: Vec<Option<&str>>) -> Self {
|
||||
let mut drle = Self::default();
|
||||
for v in vec {
|
||||
match v {
|
||||
Some(x) => drle.push(x.to_string()),
|
||||
None => drle.push_none(),
|
||||
}
|
||||
}
|
||||
drle
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<Vec<Option<String>>> for RLE {
|
||||
fn from(vec: Vec<Option<String>>) -> Self {
|
||||
let mut drle = Self::default();
|
||||
for v in vec {
|
||||
match v {
|
||||
Some(x) => drle.push(x),
|
||||
None => drle.push_none(),
|
||||
}
|
||||
}
|
||||
drle
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds an encoding from an arrow string array by pushing every element in
/// order; null slots are pushed as NULL. The array contents must satisfy the
/// ordering requirements of `RLE::push_additional`.
impl From<StringArray> for RLE {
    fn from(arr: StringArray) -> Self {
        let mut drle = Self::default();
        for i in 0..arr.len() {
            if arr.is_null(i) {
                drle.push_none();
            } else {
                drle.push(arr.value(i).to_string());
            }
        }
        drle
    }
}
|
||||
|
||||
impl std::fmt::Display for RLE {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"[RLE] rows: {:?} dict entries: {}, runs: {} ",
|
||||
self.num_rows,
|
||||
self.index_entries.len(),
|
||||
self.run_lengths.len()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
use crate::column::{cmp, RowIDs};
|
||||
|
||||
// Pushing extends the current run for a repeated value and starts a new run
// for a new value; NULL can be pushed last.
#[test]
fn rle_push() {
    let mut drle = super::RLE::from(vec!["hello"; 4]);
    drle.push_additional(Some("hello".to_string()), 1);
    drle.push("world".to_string());

    let hello = Some("hello".to_string());
    let world = Some("world".to_string());
    let zoo = Some("zoo".to_string());
    let null: Option<String> = None;

    let mut expected = vec![&hello; 5];
    expected.push(&world);
    assert_eq!(drle.all_values(vec![]), expected);

    drle.push_additional(Some("zoo".to_string()), 3);
    drle.push_none();

    expected.extend(vec![&zoo; 3]);
    expected.push(&null);
    assert_eq!(drle.all_values(vec![]), expected);
}
|
||||
|
||||
// A new string entry cannot be added once NULL is in the dictionary.
#[test]
#[should_panic]
fn rle_push_none_first() {
    let mut drle = super::RLE::default();
    drle.push_additional(None, 1);
    drle.push("hello".to_string());
}
|
||||
|
||||
// New dictionary entries must be pushed in sorted order.
#[test]
#[should_panic]
fn rle_push_wrong_order() {
    let mut drle = super::RLE::default();
    for v in vec!["b", "a"] {
        drle.push(v.to_string());
    }
}
|
||||
|
||||
// `all_values` overwrites the contents of the destination buffer while
// reusing its allocation.
#[test]
fn all_values() {
    let mut drle = super::RLE::from(vec!["hello", "zoo"]);

    // A destination that already holds four (stale) values.
    let zoo = Some("zoo".to_string());
    let dst = vec![&zoo, &zoo, &zoo, &zoo];
    let got = drle.all_values(dst);

    let hello = Some("hello".to_string());
    assert_eq!(got, vec![&hello, &zoo]);
    assert_eq!(got.capacity(), 4);
}
|
||||
|
||||
// `=` and `!=` predicates, for values inside and outside the dictionary.
#[test]
fn row_ids_filter_equal() {
    let mut drle = super::RLE::default();
    for (value, count) in vec![("east", 3), ("north", 1), ("east", 5), ("south", 2)] {
        drle.push_additional(Some(value.to_string()), count);
    }

    // (predicate value, operator, expected row ids)
    let cases = vec![
        ("east", cmp::Operator::Equal, vec![0, 1, 2, 4, 5, 6, 7, 8]),
        ("south", cmp::Operator::Equal, vec![9, 10]),
        ("foo", cmp::Operator::Equal, vec![]),
        ("foo", cmp::Operator::NotEqual, (0..11).collect::<Vec<_>>()),
        ("east", cmp::Operator::NotEqual, vec![3, 9, 10]),
    ];

    for (value, op, expected) in cases {
        let ids = drle.row_ids_filter(Some(value.to_string()), op, RowIDs::Vector(vec![]));
        assert_eq!(ids, RowIDs::Vector(expected), "predicate value {:?}", value);
    }
}
|
||||
|
||||
// `<`, `<=`, `>` and `>=` predicates, for values inside and outside the
// dictionary.
#[test]
fn row_ids_filter_cmp() {
    let mut drle = super::RLE::default();
    let runs = vec![
        ("east", 3),  // 0,1,2
        ("north", 1), // 3
        ("east", 5),  // 4,5,6,7,8
        ("south", 2), // 9,10
        ("west", 1),  // 11
        ("north", 1), // 12
        ("west", 5),  // 13,14,15,16,17
    ];
    for (value, count) in runs {
        drle.push_additional(Some(value.to_string()), count);
    }

    // (predicate value, operator, expected row ids)
    let cases = vec![
        ("east", cmp::Operator::LTE, vec![0, 1, 2, 4, 5, 6, 7, 8]),
        ("east", cmp::Operator::LT, vec![]),
        (
            "north",
            cmp::Operator::GT,
            vec![9, 10, 11, 13, 14, 15, 16, 17],
        ),
        (
            "north",
            cmp::Operator::GTE,
            vec![3, 9, 10, 11, 12, 13, 14, 15, 16, 17],
        ),
        // The encoding also supports comparisons on values that don't
        // directly exist in the column.
        ("abba", cmp::Operator::GT, (0..18).collect::<Vec<u32>>()),
        (
            "east1",
            cmp::Operator::GT,
            vec![3, 9, 10, 11, 12, 13, 14, 15, 16, 17],
        ),
        (
            "east1",
            cmp::Operator::GTE,
            vec![3, 9, 10, 11, 12, 13, 14, 15, 16, 17],
        ),
        ("east1", cmp::Operator::LTE, vec![0, 1, 2, 4, 5, 6, 7, 8]),
        (
            "region",
            cmp::Operator::LT,
            vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 12],
        ),
        ("zoo", cmp::Operator::LTE, (0..18).collect::<Vec<u32>>()),
    ];

    for (value, op, expected) in cases {
        let ids = drle.row_ids_filter(Some(value.to_string()), op, RowIDs::Vector(vec![]));
        assert_eq!(ids, RowIDs::Vector(expected), "predicate value {:?}", value);
    }
}
|
||||
|
||||
#[test]
fn value() {
    let mut drle = super::RLE::default();
    for &(value, run_length) in [("east", 3), ("north", 1), ("east", 5), ("south", 2)].iter() {
        drle.push_additional(Some(value.to_string()), run_length);
    }

    // (row id, expected logical value). Only 11 rows (0..=10) were pushed
    // above, so row 22 is out of range and is expected to read back as None.
    let expectations = [
        (3, Some("north")),
        (0, Some("east")),
        (10, Some("south")),
        (22, None),
    ];

    for &(row_id, expected) in expectations.iter() {
        let expected = expected.map(String::from);
        assert_eq!(drle.value(row_id), &expected, "row {}", row_id);
    }
}
|
||||
|
||||
#[test]
fn values() {
    let mut drle = super::RLE::default();
    for &(value, run_length) in [("east", 3), ("north", 1), ("east", 5), ("south", 2)].iter() {
        drle.push_additional(Some(value.to_string()), run_length);
    }
    drle.push_none(); // row 11

    // Expected logical values, named once so the assertions stay short.
    let east = Some(String::from("east"));
    let north = Some(String::from("north"));
    let south = Some(String::from("south"));
    let null: Option<String> = None;

    // The same buffer is threaded through every call so that the capacity
    // assertion below can show it is reused rather than reallocated.
    let buf = Vec::with_capacity(1000);

    let buf = drle.values(&[0, 1, 3, 4], buf);
    assert_eq!(buf, vec![&east, &east, &north, &east]);

    // Only the three newly requested rows are present afterwards, i.e. the
    // contents of the previous call are not retained.
    let buf = drle.values(&[8, 10, 11], buf);
    assert_eq!(buf, vec![&east, &south, &null]);

    assert_eq!(buf.capacity(), 1000);

    // A row id past the end of the column yields no values.
    assert!(drle.values(&[1000], buf).is_empty());
}
|
||||
|
||||
#[test]
fn distinct_values() {
    // Owned strings the expected sets borrow from.
    let east = String::from("east");
    let north = String::from("north");
    let south = String::from("south");

    // A column made of one long run has a single distinct value.
    let mut single = super::RLE::default();
    single.push_additional(Some("east".to_string()), 100);

    let all_rows = (0..100).collect::<Vec<_>>();
    let found = single.distinct_values(all_rows.as_slice(), BTreeSet::new());
    assert_eq!(found, vec![&east].into_iter().collect::<BTreeSet<_>>());

    // A column with several runs followed by one row added via push_none.
    let mut mixed = super::RLE::default();
    for &(value, run_length) in [("east", 3), ("north", 1), ("east", 5), ("south", 2)].iter() {
        mixed.push_additional(Some(value.to_string()), run_length);
    }
    mixed.push_none();

    // Rows 0..=10 cover every non-null run.
    let non_null_rows = (0..11).collect::<Vec<_>>();
    let found = mixed.distinct_values(non_null_rows.as_slice(), BTreeSet::new());
    assert_eq!(
        found,
        vec![&east, &north, &south]
            .into_iter()
            .collect::<BTreeSet<_>>()
    );

    // Rows 0..=3 only touch the first "east" run and the "north" run.
    let leading_rows = (0..4).collect::<Vec<_>>();
    let found = mixed.distinct_values(leading_rows.as_slice(), BTreeSet::new());
    assert_eq!(
        found,
        vec![&east, &north].into_iter().collect::<BTreeSet<_>>()
    );

    // Non-contiguous row ids.
    let found = mixed.distinct_values(&[3, 10], BTreeSet::new());
    assert_eq!(
        found,
        vec![&north, &south].into_iter().collect::<BTreeSet<_>>()
    );

    // A row id beyond the end of the column contributes nothing.
    let found = mixed.distinct_values(&[100], BTreeSet::new());
    assert!(found.is_empty());
}
|
||||
|
||||
#[test]
fn contains_other_values() {
    let mut drle = super::RLE::default();
    for &(value, run_length) in [("east", 3), ("north", 1), ("east", 5), ("south", 2)].iter() {
        drle.push_additional(Some(value.to_string()), run_length);
    }
    drle.push_none();

    // Values stored in the column.
    let east = String::from("east");
    let north = String::from("north");
    let south = String::from("south");
    // Values that were never pushed into the column.
    let foo = String::from("foo");
    let bar = String::from("bar");

    // The candidate set grows step by step; after each step the column is
    // asked whether it still holds a value that is missing from the set.
    let mut candidates = BTreeSet::new();
    candidates.insert(&east);
    candidates.insert(&north);
    // "south" is in the column but not in the set.
    assert!(drle.contains_other_values(&candidates));

    // Adding a value the column does not hold changes nothing.
    candidates.insert(&foo);
    assert!(drle.contains_other_values(&candidates));

    // Once "south" is added the expected answer flips to false, even though
    // the column also has the row appended by push_none.
    candidates.insert(&south);
    assert!(!drle.contains_other_values(&candidates));

    candidates.insert(&bar);
    assert!(!drle.contains_other_values(&candidates));

    // Against an empty set every stored value counts as "other".
    assert!(drle.contains_other_values(&BTreeSet::new()));
}
|
||||
|
||||
#[test]
fn has_non_null_value() {
    let mut drle = super::RLE::default();
    for &(value, run_length) in [("east", 3), ("north", 1), ("east", 5), ("south", 2)].iter() {
        drle.push_additional(Some(value.to_string()), run_length);
    }
    drle.push_none(); // row 11

    // Row sets that include at least one of rows 0..=10, all of which hold
    // a string value.
    for rows in vec![vec![0], vec![0, 1, 2], vec![10]] {
        assert!(drle.has_non_null_value(rows.as_slice()), "rows {:?}", rows);
    }

    // Row 11 was added by push_none; rows 12 and 100 were never pushed.
    for rows in vec![vec![11], vec![11, 12, 100]] {
        assert!(!drle.has_non_null_value(rows.as_slice()), "rows {:?}", rows);
    }

    // A column built only from `None` never reports a non-null value.
    let mut all_null = super::RLE::default();
    all_null.push_additional(None, 10);
    for rows in vec![vec![0], vec![4, 7]] {
        assert!(
            !all_null.has_non_null_value(rows.as_slice()),
            "rows {:?}",
            rows
        );
    }
}
|
||||
|
||||
#[test]
fn encoded_values() {
    let mut drle = super::RLE::default();
    for &(value, run_length) in [("east", 3), ("north", 1), ("east", 5), ("south", 2)].iter() {
        drle.push_additional(Some(value.to_string()), run_length);
    }
    drle.push_none(); // row 11

    // (requested row ids, expected encoded ids). The expectations show
    // "east" encoded as 0, "north" as 1, "south" as 2 and the row added by
    // push_none as 3.
    let cases = vec![
        (vec![0], vec![0]),
        (vec![1, 3, 5, 6], vec![0, 1, 0, 0]),
        (vec![9, 10, 11], vec![2, 2, 3]),
    ];

    for (rows, expected) in cases {
        // Each lookup starts from a fresh, empty destination buffer.
        let encoded = drle.encoded_values(rows.as_slice(), Vec::new());
        assert_eq!(encoded, expected, "rows {:?}", rows);
    }
}
|
||||
|
||||
#[test]
fn all_encoded_values() {
    let mut drle = super::RLE::default();
    for &(value, run_length) in [("east", 3), ("north", 2)].iter() {
        drle.push_additional(Some(value.to_string()), run_length);
    }

    // Hand over a pre-sized buffer: the result should contain one encoded id
    // per row, and the buffer's capacity should come back unchanged.
    let encoded = drle.all_encoded_values(Vec::with_capacity(100));

    assert_eq!(encoded, vec![0, 0, 0, 1, 1]);
    assert_eq!(encoded.capacity(), 100);
}
|
||||
}
|
Loading…
Reference in New Issue