Merge pull request #371 from influxdata/er/feat/dict-encoding

feat: add dictionary RLE encoding to Segment Store
2020-10-21 14:00:32 +01:00 · 2020-10-21 14:00:32 +01:00 · daf89c7d22
parent c0f09901dd a663964193
commit daf89c7d22
3 changed files with 1279 additions and 0 deletions
--- a/delorean_segment_store/benches/plain.rs
+++ b/delorean_segment_store/benches/plain.rs
@ -0,0 +1,269 @@
+use std::mem::size_of;
+
+use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
+use rand::prelude::*;
+
+use delorean_arrow::arrow::datatypes::*;
+use delorean_segment_store::column::fixed::Fixed;
+use delorean_segment_store::column::fixed_null::FixedNull;
+
+// Row counts that every benchmark is run against.
+// NOTE: 60_000 is larger than i16::MAX (32_767), which matters for the I16
+// benchmarks.
+const ROWS: [usize; 5] = [10, 100, 1_000, 10_000, 60_000];
+// Row-selection patterns that every benchmark is run against.
+const CHUNKS: [Chunks; 4] = [
+    Chunks::All,
+    Chunks::Even,
+    Chunks::ManySmall,
+    Chunks::RandomTenPercent,
+];
+
+// Physical integer widths that every benchmark is run against.
+const PHYSICAL_TYPES: [PhysicalType; 3] = [PhysicalType::I64, PhysicalType::I32, PhysicalType::I16];
+
+// The row-selection patterns a sum benchmark can be run with.
+#[derive(Debug)]
+enum Chunks {
+    All,              // sum up the entire column
+    Even,             // sum up the even rows
+    ManySmall,        // sum up chunks of 3 consecutive rows (the first 3 of every 9)
+    RandomTenPercent, // sum up random 10% of values
+}
+
+// The encoding implementation under benchmark.
+enum EncType {
+    Fixed, // benchmarked through `Fixed<T>`
+    Arrow, // benchmarked through `FixedNull<T>`
+}
+
+// The primitive integer type the benchmarked column values are stored as.
+enum PhysicalType {
+    I64,
+    I32,
+    I16,
+}
+
+// Registers the sum benchmarks: first for the `Fixed` encoding and then for
+// the Arrow-backed encoding, each over every row count, chunk pattern and
+// physical type.
+fn encoding_sum(c: &mut Criterion) {
+    let groups = vec![
+        ("encoding_fixed_sum", EncType::Fixed),
+        ("encoding_arrow_sum", EncType::Arrow),
+    ];
+
+    for (group_name, enc_type) in groups {
+        benchmark_plain_sum(c, group_name, enc_type, &ROWS, &CHUNKS, &PHYSICAL_TYPES);
+    }
+}
+
+// Benchmarks summing the values found at a set of row ids.
+//
+// One benchmark is registered in the group `benchmark_group_name` for every
+// combination of row count (`row_size`), row-selection pattern (`chunks`) and
+// stored integer width (`physical_type`), using the encoding selected by
+// `enc_type`. The column always holds `num_rows` values and the chunk
+// pattern decides which row ids are summed.
+fn benchmark_plain_sum(
+    c: &mut Criterion,
+    benchmark_group_name: &str,
+    enc_type: EncType,
+    row_size: &[usize],
+    chunks: &[Chunks],
+    physical_type: &[PhysicalType],
+) {
+    let mut group = c.benchmark_group(benchmark_group_name);
+    for &num_rows in row_size {
+        for chunk in chunks {
+            for pt in physical_type {
+                // The row ids whose values will be summed.
+                let input: Vec<usize> = match chunk {
+                    Chunks::All => (0..num_rows).collect(),
+                    Chunks::Even => gen_even_chunk(num_rows),
+                    Chunks::ManySmall => gen_many_small_chunk(num_rows),
+                    Chunks::RandomTenPercent => gen_random_10_percent(num_rows),
+                };
+
+                match pt {
+                    PhysicalType::I64 => {
+                        group
+                            .throughput(Throughput::Bytes((input.len() * size_of::<i64>()) as u64));
+
+                        match enc_type {
+                            EncType::Fixed => {
+                                let encoding = Fixed::<i64>::from(
+                                    (0..num_rows as i64).collect::<Vec<i64>>().as_slice(),
+                                );
+
+                                group.bench_with_input(
+                                    BenchmarkId::from_parameter(format!(
+                                        "{:?}_{:?}_i64",
+                                        num_rows, chunk
+                                    )),
+                                    &input,
+                                    |b, input| {
+                                        // Returning the sum lets criterion
+                                        // black-box it, so the work cannot be
+                                        // optimised away.
+                                        b.iter(|| encoding.sum::<i64>(&input));
+                                    },
+                                );
+                            }
+                            EncType::Arrow => {
+                                let encoding = FixedNull::<Int64Type>::from(
+                                    (0..num_rows as i64).collect::<Vec<i64>>().as_slice(),
+                                );
+
+                                group.bench_with_input(
+                                    BenchmarkId::from_parameter(format!(
+                                        "{:?}_{:?}_i64",
+                                        num_rows, chunk
+                                    )),
+                                    &input,
+                                    |b, input| {
+                                        b.iter(|| encoding.sum(&input));
+                                    },
+                                );
+                            }
+                        }
+                    }
+                    PhysicalType::I32 => {
+                        // Throughput must reflect the width actually stored.
+                        group
+                            .throughput(Throughput::Bytes((input.len() * size_of::<i32>()) as u64));
+
+                        match enc_type {
+                            EncType::Fixed => {
+                                let encoding = Fixed::<i32>::from(
+                                    (0..num_rows as i32).collect::<Vec<i32>>().as_slice(),
+                                );
+
+                                group.bench_with_input(
+                                    BenchmarkId::from_parameter(format!(
+                                        "{:?}_{:?}_i32",
+                                        num_rows, chunk
+                                    )),
+                                    &input,
+                                    |b, input| {
+                                        b.iter(|| encoding.sum::<i32>(&input));
+                                    },
+                                );
+                            }
+                            EncType::Arrow => {
+                                let encoding = FixedNull::<Int32Type>::from(
+                                    (0..num_rows as i32).collect::<Vec<i32>>().as_slice(),
+                                );
+
+                                group.bench_with_input(
+                                    BenchmarkId::from_parameter(format!(
+                                        "{:?}_{:?}_i32",
+                                        num_rows, chunk
+                                    )),
+                                    &input,
+                                    |b, input| {
+                                        b.iter(|| encoding.sum(&input));
+                                    },
+                                );
+                            }
+                        }
+                    }
+                    PhysicalType::I16 => {
+                        // Throughput must reflect the width actually stored.
+                        group
+                            .throughput(Throughput::Bytes((input.len() * size_of::<i16>()) as u64));
+
+                        // `num_rows as i16` would wrap to a negative bound for
+                        // row counts above i16::MAX and produce an empty
+                        // column. Casting each value instead keeps the column
+                        // at `num_rows` rows (values wrap around).
+                        match enc_type {
+                            EncType::Fixed => {
+                                let encoding = Fixed::<i16>::from(
+                                    (0..num_rows)
+                                        .map(|v| v as i16)
+                                        .collect::<Vec<i16>>()
+                                        .as_slice(),
+                                );
+
+                                group.bench_with_input(
+                                    BenchmarkId::from_parameter(format!(
+                                        "{:?}_{:?}_i16",
+                                        num_rows, chunk
+                                    )),
+                                    &input,
+                                    |b, input| {
+                                        b.iter(|| encoding.sum::<i16>(&input));
+                                    },
+                                );
+                            }
+                            EncType::Arrow => {
+                                let encoding = FixedNull::<Int16Type>::from(
+                                    (0..num_rows)
+                                        .map(|v| v as i16)
+                                        .collect::<Vec<i16>>()
+                                        .as_slice(),
+                                );
+
+                                group.bench_with_input(
+                                    BenchmarkId::from_parameter(format!(
+                                        "{:?}_{:?}_i16",
+                                        num_rows, chunk
+                                    )),
+                                    &input,
+                                    |b, input| {
+                                        b.iter(|| encoding.sum(&input));
+                                    },
+                                );
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+    group.finish();
+}
+
+// Selects every even row id in `0..rows`, which is about 50% of the rows.
+fn gen_even_chunk(rows: usize) -> Vec<usize> {
+    (0..rows).step_by(2).collect()
+}
+
+// Selects the first 3 row ids out of every 9 in `0..rows`, i.e. short runs of
+// 3 consecutive rows that cover roughly a third of the column.
+fn gen_many_small_chunk(rows: usize) -> Vec<usize> {
+    (0..rows).filter(|row| row % 9 < 3).collect()
+}
+
+// Selects each row id in `0..rows` independently with probability 0.1, so
+// roughly 10% of the rows are returned, in ascending order.
+fn gen_random_10_percent(rows: usize) -> Vec<usize> {
+    let mut rng = thread_rng();
+    (0..rows).filter(|_| rng.gen::<f64>() < 0.1).collect()
+}
+
+criterion_group!(benches, encoding_sum,);
+criterion_main!(benches);
--- a/delorean_segment_store/src/column.rs
+++ b/delorean_segment_store/src/column.rs
@ -1,7 +1,10 @@
 pub mod cmp;
+pub mod dictionary;
 pub mod fixed;
 pub mod fixed_null;

+use croaring::Bitmap;
+
 use delorean_arrow::arrow;

 /// The possible logical types that column values can have. All values in a
@ -104,3 +107,41 @@ pub enum Values {
    // Arbitrary byte arrays
    ByteArray(arrow::array::UInt8Array),
 }
+
+/// Represents vectors of row IDs, which are usually used for intermediate
+/// results as a method of late materialisation.
+#[derive(PartialEq, Debug)]
+pub enum RowIDs {
+    /// Row ids held in a `croaring` bitmap.
+    Bitmap(Bitmap),
+    /// Row ids held in a plain vector.
+    Vector(Vec<u32>),
+}
+
+impl RowIDs {
+    /// The number of row ids currently held.
+    pub fn len(&self) -> usize {
+        match self {
+            Self::Bitmap(bitmap) => bitmap.cardinality() as usize,
+            Self::Vector(rows) => rows.len(),
+        }
+    }
+
+    /// Returns true when no row ids are held.
+    pub fn is_empty(&self) -> bool {
+        match self {
+            Self::Bitmap(bitmap) => bitmap.is_empty(),
+            Self::Vector(rows) => rows.is_empty(),
+        }
+    }
+
+    /// Removes every row id from the container.
+    pub fn clear(&mut self) {
+        match self {
+            Self::Bitmap(bitmap) => bitmap.clear(),
+            Self::Vector(rows) => rows.clear(),
+        }
+    }
+
+    /// Adds every row id in the half-open range `[from, to)`.
+    pub fn add_range(&mut self, from: u32, to: u32) {
+        match self {
+            Self::Bitmap(bitmap) => bitmap.add_range(u64::from(from)..u64::from(to)),
+            Self::Vector(rows) => rows.extend(from..to),
+        }
+    }
+}
--- a/delorean_segment_store/src/column/dictionary.rs
+++ b/delorean_segment_store/src/column/dictionary.rs
@ -0,0 +1,969 @@
+use std::collections::{BTreeMap, BTreeSet};
+use std::convert::From;
+use std::iter;
+
+use croaring::Bitmap;
+
+use delorean_arrow::arrow::array::{Array, StringArray};
+
+use crate::column::{cmp, RowIDs};
+
+// `RLE` is a run-length encoding for dictionary columns, where all dictionary
+// entries are utf-8 valid strings.
+//
+// Every distinct value (NULL included, represented by `None`) is assigned a
+// `u32` index the first time it is pushed, and the column itself is stored as
+// a sequence of `(index, run-length)` pairs.
+#[derive(Default)]
+pub struct RLE {
+    // TODO(edd): revisit choice of storing owned string versus references.
+
+    // The mapping between an entry and its assigned index.
+    entry_index: BTreeMap<Option<String>, u32>,
+
+    // The mapping between an index and its entry. The position in the vector
+    // is the index.
+    index_entries: Vec<Option<String>>,
+
+    // The set of rows that belong to each distinct value in the dictionary.
+    // This allows essentially constant time grouping of rows on the column by
+    // value.
+    index_row_ids: BTreeMap<u32, Bitmap>,
+
+    // stores tuples where each pair refers to a dictionary entry (by index)
+    // and the number of times the entry repeats.
+    run_lengths: Vec<(u32, u32)>,
+
+    // The total number of rows (sum of all run-lengths) in the column.
+    num_rows: u32,
+}
+
+impl RLE {
+    /// Adds the provided string value to the encoded data. It is the caller's
+    /// responsibility to ensure that the dictionary encoded remains sorted.
+    ///
+    /// Equivalent to `push_additional(Some(v), 1)`, which panics when a new
+    /// dictionary entry is pushed out of order.
+    pub fn push(&mut self, v: String) {
+        self.push_additional(Some(v), 1);
+    }
+
+    /// Adds a NULL value to the encoded data. It is the caller's
+    /// responsibility to ensure that the dictionary encoded remains sorted.
+    ///
+    /// Equivalent to `push_additional(None, 1)`. Once NULL is in the
+    /// dictionary no new non-NULL entry can be added.
+    pub fn push_none(&mut self) {
+        self.push_additional(None, 1);
+    }
+
+    /// Appends `additional` repetitions of `v` to the end of the column.
+    ///
+    /// A value already in the dictionary either extends the trailing
+    /// run-length (when it is the value of the last run) or starts a new run.
+    /// A value not yet in the dictionary is assigned the next index.
+    ///
+    /// # Panics
+    ///
+    /// Panics when `v` is a new dictionary entry that does not sort strictly
+    /// after the previously added entry, or when a non-NULL entry is added
+    /// after NULL. Keeping the dictionary sorted is the caller's
+    /// responsibility.
+    pub fn push_additional(&mut self, v: Option<String>, additional: u32) {
+        // The logical row ids covered by this push.
+        let first_row = self.num_rows as u64;
+        let new_rows = first_row..first_row + additional as u64;
+
+        if let Some(&idx) = self.entry_index.get(&v) {
+            // Existing dictionary entry.
+            if let Some((last_idx, rl)) = self.run_lengths.last_mut() {
+                if *last_idx == idx {
+                    // extend the trailing run
+                    *rl += additional;
+                } else {
+                    // value differs from the trailing run - start a new one
+                    self.run_lengths.push((idx, additional));
+                }
+                self.index_row_ids
+                    .get_mut(&idx)
+                    .unwrap()
+                    .add_range(new_rows);
+            }
+        } else {
+            // New dictionary entry; it takes the next free index.
+            let idx = self.index_entries.len() as u32;
+            if let Some(previous) = self.index_entries.last() {
+                match (previous, &v) {
+                    (None, Some(_)) => panic!("out of order dictionary insertion"),
+                    (Some(_), None) => {}
+                    (Some(a), Some(b)) => assert!(a < b),
+                    (None, None) => unreachable!("multiple None values"),
+                }
+            }
+            self.index_entries.push(v.clone());
+            self.entry_index.insert(v, idx);
+
+            let mut row_ids = Bitmap::create();
+            row_ids.add_range(new_rows);
+            self.index_row_ids.insert(idx, row_ids);
+
+            self.run_lengths.push((idx, additional));
+        }
+        self.num_rows += additional;
+    }
+
+    //
+    //
+    // ---- Methods for getting row ids from values.
+    //
+    //
+
+    /// Populates the provided destination container with the row ids satisfying
+    /// the provided predicate.
+    ///
+    /// `=` and `!=` are handled by `row_ids_equal`; `<`, `<=`, `>` and `>=`
+    /// are handled by `row_ids_cmp`. Both clear `dst` before populating it.
+    pub fn row_ids_filter(&self, value: Option<String>, op: cmp::Operator, dst: RowIDs) -> RowIDs {
+        match op {
+            cmp::Operator::Equal | cmp::Operator::NotEqual => self.row_ids_equal(value, op, dst),
+            cmp::Operator::LT | cmp::Operator::LTE | cmp::Operator::GT | cmp::Operator::GTE => {
+                self.row_ids_cmp(value, op, dst)
+            }
+        }
+    }
+
+    // Finds row ids based on = or != operator.
+    //
+    // `dst` is cleared and then filled with the rows of every run whose
+    // encoded id matches (for `=`) or differs from (for `!=`) the encoded id
+    // of `value`. Any other operator is a programming error.
+    fn row_ids_equal(&self, value: Option<String>, op: cmp::Operator, mut dst: RowIDs) -> RowIDs {
+        dst.clear();
+        let want_match = match op {
+            cmp::Operator::Equal => true,
+            cmp::Operator::NotEqual => false,
+            _ => unreachable!("invalid operator"),
+        };
+
+        match self.entry_index.get(&value) {
+            Some(&target_id) => {
+                // walk the runs, tracking the logical row range of each one.
+                let mut run_start: u32 = 0;
+                for &(run_id, run_len) in &self.run_lengths {
+                    let run_end = run_start + run_len;
+                    if (run_id == target_id) == want_match {
+                        dst.add_range(run_start, run_end);
+                    }
+                    run_start = run_end;
+                }
+            }
+            // The value is not in the column: `!=` is satisfied by every row.
+            None if !want_match => dst.add_range(0, self.num_rows),
+            // The value is not in the column: `=` is satisfied by no row.
+            None => {}
+        }
+
+        dst
+    }
+
+    // Finds row ids based on <, <=, > or >= operator.
+    //
+    // `dst` is cleared and then filled with the rows of every run that
+    // satisfies the predicate. Comparisons are made on encoded ids.
+    //
+    // When `value` is not in the dictionary the predicate is rewritten in
+    // terms of the closest non-NULL dictionary entry that satisfies it. If no
+    // such entry exists, no rows are returned.
+    //
+    // NOTE(review): the NULL entry is pushed last and so has the highest
+    // encoded id; on the happy path `>`/`>=` therefore also match NULL rows.
+    // Confirm whether that is intended.
+    fn row_ids_cmp(&self, value: Option<String>, op: cmp::Operator, mut dst: RowIDs) -> RowIDs {
+        dst.clear();
+
+        // happy path - the value exists in the column
+        if let Some(encoded_id) = self.entry_index.get(&value) {
+            let cmp = match op {
+                cmp::Operator::GT => PartialOrd::gt,
+                cmp::Operator::GTE => PartialOrd::ge,
+                cmp::Operator::LT => PartialOrd::lt,
+                cmp::Operator::LTE => PartialOrd::le,
+                _ => unreachable!("operator not supported"),
+            };
+
+            let mut index: u32 = 0; // current position in the column.
+            for (other_encoded_id, other_rl) in &self.run_lengths {
+                let start = index;
+                index += *other_rl;
+                if cmp(other_encoded_id, encoded_id) {
+                    dst.add_range(start, index)
+                }
+            }
+            return dst;
+        }
+
+        // The NULL entry is never used as a bound below. `None` sorts before
+        // every `Some(_)` key but carries the highest encoded id, so using it
+        // as the bound for `<`/`<=` would select every row in the column.
+        match op {
+            cmp::Operator::GT | cmp::Operator::GTE => {
+                // find the first decoded value that satisfies the predicate.
+                for other in self.entry_index.keys() {
+                    if other.is_some() && other > &value {
+                        // change filter from either `x > value` or `x >= value` to `x >= other`
+                        return self.row_ids_cmp(other.clone(), cmp::Operator::GTE, dst);
+                    }
+                }
+            }
+            cmp::Operator::LT | cmp::Operator::LTE => {
+                // find the first decoded value that satisfies the predicate.
+                // Note iteration is in reverse
+                for other in self.entry_index.keys().rev() {
+                    if other.is_some() && other < &value {
+                        // change filter from either `x < value` or `x <= value` to `x <= other`
+                        return self.row_ids_cmp(other.clone(), cmp::Operator::LTE, dst);
+                    }
+                }
+            }
+            _ => unreachable!("operator not supported"),
+        }
+        dst
+    }
+
+    // The set of row ids for each distinct value in the column, keyed by the
+    // value's encoded id.
+    pub fn group_row_ids(&self) -> &BTreeMap<u32, Bitmap> {
+        &self.index_row_ids
+    }
+
+    //
+    //
+    // ---- Methods for getting materialised values.
+    //
+    //
+
+    /// Returns the dictionary entries, indexed by encoded id. A NULL entry is
+    /// represented by `None`.
+    pub fn dictionary(&self) -> &[Option<String>] {
+        &self.index_entries
+    }
+
+    /// Returns the logical value present at the provided row id.
+    ///
+    /// N.B right now this doesn't discern between an invalid row id and a NULL
+    /// value at a valid location: both yield `&None`.
+    pub fn value(&self, row_id: u32) -> &Option<String> {
+        if row_id >= self.num_rows {
+            return &None;
+        }
+
+        // Walk the runs until one ends beyond the wanted row.
+        let mut run_end = 0;
+        for &(encoded_id, rl) in &self.run_lengths {
+            run_end += rl;
+            if run_end > row_id {
+                return &self.index_entries[encoded_id as usize];
+            }
+        }
+        &None
+    }
+
+    /// Materialises the decoded value belonging to the provided encoded id.
+    /// The value is cloned; the NULL entry decodes to `None`.
+    ///
+    /// Panics if there is no decoded value for the provided id
+    pub fn decode_id(&self, encoded_id: u32) -> Option<String> {
+        self.index_entries[encoded_id as usize].clone()
+    }
+
+    /// Materialises a vector of references to the decoded values in the
+    /// provided row ids.
+    ///
+    /// `dst` is cleared and reused as the output buffer. NULL values are
+    /// represented by None. It is the caller's responsibility to ensure row
+    /// ids are a monotonically increasing set.
+    ///
+    /// Materialisation stops at the first row id that is beyond the end of
+    /// the column, so the result may be shorter than `row_ids`. An empty
+    /// column yields an empty result.
+    pub fn values<'a>(
+        &'a self,
+        row_ids: &[u32],
+        mut dst: Vec<&'a Option<String>>,
+    ) -> Vec<&'a Option<String>> {
+        dst.clear();
+        dst.reserve(row_ids.len());
+
+        // An empty column has no runs to walk and every row id is beyond it.
+        if self.run_lengths.is_empty() {
+            return dst;
+        }
+
+        // Start of the part of the current run that is not yet consumed.
+        let mut curr_logical_row_id = 0;
+
+        // Current run: its encoded id and how many of its rows remain.
+        let (mut curr_entry_id, mut curr_entry_rl) = self.run_lengths[0];
+
+        let mut i = 1; // index of the next run to load
+        for row_id in row_ids {
+            if row_id >= &self.num_rows {
+                return dst; // row ids beyond length of column
+            }
+
+            while curr_logical_row_id + curr_entry_rl <= *row_id {
+                // this encoded entry does not cover the row we need.
+                // move on to next entry
+                curr_logical_row_id += curr_entry_rl;
+                curr_entry_id = self.run_lengths[i].0;
+                curr_entry_rl = self.run_lengths[i].1;
+
+                i += 1;
+            }
+
+            // this encoded entry covers the row_id we want.
+            dst.push(&self.index_entries[curr_entry_id as usize]);
+            curr_logical_row_id += 1;
+            curr_entry_rl -= 1;
+        }
+
+        assert_eq!(row_ids.len(), dst.len());
+        dst
+    }
+
+    /// Returns references to the logical (decoded) values for all the rows in
+    /// the column.
+    ///
+    /// `dst` is cleared and reused as the output buffer. NULL values are
+    /// represented by None.
+    ///
+    /// The column is only read, so a shared borrow is sufficient.
+    pub fn all_values<'a>(
+        &'a self,
+        mut dst: Vec<&'a Option<String>>,
+    ) -> Vec<&'a Option<String>> {
+        dst.clear();
+        dst.reserve(self.num_rows as usize);
+
+        // Expand every run into `rl` references to its dictionary entry.
+        for (idx, rl) in &self.run_lengths {
+            let v = &self.index_entries[*idx as usize];
+            dst.extend(iter::repeat(v).take(*rl as usize));
+        }
+        dst
+    }
+
+    /// Returns references to the unique set of values encoded at each of the
+    /// provided ids.
+    ///
+    /// `dst` is cleared and reused as the output set. NULL values are never
+    /// included. Processing stops at the first row id beyond the end of the
+    /// column, or as soon as every dictionary entry has been seen. An empty
+    /// column yields an empty set.
+    ///
+    /// It is the caller's responsibility to ensure row ids are a monotonically
+    /// increasing set.
+    pub fn distinct_values<'a>(
+        &'a self,
+        row_ids: &[u32],
+        mut dst: BTreeSet<&'a String>,
+    ) -> BTreeSet<&'a String> {
+        // TODO(edd): Perf... We can improve on this if we know the column is
+        // totally ordered.
+        dst.clear();
+
+        // An empty column has no runs to walk and no values to return.
+        if self.run_lengths.is_empty() {
+            return dst;
+        }
+
+        // Used to mark off when a decoded value has been added to the result
+        // set. TODO(perf) - this might benefit from being pooled somehow.
+        let mut encoded_values = vec![false; self.index_entries.len()];
+
+        let mut found = 0;
+        if let Some(i) = self.entry_index.get(&None) {
+            // the encoding contains NULL values, but we don't return those as
+            // distinct values. So we will mark them.
+            encoded_values[*i as usize] = true;
+            found += 1;
+        }
+
+        // Start of the part of the current run that is not yet consumed.
+        let mut curr_logical_row_id = 0;
+        // Current run: its encoded id and how many of its rows remain.
+        let (mut curr_entry_id, mut curr_entry_rl) = self.run_lengths[0];
+
+        let mut i = 1; // index of the next run to load
+        'by_row: for row_id in row_ids {
+            if row_id >= &self.num_rows {
+                return dst; // rows beyond the column size
+            }
+
+            while curr_logical_row_id + curr_entry_rl <= *row_id {
+                // this encoded entry does not cover the row we need.
+                // move on to next entry
+                curr_logical_row_id += curr_entry_rl;
+                curr_entry_id = self.run_lengths[i].0;
+                curr_entry_rl = self.run_lengths[i].1;
+
+                i += 1;
+            }
+
+            // encoded value not already in result set.
+            if !encoded_values[curr_entry_id as usize] {
+                // annoying unwrap. We know that there can't be None here as
+                // we marked the NULL entry as seen at the top of the method.
+                dst.insert(self.index_entries[curr_entry_id as usize].as_ref().unwrap());
+                encoded_values[curr_entry_id as usize] = true;
+                found += 1;
+            }
+
+            if found == encoded_values.len() {
+                // all distinct values have been read
+                break 'by_row;
+            }
+
+            curr_logical_row_id += 1;
+            curr_entry_rl -= 1;
+        }
+
+        assert!(dst.len() <= self.index_entries.len());
+        dst
+    }
+
+    //
+    //
+    // ---- Methods for getting encoded values directly, typically to be used
+    //      as part of group keys.
+    //
+    //
+
+    /// Return the raw encoded values for the provided logical row ids.
+    /// Encoded values for NULL values are included.
+    ///
+    /// `dst` is cleared and reused as the output buffer. Row ids are expected
+    /// to be increasing, as the runs are walked once from the start.
+    ///
+    /// Panics if the column is empty or if a row id is beyond the end of the
+    /// column (the run-lengths are indexed out of bounds).
+    pub fn encoded_values(&self, row_ids: &[u32], mut dst: Vec<u32>) -> Vec<u32> {
+        dst.clear();
+        dst.reserve(row_ids.len());
+
+        // Start of the part of the current run that is not yet consumed.
+        let mut curr_logical_row_id = 0;
+
+        // Current run: its encoded id and how many of its rows remain.
+        let (mut curr_entry_id, mut curr_entry_rl) = self.run_lengths[0];
+
+        let mut i = 1; // index of the next run to load
+        for row_id in row_ids {
+            while curr_logical_row_id + curr_entry_rl <= *row_id {
+                // this encoded entry does not cover the row we need.
+                // move on to next entry
+                curr_logical_row_id += curr_entry_rl;
+                curr_entry_id = self.run_lengths[i].0;
+                curr_entry_rl = self.run_lengths[i].1;
+
+                i += 1;
+            }
+
+            // this entry covers the row_id we want.
+            dst.push(curr_entry_id);
+            curr_logical_row_id += 1;
+            curr_entry_rl -= 1;
+        }
+
+        assert_eq!(row_ids.len(), dst.len());
+        dst
+    }
+
+    /// Returns the encoded value of every row in the column, in row order,
+    /// including the encoded value for any NULL values.
+    ///
+    /// `dst` is cleared and reused as the output buffer.
+    pub fn all_encoded_values(&self, mut dst: Vec<u32>) -> Vec<u32> {
+        dst.clear();
+        dst.reserve(self.num_rows as usize);
+
+        // Expand every run into `run_len` copies of its encoded id.
+        let expanded = self
+            .run_lengths
+            .iter()
+            .flat_map(|&(encoded_id, run_len)| iter::repeat(encoded_id).take(run_len as usize));
+        dst.extend(expanded);
+        dst
+    }
+
+    //
+    //
+    // ---- Methods for optimising schema exploration.
+    //
+    //
+
+    /// Efficiently determines if this column contains non-null values that
+    /// differ from the provided set of values.
+    ///
+    /// Informally, this answers "is it worth spending time reading this
+    /// column for values, or do I already have all of its values in my set?".
+    ///
+    /// Returns `true` when at least one non-null dictionary entry of this
+    /// column is missing from `values`, and `false` otherwise.
+    ///
+    /// This is useful when the same column is being read across many
+    /// segments to build the total distinct set of values: by passing the
+    /// current result set to each column, columns that only contain already
+    /// discovered values can be short-circuited.
+    ///
+    pub fn contains_other_values(&self, values: &BTreeSet<&String>) -> bool {
+        // Number of dictionary entries, not counting the NULL entry.
+        let has_null = self.entry_index.contains_key(&None);
+        let non_null_entries = self.index_entries.len() - usize::from(has_null);
+
+        // More entries than candidates: at least one must be missing.
+        if non_null_entries > values.len() {
+            return true;
+        }
+
+        // `flatten` skips the NULL entry.
+        self.entry_index
+            .keys()
+            .flatten()
+            .any(|entry| !values.contains(entry))
+    }
+
+    /// Determines if the column contains at least one non-null value at
+    /// any of the provided row ids.
+    ///
+    /// It is the caller's responsibility to ensure row ids are a monotonically
+    /// increasing set.
+    pub fn has_non_null_value(&self, row_ids: &[u32]) -> bool {
+        match self.entry_index.get(&None) {
+            // The column has NULL rows, so the runs need to be inspected.
+            Some(&null_id) => self.find_non_null_value(null_id, row_ids),
+            // There are no NULL entries in this column, so any row id that
+            // falls within the column points at a non-null value.
+            None => row_ids.iter().any(|&row_id| row_id < self.num_rows),
+        }
+    }
+
+    // Returns true if there exists an encoded non-null value at any of the row
+    // ids.
+    //
+    // `null_encoded_id` is the encoded id of the NULL entry. The runs are
+    // walked once from the start, so row ids are expected to be increasing.
+    // Returns false as soon as a row id beyond the column is reached.
+    //
+    // Panics if the column is empty.
+    fn find_non_null_value(&self, null_encoded_id: u32, row_ids: &[u32]) -> bool {
+        // Start of the part of the current run that is not yet consumed.
+        let mut curr_logical_row_id = 0;
+
+        // Current run: its encoded id and how many of its rows remain.
+        let (mut curr_encoded_id, mut curr_entry_rl) = self.run_lengths[0];
+
+        let mut i = 1; // index of the next run to load
+        for &row_id in row_ids {
+            if row_id >= self.num_rows {
+                return false; // all other row ids beyond column.
+            }
+
+            while curr_logical_row_id + curr_entry_rl <= row_id {
+                // this encoded entry does not cover the row we need.
+                // move on to next encoded id
+                curr_logical_row_id += curr_entry_rl;
+                curr_encoded_id = self.run_lengths[i].0;
+                curr_entry_rl = self.run_lengths[i].1;
+
+                i += 1;
+            }
+
+            // this entry covers the row_id we want if it points to a non-null value.
+            if curr_encoded_id != null_encoded_id {
+                return true;
+            }
+            curr_logical_row_id += 1;
+            curr_entry_rl -= 1;
+        }
+
+        false
+    }
+}
+
+/// Builds an `RLE` column by pushing every string in order.
+///
+/// Panics (in `push_additional`) if a new distinct value does not sort after
+/// the previously added distinct value.
+impl From<Vec<&str>> for RLE {
+    fn from(vec: Vec<&str>) -> Self {
+        let mut drle = Self::default();
+        for v in vec {
+            drle.push(v.to_string());
+        }
+        drle
+    }
+}
+
+/// Builds an `RLE` column by pushing every string in order.
+///
+/// Panics (in `push_additional`) if a new distinct value does not sort after
+/// the previously added distinct value.
+impl From<Vec<String>> for RLE {
+    fn from(vec: Vec<String>) -> Self {
+        let mut drle = Self::default();
+        for v in vec {
+            drle.push(v);
+        }
+        drle
+    }
+}
+
+/// Builds an `RLE` column by pushing every value in order; `None` is pushed
+/// as NULL.
+///
+/// Panics (in `push_additional`) if a new distinct value does not sort after
+/// the previously added distinct value, or if a new non-NULL value follows
+/// NULL.
+impl From<Vec<Option<&str>>> for RLE {
+    fn from(vec: Vec<Option<&str>>) -> Self {
+        let mut drle = Self::default();
+        for v in vec {
+            match v {
+                Some(x) => drle.push(x.to_string()),
+                None => drle.push_none(),
+            }
+        }
+        drle
+    }
+}
+
+/// Builds an `RLE` column by pushing every value in order; `None` is pushed
+/// as NULL.
+///
+/// Panics (in `push_additional`) if a new distinct value does not sort after
+/// the previously added distinct value, or if a new non-NULL value follows
+/// NULL.
+impl From<Vec<Option<String>>> for RLE {
+    fn from(vec: Vec<Option<String>>) -> Self {
+        let mut drle = Self::default();
+        for v in vec {
+            // `push`/`push_none` are `push_additional(Some(_)/None, 1)`.
+            drle.push_additional(v, 1);
+        }
+        drle
+    }
+}
+
+/// Builds an `RLE` column from an Arrow string array, pushing every element in
+/// order; null elements are pushed as NULL.
+///
+/// Panics (in `push_additional`) if a new distinct value does not sort after
+/// the previously added distinct value, or if a new non-NULL value follows
+/// NULL.
+impl From<StringArray> for RLE {
+    fn from(arr: StringArray) -> Self {
+        let mut drle = Self::default();
+        for i in 0..arr.len() {
+            if arr.is_null(i) {
+                drle.push_none();
+            } else {
+                drle.push(arr.value(i).to_string());
+            }
+        }
+        drle
+    }
+}
+
+// Writes a one-line summary of the column: the number of rows, the number of
+// dictionary entries and the number of run-lengths.
+impl std::fmt::Display for RLE {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "[RLE] rows: {:?} dict entries: {}, runs: {} ",
+            self.num_rows,
+            self.index_entries.len(),
+            self.run_lengths.len()
+        )
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use std::collections::BTreeSet;
+
+    use crate::column::{cmp, RowIDs};
+
+    #[test]
+    fn rle_push() {
+        // Values the column is expected to hand back, named once so the
+        // expectations below stay readable.
+        let hello = Some("hello".to_string());
+        let world = Some("world".to_string());
+        let zoo = Some("zoo".to_string());
+        let null: Option<String> = None;
+
+        // Four "hello" rows via `From`, one more via `push_additional`, then a
+        // single "world" row.
+        let mut enc = super::RLE::from(vec!["hello"; 4]);
+        enc.push_additional(Some("hello".to_string()), 1);
+        enc.push("world".to_string());
+
+        assert_eq!(
+            enc.all_values(vec![]),
+            [&hello, &hello, &hello, &hello, &hello, &world]
+        );
+
+        // Appending three "zoo" rows and a NULL extends the materialised column.
+        enc.push_additional(Some("zoo".to_string()), 3);
+        enc.push_none();
+
+        assert_eq!(
+            enc.all_values(vec![]),
+            [&hello, &hello, &hello, &hello, &hello, &world, &zoo, &zoo, &zoo, &null]
+        );
+    }
+
+    #[test]
+    #[should_panic]
+    fn rle_push_none_first() {
+        // A NULL pushed into an empty column followed by a non-null value is
+        // expected to panic.
+        let mut enc = super::RLE::default();
+        enc.push_none();
+
+        let value = Some("hello".to_string());
+        enc.push_additional(value, 1);
+    }
+
+    #[test]
+    #[should_panic]
+    fn rle_push_wrong_order() {
+        // Pushing "a" after "b" is expected to panic.
+        let mut enc = super::RLE::default();
+        for value in &["b", "a"] {
+            enc.push(value.to_string());
+        }
+    }
+
+    #[test]
+    fn all_values() {
+        let mut enc = super::RLE::from(vec!["hello", "zoo"]);
+
+        // Hand over a destination that already holds four (stale) references;
+        // the result must contain only the column's two values.
+        let stale = Some("zoo".to_string());
+        let buf = vec![&stale, &stale, &stale, &stale];
+        let materialised = enc.all_values(buf);
+
+        let hello = Some("hello".to_string());
+        let zoo = Some("zoo".to_string());
+        assert_eq!(materialised, [&hello, &zoo]);
+
+        // The destination's capacity is expected to be unchanged.
+        assert_eq!(materialised.capacity(), 4);
+    }
+
+    #[test]
+    fn row_ids_filter_equal() {
+        let mut enc = super::RLE::default();
+        enc.push_additional(Some("east".to_string()), 3); // 0,1,2
+        enc.push_additional(Some("north".to_string()), 1); // 3
+        enc.push_additional(Some("east".to_string()), 5); // 4,5,6,7,8
+        enc.push_additional(Some("south".to_string()), 2); // 9,10
+
+        // == "east": both "east" runs.
+        let east_rows = enc.row_ids_filter(
+            Some("east".to_string()),
+            cmp::Operator::Equal,
+            RowIDs::Vector(vec![]),
+        );
+        assert_eq!(east_rows, RowIDs::Vector(vec![0, 1, 2, 4, 5, 6, 7, 8]));
+
+        // == "south": the final run only.
+        let south_rows = enc.row_ids_filter(
+            Some("south".to_string()),
+            cmp::Operator::Equal,
+            RowIDs::Vector(vec![]),
+        );
+        assert_eq!(south_rows, RowIDs::Vector(vec![9, 10]));
+
+        // == a value that is not in the column: nothing matches.
+        let missing_rows = enc.row_ids_filter(
+            Some("foo".to_string()),
+            cmp::Operator::Equal,
+            RowIDs::Vector(vec![]),
+        );
+        assert!(missing_rows.is_empty());
+
+        // != a value that is not in the column: every row matches.
+        let not_missing_rows = enc.row_ids_filter(
+            Some("foo".to_string()),
+            cmp::Operator::NotEqual,
+            RowIDs::Vector(vec![]),
+        );
+        let every_row = (0..11).collect::<Vec<_>>();
+        assert_eq!(not_missing_rows, RowIDs::Vector(every_row));
+
+        // != "east": the "north" and "south" rows.
+        let not_east_rows = enc.row_ids_filter(
+            Some("east".to_string()),
+            cmp::Operator::NotEqual,
+            RowIDs::Vector(vec![]),
+        );
+        assert_eq!(not_east_rows, RowIDs::Vector(vec![3, 9, 10]));
+    }
+
+    #[test]
+    fn row_ids_filter_cmp() {
+        // Row ids per value once all runs are pushed:
+        //   east:  0,1,2 and 4,5,6,7,8
+        //   north: 3 and 12
+        //   south: 9,10
+        //   west:  11 and 13,14,15,16,17
+        let mut enc = super::RLE::default();
+        enc.push_additional(Some("east".to_string()), 3);
+        enc.push_additional(Some("north".to_string()), 1);
+        enc.push_additional(Some("east".to_string()), 5);
+        enc.push_additional(Some("south".to_string()), 2);
+        enc.push_additional(Some("west".to_string()), 1);
+        enc.push_additional(Some("north".to_string()), 1);
+        enc.push_additional(Some("west".to_string()), 5);
+
+        // Expected row sets that several cases share.
+        let east_rows: Vec<u32> = vec![0, 1, 2, 4, 5, 6, 7, 8];
+        let north_or_later: Vec<u32> = vec![3, 9, 10, 11, 12, 13, 14, 15, 16, 17];
+        let every_row: Vec<u32> = (0..18).collect();
+
+        // <= "east"
+        let got = enc.row_ids_filter(
+            Some("east".to_string()),
+            cmp::Operator::LTE,
+            RowIDs::Vector(vec![]),
+        );
+        assert_eq!(got, RowIDs::Vector(east_rows.clone()));
+
+        // < "east": nothing sorts before the smallest value.
+        let got = enc.row_ids_filter(
+            Some("east".to_string()),
+            cmp::Operator::LT,
+            RowIDs::Vector(vec![]),
+        );
+        assert!(got.is_empty());
+
+        // > "north"
+        let got = enc.row_ids_filter(
+            Some("north".to_string()),
+            cmp::Operator::GT,
+            RowIDs::Vector(vec![]),
+        );
+        assert_eq!(got, RowIDs::Vector(vec![9, 10, 11, 13, 14, 15, 16, 17]));
+
+        // >= "north"
+        let got = enc.row_ids_filter(
+            Some("north".to_string()),
+            cmp::Operator::GTE,
+            RowIDs::Vector(vec![]),
+        );
+        assert_eq!(got, RowIDs::Vector(north_or_later.clone()));
+
+        // The remaining cases compare against values that do not directly
+        // exist in the column.
+
+        // > "abba"
+        let got = enc.row_ids_filter(
+            Some("abba".to_string()),
+            cmp::Operator::GT,
+            RowIDs::Vector(vec![]),
+        );
+        assert_eq!(got, RowIDs::Vector(every_row.clone()));
+
+        // > "east1"
+        let got = enc.row_ids_filter(
+            Some("east1".to_string()),
+            cmp::Operator::GT,
+            RowIDs::Vector(vec![]),
+        );
+        assert_eq!(got, RowIDs::Vector(north_or_later.clone()));
+
+        // >= "east1"
+        let got = enc.row_ids_filter(
+            Some("east1".to_string()),
+            cmp::Operator::GTE,
+            RowIDs::Vector(vec![]),
+        );
+        assert_eq!(got, RowIDs::Vector(north_or_later));
+
+        // <= "east1"
+        let got = enc.row_ids_filter(
+            Some("east1".to_string()),
+            cmp::Operator::LTE,
+            RowIDs::Vector(vec![]),
+        );
+        assert_eq!(got, RowIDs::Vector(east_rows));
+
+        // < "region"
+        let got = enc.row_ids_filter(
+            Some("region".to_string()),
+            cmp::Operator::LT,
+            RowIDs::Vector(vec![]),
+        );
+        assert_eq!(got, RowIDs::Vector(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 12]));
+
+        // <= "zoo"
+        let got = enc.row_ids_filter(
+            Some("zoo".to_string()),
+            cmp::Operator::LTE,
+            RowIDs::Vector(vec![]),
+        );
+        assert_eq!(got, RowIDs::Vector(every_row));
+    }
+
+    #[test]
+    fn value() {
+        let mut enc = super::RLE::default();
+        enc.push_additional(Some("east".to_string()), 3); // 0,1,2
+        enc.push_additional(Some("north".to_string()), 1); // 3
+        enc.push_additional(Some("east".to_string()), 5); // 4,5,6,7,8
+        enc.push_additional(Some("south".to_string()), 2); // 9,10
+
+        let east = Some("east".to_string());
+        let north = Some("north".to_string());
+        let south = Some("south".to_string());
+
+        // Lookups are made out of row order on purpose.
+        assert_eq!(enc.value(3), &north);
+        assert_eq!(enc.value(0), &east);
+        assert_eq!(enc.value(10), &south);
+
+        // A row id beyond the last row is expected to yield `None`.
+        assert_eq!(enc.value(22), &None);
+    }
+
+    #[test]
+    fn values() {
+        let mut enc = super::RLE::default();
+        enc.push_additional(Some("east".to_string()), 3); // 0,1,2
+        enc.push_additional(Some("north".to_string()), 1); // 3
+        enc.push_additional(Some("east".to_string()), 5); // 4,5,6,7,8
+        enc.push_additional(Some("south".to_string()), 2); // 9,10
+        enc.push_none(); // 11
+
+        let east = Some("east".to_string());
+        let north = Some("north".to_string());
+        let south = Some("south".to_string());
+        let null: Option<String> = None;
+
+        // The same buffer is threaded through every call.
+        let buf = Vec::with_capacity(1000);
+
+        let buf = enc.values(&[0, 1, 3, 4], buf);
+        assert_eq!(buf, vec![&east, &east, &north, &east]);
+
+        let buf = enc.values(&[8, 10, 11], buf);
+        assert_eq!(buf, vec![&east, &south, &null]);
+
+        // Reusing the buffer is expected to leave its capacity unchanged.
+        assert_eq!(buf.capacity(), 1000);
+
+        // A row id beyond the column yields no values.
+        let buf = enc.values(&[1000], buf);
+        assert!(buf.is_empty());
+    }
+
+    #[test]
+    fn distinct_values() {
+        let east = "east".to_string();
+        let north = "north".to_string();
+        let south = "south".to_string();
+
+        // A column made of one long run has a single distinct value.
+        let mut single_run = super::RLE::default();
+        single_run.push_additional(Some("east".to_string()), 100);
+
+        let all_rows = (0..100).collect::<Vec<_>>();
+        let got = single_run.distinct_values(all_rows.as_slice(), BTreeSet::new());
+        assert_eq!(got, [&east].iter().copied().collect::<BTreeSet<_>>());
+
+        // A column with several runs and a trailing NULL.
+        let mut enc = super::RLE::default();
+        enc.push_additional(Some("east".to_string()), 3); // 0,1,2
+        enc.push_additional(Some("north".to_string()), 1); // 3
+        enc.push_additional(Some("east".to_string()), 5); // 4,5,6,7,8
+        enc.push_additional(Some("south".to_string()), 2); // 9,10
+        enc.push_none(); // 11
+
+        // Every non-null row.
+        let non_null_rows = (0..11).collect::<Vec<_>>();
+        let got = enc.distinct_values(non_null_rows.as_slice(), BTreeSet::new());
+        assert_eq!(
+            got,
+            [&east, &north, &south]
+                .iter()
+                .copied()
+                .collect::<BTreeSet<_>>()
+        );
+
+        // Only the first four rows.
+        let leading_rows = (0..4).collect::<Vec<_>>();
+        let got = enc.distinct_values(leading_rows.as_slice(), BTreeSet::new());
+        assert_eq!(got, [&east, &north].iter().copied().collect::<BTreeSet<_>>());
+
+        // Two hand-picked rows.
+        let got = enc.distinct_values(&[3, 10], BTreeSet::new());
+        assert_eq!(got, [&north, &south].iter().copied().collect::<BTreeSet<_>>());
+
+        // A row id beyond the column contributes nothing.
+        let got = enc.distinct_values(&[100], BTreeSet::new());
+        assert!(got.is_empty());
+    }
+
+    #[test]
+    fn contains_other_values() {
+        let mut enc = super::RLE::default();
+        enc.push_additional(Some("east".to_string()), 3);
+        enc.push_additional(Some("north".to_string()), 1);
+        enc.push_additional(Some("east".to_string()), 5);
+        enc.push_additional(Some("south".to_string()), 2);
+        enc.push_none();
+
+        // Values that are in the column...
+        let east = "east".to_string();
+        let north = "north".to_string();
+        let south = "south".to_string();
+        // ...and values that are not.
+        let foo = "foo".to_string();
+        let bar = "bar".to_string();
+
+        let mut candidates = BTreeSet::new();
+
+        // {east, north}: "south" is in the column but not in the set.
+        candidates.insert(&east);
+        candidates.insert(&north);
+        assert!(enc.contains_other_values(&candidates));
+
+        // Adding a value the column does not hold changes nothing.
+        candidates.insert(&foo);
+        assert!(enc.contains_other_values(&candidates));
+
+        // Once "south" is added the set covers every non-null column value.
+        candidates.insert(&south);
+        assert!(!enc.contains_other_values(&candidates));
+
+        // Further values that are not in the column still change nothing.
+        candidates.insert(&bar);
+        assert!(!enc.contains_other_values(&candidates));
+
+        // Against an empty set the column is expected to report other values.
+        let nothing = BTreeSet::new();
+        assert!(enc.contains_other_values(&nothing));
+    }
+
+    #[test]
+    fn has_non_null_value() {
+        let mut enc = super::RLE::default();
+        enc.push_additional(Some("east".to_string()), 3); // 0,1,2
+        enc.push_additional(Some("north".to_string()), 1); // 3
+        enc.push_additional(Some("east".to_string()), 5); // 4,5,6,7,8
+        enc.push_additional(Some("south".to_string()), 2); // 9,10
+        enc.push_none(); // 11
+
+        // Row sets that touch at least one non-null row.
+        assert!(enc.has_non_null_value(&[0]));
+        assert!(enc.has_non_null_value(&[0, 1, 2]));
+        assert!(enc.has_non_null_value(&[10]));
+
+        // Row sets covering only the NULL row and/or row ids past the end.
+        assert!(!enc.has_non_null_value(&[11]));
+        assert!(!enc.has_non_null_value(&[11, 12, 100]));
+
+        // A column made up entirely of NULL rows.
+        let mut all_null = super::RLE::default();
+        all_null.push_additional(None, 10);
+        assert!(!all_null.has_non_null_value(&[0]));
+        assert!(!all_null.has_non_null_value(&[4, 7]));
+    }
+
+    #[test]
+    fn encoded_values() {
+        let mut enc = super::RLE::default();
+        enc.push_additional(Some("east".to_string()), 3); // 0,1,2
+        enc.push_additional(Some("north".to_string()), 1); // 3
+        enc.push_additional(Some("east".to_string()), 5); // 4,5,6,7,8
+        enc.push_additional(Some("south".to_string()), 2); // 9,10
+        enc.push_none(); // 11
+
+        // The expectations below pin the encoded ids as east=0, north=1,
+        // south=2 and 3 for the NULL row.
+        let single = enc.encoded_values(&[0], vec![]);
+        assert_eq!(single, vec![0]);
+
+        let mixed = enc.encoded_values(&[1, 3, 5, 6], vec![]);
+        assert_eq!(mixed, vec![0, 1, 0, 0]);
+
+        let tail = enc.encoded_values(&[9, 10, 11], vec![]);
+        assert_eq!(tail, vec![2, 2, 3]);
+    }
+
+    #[test]
+    fn all_encoded_values() {
+        let mut enc = super::RLE::default();
+        enc.push_additional(Some("east".to_string()), 3);
+        enc.push_additional(Some("north".to_string()), 2);
+
+        // One encoded id per row, written into the caller's buffer.
+        let buf = Vec::with_capacity(100);
+        let encoded = enc.all_encoded_values(buf);
+        assert_eq!(encoded, vec![0, 0, 0, 1, 1]);
+
+        // The buffer's capacity is expected to be unchanged.
+        assert_eq!(encoded.capacity(), 100);
+    }
+}