refactor: expose public API

pull/24376/head
Edd Robinson 2020-11-12 18:23:34 +00:00
parent fc881776dd
commit d54c30147e
3 changed files with 16 additions and 14 deletions

View File

@ -22,7 +22,7 @@ use arrow_deps::{arrow, arrow::array::Array};
// FWIW it's not the cardinality of the column that should drive the decision;
// it's how many run-lengths would be produced in an RLE column and whether that
// compression is worth the memory and compute costs to work on it.
pub const TEMP_CARDINALITY_DICTIONARY_ENCODING_LIMIT: usize = 1_000_000;
pub const TEMP_CARDINALITY_DICTIONARY_ENCODING_LIMIT: usize = 100_000;
/// The possible logical types that column values can have. All values in a
/// column have the same physical type.
pub enum Column {

View File

@ -20,7 +20,7 @@ pub enum Encoding {
}
impl Encoding {
fn debug_name(&self) -> &'static str {
pub fn debug_name(&self) -> &'static str {
match &self {
Encoding::RLE(_) => "RLE encoder",
Encoding::Plain(_) => "plain encoder",
@ -81,7 +81,7 @@ impl Encoding {
/// Populates the provided destination container with the row ids satisfying
/// the provided predicate.
fn row_ids_filter(&self, value: &str, op: &cmp::Operator, dst: RowIDs) -> RowIDs {
pub fn row_ids_filter(&self, value: &str, op: &cmp::Operator, dst: RowIDs) -> RowIDs {
match self {
Encoding::RLE(enc) => enc.row_ids_filter(value, op, dst),
Encoding::Plain(enc) => enc.row_ids_filter(value, op, dst),
@ -161,7 +161,11 @@ impl Encoding {
///
/// NULL values are represented by None. It is the caller's responsibility
/// to ensure row ids are a monotonically increasing set.
fn values<'a>(&'a self, row_ids: &[u32], dst: Vec<Option<&'a str>>) -> Vec<Option<&'a str>> {
pub fn values<'a>(
&'a self,
row_ids: &[u32],
dst: Vec<Option<&'a str>>,
) -> Vec<Option<&'a str>> {
match self {
Encoding::RLE(enc) => enc.values(row_ids, dst),
Encoding::Plain(enc) => enc.values(row_ids, dst),

View File

@ -24,10 +24,12 @@ pub struct Plain {
contains_null: bool,
}
// The default initialisation of a Plain involves reserving the first id/index 0
// for the NULL value.
// The default initialisation of a Plain involves reserving the first id/index
// `0`, which is the encoded representation of the NULL value.
impl Default for Plain {
fn default() -> Self {
// for this to make sense NULL_ID must be `0`.
assert_eq!(NULL_ID, 0);
Self {
entries: vec![None],
encoded_data: vec![],
@ -118,8 +120,8 @@ impl Plain {
}
}
// Preferred to add values to the column. `id` is the encoded
// representation of a logical string value.
// Preferred method to add values to the column. `id` is the encoded
// representation of a logical value.
fn push_encoded_values(&mut self, id: u32, additional: u32) {
self.encoded_data
.extend(std::iter::repeat(id).take(additional as usize));
@ -142,11 +144,6 @@ impl Plain {
}
}
// Intended as the correct way to determine the next encoded id for a new
// value added to the column.
//
// NOTE(review): this is an unimplemented stub; the body is `todo!()`, so any
// call panics at runtime. How the next id should be derived is not shown
// here; confirm against the rest of `Plain` before implementing.
fn next_encoded_id(&self) -> u32 {
todo!()
}
/// The number of logical rows encoded in this column.
pub fn num_rows(&self) -> u32 {
self.encoded_data.len() as u32
@ -449,7 +446,8 @@ impl Plain {
dst.clear();
dst.reserve(row_ids.len());
// TODO - not sure at all about this deref...
// The `as_deref` is needed to convert an `&Option<String>` into an
// `Option<&str>`.
for chunks in row_ids.chunks_exact(4) {
dst.push(self.entries[self.encoded_data[chunks[0] as usize] as usize].as_deref());
dst.push(self.entries[self.encoded_data[chunks[1] as usize] as usize].as_deref());