refactor: expose public API
parent
fc881776dd
commit
d54c30147e
|
@ -22,7 +22,7 @@ use arrow_deps::{arrow, arrow::array::Array};
|
|||
// FWIW it's not the cardinality of the column that should drive the decision;
|
||||
// it's how many run-lengths would be produced in an RLE column and whether that
|
||||
// compression is worth the memory and compute costs to work on it.
|
||||
pub const TEMP_CARDINALITY_DICTIONARY_ENCODING_LIMIT: usize = 1_000_000;
|
||||
pub const TEMP_CARDINALITY_DICTIONARY_ENCODING_LIMIT: usize = 100_000;
|
||||
/// The possible logical types that column values can have. All values in a
|
||||
/// column have the same physical type.
|
||||
pub enum Column {
|
||||
|
|
|
@ -20,7 +20,7 @@ pub enum Encoding {
|
|||
}
|
||||
|
||||
impl Encoding {
|
||||
fn debug_name(&self) -> &'static str {
|
||||
pub fn debug_name(&self) -> &'static str {
|
||||
match &self {
|
||||
Encoding::RLE(_) => "RLE encoder",
|
||||
Encoding::Plain(_) => "plain encoder",
|
||||
|
@ -81,7 +81,7 @@ impl Encoding {
|
|||
|
||||
/// Populates the provided destination container with the row ids satisfying
|
||||
/// the provided predicate.
|
||||
fn row_ids_filter(&self, value: &str, op: &cmp::Operator, dst: RowIDs) -> RowIDs {
|
||||
pub fn row_ids_filter(&self, value: &str, op: &cmp::Operator, dst: RowIDs) -> RowIDs {
|
||||
match self {
|
||||
Encoding::RLE(enc) => enc.row_ids_filter(value, op, dst),
|
||||
Encoding::Plain(enc) => enc.row_ids_filter(value, op, dst),
|
||||
|
@ -161,7 +161,11 @@ impl Encoding {
|
|||
///
|
||||
/// NULL values are represented by None. It is the caller's responsibility
|
||||
/// to ensure row ids are a monotonically increasing set.
|
||||
fn values<'a>(&'a self, row_ids: &[u32], dst: Vec<Option<&'a str>>) -> Vec<Option<&'a str>> {
|
||||
pub fn values<'a>(
|
||||
&'a self,
|
||||
row_ids: &[u32],
|
||||
dst: Vec<Option<&'a str>>,
|
||||
) -> Vec<Option<&'a str>> {
|
||||
match self {
|
||||
Encoding::RLE(enc) => enc.values(row_ids, dst),
|
||||
Encoding::Plain(enc) => enc.values(row_ids, dst),
|
||||
|
|
|
@ -24,10 +24,12 @@ pub struct Plain {
|
|||
contains_null: bool,
|
||||
}
|
||||
|
||||
// The default initialisation of a Plain involves reserving the first id/index 0
|
||||
// for the NULL value.
|
||||
// The default initialisation of a Plain involves reserving the first id/index
|
||||
// `0`, which is the encoded representation of the NULL value.
|
||||
impl Default for Plain {
|
||||
fn default() -> Self {
|
||||
// for this to make sense NULL_ID must be `0`.
|
||||
assert_eq!(NULL_ID, 0);
|
||||
Self {
|
||||
entries: vec![None],
|
||||
encoded_data: vec![],
|
||||
|
@ -118,8 +120,8 @@ impl Plain {
|
|||
}
|
||||
}
|
||||
|
||||
// Preferred to add values to the column. `id` is the encoded
|
||||
// representation of a logical string value.
|
||||
// Preferred method to add values to the column. `id` is the encoded
|
||||
// representation of a logical value.
|
||||
fn push_encoded_values(&mut self, id: u32, additional: u32) {
|
||||
self.encoded_data
|
||||
.extend(std::iter::repeat(id).take(additional as usize));
|
||||
|
@ -142,11 +144,6 @@ impl Plain {
|
|||
}
|
||||
}
|
||||
|
||||
// correct way to determine next encoded id for a new value.
|
||||
fn next_encoded_id(&self) -> u32 {
|
||||
todo!()
|
||||
}
|
||||
|
||||
/// The number of logical rows encoded in this column.
|
||||
pub fn num_rows(&self) -> u32 {
|
||||
self.encoded_data.len() as u32
|
||||
|
@ -449,7 +446,8 @@ impl Plain {
|
|||
dst.clear();
|
||||
dst.reserve(row_ids.len());
|
||||
|
||||
// TODO - not sure at all about this deref...
|
||||
// The `as_deref` is needed to convert an `&Option<String>` into an
|
||||
// `Option<&str>`.
|
||||
for chunks in row_ids.chunks_exact(4) {
|
||||
dst.push(self.entries[self.encoded_data[chunks[0] as usize] as usize].as_deref());
|
||||
dst.push(self.entries[self.encoded_data[chunks[1] as usize] as usize].as_deref());
|
||||
|
|
Loading…
Reference in New Issue