feat: implement range based predicate on column
parent
952959cd5f
commit
c5dc48db04
|
@ -184,32 +184,12 @@ impl Column {
|
||||||
///
|
///
|
||||||
/// TODO(edd): row ids pooling.
|
/// TODO(edd): row ids pooling.
|
||||||
pub fn row_ids_filter(&self, op: cmp::Operator, value: Value<'_>) -> RowIDsOption {
|
pub fn row_ids_filter(&self, op: cmp::Operator, value: Value<'_>) -> RowIDsOption {
|
||||||
match op {
|
// If we can get an answer using only the meta-data on the column then
|
||||||
// When the predicate is == and the metadata range indicates the column
|
// return that answer.
|
||||||
// can't contain `value` then the column doesn't need to be read.
|
match self.evaluate_predicate_on_meta(&op, &value) {
|
||||||
cmp::Operator::Equal => {
|
PredicateMatch::None => return RowIDsOption::None,
|
||||||
if !self.might_contain_value(&value) {
|
PredicateMatch::All => return RowIDsOption::All,
|
||||||
return RowIDsOption::None; // no rows are going to match.
|
PredicateMatch::SomeMaybe => {} // have to apply predicate to column
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// When the predicate is one of {<, <=, >, >=} and the column doesn't
|
|
||||||
// contain any null values, and the entire range of values satisfies the
|
|
||||||
// predicate then the column doesn't need to be read.
|
|
||||||
cmp::Operator::GT | cmp::Operator::GTE | cmp::Operator::LT | cmp::Operator::LTE => {
|
|
||||||
if self.predicate_matches_all_values(&op, &value) {
|
|
||||||
return RowIDsOption::All;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// When the predicate is != and the metadata range indicates that the
|
|
||||||
// column can't possibly contain `value` then the predicate must
|
|
||||||
// match all rows on the column.
|
|
||||||
cmp::Operator::NotEqual => {
|
|
||||||
if !self.might_contain_value(&value) {
|
|
||||||
return RowIDsOption::All; // all rows are going to match.
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(edd): figure out pooling of these
|
// TODO(edd): figure out pooling of these
|
||||||
|
@ -240,7 +220,95 @@ impl Column {
|
||||||
low: (cmp::Operator, Value<'_>),
|
low: (cmp::Operator, Value<'_>),
|
||||||
high: (cmp::Operator, Value<'_>),
|
high: (cmp::Operator, Value<'_>),
|
||||||
) -> RowIDsOption {
|
) -> RowIDsOption {
|
||||||
todo!()
|
let l = self.evaluate_predicate_on_meta(&low.0, &low.1);
|
||||||
|
let h = self.evaluate_predicate_on_meta(&high.0, &high.1);
|
||||||
|
match (l, h) {
|
||||||
|
(PredicateMatch::All, PredicateMatch::All) => return RowIDsOption::All,
|
||||||
|
|
||||||
|
// One of the predicates can't be satisfied, therefore no rows will
|
||||||
|
// match both predicates.
|
||||||
|
(PredicateMatch::None, _) | (_, PredicateMatch::None) => return RowIDsOption::None,
|
||||||
|
|
||||||
|
// One of the predicates matches all rows so reduce the operation
|
||||||
|
// to the other side.
|
||||||
|
(PredicateMatch::SomeMaybe, PredicateMatch::All) => {
|
||||||
|
return self.row_ids_filter(low.0, low.1);
|
||||||
|
}
|
||||||
|
(PredicateMatch::All, PredicateMatch::SomeMaybe) => {
|
||||||
|
return self.row_ids_filter(high.0, high.1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Have to apply the predicates to the column to identify correct
|
||||||
|
// set of rows.
|
||||||
|
(PredicateMatch::SomeMaybe, PredicateMatch::SomeMaybe) => {}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(edd): figure out pooling of these
|
||||||
|
let dst = RowIDs::Bitmap(Bitmap::create());
|
||||||
|
|
||||||
|
// Check the column for all rows that satisfy the predicate.
|
||||||
|
let row_ids = match &self {
|
||||||
|
Column::String(_, data) => unimplemented!("not supported on string columns yet"),
|
||||||
|
Column::Float(_, data) => {
|
||||||
|
data.row_ids_filter_range((low.0, low.1.scalar()), (high.0, high.1.scalar()), dst)
|
||||||
|
}
|
||||||
|
Column::Integer(_, data) => {
|
||||||
|
data.row_ids_filter_range((low.0, low.1.scalar()), (high.0, high.1.scalar()), dst)
|
||||||
|
}
|
||||||
|
Column::Unsigned(_, data) => {
|
||||||
|
data.row_ids_filter_range((low.0, low.1.scalar()), (high.0, high.1.scalar()), dst)
|
||||||
|
}
|
||||||
|
Column::Bool => todo!(),
|
||||||
|
Column::ByteArray(_, data) => todo!(),
|
||||||
|
};
|
||||||
|
|
||||||
|
if row_ids.is_empty() {
|
||||||
|
return RowIDsOption::None;
|
||||||
|
}
|
||||||
|
RowIDsOption::Some(row_ids)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper function to determine if the predicate matches either no rows or
|
||||||
|
// all the rows in a column. This is determined by looking at the metadata
|
||||||
|
// on the column.
|
||||||
|
//
|
||||||
|
// `PredicateMatch::SomeMaybe` indicates that the column may contain some matching rows and the
|
||||||
|
// predicate should be directly applied to the column.
|
||||||
|
fn evaluate_predicate_on_meta(&self, op: &cmp::Operator, value: &Value<'_>) -> PredicateMatch {
|
||||||
|
match op {
|
||||||
|
// When the predicate is == and the metadata range indicates the column
|
||||||
|
// can't contain `value` then the column doesn't need to be read.
|
||||||
|
cmp::Operator::Equal => {
|
||||||
|
if !self.might_contain_value(&value) {
|
||||||
|
return PredicateMatch::None; // no rows are going to match.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// When the predicate is one of {<, <=, >, >=} and the column doesn't
|
||||||
|
// contain any null values, and the entire range of values satisfies the
|
||||||
|
// predicate then the column doesn't need to be read.
|
||||||
|
cmp::Operator::GT | cmp::Operator::GTE | cmp::Operator::LT | cmp::Operator::LTE => {
|
||||||
|
if self.predicate_matches_all_values(&op, &value) {
|
||||||
|
return PredicateMatch::All;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// When the predicate is != and the metadata range indicates that the
|
||||||
|
// column can't possibly contain `value` then the predicate must
|
||||||
|
// match all rows on the column.
|
||||||
|
cmp::Operator::NotEqual => {
|
||||||
|
if !self.might_contain_value(&value) {
|
||||||
|
return PredicateMatch::All; // all rows are going to match.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.predicate_matches_no_values(&op, &value) {
|
||||||
|
return PredicateMatch::None;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The predicate could match some values
|
||||||
|
PredicateMatch::SomeMaybe
|
||||||
}
|
}
|
||||||
|
|
||||||
// Helper method to determine if the column possibly contains this value
|
// Helper method to determine if the column possibly contains this value
|
||||||
|
@ -339,6 +407,32 @@ impl Column {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Helper method to determine if the predicate cannot possibly match any
|
||||||
|
// values in the column.
|
||||||
|
fn predicate_matches_no_values(&self, op: &cmp::Operator, value: &Value<'_>) -> bool {
|
||||||
|
match &self {
|
||||||
|
Column::String(meta, data) => {
|
||||||
|
if let Value::String(other) = value {
|
||||||
|
meta.match_no_values(op, other)
|
||||||
|
} else {
|
||||||
|
unreachable!("impossible value comparison");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// breaking this down:
|
||||||
|
// * Extract a Scalar variant from `value`, which should panic if
|
||||||
|
// that's not possible;
|
||||||
|
// * Convert that scalar to a primitive value based
|
||||||
|
// on the logical type used for the metadata on the column.
|
||||||
|
// * See if it can be proven that no value in the column can match the predicate.
|
||||||
|
//
|
||||||
|
Column::Float(meta, data) => meta.match_no_values(op, &value.scalar().as_f64()),
|
||||||
|
Column::Integer(meta, data) => meta.match_no_values(op, &value.scalar().as_i64()),
|
||||||
|
Column::Unsigned(meta, data) => meta.match_no_values(op, &value.scalar().as_u64()),
|
||||||
|
Column::Bool => todo!(),
|
||||||
|
Column::ByteArray(meta, _) => todo!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
// Methods for selecting
|
// Methods for selecting
|
||||||
//
|
//
|
||||||
|
@ -411,7 +505,6 @@ impl<T: PartialOrd + std::fmt::Debug> MetaData<T> {
|
||||||
// column. It is up to the caller to determine if the column contains null
|
// column. It is up to the caller to determine if the column contains null
|
||||||
// values, which would invalidate a truthful result.
|
// values, which would invalidate a truthful result.
|
||||||
fn might_match_all_values(&self, op: &cmp::Operator, v: &T) -> bool {
|
fn might_match_all_values(&self, op: &cmp::Operator, v: &T) -> bool {
|
||||||
println!("comparing {:?} {:?}, {:?}", op, v, self.range);
|
|
||||||
match &self.range {
|
match &self.range {
|
||||||
Some(range) => match op {
|
Some(range) => match op {
|
||||||
// all values in column equal to v
|
// all values in column equal to v
|
||||||
|
@ -430,6 +523,28 @@ impl<T: PartialOrd + std::fmt::Debug> MetaData<T> {
|
||||||
None => false, // only null values in column.
|
None => false, // only null values in column.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Determines if it can be shown that the predicate would not match any rows
|
||||||
|
// in the column.
|
||||||
|
fn match_no_values(&self, op: &cmp::Operator, v: &T) -> bool {
|
||||||
|
match &self.range {
|
||||||
|
Some(range) => match op {
|
||||||
|
// no values are `v` so no rows will match `== v`
|
||||||
|
cmp::Operator::Equal => range.0 == range.1 && &range.1 != v,
|
||||||
|
// all values are `v` so no rows will match `!= v`
|
||||||
|
cmp::Operator::NotEqual => range.0 == range.1 && &range.1 == v,
|
||||||
|
// max value in column is `<= v` so no values can be `> v`
|
||||||
|
cmp::Operator::GT => &range.1 <= v,
|
||||||
|
// max value in column is `< v` so no values can be `>= v`
|
||||||
|
cmp::Operator::GTE => &range.1 < v,
|
||||||
|
// min value in column is `>= v` so no values can be `< v`
|
||||||
|
cmp::Operator::LT => &range.0 >= v,
|
||||||
|
// min value in column is `> v` so no values can be `<= v`
|
||||||
|
cmp::Operator::LTE => &range.0 > v,
|
||||||
|
},
|
||||||
|
None => true, // only null values in column so no values satisfy `v`
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
pub enum StringEncoding {
|
pub enum StringEncoding {
|
||||||
RLE(dictionary::RLE),
|
RLE(dictionary::RLE),
|
||||||
|
@ -828,13 +943,13 @@ impl IntegerEncoding {
|
||||||
// non-null signed 64-bit integers.
|
// non-null signed 64-bit integers.
|
||||||
match dst {
|
match dst {
|
||||||
EncodedValues::I64(dst) => match &self {
|
EncodedValues::I64(dst) => match &self {
|
||||||
IntegerEncoding::I64I64(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
Self::I64I64(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
||||||
IntegerEncoding::I64I32(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
Self::I64I32(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
||||||
IntegerEncoding::I64U32(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
Self::I64U32(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
||||||
IntegerEncoding::I64I16(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
Self::I64I16(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
||||||
IntegerEncoding::I64U16(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
Self::I64U16(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
||||||
IntegerEncoding::I64I8(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
Self::I64I8(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
||||||
IntegerEncoding::I64U8(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
Self::I64U8(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
||||||
_ => unreachable!("encoded values on encoding type not supported"),
|
_ => unreachable!("encoded values on encoding type not supported"),
|
||||||
},
|
},
|
||||||
_ => unreachable!("currently only support encoded values as i64"),
|
_ => unreachable!("currently only support encoded values as i64"),
|
||||||
|
@ -849,13 +964,13 @@ impl IntegerEncoding {
|
||||||
// non-null signed 64-bit integers.
|
// non-null signed 64-bit integers.
|
||||||
match dst {
|
match dst {
|
||||||
EncodedValues::I64(dst) => match &self {
|
EncodedValues::I64(dst) => match &self {
|
||||||
IntegerEncoding::I64I64(data) => EncodedValues::I64(data.all_values(dst)),
|
Self::I64I64(data) => EncodedValues::I64(data.all_values(dst)),
|
||||||
IntegerEncoding::I64I32(data) => EncodedValues::I64(data.all_values(dst)),
|
Self::I64I32(data) => EncodedValues::I64(data.all_values(dst)),
|
||||||
IntegerEncoding::I64U32(data) => EncodedValues::I64(data.all_values(dst)),
|
Self::I64U32(data) => EncodedValues::I64(data.all_values(dst)),
|
||||||
IntegerEncoding::I64I16(data) => EncodedValues::I64(data.all_values(dst)),
|
Self::I64I16(data) => EncodedValues::I64(data.all_values(dst)),
|
||||||
IntegerEncoding::I64U16(data) => EncodedValues::I64(data.all_values(dst)),
|
Self::I64U16(data) => EncodedValues::I64(data.all_values(dst)),
|
||||||
IntegerEncoding::I64I8(data) => EncodedValues::I64(data.all_values(dst)),
|
Self::I64I8(data) => EncodedValues::I64(data.all_values(dst)),
|
||||||
IntegerEncoding::I64U8(data) => EncodedValues::I64(data.all_values(dst)),
|
Self::I64U8(data) => EncodedValues::I64(data.all_values(dst)),
|
||||||
_ => unreachable!("encoded values on encoding type not supported"),
|
_ => unreachable!("encoded values on encoding type not supported"),
|
||||||
},
|
},
|
||||||
_ => unreachable!("currently only support encoded values as i64"),
|
_ => unreachable!("currently only support encoded values as i64"),
|
||||||
|
@ -869,33 +984,127 @@ impl IntegerEncoding {
|
||||||
/// `row_ids_filter` will panic if this invariant is broken.
|
/// `row_ids_filter` will panic if this invariant is broken.
|
||||||
pub fn row_ids_filter(&self, op: cmp::Operator, value: &Scalar, dst: RowIDs) -> RowIDs {
|
pub fn row_ids_filter(&self, op: cmp::Operator, value: &Scalar, dst: RowIDs) -> RowIDs {
|
||||||
match &self {
|
match &self {
|
||||||
IntegerEncoding::I64I64(c) => c.row_ids_filter(value.as_i64(), op, dst),
|
Self::I64I64(c) => c.row_ids_filter(value.as_i64(), op, dst),
|
||||||
IntegerEncoding::I64I32(c) => c.row_ids_filter(value.as_i32(), op, dst),
|
Self::I64I32(c) => c.row_ids_filter(value.as_i32(), op, dst),
|
||||||
IntegerEncoding::I64U32(c) => c.row_ids_filter(value.as_u32(), op, dst),
|
Self::I64U32(c) => c.row_ids_filter(value.as_u32(), op, dst),
|
||||||
IntegerEncoding::I64I16(c) => c.row_ids_filter(value.as_i16(), op, dst),
|
Self::I64I16(c) => c.row_ids_filter(value.as_i16(), op, dst),
|
||||||
IntegerEncoding::I64U16(c) => c.row_ids_filter(value.as_u16(), op, dst),
|
Self::I64U16(c) => c.row_ids_filter(value.as_u16(), op, dst),
|
||||||
IntegerEncoding::I64I8(c) => c.row_ids_filter(value.as_i8(), op, dst),
|
Self::I64I8(c) => c.row_ids_filter(value.as_i8(), op, dst),
|
||||||
IntegerEncoding::I64U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
Self::I64U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
||||||
IntegerEncoding::I32I32(c) => c.row_ids_filter(value.as_i32(), op, dst),
|
Self::I32I32(c) => c.row_ids_filter(value.as_i32(), op, dst),
|
||||||
IntegerEncoding::I32I16(c) => c.row_ids_filter(value.as_i16(), op, dst),
|
Self::I32I16(c) => c.row_ids_filter(value.as_i16(), op, dst),
|
||||||
IntegerEncoding::I32U16(c) => c.row_ids_filter(value.as_u16(), op, dst),
|
Self::I32U16(c) => c.row_ids_filter(value.as_u16(), op, dst),
|
||||||
IntegerEncoding::I32I8(c) => c.row_ids_filter(value.as_i8(), op, dst),
|
Self::I32I8(c) => c.row_ids_filter(value.as_i8(), op, dst),
|
||||||
IntegerEncoding::I32U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
Self::I32U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
||||||
IntegerEncoding::I16I16(c) => c.row_ids_filter(value.as_i16(), op, dst),
|
Self::I16I16(c) => c.row_ids_filter(value.as_i16(), op, dst),
|
||||||
IntegerEncoding::I16I8(c) => c.row_ids_filter(value.as_i8(), op, dst),
|
Self::I16I8(c) => c.row_ids_filter(value.as_i8(), op, dst),
|
||||||
IntegerEncoding::I16U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
Self::I16U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
||||||
IntegerEncoding::I8I8(c) => c.row_ids_filter(value.as_i8(), op, dst),
|
Self::I8I8(c) => c.row_ids_filter(value.as_i8(), op, dst),
|
||||||
IntegerEncoding::U64U64(c) => c.row_ids_filter(value.as_u64(), op, dst),
|
Self::U64U64(c) => c.row_ids_filter(value.as_u64(), op, dst),
|
||||||
IntegerEncoding::U64U32(c) => c.row_ids_filter(value.as_u32(), op, dst),
|
Self::U64U32(c) => c.row_ids_filter(value.as_u32(), op, dst),
|
||||||
IntegerEncoding::U64U16(c) => c.row_ids_filter(value.as_u16(), op, dst),
|
Self::U64U16(c) => c.row_ids_filter(value.as_u16(), op, dst),
|
||||||
IntegerEncoding::U64U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
Self::U64U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
||||||
IntegerEncoding::U32U32(c) => c.row_ids_filter(value.as_u32(), op, dst),
|
Self::U32U32(c) => c.row_ids_filter(value.as_u32(), op, dst),
|
||||||
IntegerEncoding::U32U16(c) => c.row_ids_filter(value.as_u16(), op, dst),
|
Self::U32U16(c) => c.row_ids_filter(value.as_u16(), op, dst),
|
||||||
IntegerEncoding::U32U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
Self::U32U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
||||||
IntegerEncoding::U16U16(c) => c.row_ids_filter(value.as_u16(), op, dst),
|
Self::U16U16(c) => c.row_ids_filter(value.as_u16(), op, dst),
|
||||||
IntegerEncoding::U16U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
Self::U16U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
||||||
IntegerEncoding::U8U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
Self::U8U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
||||||
IntegerEncoding::I64I64N(c) => todo!(),
|
Self::I64I64N(c) => todo!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the row ids that satisfy both the provided predicates.
|
||||||
|
///
|
||||||
|
/// Note: it is the caller's responsibility to ensure that the provided
|
||||||
|
/// `Scalar` value will fit within the physical type of the encoded column.
|
||||||
|
/// `row_ids_filter_range` will panic if this invariant is broken.
|
||||||
|
pub fn row_ids_filter_range(
|
||||||
|
&self,
|
||||||
|
low: (cmp::Operator, &Scalar),
|
||||||
|
high: (cmp::Operator, &Scalar),
|
||||||
|
dst: RowIDs,
|
||||||
|
) -> RowIDs {
|
||||||
|
match &self {
|
||||||
|
Self::I64I64(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_i64(), low.0), (high.1.as_i64(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::I64I32(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_i32(), low.0), (high.1.as_i32(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::I64U32(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_u32(), low.0), (high.1.as_u32(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::I64I16(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_i16(), low.0), (high.1.as_i16(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::I64U16(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_u16(), low.0), (high.1.as_u16(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::I64I8(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_i8(), low.0), (high.1.as_i8(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::I64U8(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::I32I32(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_i32(), low.0), (high.1.as_i32(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::I32I16(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_i16(), low.0), (high.1.as_i16(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::I32U16(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_u16(), low.0), (high.1.as_u16(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::I32I8(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_i8(), low.0), (high.1.as_i8(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::I32U8(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::I16I16(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_i16(), low.0), (high.1.as_i16(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::I16I8(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_i8(), low.0), (high.1.as_i8(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::I16U8(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::I8I8(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_i8(), low.0), (high.1.as_i8(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::U64U64(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_u64(), low.0), (high.1.as_u64(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::U64U32(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_u32(), low.0), (high.1.as_u32(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::U64U16(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_u16(), low.0), (high.1.as_u16(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::U64U8(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::U32U32(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_u32(), low.0), (high.1.as_u32(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::U32U16(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_u16(), low.0), (high.1.as_u16(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::U32U8(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::U16U16(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_u16(), low.0), (high.1.as_u16(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::U16U8(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::U8U8(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst)
|
||||||
|
}
|
||||||
|
Self::I64I64N(c) => todo!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -945,6 +1154,27 @@ impl FloatEncoding {
|
||||||
FloatEncoding::Fixed32(c) => c.row_ids_filter(value.as_f32(), op, dst),
|
FloatEncoding::Fixed32(c) => c.row_ids_filter(value.as_f32(), op, dst),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the row ids that satisfy both the provided predicates.
|
||||||
|
///
|
||||||
|
/// Note: it is the caller's responsibility to ensure that the provided
|
||||||
|
/// `Scalar` value will fit within the physical type of the encoded column.
|
||||||
|
/// `row_ids_filter_range` will panic if this invariant is broken.
|
||||||
|
pub fn row_ids_filter_range(
|
||||||
|
&self,
|
||||||
|
low: (cmp::Operator, &Scalar),
|
||||||
|
high: (cmp::Operator, &Scalar),
|
||||||
|
dst: RowIDs,
|
||||||
|
) -> RowIDs {
|
||||||
|
match &self {
|
||||||
|
FloatEncoding::Fixed64(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_f64(), low.0), (high.1.as_f64(), high.0), dst)
|
||||||
|
}
|
||||||
|
FloatEncoding::Fixed32(c) => {
|
||||||
|
c.row_ids_filter_range((low.1.as_f32(), low.0), (high.1.as_f32(), high.0), dst)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Converts an Arrow `StringArray` into a column, currently using the RLE
|
// Converts an Arrow `StringArray` into a column, currently using the RLE
|
||||||
|
@ -1714,9 +1944,16 @@ impl EncodedValues {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq)]
|
||||||
|
enum PredicateMatch {
|
||||||
|
None,
|
||||||
|
SomeMaybe,
|
||||||
|
All,
|
||||||
|
}
|
||||||
|
|
||||||
/// A specific type of Option for `RowIDs` where the notion of all rows ids is
|
/// A specific type of Option for `RowIDs` where the notion of all rows ids is
|
||||||
/// represented.
|
/// represented.
|
||||||
#[derive(Debug)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub enum RowIDsOption {
|
pub enum RowIDsOption {
|
||||||
None,
|
None,
|
||||||
Some(RowIDs),
|
Some(RowIDs),
|
||||||
|
@ -2392,6 +2629,18 @@ mod test {
|
||||||
Value::String(&"Adam Raised a Cain".to_string()),
|
Value::String(&"Adam Raised a Cain".to_string()),
|
||||||
);
|
);
|
||||||
assert!(matches!(row_ids, RowIDsOption::All));
|
assert!(matches!(row_ids, RowIDsOption::All));
|
||||||
|
|
||||||
|
row_ids = col.row_ids_filter(
|
||||||
|
cmp::Operator::GT,
|
||||||
|
Value::String(&"Adam Raised a Cain".to_string()),
|
||||||
|
);
|
||||||
|
assert!(matches!(row_ids, RowIDsOption::All));
|
||||||
|
|
||||||
|
row_ids = col.row_ids_filter(
|
||||||
|
cmp::Operator::NotEqual,
|
||||||
|
Value::String(&"Thunder Road".to_string()),
|
||||||
|
);
|
||||||
|
assert!(matches!(row_ids, RowIDsOption::All));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -2413,6 +2662,9 @@ mod test {
|
||||||
|
|
||||||
row_ids = col.row_ids_filter(cmp::Operator::NotEqual, Value::Scalar(Scalar::I32(-1257)));
|
row_ids = col.row_ids_filter(cmp::Operator::NotEqual, Value::Scalar(Scalar::I32(-1257)));
|
||||||
assert!(matches!(row_ids, RowIDsOption::All));
|
assert!(matches!(row_ids, RowIDsOption::All));
|
||||||
|
|
||||||
|
row_ids = col.row_ids_filter(cmp::Operator::LT, Value::Scalar(Scalar::I64(i64::MAX)));
|
||||||
|
assert!(matches!(row_ids, RowIDsOption::All));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -2461,6 +2713,63 @@ mod test {
|
||||||
assert!(matches!(row_ids, RowIDsOption::All));
|
assert!(matches!(row_ids, RowIDsOption::All));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn row_ids_range() {
|
||||||
|
let input = &[100, 200, 300, 2, 200, 22, 30];
|
||||||
|
|
||||||
|
let col = Column::from(&input[..]);
|
||||||
|
let mut row_ids = col.row_ids_filter_range(
|
||||||
|
(cmp::Operator::GT, Value::Scalar(Scalar::I32(100))),
|
||||||
|
(cmp::Operator::LT, Value::Scalar(Scalar::I32(300))),
|
||||||
|
);
|
||||||
|
assert_eq!(row_ids.unwrap().to_vec(), vec![1, 4]);
|
||||||
|
|
||||||
|
row_ids = col.row_ids_filter_range(
|
||||||
|
(cmp::Operator::GTE, Value::Scalar(Scalar::I32(200))),
|
||||||
|
(cmp::Operator::LTE, Value::Scalar(Scalar::I32(300))),
|
||||||
|
);
|
||||||
|
assert_eq!(row_ids.unwrap().to_vec(), vec![1, 2, 4]);
|
||||||
|
|
||||||
|
row_ids = col.row_ids_filter_range(
|
||||||
|
(cmp::Operator::GTE, Value::Scalar(Scalar::I32(23333))),
|
||||||
|
(cmp::Operator::LTE, Value::Scalar(Scalar::I32(999999))),
|
||||||
|
);
|
||||||
|
assert!(matches!(row_ids, RowIDsOption::None));
|
||||||
|
|
||||||
|
row_ids = col.row_ids_filter_range(
|
||||||
|
(cmp::Operator::GT, Value::Scalar(Scalar::I32(-100))),
|
||||||
|
(cmp::Operator::LT, Value::Scalar(Scalar::I32(301))),
|
||||||
|
);
|
||||||
|
assert!(matches!(row_ids, RowIDsOption::All));
|
||||||
|
|
||||||
|
row_ids = col.row_ids_filter_range(
|
||||||
|
(cmp::Operator::GTE, Value::Scalar(Scalar::I32(2))),
|
||||||
|
(cmp::Operator::LTE, Value::Scalar(Scalar::I32(300))),
|
||||||
|
);
|
||||||
|
assert!(matches!(row_ids, RowIDsOption::All));
|
||||||
|
|
||||||
|
row_ids = col.row_ids_filter_range(
|
||||||
|
(cmp::Operator::GTE, Value::Scalar(Scalar::I32(87))),
|
||||||
|
(cmp::Operator::LTE, Value::Scalar(Scalar::I32(999999))),
|
||||||
|
);
|
||||||
|
assert_eq!(row_ids.unwrap().to_vec(), vec![0, 1, 2, 4]);
|
||||||
|
|
||||||
|
row_ids = col.row_ids_filter_range(
|
||||||
|
(cmp::Operator::GTE, Value::Scalar(Scalar::I32(0))),
|
||||||
|
(
|
||||||
|
cmp::Operator::NotEqual,
|
||||||
|
Value::Scalar(Scalar::I64(i64::MAX)),
|
||||||
|
),
|
||||||
|
);
|
||||||
|
assert!(matches!(row_ids, RowIDsOption::All));
|
||||||
|
|
||||||
|
row_ids = col.row_ids_filter_range(
|
||||||
|
(cmp::Operator::GTE, Value::Scalar(Scalar::I32(0))),
|
||||||
|
(cmp::Operator::NotEqual, Value::Scalar(Scalar::I64(99))),
|
||||||
|
);
|
||||||
|
assert_eq!(row_ids.unwrap().to_vec(), vec![0, 1, 2, 3, 4, 5, 6]);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn might_contain_value() {
|
fn might_contain_value() {
|
||||||
let input = &[100i64, 200, 300, 2, 200, 22, 30];
|
let input = &[100i64, 200, 300, 2, 200, 22, 30];
|
||||||
|
@ -2566,4 +2875,67 @@ mod test {
|
||||||
false
|
false
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn evaluate_predicate_on_meta() {
|
||||||
|
let input = &[100i64, 200, 300, 2, 200, 22, 30];
|
||||||
|
let col = Column::from(&input[..]);
|
||||||
|
|
||||||
|
let cases: Vec<(cmp::Operator, Scalar, PredicateMatch)> = vec![
|
||||||
|
(
|
||||||
|
cmp::Operator::GT,
|
||||||
|
Scalar::U64(100),
|
||||||
|
PredicateMatch::SomeMaybe,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
cmp::Operator::GT,
|
||||||
|
Scalar::I64(100),
|
||||||
|
PredicateMatch::SomeMaybe,
|
||||||
|
),
|
||||||
|
(cmp::Operator::GT, Scalar::I8(-99), PredicateMatch::All),
|
||||||
|
(
|
||||||
|
cmp::Operator::GT,
|
||||||
|
Scalar::I64(100),
|
||||||
|
PredicateMatch::SomeMaybe,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
cmp::Operator::LT,
|
||||||
|
Scalar::I64(300),
|
||||||
|
PredicateMatch::SomeMaybe,
|
||||||
|
),
|
||||||
|
(cmp::Operator::LTE, Scalar::I32(300), PredicateMatch::All),
|
||||||
|
(
|
||||||
|
cmp::Operator::Equal,
|
||||||
|
Scalar::I32(2),
|
||||||
|
PredicateMatch::SomeMaybe,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
cmp::Operator::NotEqual,
|
||||||
|
Scalar::I32(2),
|
||||||
|
PredicateMatch::SomeMaybe,
|
||||||
|
),
|
||||||
|
(cmp::Operator::NotEqual, Scalar::I64(1), PredicateMatch::All),
|
||||||
|
(
|
||||||
|
cmp::Operator::NotEqual,
|
||||||
|
Scalar::I64(301),
|
||||||
|
PredicateMatch::All,
|
||||||
|
),
|
||||||
|
(cmp::Operator::GT, Scalar::I64(100000), PredicateMatch::None),
|
||||||
|
(cmp::Operator::GTE, Scalar::I64(301), PredicateMatch::None),
|
||||||
|
(cmp::Operator::LT, Scalar::I64(2), PredicateMatch::None),
|
||||||
|
(cmp::Operator::LTE, Scalar::I8(-100), PredicateMatch::None),
|
||||||
|
(
|
||||||
|
cmp::Operator::Equal,
|
||||||
|
Scalar::I64(100000),
|
||||||
|
PredicateMatch::None,
|
||||||
|
),
|
||||||
|
];
|
||||||
|
|
||||||
|
for (op, scalar, result) in cases {
|
||||||
|
assert_eq!(
|
||||||
|
col.evaluate_predicate_on_meta(&op, &Value::Scalar(scalar)),
|
||||||
|
result
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue