feat: implement range based predicate on column
parent
952959cd5f
commit
c5dc48db04
|
@ -184,32 +184,12 @@ impl Column {
|
|||
///
|
||||
/// TODO(edd): row ids pooling.
|
||||
pub fn row_ids_filter(&self, op: cmp::Operator, value: Value<'_>) -> RowIDsOption {
|
||||
match op {
|
||||
// When the predicate is == and the metadata range indicates the column
|
||||
// can't contain `value` then the column doesn't need to be read.
|
||||
cmp::Operator::Equal => {
|
||||
if !self.might_contain_value(&value) {
|
||||
return RowIDsOption::None; // no rows are going to match.
|
||||
}
|
||||
}
|
||||
|
||||
// When the predicate is one of {<, <=, >, >=} and the column doesn't
|
||||
// contain any null values, and the entire range of values satisfies the
|
||||
// predicate then the column doesn't need to be read.
|
||||
cmp::Operator::GT | cmp::Operator::GTE | cmp::Operator::LT | cmp::Operator::LTE => {
|
||||
if self.predicate_matches_all_values(&op, &value) {
|
||||
return RowIDsOption::All;
|
||||
}
|
||||
}
|
||||
|
||||
// When the predicate is != and the metadata range indicates that the
|
||||
// column can't possibly contain `value` then the predicate must
|
||||
// match all rows on the column.
|
||||
cmp::Operator::NotEqual => {
|
||||
if !self.might_contain_value(&value) {
|
||||
return RowIDsOption::All; // all rows are going to match.
|
||||
}
|
||||
}
|
||||
// If we can get an answer using only the meta-data on the column then
|
||||
// return that answer.
|
||||
match self.evaluate_predicate_on_meta(&op, &value) {
|
||||
PredicateMatch::None => return RowIDsOption::None,
|
||||
PredicateMatch::All => return RowIDsOption::All,
|
||||
PredicateMatch::SomeMaybe => {} // have to apply predicate to column
|
||||
}
|
||||
|
||||
// TODO(edd): figure out pooling of these
|
||||
|
@ -240,7 +220,95 @@ impl Column {
|
|||
low: (cmp::Operator, Value<'_>),
|
||||
high: (cmp::Operator, Value<'_>),
|
||||
) -> RowIDsOption {
|
||||
todo!()
|
||||
let l = self.evaluate_predicate_on_meta(&low.0, &low.1);
|
||||
let h = self.evaluate_predicate_on_meta(&high.0, &high.1);
|
||||
match (l, h) {
|
||||
(PredicateMatch::All, PredicateMatch::All) => return RowIDsOption::All,
|
||||
|
||||
// One of the predicates can't be satisfied, therefore no rows will
|
||||
// match both predicates.
|
||||
(PredicateMatch::None, _) | (_, PredicateMatch::None) => return RowIDsOption::None,
|
||||
|
||||
// One of the predicates matches all rows so reduce the operation
|
||||
// to the other side.
|
||||
(PredicateMatch::SomeMaybe, PredicateMatch::All) => {
|
||||
return self.row_ids_filter(low.0, low.1);
|
||||
}
|
||||
(PredicateMatch::All, PredicateMatch::SomeMaybe) => {
|
||||
return self.row_ids_filter(high.0, high.1);
|
||||
}
|
||||
|
||||
// Have to apply the predicates to the column to identify correct
|
||||
// set of rows.
|
||||
(PredicateMatch::SomeMaybe, PredicateMatch::SomeMaybe) => {}
|
||||
}
|
||||
|
||||
// TODO(edd): figure out pooling of these
|
||||
let dst = RowIDs::Bitmap(Bitmap::create());
|
||||
|
||||
// Check the column for all rows that satisfy the predicate.
|
||||
let row_ids = match &self {
|
||||
Column::String(_, data) => unimplemented!("not supported on string columns yet"),
|
||||
Column::Float(_, data) => {
|
||||
data.row_ids_filter_range((low.0, low.1.scalar()), (high.0, high.1.scalar()), dst)
|
||||
}
|
||||
Column::Integer(_, data) => {
|
||||
data.row_ids_filter_range((low.0, low.1.scalar()), (high.0, high.1.scalar()), dst)
|
||||
}
|
||||
Column::Unsigned(_, data) => {
|
||||
data.row_ids_filter_range((low.0, low.1.scalar()), (high.0, high.1.scalar()), dst)
|
||||
}
|
||||
Column::Bool => todo!(),
|
||||
Column::ByteArray(_, data) => todo!(),
|
||||
};
|
||||
|
||||
if row_ids.is_empty() {
|
||||
return RowIDsOption::None;
|
||||
}
|
||||
RowIDsOption::Some(row_ids)
|
||||
}
|
||||
|
||||
// Helper function to determine if the predicate matches either no rows or
|
||||
// all the rows in a column. This is determined by looking at the metadata
|
||||
// on the column.
|
||||
//
|
||||
// `None` indicates that the column may contain some matching rows and the
|
||||
// predicate should be directly applied to the column.
|
||||
fn evaluate_predicate_on_meta(&self, op: &cmp::Operator, value: &Value<'_>) -> PredicateMatch {
|
||||
match op {
|
||||
// When the predicate is == and the metadata range indicates the column
|
||||
// can't contain `value` then the column doesn't need to be read.
|
||||
cmp::Operator::Equal => {
|
||||
if !self.might_contain_value(&value) {
|
||||
return PredicateMatch::None; // no rows are going to match.
|
||||
}
|
||||
}
|
||||
|
||||
// When the predicate is one of {<, <=, >, >=} and the column doesn't
|
||||
// contain any null values, and the entire range of values satisfies the
|
||||
// predicate then the column doesn't need to be read.
|
||||
cmp::Operator::GT | cmp::Operator::GTE | cmp::Operator::LT | cmp::Operator::LTE => {
|
||||
if self.predicate_matches_all_values(&op, &value) {
|
||||
return PredicateMatch::All;
|
||||
}
|
||||
}
|
||||
|
||||
// When the predicate is != and the metadata range indicates that the
|
||||
// column can't possibly contain `value` then the predicate must
|
||||
// match all rows on the column.
|
||||
cmp::Operator::NotEqual => {
|
||||
if !self.might_contain_value(&value) {
|
||||
return PredicateMatch::All; // all rows are going to match.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if self.predicate_matches_no_values(&op, &value) {
|
||||
return PredicateMatch::None;
|
||||
}
|
||||
|
||||
// The predicate could match some values
|
||||
PredicateMatch::SomeMaybe
|
||||
}
|
||||
|
||||
// Helper method to determine if the column possibly contains this value
|
||||
|
@ -339,6 +407,32 @@ impl Column {
|
|||
}
|
||||
}
|
||||
|
||||
// Helper method to determine if the predicate can not possibly match any
|
||||
// values in the column.
|
||||
fn predicate_matches_no_values(&self, op: &cmp::Operator, value: &Value<'_>) -> bool {
|
||||
match &self {
|
||||
Column::String(meta, data) => {
|
||||
if let Value::String(other) = value {
|
||||
meta.match_no_values(op, other)
|
||||
} else {
|
||||
unreachable!("impossible value comparison");
|
||||
}
|
||||
}
|
||||
// breaking this down:
|
||||
// * Extract a Scalar variant from `value`, which should panic if
|
||||
// that's not possible;
|
||||
// * Convert that scalar to a primitive value based
|
||||
// on the logical type used for the metadata on the column.
|
||||
// * See if one can prove none of the column can match the predicate.
|
||||
//
|
||||
Column::Float(meta, data) => meta.match_no_values(op, &value.scalar().as_f64()),
|
||||
Column::Integer(meta, data) => meta.match_no_values(op, &value.scalar().as_i64()),
|
||||
Column::Unsigned(meta, data) => meta.match_no_values(op, &value.scalar().as_u64()),
|
||||
Column::Bool => todo!(),
|
||||
Column::ByteArray(meta, _) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Methods for selecting
|
||||
//
|
||||
|
@ -411,7 +505,6 @@ impl<T: PartialOrd + std::fmt::Debug> MetaData<T> {
|
|||
// column. It is up to the caller to determine if the column contains null
|
||||
// values, which would invalidate a truthful result.
|
||||
fn might_match_all_values(&self, op: &cmp::Operator, v: &T) -> bool {
|
||||
println!("comparing {:?} {:?}, {:?}", op, v, self.range);
|
||||
match &self.range {
|
||||
Some(range) => match op {
|
||||
// all values in column equal to v
|
||||
|
@ -430,6 +523,28 @@ impl<T: PartialOrd + std::fmt::Debug> MetaData<T> {
|
|||
None => false, // only null values in column.
|
||||
}
|
||||
}
|
||||
|
||||
// Determines if it can be shown that the predicate would not match any rows
|
||||
// in the column.
|
||||
fn match_no_values(&self, op: &cmp::Operator, v: &T) -> bool {
|
||||
match &self.range {
|
||||
Some(range) => match op {
|
||||
// no values are `v` so no rows will match `== v`
|
||||
cmp::Operator::Equal => range.0 == range.1 && &range.1 != v,
|
||||
// all values are `v` so no rows will match `!= v`
|
||||
cmp::Operator::NotEqual => range.0 == range.1 && &range.1 == v,
|
||||
// max value in column is `<= v` so no values can be `> v`
|
||||
cmp::Operator::GT => &range.1 <= v,
|
||||
// max value in column is `< v` so no values can be `>= v`
|
||||
cmp::Operator::GTE => &range.1 < v,
|
||||
// min value in column is `>= v` so no values can be `< v`
|
||||
cmp::Operator::LT => &range.0 >= v,
|
||||
// min value in column is `> v` so no values can be `<= v`
|
||||
cmp::Operator::LTE => &range.0 > v,
|
||||
},
|
||||
None => true, // only null values in column so no values satisfy `v`
|
||||
}
|
||||
}
|
||||
}
|
||||
pub enum StringEncoding {
|
||||
RLE(dictionary::RLE),
|
||||
|
@ -828,13 +943,13 @@ impl IntegerEncoding {
|
|||
// non-null signed 64-bit integers.
|
||||
match dst {
|
||||
EncodedValues::I64(dst) => match &self {
|
||||
IntegerEncoding::I64I64(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
||||
IntegerEncoding::I64I32(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
||||
IntegerEncoding::I64U32(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
||||
IntegerEncoding::I64I16(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
||||
IntegerEncoding::I64U16(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
||||
IntegerEncoding::I64I8(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
||||
IntegerEncoding::I64U8(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
||||
Self::I64I64(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
||||
Self::I64I32(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
||||
Self::I64U32(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
||||
Self::I64I16(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
||||
Self::I64U16(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
||||
Self::I64I8(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
||||
Self::I64U8(data) => EncodedValues::I64(data.values(row_ids, dst)),
|
||||
_ => unreachable!("encoded values on encoding type not supported"),
|
||||
},
|
||||
_ => unreachable!("currently only support encoded values as i64"),
|
||||
|
@ -849,13 +964,13 @@ impl IntegerEncoding {
|
|||
// non-null signed 64-bit integers.
|
||||
match dst {
|
||||
EncodedValues::I64(dst) => match &self {
|
||||
IntegerEncoding::I64I64(data) => EncodedValues::I64(data.all_values(dst)),
|
||||
IntegerEncoding::I64I32(data) => EncodedValues::I64(data.all_values(dst)),
|
||||
IntegerEncoding::I64U32(data) => EncodedValues::I64(data.all_values(dst)),
|
||||
IntegerEncoding::I64I16(data) => EncodedValues::I64(data.all_values(dst)),
|
||||
IntegerEncoding::I64U16(data) => EncodedValues::I64(data.all_values(dst)),
|
||||
IntegerEncoding::I64I8(data) => EncodedValues::I64(data.all_values(dst)),
|
||||
IntegerEncoding::I64U8(data) => EncodedValues::I64(data.all_values(dst)),
|
||||
Self::I64I64(data) => EncodedValues::I64(data.all_values(dst)),
|
||||
Self::I64I32(data) => EncodedValues::I64(data.all_values(dst)),
|
||||
Self::I64U32(data) => EncodedValues::I64(data.all_values(dst)),
|
||||
Self::I64I16(data) => EncodedValues::I64(data.all_values(dst)),
|
||||
Self::I64U16(data) => EncodedValues::I64(data.all_values(dst)),
|
||||
Self::I64I8(data) => EncodedValues::I64(data.all_values(dst)),
|
||||
Self::I64U8(data) => EncodedValues::I64(data.all_values(dst)),
|
||||
_ => unreachable!("encoded values on encoding type not supported"),
|
||||
},
|
||||
_ => unreachable!("currently only support encoded values as i64"),
|
||||
|
@ -869,33 +984,127 @@ impl IntegerEncoding {
|
|||
/// `row_ids_filter` will panic if this invariant is broken.
|
||||
pub fn row_ids_filter(&self, op: cmp::Operator, value: &Scalar, dst: RowIDs) -> RowIDs {
|
||||
match &self {
|
||||
IntegerEncoding::I64I64(c) => c.row_ids_filter(value.as_i64(), op, dst),
|
||||
IntegerEncoding::I64I32(c) => c.row_ids_filter(value.as_i32(), op, dst),
|
||||
IntegerEncoding::I64U32(c) => c.row_ids_filter(value.as_u32(), op, dst),
|
||||
IntegerEncoding::I64I16(c) => c.row_ids_filter(value.as_i16(), op, dst),
|
||||
IntegerEncoding::I64U16(c) => c.row_ids_filter(value.as_u16(), op, dst),
|
||||
IntegerEncoding::I64I8(c) => c.row_ids_filter(value.as_i8(), op, dst),
|
||||
IntegerEncoding::I64U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
||||
IntegerEncoding::I32I32(c) => c.row_ids_filter(value.as_i32(), op, dst),
|
||||
IntegerEncoding::I32I16(c) => c.row_ids_filter(value.as_i16(), op, dst),
|
||||
IntegerEncoding::I32U16(c) => c.row_ids_filter(value.as_u16(), op, dst),
|
||||
IntegerEncoding::I32I8(c) => c.row_ids_filter(value.as_i8(), op, dst),
|
||||
IntegerEncoding::I32U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
||||
IntegerEncoding::I16I16(c) => c.row_ids_filter(value.as_i16(), op, dst),
|
||||
IntegerEncoding::I16I8(c) => c.row_ids_filter(value.as_i8(), op, dst),
|
||||
IntegerEncoding::I16U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
||||
IntegerEncoding::I8I8(c) => c.row_ids_filter(value.as_i8(), op, dst),
|
||||
IntegerEncoding::U64U64(c) => c.row_ids_filter(value.as_u64(), op, dst),
|
||||
IntegerEncoding::U64U32(c) => c.row_ids_filter(value.as_u32(), op, dst),
|
||||
IntegerEncoding::U64U16(c) => c.row_ids_filter(value.as_u16(), op, dst),
|
||||
IntegerEncoding::U64U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
||||
IntegerEncoding::U32U32(c) => c.row_ids_filter(value.as_u32(), op, dst),
|
||||
IntegerEncoding::U32U16(c) => c.row_ids_filter(value.as_u16(), op, dst),
|
||||
IntegerEncoding::U32U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
||||
IntegerEncoding::U16U16(c) => c.row_ids_filter(value.as_u16(), op, dst),
|
||||
IntegerEncoding::U16U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
||||
IntegerEncoding::U8U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
||||
IntegerEncoding::I64I64N(c) => todo!(),
|
||||
Self::I64I64(c) => c.row_ids_filter(value.as_i64(), op, dst),
|
||||
Self::I64I32(c) => c.row_ids_filter(value.as_i32(), op, dst),
|
||||
Self::I64U32(c) => c.row_ids_filter(value.as_u32(), op, dst),
|
||||
Self::I64I16(c) => c.row_ids_filter(value.as_i16(), op, dst),
|
||||
Self::I64U16(c) => c.row_ids_filter(value.as_u16(), op, dst),
|
||||
Self::I64I8(c) => c.row_ids_filter(value.as_i8(), op, dst),
|
||||
Self::I64U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
||||
Self::I32I32(c) => c.row_ids_filter(value.as_i32(), op, dst),
|
||||
Self::I32I16(c) => c.row_ids_filter(value.as_i16(), op, dst),
|
||||
Self::I32U16(c) => c.row_ids_filter(value.as_u16(), op, dst),
|
||||
Self::I32I8(c) => c.row_ids_filter(value.as_i8(), op, dst),
|
||||
Self::I32U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
||||
Self::I16I16(c) => c.row_ids_filter(value.as_i16(), op, dst),
|
||||
Self::I16I8(c) => c.row_ids_filter(value.as_i8(), op, dst),
|
||||
Self::I16U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
||||
Self::I8I8(c) => c.row_ids_filter(value.as_i8(), op, dst),
|
||||
Self::U64U64(c) => c.row_ids_filter(value.as_u64(), op, dst),
|
||||
Self::U64U32(c) => c.row_ids_filter(value.as_u32(), op, dst),
|
||||
Self::U64U16(c) => c.row_ids_filter(value.as_u16(), op, dst),
|
||||
Self::U64U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
||||
Self::U32U32(c) => c.row_ids_filter(value.as_u32(), op, dst),
|
||||
Self::U32U16(c) => c.row_ids_filter(value.as_u16(), op, dst),
|
||||
Self::U32U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
||||
Self::U16U16(c) => c.row_ids_filter(value.as_u16(), op, dst),
|
||||
Self::U16U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
||||
Self::U8U8(c) => c.row_ids_filter(value.as_u8(), op, dst),
|
||||
Self::I64I64N(c) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the row ids that satisfy both the provided predicates.
|
||||
///
|
||||
/// Note: it is the caller's responsibility to ensure that the provided
|
||||
/// `Scalar` value will fit within the physical type of the encoded column.
|
||||
/// `row_ids_filter` will panic if this invariant is broken.
|
||||
pub fn row_ids_filter_range(
|
||||
&self,
|
||||
low: (cmp::Operator, &Scalar),
|
||||
high: (cmp::Operator, &Scalar),
|
||||
dst: RowIDs,
|
||||
) -> RowIDs {
|
||||
match &self {
|
||||
Self::I64I64(c) => {
|
||||
c.row_ids_filter_range((low.1.as_i64(), low.0), (high.1.as_i64(), high.0), dst)
|
||||
}
|
||||
Self::I64I32(c) => {
|
||||
c.row_ids_filter_range((low.1.as_i32(), low.0), (high.1.as_i32(), high.0), dst)
|
||||
}
|
||||
Self::I64U32(c) => {
|
||||
c.row_ids_filter_range((low.1.as_u32(), low.0), (high.1.as_u32(), high.0), dst)
|
||||
}
|
||||
Self::I64I16(c) => {
|
||||
c.row_ids_filter_range((low.1.as_i16(), low.0), (high.1.as_i16(), high.0), dst)
|
||||
}
|
||||
Self::I64U16(c) => {
|
||||
c.row_ids_filter_range((low.1.as_u16(), low.0), (high.1.as_u16(), high.0), dst)
|
||||
}
|
||||
Self::I64I8(c) => {
|
||||
c.row_ids_filter_range((low.1.as_i8(), low.0), (high.1.as_i8(), high.0), dst)
|
||||
}
|
||||
Self::I64U8(c) => {
|
||||
c.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst)
|
||||
}
|
||||
Self::I32I32(c) => {
|
||||
c.row_ids_filter_range((low.1.as_i32(), low.0), (high.1.as_i32(), high.0), dst)
|
||||
}
|
||||
Self::I32I16(c) => {
|
||||
c.row_ids_filter_range((low.1.as_i16(), low.0), (high.1.as_i16(), high.0), dst)
|
||||
}
|
||||
Self::I32U16(c) => {
|
||||
c.row_ids_filter_range((low.1.as_u16(), low.0), (high.1.as_u16(), high.0), dst)
|
||||
}
|
||||
Self::I32I8(c) => {
|
||||
c.row_ids_filter_range((low.1.as_i8(), low.0), (high.1.as_i8(), high.0), dst)
|
||||
}
|
||||
Self::I32U8(c) => {
|
||||
c.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst)
|
||||
}
|
||||
Self::I16I16(c) => {
|
||||
c.row_ids_filter_range((low.1.as_i16(), low.0), (high.1.as_i16(), high.0), dst)
|
||||
}
|
||||
Self::I16I8(c) => {
|
||||
c.row_ids_filter_range((low.1.as_i8(), low.0), (high.1.as_i8(), high.0), dst)
|
||||
}
|
||||
Self::I16U8(c) => {
|
||||
c.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst)
|
||||
}
|
||||
Self::I8I8(c) => {
|
||||
c.row_ids_filter_range((low.1.as_i8(), low.0), (high.1.as_i8(), high.0), dst)
|
||||
}
|
||||
Self::U64U64(c) => {
|
||||
c.row_ids_filter_range((low.1.as_u64(), low.0), (high.1.as_u64(), high.0), dst)
|
||||
}
|
||||
Self::U64U32(c) => {
|
||||
c.row_ids_filter_range((low.1.as_u32(), low.0), (high.1.as_u32(), high.0), dst)
|
||||
}
|
||||
Self::U64U16(c) => {
|
||||
c.row_ids_filter_range((low.1.as_u16(), low.0), (high.1.as_u16(), high.0), dst)
|
||||
}
|
||||
Self::U64U8(c) => {
|
||||
c.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst)
|
||||
}
|
||||
Self::U32U32(c) => {
|
||||
c.row_ids_filter_range((low.1.as_u32(), low.0), (high.1.as_u32(), high.0), dst)
|
||||
}
|
||||
Self::U32U16(c) => {
|
||||
c.row_ids_filter_range((low.1.as_u16(), low.0), (high.1.as_u16(), high.0), dst)
|
||||
}
|
||||
Self::U32U8(c) => {
|
||||
c.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst)
|
||||
}
|
||||
Self::U16U16(c) => {
|
||||
c.row_ids_filter_range((low.1.as_u16(), low.0), (high.1.as_u16(), high.0), dst)
|
||||
}
|
||||
Self::U16U8(c) => {
|
||||
c.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst)
|
||||
}
|
||||
Self::U8U8(c) => {
|
||||
c.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst)
|
||||
}
|
||||
Self::I64I64N(c) => todo!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -945,6 +1154,27 @@ impl FloatEncoding {
|
|||
FloatEncoding::Fixed32(c) => c.row_ids_filter(value.as_f32(), op, dst),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the row ids that satisfy both the provided predicates.
|
||||
///
|
||||
/// Note: it is the caller's responsibility to ensure that the provided
|
||||
/// `Scalar` value will fit within the physical type of the encoded column.
|
||||
/// `row_ids_filter` will panic if this invariant is broken.
|
||||
pub fn row_ids_filter_range(
|
||||
&self,
|
||||
low: (cmp::Operator, &Scalar),
|
||||
high: (cmp::Operator, &Scalar),
|
||||
dst: RowIDs,
|
||||
) -> RowIDs {
|
||||
match &self {
|
||||
FloatEncoding::Fixed64(c) => {
|
||||
c.row_ids_filter_range((low.1.as_f64(), low.0), (high.1.as_f64(), high.0), dst)
|
||||
}
|
||||
FloatEncoding::Fixed32(c) => {
|
||||
c.row_ids_filter_range((low.1.as_f32(), low.0), (high.1.as_f32(), high.0), dst)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Converts an Arrow `StringArray` into a column, currently using the RLE
|
||||
|
@ -1714,9 +1944,16 @@ impl EncodedValues {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
enum PredicateMatch {
|
||||
None,
|
||||
SomeMaybe,
|
||||
All,
|
||||
}
|
||||
|
||||
/// A specific type of Option for `RowIDs` where the notion of all rows ids is
|
||||
/// represented.
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum RowIDsOption {
|
||||
None,
|
||||
Some(RowIDs),
|
||||
|
@ -2392,6 +2629,18 @@ mod test {
|
|||
Value::String(&"Adam Raised a Cain".to_string()),
|
||||
);
|
||||
assert!(matches!(row_ids, RowIDsOption::All));
|
||||
|
||||
row_ids = col.row_ids_filter(
|
||||
cmp::Operator::GT,
|
||||
Value::String(&"Adam Raised a Cain".to_string()),
|
||||
);
|
||||
assert!(matches!(row_ids, RowIDsOption::All));
|
||||
|
||||
row_ids = col.row_ids_filter(
|
||||
cmp::Operator::NotEqual,
|
||||
Value::String(&"Thunder Road".to_string()),
|
||||
);
|
||||
assert!(matches!(row_ids, RowIDsOption::All));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -2413,6 +2662,9 @@ mod test {
|
|||
|
||||
row_ids = col.row_ids_filter(cmp::Operator::NotEqual, Value::Scalar(Scalar::I32(-1257)));
|
||||
assert!(matches!(row_ids, RowIDsOption::All));
|
||||
|
||||
row_ids = col.row_ids_filter(cmp::Operator::LT, Value::Scalar(Scalar::I64(i64::MAX)));
|
||||
assert!(matches!(row_ids, RowIDsOption::All));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -2461,6 +2713,63 @@ mod test {
|
|||
assert!(matches!(row_ids, RowIDsOption::All));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn row_ids_range() {
|
||||
let input = &[100, 200, 300, 2, 200, 22, 30];
|
||||
|
||||
let col = Column::from(&input[..]);
|
||||
let mut row_ids = col.row_ids_filter_range(
|
||||
(cmp::Operator::GT, Value::Scalar(Scalar::I32(100))),
|
||||
(cmp::Operator::LT, Value::Scalar(Scalar::I32(300))),
|
||||
);
|
||||
assert_eq!(row_ids.unwrap().to_vec(), vec![1, 4]);
|
||||
|
||||
row_ids = col.row_ids_filter_range(
|
||||
(cmp::Operator::GTE, Value::Scalar(Scalar::I32(200))),
|
||||
(cmp::Operator::LTE, Value::Scalar(Scalar::I32(300))),
|
||||
);
|
||||
assert_eq!(row_ids.unwrap().to_vec(), vec![1, 2, 4]);
|
||||
|
||||
row_ids = col.row_ids_filter_range(
|
||||
(cmp::Operator::GTE, Value::Scalar(Scalar::I32(23333))),
|
||||
(cmp::Operator::LTE, Value::Scalar(Scalar::I32(999999))),
|
||||
);
|
||||
assert!(matches!(row_ids, RowIDsOption::None));
|
||||
|
||||
row_ids = col.row_ids_filter_range(
|
||||
(cmp::Operator::GT, Value::Scalar(Scalar::I32(-100))),
|
||||
(cmp::Operator::LT, Value::Scalar(Scalar::I32(301))),
|
||||
);
|
||||
assert!(matches!(row_ids, RowIDsOption::All));
|
||||
|
||||
row_ids = col.row_ids_filter_range(
|
||||
(cmp::Operator::GTE, Value::Scalar(Scalar::I32(2))),
|
||||
(cmp::Operator::LTE, Value::Scalar(Scalar::I32(300))),
|
||||
);
|
||||
assert!(matches!(row_ids, RowIDsOption::All));
|
||||
|
||||
row_ids = col.row_ids_filter_range(
|
||||
(cmp::Operator::GTE, Value::Scalar(Scalar::I32(87))),
|
||||
(cmp::Operator::LTE, Value::Scalar(Scalar::I32(999999))),
|
||||
);
|
||||
assert_eq!(row_ids.unwrap().to_vec(), vec![0, 1, 2, 4]);
|
||||
|
||||
row_ids = col.row_ids_filter_range(
|
||||
(cmp::Operator::GTE, Value::Scalar(Scalar::I32(0))),
|
||||
(
|
||||
cmp::Operator::NotEqual,
|
||||
Value::Scalar(Scalar::I64(i64::MAX)),
|
||||
),
|
||||
);
|
||||
assert!(matches!(row_ids, RowIDsOption::All));
|
||||
|
||||
row_ids = col.row_ids_filter_range(
|
||||
(cmp::Operator::GTE, Value::Scalar(Scalar::I32(0))),
|
||||
(cmp::Operator::NotEqual, Value::Scalar(Scalar::I64(99))),
|
||||
);
|
||||
assert_eq!(row_ids.unwrap().to_vec(), vec![0, 1, 2, 3, 4, 5, 6]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn might_contain_value() {
|
||||
let input = &[100i64, 200, 300, 2, 200, 22, 30];
|
||||
|
@ -2566,4 +2875,67 @@ mod test {
|
|||
false
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn evaluate_predicate_on_meta() {
|
||||
let input = &[100i64, 200, 300, 2, 200, 22, 30];
|
||||
let col = Column::from(&input[..]);
|
||||
|
||||
let cases: Vec<(cmp::Operator, Scalar, PredicateMatch)> = vec![
|
||||
(
|
||||
cmp::Operator::GT,
|
||||
Scalar::U64(100),
|
||||
PredicateMatch::SomeMaybe,
|
||||
),
|
||||
(
|
||||
cmp::Operator::GT,
|
||||
Scalar::I64(100),
|
||||
PredicateMatch::SomeMaybe,
|
||||
),
|
||||
(cmp::Operator::GT, Scalar::I8(-99), PredicateMatch::All),
|
||||
(
|
||||
cmp::Operator::GT,
|
||||
Scalar::I64(100),
|
||||
PredicateMatch::SomeMaybe,
|
||||
),
|
||||
(
|
||||
cmp::Operator::LT,
|
||||
Scalar::I64(300),
|
||||
PredicateMatch::SomeMaybe,
|
||||
),
|
||||
(cmp::Operator::LTE, Scalar::I32(300), PredicateMatch::All),
|
||||
(
|
||||
cmp::Operator::Equal,
|
||||
Scalar::I32(2),
|
||||
PredicateMatch::SomeMaybe,
|
||||
),
|
||||
(
|
||||
cmp::Operator::NotEqual,
|
||||
Scalar::I32(2),
|
||||
PredicateMatch::SomeMaybe,
|
||||
),
|
||||
(cmp::Operator::NotEqual, Scalar::I64(1), PredicateMatch::All),
|
||||
(
|
||||
cmp::Operator::NotEqual,
|
||||
Scalar::I64(301),
|
||||
PredicateMatch::All,
|
||||
),
|
||||
(cmp::Operator::GT, Scalar::I64(100000), PredicateMatch::None),
|
||||
(cmp::Operator::GTE, Scalar::I64(301), PredicateMatch::None),
|
||||
(cmp::Operator::LT, Scalar::I64(2), PredicateMatch::None),
|
||||
(cmp::Operator::LTE, Scalar::I8(-100), PredicateMatch::None),
|
||||
(
|
||||
cmp::Operator::Equal,
|
||||
Scalar::I64(100000),
|
||||
PredicateMatch::None,
|
||||
),
|
||||
];
|
||||
|
||||
for (op, scalar, result) in cases {
|
||||
assert_eq!(
|
||||
col.evaluate_predicate_on_meta(&op, &Value::Scalar(scalar)),
|
||||
result
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue