From c5dc48db04c6ceb3adb2fed77ffa95faf7277b75 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 30 Oct 2020 19:03:11 +0000 Subject: [PATCH] feat: implement range based predicate on column --- delorean_segment_store/src/column.rs | 512 +++++++++++++++++++++++---- 1 file changed, 442 insertions(+), 70 deletions(-) diff --git a/delorean_segment_store/src/column.rs b/delorean_segment_store/src/column.rs index fd7ada83a2..d3f8a76cf3 100644 --- a/delorean_segment_store/src/column.rs +++ b/delorean_segment_store/src/column.rs @@ -184,32 +184,12 @@ impl Column { /// /// TODO(edd): row ids pooling. pub fn row_ids_filter(&self, op: cmp::Operator, value: Value<'_>) -> RowIDsOption { - match op { - // When the predicate is == and the metadata range indicates the column - // can't contain `value` then the column doesn't need to be read. - cmp::Operator::Equal => { - if !self.might_contain_value(&value) { - return RowIDsOption::None; // no rows are going to match. - } - } - - // When the predicate is one of {<, <=, >, >=} and the column doesn't - // contain any null values, and the entire range of values satisfies the - // predicate then the column doesn't need to be read. - cmp::Operator::GT | cmp::Operator::GTE | cmp::Operator::LT | cmp::Operator::LTE => { - if self.predicate_matches_all_values(&op, &value) { - return RowIDsOption::All; - } - } - - // When the predicate is != and the metadata range indicates that the - // column can't possibly contain `value` then the predicate must - // match all rows on the column. - cmp::Operator::NotEqual => { - if !self.might_contain_value(&value) { - return RowIDsOption::All; // all rows are going to match. - } - } + // If we can get an answer using only the meta-data on the column then + // return that answer. 
+ match self.evaluate_predicate_on_meta(&op, &value) { + PredicateMatch::None => return RowIDsOption::None, + PredicateMatch::All => return RowIDsOption::All, + PredicateMatch::SomeMaybe => {} // have to apply predicate to column } // TODO(edd): figure out pooling of these @@ -240,7 +220,95 @@ impl Column { low: (cmp::Operator, Value<'_>), high: (cmp::Operator, Value<'_>), ) -> RowIDsOption { - todo!() + let l = self.evaluate_predicate_on_meta(&low.0, &low.1); + let h = self.evaluate_predicate_on_meta(&high.0, &high.1); + match (l, h) { + (PredicateMatch::All, PredicateMatch::All) => return RowIDsOption::All, + + // One of the predicates can't be satisfied, therefore no rows will + // match both predicates. + (PredicateMatch::None, _) | (_, PredicateMatch::None) => return RowIDsOption::None, + + // One of the predicates matches all rows so reduce the operation + // to the other side. + (PredicateMatch::SomeMaybe, PredicateMatch::All) => { + return self.row_ids_filter(low.0, low.1); + } + (PredicateMatch::All, PredicateMatch::SomeMaybe) => { + return self.row_ids_filter(high.0, high.1); + } + + // Have to apply the predicates to the column to identify correct + // set of rows. + (PredicateMatch::SomeMaybe, PredicateMatch::SomeMaybe) => {} + } + + // TODO(edd): figure out pooling of these + let dst = RowIDs::Bitmap(Bitmap::create()); + + // Check the column for all rows that satisfy the predicate. 
+ let row_ids = match &self { + Column::String(_, data) => unimplemented!("not supported on string columns yet"), + Column::Float(_, data) => { + data.row_ids_filter_range((low.0, low.1.scalar()), (high.0, high.1.scalar()), dst) + } + Column::Integer(_, data) => { + data.row_ids_filter_range((low.0, low.1.scalar()), (high.0, high.1.scalar()), dst) + } + Column::Unsigned(_, data) => { + data.row_ids_filter_range((low.0, low.1.scalar()), (high.0, high.1.scalar()), dst) + } + Column::Bool => todo!(), + Column::ByteArray(_, data) => todo!(), + }; + + if row_ids.is_empty() { + return RowIDsOption::None; + } + RowIDsOption::Some(row_ids) + } + + // Helper function to determine if the predicate matches either no rows or + // all the rows in a column. This is determined by looking at the metadata + // on the column. + // + // `SomeMaybe` indicates that the column may contain some matching rows and the + // predicate should be directly applied to the column. + fn evaluate_predicate_on_meta(&self, op: &cmp::Operator, value: &Value<'_>) -> PredicateMatch { + match op { + // When the predicate is == and the metadata range indicates the column + // can't contain `value` then the column doesn't need to be read. + cmp::Operator::Equal => { + if !self.might_contain_value(&value) { + return PredicateMatch::None; // no rows are going to match. + } + } + + // When the predicate is one of {<, <=, >, >=} and the column doesn't + // contain any null values, and the entire range of values satisfies the + // predicate then the column doesn't need to be read. + cmp::Operator::GT | cmp::Operator::GTE | cmp::Operator::LT | cmp::Operator::LTE => { + if self.predicate_matches_all_values(&op, &value) { + return PredicateMatch::All; + } + } + + // When the predicate is != and the metadata range indicates that the + // column can't possibly contain `value` then the predicate must + // match all rows on the column. 
+ cmp::Operator::NotEqual => { + if !self.might_contain_value(&value) { + return PredicateMatch::All; // all rows are going to match. + } + } + } + + if self.predicate_matches_no_values(&op, &value) { + return PredicateMatch::None; + } + + // The predicate could match some values + PredicateMatch::SomeMaybe } // Helper method to determine if the column possibly contains this value @@ -339,6 +407,32 @@ impl Column { } } + // Helper method to determine if the predicate can not possibly match any + // values in the column. + fn predicate_matches_no_values(&self, op: &cmp::Operator, value: &Value<'_>) -> bool { + match &self { + Column::String(meta, data) => { + if let Value::String(other) = value { + meta.match_no_values(op, other) + } else { + unreachable!("impossible value comparison"); + } + } + // breaking this down: + // * Extract a Scalar variant from `value`, which should panic if + // that's not possible; + // * Convert that scalar to a primitive value based + // on the logical type used for the metadata on the column. + // * See if one can prove none of the column can match the predicate. + // + Column::Float(meta, data) => meta.match_no_values(op, &value.scalar().as_f64()), + Column::Integer(meta, data) => meta.match_no_values(op, &value.scalar().as_i64()), + Column::Unsigned(meta, data) => meta.match_no_values(op, &value.scalar().as_u64()), + Column::Bool => todo!(), + Column::ByteArray(meta, _) => todo!(), + } + } + // // Methods for selecting // @@ -411,7 +505,6 @@ impl MetaData { // column. It is up to the caller to determine if the column contains null // values, which would invalidate a truthful result. fn might_match_all_values(&self, op: &cmp::Operator, v: &T) -> bool { - println!("comparing {:?} {:?}, {:?}", op, v, self.range); match &self.range { Some(range) => match op { // all values in column equal to v @@ -430,6 +523,28 @@ impl MetaData { None => false, // only null values in column. 
} } + + // Determines if it can be shown that the predicate would not match any rows + // in the column. + fn match_no_values(&self, op: &cmp::Operator, v: &T) -> bool { + match &self.range { + Some(range) => match op { + // no values are `v` so no rows will match `== v` + cmp::Operator::Equal => range.0 == range.1 && &range.1 != v, + // all values are `v` so no rows will match `!= v` + cmp::Operator::NotEqual => range.0 == range.1 && &range.1 == v, + // max value in column is `<= v` so no values can be `> v` + cmp::Operator::GT => &range.1 <= v, + // max value in column is `< v` so no values can be `>= v` + cmp::Operator::GTE => &range.1 < v, + // min value in column is `>= v` so no values can be `< v` + cmp::Operator::LT => &range.0 >= v, + // min value in column is `> v` so no values can be `<= v` + cmp::Operator::LTE => &range.0 > v, + }, + None => true, // only null values in column so no values satisfy `v` + } + } } pub enum StringEncoding { RLE(dictionary::RLE), @@ -828,13 +943,13 @@ impl IntegerEncoding { // non-null signed 64-bit integers. 
match dst { EncodedValues::I64(dst) => match &self { - IntegerEncoding::I64I64(data) => EncodedValues::I64(data.values(row_ids, dst)), - IntegerEncoding::I64I32(data) => EncodedValues::I64(data.values(row_ids, dst)), - IntegerEncoding::I64U32(data) => EncodedValues::I64(data.values(row_ids, dst)), - IntegerEncoding::I64I16(data) => EncodedValues::I64(data.values(row_ids, dst)), - IntegerEncoding::I64U16(data) => EncodedValues::I64(data.values(row_ids, dst)), - IntegerEncoding::I64I8(data) => EncodedValues::I64(data.values(row_ids, dst)), - IntegerEncoding::I64U8(data) => EncodedValues::I64(data.values(row_ids, dst)), + Self::I64I64(data) => EncodedValues::I64(data.values(row_ids, dst)), + Self::I64I32(data) => EncodedValues::I64(data.values(row_ids, dst)), + Self::I64U32(data) => EncodedValues::I64(data.values(row_ids, dst)), + Self::I64I16(data) => EncodedValues::I64(data.values(row_ids, dst)), + Self::I64U16(data) => EncodedValues::I64(data.values(row_ids, dst)), + Self::I64I8(data) => EncodedValues::I64(data.values(row_ids, dst)), + Self::I64U8(data) => EncodedValues::I64(data.values(row_ids, dst)), _ => unreachable!("encoded values on encoding type not supported"), }, _ => unreachable!("currently only support encoded values as i64"), @@ -849,13 +964,13 @@ impl IntegerEncoding { // non-null signed 64-bit integers. 
match dst { EncodedValues::I64(dst) => match &self { - IntegerEncoding::I64I64(data) => EncodedValues::I64(data.all_values(dst)), - IntegerEncoding::I64I32(data) => EncodedValues::I64(data.all_values(dst)), - IntegerEncoding::I64U32(data) => EncodedValues::I64(data.all_values(dst)), - IntegerEncoding::I64I16(data) => EncodedValues::I64(data.all_values(dst)), - IntegerEncoding::I64U16(data) => EncodedValues::I64(data.all_values(dst)), - IntegerEncoding::I64I8(data) => EncodedValues::I64(data.all_values(dst)), - IntegerEncoding::I64U8(data) => EncodedValues::I64(data.all_values(dst)), + Self::I64I64(data) => EncodedValues::I64(data.all_values(dst)), + Self::I64I32(data) => EncodedValues::I64(data.all_values(dst)), + Self::I64U32(data) => EncodedValues::I64(data.all_values(dst)), + Self::I64I16(data) => EncodedValues::I64(data.all_values(dst)), + Self::I64U16(data) => EncodedValues::I64(data.all_values(dst)), + Self::I64I8(data) => EncodedValues::I64(data.all_values(dst)), + Self::I64U8(data) => EncodedValues::I64(data.all_values(dst)), _ => unreachable!("encoded values on encoding type not supported"), }, _ => unreachable!("currently only support encoded values as i64"), @@ -869,33 +984,127 @@ impl IntegerEncoding { /// `row_ids_filter` will panic if this invariant is broken. 
pub fn row_ids_filter(&self, op: cmp::Operator, value: &Scalar, dst: RowIDs) -> RowIDs { match &self { - IntegerEncoding::I64I64(c) => c.row_ids_filter(value.as_i64(), op, dst), - IntegerEncoding::I64I32(c) => c.row_ids_filter(value.as_i32(), op, dst), - IntegerEncoding::I64U32(c) => c.row_ids_filter(value.as_u32(), op, dst), - IntegerEncoding::I64I16(c) => c.row_ids_filter(value.as_i16(), op, dst), - IntegerEncoding::I64U16(c) => c.row_ids_filter(value.as_u16(), op, dst), - IntegerEncoding::I64I8(c) => c.row_ids_filter(value.as_i8(), op, dst), - IntegerEncoding::I64U8(c) => c.row_ids_filter(value.as_u8(), op, dst), - IntegerEncoding::I32I32(c) => c.row_ids_filter(value.as_i32(), op, dst), - IntegerEncoding::I32I16(c) => c.row_ids_filter(value.as_i16(), op, dst), - IntegerEncoding::I32U16(c) => c.row_ids_filter(value.as_u16(), op, dst), - IntegerEncoding::I32I8(c) => c.row_ids_filter(value.as_i8(), op, dst), - IntegerEncoding::I32U8(c) => c.row_ids_filter(value.as_u8(), op, dst), - IntegerEncoding::I16I16(c) => c.row_ids_filter(value.as_i16(), op, dst), - IntegerEncoding::I16I8(c) => c.row_ids_filter(value.as_i8(), op, dst), - IntegerEncoding::I16U8(c) => c.row_ids_filter(value.as_u8(), op, dst), - IntegerEncoding::I8I8(c) => c.row_ids_filter(value.as_i8(), op, dst), - IntegerEncoding::U64U64(c) => c.row_ids_filter(value.as_u64(), op, dst), - IntegerEncoding::U64U32(c) => c.row_ids_filter(value.as_u32(), op, dst), - IntegerEncoding::U64U16(c) => c.row_ids_filter(value.as_u16(), op, dst), - IntegerEncoding::U64U8(c) => c.row_ids_filter(value.as_u8(), op, dst), - IntegerEncoding::U32U32(c) => c.row_ids_filter(value.as_u32(), op, dst), - IntegerEncoding::U32U16(c) => c.row_ids_filter(value.as_u16(), op, dst), - IntegerEncoding::U32U8(c) => c.row_ids_filter(value.as_u8(), op, dst), - IntegerEncoding::U16U16(c) => c.row_ids_filter(value.as_u16(), op, dst), - IntegerEncoding::U16U8(c) => c.row_ids_filter(value.as_u8(), op, dst), - IntegerEncoding::U8U8(c) => 
c.row_ids_filter(value.as_u8(), op, dst), - IntegerEncoding::I64I64N(c) => todo!(), + Self::I64I64(c) => c.row_ids_filter(value.as_i64(), op, dst), + Self::I64I32(c) => c.row_ids_filter(value.as_i32(), op, dst), + Self::I64U32(c) => c.row_ids_filter(value.as_u32(), op, dst), + Self::I64I16(c) => c.row_ids_filter(value.as_i16(), op, dst), + Self::I64U16(c) => c.row_ids_filter(value.as_u16(), op, dst), + Self::I64I8(c) => c.row_ids_filter(value.as_i8(), op, dst), + Self::I64U8(c) => c.row_ids_filter(value.as_u8(), op, dst), + Self::I32I32(c) => c.row_ids_filter(value.as_i32(), op, dst), + Self::I32I16(c) => c.row_ids_filter(value.as_i16(), op, dst), + Self::I32U16(c) => c.row_ids_filter(value.as_u16(), op, dst), + Self::I32I8(c) => c.row_ids_filter(value.as_i8(), op, dst), + Self::I32U8(c) => c.row_ids_filter(value.as_u8(), op, dst), + Self::I16I16(c) => c.row_ids_filter(value.as_i16(), op, dst), + Self::I16I8(c) => c.row_ids_filter(value.as_i8(), op, dst), + Self::I16U8(c) => c.row_ids_filter(value.as_u8(), op, dst), + Self::I8I8(c) => c.row_ids_filter(value.as_i8(), op, dst), + Self::U64U64(c) => c.row_ids_filter(value.as_u64(), op, dst), + Self::U64U32(c) => c.row_ids_filter(value.as_u32(), op, dst), + Self::U64U16(c) => c.row_ids_filter(value.as_u16(), op, dst), + Self::U64U8(c) => c.row_ids_filter(value.as_u8(), op, dst), + Self::U32U32(c) => c.row_ids_filter(value.as_u32(), op, dst), + Self::U32U16(c) => c.row_ids_filter(value.as_u16(), op, dst), + Self::U32U8(c) => c.row_ids_filter(value.as_u8(), op, dst), + Self::U16U16(c) => c.row_ids_filter(value.as_u16(), op, dst), + Self::U16U8(c) => c.row_ids_filter(value.as_u8(), op, dst), + Self::U8U8(c) => c.row_ids_filter(value.as_u8(), op, dst), + Self::I64I64N(c) => todo!(), + } + } + + /// Returns the row ids that satisfy both the provided predicates. + /// + /// Note: it is the caller's responsibility to ensure that the provided + /// `Scalar` value will fit within the physical type of the encoded column. 
+ /// `row_ids_filter_range` will panic if this invariant is broken. + pub fn row_ids_filter_range( + &self, + low: (cmp::Operator, &Scalar), + high: (cmp::Operator, &Scalar), + dst: RowIDs, + ) -> RowIDs { + match &self { + Self::I64I64(c) => { + c.row_ids_filter_range((low.1.as_i64(), low.0), (high.1.as_i64(), high.0), dst) + } + Self::I64I32(c) => { + c.row_ids_filter_range((low.1.as_i32(), low.0), (high.1.as_i32(), high.0), dst) + } + Self::I64U32(c) => { + c.row_ids_filter_range((low.1.as_u32(), low.0), (high.1.as_u32(), high.0), dst) + } + Self::I64I16(c) => { + c.row_ids_filter_range((low.1.as_i16(), low.0), (high.1.as_i16(), high.0), dst) + } + Self::I64U16(c) => { + c.row_ids_filter_range((low.1.as_u16(), low.0), (high.1.as_u16(), high.0), dst) + } + Self::I64I8(c) => { + c.row_ids_filter_range((low.1.as_i8(), low.0), (high.1.as_i8(), high.0), dst) + } + Self::I64U8(c) => { + c.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst) + } + Self::I32I32(c) => { + c.row_ids_filter_range((low.1.as_i32(), low.0), (high.1.as_i32(), high.0), dst) + } + Self::I32I16(c) => { + c.row_ids_filter_range((low.1.as_i16(), low.0), (high.1.as_i16(), high.0), dst) + } + Self::I32U16(c) => { + c.row_ids_filter_range((low.1.as_u16(), low.0), (high.1.as_u16(), high.0), dst) + } + Self::I32I8(c) => { + c.row_ids_filter_range((low.1.as_i8(), low.0), (high.1.as_i8(), high.0), dst) + } + Self::I32U8(c) => { + c.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst) + } + Self::I16I16(c) => { + c.row_ids_filter_range((low.1.as_i16(), low.0), (high.1.as_i16(), high.0), dst) + } + Self::I16I8(c) => { + c.row_ids_filter_range((low.1.as_i8(), low.0), (high.1.as_i8(), high.0), dst) + } + Self::I16U8(c) => { + c.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst) + } + Self::I8I8(c) => { + c.row_ids_filter_range((low.1.as_i8(), low.0), (high.1.as_i8(), high.0), dst) + } + Self::U64U64(c) => { + 
c.row_ids_filter_range((low.1.as_u64(), low.0), (high.1.as_u64(), high.0), dst) + } + Self::U64U32(c) => { + c.row_ids_filter_range((low.1.as_u32(), low.0), (high.1.as_u32(), high.0), dst) + } + Self::U64U16(c) => { + c.row_ids_filter_range((low.1.as_u16(), low.0), (high.1.as_u16(), high.0), dst) + } + Self::U64U8(c) => { + c.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst) + } + Self::U32U32(c) => { + c.row_ids_filter_range((low.1.as_u32(), low.0), (high.1.as_u32(), high.0), dst) + } + Self::U32U16(c) => { + c.row_ids_filter_range((low.1.as_u16(), low.0), (high.1.as_u16(), high.0), dst) + } + Self::U32U8(c) => { + c.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst) + } + Self::U16U16(c) => { + c.row_ids_filter_range((low.1.as_u16(), low.0), (high.1.as_u16(), high.0), dst) + } + Self::U16U8(c) => { + c.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst) + } + Self::U8U8(c) => { + c.row_ids_filter_range((low.1.as_u8(), low.0), (high.1.as_u8(), high.0), dst) + } + Self::I64I64N(c) => todo!(), } } } @@ -945,6 +1154,27 @@ impl FloatEncoding { FloatEncoding::Fixed32(c) => c.row_ids_filter(value.as_f32(), op, dst), } } + + /// Returns the row ids that satisfy both the provided predicates. + /// + /// Note: it is the caller's responsibility to ensure that the provided + /// `Scalar` value will fit within the physical type of the encoded column. + /// `row_ids_filter_range` will panic if this invariant is broken. 
+ pub fn row_ids_filter_range( + &self, + low: (cmp::Operator, &Scalar), + high: (cmp::Operator, &Scalar), + dst: RowIDs, + ) -> RowIDs { + match &self { + FloatEncoding::Fixed64(c) => { + c.row_ids_filter_range((low.1.as_f64(), low.0), (high.1.as_f64(), high.0), dst) + } + FloatEncoding::Fixed32(c) => { + c.row_ids_filter_range((low.1.as_f32(), low.0), (high.1.as_f32(), high.0), dst) + } + } + } } // Converts an Arrow `StringArray` into a column, currently using the RLE @@ -1714,9 +1944,16 @@ impl EncodedValues { } } +#[derive(Debug, PartialEq)] +enum PredicateMatch { + None, + SomeMaybe, + All, +} + /// A specific type of Option for `RowIDs` where the notion of all rows ids is /// represented. -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum RowIDsOption { None, Some(RowIDs), @@ -2392,6 +2629,18 @@ mod test { Value::String(&"Adam Raised a Cain".to_string()), ); assert!(matches!(row_ids, RowIDsOption::All)); + + row_ids = col.row_ids_filter( + cmp::Operator::GT, + Value::String(&"Adam Raised a Cain".to_string()), + ); + assert!(matches!(row_ids, RowIDsOption::All)); + + row_ids = col.row_ids_filter( + cmp::Operator::NotEqual, + Value::String(&"Thunder Road".to_string()), + ); + assert!(matches!(row_ids, RowIDsOption::All)); } #[test] @@ -2413,6 +2662,9 @@ mod test { row_ids = col.row_ids_filter(cmp::Operator::NotEqual, Value::Scalar(Scalar::I32(-1257))); assert!(matches!(row_ids, RowIDsOption::All)); + + row_ids = col.row_ids_filter(cmp::Operator::LT, Value::Scalar(Scalar::I64(i64::MAX))); + assert!(matches!(row_ids, RowIDsOption::All)); } #[test] @@ -2461,6 +2713,63 @@ mod test { assert!(matches!(row_ids, RowIDsOption::All)); } + #[test] + fn row_ids_range() { + let input = &[100, 200, 300, 2, 200, 22, 30]; + + let col = Column::from(&input[..]); + let mut row_ids = col.row_ids_filter_range( + (cmp::Operator::GT, Value::Scalar(Scalar::I32(100))), + (cmp::Operator::LT, Value::Scalar(Scalar::I32(300))), + ); + assert_eq!(row_ids.unwrap().to_vec(), vec![1, 
4]); + + row_ids = col.row_ids_filter_range( + (cmp::Operator::GTE, Value::Scalar(Scalar::I32(200))), + (cmp::Operator::LTE, Value::Scalar(Scalar::I32(300))), + ); + assert_eq!(row_ids.unwrap().to_vec(), vec![1, 2, 4]); + + row_ids = col.row_ids_filter_range( + (cmp::Operator::GTE, Value::Scalar(Scalar::I32(23333))), + (cmp::Operator::LTE, Value::Scalar(Scalar::I32(999999))), + ); + assert!(matches!(row_ids, RowIDsOption::None)); + + row_ids = col.row_ids_filter_range( + (cmp::Operator::GT, Value::Scalar(Scalar::I32(-100))), + (cmp::Operator::LT, Value::Scalar(Scalar::I32(301))), + ); + assert!(matches!(row_ids, RowIDsOption::All)); + + row_ids = col.row_ids_filter_range( + (cmp::Operator::GTE, Value::Scalar(Scalar::I32(2))), + (cmp::Operator::LTE, Value::Scalar(Scalar::I32(300))), + ); + assert!(matches!(row_ids, RowIDsOption::All)); + + row_ids = col.row_ids_filter_range( + (cmp::Operator::GTE, Value::Scalar(Scalar::I32(87))), + (cmp::Operator::LTE, Value::Scalar(Scalar::I32(999999))), + ); + assert_eq!(row_ids.unwrap().to_vec(), vec![0, 1, 2, 4]); + + row_ids = col.row_ids_filter_range( + (cmp::Operator::GTE, Value::Scalar(Scalar::I32(0))), + ( + cmp::Operator::NotEqual, + Value::Scalar(Scalar::I64(i64::MAX)), + ), + ); + assert!(matches!(row_ids, RowIDsOption::All)); + + row_ids = col.row_ids_filter_range( + (cmp::Operator::GTE, Value::Scalar(Scalar::I32(0))), + (cmp::Operator::NotEqual, Value::Scalar(Scalar::I64(99))), + ); + assert_eq!(row_ids.unwrap().to_vec(), vec![0, 1, 2, 3, 4, 5, 6]); + } + #[test] fn might_contain_value() { let input = &[100i64, 200, 300, 2, 200, 22, 30]; @@ -2566,4 +2875,67 @@ mod test { false ); } + + #[test] + fn evaluate_predicate_on_meta() { + let input = &[100i64, 200, 300, 2, 200, 22, 30]; + let col = Column::from(&input[..]); + + let cases: Vec<(cmp::Operator, Scalar, PredicateMatch)> = vec![ + ( + cmp::Operator::GT, + Scalar::U64(100), + PredicateMatch::SomeMaybe, + ), + ( + cmp::Operator::GT, + Scalar::I64(100), + 
PredicateMatch::SomeMaybe, + ), + (cmp::Operator::GT, Scalar::I8(-99), PredicateMatch::All), + ( + cmp::Operator::GT, + Scalar::I64(100), + PredicateMatch::SomeMaybe, + ), + ( + cmp::Operator::LT, + Scalar::I64(300), + PredicateMatch::SomeMaybe, + ), + (cmp::Operator::LTE, Scalar::I32(300), PredicateMatch::All), + ( + cmp::Operator::Equal, + Scalar::I32(2), + PredicateMatch::SomeMaybe, + ), + ( + cmp::Operator::NotEqual, + Scalar::I32(2), + PredicateMatch::SomeMaybe, + ), + (cmp::Operator::NotEqual, Scalar::I64(1), PredicateMatch::All), + ( + cmp::Operator::NotEqual, + Scalar::I64(301), + PredicateMatch::All, + ), + (cmp::Operator::GT, Scalar::I64(100000), PredicateMatch::None), + (cmp::Operator::GTE, Scalar::I64(301), PredicateMatch::None), + (cmp::Operator::LT, Scalar::I64(2), PredicateMatch::None), + (cmp::Operator::LTE, Scalar::I8(-100), PredicateMatch::None), + ( + cmp::Operator::Equal, + Scalar::I64(100000), + PredicateMatch::None, + ), + ]; + + for (op, scalar, result) in cases { + assert_eq!( + col.evaluate_predicate_on_meta(&op, &Value::Scalar(scalar)), + result + ); + } + } }