fix: ensure correct PredicateMatch for columns

pull/24376/head
Edd Robinson 2021-10-08 12:32:39 +01:00
parent 8817ee8d61
commit 04a7ddd34a
2 changed files with 121 additions and 67 deletions

View File

@ -496,23 +496,18 @@ impl Column {
}
}
// When the predicate is one of {<, <=, >, >=} and the column doesn't
// contain any null values, and the entire range of values satisfies the
// predicate then the column doesn't need to be read.
cmp::Operator::GT | cmp::Operator::GTE | cmp::Operator::LT | cmp::Operator::LTE => {
// When the predicate is one of {!=, <, <=, >, >=} and the column
// doesn't contain any null values, and the entire range of values
// satisfies the predicate then the column doesn't need to be read.
cmp::Operator::NotEqual
| cmp::Operator::GT
| cmp::Operator::GTE
| cmp::Operator::LT
| cmp::Operator::LTE => {
if self.predicate_matches_all_values(op, value) {
return PredicateMatch::All;
}
}
// When the predicate is != and the metadata range indicates that the
// column can't possibly contain `value` then the predicate must
// match all rows on the column.
cmp::Operator::NotEqual => {
if !self.might_contain_value(value) {
return PredicateMatch::All; // all rows are going to match.
}
}
}
if self.predicate_matches_no_values(op, value) {
@ -1429,6 +1424,7 @@ pub(crate) struct Statistics {
mod test {
use super::*;
use arrow::array::{Int64Array, StringArray};
use cmp::Operator;
use encoding::string::NULL_ID;
#[test]
@ -2020,7 +2016,7 @@ mod test {
row_ids = col.row_ids_filter(
&cmp::Operator::NotEqual,
&Value::from(-1257_i64),
&Value::from(1257_i64),
RowIDs::new_bitmap(),
);
assert!(matches!(row_ids, RowIDsOption::All(_)));
@ -2256,63 +2252,90 @@ mod test {
#[test]
fn evaluate_predicate_on_meta() {
let input = &[100_i64, 200, 300, 2, 200, 22, 30];
let col = Column::from(&input[..]);
let col = Column::from(&[100_i64, 200, 300, 2, 200, 22, 30][..]);
let cases: Vec<(cmp::Operator, Scalar, PredicateMatch)> = vec![
(Operator::GT, Scalar::U64(100), PredicateMatch::SomeMaybe),
(Operator::GT, Scalar::I64(100), PredicateMatch::SomeMaybe),
(Operator::GT, Scalar::I64(-99), PredicateMatch::All),
(Operator::GT, Scalar::I64(100), PredicateMatch::SomeMaybe),
(Operator::LT, Scalar::I64(300), PredicateMatch::SomeMaybe),
(Operator::LTE, Scalar::I64(300), PredicateMatch::All),
(Operator::Equal, Scalar::I64(2), PredicateMatch::SomeMaybe),
(
cmp::Operator::GT,
Scalar::U64(100),
PredicateMatch::SomeMaybe,
),
(
cmp::Operator::GT,
Scalar::I64(100),
PredicateMatch::SomeMaybe,
),
(cmp::Operator::GT, Scalar::I64(-99), PredicateMatch::All),
(
cmp::Operator::GT,
Scalar::I64(100),
PredicateMatch::SomeMaybe,
),
(
cmp::Operator::LT,
Scalar::I64(300),
PredicateMatch::SomeMaybe,
),
(cmp::Operator::LTE, Scalar::I64(300), PredicateMatch::All),
(
cmp::Operator::Equal,
Operator::NotEqual,
Scalar::I64(2),
PredicateMatch::SomeMaybe,
),
(
cmp::Operator::NotEqual,
Scalar::I64(2),
PredicateMatch::SomeMaybe,
),
(cmp::Operator::NotEqual, Scalar::I64(1), PredicateMatch::All),
(
cmp::Operator::NotEqual,
Scalar::I64(301),
PredicateMatch::All,
),
(cmp::Operator::GT, Scalar::I64(100000), PredicateMatch::None),
(cmp::Operator::GTE, Scalar::I64(301), PredicateMatch::None),
(cmp::Operator::LT, Scalar::I64(2), PredicateMatch::None),
(cmp::Operator::LTE, Scalar::I64(-100), PredicateMatch::None),
(
cmp::Operator::Equal,
Scalar::I64(100000),
PredicateMatch::None,
),
(Operator::NotEqual, Scalar::I64(1), PredicateMatch::All),
(Operator::NotEqual, Scalar::I64(301), PredicateMatch::All),
(Operator::GT, Scalar::I64(100000), PredicateMatch::None),
(Operator::GTE, Scalar::I64(301), PredicateMatch::None),
(Operator::LT, Scalar::I64(2), PredicateMatch::None),
(Operator::LTE, Scalar::I64(-100), PredicateMatch::None),
(Operator::Equal, Scalar::I64(100000), PredicateMatch::None),
];
for (op, scalar, result) in cases {
for (i, (op, scalar, result)) in cases.into_iter().enumerate() {
assert_eq!(
col.evaluate_predicate_on_meta(&op, &Value::Scalar(scalar)),
result
result,
"case {:?} failed",
i
);
}
// Now with a column containing at least one NULL value.
let col = Column::from(Int64Array::from(vec![
Some(100_i64),
Some(200),
Some(300),
Some(2),
None,
Some(200),
Some(22),
Some(30),
]));
let cases: Vec<(cmp::Operator, Scalar, PredicateMatch)> = vec![
(Operator::GT, Scalar::U64(100), PredicateMatch::SomeMaybe),
(Operator::GT, Scalar::I64(100), PredicateMatch::SomeMaybe),
// Not all values will match > -99 because of NULL
(Operator::GT, Scalar::I64(-99), PredicateMatch::SomeMaybe),
(Operator::GT, Scalar::I64(100), PredicateMatch::SomeMaybe),
(Operator::LT, Scalar::I64(300), PredicateMatch::SomeMaybe),
// Not all values will match < 300 because of NULL
(Operator::LTE, Scalar::I64(300), PredicateMatch::SomeMaybe),
(Operator::Equal, Scalar::I64(2), PredicateMatch::SomeMaybe),
(
Operator::NotEqual,
Scalar::I64(2),
PredicateMatch::SomeMaybe,
),
// Not all values will match != 1 because of NULL
(
Operator::NotEqual,
Scalar::I64(1),
PredicateMatch::SomeMaybe,
),
// Not all values will match != 301 because of NULL
(
Operator::NotEqual,
Scalar::I64(301),
PredicateMatch::SomeMaybe,
),
(Operator::GT, Scalar::I64(100000), PredicateMatch::None),
(Operator::GTE, Scalar::I64(301), PredicateMatch::None),
(Operator::LT, Scalar::I64(2), PredicateMatch::None),
(Operator::LTE, Scalar::I64(-100), PredicateMatch::None),
(Operator::Equal, Scalar::I64(100000), PredicateMatch::None),
];
for (i, (op, scalar, result)) in cases.into_iter().enumerate() {
assert_eq!(
col.evaluate_predicate_on_meta(&op, &Value::Scalar(scalar)),
result,
"case {:?} failed",
i
);
}
}

View File

@ -2638,23 +2638,31 @@ mod test {
fn _read_filter_setup() -> RowGroup {
let mut columns = vec![];
let tc = ColumnType::Time(Column::from(&[1_i64, 2, 3, 4, 5, 6][..]));
let tc = ColumnType::Time(Column::from(&[1_i64, 2, 3, 4, 5, 6, 8][..]));
columns.push(("time".to_string(), tc));
let rc = ColumnType::Tag(Column::from(
&["west", "west", "east", "west", "south", "north"][..],
&[
Some("west"),
Some("west"),
Some("east"),
Some("west"),
Some("south"),
Some("north"),
None,
][..],
));
columns.push(("region".to_string(), rc));
let mc = ColumnType::Tag(Column::from(
&["GET", "POST", "POST", "POST", "PUT", "GET"][..],
&["GET", "POST", "POST", "POST", "PUT", "GET", "PATCH"][..],
));
columns.push(("method".to_string(), mc));
let fc = ColumnType::Field(Column::from(&[100_u64, 101, 200, 203, 203, 10][..]));
let fc = ColumnType::Field(Column::from(&[100_u64, 101, 200, 203, 203, 10, 29][..]));
columns.push(("count".to_string(), fc));
RowGroup::new(6, columns)
RowGroup::new(7, columns)
}
#[test]
@ -2711,6 +2719,28 @@ POST
west,2
east,3
west,4
",
),
(
vec!["region", "time"],
Predicate::with_time_range(&[BinaryExpr::from(("method", "!=", "HEAD"))], 0, 10),
"region,time
west,1
west,2
east,3
west,4
south,5
north,6
NULL,8
",
),
(
vec!["region", "time"],
Predicate::with_time_range(&[BinaryExpr::from(("region", "!=", "west"))], 0, 10),
"region,time
east,3
south,5
north,6
",
),
];
@ -2744,6 +2774,7 @@ POST,east,3
POST,west,4
PUT,south,5
GET,north,6
PATCH,NULL,8
",
),
(