feat: add chunk predicate check

pull/24376/head
Edd Robinson 2021-04-01 12:08:00 +01:00 committed by kodiakhq[bot]
parent c2e0c80f8c
commit 03b72cc80d
2 changed files with 131 additions and 14 deletions

View File

@ -84,7 +84,7 @@ pub(crate) struct TableData {
} }
impl Chunk { impl Chunk {
/// Initialises a new `Chunk`. /// Initialises a new `Chunk` with the associated chunk ID.
pub fn new(id: u32) -> Self { pub fn new(id: u32) -> Self {
Self { Self {
id, id,
@ -230,18 +230,9 @@ impl Chunk {
} }
} }
/// Return table summaries or all tables in this chunk. Note that //
/// there can be more than one TableSummary for each table. // Methods for executing queries.
pub fn table_summaries(&self) -> Vec<TableSummary> { //
// read lock on chunk.
let chunk_data = self.chunk_data.read().unwrap();
chunk_data
.data
.values()
.map(|table| table.table_summary())
.collect()
}
/// Returns selected data for the specified columns in the provided table. /// Returns selected data for the specified columns in the provided table.
/// ///
@ -302,9 +293,37 @@ impl Chunk {
} }
// //
// ---- Schema API queries // ---- Schema queries
// //
/// Reports whether one or more rows in the named table could possibly
/// satisfy `predicate`.
///
/// The decision is delegated to the table's own `could_pass_predicate`.
/// When no table called `table_name` is present in this chunk the result
/// is `false`.
///
/// # Panics
///
/// Panics if the read lock on the chunk data cannot be acquired (the
/// `unwrap` on `read()` fails).
pub fn could_pass_predicate(&self, table_name: &str, predicate: Predicate) -> bool {
    // Hold a read lock on the chunk for the duration of the lookup.
    let guard = self.chunk_data.read().unwrap();

    guard
        .data
        .get(table_name)
        .map_or(false, |table| table.could_pass_predicate(&predicate))
}
/// Returns table summaries for all tables in this chunk.
///
/// One summary is produced for every table entry held in the chunk's
/// data, in the iteration order of that collection. Note that there can
/// be more than one `TableSummary` for a given table (presumably because
/// the same table may appear in several chunks — confirm against callers).
///
/// # Panics
///
/// Panics if the read lock on the chunk data cannot be acquired (the
/// `unwrap` on `read()` fails).
pub fn table_summaries(&self) -> Vec<TableSummary> {
    // Hold a read lock on the chunk while the summaries are gathered.
    let guard = self.chunk_data.read().unwrap();

    let mut summaries = Vec::new();
    for table in guard.data.values() {
        summaries.push(table.table_summary());
    }
    summaries
}
/// Returns a schema object for a `read_filter` operation using the provided /// Returns a schema object for a `read_filter` operation using the provided
/// column selection. An error is returned if the specified columns do not /// column selection. An error is returned if the specified columns do not
/// exist. /// exist.
@ -778,6 +797,20 @@ mod test {
assert!(itr.next().is_none()); assert!(itr.next().is_none());
} }
#[test]
fn could_pass_predicate() {
    let mut chunk = Chunk::new(22);

    // Populate the chunk with a single table.
    chunk.upsert_table("a_table", gen_recordbatch());

    // A table that is not in the chunk can never pass a predicate.
    let empty_predicate = Predicate::default();
    assert!(!chunk.could_pass_predicate("not my table", empty_predicate));

    // The existing table is expected to possibly satisfy this predicate.
    let region_predicate = Predicate::new(vec![BinaryExpr::from(("region", "=", "east"))]);
    assert!(chunk.could_pass_predicate("a_table", region_predicate));
}
#[test] #[test]
fn table_names() { fn table_names() {
let columns = vec![ let columns = vec![

View File

@ -162,6 +162,16 @@ impl Table {
Arc::clone(&self.table_data.read().unwrap().meta) Arc::clone(&self.table_data.read().unwrap().meta)
} }
/// Reports whether one or more row groups in the `Table` could possibly
/// contain one or more rows that satisfy the provided predicate.
///
/// Each row group is asked in turn whether it could satisfy the
/// predicate's expressions; the first row group that answers yes ends the
/// search. A table with no row groups yields `false`.
///
/// # Panics
///
/// Panics if the read lock on the table data cannot be acquired (the
/// `unwrap` on `read()` fails).
pub fn could_pass_predicate(&self, predicate: &Predicate) -> bool {
    let guard = self.table_data.read().unwrap();

    for row_group in guard.data.iter() {
        if row_group.could_satisfy_conjunctive_binary_expressions(predicate.iter()) {
            // One candidate row group is enough.
            return true;
        }
    }
    false
}
// Identify set of row groups that might satisfy the predicate. // Identify set of row groups that might satisfy the predicate.
// //
// Produce a set of these row groups along with a snapshot of the table meta // Produce a set of these row groups along with a snapshot of the table meta
@ -1004,6 +1014,80 @@ mod test {
.expect_err("drop_row_group should have returned an error"); .expect_err("drop_row_group should have returned an error");
} }
#[test]
fn could_pass_predicate() {
    // First row group: three rows.
    let first_columns = vec![
        (
            "time".to_string(),
            ColumnType::Time(Column::from(&[10_i64, 20, 30][..])),
        ),
        (
            "region".to_string(),
            ColumnType::Tag(Column::from(&["south", "north", "east"][..])),
        ),
        (
            "count".to_string(),
            ColumnType::Field(Column::from(&[1000_u64, 1002, 1200][..])),
        ),
    ];
    let mut table = Table::new("cpu".to_owned(), RowGroup::new(3, first_columns));

    // Second row group: six rows.
    let second_columns = vec![
        (
            "time".to_string(),
            ColumnType::Time(Column::from(&[1_i64, 2, 3, 4, 5, 6][..])),
        ),
        (
            "region".to_string(),
            ColumnType::Tag(Column::from(
                &["west", "west", "east", "west", "south", "north"][..],
            )),
        ),
        (
            "count".to_string(),
            ColumnType::Field(Column::from(&[100_u64, 101, 200, 203, 203, 10][..])),
        ),
    ];
    table.add_row_group(RowGroup::new(6, second_columns));

    // An empty predicate could be matched by anything.
    assert!(table.could_pass_predicate(&Predicate::default()));

    // Only the first row group has time values >= 7.
    assert!(table.could_pass_predicate(&Predicate::new(vec![BinaryExpr::from((
        "time", ">=", 7_i64
    ))])));

    // Match on a different column (tag column).
    assert!(table.could_pass_predicate(&Predicate::new(vec![BinaryExpr::from((
        "region", "=", "east"
    ))])));

    // Expressions over multiple columns.
    assert!(table.could_pass_predicate(&Predicate::new(vec![
        BinaryExpr::from(("region", "=", "east")),
        BinaryExpr::from(("count", "=", 1200_u64)),
    ])));

    // Each column matches its expression, but on different rows: no single
    // row satisfies the predicate, yet the check still reports a possible
    // match.
    assert!(table.could_pass_predicate(&Predicate::new(vec![
        BinaryExpr::from(("region", "=", "east")),
        BinaryExpr::from(("count", "=", 1002_u64)),
    ])));

    // Only the second row group has a region >= "west".
    assert!(table.could_pass_predicate(&Predicate::new(vec![BinaryExpr::from((
        "region", ">=", "west"
    ))])));

    // No row group matches: the column does not exist.
    assert!(!table.could_pass_predicate(&Predicate::new(vec![BinaryExpr::from((
        "temp", ">=", 0_u64
    ))])));

    // No row group matches: the column exists but holds no matching value.
    assert!(!table.could_pass_predicate(&Predicate::new(vec![BinaryExpr::from((
        "time",
        ">=",
        10192929_i64
    ))])));

    // No row group matches: one column could satisfy its expression but the
    // other cannot.
    assert!(!table.could_pass_predicate(&Predicate::new(vec![
        BinaryExpr::from(("region", "=", "east")),
        BinaryExpr::from(("count", "<=", 0_u64)),
    ])));
}
#[test] #[test]
fn select() { fn select() {
// Build first row group. // Build first row group.