From c89a569e03db6b5e3ea551477d7b0c45373acb0f Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Mon, 25 Jan 2021 17:11:20 +0000 Subject: [PATCH] feat: add per-chunk pred support in table_names --- read_buffer/src/chunk.rs | 167 +++++++++++++++++++++++++++++++++++++-- read_buffer/src/lib.rs | 2 +- read_buffer/src/table.rs | 47 ++++++++++- 3 files changed, 207 insertions(+), 9 deletions(-) diff --git a/read_buffer/src/chunk.rs b/read_buffer/src/chunk.rs index 62279d5d16..75ce7ed1f3 100644 --- a/read_buffer/src/chunk.rs +++ b/read_buffer/src/chunk.rs @@ -70,11 +70,12 @@ impl Chunk { } /// Add a row_group to a table in the chunk, updating all Chunk meta data. - pub fn upsert_table(&mut self, table_name: String, row_group: RowGroup) { + pub fn upsert_table(&mut self, table_name: impl Into<String>, row_group: RowGroup) { // update meta data self.meta.update(&row_group); + let table_name = table_name.into(); - match self.tables.entry(table_name.to_owned()) { + match self.tables.entry(table_name.clone()) { Entry::Occupied(mut e) => { let table = e.get_mut(); table.add_row_group(row_group); @@ -138,10 +139,38 @@ impl Chunk { // ---- Schema API queries // - /// Returns the distinct set of table names that contain data that satisfies - /// the time range and predicates. - pub fn table_names(&self, predicate: &Predicate) -> BTreeSet<&String> { - self.tables.keys().collect::<BTreeSet<_>>() + /// Returns the distinct set of table names that contain data satisfying the + /// provided predicate. + /// + /// `skip_table_names` can be used to provide a set of table names to + /// skip, typically because they're already included in results from other + /// chunks. 
+ pub fn table_names( + &self, + predicate: &Predicate, + skip_table_names: &BTreeSet<&String>, + ) -> BTreeSet<&String> { + if predicate.is_empty() { + return self + .tables + .keys() + .filter(|&name| !skip_table_names.contains(name)) + .collect::<BTreeSet<_>>(); + } + + self.tables + .iter() + .filter_map(|(name, table)| { + if skip_table_names.contains(name) { + return None; + } + + match table.satisfies_predicate(predicate) { + true => Some(name), + false => None, + } + }) + .collect::<BTreeSet<_>>() } /// Returns the distinct set of tag keys (column names) matching the @@ -228,3 +257,129 @@ impl MetaData { todo!() } } + +#[cfg(test)] +mod test { + use std::collections::BTreeMap; + + use super::*; + use crate::row_group::{ColumnType, RowGroup}; + use crate::{column::Column, BinaryExpr}; + + #[test] + fn table_names() { + let columns = vec![ + ( + "time", + ColumnType::Time(Column::from(&[1_i64, 2, 3, 4, 5, 6][..])), + ), + ( + "region", + ColumnType::Tag(Column::from( + &["west", "west", "east", "west", "south", "north"][..], + )), + ), + ] + .into_iter() + .map(|(k, v)| (k.to_owned(), v)) + .collect::<BTreeMap<_, _>>(); + let rg = RowGroup::new(6, columns); + let table = Table::new("table_1", rg); + let mut chunk = Chunk::new(22, table); + + // All table names returned when no predicate. 
+ let table_names = chunk.table_names(&Predicate::default(), &BTreeSet::new()); + assert_eq!( + table_names + .iter() + .map(|v| v.as_str()) + .collect::<Vec<_>>(), + vec!["table_1"] + ); + + // All table names returned if no predicate and not in skip list + let table_names = chunk.table_names( + &Predicate::default(), + &["table_2".to_owned()].iter().collect::<BTreeSet<_>>(), + ); + assert_eq!( + table_names + .iter() + .map(|v| v.as_str()) + .collect::<Vec<_>>(), + vec!["table_1"] + ); + + // Table name not returned if it is in skip list + let table_names = chunk.table_names( + &Predicate::default(), + &["table_1".to_owned()].iter().collect::<BTreeSet<_>>(), + ); + assert!(table_names.is_empty()); + + // table returned when predicate matches + let table_names = chunk.table_names( + &Predicate::new(vec![BinaryExpr::from(("region", ">=", "west"))]), + &BTreeSet::new(), + ); + assert_eq!( + table_names + .iter() + .map(|v| v.as_str()) + .collect::<Vec<_>>(), + vec!["table_1"] + ); + + // table not returned when predicate doesn't match + let table_names = chunk.table_names( + &Predicate::new(vec![BinaryExpr::from(("region", ">", "west"))]), + &BTreeSet::new(), + ); + assert!(table_names.is_empty()); + + // create another table with different timestamps. 
+ let columns = vec![ + ( + "time", + ColumnType::Time(Column::from(&[100_i64, 200, 300, 400, 500, 600][..])), + ), + ( + "region", + ColumnType::Tag(Column::from( + &["west", "west", "east", "west", "south", "north"][..], + )), + ), + ] + .into_iter() + .map(|(k, v)| (k.to_owned(), v)) + .collect::<BTreeMap<_, _>>(); + let rg = RowGroup::new(6, columns); + chunk.upsert_table("table_2", rg); + + // all tables returned when predicate matches both + let table_names = chunk.table_names( + &Predicate::new(vec![BinaryExpr::from(("region", "!=", "north-north-east"))]), + &BTreeSet::new(), + ); + assert_eq!( + table_names + .iter() + .map(|v| v.as_str()) + .collect::<Vec<_>>(), + vec!["table_1", "table_2"] + ); + + // only one table returned when one table matches predicate + let table_names = chunk.table_names( + &Predicate::new(vec![BinaryExpr::from(("time", ">", 300_i64))]), + &BTreeSet::new(), + ); + assert_eq!( + table_names + .iter() + .map(|v| v.as_str()) + .collect::<Vec<_>>(), + vec!["table_2"] + ); + } +} diff --git a/read_buffer/src/lib.rs b/read_buffer/src/lib.rs index 0c5d1c1050..d7aa3dfa97 100644 --- a/read_buffer/src/lib.rs +++ b/read_buffer/src/lib.rs @@ -395,7 +395,7 @@ impl Database { let names = chunks .iter() .fold(BTreeSet::new(), |mut names, chunk| { - names.append(&mut chunk.table_names(&predicate)); + names.append(&mut chunk.table_names(&predicate, &BTreeSet::new())); names }) // have a BTreeSet here, convert to an iterator of Some(&str) diff --git a/read_buffer/src/table.rs b/read_buffer/src/table.rs index 32938fa272..ee2182f8be 100644 --- a/read_buffer/src/table.rs +++ b/read_buffer/src/table.rs @@ -33,9 +33,9 @@ pub struct Table { impl Table { /// Create a new table with the provided row_group. 
- pub fn new(name: String, rg: RowGroup) -> Self { + pub fn new(name: impl Into<String>, rg: RowGroup) -> Self { Self { - name, + name: name.into(), meta: MetaData::new(rg.metadata()), row_groups: vec![rg], } @@ -399,6 +399,49 @@ impl Table { // that only have values that have already been found. todo!(); } + + /// Determines if this table could satisfy the provided predicate. + /// + /// `false` is proof that no row within this table would match the + /// predicate, whilst `true` indicates one or more rows *might* match the + /// predicate. + fn could_satisfy_predicate(&self, predicate: &Predicate) -> bool { + // if the table doesn't have a column for one of the predicate's + // expressions then the table cannot satisfy the predicate. + if !predicate + .iter() + .all(|expr| self.meta.columns.contains_key(expr.column())) + { + return false; + } + + // If there is a single row group in the table that could satisfy the + // predicate then the table itself could satisfy the predicate so return + // true. If none of the row groups could match then return false. + let exprs = predicate.expressions(); + self.row_groups + .iter() + .any(|row_group| row_group.could_satisfy_conjunctive_binary_expressions(exprs)) + } + + /// Determines if this table contains one or more rows that satisfy the + /// predicate. + pub fn satisfies_predicate(&self, predicate: &Predicate) -> bool { + // if the table doesn't have a column for one of the predicate's + // expressions then the table cannot satisfy the predicate. + if !predicate + .iter() + .all(|expr| self.meta.columns.contains_key(expr.column())) + { + return false; + } + + // apply the predicate to all row groups. Each row group will do its own + // column pruning based on its column ranges. + self.row_groups + .iter() + .any(|row_group| row_group.satisfies_predicate(predicate)) + } } // TODO(edd): reduce owned strings here by, e.g., using references as keys.