refactor: apply timestamp predicate in visit code (#340)

pull/24376/head
Andrew Lamb 2020-10-07 12:33:04 -04:00 committed by GitHub
parent 9a81bf4d72
commit 5400c55b2a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 97 additions and 95 deletions

View File

@ -360,13 +360,13 @@ impl Database for Db {
) -> Result<StringSetPlan, Self::Error> { ) -> Result<StringSetPlan, Self::Error> {
match predicate { match predicate {
None => { None => {
let mut visitor = NameVisitor::new(range); let mut visitor = NameVisitor::new();
self.visit_tables(table, &mut visitor).await?; self.visit_tables(table, range, &mut visitor).await?;
Ok(visitor.column_names.into()) Ok(visitor.column_names.into())
} }
Some(predicate) => { Some(predicate) => {
let mut visitor = NamePredVisitor::new(range, predicate); let mut visitor = NamePredVisitor::new(predicate);
self.visit_tables(table, &mut visitor).await?; self.visit_tables(table, range, &mut visitor).await?;
Ok(visitor.plans.into()) Ok(visitor.plans.into())
} }
} }
@ -382,13 +382,13 @@ impl Database for Db {
) -> Result<StringSetPlan, Self::Error> { ) -> Result<StringSetPlan, Self::Error> {
match predicate { match predicate {
None => { None => {
let mut visitor = ValueVisitor::new(column_name, range); let mut visitor = ValueVisitor::new(column_name);
self.visit_tables(table, &mut visitor).await?; self.visit_tables(table, range, &mut visitor).await?;
Ok(visitor.column_values.into()) Ok(visitor.column_values.into())
} }
Some(predicate) => { Some(predicate) => {
let mut visitor = ValuePredVisitor::new(column_name, range, predicate); let mut visitor = ValuePredVisitor::new(column_name, predicate);
self.visit_tables(table, &mut visitor).await?; self.visit_tables(table, range, &mut visitor).await?;
Ok(visitor.plans.into()) Ok(visitor.plans.into())
} }
} }
@ -497,12 +497,23 @@ trait Visitor {
} }
// called once before any column in a Table is visited // called once before any column in a Table is visited
fn pre_visit_table(&mut self, _table: &Table, _partition: &Partition) -> Result<()> { fn pre_visit_table(
&mut self,
_table: &Table,
_partition: &Partition,
_ts_pred: Option<&TimestampPredicate>,
) -> Result<()> {
Ok(()) Ok(())
} }
// called every time a column is visited // called every time a column is visited
fn visit_column(&mut self, _table: &Table, _column_id: u32, _column: &Column) -> Result<()> { fn visit_column(
&mut self,
_table: &Table,
_column_id: u32,
_column: &Column,
_ts_pred: Option<&TimestampPredicate>,
) -> Result<()> {
Ok(()) Ok(())
} }
@ -518,12 +529,31 @@ trait Visitor {
} }
impl Db { impl Db {
/// Traverse this database's tables, calling the relevant functions, in /// Traverse this database's tables, calling the relevant
/// order, of `visitor`, as described on the Visitor trait /// functions, in order, of `visitor`, as described on the Visitor
async fn visit_tables<V: Visitor>(&self, table: Option<String>, visitor: &mut V) -> Result<()> { /// trait.
///
/// If table is specified, skips tables that do not match the
/// name.
///
/// If range is specified, skips tables which do not have any
/// values within the timestamp range.
async fn visit_tables<V: Visitor>(
&self,
table: Option<String>,
range: Option<TimestampRange>,
visitor: &mut V,
) -> Result<()> {
let partitions = self.partitions.read().await; let partitions = self.partitions.read().await;
for partition in partitions.iter() { for partition in partitions.iter() {
// The id of the timestamp column is specific to each partition
let timestamp_predicate = partition
.make_timestamp_predicate(range)
.context(PartitionError)?;
let ts_pred = timestamp_predicate.as_ref();
// ids are relative to a specific partition // ids are relative to a specific partition
let table_symbol = match &table { let table_symbol = match &table {
Some(name) => Some(partition.dictionary.lookup_value(&name).context( Some(name) => Some(partition.dictionary.lookup_value(&name).context(
@ -535,22 +565,27 @@ impl Db {
None => None, None => None,
}; };
// The table iterator is the list of tables we want to process
let table_iter = partition
.tables
.values()
// filter out any tables that don't pass the predicates
.filter(|table| table.matches_id_predicate(&table_symbol));
visitor.pre_visit_partition(partition)?; visitor.pre_visit_partition(partition)?;
for table in table_iter { for table in partition.tables.values() {
visitor.pre_visit_table(table, partition)?; // Apply predicates, if any, to skip processing the table entirely
if table.matches_id_predicate(&table_symbol)
&& table
.matches_timestamp_predicate(ts_pred)
.context(TableError)?
{
visitor.pre_visit_table(table, partition, ts_pred)?;
for (column_id, column_index) in &table.column_id_to_index { for (column_id, column_index) in &table.column_id_to_index {
visitor.visit_column(table, *column_id, &table.columns[*column_index])? visitor.visit_column(
table,
*column_id,
&table.columns[*column_index],
ts_pred,
)?
}
visitor.post_visit_table(table, partition)?;
} }
visitor.post_visit_table(table, partition)?;
} }
visitor.post_visit_partition(partition)?; visitor.post_visit_partition(partition)?;
} // next partition } // next partition
@ -561,28 +596,30 @@ impl Db {
/// return all column names in this database, while applying the timestamp range /// return all column names in this database, while applying the timestamp range
struct NameVisitor { struct NameVisitor {
timestamp_predicate: Option<TimestampPredicate>,
column_names: StringSet, column_names: StringSet,
partition_column_ids: BTreeSet<u32>, partition_column_ids: BTreeSet<u32>,
range: Option<TimestampRange>,
} }
impl NameVisitor { impl NameVisitor {
fn new(range: Option<TimestampRange>) -> Self { fn new() -> Self {
Self { Self {
timestamp_predicate: None,
column_names: StringSet::new(), column_names: StringSet::new(),
partition_column_ids: BTreeSet::new(), partition_column_ids: BTreeSet::new(),
range,
} }
} }
} }
impl Visitor for NameVisitor { impl Visitor for NameVisitor {
fn visit_column(&mut self, table: &Table, column_id: u32, column: &Column) -> Result<()> { fn visit_column(
&mut self,
table: &Table,
column_id: u32,
column: &Column,
ts_pred: Option<&TimestampPredicate>,
) -> Result<()> {
if let Column::Tag(column) = column { if let Column::Tag(column) = column {
if table if table
.column_matches_timestamp_predicate(column, self.timestamp_predicate.as_ref()) .column_matches_timestamp_predicate(column, ts_pred)
.context(TableError)? .context(TableError)?
{ {
self.partition_column_ids.insert(column_id); self.partition_column_ids.insert(column_id);
@ -591,11 +628,8 @@ impl Visitor for NameVisitor {
Ok(()) Ok(())
} }
fn pre_visit_partition(&mut self, partition: &Partition) -> Result<()> { fn pre_visit_partition(&mut self, _partition: &Partition) -> Result<()> {
self.partition_column_ids.clear(); self.partition_column_ids.clear();
self.timestamp_predicate = partition
.make_timestamp_predicate(self.range)
.context(PartitionError)?;
Ok(()) Ok(())
} }
@ -631,17 +665,13 @@ impl Visitor for NameVisitor {
/// predicate values, e.g., if you have env = "us-west" and a /// predicate values, e.g., if you have env = "us-west" and a
/// table's env column has the range ["eu-south", "us-north"]. /// table's env column has the range ["eu-south", "us-north"].
struct NamePredVisitor { struct NamePredVisitor {
timestamp_predicate: Option<TimestampPredicate>,
range: Option<TimestampRange>,
predicate: Predicate, predicate: Predicate,
plans: Vec<LogicalPlan>, plans: Vec<LogicalPlan>,
} }
impl NamePredVisitor { impl NamePredVisitor {
fn new(range: Option<TimestampRange>, predicate: Predicate) -> Self { fn new(predicate: Predicate) -> Self {
Self { Self {
timestamp_predicate: None,
range,
predicate, predicate,
plans: Vec::new(), plans: Vec::new(),
} }
@ -649,29 +679,15 @@ impl NamePredVisitor {
} }
impl Visitor for NamePredVisitor { impl Visitor for NamePredVisitor {
fn pre_visit_partition(&mut self, partition: &Partition) -> Result<()> { fn pre_visit_table(
self.timestamp_predicate = partition &mut self,
.make_timestamp_predicate(self.range) table: &Table,
.context(PartitionError)?; partition: &Partition,
Ok(()) ts_pred: Option<&TimestampPredicate>,
} ) -> Result<()> {
fn pre_visit_table(&mut self, table: &Table, partition: &Partition) -> Result<()> {
// skip table entirely if there are no rows that fall in the timestamp
if !table
.matches_timestamp_predicate(self.timestamp_predicate.as_ref())
.context(TableError)?
{
return Ok(());
}
self.plans.push( self.plans.push(
table table
.tag_column_names_plan( .tag_column_names_plan(&self.predicate, ts_pred, partition)
&self.predicate,
self.timestamp_predicate.as_ref(),
partition,
)
.context(TableError)?, .context(TableError)?,
); );
Ok(()) Ok(())
@ -688,21 +704,17 @@ struct ValueVisitor<'a> {
column_name: &'a str, column_name: &'a str,
// what column id we are looking for // what column id we are looking for
column_id: Option<u32>, column_id: Option<u32>,
timestamp_predicate: Option<TimestampPredicate>,
partition_value_ids: BTreeSet<u32>, partition_value_ids: BTreeSet<u32>,
column_values: StringSet, column_values: StringSet,
range: Option<TimestampRange>,
} }
impl<'a> ValueVisitor<'a> { impl<'a> ValueVisitor<'a> {
fn new(column_name: &'a str, range: Option<TimestampRange>) -> Self { fn new(column_name: &'a str) -> Self {
Self { Self {
column_name, column_name,
column_id: None, column_id: None,
timestamp_predicate: None,
column_values: StringSet::new(), column_values: StringSet::new(),
partition_value_ids: BTreeSet::new(), partition_value_ids: BTreeSet::new(),
range,
} }
} }
} }
@ -721,14 +733,16 @@ impl<'a> Visitor for ValueVisitor<'a> {
})?, })?,
); );
self.timestamp_predicate = partition
.make_timestamp_predicate(self.range)
.context(PartitionError)?;
Ok(()) Ok(())
} }
fn visit_column(&mut self, table: &Table, column_id: u32, column: &Column) -> Result<()> { fn visit_column(
&mut self,
table: &Table,
column_id: u32,
column: &Column,
ts_pred: Option<&TimestampPredicate>,
) -> Result<()> {
if Some(column_id) != self.column_id { if Some(column_id) != self.column_id {
return Ok(()); return Ok(());
} }
@ -738,7 +752,7 @@ impl<'a> Visitor for ValueVisitor<'a> {
// if we have a timestamp prediate, find all values // if we have a timestamp prediate, find all values
// that the timestamp is within range. Otherwise take // that the timestamp is within range. Otherwise take
// all values. // all values.
match self.timestamp_predicate { match ts_pred {
None => { None => {
// take all non-null values // take all non-null values
column.iter().filter_map(|&s| s).for_each(|value_id| { column.iter().filter_map(|&s| s).for_each(|value_id| {
@ -796,19 +810,14 @@ impl<'a> Visitor for ValueVisitor<'a> {
/// database, while applying the timestamp range and predicate /// database, while applying the timestamp range and predicate
struct ValuePredVisitor<'a> { struct ValuePredVisitor<'a> {
column_name: &'a str, column_name: &'a str,
// what column id we are looking for
timestamp_predicate: Option<TimestampPredicate>,
range: Option<TimestampRange>,
predicate: Predicate, predicate: Predicate,
plans: Vec<LogicalPlan>, plans: Vec<LogicalPlan>,
} }
impl<'a> ValuePredVisitor<'a> { impl<'a> ValuePredVisitor<'a> {
fn new(column_name: &'a str, range: Option<TimestampRange>, predicate: Predicate) -> Self { fn new(column_name: &'a str, predicate: Predicate) -> Self {
Self { Self {
column_name, column_name,
timestamp_predicate: None,
range,
predicate, predicate,
plans: Vec::new(), plans: Vec::new(),
} }
@ -816,19 +825,17 @@ impl<'a> ValuePredVisitor<'a> {
} }
impl<'a> Visitor for ValuePredVisitor<'a> { impl<'a> Visitor for ValuePredVisitor<'a> {
fn pre_visit_partition(&mut self, partition: &Partition) -> Result<()> {
self.timestamp_predicate = partition
.make_timestamp_predicate(self.range)
.context(PartitionError)?;
Ok(())
}
// TODO try and rule out entire tables based on the same critera // TODO try and rule out entire tables based on the same critera
// as explained on NamePredVisitor // as explained on NamePredVisitor
fn pre_visit_table(&mut self, table: &Table, partition: &Partition) -> Result<()> { fn pre_visit_table(
&mut self,
table: &Table,
partition: &Partition,
ts_pred: Option<&TimestampPredicate>,
) -> Result<()> {
// skip table entirely if there are no rows that fall in the timestamp // skip table entirely if there are no rows that fall in the timestamp
if !table if !table
.matches_timestamp_predicate(self.timestamp_predicate.as_ref()) .matches_timestamp_predicate(ts_pred)
.context(TableError)? .context(TableError)?
{ {
return Ok(()); return Ok(());
@ -836,12 +843,7 @@ impl<'a> Visitor for ValuePredVisitor<'a> {
self.plans.push( self.plans.push(
table table
.tag_values_plan( .tag_values_plan(self.column_name, &self.predicate, ts_pred, partition)
self.column_name,
&self.predicate,
self.timestamp_predicate.as_ref(),
partition,
)
.context(TableError)?, .context(TableError)?,
); );
Ok(()) Ok(())