refactor: apply timestamp predicate in visit code (#340)
parent
9a81bf4d72
commit
5400c55b2a
|
@ -360,13 +360,13 @@ impl Database for Db {
|
||||||
) -> Result<StringSetPlan, Self::Error> {
|
) -> Result<StringSetPlan, Self::Error> {
|
||||||
match predicate {
|
match predicate {
|
||||||
None => {
|
None => {
|
||||||
let mut visitor = NameVisitor::new(range);
|
let mut visitor = NameVisitor::new();
|
||||||
self.visit_tables(table, &mut visitor).await?;
|
self.visit_tables(table, range, &mut visitor).await?;
|
||||||
Ok(visitor.column_names.into())
|
Ok(visitor.column_names.into())
|
||||||
}
|
}
|
||||||
Some(predicate) => {
|
Some(predicate) => {
|
||||||
let mut visitor = NamePredVisitor::new(range, predicate);
|
let mut visitor = NamePredVisitor::new(predicate);
|
||||||
self.visit_tables(table, &mut visitor).await?;
|
self.visit_tables(table, range, &mut visitor).await?;
|
||||||
Ok(visitor.plans.into())
|
Ok(visitor.plans.into())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -382,13 +382,13 @@ impl Database for Db {
|
||||||
) -> Result<StringSetPlan, Self::Error> {
|
) -> Result<StringSetPlan, Self::Error> {
|
||||||
match predicate {
|
match predicate {
|
||||||
None => {
|
None => {
|
||||||
let mut visitor = ValueVisitor::new(column_name, range);
|
let mut visitor = ValueVisitor::new(column_name);
|
||||||
self.visit_tables(table, &mut visitor).await?;
|
self.visit_tables(table, range, &mut visitor).await?;
|
||||||
Ok(visitor.column_values.into())
|
Ok(visitor.column_values.into())
|
||||||
}
|
}
|
||||||
Some(predicate) => {
|
Some(predicate) => {
|
||||||
let mut visitor = ValuePredVisitor::new(column_name, range, predicate);
|
let mut visitor = ValuePredVisitor::new(column_name, predicate);
|
||||||
self.visit_tables(table, &mut visitor).await?;
|
self.visit_tables(table, range, &mut visitor).await?;
|
||||||
Ok(visitor.plans.into())
|
Ok(visitor.plans.into())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -497,12 +497,23 @@ trait Visitor {
|
||||||
}
|
}
|
||||||
|
|
||||||
// called once before any column in a Table is visited
|
// called once before any column in a Table is visited
|
||||||
fn pre_visit_table(&mut self, _table: &Table, _partition: &Partition) -> Result<()> {
|
fn pre_visit_table(
|
||||||
|
&mut self,
|
||||||
|
_table: &Table,
|
||||||
|
_partition: &Partition,
|
||||||
|
_ts_pred: Option<&TimestampPredicate>,
|
||||||
|
) -> Result<()> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
// called every time a column is visited
|
// called every time a column is visited
|
||||||
fn visit_column(&mut self, _table: &Table, _column_id: u32, _column: &Column) -> Result<()> {
|
fn visit_column(
|
||||||
|
&mut self,
|
||||||
|
_table: &Table,
|
||||||
|
_column_id: u32,
|
||||||
|
_column: &Column,
|
||||||
|
_ts_pred: Option<&TimestampPredicate>,
|
||||||
|
) -> Result<()> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -518,12 +529,31 @@ trait Visitor {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Db {
|
impl Db {
|
||||||
/// Traverse this database's tables, calling the relevant functions, in
|
/// Traverse this database's tables, calling the relevant
|
||||||
/// order, of `visitor`, as described on the Visitor trait
|
/// functions, in order, of `visitor`, as described on the Visitor
|
||||||
async fn visit_tables<V: Visitor>(&self, table: Option<String>, visitor: &mut V) -> Result<()> {
|
/// trait.
|
||||||
|
///
|
||||||
|
/// If table is specified, skips tables that do not match the
|
||||||
|
/// name.
|
||||||
|
///
|
||||||
|
/// If range is specified, skips tables which do not have any
|
||||||
|
/// values within the timestamp range.
|
||||||
|
async fn visit_tables<V: Visitor>(
|
||||||
|
&self,
|
||||||
|
table: Option<String>,
|
||||||
|
range: Option<TimestampRange>,
|
||||||
|
visitor: &mut V,
|
||||||
|
) -> Result<()> {
|
||||||
let partitions = self.partitions.read().await;
|
let partitions = self.partitions.read().await;
|
||||||
|
|
||||||
for partition in partitions.iter() {
|
for partition in partitions.iter() {
|
||||||
|
// The id of the timestamp column is specific to each partition
|
||||||
|
let timestamp_predicate = partition
|
||||||
|
.make_timestamp_predicate(range)
|
||||||
|
.context(PartitionError)?;
|
||||||
|
|
||||||
|
let ts_pred = timestamp_predicate.as_ref();
|
||||||
|
|
||||||
// ids are relative to a specific partition
|
// ids are relative to a specific partition
|
||||||
let table_symbol = match &table {
|
let table_symbol = match &table {
|
||||||
Some(name) => Some(partition.dictionary.lookup_value(&name).context(
|
Some(name) => Some(partition.dictionary.lookup_value(&name).context(
|
||||||
|
@ -535,22 +565,27 @@ impl Db {
|
||||||
None => None,
|
None => None,
|
||||||
};
|
};
|
||||||
|
|
||||||
// The table iterator is the list of tables we want to process
|
|
||||||
let table_iter = partition
|
|
||||||
.tables
|
|
||||||
.values()
|
|
||||||
// filter out any tables that don't pass the predicates
|
|
||||||
.filter(|table| table.matches_id_predicate(&table_symbol));
|
|
||||||
|
|
||||||
visitor.pre_visit_partition(partition)?;
|
visitor.pre_visit_partition(partition)?;
|
||||||
for table in table_iter {
|
for table in partition.tables.values() {
|
||||||
visitor.pre_visit_table(table, partition)?;
|
// Apply predicates, if any, to skip processing the table entirely
|
||||||
|
if table.matches_id_predicate(&table_symbol)
|
||||||
|
&& table
|
||||||
|
.matches_timestamp_predicate(ts_pred)
|
||||||
|
.context(TableError)?
|
||||||
|
{
|
||||||
|
visitor.pre_visit_table(table, partition, ts_pred)?;
|
||||||
|
|
||||||
for (column_id, column_index) in &table.column_id_to_index {
|
for (column_id, column_index) in &table.column_id_to_index {
|
||||||
visitor.visit_column(table, *column_id, &table.columns[*column_index])?
|
visitor.visit_column(
|
||||||
|
table,
|
||||||
|
*column_id,
|
||||||
|
&table.columns[*column_index],
|
||||||
|
ts_pred,
|
||||||
|
)?
|
||||||
|
}
|
||||||
|
|
||||||
|
visitor.post_visit_table(table, partition)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
visitor.post_visit_table(table, partition)?;
|
|
||||||
}
|
}
|
||||||
visitor.post_visit_partition(partition)?;
|
visitor.post_visit_partition(partition)?;
|
||||||
} // next partition
|
} // next partition
|
||||||
|
@ -561,28 +596,30 @@ impl Db {
|
||||||
|
|
||||||
/// return all column names in this database, while applying the timestamp range
|
/// return all column names in this database, while applying the timestamp range
|
||||||
struct NameVisitor {
|
struct NameVisitor {
|
||||||
timestamp_predicate: Option<TimestampPredicate>,
|
|
||||||
column_names: StringSet,
|
column_names: StringSet,
|
||||||
partition_column_ids: BTreeSet<u32>,
|
partition_column_ids: BTreeSet<u32>,
|
||||||
range: Option<TimestampRange>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl NameVisitor {
|
impl NameVisitor {
|
||||||
fn new(range: Option<TimestampRange>) -> Self {
|
fn new() -> Self {
|
||||||
Self {
|
Self {
|
||||||
timestamp_predicate: None,
|
|
||||||
column_names: StringSet::new(),
|
column_names: StringSet::new(),
|
||||||
partition_column_ids: BTreeSet::new(),
|
partition_column_ids: BTreeSet::new(),
|
||||||
range,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Visitor for NameVisitor {
|
impl Visitor for NameVisitor {
|
||||||
fn visit_column(&mut self, table: &Table, column_id: u32, column: &Column) -> Result<()> {
|
fn visit_column(
|
||||||
|
&mut self,
|
||||||
|
table: &Table,
|
||||||
|
column_id: u32,
|
||||||
|
column: &Column,
|
||||||
|
ts_pred: Option<&TimestampPredicate>,
|
||||||
|
) -> Result<()> {
|
||||||
if let Column::Tag(column) = column {
|
if let Column::Tag(column) = column {
|
||||||
if table
|
if table
|
||||||
.column_matches_timestamp_predicate(column, self.timestamp_predicate.as_ref())
|
.column_matches_timestamp_predicate(column, ts_pred)
|
||||||
.context(TableError)?
|
.context(TableError)?
|
||||||
{
|
{
|
||||||
self.partition_column_ids.insert(column_id);
|
self.partition_column_ids.insert(column_id);
|
||||||
|
@ -591,11 +628,8 @@ impl Visitor for NameVisitor {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn pre_visit_partition(&mut self, partition: &Partition) -> Result<()> {
|
fn pre_visit_partition(&mut self, _partition: &Partition) -> Result<()> {
|
||||||
self.partition_column_ids.clear();
|
self.partition_column_ids.clear();
|
||||||
self.timestamp_predicate = partition
|
|
||||||
.make_timestamp_predicate(self.range)
|
|
||||||
.context(PartitionError)?;
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -631,17 +665,13 @@ impl Visitor for NameVisitor {
|
||||||
/// predicate values, e.g., if you have env = "us-west" and a
|
/// predicate values, e.g., if you have env = "us-west" and a
|
||||||
/// table's env column has the range ["eu-south", "us-north"].
|
/// table's env column has the range ["eu-south", "us-north"].
|
||||||
struct NamePredVisitor {
|
struct NamePredVisitor {
|
||||||
timestamp_predicate: Option<TimestampPredicate>,
|
|
||||||
range: Option<TimestampRange>,
|
|
||||||
predicate: Predicate,
|
predicate: Predicate,
|
||||||
plans: Vec<LogicalPlan>,
|
plans: Vec<LogicalPlan>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl NamePredVisitor {
|
impl NamePredVisitor {
|
||||||
fn new(range: Option<TimestampRange>, predicate: Predicate) -> Self {
|
fn new(predicate: Predicate) -> Self {
|
||||||
Self {
|
Self {
|
||||||
timestamp_predicate: None,
|
|
||||||
range,
|
|
||||||
predicate,
|
predicate,
|
||||||
plans: Vec::new(),
|
plans: Vec::new(),
|
||||||
}
|
}
|
||||||
|
@ -649,29 +679,15 @@ impl NamePredVisitor {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Visitor for NamePredVisitor {
|
impl Visitor for NamePredVisitor {
|
||||||
fn pre_visit_partition(&mut self, partition: &Partition) -> Result<()> {
|
fn pre_visit_table(
|
||||||
self.timestamp_predicate = partition
|
&mut self,
|
||||||
.make_timestamp_predicate(self.range)
|
table: &Table,
|
||||||
.context(PartitionError)?;
|
partition: &Partition,
|
||||||
Ok(())
|
ts_pred: Option<&TimestampPredicate>,
|
||||||
}
|
) -> Result<()> {
|
||||||
|
|
||||||
fn pre_visit_table(&mut self, table: &Table, partition: &Partition) -> Result<()> {
|
|
||||||
// skip table entirely if there are no rows that fall in the timestamp
|
|
||||||
if !table
|
|
||||||
.matches_timestamp_predicate(self.timestamp_predicate.as_ref())
|
|
||||||
.context(TableError)?
|
|
||||||
{
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
self.plans.push(
|
self.plans.push(
|
||||||
table
|
table
|
||||||
.tag_column_names_plan(
|
.tag_column_names_plan(&self.predicate, ts_pred, partition)
|
||||||
&self.predicate,
|
|
||||||
self.timestamp_predicate.as_ref(),
|
|
||||||
partition,
|
|
||||||
)
|
|
||||||
.context(TableError)?,
|
.context(TableError)?,
|
||||||
);
|
);
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -688,21 +704,17 @@ struct ValueVisitor<'a> {
|
||||||
column_name: &'a str,
|
column_name: &'a str,
|
||||||
// what column id we are looking for
|
// what column id we are looking for
|
||||||
column_id: Option<u32>,
|
column_id: Option<u32>,
|
||||||
timestamp_predicate: Option<TimestampPredicate>,
|
|
||||||
partition_value_ids: BTreeSet<u32>,
|
partition_value_ids: BTreeSet<u32>,
|
||||||
column_values: StringSet,
|
column_values: StringSet,
|
||||||
range: Option<TimestampRange>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> ValueVisitor<'a> {
|
impl<'a> ValueVisitor<'a> {
|
||||||
fn new(column_name: &'a str, range: Option<TimestampRange>) -> Self {
|
fn new(column_name: &'a str) -> Self {
|
||||||
Self {
|
Self {
|
||||||
column_name,
|
column_name,
|
||||||
column_id: None,
|
column_id: None,
|
||||||
timestamp_predicate: None,
|
|
||||||
column_values: StringSet::new(),
|
column_values: StringSet::new(),
|
||||||
partition_value_ids: BTreeSet::new(),
|
partition_value_ids: BTreeSet::new(),
|
||||||
range,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -721,14 +733,16 @@ impl<'a> Visitor for ValueVisitor<'a> {
|
||||||
})?,
|
})?,
|
||||||
);
|
);
|
||||||
|
|
||||||
self.timestamp_predicate = partition
|
|
||||||
.make_timestamp_predicate(self.range)
|
|
||||||
.context(PartitionError)?;
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn visit_column(&mut self, table: &Table, column_id: u32, column: &Column) -> Result<()> {
|
fn visit_column(
|
||||||
|
&mut self,
|
||||||
|
table: &Table,
|
||||||
|
column_id: u32,
|
||||||
|
column: &Column,
|
||||||
|
ts_pred: Option<&TimestampPredicate>,
|
||||||
|
) -> Result<()> {
|
||||||
if Some(column_id) != self.column_id {
|
if Some(column_id) != self.column_id {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
@ -738,7 +752,7 @@ impl<'a> Visitor for ValueVisitor<'a> {
|
||||||
// if we have a timestamp prediate, find all values
|
// if we have a timestamp prediate, find all values
|
||||||
// that the timestamp is within range. Otherwise take
|
// that the timestamp is within range. Otherwise take
|
||||||
// all values.
|
// all values.
|
||||||
match self.timestamp_predicate {
|
match ts_pred {
|
||||||
None => {
|
None => {
|
||||||
// take all non-null values
|
// take all non-null values
|
||||||
column.iter().filter_map(|&s| s).for_each(|value_id| {
|
column.iter().filter_map(|&s| s).for_each(|value_id| {
|
||||||
|
@ -796,19 +810,14 @@ impl<'a> Visitor for ValueVisitor<'a> {
|
||||||
/// database, while applying the timestamp range and predicate
|
/// database, while applying the timestamp range and predicate
|
||||||
struct ValuePredVisitor<'a> {
|
struct ValuePredVisitor<'a> {
|
||||||
column_name: &'a str,
|
column_name: &'a str,
|
||||||
// what column id we are looking for
|
|
||||||
timestamp_predicate: Option<TimestampPredicate>,
|
|
||||||
range: Option<TimestampRange>,
|
|
||||||
predicate: Predicate,
|
predicate: Predicate,
|
||||||
plans: Vec<LogicalPlan>,
|
plans: Vec<LogicalPlan>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> ValuePredVisitor<'a> {
|
impl<'a> ValuePredVisitor<'a> {
|
||||||
fn new(column_name: &'a str, range: Option<TimestampRange>, predicate: Predicate) -> Self {
|
fn new(column_name: &'a str, predicate: Predicate) -> Self {
|
||||||
Self {
|
Self {
|
||||||
column_name,
|
column_name,
|
||||||
timestamp_predicate: None,
|
|
||||||
range,
|
|
||||||
predicate,
|
predicate,
|
||||||
plans: Vec::new(),
|
plans: Vec::new(),
|
||||||
}
|
}
|
||||||
|
@ -816,19 +825,17 @@ impl<'a> ValuePredVisitor<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Visitor for ValuePredVisitor<'a> {
|
impl<'a> Visitor for ValuePredVisitor<'a> {
|
||||||
fn pre_visit_partition(&mut self, partition: &Partition) -> Result<()> {
|
|
||||||
self.timestamp_predicate = partition
|
|
||||||
.make_timestamp_predicate(self.range)
|
|
||||||
.context(PartitionError)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO try and rule out entire tables based on the same critera
|
// TODO try and rule out entire tables based on the same critera
|
||||||
// as explained on NamePredVisitor
|
// as explained on NamePredVisitor
|
||||||
fn pre_visit_table(&mut self, table: &Table, partition: &Partition) -> Result<()> {
|
fn pre_visit_table(
|
||||||
|
&mut self,
|
||||||
|
table: &Table,
|
||||||
|
partition: &Partition,
|
||||||
|
ts_pred: Option<&TimestampPredicate>,
|
||||||
|
) -> Result<()> {
|
||||||
// skip table entirely if there are no rows that fall in the timestamp
|
// skip table entirely if there are no rows that fall in the timestamp
|
||||||
if !table
|
if !table
|
||||||
.matches_timestamp_predicate(self.timestamp_predicate.as_ref())
|
.matches_timestamp_predicate(ts_pred)
|
||||||
.context(TableError)?
|
.context(TableError)?
|
||||||
{
|
{
|
||||||
return Ok(());
|
return Ok(());
|
||||||
|
@ -836,12 +843,7 @@ impl<'a> Visitor for ValuePredVisitor<'a> {
|
||||||
|
|
||||||
self.plans.push(
|
self.plans.push(
|
||||||
table
|
table
|
||||||
.tag_values_plan(
|
.tag_values_plan(self.column_name, &self.predicate, ts_pred, partition)
|
||||||
self.column_name,
|
|
||||||
&self.predicate,
|
|
||||||
self.timestamp_predicate.as_ref(),
|
|
||||||
partition,
|
|
||||||
)
|
|
||||||
.context(TableError)?,
|
.context(TableError)?,
|
||||||
);
|
);
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
Loading…
Reference in New Issue