feat: implement column_values for write buffer database (#339)

2020-10-07 10:12:28 -04:00 · 2020-10-07 10:12:28 -04:00 · 9a81bf4d72
parent 3ba1a95795
commit 9a81bf4d72
3 changed files with 536 additions and 11 deletions
--- a/delorean_storage/src/lib.rs
+++ b/delorean_storage/src/lib.rs
@ -31,6 +31,18 @@ impl TimestampRange {
    pub fn new(start: i64, end: i64) -> Self {
        Self { start, end }
    }
    #[inline]
    /// Returns true if this range contains the value v
    pub fn contains(&self, v: i64) -> bool {
        self.start <= v && v < self.end
    }
    #[inline]
    /// Returns true if this range contains the value v
    pub fn contains_opt(&self, v: Option<i64>) -> bool {
        Some(true) == v.map(|ts| self.contains(ts))
    }
 }
 /// Represents a general purpose predicate for evaluation
@ -151,3 +163,32 @@ pub fn org_and_bucket_to_database(org: impl Into<String>, bucket: &str) -> Strin
 //
 //#[cfg(test)]
 pub mod test;
 #[cfg(test)]
 mod tests {
    use super::*;
    #[test]
    fn test_timestamp_range_contains() {
        let range = TimestampRange::new(100, 200);
        assert!(!range.contains(99));
        assert!(range.contains(100));
        assert!(range.contains(101));
        assert!(range.contains(199));
        assert!(!range.contains(200));
        assert!(!range.contains(201));
    }
    #[test]
    fn test_timestamp_range_contains_opt() {
        let range = TimestampRange::new(100, 200);
        assert!(!range.contains_opt(Some(99)));
        assert!(range.contains_opt(Some(100)));
        assert!(range.contains_opt(Some(101)));
        assert!(range.contains_opt(Some(199)));
        assert!(!range.contains_opt(Some(200)));
        assert!(!range.contains_opt(Some(201)));
        assert!(!range.contains_opt(None));
    }
 }
--- a/delorean_write_buffer/src/database.rs
+++ b/delorean_write_buffer/src/database.rs
@ -115,16 +115,44 @@ pub enum Error {
    },
    #[snafu(display(
-        "Column ID {} not found in dictionary of partition {}",
+        "Column name {} not found in dictionary of partition {}",
-        column,
+        column_name,
        partition
    ))]
-    ColumnIdNotFoundInDictionary {
+    ColumnNameNotFoundInDictionary {
-        column: u32,
+        column_name: String,
        partition: String,
        source: DictionaryError,
    },
    #[snafu(display(
        "Column ID {} not found in dictionary of partition {}",
        column_id,
        partition
    ))]
    ColumnIdNotFoundInDictionary {
        column_id: u32,
        partition: String,
        source: DictionaryError,
    },
    #[snafu(display(
        "Value ID {} not found in dictionary of partition {}",
        value_id,
        partition
    ))]
    ColumnValueIdNotFoundInDictionary {
        value_id: u32,
        partition: String,
        source: DictionaryError,
    },
    #[snafu(display(
        "Column '{}' is not a tag column and thus can not list values",
        column_name
    ))]
    UnsupportedColumnTypeForListingValues { column_name: String },
    #[snafu(display("Table {} not found in partition {}", table, partition))]
    TableNotFoundInPartition { table: u32, partition: String },
@ -344,14 +372,26 @@ impl Database for Db {
        }
    }
    /// return all column values in this database, while applying optional predicates
    async fn column_values(
        &self,
-        _column_name: &str,
+        column_name: &str,
-        _table: Option<String>,
+        table: Option<String>,
-        _range: Option<TimestampRange>,
+        range: Option<TimestampRange>,
-        _predicate: Option<Predicate>,
+        predicate: Option<Predicate>,
    ) -> Result<StringSetPlan, Self::Error> {
-        unimplemented!("Write buffer database does not yet implement column_values");
+        match predicate {
            None => {
                let mut visitor = ValueVisitor::new(column_name, range);
                self.visit_tables(table, &mut visitor).await?;
                Ok(visitor.column_values.into())
            }
            Some(predicate) => {
                let mut visitor = ValuePredVisitor::new(column_name, range, predicate);
                self.visit_tables(table, &mut visitor).await?;
                Ok(visitor.plans.into())
            }
        }
    }
    async fn table_to_arrow(
@ -564,7 +604,7 @@ impl Visitor for NameVisitor {
        for &column_id in &self.partition_column_ids {
            let column_name = partition.dictionary.lookup_id(column_id).context(
                ColumnIdNotFoundInDictionary {
-                    column: column_id,
+                    column_id,
                    partition: &partition.key,
                },
            )?;
@ -636,7 +676,177 @@ impl Visitor for NamePredVisitor {
        );
        Ok(())
    }
-} // next partition
+}
 /// return all values in the `column_name` column
 /// in this database, while applying the timestamp range
 ///
 /// Potential optimizations: Run this in parallel (in different
 /// futures) for each partition / table, rather than a single one
 /// -- but that will require building up parallel hash tables.
 struct ValueVisitor<'a> {
    column_name: &'a str,
    // what column id we are looking for
    column_id: Option<u32>,
    timestamp_predicate: Option<TimestampPredicate>,
    partition_value_ids: BTreeSet<u32>,
    column_values: StringSet,
    range: Option<TimestampRange>,
 }
 impl<'a> ValueVisitor<'a> {
    fn new(column_name: &'a str, range: Option<TimestampRange>) -> Self {
        Self {
            column_name,
            column_id: None,
            timestamp_predicate: None,
            column_values: StringSet::new(),
            partition_value_ids: BTreeSet::new(),
            range,
        }
    }
 }
 impl<'a> Visitor for ValueVisitor<'a> {
    fn pre_visit_partition(&mut self, partition: &Partition) -> Result<()> {
        self.partition_value_ids.clear();
        self.column_id = Some(
            partition
                .dictionary
                .lookup_value(self.column_name)
                .context(ColumnNameNotFoundInDictionary {
                    column_name: self.column_name,
                    partition: &partition.key,
                })?,
        );
        self.timestamp_predicate = partition
            .make_timestamp_predicate(self.range)
            .context(PartitionError)?;
        Ok(())
    }
    fn visit_column(&mut self, table: &Table, column_id: u32, column: &Column) -> Result<()> {
        if Some(column_id) != self.column_id {
            return Ok(());
        }
        match column {
            Column::Tag(column) => {
                // if we have a timestamp prediate, find all values
                // that the timestamp is within range. Otherwise take
                // all values.
                match self.timestamp_predicate {
                    None => {
                        // take all non-null values
                        column.iter().filter_map(|&s| s).for_each(|value_id| {
                            self.partition_value_ids.insert(value_id);
                        });
                    }
                    Some(pred) => {
                        // filter out all values that don't match the timestmap
                        let time_column =
                            table.column_i64(pred.time_column_id).context(TableError)?;
                        column
                            .iter()
                            .zip(time_column.iter())
                            .filter_map(|(&column_value_id, &timestamp_value)| {
                                if pred.range.contains_opt(timestamp_value) {
                                    column_value_id
                                } else {
                                    None
                                }
                            })
                            .for_each(|value_id| {
                                self.partition_value_ids.insert(value_id);
                            });
                    }
                }
                Ok(())
            }
            _ => UnsupportedColumnTypeForListingValues {
                column_name: self.column_name,
            }
            .fail(),
        }
    }
    fn post_visit_partition(&mut self, partition: &Partition) -> Result<()> {
        // convert all the partition's column_ids to Strings
        for &value_id in &self.partition_value_ids {
            let value = partition.dictionary.lookup_id(value_id).context(
                ColumnValueIdNotFoundInDictionary {
                    value_id,
                    partition: &partition.key,
                },
            )?;
            if !self.column_values.contains(value) {
                self.column_values.insert(value.to_string());
            }
        }
        Ok(())
    }
 }
 /// return all column values for the specified column in this
 /// database, while applying the timestamp range and predicate
 struct ValuePredVisitor<'a> {
    column_name: &'a str,
    // what column id we are looking for
    timestamp_predicate: Option<TimestampPredicate>,
    range: Option<TimestampRange>,
    predicate: Predicate,
    plans: Vec<LogicalPlan>,
 }
 impl<'a> ValuePredVisitor<'a> {
    fn new(column_name: &'a str, range: Option<TimestampRange>, predicate: Predicate) -> Self {
        Self {
            column_name,
            timestamp_predicate: None,
            range,
            predicate,
            plans: Vec::new(),
        }
    }
 }
 impl<'a> Visitor for ValuePredVisitor<'a> {
    fn pre_visit_partition(&mut self, partition: &Partition) -> Result<()> {
        self.timestamp_predicate = partition
            .make_timestamp_predicate(self.range)
            .context(PartitionError)?;
        Ok(())
    }
    // TODO try and rule out entire tables based on the same critera
    // as explained on NamePredVisitor
    fn pre_visit_table(&mut self, table: &Table, partition: &Partition) -> Result<()> {
        // skip table entirely if there are no rows that fall in the timestamp
        if !table
            .matches_timestamp_predicate(self.timestamp_predicate.as_ref())
            .context(TableError)?
        {
            return Ok(());
        }
        self.plans.push(
            table
                .tag_values_plan(
                    self.column_name,
                    &self.predicate,
                    self.timestamp_predicate.as_ref(),
                    partition,
                )
                .context(TableError)?,
        );
        Ok(())
    }
 }
 // partition_key returns the partition key for the given line. The key will be the prefix of a
 // partition name (multiple partitions can exist for each key). It uses the user defined
@ -1203,4 +1413,167 @@ disk bytes=23432323i 1600136510000000000",
        assert_eq!(to_set(&["state", "city", "county"]), *actual_tag_keys);
        Ok(())
    }
    #[tokio::test(threaded_scheduler)]
    async fn list_column_values() -> Result {
        let mut dir = delorean_test_helpers::tmp_dir()?.into_path();
        let db = Db::try_with_wal("column_namedb", &mut dir).await?;
        let lp_data = "h2o,state=CA,city=LA temp=70.4 100\n\
                       h2o,state=MA,city=Boston temp=72.4 250\n\
                       o2,state=MA,city=Boston temp=50.4 200\n\
                       o2,state=CA temp=79.0 300\n\
                       o2,state=NY temp=60.8 400\n";
        let lines: Vec<_> = parse_lines(lp_data).map(|l| l.unwrap()).collect();
        db.write_lines(&lines).await?;
        #[derive(Debug)]
        struct TestCase<'a> {
            description: &'a str,
            column_name: &'a str,
            measurement: Option<String>,
            range: Option<TimestampRange>,
            predicate: Option<Predicate>,
            expected_column_values: Result<Vec<&'a str>>,
        };
        let test_cases = vec![
            TestCase {
                description: "No predicates, 'state' col",
                column_name: "state",
                measurement: None,
                range: None,
                predicate: None,
                expected_column_values: Ok(vec!["CA", "MA", "NY"]),
            },
            TestCase {
                description: "No predicates, 'city' col",
                column_name: "city",
                measurement: None,
                range: None,
                predicate: None,
                expected_column_values: Ok(vec!["Boston", "LA"]),
            },
            TestCase {
                description: "Restrictions: timestamp",
                column_name: "state",
                measurement: None,
                range: Some(TimestampRange::new(50, 201)),
                predicate: None,
                expected_column_values: Ok(vec!["CA", "MA"]),
            },
            TestCase {
                description: "Restrictions: predicate",
                column_name: "city",
                measurement: None,
                range: None,
                predicate: make_column_eq_predicate("state", "MA"), // state=MA
                expected_column_values: Ok(vec!["Boston"]),
            },
            TestCase {
                description: "Restrictions: timestamp and predicate",
                column_name: "state",
                measurement: None,
                range: Some(TimestampRange::new(150, 301)),
                predicate: make_column_eq_predicate("state", "MA"), // state=MA
                expected_column_values: Ok(vec!["MA"]),
            },
            TestCase {
                description: "Restrictions: measurement name",
                column_name: "state",
                measurement: Some("h2o".to_string()),
                range: None,
                predicate: None,
                expected_column_values: Ok(vec!["CA", "MA"]),
            },
            TestCase {
                description: "Restrictions: measurement name, with nulls",
                column_name: "city",
                measurement: Some("o2".to_string()),
                range: None,
                predicate: None,
                expected_column_values: Ok(vec!["Boston"]),
            },
            TestCase {
                description: "Restrictions: measurement name and timestamp",
                column_name: "state",
                measurement: Some("o2".to_string()),
                range: Some(TimestampRange::new(50, 201)),
                predicate: None,
                expected_column_values: Ok(vec!["MA"]),
            },
            TestCase {
                description: "Restrictions: measurement name and predicate",
                column_name: "state",
                measurement: Some("o2".to_string()),
                range: None,
                predicate: make_column_eq_predicate("state", "NY"), // state=NY
                expected_column_values: Ok(vec!["NY"]),
            },
            TestCase {
                description: "Restrictions: measurement name, timestamp and predicate",
                column_name: "state",
                measurement: Some("o2".to_string()),
                range: Some(TimestampRange::new(1, 550)),
                predicate: make_column_eq_predicate("state", "NY"), // state=NY
                expected_column_values: Ok(vec!["NY"]),
            },
            TestCase {
                description: "Restrictions: measurement name, timestamp and predicate: no match",
                column_name: "state",
                measurement: Some("o2".to_string()),
                range: Some(TimestampRange::new(1, 300)), // filters out the NY row
                predicate: make_column_eq_predicate("state", "NY"), // state=NY
                expected_column_values: Ok(vec![]),
            },
        ];
        for test_case in test_cases.into_iter() {
            let test_case_str = format!("{:#?}", test_case);
            println!("Running test case: {:?}", test_case);
            let column_values_plan = db
                .column_values(
                    test_case.column_name,
                    test_case.measurement,
                    test_case.range,
                    test_case.predicate,
                )
                .await
                .expect("Created tag_values plan successfully");
            // run the execution plan (
            let executor = Executor::default();
            let actual_column_values = executor.to_string_set(column_values_plan).await;
            let is_match = if let Ok(expected_column_values) = &test_case.expected_column_values {
                let expected_column_values = to_set(expected_column_values);
                if let Ok(actual_column_values) = &actual_column_values {
                    **actual_column_values == expected_column_values
                } else {
                    false
                }
            } else if let Err(e) = &actual_column_values {
                // use string compare to compare errors to avoid having to build exact errors
                format!("{:?}", e) == format!("{:?}", test_case.expected_column_values)
            } else {
                false
            };
            assert!(
                is_match,
                "Mismatch\n\
                     actual_column_values: \n\
                     {:?}\n\
                     expected_column_values: \n\
                     {:?}\n\
                     Test_case: \n\
                     {}",
                actual_column_values, test_case.expected_column_values, test_case_str
            );
        }
        Ok(())
    }
 }
--- a/delorean_write_buffer/src/table.rs
+++ b/delorean_write_buffer/src/table.rs
@ -64,6 +64,18 @@ pub enum Error {
        inserted_value_type: String,
    },
    #[snafu(display(
        "Internal error: Expected column {} to be type {} but was {}",
        column_id,
        expected_column_type,
        actual_column_type
    ))]
    InternalColumnTypeMismatch {
        column_id: u32,
        expected_column_type: String,
        actual_column_type: String,
    },
    #[snafu(display(
        "Column name '{}' not found in dictionary of partition {}",
        column_name,
@ -75,6 +87,17 @@ pub enum Error {
        source: DictionaryError,
    },
    #[snafu(display(
        "Internal: Column id '{}' not found in dictionary of partition {}",
        column_id,
        partition
    ))]
    ColumnIdNotFoundInDictionary {
        column_id: u32,
        partition: String,
        source: DictionaryError,
    },
    #[snafu(display(
        "Schema mismatch: for column {}: can't insert {} into column with type {}",
        column,
@ -222,6 +245,21 @@ impl Table {
            .expect("invalid column id"))
    }
    /// Returns a reference to the specified column as a slice of
    /// i64s. Errors if the type is not i64
    pub fn column_i64(&self, column_id: u32) -> Result<&[Option<i64>]> {
        let column = self.column(column_id)?;
        match column {
            Column::I64(vals) => Ok(vals),
            _ => InternalColumnTypeMismatch {
                column_id,
                expected_column_type: "i64",
                actual_column_type: column.type_description(),
            }
            .fail(),
        }
    }
    pub fn append_rows(
        &mut self,
        dictionary: &mut Dictionary,
@ -285,6 +323,7 @@ impl Table {
            })
            .collect::<Vec<_>>();
        // TODO avoid materializing here
        let data = self.to_arrow_impl(partition, &requested_columns_with_index)?;
        let schema = data.schema();
@ -327,6 +366,59 @@ impl Table {
        Ok(plan)
    }
    /// Creates a DataFusion LogicalPlan that returns column values as a
    /// single column of Strings
    ///
    /// The created plan looks like:
    ///
    ///    Projection
    ///        Filter(predicate)
    ///          InMemoryScan
    pub fn tag_values_plan(
        &self,
        column_name: &str,
        predicate: &Predicate,
        timestamp_predicate: Option<&TimestampPredicate>,
        partition: &Partition,
    ) -> Result<LogicalPlan> {
        // Note we also need to add a timestamp predicate to this
        // expression as some additional rows may be filtered out (and
        // we couldn't prune out the entire Table based on range)
        let df_predicate = predicate.expr.clone();
        let df_predicate = match timestamp_predicate {
            None => df_predicate,
            Some(timestamp_predicate) => {
                Self::add_timestamp_predicate_expr(df_predicate, timestamp_predicate)
            }
        };
        // TODO avoid materializing all the columns here (ideally
        // DataFusion can prune them out)
        let data = self.all_to_arrow(partition)?;
        let schema = data.schema();
        let projection = None;
        let projected_schema = schema.clone();
        let select_exprs = vec![Expr::Column(column_name.into())];
        // And build the plan!
        let plan_builder = LogicalPlanBuilder::from(&LogicalPlan::InMemoryScan {
            data: vec![vec![data]],
            schema,
            projection,
            projected_schema,
        });
        plan_builder
            .filter(df_predicate)
            .context(BuildingPlan)?
            .project(select_exprs)
            .context(BuildingPlan)?
            .build()
            .context(BuildingPlan)
    }
    /// Creates a DataFusion predicate of the form:
    ///
    /// `expr AND (range.start <= time and time < range.end)`
@ -387,6 +479,25 @@ impl Table {
        self.to_arrow_impl(partition, &requested_columns_with_index)
    }
    /// Convert all columns to an arrow record batch
    pub fn all_to_arrow(&self, partition: &Partition) -> Result<RecordBatch> {
        let requested_columns_with_index = self
            .column_id_to_index
            .iter()
            .map(|(&column_id, &column_index)| {
                let column_name = partition.dictionary.lookup_id(column_id).context(
                    ColumnIdNotFoundInDictionary {
                        column_id,
                        partition: &partition.key,
                    },
                )?;
                Ok((column_name, column_index))
            })
            .collect::<Result<Vec<_>>>()?;
        self.to_arrow_impl(partition, &requested_columns_with_index)
    }
    /// Converts this table to an arrow record batch,
    ///
    /// requested columns with index are tuples of column_name, column_index