From d9e8132a3af079c37114c2f2715c77f6509bec38 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 26 Feb 2021 14:14:41 +0000 Subject: [PATCH] refactor: wire up column_values for row_group --- read_buffer/src/column.rs | 19 +++++++++++++++---- read_buffer/src/column/string.rs | 9 +++++++++ read_buffer/src/row_group.rs | 25 +++++++++++++++++++++---- 3 files changed, 45 insertions(+), 8 deletions(-) diff --git a/read_buffer/src/column.rs b/read_buffer/src/column.rs index 60cf71254f..b24c88dd50 100644 --- a/read_buffer/src/column.rs +++ b/read_buffer/src/column.rs @@ -683,10 +683,21 @@ impl Column { } } - /// Determines if the column contains other values than those provided in - /// `values`. - pub fn has_other_values(&self, values: &BTreeSet) -> bool { - todo!() + /// Determines if the column contains any string values that are not present + /// in the provided `values` argument. + pub fn has_other_non_null_string_values(&self, values: &BTreeSet) -> bool { + match self { + Column::String(_, data) => data.has_other_non_null_values(values), + Column::Float(_, _) => unimplemented!("operation not supported on `Float` column"), + Column::Integer(_, _) => unimplemented!("operation not supported on `Integer` column"), + Column::Unsigned(_, _) => { + unimplemented!("operation not supported on `Unsigned` column") + } + Column::Bool(_, _) => unimplemented!("operation not supported on `Bool` column"), + Column::ByteArray(_, _) => { + unimplemented!("operation not supported on `ByteArray` column") + } + } } } diff --git a/read_buffer/src/column/string.rs b/read_buffer/src/column/string.rs index e1acf34869..b9ab01026d 100644 --- a/read_buffer/src/column/string.rs +++ b/read_buffer/src/column/string.rs @@ -91,6 +91,15 @@ impl StringEncoding { } } + /// Determines if the column contains any values other than those provided. + /// Short-circuits execution as soon as it finds a value not in `values`. + pub fn has_other_non_null_values(&self, values: &BTreeSet) -> bool { + match &self { + Self::RLEDictionary(c) => c.has_other_non_null_values(values), + Self::Dictionary(c) => c.has_other_non_null_values(values), + } + } + /// Returns the logical value found at the provided row id. pub fn value(&self, row_id: u32) -> Value<'_> { match &self { diff --git a/read_buffer/src/row_group.rs b/read_buffer/src/row_group.rs index 57ca2a5f07..3a62977257 100644 --- a/read_buffer/src/row_group.rs +++ b/read_buffer/src/row_group.rs @@ -1019,7 +1019,7 @@ impl RowGroup { match dst.get(*name) { // process the column if we haven't got all the distinct // values. - Some(values) => column.has_other_values(values), + Some(values) => column.has_other_non_null_string_values(values), // no existing values for this column - we will need to // process it. None => true, @@ -1039,9 +1039,11 @@ impl RowGroup { }; let results = dst.entry(name.clone()).or_default(); - for value in column.distinct_values(row_itr).iter() { - if value.is_some() && !results.contains(value.unwrap()) { - results.insert(value.unwrap().to_owned()); + for value in column.distinct_values(row_itr).into_iter() { + if let Some(v) = value { + if !results.contains(v) { + results.insert(v.to_owned()); + } } } } @@ -3057,6 +3059,21 @@ west,host-d,11,9 to_map(vec![("env", &["stag"]), ("region", &["north", "south"])]) ); + let mut dst = BTreeMap::new(); + dst.insert( + "env".to_owned(), + vec!["stag".to_owned()].into_iter().collect::>(), + ); + let result = rg.column_values( + &Predicate::new(vec![BinaryExpr::from(("time", ">", 1_i64))]), + &["env", "region"], + dst, + ); + assert_eq!( + result, + to_map(vec![("env", &["stag"]), ("region", &["north", "south"])]) + ); + let result = rg.column_values( &Predicate::new(vec![BinaryExpr::from(("time", ">", 4_i64))]), &["env", "region"],