refactor: wire up column_values for row_group

pull/24376/head
Edd Robinson 2021-02-26 14:14:41 +00:00 committed by kodiakhq[bot]
parent 9b1346ddea
commit d9e8132a3a
3 changed files with 45 additions and 8 deletions

View File

@ -683,10 +683,21 @@ impl Column {
} }
} }
/// Determines if the column contains other values than those provided in /// Determines if the column contains any string values that are not present
/// `values`. /// in the provided `values` argument.
pub fn has_other_values(&self, values: &BTreeSet<String>) -> bool { pub fn has_other_non_null_string_values(&self, values: &BTreeSet<String>) -> bool {
todo!() match self {
Column::String(_, data) => data.has_other_non_null_values(values),
Column::Float(_, _) => unimplemented!("operation not supported on `Float` column"),
Column::Integer(_, _) => unimplemented!("operation not supported on `Integer` column"),
Column::Unsigned(_, _) => {
unimplemented!("operation not supported on `Unsigned` column")
}
Column::Bool(_, _) => unimplemented!("operation not supported on `Bool` column"),
Column::ByteArray(_, _) => {
unimplemented!("operation not supported on `ByteArray` column")
}
}
} }
} }

View File

@ -91,6 +91,15 @@ impl StringEncoding {
} }
} }
/// Reports whether this encoding holds any non-null value that is absent
/// from the provided `values` set.
///
/// This method is a thin dispatcher: the actual check is carried out by the
/// underlying dictionary encoding, which is documented here as stopping as
/// soon as it encounters a value that is not in `values`.
pub fn has_other_non_null_values(&self, values: &BTreeSet<String>) -> bool {
    match self {
        Self::Dictionary(plain) => plain.has_other_non_null_values(values),
        Self::RLEDictionary(rle) => rle.has_other_non_null_values(values),
    }
}
/// Returns the logical value found at the provided row id. /// Returns the logical value found at the provided row id.
pub fn value(&self, row_id: u32) -> Value<'_> { pub fn value(&self, row_id: u32) -> Value<'_> {
match &self { match &self {

View File

@ -1019,7 +1019,7 @@ impl RowGroup {
match dst.get(*name) { match dst.get(*name) {
// process the column if we haven't got all the distinct // process the column if we haven't got all the distinct
// values. // values.
Some(values) => column.has_other_values(values), Some(values) => column.has_other_non_null_string_values(values),
// no existing values for this column - we will need to // no existing values for this column - we will need to
// process it. // process it.
None => true, None => true,
@ -1039,9 +1039,11 @@ impl RowGroup {
}; };
let results = dst.entry(name.clone()).or_default(); let results = dst.entry(name.clone()).or_default();
for value in column.distinct_values(row_itr).iter() { for value in column.distinct_values(row_itr).into_iter() {
if value.is_some() && !results.contains(value.unwrap()) { if let Some(v) = value {
results.insert(value.unwrap().to_owned()); if !results.contains(v) {
results.insert(v.to_owned());
}
} }
} }
} }
@ -3057,6 +3059,21 @@ west,host-d,11,9
to_map(vec![("env", &["stag"]), ("region", &["north", "south"])]) to_map(vec![("env", &["stag"]), ("region", &["north", "south"])])
); );
let mut dst = BTreeMap::new();
dst.insert(
"env".to_owned(),
vec!["stag".to_owned()].into_iter().collect::<BTreeSet<_>>(),
);
let result = rg.column_values(
&Predicate::new(vec![BinaryExpr::from(("time", ">", 1_i64))]),
&["env", "region"],
dst,
);
assert_eq!(
result,
to_map(vec![("env", &["stag"]), ("region", &["north", "south"])])
);
let result = rg.column_values( let result = rg.column_values(
&Predicate::new(vec![BinaryExpr::from(("time", ">", 4_i64))]), &Predicate::new(vec![BinaryExpr::from(("time", ">", 4_i64))]),
&["env", "region"], &["env", "region"],