diff --git a/read_buffer/src/row_group.rs b/read_buffer/src/row_group.rs index a785e79b9e..4b49b01101 100644 --- a/read_buffer/src/row_group.rs +++ b/read_buffer/src/row_group.rs @@ -1058,6 +1058,7 @@ impl RowGroup { pub fn column_names( &self, predicate: &Predicate, + negated_predicates: &[Predicate], columns: Selection<'_>, dst: &mut BTreeSet, ) { @@ -1083,13 +1084,62 @@ impl RowGroup { }) .collect::>(); - match self.row_ids_from_predicate(predicate) { - RowIDsOption::None(_) => {} // nothing matches predicate + // apply predicate to determine candidate rows. + let row_ids = self.row_ids_from_predicate(predicate); + + // identify rows that have been marked as deleted. + let deleted_row_ids = self.row_ids_from_delete_predicates(negated_predicates); + + // determine final candidate rows + let final_row_ids = match (row_ids, deleted_row_ids) { + // no matching rows + (RowIDsOption::None(_), _) => RowIDsOption::new_none(), + // everything marked deleted + (_, RowIDsOption::All(_)) => RowIDsOption::new_none(), + // nothing to delete + (row_ids, RowIDsOption::None(_)) => row_ids, + // in these cases some rows have been deleted + (RowIDsOption::Some(mut row_ids), RowIDsOption::Some(delete_row_ids)) => { + row_ids.relative_complement(&delete_row_ids); + if row_ids.is_empty() { + RowIDsOption::new_none() + } else { + RowIDsOption::Some(row_ids) + } + } + (RowIDsOption::All(mut row_ids), RowIDsOption::Some(delete_row_ids)) => { + // Recall that the `All` variant for `RowIDsOption` is an + // optimisation to represent all row IDs in the column without + // having to materialise the bitset. In this case however, we + // will have to materialise the bitset in order to calculate the + // relative complement. + row_ids.add_range(0, self.rows()); + row_ids.relative_complement(&delete_row_ids); + + // N.B we can't remove all rows since there are more selected + // rows than deleted rows - we always end up deleting no rows + // or some rows. + if row_ids.len() == self.rows() as usize { + RowIDsOption::All(row_ids) // all selected rows remain and they're all rows in column + } else { + RowIDsOption::Some(row_ids) + } + } + }; + + // With the final valid row IDs it is possible to try to identify a row + // for each column where there is a non-null value. + match final_row_ids { + RowIDsOption::None(_) => {} // no valid rows RowIDsOption::Some(row_ids) => { + // TODO(edd): perf refactor these operations to use + // iterators of row IDs. let row_ids = row_ids.to_vec(); for (name, column) in candidate_columns { if column.has_non_null_value(&row_ids) { + // at least one non-null value for this column in the + // set of valid rows. dst.insert(name.to_owned()); } } @@ -1097,6 +1147,8 @@ impl RowGroup { RowIDsOption::All(_) => { for (name, column) in candidate_columns { if column.has_any_non_null_value() { + // at least one non-null value for this column in the + // set of valid rows. dst.insert(name.to_owned()); } } @@ -3597,8 +3649,7 @@ west,host-c,pro,10,6 assert_ne!(col2, col3); } - #[test] - fn column_names() { + fn column_names_setup() -> RowGroup { let mut columns = vec![]; let rc = ColumnType::Tag(Column::from(&[Some("west"), Some("west"), None, None][..])); columns.push(("region".to_string(), rc)); @@ -3613,11 +3664,16 @@ west,host-c,pro,10,6 let tc = ColumnType::Time(Column::from(&[100_i64, 200, 500, 600][..])); columns.push(("time".to_string(), tc)); - let row_group = RowGroup::new(4, columns); + RowGroup::new(4, columns) + } + + #[test] + fn column_names() { + let row_group = column_names_setup(); // No predicate - just find a value in each column that matches. let mut dst = BTreeSet::new(); - row_group.column_names(&Predicate::default(), Selection::All, &mut dst); + row_group.column_names(&Predicate::default(), &[], Selection::All, &mut dst); assert_eq!( dst, vec!["region", "temp", "time", "track"] @@ -3630,6 +3686,7 @@ west,host-c,pro,10,6 let mut dst = BTreeSet::new(); row_group.column_names( &Predicate::new(vec![BinaryExpr::from(("region", "=", "east"))]), + &[], Selection::All, &mut dst, ); @@ -3640,6 +3697,7 @@ west,host-c,pro,10,6 let mut dst = BTreeSet::new(); row_group.column_names( &Predicate::new(vec![BinaryExpr::from(("track", "=", "place"))]), + &[], Selection::All, &mut dst, ); @@ -3668,7 +3726,7 @@ west,host-c,pro,10,6 columns.push(("time".to_string(), tc)); let row_group = RowGroup::new(1, columns); - row_group.column_names(&Predicate::default(), Selection::All, &mut dst); + row_group.column_names(&Predicate::default(), &[], Selection::All, &mut dst); assert_eq!( dst, vec!["env", "temp", "time", "track"] @@ -3679,7 +3737,12 @@ west,host-c,pro,10,6 // just tag keys dst.clear(); - row_group.column_names(&Predicate::default(), Selection::Some(&["env"]), &mut dst); + row_group.column_names( + &Predicate::default(), + &[], + Selection::Some(&["env"]), + &mut dst, + ); assert_eq!( dst.iter().cloned().collect::>(), vec!["env".to_owned()], @@ -3687,13 +3750,144 @@ west,host-c,pro,10,6 // just field keys dst.clear(); - row_group.column_names(&Predicate::default(), Selection::Some(&["temp"]), &mut dst); + row_group.column_names( + &Predicate::default(), + &[], + Selection::Some(&["temp"]), + &mut dst, + ); assert_eq!( dst.iter().cloned().collect::>(), vec!["temp".to_owned()], ); } + #[test] + fn column_names_with_deletes() { + let row_group = column_names_setup(); + + // region | track | temp | time + // -------|----------|------|----- + // west | Thinking | hot | 100 + // west | of | cold | 200 + // NULL | a | cold | 500 + // NULL | place | warm | 600 + let cases = vec![ + // 0. A delete predicate, but no rows match. All columns should be + // returned. + ( + Predicate::default(), + vec![Predicate::new(vec![BinaryExpr::from(( + "region", "=", "nomatch", + ))])], + Selection::All, + vec!["region", "temp", "time", "track"], + ), + // 1. A delete predicate matching one row but all columns should + // still be returned. + ( + Predicate::default(), + vec![Predicate::new(vec![BinaryExpr::from(( + "track", "=", "place", + ))])], + Selection::All, + vec!["region", "temp", "time", "track"], + ), + // 2. A delete predicate with multiple expressions matching rows but + // all columns should be returned. + ( + Predicate::default(), + vec![Predicate::new(vec![ + BinaryExpr::from(("temp", "=", "cold")), + BinaryExpr::from(("time", ">", 300_i64)), + ])], + Selection::All, + vec!["region", "temp", "time", "track"], + ), + // 3. A delete predicate matching all "region" rows that have + // non-null values. "region" is excluded from results because it + // only matching rows with NULL values. + ( + Predicate::default(), + vec![Predicate::new(vec![BinaryExpr::from(( + "region", "=", "west", + ))])], + Selection::All, + vec!["temp", "time", "track"], + ), + // 4. Two delete predicates covering all rows. No columns names + // returned. + ( + Predicate::default(), + vec![ + Predicate::new(vec![BinaryExpr::from(("region", "=", "west"))]), + Predicate::new(vec![BinaryExpr::from(("time", ">", 400_i64))]), + ], + Selection::All, + vec![], + ), + // 5. Combination of filtering predicate and delete predicate + ( + Predicate::new(vec![BinaryExpr::from(("temp", "=", "cold"))]), + vec![Predicate::new(vec![BinaryExpr::from(( + "region", "=", "west", // deletes only non-null value for region column + ))])], + Selection::All, + vec!["temp", "time", "track"], + ), + // 6. Same filtering predicate and delete predicate. No rows + ( + Predicate::new(vec![BinaryExpr::from(("temp", "=", "cold"))]), + vec![Predicate::new(vec![BinaryExpr::from(( + "temp", "=", "cold", + ))])], + Selection::All, + vec![], + ), + // 7. Combination of filtering predicate and delete predicates that + // covers all filtered rows. + ( + Predicate::new(vec![BinaryExpr::from(("temp", "=", "cold"))]), + vec![ + Predicate::new(vec![BinaryExpr::from(("time", "=", 200_i64))]), + Predicate::new(vec![BinaryExpr::from(("time", "=", 500_i64))]), + ], + Selection::All, + vec![], + ), + // 8. deletes all non-null rows for region column through negation. + ( + Predicate::new(vec![BinaryExpr::from(("track", "=", "place"))]), + vec![Predicate::new(vec![BinaryExpr::from(( + "region", "!=", "east", + ))])], + Selection::All, + vec!["temp", "time", "track"], + ), + // 9. deletes covers all non-null rows for region column but NULL + // values mean some column names returned + ( + Predicate::default(), + vec![Predicate::new(vec![BinaryExpr::from(( + "region", ">", "apple", + ))])], + Selection::All, + vec!["temp", "time", "track"], + ), + ]; + + for (i, (filter_pred, negated_preds, projection, exp)) in cases.into_iter().enumerate() { + let mut dst = BTreeSet::new(); + row_group.column_names(&filter_pred, &negated_preds, projection, &mut dst); + assert_eq!( + dst, + exp.into_iter().map(|s| s.to_owned()).collect(), + "case {:?} failed", + i + ); + } + } + fn to_map(arr: Vec<(&str, &[&str])>) -> BTreeMap> { arr.iter() .map(|(k, values)| { diff --git a/read_buffer/src/table.rs b/read_buffer/src/table.rs index 6db494b7aa..e57d882f39 100644 --- a/read_buffer/src/table.rs +++ b/read_buffer/src/table.rs @@ -478,10 +478,10 @@ impl Table { pub fn column_names( &self, predicate: &Predicate, + negated_predicates: &[Predicate], columns: Selection<'_>, mut dst: BTreeSet, ) -> Result> { - // TODO(edd): add delete support let (meta, row_groups) = { let table_data = self.table_data.read(); (Arc::clone(&table_data.meta), table_data.data.clone()) @@ -493,15 +493,22 @@ impl Table { return Ok(dst); } - // Determine if predicate can be applied. + // Determine if predicate can be applied to table. let predicate: Predicate = meta.validate_exprs(predicate.clone())?.into(); + // Determine if the negated predicates (deletes) can be applied to the + // table. + let mut n_predicates: Vec = vec![]; + for pred in negated_predicates { + n_predicates.push(meta.validate_exprs(pred.clone())?.into()); + } + // Filter set of row groups to process using predicate. let row_groups = self.filter_row_groups(&predicate, row_groups); // Execute against each row group for row_group in row_groups { - row_group.column_names(&predicate, columns, &mut dst); + row_group.column_names(&predicate, negated_predicates, columns, &mut dst); } Ok(dst) @@ -1774,7 +1781,7 @@ west,host-b,100 let mut dst: BTreeSet = BTreeSet::new(); dst = table - .column_names(&Predicate::default(), Selection::All, dst) + .column_names(&Predicate::default(), &[], Selection::All, dst) .unwrap(); assert_eq!( @@ -1784,7 +1791,7 @@ west,host-b,100 // re-run and get the same answer dst = table - .column_names(&Predicate::default(), Selection::All, dst) + .column_names(&Predicate::default(), &[], Selection::All, dst) .unwrap(); assert_eq!( dst.iter().cloned().collect::>(), @@ -1796,6 +1803,7 @@ west,host-b,100 dst = table .column_names( &Predicate::new(vec![BinaryExpr::from(("time", ">=", 300_i64))]), + &[], Selection::All, dst, ) @@ -1809,6 +1817,7 @@ west,host-b,100 dst = table .column_names( &Predicate::new(vec![BinaryExpr::from(("time", ">=", 300_i64))]), + &[], Selection::All, BTreeSet::new(), ) @@ -1822,6 +1831,7 @@ west,host-b,100 assert!(table .column_names( &Predicate::new(vec![BinaryExpr::from(("time", ">=", "not a number"))]), + &[], Selection::All, dst, )