feat: add delete support to column_names

pull/24376/head
Edd Robinson 2021-10-08 13:04:15 +01:00
parent 7c97cd68bf
commit 7c3b0a00e5
2 changed files with 218 additions and 14 deletions

View File

@ -1058,6 +1058,7 @@ impl RowGroup {
pub fn column_names(
&self,
predicate: &Predicate,
negated_predicates: &[Predicate],
columns: Selection<'_>,
dst: &mut BTreeSet<String>,
) {
@ -1083,13 +1084,62 @@ impl RowGroup {
})
.collect::<Vec<_>>();
match self.row_ids_from_predicate(predicate) {
RowIDsOption::None(_) => {} // nothing matches predicate
// apply predicate to determine candidate rows.
let row_ids = self.row_ids_from_predicate(predicate);
// identify rows that have been marked as deleted.
let deleted_row_ids = self.row_ids_from_delete_predicates(negated_predicates);
// determine final candidate rows
let final_row_ids = match (row_ids, deleted_row_ids) {
// no matching rows
(RowIDsOption::None(_), _) => RowIDsOption::new_none(),
// everything marked deleted
(_, RowIDsOption::All(_)) => RowIDsOption::new_none(),
// nothing to delete
(row_ids, RowIDsOption::None(_)) => row_ids,
// in these cases some rows have been deleted
(RowIDsOption::Some(mut row_ids), RowIDsOption::Some(delete_row_ids)) => {
row_ids.relative_complement(&delete_row_ids);
if row_ids.is_empty() {
RowIDsOption::new_none()
} else {
RowIDsOption::Some(row_ids)
}
}
(RowIDsOption::All(mut row_ids), RowIDsOption::Some(delete_row_ids)) => {
// Recall that the `All` variant for `RowIDsOption` is an
// optimisation to represent all row IDs in the column without
// having to materialise the bitset. In this case however, we
// will have to materialise the bitset in order to calculate the
// relative complement.
row_ids.add_range(0, self.rows());
row_ids.relative_complement(&delete_row_ids);
// N.B we can't remove all rows since there are more selected
// rows than deleted rows - we always end up deleting no rows
// or some rows.
if row_ids.len() == self.rows() as usize {
RowIDsOption::All(row_ids) // all selected rows remain and they're all rows in column
} else {
RowIDsOption::Some(row_ids)
}
}
};
// With the final valid row IDs it is possible to try to identify a row
// for each column where there is a non-null value.
match final_row_ids {
RowIDsOption::None(_) => {} // no valid rows
RowIDsOption::Some(row_ids) => {
// TODO(edd): perf refactor these operations to use
// iterators of row IDs.
let row_ids = row_ids.to_vec();
for (name, column) in candidate_columns {
if column.has_non_null_value(&row_ids) {
// at least one non-null value for this column in the
// set of valid rows.
dst.insert(name.to_owned());
}
}
@ -1097,6 +1147,8 @@ impl RowGroup {
RowIDsOption::All(_) => {
for (name, column) in candidate_columns {
if column.has_any_non_null_value() {
// at least one non-null value for this column in the
// set of valid rows.
dst.insert(name.to_owned());
}
}
@ -3597,8 +3649,7 @@ west,host-c,pro,10,6
assert_ne!(col2, col3);
}
#[test]
fn column_names() {
fn column_names_setup() -> RowGroup {
let mut columns = vec![];
let rc = ColumnType::Tag(Column::from(&[Some("west"), Some("west"), None, None][..]));
columns.push(("region".to_string(), rc));
@ -3613,11 +3664,16 @@ west,host-c,pro,10,6
let tc = ColumnType::Time(Column::from(&[100_i64, 200, 500, 600][..]));
columns.push(("time".to_string(), tc));
let row_group = RowGroup::new(4, columns);
RowGroup::new(4, columns)
}
#[test]
fn column_names() {
let row_group = column_names_setup();
// No predicate - just find a value in each column that matches.
let mut dst = BTreeSet::new();
row_group.column_names(&Predicate::default(), Selection::All, &mut dst);
row_group.column_names(&Predicate::default(), &[], Selection::All, &mut dst);
assert_eq!(
dst,
vec!["region", "temp", "time", "track"]
@ -3630,6 +3686,7 @@ west,host-c,pro,10,6
let mut dst = BTreeSet::new();
row_group.column_names(
&Predicate::new(vec![BinaryExpr::from(("region", "=", "east"))]),
&[],
Selection::All,
&mut dst,
);
@ -3640,6 +3697,7 @@ west,host-c,pro,10,6
let mut dst = BTreeSet::new();
row_group.column_names(
&Predicate::new(vec![BinaryExpr::from(("track", "=", "place"))]),
&[],
Selection::All,
&mut dst,
);
@ -3668,7 +3726,7 @@ west,host-c,pro,10,6
columns.push(("time".to_string(), tc));
let row_group = RowGroup::new(1, columns);
row_group.column_names(&Predicate::default(), Selection::All, &mut dst);
row_group.column_names(&Predicate::default(), &[], Selection::All, &mut dst);
assert_eq!(
dst,
vec!["env", "temp", "time", "track"]
@ -3679,7 +3737,12 @@ west,host-c,pro,10,6
// just tag keys
dst.clear();
row_group.column_names(&Predicate::default(), Selection::Some(&["env"]), &mut dst);
row_group.column_names(
&Predicate::default(),
&[],
Selection::Some(&["env"]),
&mut dst,
);
assert_eq!(
dst.iter().cloned().collect::<Vec<_>>(),
vec!["env".to_owned()],
@ -3687,13 +3750,144 @@ west,host-c,pro,10,6
// just field keys
dst.clear();
row_group.column_names(&Predicate::default(), Selection::Some(&["temp"]), &mut dst);
row_group.column_names(
&Predicate::default(),
&[],
Selection::Some(&["temp"]),
&mut dst,
);
assert_eq!(
dst.iter().cloned().collect::<Vec<_>>(),
vec!["temp".to_owned()],
);
}
#[test]
fn column_names_with_deletes() {
    // Table-driven check of `RowGroup::column_names` when delete (negated)
    // predicates are supplied alongside the filtering predicate. Every case
    // selects all columns.

    // A single scenario: the filter to apply, the delete predicates, and the
    // column names expected to come back.
    struct Case {
        filter: Predicate,
        deletes: Vec<Predicate>,
        expected: Vec<&'static str>,
    }

    // Builds a delete list holding one predicate made of one expression.
    let delete_where = |expr: BinaryExpr| vec![Predicate::new(vec![expr])];

    let row_group = column_names_setup();

    // Fixture contents:
    //
    // region | track    | temp | time
    // -------|----------|------|-----
    // west   | Thinking | hot  | 100
    // west   | of       | cold | 200
    // NULL   | a        | cold | 500
    // NULL   | place    | warm | 600
    let cases = vec![
        // 0. The delete predicate matches no rows, so every column is
        //    returned.
        Case {
            filter: Predicate::default(),
            deletes: delete_where(BinaryExpr::from(("region", "=", "nomatch"))),
            expected: vec!["region", "temp", "time", "track"],
        },
        // 1. The delete predicate matches one row; every column is still
        //    returned.
        Case {
            filter: Predicate::default(),
            deletes: delete_where(BinaryExpr::from(("track", "=", "place"))),
            expected: vec!["region", "temp", "time", "track"],
        },
        // 2. A delete predicate with multiple expressions matches rows, but
        //    every column is still returned.
        Case {
            filter: Predicate::default(),
            deletes: vec![Predicate::new(vec![
                BinaryExpr::from(("temp", "=", "cold")),
                BinaryExpr::from(("time", ">", 300_i64)),
            ])],
            expected: vec!["region", "temp", "time", "track"],
        },
        // 3. The delete predicate matches every row where "region" is
        //    non-null. The remaining rows only hold NULL for "region", so
        //    that column is excluded.
        Case {
            filter: Predicate::default(),
            deletes: delete_where(BinaryExpr::from(("region", "=", "west"))),
            expected: vec!["temp", "time", "track"],
        },
        // 4. Two delete predicates that together cover all rows. No column
        //    names are returned.
        Case {
            filter: Predicate::default(),
            deletes: vec![
                Predicate::new(vec![BinaryExpr::from(("region", "=", "west"))]),
                Predicate::new(vec![BinaryExpr::from(("time", ">", 400_i64))]),
            ],
            expected: vec![],
        },
        // 5. Filtering predicate combined with a delete predicate; the
        //    delete removes the only non-null "region" value among the
        //    filtered rows.
        Case {
            filter: Predicate::new(vec![BinaryExpr::from(("temp", "=", "cold"))]),
            deletes: delete_where(BinaryExpr::from(("region", "=", "west"))),
            expected: vec!["temp", "time", "track"],
        },
        // 6. Identical filtering and delete predicates. No rows remain.
        Case {
            filter: Predicate::new(vec![BinaryExpr::from(("temp", "=", "cold"))]),
            deletes: delete_where(BinaryExpr::from(("temp", "=", "cold"))),
            expected: vec![],
        },
        // 7. Filtering predicate plus delete predicates that between them
        //    cover every filtered row.
        Case {
            filter: Predicate::new(vec![BinaryExpr::from(("temp", "=", "cold"))]),
            deletes: vec![
                Predicate::new(vec![BinaryExpr::from(("time", "=", 200_i64))]),
                Predicate::new(vec![BinaryExpr::from(("time", "=", 500_i64))]),
            ],
            expected: vec![],
        },
        // 8. A negated comparison deletes all rows with a non-null "region"
        //    value.
        Case {
            filter: Predicate::new(vec![BinaryExpr::from(("track", "=", "place"))]),
            deletes: delete_where(BinaryExpr::from(("region", "!=", "east"))),
            expected: vec!["temp", "time", "track"],
        },
        // 9. The delete covers all rows with a non-null "region" value; the
        //    rows with NULL "region" remain, so the other column names are
        //    still returned.
        Case {
            filter: Predicate::default(),
            deletes: delete_where(BinaryExpr::from(("region", ">", "apple"))),
            expected: vec!["temp", "time", "track"],
        },
    ];

    for (i, case) in cases.into_iter().enumerate() {
        let expected: BTreeSet<String> = case.expected.into_iter().map(String::from).collect();

        let mut dst = BTreeSet::new();
        row_group.column_names(&case.filter, &case.deletes, Selection::All, &mut dst);

        assert_eq!(dst, expected, "case {:?} failed", i);
    }
}
fn to_map(arr: Vec<(&str, &[&str])>) -> BTreeMap<String, BTreeSet<String>> {
arr.iter()
.map(|(k, values)| {

View File

@ -478,10 +478,10 @@ impl Table {
pub fn column_names(
&self,
predicate: &Predicate,
negated_predicates: &[Predicate],
columns: Selection<'_>,
mut dst: BTreeSet<String>,
) -> Result<BTreeSet<String>> {
// TODO(edd): add delete support
let (meta, row_groups) = {
let table_data = self.table_data.read();
(Arc::clone(&table_data.meta), table_data.data.clone())
@ -493,15 +493,22 @@ impl Table {
return Ok(dst);
}
// Determine if predicate can be applied.
// Determine if predicate can be applied to table.
let predicate: Predicate = meta.validate_exprs(predicate.clone())?.into();
// Determine if the negated predicates (deletes) can be applied to the
// table.
let mut n_predicates: Vec<Predicate> = vec![];
for pred in negated_predicates {
n_predicates.push(meta.validate_exprs(pred.clone())?.into());
}
// Filter set of row groups to process using predicate.
let row_groups = self.filter_row_groups(&predicate, row_groups);
// Execute against each row group
for row_group in row_groups {
row_group.column_names(&predicate, columns, &mut dst);
// Pass the delete predicates that were validated against the table's
// schema above (`n_predicates`), mirroring how the validated `predicate`
// is passed, rather than the caller's raw `negated_predicates`.
row_group.column_names(&predicate, &n_predicates, columns, &mut dst);
}
Ok(dst)
@ -1774,7 +1781,7 @@ west,host-b,100
let mut dst: BTreeSet<String> = BTreeSet::new();
dst = table
.column_names(&Predicate::default(), Selection::All, dst)
.column_names(&Predicate::default(), &[], Selection::All, dst)
.unwrap();
assert_eq!(
@ -1784,7 +1791,7 @@ west,host-b,100
// re-run and get the same answer
dst = table
.column_names(&Predicate::default(), Selection::All, dst)
.column_names(&Predicate::default(), &[], Selection::All, dst)
.unwrap();
assert_eq!(
dst.iter().cloned().collect::<Vec<_>>(),
@ -1796,6 +1803,7 @@ west,host-b,100
dst = table
.column_names(
&Predicate::new(vec![BinaryExpr::from(("time", ">=", 300_i64))]),
&[],
Selection::All,
dst,
)
@ -1809,6 +1817,7 @@ west,host-b,100
dst = table
.column_names(
&Predicate::new(vec![BinaryExpr::from(("time", ">=", 300_i64))]),
&[],
Selection::All,
BTreeSet::new(),
)
@ -1822,6 +1831,7 @@ west,host-b,100
assert!(table
.column_names(
&Predicate::new(vec![BinaryExpr::from(("time", ">=", "not a number"))]),
&[],
Selection::All,
dst,
)