test: more tests to discover what still need to be done
parent
f4f140d3b7
commit
63cc7b3fb0
|
@ -161,13 +161,15 @@ impl Predicate {
|
|||
pub fn is_empty(&self) -> bool {
|
||||
self == &EMPTY_PREDICATE
|
||||
}
|
||||
|
||||
/// Add each range [start, stop] of the delete_predicates into the predicate in
|
||||
|
||||
/// Add each range [start, stop] of the delete_predicates into the predicate in
|
||||
/// the form "time < start OR time > stop" to eliminate that range from the query
|
||||
pub fn add_delete_ranges(&mut self, delete_predicates: &[Self]) {
|
||||
pub fn add_delete_ranges(&mut self, delete_predicates: &[Self]) {
|
||||
for pred in delete_predicates {
|
||||
if let Some(range) = pred.range {
|
||||
let expr = col(TIME_COLUMN_NAME).lt(lit(range.start)).or(col(TIME_COLUMN_NAME).gt(lit(range.end)));
|
||||
let expr = col(TIME_COLUMN_NAME)
|
||||
.lt(lit(range.start))
|
||||
.or(col(TIME_COLUMN_NAME).gt(lit(range.end)));
|
||||
self.exprs.push(expr);
|
||||
}
|
||||
}
|
||||
|
@ -175,14 +177,12 @@ impl Predicate {
|
|||
|
||||
/// Add a list of not(a delete expr)
|
||||
pub fn add_delete_exprs(&mut self, delete_predicates: &[Self]) {
|
||||
|
||||
for pred in delete_predicates {
|
||||
for exp in &pred.exprs {
|
||||
self.exprs.push(exp.clone().not());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
impl fmt::Display for Predicate {
|
||||
|
|
|
@ -524,7 +524,7 @@ impl InfluxRpcPlanner {
|
|||
/// The data is sorted on (tag_col1, tag_col2, ...) so that all
|
||||
/// rows for a particular series (groups where all tags are the
|
||||
/// same) occur together in the plan
|
||||
// NGA todo: may need to add delete predicate here to emilimate deleted data at read time
|
||||
// NGA todo: may need to add delete predicate here to emilimate deleted data at read time
|
||||
pub fn read_filter<D>(&self, database: &D, predicate: Predicate) -> Result<SeriesSetPlans>
|
||||
where
|
||||
D: QueryDatabase + 'static,
|
||||
|
|
|
@ -137,7 +137,7 @@ pub trait QueryChunk: QueryChunkMeta + Debug + Send + Sync {
|
|||
&self,
|
||||
predicate: &Predicate,
|
||||
selection: Selection<'_>,
|
||||
delete_predicates: &Vec<Predicate>,
|
||||
delete_predicates: &[Predicate],
|
||||
) -> Result<SendableRecordBatchStream, Self::Error>;
|
||||
|
||||
/// Returns true if data of this chunk is sorted
|
||||
|
|
|
@ -116,16 +116,18 @@ impl<C: QueryChunk + 'static> ExecutionPlan for IOxReadFilterNode<C> {
|
|||
let selection_cols = restrict_selection(selection_cols, &chunk_table_schema);
|
||||
let selection = Selection::Some(&selection_cols);
|
||||
|
||||
let del_preds= chunk.delete_predicates();
|
||||
let del_preds = chunk.delete_predicates();
|
||||
|
||||
let stream = chunk.read_filter(&self.predicate, selection, del_preds).map_err(|e| {
|
||||
DataFusionError::Execution(format!(
|
||||
"Error creating scan for table {} chunk {}: {}",
|
||||
self.table_name,
|
||||
chunk.id(),
|
||||
e
|
||||
))
|
||||
})?;
|
||||
let stream = chunk
|
||||
.read_filter(&self.predicate, selection, del_preds)
|
||||
.map_err(|e| {
|
||||
DataFusionError::Execution(format!(
|
||||
"Error creating scan for table {} chunk {}: {}",
|
||||
self.table_name,
|
||||
chunk.id(),
|
||||
e
|
||||
))
|
||||
})?;
|
||||
|
||||
// all CPU time is now done, pass in baseline metrics to adapter
|
||||
timer.done();
|
||||
|
|
|
@ -822,7 +822,7 @@ impl QueryChunk for TestChunk {
|
|||
&self,
|
||||
predicate: &Predicate,
|
||||
_selection: Selection<'_>,
|
||||
_delete_predicates: &Vec<Predicate>,
|
||||
_delete_predicates: &[Predicate],
|
||||
) -> Result<SendableRecordBatchStream, Self::Error> {
|
||||
self.check_error()?;
|
||||
|
||||
|
@ -923,7 +923,7 @@ pub async fn raw_data(chunks: &[Arc<TestChunk>]) -> Vec<RecordBatch> {
|
|||
for c in chunks {
|
||||
let pred = Predicate::default();
|
||||
let selection = Selection::All;
|
||||
let delete_predicates: Vec<Predicate> = vec!{};
|
||||
let delete_predicates: Vec<Predicate> = vec![];
|
||||
let mut stream = c
|
||||
.read_filter(&pred, selection, &delete_predicates)
|
||||
.expect("Error in read_filter");
|
||||
|
|
111
server/src/db.rs
111
server/src/db.rs
|
@ -1285,7 +1285,9 @@ mod tests {
|
|||
use arrow::record_batch::RecordBatch;
|
||||
use bytes::Bytes;
|
||||
use chrono::{DateTime, TimeZone};
|
||||
use datafusion::logical_plan::{col, lit};
|
||||
use futures::{stream, StreamExt, TryStreamExt};
|
||||
use predicate::predicate::PredicateBuilder;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
use ::test_helpers::assert_contains;
|
||||
|
@ -1920,7 +1922,7 @@ mod tests {
|
|||
|
||||
async fn collect_read_filter(chunk: &DbChunk) -> Vec<RecordBatch> {
|
||||
chunk
|
||||
.read_filter(&Default::default(), Selection::All, &vec![])
|
||||
.read_filter(&Default::default(), Selection::All, &[])
|
||||
.unwrap()
|
||||
.collect::<Vec<_>>()
|
||||
.await
|
||||
|
@ -3782,4 +3784,111 @@ mod tests {
|
|||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_delete_open_mub() {
|
||||
::test_helpers::maybe_start_logging();
|
||||
|
||||
let db = make_db().await.db;
|
||||
// load a line at time 10 nanosecond which belong to partition starting with 0 nanosecond or "1970-01-01T00"
|
||||
write_lp(db.as_ref(), "cpu bar=1 10").await;
|
||||
write_lp(db.as_ref(), "cpu bar=2 20").await;
|
||||
let partition_key = "1970-01-01T00";
|
||||
assert_eq!(vec![partition_key], db.partition_keys().unwrap());
|
||||
|
||||
// ----
|
||||
// There is one open mub chunk
|
||||
// Let query it
|
||||
let expected = vec![
|
||||
"+-----+--------------------------------+",
|
||||
"| bar | time |",
|
||||
"+-----+--------------------------------+",
|
||||
"| 1 | 1970-01-01T00:00:00.000000010Z |",
|
||||
"| 2 | 1970-01-01T00:00:00.000000020Z |",
|
||||
"+-----+--------------------------------+",
|
||||
];
|
||||
let batches = run_query(Arc::clone(&db), "select * from cpu").await;
|
||||
assert_batches_sorted_eq!(expected, &batches);
|
||||
|
||||
// ---
|
||||
// Delete that row
|
||||
let i: f64 = 1.0;
|
||||
let expr = col("bar").eq(lit(i));
|
||||
let pred = PredicateBuilder::new()
|
||||
.table("cpu")
|
||||
.timestamp_range(0, 15)
|
||||
.add_expr(expr)
|
||||
.build();
|
||||
db.delete("cpu", &pred).await.unwrap();
|
||||
// When the above delete is issued, the open mub chunk is frozen with the delete predicate added
|
||||
|
||||
// Let query again
|
||||
// NGA todo: since MUB does not filtering data, the filtering needs to be done at scan step
|
||||
let batches = run_query(Arc::clone(&db), "select * from cpu").await;
|
||||
assert_batches_sorted_eq!(expected, &batches); // this fails when the todo is done
|
||||
|
||||
// --
|
||||
// Since chunk was frozen when the delete is issue, nothing will be frozen futher
|
||||
let mb_open_chunk = db.rollover_partition("cpu", partition_key).await.unwrap();
|
||||
assert!(mb_open_chunk.is_none());
|
||||
|
||||
// ---
|
||||
// let move MUB to RUB. The delete predicate will be included in RUB
|
||||
let mb_chunk_id = 0;
|
||||
let _mb_chunk = db.chunk("cpu", partition_key, mb_chunk_id).unwrap();
|
||||
let _rb_chunk = db
|
||||
.move_chunk_to_read_buffer("cpu", partition_key, mb_chunk_id)
|
||||
.await
|
||||
.unwrap();
|
||||
// Let query again and since RUB supports delete predicate, we should see only 1 row
|
||||
let batches = run_query(Arc::clone(&db), "select * from cpu").await;
|
||||
let expected = vec![
|
||||
"+-----+--------------------------------+",
|
||||
"| bar | time |",
|
||||
"+-----+--------------------------------+",
|
||||
"| 2 | 1970-01-01T00:00:00.000000020Z |",
|
||||
"+-----+--------------------------------+",
|
||||
];
|
||||
assert_batches_sorted_eq!(expected, &batches);
|
||||
|
||||
// ---
|
||||
// Write the RB chunk to Object Store but keep it in RB
|
||||
let pq_chunk = db
|
||||
.persist_partition(
|
||||
"cpu",
|
||||
partition_key,
|
||||
Instant::now() + Duration::from_secs(1),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let pq_chunk_id = pq_chunk.id();
|
||||
// we should have chunks in both the read buffer and pq
|
||||
assert!(mutable_chunk_ids(&db, partition_key).is_empty());
|
||||
assert_eq!(read_buffer_chunk_ids(&db, partition_key), vec![pq_chunk_id]);
|
||||
assert_eq!(
|
||||
read_parquet_file_chunk_ids(&db, partition_key),
|
||||
vec![pq_chunk_id]
|
||||
);
|
||||
// Let query again from RUB and should see only 1 row
|
||||
let batches = run_query(Arc::clone(&db), "select * from cpu").await;
|
||||
assert_batches_sorted_eq!(expected, &batches);
|
||||
|
||||
// ---
|
||||
// Unload RB chunk but keep it in OS
|
||||
let _pq_chunk = db
|
||||
.unload_read_buffer("cpu", partition_key, pq_chunk_id)
|
||||
.unwrap();
|
||||
|
||||
// we should only have chunk in os
|
||||
assert!(mutable_chunk_ids(&db, partition_key).is_empty());
|
||||
assert!(read_buffer_chunk_ids(&db, partition_key).is_empty());
|
||||
assert_eq!(
|
||||
read_parquet_file_chunk_ids(&db, partition_key),
|
||||
vec![pq_chunk_id]
|
||||
);
|
||||
// Let query again but this time from OS
|
||||
let batches = run_query(Arc::clone(&db), "select * from cpu").await;
|
||||
assert_batches_sorted_eq!(expected, &batches);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,7 +5,11 @@ use chrono::{DateTime, Utc};
|
|||
use data_types::{chunk_metadata::ChunkOrder, partition_metadata};
|
||||
use datafusion::physical_plan::SendableRecordBatchStream;
|
||||
use datafusion_util::MemoryStream;
|
||||
use internal_types::{access::AccessRecorder, schema::{Schema, sort::SortKey}, selection::Selection};
|
||||
use internal_types::{
|
||||
access::AccessRecorder,
|
||||
schema::{sort::SortKey, Schema},
|
||||
selection::Selection,
|
||||
};
|
||||
use iox_object_store::ParquetFilePath;
|
||||
use mutable_buffer::chunk::snapshot::ChunkSnapshot;
|
||||
use observability_deps::tracing::debug;
|
||||
|
@ -221,13 +225,16 @@ impl DbChunk {
|
|||
self.time_of_last_write
|
||||
}
|
||||
|
||||
pub fn to_rub_negated_predicates(delete_predicates: &Vec<Predicate>) -> Result<Vec<read_buffer::Predicate>> {
|
||||
pub fn to_rub_negated_predicates(
|
||||
delete_predicates: &[Predicate],
|
||||
) -> Result<Vec<read_buffer::Predicate>> {
|
||||
let mut rub_preds: Vec<read_buffer::Predicate> = vec![];
|
||||
for pred in delete_predicates {
|
||||
let rub_pred = to_read_buffer_predicate(pred).context(PredicateConversion)?;
|
||||
rub_preds.push(rub_pred);
|
||||
}
|
||||
|
||||
debug!(?rub_preds, "RUB delete predicates");
|
||||
Ok(rub_preds)
|
||||
}
|
||||
}
|
||||
|
@ -324,7 +331,7 @@ impl QueryChunk for DbChunk {
|
|||
&self,
|
||||
predicate: &Predicate,
|
||||
selection: Selection<'_>,
|
||||
delete_predicates: &Vec<Predicate>,
|
||||
delete_predicates: &[Predicate],
|
||||
) -> Result<SendableRecordBatchStream, Self::Error> {
|
||||
// Predicate is not required to be applied for correctness. We only pushed it down
|
||||
// when possible for performance gain
|
||||
|
@ -332,15 +339,23 @@ impl QueryChunk for DbChunk {
|
|||
debug!(?predicate, "Input Predicate to read_filter");
|
||||
self.access_recorder.record_access_now();
|
||||
|
||||
debug!(?delete_predicates, "Input Delete Predicates to read_filter");
|
||||
|
||||
// add negated deleted ranges to the predicate
|
||||
let mut pred_with_deleted_ranges = predicate.clone();
|
||||
pred_with_deleted_ranges.add_delete_ranges(delete_predicates);
|
||||
debug!(?pred_with_deleted_ranges, "Input Predicate plus deleted ranges");
|
||||
debug!(
|
||||
?pred_with_deleted_ranges,
|
||||
"Input Predicate plus deleted ranges"
|
||||
);
|
||||
|
||||
// add negated deleted predicates
|
||||
let mut pred_wth_deleted_exprs = pred_with_deleted_ranges.clone();
|
||||
let mut pred_wth_deleted_exprs = pred_with_deleted_ranges.clone();
|
||||
pred_wth_deleted_exprs.add_delete_exprs(delete_predicates);
|
||||
debug!(?pred_wth_deleted_exprs, "Input Predicate plus deleted ranges and deleted predicates");
|
||||
debug!(
|
||||
?pred_wth_deleted_exprs,
|
||||
"Input Predicate plus deleted ranges and deleted predicates"
|
||||
);
|
||||
|
||||
match &self.state {
|
||||
State::MutableBuffer { chunk, .. } => {
|
||||
|
@ -350,11 +365,12 @@ impl QueryChunk for DbChunk {
|
|||
}
|
||||
State::ReadBuffer { chunk, .. } => {
|
||||
// Only apply pushdownable predicates
|
||||
let rb_predicate =
|
||||
match to_read_buffer_predicate(&pred_with_deleted_ranges).context(PredicateConversion) {
|
||||
Ok(predicate) => predicate,
|
||||
Err(_) => read_buffer::Predicate::default(),
|
||||
};
|
||||
let rb_predicate = match to_read_buffer_predicate(&pred_with_deleted_ranges)
|
||||
.context(PredicateConversion)
|
||||
{
|
||||
Ok(predicate) => predicate,
|
||||
Err(_) => read_buffer::Predicate::default(),
|
||||
};
|
||||
|
||||
// combine all delete expressions to RUB's negated ones
|
||||
let negated_delete_exprs = Self::to_rub_negated_predicates(delete_predicates)?;
|
||||
|
@ -374,13 +390,11 @@ impl QueryChunk for DbChunk {
|
|||
schema.into(),
|
||||
)))
|
||||
}
|
||||
State::ParquetFile { chunk, .. } => {
|
||||
chunk
|
||||
.read_filter(&pred_wth_deleted_exprs, selection)
|
||||
.context(ParquetFileChunkError {
|
||||
chunk_id: self.id(),
|
||||
})
|
||||
}
|
||||
State::ParquetFile { chunk, .. } => chunk
|
||||
.read_filter(&pred_wth_deleted_exprs, selection)
|
||||
.context(ParquetFileChunkError {
|
||||
chunk_id: self.id(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -544,7 +558,7 @@ mod tests {
|
|||
let t2 = chunk.access_recorder().get_metrics();
|
||||
|
||||
snapshot
|
||||
.read_filter(&Default::default(), Selection::All, &vec![])
|
||||
.read_filter(&Default::default(), Selection::All, &[])
|
||||
.unwrap();
|
||||
let t3 = chunk.access_recorder().get_metrics();
|
||||
|
||||
|
|
|
@ -144,6 +144,8 @@ pub fn persist_chunks(
|
|||
);
|
||||
}
|
||||
|
||||
// NGA todo: we hit this error if there are rows but they are deleted - todo before merge: open a ticket
|
||||
// Need to think a way to handle this
|
||||
let to_persist = to_persist.expect("should be rows to persist");
|
||||
|
||||
let (new_chunk_id, new_chunk) = partition_write.create_rub_chunk(
|
||||
|
|
|
@ -87,11 +87,13 @@ pub(super) fn write_chunk_to_object_store(
|
|||
let (partition_checkpoint, database_checkpoint) =
|
||||
collect_checkpoints(flush_handle.checkpoint(), &db.catalog);
|
||||
|
||||
|
||||
|
||||
// Get RecordBatchStream of data from the read buffer chunk
|
||||
let stream = db_chunk
|
||||
.read_filter(&Default::default(), Selection::All, db_chunk.delete_predicates())
|
||||
.read_filter(
|
||||
&Default::default(),
|
||||
Selection::All,
|
||||
db_chunk.delete_predicates(),
|
||||
)
|
||||
.expect("read filter should be infallible");
|
||||
|
||||
// check that the upcoming state change will very likely succeed
|
||||
|
|
|
@ -1491,8 +1491,8 @@ async fn test_delete() {
|
|||
// Delete some data
|
||||
let table = "cpu";
|
||||
let start = "0";
|
||||
let stop = "12000000000000";
|
||||
let pred = "region = west";
|
||||
let stop = "1200";
|
||||
let pred = r#"region = "west""#;
|
||||
let _del = management_client
|
||||
.delete(db_name.clone(), table, start, stop, pred)
|
||||
.await
|
||||
|
|
Loading…
Reference in New Issue