test: deduplication across memory and parquet chunks
parent 756a50daa6
commit e0b1e81fc9

@@ -124,6 +124,76 @@ async fn test_deduplicate_rows_in_write_buffer_parquet() {
    insta::assert_snapshot!(plan);
}

#[test_log::test(tokio::test)]
async fn test_deduplicate_rows_in_write_buffer_both_memory_and_parquet() {
    let wal_config = WalConfig {
        gen1_duration: Gen1Duration::new_1m(),
        max_write_buffer_size: 100,
        flush_interval: Duration::from_millis(10),
        // uses a small snapshot size to get parquet persisted early, so that queries are
        // served by chunks from both the in-memory buffer and persisted parquet
        snapshot_size: 1,
    };
    let service = TestService::setup(wal_config).await;

    service
        .do_writes(
            "foo",
            [
                TestWrite::lp("bar value=false", 1),
                TestWrite::lp("bar value=false", 2),
                TestWrite::lp("bar value=false", 3),
            ],
        )
        .await;

    service.wait_for_snapshot_sequence(1).await;

    let batches = service.query_sql("foo", "select * from bar").await;
    assert_batches_sorted_eq!(
        [
            "+---------------------+-------+",
            "| time                | value |",
            "+---------------------+-------+",
            "| 1970-01-01T00:00:01 | false |", // note that this is `false`
            "| 1970-01-01T00:00:02 | false |",
            "| 1970-01-01T00:00:03 | false |",
            "+---------------------+-------+",
        ],
        &batches
    );

    // write a duplicate row, but don't trigger a snapshot, so the subsequent query will be served
    // by chunks in both the memory buffer and persisted parquet; this also flips `value` to `true`
    // to show that the most recently written line appears in the query result:
    service
        .do_writes("foo", [TestWrite::lp("bar value=true", 1)])
        .await;

    let batches = service.query_sql("foo", "select * from bar").await;
    assert_batches_sorted_eq!(
        [
            "+---------------------+-------+",
            "| time                | value |",
            "+---------------------+-------+",
            "| 1970-01-01T00:00:01 | true  |", // note that this is now `true`
            "| 1970-01-01T00:00:02 | false |",
            "| 1970-01-01T00:00:03 | false |",
            "+---------------------+-------+",
        ],
        &batches
    );

    let batches = service.query_sql("foo", "explain select * from bar").await;
    // There should be a union of a ParquetExec and a RecordBatchesExec that feeds into a
    // DeduplicateExec in the query plan, i.e., there are both in-memory buffer chunks and
    // persisted parquet chunks:
    let plan = arrow::util::pretty::pretty_format_batches(&batches)
        .unwrap()
        .to_string();
    insta::assert_snapshot!(plan);
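    // In addition to the snapshot above, a hedged sketch of a snapshot-free check of the
    // same property (illustrative, not part of this change): the rendered plan should
    // mention both chunk sources and the deduplication operator by name.
    assert!(plan.contains("RecordBatchesExec"), "expected in-memory buffer chunks in the plan");
    assert!(plan.contains("ParquetExec"), "expected persisted parquet chunks in the plan");
    assert!(plan.contains("DeduplicateExec"), "expected deduplication in the plan");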
}
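
// A minimal, self-contained sketch (an assumption, not code from this commit) of the
// poll-until pattern that a helper like `TestService::wait_for_snapshot_sequence` presumably
// relies on: re-check a condition on an interval and fail the test if it never becomes true.
// The `condition`/`timeout` parameters and the 10 ms poll interval are illustrative only.
async fn wait_until(mut condition: impl FnMut() -> bool, timeout: std::time::Duration) {
    let deadline = tokio::time::Instant::now() + timeout;
    while !condition() {
        assert!(
            tokio::time::Instant::now() < deadline,
            "condition not met within {timeout:?}"
        );
        tokio::time::sleep(std::time::Duration::from_millis(10)).await;
    }
}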

struct TestService {
    query_executor: Arc<dyn QueryExecutor>,
    write_buffer: Arc<dyn WriteBuffer>,

@@ -0,0 +1,18 @@
---
source: influxdb3_server/tests/lib.rs
expression: plan
---
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type     | plan |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan  | TableScan: bar projection=[time, value] |
| physical_plan | ProjectionExec: expr=[time@0 as time, value@1 as value] |
|               |   DeduplicateExec: [time@0 ASC] |
|               |     SortPreservingMergeExec: [time@0 ASC, __chunk_order@2 ASC] |
|               |       UnionExec |
|               |         SortExec: expr=[time@0 ASC, __chunk_order@2 ASC], preserve_partitioning=[false] |
|               |           RecordBatchesExec: chunks=1, projection=[time, value, __chunk_order] |
|               |         SortExec: expr=[time@0 ASC, __chunk_order@2 ASC], preserve_partitioning=[false] |
|               |           ParquetExec: file_groups={1 group: [[test-node/dbs/foo-1/bar-0/1970-01-01/00-00/0000000003.parquet]]}, projection=[time, value, __chunk_order], output_ordering=[__chunk_order@2 ASC] |
|               |  |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+