Merge branch 'main' into sgc/issue/subquery_6891_03

pull/24376/head
Stuart Carnie 2023-05-04 08:58:05 +10:00
commit 60931631b8
GPG Key ID: 848D9C9718D78B4F
4 changed files with 73 additions and 0 deletions


@@ -121,6 +121,18 @@ async fn duplicates_parquet_many() {
    .await;
}

#[tokio::test]
async fn duplicates_parquet_50() {
    test_helpers::maybe_start_logging();

    TestCase {
        input: "cases/in/duplicates_parquet_50_files.sql",
        chunk_stage: ChunkStage::Parquet,
    }
    .run()
    .await;
}

#[tokio::test]
async fn gapfill() {
    test_helpers::maybe_start_logging();


@@ -0,0 +1,14 @@
-- Test setup for running with 50 parquet files
-- IOX_SETUP: FiftySortedSameParquetFiles
-- each of the 50 parquet files contains the same single row (f1=1, f2=2, identical tags and timestamp),
-- so all 50 rows deduplicate to one row at query time
select count(1), sum(f1) from m;
-- All 50 files are sorted, but since the file count exceeds max_parquet_fanout, which is set to 40,
-- the pre-sorted order cannot be used and a SortExec is added instead
-- When running this test, a warning "cannot use pre-sorted parquet files, fan-out too wide" is printed
-- IOX_COMPARE: uuid
EXPLAIN select count(1), sum(f1) from m;
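
The fan-out behaviour described in the comments above can be pictured with a minimal Rust sketch. This is an illustration only, not the actual planner code; the function name, the limit of 40, and the warning text are taken from the comments in this test case.

// Sketch of the decision described above: once the number of sorted parquet
// files exceeds max_parquet_fanout, the pre-sorted order is abandoned and the
// physical plan gains an explicit SortExec instead.
fn use_presorted_files(num_sorted_files: usize, max_parquet_fanout: usize) -> bool {
    if num_sorted_files > max_parquet_fanout {
        // Mirrors the warning quoted in the comments above.
        println!("cannot use pre-sorted parquet files, fan-out too wide");
        return false;
    }
    true
}

fn main() {
    // 50 files against a limit of 40: the pre-sort cannot be used, which is
    // why the EXPLAIN output in the expected file below contains a SortExec.
    assert!(!use_presorted_files(50, 40));
}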


@@ -0,0 +1,25 @@
-- Test Setup: FiftySortedSameParquetFiles
-- SQL: select count(1), sum(f1) from m;
+-----------------+-----------+
| COUNT(Int64(1)) | SUM(m.f1) |
+-----------------+-----------+
| 1 | 1.0 |
+-----------------+-----------+
-- SQL: EXPLAIN select count(1), sum(f1) from m;
-- Results After Normalizing UUIDs
----------
| plan_type | plan |
----------
| logical_plan | Aggregate: groupBy=[[]], aggr=[[COUNT(Int64(1)), SUM(m.f1)]] |
| | TableScan: m projection=[f1] |
| physical_plan | AggregateExec: mode=Final, gby=[], aggr=[COUNT(Int64(1)), SUM(m.f1)] |
| | CoalescePartitionsExec |
| | AggregateExec: mode=Partial, gby=[], aggr=[COUNT(Int64(1)), SUM(m.f1)] |
| | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 |
| | ProjectionExec: expr=[f1@1 as f1] |
| | DeduplicateExec: [tag1@2 ASC,tag2@3 ASC,tag3@4 ASC,tag4@5 ASC,time@6 ASC] |
| | SortPreservingMergeExec: [tag1@2 ASC,tag2@3 ASC,tag3@4 ASC,tag4@5 ASC,time@6 ASC,__chunk_order@0 ASC] |
| | SortExec: expr=[tag1@2 ASC,tag2@3 ASC,tag3@4 ASC,tag4@5 ASC,time@6 ASC,__chunk_order@0 ASC] |
| | ParquetExec: limit=None, partitions={4 groups: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet, 1/1/1/00000000-0000-0000-0000-000000000001.parquet, 1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/00000000-0000-0000-0000-000000000003.parquet, 1/1/1/00000000-0000-0000-0000-000000000004.parquet, 1/1/1/00000000-0000-0000-0000-000000000005.parquet, 1/1/1/00000000-0000-0000-0000-000000000006.parquet, 1/1/1/00000000-0000-0000-0000-000000000007.parquet, 1/1/1/00000000-0000-0000-0000-000000000008.parquet, 1/1/1/00000000-0000-0000-0000-000000000009.parquet, 1/1/1/00000000-0000-0000-0000-00000000000a.parquet, 1/1/1/00000000-0000-0000-0000-00000000000b.parquet, 1/1/1/00000000-0000-0000-0000-00000000000c.parquet], [1/1/1/00000000-0000-0000-0000-00000000000d.parquet, 1/1/1/00000000-0000-0000-0000-00000000000e.parquet, 1/1/1/00000000-0000-0000-0000-00000000000f.parquet, 1/1/1/00000000-0000-0000-0000-000000000010.parquet, 1/1/1/00000000-0000-0000-0000-000000000011.parquet, 1/1/1/00000000-0000-0000-0000-000000000012.parquet, 1/1/1/00000000-0000-0000-0000-000000000013.parquet, 1/1/1/00000000-0000-0000-0000-000000000014.parquet, 1/1/1/00000000-0000-0000-0000-000000000015.parquet, 1/1/1/00000000-0000-0000-0000-000000000016.parquet, 1/1/1/00000000-0000-0000-0000-000000000017.parquet, 1/1/1/00000000-0000-0000-0000-000000000018.parquet, 1/1/1/00000000-0000-0000-0000-000000000019.parquet], [1/1/1/00000000-0000-0000-0000-00000000001a.parquet, 1/1/1/00000000-0000-0000-0000-00000000001b.parquet, 1/1/1/00000000-0000-0000-0000-00000000001c.parquet, 1/1/1/00000000-0000-0000-0000-00000000001d.parquet, 1/1/1/00000000-0000-0000-0000-00000000001e.parquet, 1/1/1/00000000-0000-0000-0000-00000000001f.parquet, 1/1/1/00000000-0000-0000-0000-000000000020.parquet, 1/1/1/00000000-0000-0000-0000-000000000021.parquet, 1/1/1/00000000-0000-0000-0000-000000000022.parquet, 1/1/1/00000000-0000-0000-0000-000000000023.parquet, 1/1/1/00000000-0000-0000-0000-000000000024.parquet, 1/1/1/00000000-0000-0000-0000-000000000025.parquet], [1/1/1/00000000-0000-0000-0000-000000000026.parquet, 1/1/1/00000000-0000-0000-0000-000000000027.parquet, 1/1/1/00000000-0000-0000-0000-000000000028.parquet, 1/1/1/00000000-0000-0000-0000-000000000029.parquet, 1/1/1/00000000-0000-0000-0000-00000000002a.parquet, 1/1/1/00000000-0000-0000-0000-00000000002b.parquet, 1/1/1/00000000-0000-0000-0000-00000000002c.parquet, 1/1/1/00000000-0000-0000-0000-00000000002d.parquet, 1/1/1/00000000-0000-0000-0000-00000000002e.parquet, 1/1/1/00000000-0000-0000-0000-00000000002f.parquet, 1/1/1/00000000-0000-0000-0000-000000000030.parquet, 1/1/1/00000000-0000-0000-0000-000000000031.parquet]]}, projection=[__chunk_order, f1, tag1, tag2, tag3, tag4, time] |
| | |
----------
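
Why the aggregate over 50 files comes out as COUNT = 1 and SUM(f1) = 1.0 can be seen with a small standalone Rust illustration. This is not IOx code; it merely assumes, as the DeduplicateExec sort key above suggests, that rows sharing the same tag set and timestamp collapse to a single row before the aggregate runs.

use std::collections::HashMap;

fn main() {
    // 50 copies of the same point, as written by the FiftySortedSameParquetFiles setup.
    let rows = vec![(("A", "B", "C", "D", 2001_i64), 1.0_f64); 50];

    // Deduplicate on (tag1, tag2, tag3, tag4, time), keeping the last value.
    let mut dedup: HashMap<(&str, &str, &str, &str, i64), f64> = HashMap::new();
    for (key, f1) in rows {
        dedup.insert(key, f1);
    }

    // A single row survives, so COUNT(1) = 1 and SUM(f1) = 1.0.
    assert_eq!(dedup.len(), 1);
    assert_eq!(dedup.values().sum::<f64>(), 1.0);
}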


@@ -269,6 +269,28 @@ pub static SETUPS: Lazy<HashMap<SetupName, SetupSteps>> = Lazy::new(|| {
                })
                .collect::<Vec<_>>(),
        ),
        (
            "FiftySortedSameParquetFiles",
            (0..50)
                .flat_map(|_i| {
                    let write = Step::WriteLineProtocol(
                        "m,tag1=A,tag2=B,tag3=C,tag4=D f1=1,f2=2 2001".into(), // duplicated across all chunks
                    );
                    [
                        Step::RecordNumParquetFiles,
                        write,
                        Step::Persist,
                        Step::WaitForPersisted2 {
                            expected_increase: 1,
                        },
                    ]
                    .into_iter()
                })
                .collect::<Vec<_>>(),
        ),
        (
            "OneMeasurementManyFields",
            vec![