diff --git a/influxdb_iox/tests/query_tests2/cases.rs b/influxdb_iox/tests/query_tests2/cases.rs index 7113ae1eca..6c8d803a6f 100644 --- a/influxdb_iox/tests/query_tests2/cases.rs +++ b/influxdb_iox/tests/query_tests2/cases.rs @@ -121,6 +121,18 @@ async fn duplicates_parquet_many() { .await; } +#[tokio::test] +async fn duplicates_parquet_50() { + test_helpers::maybe_start_logging(); + + TestCase { + input: "cases/in/duplicates_parquet_50_files.sql", + chunk_stage: ChunkStage::Parquet, + } + .run() + .await; +} + #[tokio::test] async fn gapfill() { test_helpers::maybe_start_logging(); diff --git a/influxdb_iox/tests/query_tests2/cases/in/duplicates_parquet_50_files.sql b/influxdb_iox/tests/query_tests2/cases/in/duplicates_parquet_50_files.sql new file mode 100644 index 0000000000..704756cd9d --- /dev/null +++ b/influxdb_iox/tests/query_tests2/cases/in/duplicates_parquet_50_files.sql @@ -0,0 +1,14 @@ +-- Test setup for running with 50 parquet files +-- IOX_SETUP: FiftySortedSameParquetFiles + + +-- each parquet file has either 2 rows, one with f=1 and the other with f=2 +-- and then there are 50 that have a single row with f=3 +select count(1), sum(f1) from m; + +-- All 50 files are sorted but since it is larger than max_parquet_fanout which is set 40, +-- we do not use the presort and add a SortExec +-- WHen running this test, a warning "cannot use pre-sorted parquet files, fan-out too wide" is printed +-- IOX_COMPARE: uuid +EXPLAIN select count(1), sum(f1) from m; + diff --git a/influxdb_iox/tests/query_tests2/cases/in/duplicates_parquet_50_files.sql.expected b/influxdb_iox/tests/query_tests2/cases/in/duplicates_parquet_50_files.sql.expected new file mode 100644 index 0000000000..0e397532e6 --- /dev/null +++ b/influxdb_iox/tests/query_tests2/cases/in/duplicates_parquet_50_files.sql.expected @@ -0,0 +1,25 @@ +-- Test Setup: FiftySortedSameParquetFiles +-- SQL: select count(1), sum(f1) from m; ++-----------------+-----------+ +| COUNT(Int64(1)) | SUM(m.f1) | ++-----------------+-----------+ +| 1 | 1.0 | ++-----------------+-----------+ +-- SQL: EXPLAIN select count(1), sum(f1) from m; +-- Results After Normalizing UUIDs +---------- +| plan_type | plan | +---------- +| logical_plan | Aggregate: groupBy=[[]], aggr=[[COUNT(Int64(1)), SUM(m.f1)]] | +| | TableScan: m projection=[f1] | +| physical_plan | AggregateExec: mode=Final, gby=[], aggr=[COUNT(Int64(1)), SUM(m.f1)] | +| | CoalescePartitionsExec | +| | AggregateExec: mode=Partial, gby=[], aggr=[COUNT(Int64(1)), SUM(m.f1)] | +| | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 | +| | ProjectionExec: expr=[f1@1 as f1] | +| | DeduplicateExec: [tag1@2 ASC,tag2@3 ASC,tag3@4 ASC,tag4@5 ASC,time@6 ASC] | +| | SortPreservingMergeExec: [tag1@2 ASC,tag2@3 ASC,tag3@4 ASC,tag4@5 ASC,time@6 ASC,__chunk_order@0 ASC] | +| | SortExec: expr=[tag1@2 ASC,tag2@3 ASC,tag3@4 ASC,tag4@5 ASC,time@6 ASC,__chunk_order@0 ASC] | +| | ParquetExec: limit=None, partitions={4 groups: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet, 1/1/1/00000000-0000-0000-0000-000000000001.parquet, 1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/00000000-0000-0000-0000-000000000003.parquet, 1/1/1/00000000-0000-0000-0000-000000000004.parquet, 1/1/1/00000000-0000-0000-0000-000000000005.parquet, 1/1/1/00000000-0000-0000-0000-000000000006.parquet, 1/1/1/00000000-0000-0000-0000-000000000007.parquet, 1/1/1/00000000-0000-0000-0000-000000000008.parquet, 1/1/1/00000000-0000-0000-0000-000000000009.parquet, 1/1/1/00000000-0000-0000-0000-00000000000a.parquet, 1/1/1/00000000-0000-0000-0000-00000000000b.parquet, 1/1/1/00000000-0000-0000-0000-00000000000c.parquet], [1/1/1/00000000-0000-0000-0000-00000000000d.parquet, 1/1/1/00000000-0000-0000-0000-00000000000e.parquet, 1/1/1/00000000-0000-0000-0000-00000000000f.parquet, 1/1/1/00000000-0000-0000-0000-000000000010.parquet, 1/1/1/00000000-0000-0000-0000-000000000011.parquet, 1/1/1/00000000-0000-0000-0000-000000000012.parquet, 1/1/1/00000000-0000-0000-0000-000000000013.parquet, 1/1/1/00000000-0000-0000-0000-000000000014.parquet, 1/1/1/00000000-0000-0000-0000-000000000015.parquet, 1/1/1/00000000-0000-0000-0000-000000000016.parquet, 1/1/1/00000000-0000-0000-0000-000000000017.parquet, 1/1/1/00000000-0000-0000-0000-000000000018.parquet, 1/1/1/00000000-0000-0000-0000-000000000019.parquet], [1/1/1/00000000-0000-0000-0000-00000000001a.parquet, 1/1/1/00000000-0000-0000-0000-00000000001b.parquet, 1/1/1/00000000-0000-0000-0000-00000000001c.parquet, 1/1/1/00000000-0000-0000-0000-00000000001d.parquet, 1/1/1/00000000-0000-0000-0000-00000000001e.parquet, 1/1/1/00000000-0000-0000-0000-00000000001f.parquet, 1/1/1/00000000-0000-0000-0000-000000000020.parquet, 1/1/1/00000000-0000-0000-0000-000000000021.parquet, 1/1/1/00000000-0000-0000-0000-000000000022.parquet, 1/1/1/00000000-0000-0000-0000-000000000023.parquet, 1/1/1/00000000-0000-0000-0000-000000000024.parquet, 1/1/1/00000000-0000-0000-0000-000000000025.parquet], [1/1/1/00000000-0000-0000-0000-000000000026.parquet, 1/1/1/00000000-0000-0000-0000-000000000027.parquet, 1/1/1/00000000-0000-0000-0000-000000000028.parquet, 1/1/1/00000000-0000-0000-0000-000000000029.parquet, 1/1/1/00000000-0000-0000-0000-00000000002a.parquet, 1/1/1/00000000-0000-0000-0000-00000000002b.parquet, 1/1/1/00000000-0000-0000-0000-00000000002c.parquet, 1/1/1/00000000-0000-0000-0000-00000000002d.parquet, 1/1/1/00000000-0000-0000-0000-00000000002e.parquet, 1/1/1/00000000-0000-0000-0000-00000000002f.parquet, 1/1/1/00000000-0000-0000-0000-000000000030.parquet, 1/1/1/00000000-0000-0000-0000-000000000031.parquet]]}, projection=[__chunk_order, f1, tag1, tag2, tag3, tag4, time] | +| | | +---------- \ No newline at end of file diff --git a/influxdb_iox/tests/query_tests2/setups.rs b/influxdb_iox/tests/query_tests2/setups.rs index 74d5b2df59..c87a275942 100644 --- a/influxdb_iox/tests/query_tests2/setups.rs +++ b/influxdb_iox/tests/query_tests2/setups.rs @@ -269,6 +269,28 @@ pub static SETUPS: Lazy> = Lazy::new(|| { }) .collect::>(), ), + ( + "FiftySortedSameParquetFiles", + (0..50) + .flat_map(|_i| { + + let write = Step::WriteLineProtocol( + "m,tag1=A,tag2=B,tag3=C,tag4=D f1=1,f2=2 2001".into(), // duplicated across all chunks + ); + + [ + Step::RecordNumParquetFiles, + write, + Step::Persist, + Step::WaitForPersisted2 { + expected_increase: 1, + }, + ] + .into_iter() + }) + .collect::>(), + ), + ( "OneMeasurementManyFields", vec![