diff --git a/server/src/query_tests/sql.rs b/server/src/query_tests/sql.rs index f148b9160a..4254747301 100644 --- a/server/src/query_tests/sql.rs +++ b/server/src/query_tests/sql.rs @@ -409,3 +409,230 @@ async fn sql_select_with_schema_merge_subset() { &expected ); } + +#[tokio::test] +async fn sql_predicate_pushdown() { + // Test 1: Select everything + // + // Check correctness + let expected = vec![ + "+---------------+----------+------------+--------------+-------------------+-------+-----------+-----------+-----------------+", + "| partition_key | chunk_id | table_name | column_name | storage | count | min_value | max_value | estimated_bytes |", + "+---------------+----------+------------+--------------+-------------------+-------+-----------+-----------+-----------------+", + "| 1970-01-01T00 | 0 | h2o | city | ReadBuffer | 2 | Boston | time | 585 |", + "| 1970-01-01T00 | 0 | h2o | other_temp | ReadBuffer | 2 | 70.4 | 70.4 | 369 |", + "| 1970-01-01T00 | 0 | h2o | state | ReadBuffer | 2 | Boston | time | 585 |", + "| 1970-01-01T00 | 0 | h2o | temp | ReadBuffer | 2 | 70.4 | 70.4 | 369 |", + "| 1970-01-01T00 | 0 | h2o | time | ReadBuffer | 2 | 50 | 250 | 51 |", + "| 1970-01-01T00 | 0 | o2 | __dictionary | OpenMutableBuffer | | | | 112 |", + "| 1970-01-01T00 | 0 | o2 | city | OpenMutableBuffer | 1 | Boston | Boston | 17 |", + "| 1970-01-01T00 | 0 | o2 | reading | OpenMutableBuffer | 1 | 51 | 51 | 25 |", + "| 1970-01-01T00 | 0 | o2 | state | OpenMutableBuffer | 2 | CA | MA | 17 |", + "| 1970-01-01T00 | 0 | o2 | temp | OpenMutableBuffer | 2 | 53.4 | 79 | 25 |", + "| 1970-01-01T00 | 0 | o2 | time | OpenMutableBuffer | 2 | 50 | 300 | 25 |", + "| 1970-01-01T00 | 1 | h2o | __dictionary | OpenMutableBuffer | | | | 94 |", + "| 1970-01-01T00 | 1 | h2o | city | OpenMutableBuffer | 1 | Boston | Boston | 13 |", + "| 1970-01-01T00 | 1 | h2o | other_temp | OpenMutableBuffer | 1 | 72.4 | 72.4 | 17 |", + "| 1970-01-01T00 | 1 | h2o | state | OpenMutableBuffer | 1 | CA | CA | 13 |", + "| 1970-01-01T00 | 1 | h2o | time | OpenMutableBuffer | 1 | 350 | 350 | 17 |", + "+---------------+----------+------------+--------------+-------------------+-------+-----------+-----------+-----------------+", + ]; + run_sql_test_case!( + TwoMeasurementsManyFieldsTwoChunks {}, + "SELECT * from system.chunk_columns", + &expected + ); + + // Check if the predicate is pushed down in explain verbose + // TODO: List full explain for now for reviewers to verify the pushdown. When we are happy with the outcome, I will + // only check if the actual includes work we want to avoid recapture the tests when add more stuff into the explain + let expected = vec![ + "+-----------------------------------------+--------------------------------------------------------------------------------------------------------------------------------+", + "| plan_type | plan |", + "+-----------------------------------------+--------------------------------------------------------------------------------------------------------------------------------+", + "| | MemoryExec: partitions=1, partition_sizes=[1] |", + "| | TableScan: system.chunk_columns projection=None |", + "| | TableScan: system.chunk_columns projection=Some([0, 1, 2, 3, 4, 5, 6, 7, 8]) |", + "| | TableScan: system.chunk_columns projection=Some([0, 1, 2, 3, 4, 5, 6, 7, 8]) |", + "| logical_plan | Projection: #partition_key, #chunk_id, #table_name, #column_name, #storage, #count, #min_value, #max_value, #estimated_bytes |", + "| logical_plan after projection_push_down | Projection: #partition_key, #chunk_id, #table_name, #column_name, #storage, #count, #min_value, #max_value, #estimated_bytes |", + "| logical_plan after projection_push_down | Projection: #partition_key, #chunk_id, #table_name, #column_name, #storage, #count, #min_value, #max_value, #estimated_bytes |", + "| physical_plan | ProjectionExec: expr=[partition_key, chunk_id, table_name, column_name, storage, count, min_value, max_value, estimated_bytes] |", + "+-----------------------------------------+--------------------------------------------------------------------------------------------------------------------------------+", + ]; + run_sql_test_case!( + TwoMeasurementsManyFieldsTwoChunks {}, + "EXPLAIN VERBOSE SELECT * from system.chunk_columns", + &expected + ); + + // Test 2: One push-down expression estimated_bytes > 20 + // + // Check correctness + let expected = vec![ + "+---------------+----------+------------+--------------+-------------------+-------+-----------+-----------+-----------------+", + "| partition_key | chunk_id | table_name | column_name | storage | count | min_value | max_value | estimated_bytes |", + "+---------------+----------+------------+--------------+-------------------+-------+-----------+-----------+-----------------+", + "| 1970-01-01T00 | 0 | h2o | city | ReadBuffer | 2 | Boston | time | 585 |", + "| 1970-01-01T00 | 0 | h2o | other_temp | ReadBuffer | 2 | 70.4 | 70.4 | 369 |", + "| 1970-01-01T00 | 0 | h2o | state | ReadBuffer | 2 | Boston | time | 585 |", + "| 1970-01-01T00 | 0 | h2o | temp | ReadBuffer | 2 | 70.4 | 70.4 | 369 |", + "| 1970-01-01T00 | 0 | h2o | time | ReadBuffer | 2 | 50 | 250 | 51 |", + "| 1970-01-01T00 | 0 | o2 | __dictionary | OpenMutableBuffer | | | | 112 |", + "| 1970-01-01T00 | 0 | o2 | reading | OpenMutableBuffer | 1 | 51 | 51 | 25 |", + "| 1970-01-01T00 | 0 | o2 | temp | OpenMutableBuffer | 2 | 53.4 | 79 | 25 |", + "| 1970-01-01T00 | 0 | o2 | time | OpenMutableBuffer | 2 | 50 | 300 | 25 |", + "| 1970-01-01T00 | 1 | h2o | __dictionary | OpenMutableBuffer | | | | 94 |", + "+---------------+----------+------------+--------------+-------------------+-------+-----------+-----------+-----------------+", + ]; + run_sql_test_case!( + TwoMeasurementsManyFieldsTwoChunks {}, + "SELECT * from system.chunk_columns where estimated_bytes > 20", + &expected + ); + + // Check if the predicate is pushed down in explain verbose + let expected = vec![ + "+-----------------------------------------+--------------------------------------------------------------------------------------------------------------------------------+", + "| plan_type | plan |", + "+-----------------------------------------+--------------------------------------------------------------------------------------------------------------------------------+", + "| | MemoryExec: partitions=1, partition_sizes=[1] |", + "| | TableScan: system.chunk_columns projection=None |", + "| | TableScan: system.chunk_columns projection=Some([0, 1, 2, 3, 4, 5, 6, 7, 8]) |", + "| | TableScan: system.chunk_columns projection=Some([0, 1, 2, 3, 4, 5, 6, 7, 8]) |", + "| | Filter: #estimated_bytes Gt Int64(20) |", + "| | Filter: #estimated_bytes Gt Int64(20) |", + "| | Filter: #estimated_bytes Gt Int64(20) |", + "| | FilterExec: CAST(estimated_bytes AS Int64) > 20 |", + "| logical_plan | Projection: #partition_key, #chunk_id, #table_name, #column_name, #storage, #count, #min_value, #max_value, #estimated_bytes |", + "| logical_plan after projection_push_down | Projection: #partition_key, #chunk_id, #table_name, #column_name, #storage, #count, #min_value, #max_value, #estimated_bytes |", + "| logical_plan after projection_push_down | Projection: #partition_key, #chunk_id, #table_name, #column_name, #storage, #count, #min_value, #max_value, #estimated_bytes |", + "| physical_plan | ProjectionExec: expr=[partition_key, chunk_id, table_name, column_name, storage, count, min_value, max_value, estimated_bytes] |", + "+-----------------------------------------+--------------------------------------------------------------------------------------------------------------------------------+", + ]; + run_sql_test_case!( + TwoMeasurementsManyFieldsTwoChunks {}, + "EXPLAIN VERBOSE SELECT * from system.chunk_columns where estimated_bytes > 20", + &expected + ); + + // Test 3: Two push-down expression estimated_bytes > 20 AND table_name = 'o2' + // + // Check correctness + let expected = vec![ + "+---------------+----------+------------+--------------+-------------------+-------+-----------+-----------+-----------------+", + "| partition_key | chunk_id | table_name | column_name | storage | count | min_value | max_value | estimated_bytes |", + "+---------------+----------+------------+--------------+-------------------+-------+-----------+-----------+-----------------+", + "| 1970-01-01T00 | 0 | o2 | __dictionary | OpenMutableBuffer | | | | 112 |", + "| 1970-01-01T00 | 0 | o2 | reading | OpenMutableBuffer | 1 | 51 | 51 | 25 |", + "| 1970-01-01T00 | 0 | o2 | temp | OpenMutableBuffer | 2 | 53.4 | 79 | 25 |", + "| 1970-01-01T00 | 0 | o2 | time | OpenMutableBuffer | 2 | 50 | 300 | 25 |", + "+---------------+----------+------------+--------------+-------------------+-------+-----------+-----------+-----------------+", + ]; + run_sql_test_case!( + TwoMeasurementsManyFieldsTwoChunks {}, + "SELECT * from system.chunk_columns where estimated_bytes > 20 AND table_name = 'o2'", + &expected + ); + + // Check if the predicate is pushed down in explain verbose + let expected = vec![ + "+-----------------------------------------+--------------------------------------------------------------------------------------------------------------------------------+", + "| plan_type | plan |", + "+-----------------------------------------+--------------------------------------------------------------------------------------------------------------------------------+", + "| | MemoryExec: partitions=1, partition_sizes=[1] |", + "| | TableScan: system.chunk_columns projection=None |", + "| | TableScan: system.chunk_columns projection=Some([0, 1, 2, 3, 4, 5, 6, 7, 8]) |", + "| | TableScan: system.chunk_columns projection=Some([0, 1, 2, 3, 4, 5, 6, 7, 8]) |", + "| | Filter: #estimated_bytes Gt Int64(20) And #table_name Eq Utf8(\"o2\") |", + "| | Filter: #estimated_bytes Gt Int64(20) And #table_name Eq Utf8(\"o2\") |", + "| | Filter: #estimated_bytes Gt Int64(20) And #table_name Eq Utf8(\"o2\") |", + "| | FilterExec: CAST(estimated_bytes AS Int64) > 20 AND table_name = o2 |", + "| logical_plan | Projection: #partition_key, #chunk_id, #table_name, #column_name, #storage, #count, #min_value, #max_value, #estimated_bytes |", + "| logical_plan after projection_push_down | Projection: #partition_key, #chunk_id, #table_name, #column_name, #storage, #count, #min_value, #max_value, #estimated_bytes |", + "| logical_plan after projection_push_down | Projection: #partition_key, #chunk_id, #table_name, #column_name, #storage, #count, #min_value, #max_value, #estimated_bytes |", + "| physical_plan | ProjectionExec: expr=[partition_key, chunk_id, table_name, column_name, storage, count, min_value, max_value, estimated_bytes] |", + "+-----------------------------------------+--------------------------------------------------------------------------------------------------------------------------------+", + ]; + run_sql_test_case!( + TwoMeasurementsManyFieldsTwoChunks {}, + "EXPLAIN VERBOSE SELECT * from system.chunk_columns where estimated_bytes > 20 AND table_name = 'o2'", + &expected + ); + + // Test 4: Two push-down expression estimated_bytes > 20 AND table_name = 'o2' even though there is an extra OR + // + // Check correctness + let expected = vec![ + "+---------------+----------+------------+-------------+-------------------+-------+-----------+-----------+-----------------+", + "| partition_key | chunk_id | table_name | column_name | storage | count | min_value | max_value | estimated_bytes |", + "+---------------+----------+------------+-------------+-------------------+-------+-----------+-----------+-----------------+", + "| 1970-01-01T00 | 0 | o2 | reading | OpenMutableBuffer | 1 | 51 | 51 | 25 |", + "+---------------+----------+------------+-------------+-------------------+-------+-----------+-----------+-----------------+", + ]; + run_sql_test_case!( + TwoMeasurementsManyFieldsTwoChunks {}, + "SELECT * from system.chunk_columns where estimated_bytes > 20 AND table_name = 'o2' AND (count = 1 OR count > 2)", + &expected + ); + + // Check if the predicate is pushed down in explain verbose + let expected = vec![ + "+-----------------------------------------+--------------------------------------------------------------------------------------------------------------------------------+", + "| plan_type | plan |", + "+-----------------------------------------+--------------------------------------------------------------------------------------------------------------------------------+", + "| | MemoryExec: partitions=1, partition_sizes=[1] |", + "| | TableScan: system.chunk_columns projection=None |", + "| | TableScan: system.chunk_columns projection=Some([0, 1, 2, 3, 4, 5, 6, 7, 8]) |", + "| | TableScan: system.chunk_columns projection=Some([0, 1, 2, 3, 4, 5, 6, 7, 8]) |", + "| | Filter: #estimated_bytes Gt Int64(20) And #table_name Eq Utf8(\"o2\") And #count Eq Int64(1) Or #count Gt Int64(2) |", + "| | Filter: #estimated_bytes Gt Int64(20) And #table_name Eq Utf8(\"o2\") And #count Eq Int64(1) Or #count Gt Int64(2) |", + "| | Filter: #estimated_bytes Gt Int64(20) And #table_name Eq Utf8(\"o2\") And #count Eq Int64(1) Or #count Gt Int64(2) |", + "| | FilterExec: CAST(estimated_bytes AS Int64) > 20 AND table_name = o2 AND CAST(count AS Int64) = 1 OR CAST(count AS Int64) > 2 |", + "| logical_plan | Projection: #partition_key, #chunk_id, #table_name, #column_name, #storage, #count, #min_value, #max_value, #estimated_bytes |", + "| logical_plan after projection_push_down | Projection: #partition_key, #chunk_id, #table_name, #column_name, #storage, #count, #min_value, #max_value, #estimated_bytes |", + "| logical_plan after projection_push_down | Projection: #partition_key, #chunk_id, #table_name, #column_name, #storage, #count, #min_value, #max_value, #estimated_bytes |", + "| physical_plan | ProjectionExec: expr=[partition_key, chunk_id, table_name, column_name, storage, count, min_value, max_value, estimated_bytes] |", + "+-----------------------------------------+--------------------------------------------------------------------------------------------------------------------------------+", + ]; + run_sql_test_case!( + TwoMeasurementsManyFieldsTwoChunks {}, + "EXPLAIN VERBOSE SELECT * from system.chunk_columns where estimated_bytes > 20 AND table_name = 'o2' AND (count = 1 OR count > 2)", + &expected + ); + + // Test 5: Three push-down expression estimated_bytes > 20 AND table_name = 'o2' AND max_value != 51 even though there is an extra OR + // + // Check correctness + let expected = vec!["++", "++"]; + run_sql_test_case!( + TwoMeasurementsManyFieldsTwoChunks {}, + "SELECT * from system.chunk_columns where estimated_bytes > 20 AND table_name = 'o2' AND (count = 1 OR count > 2) and column_name != 'reading'", + &expected + ); + + // Check if the predicate is pushed down in explain verbose + let expected = vec![ + "+-----------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------+", + "| plan_type | plan |", + "+-----------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------+", + "| | MemoryExec: partitions=1, partition_sizes=[1] |", + "| | TableScan: system.chunk_columns projection=None |", + "| | TableScan: system.chunk_columns projection=Some([0, 1, 2, 3, 4, 5, 6, 7, 8]) |", + "| | TableScan: system.chunk_columns projection=Some([0, 1, 2, 3, 4, 5, 6, 7, 8]) |", + "| | Filter: #estimated_bytes Gt Int64(20) And #table_name Eq Utf8(\"o2\") And #count Eq Int64(1) Or #count Gt Int64(2) And #column_name NotEq Utf8(\"reading\") |", + "| | Filter: #estimated_bytes Gt Int64(20) And #table_name Eq Utf8(\"o2\") And #count Eq Int64(1) Or #count Gt Int64(2) And #column_name NotEq Utf8(\"reading\") |", + "| | Filter: #estimated_bytes Gt Int64(20) And #table_name Eq Utf8(\"o2\") And #count Eq Int64(1) Or #count Gt Int64(2) And #column_name NotEq Utf8(\"reading\") |", + "| | FilterExec: CAST(estimated_bytes AS Int64) > 20 AND table_name = o2 AND CAST(count AS Int64) = 1 OR CAST(count AS Int64) > 2 AND column_name != reading |", + "| logical_plan | Projection: #partition_key, #chunk_id, #table_name, #column_name, #storage, #count, #min_value, #max_value, #estimated_bytes |", + "| logical_plan after projection_push_down | Projection: #partition_key, #chunk_id, #table_name, #column_name, #storage, #count, #min_value, #max_value, #estimated_bytes |", + "| logical_plan after projection_push_down | Projection: #partition_key, #chunk_id, #table_name, #column_name, #storage, #count, #min_value, #max_value, #estimated_bytes |", + "| physical_plan | ProjectionExec: expr=[partition_key, chunk_id, table_name, column_name, storage, count, min_value, max_value, estimated_bytes] |", + "+-----------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------+", + ]; + run_sql_test_case!( + TwoMeasurementsManyFieldsTwoChunks {}, + "EXPLAIN VERBOSE SELECT * from system.chunk_columns where estimated_bytes > 20 AND table_name = 'o2' AND (count = 1 OR count > 2) and column_name != 'reading'", + &expected + ); +}