From 97206b13cbcfbd290f6daf406b6a23e42527eedc Mon Sep 17 00:00:00 2001 From: Nga Tran Date: Fri, 5 Nov 2021 18:11:54 -0400 Subject: [PATCH] fix: statistics for max/min(time) should have data type timstamp --- query/src/pruning.rs | 4 +- query/src/statistics.rs | 35 +++- .../in/delete_multi_expr_one_chunk.expected | 40 +++++ .../cases/in/delete_multi_expr_one_chunk.sql | 20 ++- .../in/delete_simple_pred_one_chunk.expected | 18 +++ .../cases/in/delete_simple_pred_one_chunk.sql | 7 +- .../delete_three_delete_three_chunks.expected | 25 +++ .../in/delete_three_delete_three_chunks.sql | 14 +- query_tests/src/scenarios/delete.rs | 4 +- query_tests/src/sql.rs | 150 +++++++++++++++++- 10 files changed, 291 insertions(+), 26 deletions(-) diff --git a/query/src/pruning.rs b/query/src/pruning.rs index e4c3444de3..53b1e73c19 100644 --- a/query/src/pruning.rs +++ b/query/src/pruning.rs @@ -123,7 +123,7 @@ impl<'a> PruningStatistics for ChunkMetaStats<'a> { fn min_values(&self, column: &Column) -> Option { let min = self .column_summary(&column.name) - .and_then(|c| min_to_scalar(&c.stats)) + .and_then(|c| min_to_scalar(&c.influxdb_type, &c.stats)) .map(|s| s.to_array_of_size(1)); min } @@ -131,7 +131,7 @@ impl<'a> PruningStatistics for ChunkMetaStats<'a> { fn max_values(&self, column: &Column) -> Option { let max = self .column_summary(&column.name) - .and_then(|c| max_to_scalar(&c.stats)) + .and_then(|c| max_to_scalar(&c.influxdb_type, &c.stats)) .map(|s| s.to_array_of_size(1)); max } diff --git a/query/src/statistics.rs b/query/src/statistics.rs index e16e540c1d..aba206776f 100644 --- a/query/src/statistics.rs +++ b/query/src/statistics.rs @@ -1,6 +1,8 @@ //! Code to translate IOx statistics to DataFusion statistics -use data_types::partition_metadata::{ColumnSummary, Statistics as IOxStatistics, TableSummary}; +use data_types::partition_metadata::{ + ColumnSummary, InfluxDbType, Statistics as IOxStatistics, TableSummary, +}; use datafusion::{ physical_plan::{ColumnStatistics, Statistics as DFStatistics}, scalar::ScalarValue, @@ -8,9 +10,18 @@ use datafusion::{ use schema::Schema; /// Converts stats.min and an appropriate `ScalarValue` -pub(crate) fn min_to_scalar(stats: &IOxStatistics) -> Option { +pub(crate) fn min_to_scalar( + influx_type: &Option, + stats: &IOxStatistics, +) -> Option { match stats { - IOxStatistics::I64(v) => v.min.map(ScalarValue::from), + IOxStatistics::I64(v) => { + if let Some(InfluxDbType::Timestamp) = *influx_type { + Some(ScalarValue::TimestampNanosecond(v.min)) + } else { + v.min.map(ScalarValue::from) + } + } IOxStatistics::U64(v) => v.min.map(ScalarValue::from), IOxStatistics::F64(v) => v.min.map(ScalarValue::from), IOxStatistics::Bool(v) => v.min.map(ScalarValue::from), @@ -19,9 +30,18 @@ pub(crate) fn min_to_scalar(stats: &IOxStatistics) -> Option { } /// Converts stats.max to an appropriate `ScalarValue` -pub(crate) fn max_to_scalar(stats: &IOxStatistics) -> Option { +pub(crate) fn max_to_scalar( + influx_type: &Option, + stats: &IOxStatistics, +) -> Option { match stats { - IOxStatistics::I64(v) => v.max.map(ScalarValue::from), + IOxStatistics::I64(v) => { + if let Some(InfluxDbType::Timestamp) = *influx_type { + Some(ScalarValue::TimestampNanosecond(v.max)) + } else { + v.max.map(ScalarValue::from) + } + } IOxStatistics::U64(v) => v.max.map(ScalarValue::from), IOxStatistics::F64(v) => v.max.map(ScalarValue::from), IOxStatistics::Bool(v) => v.max.map(ScalarValue::from), @@ -66,6 +86,7 @@ pub(crate) fn df_from_iox(schema: &Schema, summary: &TableSummary) -> DFStatisti /// Convert IOx `ColumnSummary` to DataFusion's `ColumnStatistics` fn df_from_iox_col(col: &ColumnSummary) -> ColumnStatistics { let stats = &col.stats; + let col_data_type = &col.influxdb_type; let distinct_count = stats.distinct_count().map(|v| { let v: u64 = v.into(); @@ -76,8 +97,8 @@ fn df_from_iox_col(col: &ColumnSummary) -> ColumnStatistics { ColumnStatistics { null_count, - max_value: max_to_scalar(stats), - min_value: min_to_scalar(stats), + max_value: max_to_scalar(col_data_type, stats), + min_value: min_to_scalar(col_data_type, stats), distinct_count, } } diff --git a/query_tests/cases/in/delete_multi_expr_one_chunk.expected b/query_tests/cases/in/delete_multi_expr_one_chunk.expected index 471d19085a..48e9e625f6 100644 --- a/query_tests/cases/in/delete_multi_expr_one_chunk.expected +++ b/query_tests/cases/in/delete_multi_expr_one_chunk.expected @@ -75,6 +75,46 @@ +--------------+ | you | +--------------+ +-- SQL: SELECT min(foo) from cpu group by bar; ++--------------+ +| MIN(cpu.foo) | ++--------------+ +| you | +| me | ++--------------+ +-- SQL: SELECT bar, max(foo) from cpu group by bar; ++-----+--------------+ +| bar | MAX(cpu.foo) | ++-----+--------------+ +| 2 | you | +| 1 | me | ++-----+--------------+ +-- SQL: SELECT min(time) from cpu; ++--------------------------------+ +| MIN(cpu.time) | ++--------------------------------+ +| 1970-01-01T00:00:00.000000020Z | ++--------------------------------+ +-- SQL: SELECT max(time) from cpu; ++--------------------------------+ +| MAX(cpu.time) | ++--------------------------------+ +| 1970-01-01T00:00:00.000000040Z | ++--------------------------------+ +-- SQL: SELECT min(time) from cpu group by bar; ++--------------------------------+ +| MIN(cpu.time) | ++--------------------------------+ +| 1970-01-01T00:00:00.000000020Z | +| 1970-01-01T00:00:00.000000040Z | ++--------------------------------+ +-- SQL: SELECT bar, min(time) from cpu group by bar; ++-----+--------------------------------+ +| bar | MIN(cpu.time) | ++-----+--------------------------------+ +| 2 | 1970-01-01T00:00:00.000000020Z | +| 1 | 1970-01-01T00:00:00.000000040Z | ++-----+--------------------------------+ -- SQL: SELECT time from cpu; +--------------------------------+ | time | diff --git a/query_tests/cases/in/delete_multi_expr_one_chunk.sql b/query_tests/cases/in/delete_multi_expr_one_chunk.sql index 03cd62445d..28e5fa39f6 100644 --- a/query_tests/cases/in/delete_multi_expr_one_chunk.sql +++ b/query_tests/cases/in/delete_multi_expr_one_chunk.sql @@ -25,10 +25,22 @@ SELECT foo from cpu; SELECT min(foo) from cpu; SELECT max(foo) from cpu; --- BUG: https://github.com/influxdata/influxdb_iox/issues/2779 --- inconsistent format returned --- SELECT min(time) from cpu; --- SELECT max(time) from cpu; +-- SELECT min(foo) from cpu group by time; +-- SELECT max(foo) from cpu group by time; +-- SELECT time, max(foo) from cpu group by time; + +SELECT min(foo) from cpu group by bar; +SELECT bar, max(foo) from cpu group by bar; +-- Todo: Test not work in this framework. Exact same test works in sql.rs +-- SELECT max(foo) from cpu group by time; + +SELECT min(time) from cpu; +SELECT max(time) from cpu; + +SELECT min(time) from cpu group by bar; +SELECT bar, min(time) from cpu group by bar; +-- Todo: Test not work in this framework. Exact same test works in sql.rs +-- SELECT max(time) from cpu group by foo; SELECT time from cpu; diff --git a/query_tests/cases/in/delete_simple_pred_one_chunk.expected b/query_tests/cases/in/delete_simple_pred_one_chunk.expected index a224e34ff6..f367cdefef 100644 --- a/query_tests/cases/in/delete_simple_pred_one_chunk.expected +++ b/query_tests/cases/in/delete_simple_pred_one_chunk.expected @@ -23,6 +23,24 @@ +--------------------------------+ | 1970-01-01T00:00:00.000000020Z | +--------------------------------+ +-- SQL: SELECT max(time) from cpu; ++--------------------------------+ +| MAX(cpu.time) | ++--------------------------------+ +| 1970-01-01T00:00:00.000000020Z | ++--------------------------------+ +-- SQL: SELECT min(time) from cpu group by bar; ++--------------------------------+ +| MIN(cpu.time) | ++--------------------------------+ +| 1970-01-01T00:00:00.000000020Z | ++--------------------------------+ +-- SQL: SELECT bar, min(time) from cpu group by bar; ++-----+--------------------------------+ +| bar | MIN(cpu.time) | ++-----+--------------------------------+ +| 2 | 1970-01-01T00:00:00.000000020Z | ++-----+--------------------------------+ -- SQL: SELECT count(time), max(time) from cpu; +-----------------+--------------------------------+ | COUNT(cpu.time) | MAX(cpu.time) | diff --git a/query_tests/cases/in/delete_simple_pred_one_chunk.sql b/query_tests/cases/in/delete_simple_pred_one_chunk.sql index 340aba872d..7b22641c63 100644 --- a/query_tests/cases/in/delete_simple_pred_one_chunk.sql +++ b/query_tests/cases/in/delete_simple_pred_one_chunk.sql @@ -10,10 +10,9 @@ SELECT min(bar), max(bar) from cpu; SELECT time from cpu; --- BUG: https://github.com/influxdata/influxdb_iox/issues/2779 --- inconsistent format returned --- ignore for now --- SELECT max(time) from cpu; +SELECT max(time) from cpu; +SELECT min(time) from cpu group by bar; +SELECT bar, min(time) from cpu group by bar; SELECT count(time), max(time) from cpu; diff --git a/query_tests/cases/in/delete_three_delete_three_chunks.expected b/query_tests/cases/in/delete_three_delete_three_chunks.expected index 3628deb513..fedc15abe0 100644 --- a/query_tests/cases/in/delete_three_delete_three_chunks.expected +++ b/query_tests/cases/in/delete_three_delete_three_chunks.expected @@ -95,6 +95,25 @@ +--------------+ | you | +--------------+ +-- SQL: SELECT min(time) from cpu; ++--------------------------------+ +| MIN(cpu.time) | ++--------------------------------+ +| 1970-01-01T00:00:00.000000040Z | ++--------------------------------+ +-- SQL: SELECT max(time) from cpu; ++--------------------------------+ +| MAX(cpu.time) | ++--------------------------------+ +| 1970-01-01T00:00:00.000000080Z | ++--------------------------------+ +-- SQL: SELECT foo, min(time) from cpu group by foo; ++-----+--------------------------------+ +| foo | MIN(cpu.time) | ++-----+--------------------------------+ +| me | 1970-01-01T00:00:00.000000040Z | +| you | 1970-01-01T00:00:00.000000070Z | ++-----+--------------------------------+ -- SQL: SELECT time from cpu order by time; +--------------------------------+ | time | @@ -164,6 +183,12 @@ +--------------+ | 1 | +--------------+ +-- SQL: SELECT max(foo) from cpu where foo = 'me' and (bar > 2 or bar = 1.0); ++--------------+ +| MAX(cpu.foo) | ++--------------+ +| me | ++--------------+ -- SQL: SELECT min(time) from cpu where foo = 'me' and (bar > 2 or bar = 1.0); +--------------------------------+ | MIN(cpu.time) | diff --git a/query_tests/cases/in/delete_three_delete_three_chunks.sql b/query_tests/cases/in/delete_three_delete_three_chunks.sql index 8390be8c35..8a10e7761c 100644 --- a/query_tests/cases/in/delete_three_delete_three_chunks.sql +++ b/query_tests/cases/in/delete_three_delete_three_chunks.sql @@ -25,10 +25,13 @@ SELECT foo from cpu order by foo; SELECT min(foo) from cpu; SELECT max(foo) from cpu; --- BUG: https://github.com/influxdata/influxdb_iox/issues/2779 --- inconsistent format returned --- SELECT min(time) from cpu; --- SELECT max(time) from cpu; +SELECT min(time) from cpu; +SELECT max(time) from cpu; + +SELECT foo, min(time) from cpu group by foo; +-- Todo: Test not work in this framework +-- SELECT bar, max(time) from cpu group by bar; +-- SELECT max(time) from cpu group by bar; SELECT time from cpu order by time; @@ -51,8 +54,7 @@ SELECT * from cpu where foo = 'you' and (bar > 3.0 or bar = 1) order by bar, foo SELECT min(bar) from cpu where foo = 'me' and (bar > 2 or bar = 1.0); --- BUG: https://github.com/influxdata/influxdb_iox/issues/2779 --- SELECT max(foo) from cpu where foo = 'me' and (bar > 2 or bar = 1.0); +SELECT max(foo) from cpu where foo = 'me' and (bar > 2 or bar = 1.0); SELECT min(time) from cpu where foo = 'me' and (bar > 2 or bar = 1.0); diff --git a/query_tests/src/scenarios/delete.rs b/query_tests/src/scenarios/delete.rs index dbfbec019c..b5c8a5d6d9 100644 --- a/query_tests/src/scenarios/delete.rs +++ b/query_tests/src/scenarios/delete.rs @@ -99,9 +99,9 @@ impl DbSetup for OneDeleteMultiExprsOneChunk { let table_name = "cpu"; // chunk data let lp_lines = vec![ - "cpu,foo=me bar=1 10", + "cpu,foo=me bar=1 10", // deleted "cpu,foo=you bar=2 20", - "cpu,foo=me bar=1 30", + "cpu,foo=me bar=1 30", // deleted "cpu,foo=me bar=1 40", ]; // delete predicate diff --git a/query_tests/src/sql.rs b/query_tests/src/sql.rs index 0f2b7395c1..89c7314f57 100644 --- a/query_tests/src/sql.rs +++ b/query_tests/src/sql.rs @@ -3,7 +3,10 @@ //! wired all the pieces together (as well as ensure any particularly //! important SQL does not regress) -use crate::scenarios; +use crate::scenarios::{ + self, + delete::{OneDeleteMultiExprsOneChunk, OneDeleteSimpleExprOneChunk}, +}; use super::scenarios::*; use arrow::record_batch::RecordBatch; @@ -772,4 +775,149 @@ async fn sql_select_without_delete_min_foo() { .await; } +#[tokio::test] +async fn sql_select_max_time_gb() { + let expected = vec![ + "+--------------------------------+", + "| MAX(cpu.time) |", + "+--------------------------------+", + "| 1970-01-01T00:00:00.000000020Z |", + "+--------------------------------+", + ]; + run_sql_test_case( + OneDeleteSimpleExprOneChunk {}, + "SELECT max(time) from cpu group by bar", + &expected, + ) + .await; + + let expected = vec![ + "+-----+--------------------------------+", + "| bar | MAX(cpu.time) |", + "+-----+--------------------------------+", + "| 2 | 1970-01-01T00:00:00.000000020Z |", + "+-----+--------------------------------+", + ]; + + run_sql_test_case( + OneDeleteSimpleExprOneChunk {}, + "SELECT bar, max(time) from cpu group by bar", + &expected, + ) + .await; +} + +#[tokio::test] +async fn sql_select_max_time_gb_bar() { + let expected = vec![ + "+--------------------------------+", + "| MAX(cpu.time) |", + "+--------------------------------+", + "| 1970-01-01T00:00:00.000000020Z |", + "| 1970-01-01T00:00:00.000000040Z |", + "+--------------------------------+", + ]; + run_sql_test_case( + OneDeleteMultiExprsOneChunk {}, + "SELECT max(time) from cpu group by bar", + &expected, + ) + .await; +} + +#[tokio::test] +async fn sql_select_bar_max_time_gb_bar() { + let expected = vec![ + "+-----+--------------------------------+", + "| bar | MAX(cpu.time) |", + "+-----+--------------------------------+", + "| 1 | 1970-01-01T00:00:00.000000040Z |", + "| 2 | 1970-01-01T00:00:00.000000020Z |", + "+-----+--------------------------------+", + ]; + + run_sql_test_case( + OneDeleteMultiExprsOneChunk {}, + "SELECT bar, max(time) from cpu group by bar", + &expected, + ) + .await; +} + +#[tokio::test] +async fn sql_select_max_time_gb_foo() { + let expected = vec![ + "+--------------------------------+", + "| MAX(cpu.time) |", + "+--------------------------------+", + "| 1970-01-01T00:00:00.000000020Z |", + "| 1970-01-01T00:00:00.000000040Z |", + "+--------------------------------+", + ]; + + run_sql_test_case( + OneDeleteMultiExprsOneChunk {}, + "SELECT max(time) from cpu group by foo", + &expected, + ) + .await; +} + +#[tokio::test] +async fn sql_select_time_max_time_gb_foo() { + let expected = vec![ + "+-----+--------------------------------+", + "| foo | MAX(cpu.time) |", + "+-----+--------------------------------+", + "| me | 1970-01-01T00:00:00.000000040Z |", + "| you | 1970-01-01T00:00:00.000000020Z |", + "+-----+--------------------------------+", + ]; + + run_sql_test_case( + OneDeleteMultiExprsOneChunk {}, + "SELECT foo, max(time) from cpu group by foo", + &expected, + ) + .await; +} + +#[tokio::test] +async fn sql_select_min_foo_gb_time() { + let expected = vec![ + "+--------------+", + "| MIN(cpu.foo) |", + "+--------------+", + "| me |", + "| you |", + "+--------------+", + ]; + + run_sql_test_case( + OneDeleteMultiExprsOneChunk {}, + "SELECT min(foo) from cpu group by time", + &expected, + ) + .await; +} + +#[tokio::test] +async fn sql_select_time_max_foo_gb_time() { + let expected = vec![ + "+--------------------------------+--------------+", + "| time | MAX(cpu.foo) |", + "+--------------------------------+--------------+", + "| 1970-01-01T00:00:00.000000020Z | you |", + "| 1970-01-01T00:00:00.000000040Z | me |", + "+--------------------------------+--------------+", + ]; + + run_sql_test_case( + OneDeleteMultiExprsOneChunk {}, + "SELECT time, max(foo) from cpu group by time", + &expected, + ) + .await; +} + // --------------------------------------------------------