Merge pull request #3054 from influxdata/tran/rub_timestamp

fix: statistics for max/min(time) should have data type timstamp
pull/24376/head
kodiakhq[bot] 2021-11-08 14:36:29 +00:00 committed by GitHub
commit 6fdfaef72c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 298 additions and 33 deletions

14
Cargo.lock generated
View File

@ -1681,7 +1681,7 @@ dependencies = [
name = "influxdb_line_protocol"
version = "0.1.0"
dependencies = [
"nom 7.1.0",
"nom 7.0.0",
"observability_deps",
"smallvec",
"snafu",
@ -1919,9 +1919,9 @@ dependencies = [
[[package]]
name = "libc"
version = "0.2.107"
version = "0.2.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fbe5e23404da5b4f555ef85ebed98fb4083e55a00c317800bc2a50ede9f3d219"
checksum = "a60553f9a9e039a333b4e9b20573b9e9b9c0bb3a11e201ccc48ef4283456d673"
[[package]]
name = "libloading"
@ -2103,9 +2103,9 @@ checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
checksum = "9c64630dcdd71f1a64c435f54885086a0de5d6a12d104d69b165fb7d5286d677"
[[package]]
name = "miniz_oxide"
@ -2330,9 +2330,9 @@ dependencies = [
[[package]]
name = "nom"
version = "7.1.0"
version = "7.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b1d11e1ef389c76fe5b81bcaf2ea32cf88b62bc494e19f493d0b30e7a930109"
checksum = "7ffd9d26838a953b4af82cbeb9f1592c6798916983959be223a7124e992742c1"
dependencies = [
"memchr",
"minimal-lexical",

View File

@ -123,7 +123,7 @@ impl<'a> PruningStatistics for ChunkMetaStats<'a> {
fn min_values(&self, column: &Column) -> Option<ArrayRef> {
let min = self
.column_summary(&column.name)
.and_then(|c| min_to_scalar(&c.stats))
.and_then(|c| min_to_scalar(&c.influxdb_type, &c.stats))
.map(|s| s.to_array_of_size(1));
min
}
@ -131,7 +131,7 @@ impl<'a> PruningStatistics for ChunkMetaStats<'a> {
fn max_values(&self, column: &Column) -> Option<ArrayRef> {
let max = self
.column_summary(&column.name)
.and_then(|c| max_to_scalar(&c.stats))
.and_then(|c| max_to_scalar(&c.influxdb_type, &c.stats))
.map(|s| s.to_array_of_size(1));
max
}

View File

@ -1,6 +1,8 @@
//! Code to translate IOx statistics to DataFusion statistics
use data_types::partition_metadata::{ColumnSummary, Statistics as IOxStatistics, TableSummary};
use data_types::partition_metadata::{
ColumnSummary, InfluxDbType, Statistics as IOxStatistics, TableSummary,
};
use datafusion::{
physical_plan::{ColumnStatistics, Statistics as DFStatistics},
scalar::ScalarValue,
@ -8,9 +10,18 @@ use datafusion::{
use schema::Schema;
/// Converts stats.min and an appropriate `ScalarValue`
pub(crate) fn min_to_scalar(stats: &IOxStatistics) -> Option<ScalarValue> {
pub(crate) fn min_to_scalar(
influx_type: &Option<InfluxDbType>,
stats: &IOxStatistics,
) -> Option<ScalarValue> {
match stats {
IOxStatistics::I64(v) => v.min.map(ScalarValue::from),
IOxStatistics::I64(v) => {
if let Some(InfluxDbType::Timestamp) = *influx_type {
Some(ScalarValue::TimestampNanosecond(v.min))
} else {
v.min.map(ScalarValue::from)
}
}
IOxStatistics::U64(v) => v.min.map(ScalarValue::from),
IOxStatistics::F64(v) => v.min.map(ScalarValue::from),
IOxStatistics::Bool(v) => v.min.map(ScalarValue::from),
@ -19,9 +30,18 @@ pub(crate) fn min_to_scalar(stats: &IOxStatistics) -> Option<ScalarValue> {
}
/// Converts stats.max to an appropriate `ScalarValue`
pub(crate) fn max_to_scalar(stats: &IOxStatistics) -> Option<ScalarValue> {
pub(crate) fn max_to_scalar(
influx_type: &Option<InfluxDbType>,
stats: &IOxStatistics,
) -> Option<ScalarValue> {
match stats {
IOxStatistics::I64(v) => v.max.map(ScalarValue::from),
IOxStatistics::I64(v) => {
if let Some(InfluxDbType::Timestamp) = *influx_type {
Some(ScalarValue::TimestampNanosecond(v.max))
} else {
v.max.map(ScalarValue::from)
}
}
IOxStatistics::U64(v) => v.max.map(ScalarValue::from),
IOxStatistics::F64(v) => v.max.map(ScalarValue::from),
IOxStatistics::Bool(v) => v.max.map(ScalarValue::from),
@ -66,6 +86,7 @@ pub(crate) fn df_from_iox(schema: &Schema, summary: &TableSummary) -> DFStatisti
/// Convert IOx `ColumnSummary` to DataFusion's `ColumnStatistics`
fn df_from_iox_col(col: &ColumnSummary) -> ColumnStatistics {
let stats = &col.stats;
let col_data_type = &col.influxdb_type;
let distinct_count = stats.distinct_count().map(|v| {
let v: u64 = v.into();
@ -76,8 +97,8 @@ fn df_from_iox_col(col: &ColumnSummary) -> ColumnStatistics {
ColumnStatistics {
null_count,
max_value: max_to_scalar(stats),
min_value: min_to_scalar(stats),
max_value: max_to_scalar(col_data_type, stats),
min_value: min_to_scalar(col_data_type, stats),
distinct_count,
}
}

View File

@ -75,6 +75,46 @@
+--------------+
| you |
+--------------+
-- SQL: SELECT min(foo) from cpu group by bar;
+--------------+
| MIN(cpu.foo) |
+--------------+
| you |
| me |
+--------------+
-- SQL: SELECT bar, max(foo) from cpu group by bar;
+-----+--------------+
| bar | MAX(cpu.foo) |
+-----+--------------+
| 2 | you |
| 1 | me |
+-----+--------------+
-- SQL: SELECT min(time) from cpu;
+--------------------------------+
| MIN(cpu.time) |
+--------------------------------+
| 1970-01-01T00:00:00.000000020Z |
+--------------------------------+
-- SQL: SELECT max(time) from cpu;
+--------------------------------+
| MAX(cpu.time) |
+--------------------------------+
| 1970-01-01T00:00:00.000000040Z |
+--------------------------------+
-- SQL: SELECT min(time) from cpu group by bar;
+--------------------------------+
| MIN(cpu.time) |
+--------------------------------+
| 1970-01-01T00:00:00.000000020Z |
| 1970-01-01T00:00:00.000000040Z |
+--------------------------------+
-- SQL: SELECT bar, min(time) from cpu group by bar;
+-----+--------------------------------+
| bar | MIN(cpu.time) |
+-----+--------------------------------+
| 2 | 1970-01-01T00:00:00.000000020Z |
| 1 | 1970-01-01T00:00:00.000000040Z |
+-----+--------------------------------+
-- SQL: SELECT time from cpu;
+--------------------------------+
| time |

View File

@ -25,10 +25,22 @@ SELECT foo from cpu;
SELECT min(foo) from cpu;
SELECT max(foo) from cpu;
-- BUG: https://github.com/influxdata/influxdb_iox/issues/2779
-- inconsistent format returned
-- SELECT min(time) from cpu;
-- SELECT max(time) from cpu;
-- SELECT min(foo) from cpu group by time;
-- SELECT max(foo) from cpu group by time;
-- SELECT time, max(foo) from cpu group by time;
SELECT min(foo) from cpu group by bar;
SELECT bar, max(foo) from cpu group by bar;
-- Todo: Test not work in this framework. Exact same test works in sql.rs
-- SELECT max(foo) from cpu group by time;
SELECT min(time) from cpu;
SELECT max(time) from cpu;
SELECT min(time) from cpu group by bar;
SELECT bar, min(time) from cpu group by bar;
-- Todo: Test not work in this framework. Exact same test works in sql.rs
-- SELECT max(time) from cpu group by foo;
SELECT time from cpu;

View File

@ -23,6 +23,24 @@
+--------------------------------+
| 1970-01-01T00:00:00.000000020Z |
+--------------------------------+
-- SQL: SELECT max(time) from cpu;
+--------------------------------+
| MAX(cpu.time) |
+--------------------------------+
| 1970-01-01T00:00:00.000000020Z |
+--------------------------------+
-- SQL: SELECT min(time) from cpu group by bar;
+--------------------------------+
| MIN(cpu.time) |
+--------------------------------+
| 1970-01-01T00:00:00.000000020Z |
+--------------------------------+
-- SQL: SELECT bar, min(time) from cpu group by bar;
+-----+--------------------------------+
| bar | MIN(cpu.time) |
+-----+--------------------------------+
| 2 | 1970-01-01T00:00:00.000000020Z |
+-----+--------------------------------+
-- SQL: SELECT count(time), max(time) from cpu;
+-----------------+--------------------------------+
| COUNT(cpu.time) | MAX(cpu.time) |

View File

@ -10,10 +10,9 @@ SELECT min(bar), max(bar) from cpu;
SELECT time from cpu;
-- BUG: https://github.com/influxdata/influxdb_iox/issues/2779
-- inconsistent format returned
-- ignore for now
-- SELECT max(time) from cpu;
SELECT max(time) from cpu;
SELECT min(time) from cpu group by bar;
SELECT bar, min(time) from cpu group by bar;
SELECT count(time), max(time) from cpu;

View File

@ -95,6 +95,25 @@
+--------------+
| you |
+--------------+
-- SQL: SELECT min(time) from cpu;
+--------------------------------+
| MIN(cpu.time) |
+--------------------------------+
| 1970-01-01T00:00:00.000000040Z |
+--------------------------------+
-- SQL: SELECT max(time) from cpu;
+--------------------------------+
| MAX(cpu.time) |
+--------------------------------+
| 1970-01-01T00:00:00.000000080Z |
+--------------------------------+
-- SQL: SELECT foo, min(time) from cpu group by foo;
+-----+--------------------------------+
| foo | MIN(cpu.time) |
+-----+--------------------------------+
| me | 1970-01-01T00:00:00.000000040Z |
| you | 1970-01-01T00:00:00.000000070Z |
+-----+--------------------------------+
-- SQL: SELECT time from cpu order by time;
+--------------------------------+
| time |
@ -164,6 +183,12 @@
+--------------+
| 1 |
+--------------+
-- SQL: SELECT max(foo) from cpu where foo = 'me' and (bar > 2 or bar = 1.0);
+--------------+
| MAX(cpu.foo) |
+--------------+
| me |
+--------------+
-- SQL: SELECT min(time) from cpu where foo = 'me' and (bar > 2 or bar = 1.0);
+--------------------------------+
| MIN(cpu.time) |

View File

@ -25,10 +25,13 @@ SELECT foo from cpu order by foo;
SELECT min(foo) from cpu;
SELECT max(foo) from cpu;
-- BUG: https://github.com/influxdata/influxdb_iox/issues/2779
-- inconsistent format returned
-- SELECT min(time) from cpu;
-- SELECT max(time) from cpu;
SELECT min(time) from cpu;
SELECT max(time) from cpu;
SELECT foo, min(time) from cpu group by foo;
-- Todo: Test not work in this framework
-- SELECT bar, max(time) from cpu group by bar;
-- SELECT max(time) from cpu group by bar;
SELECT time from cpu order by time;
@ -51,8 +54,7 @@ SELECT * from cpu where foo = 'you' and (bar > 3.0 or bar = 1) order by bar, foo
SELECT min(bar) from cpu where foo = 'me' and (bar > 2 or bar = 1.0);
-- BUG: https://github.com/influxdata/influxdb_iox/issues/2779
-- SELECT max(foo) from cpu where foo = 'me' and (bar > 2 or bar = 1.0);
SELECT max(foo) from cpu where foo = 'me' and (bar > 2 or bar = 1.0);
SELECT min(time) from cpu where foo = 'me' and (bar > 2 or bar = 1.0);

View File

@ -99,9 +99,9 @@ impl DbSetup for OneDeleteMultiExprsOneChunk {
let table_name = "cpu";
// chunk data
let lp_lines = vec![
"cpu,foo=me bar=1 10",
"cpu,foo=me bar=1 10", // deleted
"cpu,foo=you bar=2 20",
"cpu,foo=me bar=1 30",
"cpu,foo=me bar=1 30", // deleted
"cpu,foo=me bar=1 40",
];
// delete predicate

View File

@ -3,7 +3,10 @@
//! wired all the pieces together (as well as ensure any particularly
//! important SQL does not regress)
use crate::scenarios;
use crate::scenarios::{
self,
delete::{OneDeleteMultiExprsOneChunk, OneDeleteSimpleExprOneChunk},
};
use super::scenarios::*;
use arrow::record_batch::RecordBatch;
@ -805,6 +808,151 @@ async fn sql_select_without_delete_min_foo() {
.await;
}
#[tokio::test]
async fn sql_select_max_time_gb() {
let expected = vec![
"+--------------------------------+",
"| MAX(cpu.time) |",
"+--------------------------------+",
"| 1970-01-01T00:00:00.000000020Z |",
"+--------------------------------+",
];
run_sql_test_case(
OneDeleteSimpleExprOneChunk {},
"SELECT max(time) from cpu group by bar",
&expected,
)
.await;
let expected = vec![
"+-----+--------------------------------+",
"| bar | MAX(cpu.time) |",
"+-----+--------------------------------+",
"| 2 | 1970-01-01T00:00:00.000000020Z |",
"+-----+--------------------------------+",
];
run_sql_test_case(
OneDeleteSimpleExprOneChunk {},
"SELECT bar, max(time) from cpu group by bar",
&expected,
)
.await;
}
#[tokio::test]
async fn sql_select_max_time_gb_bar() {
let expected = vec![
"+--------------------------------+",
"| MAX(cpu.time) |",
"+--------------------------------+",
"| 1970-01-01T00:00:00.000000020Z |",
"| 1970-01-01T00:00:00.000000040Z |",
"+--------------------------------+",
];
run_sql_test_case(
OneDeleteMultiExprsOneChunk {},
"SELECT max(time) from cpu group by bar",
&expected,
)
.await;
}
#[tokio::test]
async fn sql_select_bar_max_time_gb_bar() {
let expected = vec![
"+-----+--------------------------------+",
"| bar | MAX(cpu.time) |",
"+-----+--------------------------------+",
"| 1 | 1970-01-01T00:00:00.000000040Z |",
"| 2 | 1970-01-01T00:00:00.000000020Z |",
"+-----+--------------------------------+",
];
run_sql_test_case(
OneDeleteMultiExprsOneChunk {},
"SELECT bar, max(time) from cpu group by bar",
&expected,
)
.await;
}
#[tokio::test]
async fn sql_select_max_time_gb_foo() {
let expected = vec![
"+--------------------------------+",
"| MAX(cpu.time) |",
"+--------------------------------+",
"| 1970-01-01T00:00:00.000000020Z |",
"| 1970-01-01T00:00:00.000000040Z |",
"+--------------------------------+",
];
run_sql_test_case(
OneDeleteMultiExprsOneChunk {},
"SELECT max(time) from cpu group by foo",
&expected,
)
.await;
}
#[tokio::test]
async fn sql_select_time_max_time_gb_foo() {
let expected = vec![
"+-----+--------------------------------+",
"| foo | MAX(cpu.time) |",
"+-----+--------------------------------+",
"| me | 1970-01-01T00:00:00.000000040Z |",
"| you | 1970-01-01T00:00:00.000000020Z |",
"+-----+--------------------------------+",
];
run_sql_test_case(
OneDeleteMultiExprsOneChunk {},
"SELECT foo, max(time) from cpu group by foo",
&expected,
)
.await;
}
#[tokio::test]
async fn sql_select_min_foo_gb_time() {
let expected = vec![
"+--------------+",
"| MIN(cpu.foo) |",
"+--------------+",
"| me |",
"| you |",
"+--------------+",
];
run_sql_test_case(
OneDeleteMultiExprsOneChunk {},
"SELECT min(foo) from cpu group by time",
&expected,
)
.await;
}
#[tokio::test]
async fn sql_select_time_max_foo_gb_time() {
let expected = vec![
"+--------------------------------+--------------+",
"| time | MAX(cpu.foo) |",
"+--------------------------------+--------------+",
"| 1970-01-01T00:00:00.000000020Z | you |",
"| 1970-01-01T00:00:00.000000040Z | me |",
"+--------------------------------+--------------+",
];
run_sql_test_case(
OneDeleteMultiExprsOneChunk {},
"SELECT time, max(foo) from cpu group by time",
&expected,
)
.await;
}
#[tokio::test]
async fn sql_create_external_table() {
let expected_error = "Unsupported logical plan: CreateExternalTable";