Merge pull request #3054 from influxdata/tran/rub_timestamp
fix: statistics for max/min(time) should have data type timstamppull/24376/head
commit
6fdfaef72c
|
@ -1681,7 +1681,7 @@ dependencies = [
|
|||
name = "influxdb_line_protocol"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"nom 7.1.0",
|
||||
"nom 7.0.0",
|
||||
"observability_deps",
|
||||
"smallvec",
|
||||
"snafu",
|
||||
|
@ -1919,9 +1919,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.107"
|
||||
version = "0.2.106"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fbe5e23404da5b4f555ef85ebed98fb4083e55a00c317800bc2a50ede9f3d219"
|
||||
checksum = "a60553f9a9e039a333b4e9b20573b9e9b9c0bb3a11e201ccc48ef4283456d673"
|
||||
|
||||
[[package]]
|
||||
name = "libloading"
|
||||
|
@ -2103,9 +2103,9 @@ checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d"
|
|||
|
||||
[[package]]
|
||||
name = "minimal-lexical"
|
||||
version = "0.2.1"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
||||
checksum = "9c64630dcdd71f1a64c435f54885086a0de5d6a12d104d69b165fb7d5286d677"
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
|
@ -2330,9 +2330,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "7.1.0"
|
||||
version = "7.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1b1d11e1ef389c76fe5b81bcaf2ea32cf88b62bc494e19f493d0b30e7a930109"
|
||||
checksum = "7ffd9d26838a953b4af82cbeb9f1592c6798916983959be223a7124e992742c1"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"minimal-lexical",
|
||||
|
|
|
@ -123,7 +123,7 @@ impl<'a> PruningStatistics for ChunkMetaStats<'a> {
|
|||
fn min_values(&self, column: &Column) -> Option<ArrayRef> {
|
||||
let min = self
|
||||
.column_summary(&column.name)
|
||||
.and_then(|c| min_to_scalar(&c.stats))
|
||||
.and_then(|c| min_to_scalar(&c.influxdb_type, &c.stats))
|
||||
.map(|s| s.to_array_of_size(1));
|
||||
min
|
||||
}
|
||||
|
@ -131,7 +131,7 @@ impl<'a> PruningStatistics for ChunkMetaStats<'a> {
|
|||
fn max_values(&self, column: &Column) -> Option<ArrayRef> {
|
||||
let max = self
|
||||
.column_summary(&column.name)
|
||||
.and_then(|c| max_to_scalar(&c.stats))
|
||||
.and_then(|c| max_to_scalar(&c.influxdb_type, &c.stats))
|
||||
.map(|s| s.to_array_of_size(1));
|
||||
max
|
||||
}
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
//! Code to translate IOx statistics to DataFusion statistics
|
||||
|
||||
use data_types::partition_metadata::{ColumnSummary, Statistics as IOxStatistics, TableSummary};
|
||||
use data_types::partition_metadata::{
|
||||
ColumnSummary, InfluxDbType, Statistics as IOxStatistics, TableSummary,
|
||||
};
|
||||
use datafusion::{
|
||||
physical_plan::{ColumnStatistics, Statistics as DFStatistics},
|
||||
scalar::ScalarValue,
|
||||
|
@ -8,9 +10,18 @@ use datafusion::{
|
|||
use schema::Schema;
|
||||
|
||||
/// Converts stats.min and an appropriate `ScalarValue`
|
||||
pub(crate) fn min_to_scalar(stats: &IOxStatistics) -> Option<ScalarValue> {
|
||||
pub(crate) fn min_to_scalar(
|
||||
influx_type: &Option<InfluxDbType>,
|
||||
stats: &IOxStatistics,
|
||||
) -> Option<ScalarValue> {
|
||||
match stats {
|
||||
IOxStatistics::I64(v) => v.min.map(ScalarValue::from),
|
||||
IOxStatistics::I64(v) => {
|
||||
if let Some(InfluxDbType::Timestamp) = *influx_type {
|
||||
Some(ScalarValue::TimestampNanosecond(v.min))
|
||||
} else {
|
||||
v.min.map(ScalarValue::from)
|
||||
}
|
||||
}
|
||||
IOxStatistics::U64(v) => v.min.map(ScalarValue::from),
|
||||
IOxStatistics::F64(v) => v.min.map(ScalarValue::from),
|
||||
IOxStatistics::Bool(v) => v.min.map(ScalarValue::from),
|
||||
|
@ -19,9 +30,18 @@ pub(crate) fn min_to_scalar(stats: &IOxStatistics) -> Option<ScalarValue> {
|
|||
}
|
||||
|
||||
/// Converts stats.max to an appropriate `ScalarValue`
|
||||
pub(crate) fn max_to_scalar(stats: &IOxStatistics) -> Option<ScalarValue> {
|
||||
pub(crate) fn max_to_scalar(
|
||||
influx_type: &Option<InfluxDbType>,
|
||||
stats: &IOxStatistics,
|
||||
) -> Option<ScalarValue> {
|
||||
match stats {
|
||||
IOxStatistics::I64(v) => v.max.map(ScalarValue::from),
|
||||
IOxStatistics::I64(v) => {
|
||||
if let Some(InfluxDbType::Timestamp) = *influx_type {
|
||||
Some(ScalarValue::TimestampNanosecond(v.max))
|
||||
} else {
|
||||
v.max.map(ScalarValue::from)
|
||||
}
|
||||
}
|
||||
IOxStatistics::U64(v) => v.max.map(ScalarValue::from),
|
||||
IOxStatistics::F64(v) => v.max.map(ScalarValue::from),
|
||||
IOxStatistics::Bool(v) => v.max.map(ScalarValue::from),
|
||||
|
@ -66,6 +86,7 @@ pub(crate) fn df_from_iox(schema: &Schema, summary: &TableSummary) -> DFStatisti
|
|||
/// Convert IOx `ColumnSummary` to DataFusion's `ColumnStatistics`
|
||||
fn df_from_iox_col(col: &ColumnSummary) -> ColumnStatistics {
|
||||
let stats = &col.stats;
|
||||
let col_data_type = &col.influxdb_type;
|
||||
|
||||
let distinct_count = stats.distinct_count().map(|v| {
|
||||
let v: u64 = v.into();
|
||||
|
@ -76,8 +97,8 @@ fn df_from_iox_col(col: &ColumnSummary) -> ColumnStatistics {
|
|||
|
||||
ColumnStatistics {
|
||||
null_count,
|
||||
max_value: max_to_scalar(stats),
|
||||
min_value: min_to_scalar(stats),
|
||||
max_value: max_to_scalar(col_data_type, stats),
|
||||
min_value: min_to_scalar(col_data_type, stats),
|
||||
distinct_count,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -75,6 +75,46 @@
|
|||
+--------------+
|
||||
| you |
|
||||
+--------------+
|
||||
-- SQL: SELECT min(foo) from cpu group by bar;
|
||||
+--------------+
|
||||
| MIN(cpu.foo) |
|
||||
+--------------+
|
||||
| you |
|
||||
| me |
|
||||
+--------------+
|
||||
-- SQL: SELECT bar, max(foo) from cpu group by bar;
|
||||
+-----+--------------+
|
||||
| bar | MAX(cpu.foo) |
|
||||
+-----+--------------+
|
||||
| 2 | you |
|
||||
| 1 | me |
|
||||
+-----+--------------+
|
||||
-- SQL: SELECT min(time) from cpu;
|
||||
+--------------------------------+
|
||||
| MIN(cpu.time) |
|
||||
+--------------------------------+
|
||||
| 1970-01-01T00:00:00.000000020Z |
|
||||
+--------------------------------+
|
||||
-- SQL: SELECT max(time) from cpu;
|
||||
+--------------------------------+
|
||||
| MAX(cpu.time) |
|
||||
+--------------------------------+
|
||||
| 1970-01-01T00:00:00.000000040Z |
|
||||
+--------------------------------+
|
||||
-- SQL: SELECT min(time) from cpu group by bar;
|
||||
+--------------------------------+
|
||||
| MIN(cpu.time) |
|
||||
+--------------------------------+
|
||||
| 1970-01-01T00:00:00.000000020Z |
|
||||
| 1970-01-01T00:00:00.000000040Z |
|
||||
+--------------------------------+
|
||||
-- SQL: SELECT bar, min(time) from cpu group by bar;
|
||||
+-----+--------------------------------+
|
||||
| bar | MIN(cpu.time) |
|
||||
+-----+--------------------------------+
|
||||
| 2 | 1970-01-01T00:00:00.000000020Z |
|
||||
| 1 | 1970-01-01T00:00:00.000000040Z |
|
||||
+-----+--------------------------------+
|
||||
-- SQL: SELECT time from cpu;
|
||||
+--------------------------------+
|
||||
| time |
|
||||
|
|
|
@ -25,10 +25,22 @@ SELECT foo from cpu;
|
|||
SELECT min(foo) from cpu;
|
||||
SELECT max(foo) from cpu;
|
||||
|
||||
-- BUG: https://github.com/influxdata/influxdb_iox/issues/2779
|
||||
-- inconsistent format returned
|
||||
-- SELECT min(time) from cpu;
|
||||
-- SELECT max(time) from cpu;
|
||||
-- SELECT min(foo) from cpu group by time;
|
||||
-- SELECT max(foo) from cpu group by time;
|
||||
-- SELECT time, max(foo) from cpu group by time;
|
||||
|
||||
SELECT min(foo) from cpu group by bar;
|
||||
SELECT bar, max(foo) from cpu group by bar;
|
||||
-- Todo: Test not work in this framework. Exact same test works in sql.rs
|
||||
-- SELECT max(foo) from cpu group by time;
|
||||
|
||||
SELECT min(time) from cpu;
|
||||
SELECT max(time) from cpu;
|
||||
|
||||
SELECT min(time) from cpu group by bar;
|
||||
SELECT bar, min(time) from cpu group by bar;
|
||||
-- Todo: Test not work in this framework. Exact same test works in sql.rs
|
||||
-- SELECT max(time) from cpu group by foo;
|
||||
|
||||
SELECT time from cpu;
|
||||
|
||||
|
|
|
@ -23,6 +23,24 @@
|
|||
+--------------------------------+
|
||||
| 1970-01-01T00:00:00.000000020Z |
|
||||
+--------------------------------+
|
||||
-- SQL: SELECT max(time) from cpu;
|
||||
+--------------------------------+
|
||||
| MAX(cpu.time) |
|
||||
+--------------------------------+
|
||||
| 1970-01-01T00:00:00.000000020Z |
|
||||
+--------------------------------+
|
||||
-- SQL: SELECT min(time) from cpu group by bar;
|
||||
+--------------------------------+
|
||||
| MIN(cpu.time) |
|
||||
+--------------------------------+
|
||||
| 1970-01-01T00:00:00.000000020Z |
|
||||
+--------------------------------+
|
||||
-- SQL: SELECT bar, min(time) from cpu group by bar;
|
||||
+-----+--------------------------------+
|
||||
| bar | MIN(cpu.time) |
|
||||
+-----+--------------------------------+
|
||||
| 2 | 1970-01-01T00:00:00.000000020Z |
|
||||
+-----+--------------------------------+
|
||||
-- SQL: SELECT count(time), max(time) from cpu;
|
||||
+-----------------+--------------------------------+
|
||||
| COUNT(cpu.time) | MAX(cpu.time) |
|
||||
|
|
|
@ -10,10 +10,9 @@ SELECT min(bar), max(bar) from cpu;
|
|||
|
||||
SELECT time from cpu;
|
||||
|
||||
-- BUG: https://github.com/influxdata/influxdb_iox/issues/2779
|
||||
-- inconsistent format returned
|
||||
-- ignore for now
|
||||
-- SELECT max(time) from cpu;
|
||||
SELECT max(time) from cpu;
|
||||
SELECT min(time) from cpu group by bar;
|
||||
SELECT bar, min(time) from cpu group by bar;
|
||||
|
||||
SELECT count(time), max(time) from cpu;
|
||||
|
||||
|
|
|
@ -95,6 +95,25 @@
|
|||
+--------------+
|
||||
| you |
|
||||
+--------------+
|
||||
-- SQL: SELECT min(time) from cpu;
|
||||
+--------------------------------+
|
||||
| MIN(cpu.time) |
|
||||
+--------------------------------+
|
||||
| 1970-01-01T00:00:00.000000040Z |
|
||||
+--------------------------------+
|
||||
-- SQL: SELECT max(time) from cpu;
|
||||
+--------------------------------+
|
||||
| MAX(cpu.time) |
|
||||
+--------------------------------+
|
||||
| 1970-01-01T00:00:00.000000080Z |
|
||||
+--------------------------------+
|
||||
-- SQL: SELECT foo, min(time) from cpu group by foo;
|
||||
+-----+--------------------------------+
|
||||
| foo | MIN(cpu.time) |
|
||||
+-----+--------------------------------+
|
||||
| me | 1970-01-01T00:00:00.000000040Z |
|
||||
| you | 1970-01-01T00:00:00.000000070Z |
|
||||
+-----+--------------------------------+
|
||||
-- SQL: SELECT time from cpu order by time;
|
||||
+--------------------------------+
|
||||
| time |
|
||||
|
@ -164,6 +183,12 @@
|
|||
+--------------+
|
||||
| 1 |
|
||||
+--------------+
|
||||
-- SQL: SELECT max(foo) from cpu where foo = 'me' and (bar > 2 or bar = 1.0);
|
||||
+--------------+
|
||||
| MAX(cpu.foo) |
|
||||
+--------------+
|
||||
| me |
|
||||
+--------------+
|
||||
-- SQL: SELECT min(time) from cpu where foo = 'me' and (bar > 2 or bar = 1.0);
|
||||
+--------------------------------+
|
||||
| MIN(cpu.time) |
|
||||
|
|
|
@ -25,10 +25,13 @@ SELECT foo from cpu order by foo;
|
|||
SELECT min(foo) from cpu;
|
||||
SELECT max(foo) from cpu;
|
||||
|
||||
-- BUG: https://github.com/influxdata/influxdb_iox/issues/2779
|
||||
-- inconsistent format returned
|
||||
-- SELECT min(time) from cpu;
|
||||
-- SELECT max(time) from cpu;
|
||||
SELECT min(time) from cpu;
|
||||
SELECT max(time) from cpu;
|
||||
|
||||
SELECT foo, min(time) from cpu group by foo;
|
||||
-- Todo: Test not work in this framework
|
||||
-- SELECT bar, max(time) from cpu group by bar;
|
||||
-- SELECT max(time) from cpu group by bar;
|
||||
|
||||
SELECT time from cpu order by time;
|
||||
|
||||
|
@ -51,8 +54,7 @@ SELECT * from cpu where foo = 'you' and (bar > 3.0 or bar = 1) order by bar, foo
|
|||
|
||||
SELECT min(bar) from cpu where foo = 'me' and (bar > 2 or bar = 1.0);
|
||||
|
||||
-- BUG: https://github.com/influxdata/influxdb_iox/issues/2779
|
||||
-- SELECT max(foo) from cpu where foo = 'me' and (bar > 2 or bar = 1.0);
|
||||
SELECT max(foo) from cpu where foo = 'me' and (bar > 2 or bar = 1.0);
|
||||
|
||||
SELECT min(time) from cpu where foo = 'me' and (bar > 2 or bar = 1.0);
|
||||
|
||||
|
|
|
@ -99,9 +99,9 @@ impl DbSetup for OneDeleteMultiExprsOneChunk {
|
|||
let table_name = "cpu";
|
||||
// chunk data
|
||||
let lp_lines = vec![
|
||||
"cpu,foo=me bar=1 10",
|
||||
"cpu,foo=me bar=1 10", // deleted
|
||||
"cpu,foo=you bar=2 20",
|
||||
"cpu,foo=me bar=1 30",
|
||||
"cpu,foo=me bar=1 30", // deleted
|
||||
"cpu,foo=me bar=1 40",
|
||||
];
|
||||
// delete predicate
|
||||
|
|
|
@ -3,7 +3,10 @@
|
|||
//! wired all the pieces together (as well as ensure any particularly
|
||||
//! important SQL does not regress)
|
||||
|
||||
use crate::scenarios;
|
||||
use crate::scenarios::{
|
||||
self,
|
||||
delete::{OneDeleteMultiExprsOneChunk, OneDeleteSimpleExprOneChunk},
|
||||
};
|
||||
|
||||
use super::scenarios::*;
|
||||
use arrow::record_batch::RecordBatch;
|
||||
|
@ -805,6 +808,151 @@ async fn sql_select_without_delete_min_foo() {
|
|||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn sql_select_max_time_gb() {
|
||||
let expected = vec![
|
||||
"+--------------------------------+",
|
||||
"| MAX(cpu.time) |",
|
||||
"+--------------------------------+",
|
||||
"| 1970-01-01T00:00:00.000000020Z |",
|
||||
"+--------------------------------+",
|
||||
];
|
||||
run_sql_test_case(
|
||||
OneDeleteSimpleExprOneChunk {},
|
||||
"SELECT max(time) from cpu group by bar",
|
||||
&expected,
|
||||
)
|
||||
.await;
|
||||
|
||||
let expected = vec![
|
||||
"+-----+--------------------------------+",
|
||||
"| bar | MAX(cpu.time) |",
|
||||
"+-----+--------------------------------+",
|
||||
"| 2 | 1970-01-01T00:00:00.000000020Z |",
|
||||
"+-----+--------------------------------+",
|
||||
];
|
||||
|
||||
run_sql_test_case(
|
||||
OneDeleteSimpleExprOneChunk {},
|
||||
"SELECT bar, max(time) from cpu group by bar",
|
||||
&expected,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn sql_select_max_time_gb_bar() {
|
||||
let expected = vec![
|
||||
"+--------------------------------+",
|
||||
"| MAX(cpu.time) |",
|
||||
"+--------------------------------+",
|
||||
"| 1970-01-01T00:00:00.000000020Z |",
|
||||
"| 1970-01-01T00:00:00.000000040Z |",
|
||||
"+--------------------------------+",
|
||||
];
|
||||
run_sql_test_case(
|
||||
OneDeleteMultiExprsOneChunk {},
|
||||
"SELECT max(time) from cpu group by bar",
|
||||
&expected,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn sql_select_bar_max_time_gb_bar() {
|
||||
let expected = vec![
|
||||
"+-----+--------------------------------+",
|
||||
"| bar | MAX(cpu.time) |",
|
||||
"+-----+--------------------------------+",
|
||||
"| 1 | 1970-01-01T00:00:00.000000040Z |",
|
||||
"| 2 | 1970-01-01T00:00:00.000000020Z |",
|
||||
"+-----+--------------------------------+",
|
||||
];
|
||||
|
||||
run_sql_test_case(
|
||||
OneDeleteMultiExprsOneChunk {},
|
||||
"SELECT bar, max(time) from cpu group by bar",
|
||||
&expected,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn sql_select_max_time_gb_foo() {
|
||||
let expected = vec![
|
||||
"+--------------------------------+",
|
||||
"| MAX(cpu.time) |",
|
||||
"+--------------------------------+",
|
||||
"| 1970-01-01T00:00:00.000000020Z |",
|
||||
"| 1970-01-01T00:00:00.000000040Z |",
|
||||
"+--------------------------------+",
|
||||
];
|
||||
|
||||
run_sql_test_case(
|
||||
OneDeleteMultiExprsOneChunk {},
|
||||
"SELECT max(time) from cpu group by foo",
|
||||
&expected,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn sql_select_time_max_time_gb_foo() {
|
||||
let expected = vec![
|
||||
"+-----+--------------------------------+",
|
||||
"| foo | MAX(cpu.time) |",
|
||||
"+-----+--------------------------------+",
|
||||
"| me | 1970-01-01T00:00:00.000000040Z |",
|
||||
"| you | 1970-01-01T00:00:00.000000020Z |",
|
||||
"+-----+--------------------------------+",
|
||||
];
|
||||
|
||||
run_sql_test_case(
|
||||
OneDeleteMultiExprsOneChunk {},
|
||||
"SELECT foo, max(time) from cpu group by foo",
|
||||
&expected,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn sql_select_min_foo_gb_time() {
|
||||
let expected = vec![
|
||||
"+--------------+",
|
||||
"| MIN(cpu.foo) |",
|
||||
"+--------------+",
|
||||
"| me |",
|
||||
"| you |",
|
||||
"+--------------+",
|
||||
];
|
||||
|
||||
run_sql_test_case(
|
||||
OneDeleteMultiExprsOneChunk {},
|
||||
"SELECT min(foo) from cpu group by time",
|
||||
&expected,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn sql_select_time_max_foo_gb_time() {
|
||||
let expected = vec![
|
||||
"+--------------------------------+--------------+",
|
||||
"| time | MAX(cpu.foo) |",
|
||||
"+--------------------------------+--------------+",
|
||||
"| 1970-01-01T00:00:00.000000020Z | you |",
|
||||
"| 1970-01-01T00:00:00.000000040Z | me |",
|
||||
"+--------------------------------+--------------+",
|
||||
];
|
||||
|
||||
run_sql_test_case(
|
||||
OneDeleteMultiExprsOneChunk {},
|
||||
"SELECT time, max(foo) from cpu group by time",
|
||||
&expected,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn sql_create_external_table() {
|
||||
let expected_error = "Unsupported logical plan: CreateExternalTable";
|
||||
|
|
Loading…
Reference in New Issue