From d8b06c59c46fed14ca10b2f7bd161b7909911828 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 Jun 2023 02:03:15 +0000 Subject: [PATCH 1/9] chore(deps): Bump once_cell from 1.17.2 to 1.18.0 Bumps [once_cell](https://github.com/matklad/once_cell) from 1.17.2 to 1.18.0. - [Changelog](https://github.com/matklad/once_cell/blob/master/CHANGELOG.md) - [Commits](https://github.com/matklad/once_cell/compare/v1.17.2...v1.18.0) --- updated-dependencies: - dependency-name: once_cell dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- arrow_util/Cargo.toml | 2 +- executor/Cargo.toml | 2 +- garbage_collector/Cargo.toml | 2 +- influxdb2_client/Cargo.toml | 2 +- influxdb_iox/Cargo.toml | 2 +- ingester/Cargo.toml | 2 +- logfmt/Cargo.toml | 2 +- test_helpers_end_to_end/Cargo.toml | 2 +- 9 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fe168dd63e..ff964c96e9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3860,9 +3860,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.17.2" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9670a07f94779e00908f3e686eab508878ebb390ba6e604d3a284c00e8d0487b" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" dependencies = [ "parking_lot_core 0.9.7", ] diff --git a/arrow_util/Cargo.toml b/arrow_util/Cargo.toml index 1f7bb4b3bd..6c8614a3e9 100644 --- a/arrow_util/Cargo.toml +++ b/arrow_util/Cargo.toml @@ -15,7 +15,7 @@ chrono = { version = "0.4", default-features = false } comfy-table = { version = "6.2", default-features = false } hashbrown = { workspace = true } num-traits = "0.2" -once_cell = { version = "1.17", features = ["parking_lot"] } +once_cell = { version = "1.18", features = ["parking_lot"] } regex = "1.8.3" snafu = "0.7" uuid = "1" diff --git a/executor/Cargo.toml b/executor/Cargo.toml index e2a73c6529..f1c337fa8b 100644 --- a/executor/Cargo.toml +++ b/executor/Cargo.toml @@ -8,7 +8,7 @@ license.workspace = true [dependencies] futures = "0.3" observability_deps = { path = "../observability_deps" } -once_cell = { version = "1.17", features = ["parking_lot"] } +once_cell = { version = "1.18", features = ["parking_lot"] } parking_lot = "0.12" pin-project = "1.1" tokio = { version = "1.28" } diff --git a/garbage_collector/Cargo.toml b/garbage_collector/Cargo.toml index 6c319dfa02..b0fa415c86 100644 --- a/garbage_collector/Cargo.toml +++ b/garbage_collector/Cargo.toml @@ -28,6 +28,6 @@ bytes = "1.4" data_types = { path = "../data_types" } filetime = "0.2" metric = { path = "../metric" } -once_cell = { version = "1.17", features = ["parking_lot"] } +once_cell = { version = "1.18", features = ["parking_lot"] } parquet_file = { path = "../parquet_file" } tempfile = "3" diff --git a/influxdb2_client/Cargo.toml b/influxdb2_client/Cargo.toml index 335bacbc43..23d4356c67 100644 --- a/influxdb2_client/Cargo.toml +++ b/influxdb2_client/Cargo.toml @@ -17,7 +17,7 @@ uuid = { version = "1", features = ["v4"] } [dev-dependencies] # In alphabetical order mockito = "1.0" -once_cell = { version = "1.17", features = ["parking_lot"] } +once_cell = { version = "1.18", features = ["parking_lot"] } parking_lot = "0.12" tokio = { version = "1.28", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] } test_helpers = { path = "../test_helpers" } diff --git a/influxdb_iox/Cargo.toml 
b/influxdb_iox/Cargo.toml index 1874731fe0..b9009464af 100644 --- a/influxdb_iox/Cargo.toml +++ b/influxdb_iox/Cargo.toml @@ -64,7 +64,7 @@ itertools = "0.10.5" lazy_static = "1.4.0" libc = { version = "0.2" } num_cpus = "1.15.0" -once_cell = { version = "1.17", features = ["parking_lot"] } +once_cell = { version = "1.18", features = ["parking_lot"] } rustyline = { version = "11.0", default-features = false, features = ["with-file-history"]} serde_json = "1.0.96" snafu = "0.7" diff --git a/ingester/Cargo.toml b/ingester/Cargo.toml index e7c6fb1ea8..d1c1c041ac 100644 --- a/ingester/Cargo.toml +++ b/ingester/Cargo.toml @@ -29,7 +29,7 @@ metric = { version = "0.1.0", path = "../metric" } mutable_batch = { version = "0.1.0", path = "../mutable_batch" } mutable_batch_pb = { version = "0.1.0", path = "../mutable_batch_pb" } observability_deps = { version = "0.1.0", path = "../observability_deps" } -once_cell = "1.17" +once_cell = "1.18" parking_lot = "0.12.1" parquet_file = { version = "0.1.0", path = "../parquet_file" } pin-project = "1.1.0" diff --git a/logfmt/Cargo.toml b/logfmt/Cargo.toml index 01b6d60b20..e7eceb0476 100644 --- a/logfmt/Cargo.toml +++ b/logfmt/Cargo.toml @@ -12,7 +12,7 @@ tracing-subscriber = "0.3" workspace-hack = { version = "0.1", path = "../workspace-hack" } [dev-dependencies] # In alphabetical order -once_cell = { version = "1.17", features = ["parking_lot"] } +once_cell = { version = "1.18", features = ["parking_lot"] } parking_lot = "0.12" regex = "1" tracing-subscriber = { version = "0.3", features = ["env-filter"] } diff --git a/test_helpers_end_to_end/Cargo.toml b/test_helpers_end_to_end/Cargo.toml index 17e2518f6c..ec075dda2e 100644 --- a/test_helpers_end_to_end/Cargo.toml +++ b/test_helpers_end_to_end/Cargo.toml @@ -24,7 +24,7 @@ mutable_batch_lp = { path = "../mutable_batch_lp" } mutable_batch_pb = { path = "../mutable_batch_pb" } nix = "0.26" observability_deps = { path = "../observability_deps" } -once_cell = { version = "1.17", features = ["parking_lot"] } +once_cell = { version = "1.18", features = ["parking_lot"] } parking_lot = "0.12" prost = "0.11" rand = "0.8.3" From 8e61dc5aef678962ca78fde0a191f53e59fd5a2e Mon Sep 17 00:00:00 2001 From: Dom Dwyer Date: Wed, 31 May 2023 15:43:03 +0200 Subject: [PATCH 2/9] refactor: remove InvalidStrftime value It's big, it's annoying, it's already available to the user. --- mutable_batch/src/payload/partition.rs | 11 ++++++----- router/tests/grpc.rs | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/mutable_batch/src/payload/partition.rs b/mutable_batch/src/payload/partition.rs index 34edb77c5a..4645644b3c 100644 --- a/mutable_batch/src/payload/partition.rs +++ b/mutable_batch/src/payload/partition.rs @@ -22,12 +22,13 @@ use crate::{ }; /// An error generating a partition key for a row. +#[allow(missing_copy_implementations)] #[derive(Debug, Error, PartialEq, Eq)] pub enum PartitionKeyError { /// The partition template defines a [`Template::TimeFormat`] part, but the /// provided strftime formatter is invalid. - #[error("invalid strftime format in partition template: {0}")] - InvalidStrftime(String), + #[error("invalid strftime format in partition template")] + InvalidStrftime, /// The partition template defines a [`Template::TagValue`] part, but the /// column type is not "tag". 
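A minimal sketch of what callers see after this change (the enum shape and `thiserror` attributes mirror the hunk above; the `handle` function is a hypothetical caller, not code from this patch):

use thiserror::Error;

#[derive(Debug, Error, PartialEq, Eq)]
pub enum PartitionKeyError {
    #[error("invalid strftime format in partition template")]
    InvalidStrftime,
}

fn handle(res: Result<String, PartitionKeyError>) {
    match res {
        Ok(key) => println!("partition key: {key}"),
        // The variant no longer carries the offending format string, so
        // callers match it without a payload; the template (and thus the
        // format) is already in the caller's hands.
        Err(PartitionKeyError::InvalidStrftime) => eprintln!("bad strftime format"),
    }
}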
@@ -93,7 +94,7 @@ impl<'a> Template<'a> { Utc.timestamp_nanos(t[idx]) .format_with_items(format.clone()) // Cheap clone of refs ) - .map_err(|_| PartitionKeyError::InvalidStrftime(format!("{format:?}")))?; + .map_err(|_| PartitionKeyError::InvalidStrftime)?; out.write_str( Cow::from(utf8_percent_encode( @@ -474,7 +475,7 @@ mod tests { let ret = partition_keys(&batch, template.parts()).collect::<Result<Vec<_>, _>>(); - assert_matches!(ret, Err(PartitionKeyError::InvalidStrftime(_))); + assert_matches!(ret, Err(PartitionKeyError::InvalidStrftime)); } // These values are arbitrarily chosen when building an input to the @@ -605,7 +606,7 @@ // properties: match ret { Ok(v) => { assert_eq!(v.len(), 1); }, - Err(e) => { assert_matches!(e, PartitionKeyError::InvalidStrftime(_)); }, + Err(e) => { assert_matches!(e, PartitionKeyError::InvalidStrftime); }, } } } diff --git a/router/tests/grpc.rs b/router/tests/grpc.rs index f015b543c1..df5cf29cc1 100644 --- a/router/tests/grpc.rs +++ b/router/tests/grpc.rs @@ -973,7 +973,7 @@ async fn test_invalid_strftime_partition_template() { assert_matches!( got, Err(Error::DmlHandler(DmlError::Partition( - PartitionError::Partitioner(PartitionKeyError::InvalidStrftime(_)) + PartitionError::Partitioner(PartitionKeyError::InvalidStrftime) ))) ); From ea3dcba308c4a14788479389698be26aa84faeee Mon Sep 17 00:00:00 2001 From: Dom Dwyer Date: Wed, 31 May 2023 15:52:28 +0200 Subject: [PATCH 3/9] perf: preallocate partition key strings Partition keys tend to be approximately the same size each time (and in the default case, always exactly the same size). This simple change reduces allocations by pre-sizing the next partition key string to match that of the previous one. This should reduce the number of allocations needed to grow the string, for a ~10% throughput increase. --- mutable_batch/src/payload/partition.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/mutable_batch/src/payload/partition.rs b/mutable_batch/src/payload/partition.rs index 4645644b3c..066c1aa91a 100644 --- a/mutable_batch/src/payload/partition.rs +++ b/mutable_batch/src/payload/partition.rs @@ -146,9 +146,18 @@ fn partition_keys<'a>( }) .collect::<Vec<_>>(); + // Track the length of the last yielded partition key, and pre-allocate the + // next partition key string to match it. + // + // In the happy path, keys of consistent sizes are generated and the + // allocations reach a minimum. If the keys are inconsistent, at best a + // subset of allocations are eliminated, and at worst, a few bytes of memory + // are temporarily allocated until the resulting string is shrunk down. + let mut last_len = 5; + // Yield a partition key string for each row in `batch` (0..batch.row_count).map(move |idx| { - let mut string = String::new(); + let mut string = String::with_capacity(last_len); // Evaluate each template part for this row for (col_idx, col) in template.iter().enumerate() { @@ -161,6 +170,8 @@ } } + last_len = string.len(); + string.shrink_to_fit(); Ok(string) }) } From 9bf7dee6282b7783209ebfbf41b473a9b6ce0c78 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 Jun 2023 09:34:15 +0000 Subject: [PATCH 4/9] chore(deps): Bump libc from 0.2.144 to 0.2.145 (#7921) Bumps [libc](https://github.com/rust-lang/libc) from 0.2.144 to 0.2.145. 
- [Release notes](https://github.com/rust-lang/libc/releases) - [Commits](https://github.com/rust-lang/libc/compare/0.2.144...0.2.145) --- updated-dependencies: - dependency-name: libc dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Dom --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ff964c96e9..6afe51c9b8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3297,9 +3297,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.144" +version = "0.2.145" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1" +checksum = "fc86cde3ff845662b8f4ef6cb50ea0e20c524eb3d29ae048287e06a1b3fa6a81" [[package]] name = "libm" From ee61e954bfbc1d77551146f0961fb73bad4d3ac5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 Jun 2023 09:41:08 +0000 Subject: [PATCH 5/9] chore(deps): Bump flatbuffers from 23.1.21 to 23.5.26 (#7922) Bumps [flatbuffers](https://github.com/google/flatbuffers) from 23.1.21 to 23.5.26. - [Release notes](https://github.com/google/flatbuffers/releases) - [Changelog](https://github.com/google/flatbuffers/blob/master/CHANGELOG.md) - [Commits](https://github.com/google/flatbuffers/compare/v23.1.21...v23.5.26) --- updated-dependencies: - dependency-name: flatbuffers dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Dom Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- ingester/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6afe51c9b8..ff5c83ea96 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1777,9 +1777,9 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "flatbuffers" -version = "23.1.21" +version = "23.5.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77f5399c2c9c50ae9418e522842ad362f61ee48b346ac106807bd355a8a7c619" +checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" dependencies = [ "bitflags", "rustc_version", diff --git a/ingester/Cargo.toml b/ingester/Cargo.toml index d1c1c041ac..5528c2ba7c 100644 --- a/ingester/Cargo.toml +++ b/ingester/Cargo.toml @@ -17,7 +17,7 @@ crossbeam-utils = "0.8.15" data_types = { version = "0.1.0", path = "../data_types" } datafusion.workspace = true dml = { version = "0.1.0", path = "../dml" } -flatbuffers = "23.1.21" +flatbuffers = "23.5.26" futures = "0.3.28" generated_types = { version = "0.1.0", path = "../generated_types" } hashbrown.workspace = true From 3f2960f0c759f38bc2d214c51b805ba6b4efb7e9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 Jun 2023 14:24:42 +0000 Subject: [PATCH 6/9] chore(deps): Bump url from 2.3.1 to 2.4.0 (#7925) * chore(deps): Bump url from 2.3.1 to 2.4.0 Bumps [url](https://github.com/servo/rust-url) from 2.3.1 to 2.4.0. 
- [Release notes](https://github.com/servo/rust-url/releases) - [Commits](https://github.com/servo/rust-url/compare/v2.3.1...v2.4.0) --- updated-dependencies: - dependency-name: url dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] * chore: Run cargo hakari tasks --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: CircleCI[bot] Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com> --- Cargo.lock | 18 ++++++++++-------- datafusion_util/Cargo.toml | 2 +- influxdb2_client/Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++++ 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ff5c83ea96..639aa1fb7c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1841,9 +1841,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "form_urlencoded" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8" +checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" dependencies = [ "percent-encoding", ] @@ -2388,9 +2388,9 @@ dependencies = [ [[package]] name = "idna" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" dependencies = [ "unicode-bidi", "unicode-normalization", @@ -4108,9 +4108,9 @@ dependencies = [ [[package]] name = "percent-encoding" -version = "2.2.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" +checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" [[package]] name = "pest" @@ -6311,9 +6311,9 @@ checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" [[package]] name = "url" -version = "2.3.1" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643" +checksum = "50bff7831e19200a85b17131d085c25d7811bc4e186efdaf54bbd132994a88cb" dependencies = [ "form_urlencoded", "idna", @@ -6876,6 +6876,8 @@ dependencies = [ "tracing-core", "tracing-log", "tracing-subscriber", + "unicode-bidi", + "unicode-normalization", "url", "uuid 1.3.3", "webpki", diff --git a/datafusion_util/Cargo.toml b/datafusion_util/Cargo.toml index 3b70e903c7..4f45f61d15 100644 --- a/datafusion_util/Cargo.toml +++ b/datafusion_util/Cargo.toml @@ -15,7 +15,7 @@ observability_deps = { path = "../observability_deps" } pin-project = "1.1" tokio = { version = "1.28", features = ["parking_lot", "sync"] } tokio-stream = "0.1" -url = "2.2" +url = "2.4" workspace-hack = { version = "0.1", path = "../workspace-hack" } [dev-dependencies] diff --git a/influxdb2_client/Cargo.toml b/influxdb2_client/Cargo.toml index 23d4356c67..f8df7a436f 100644 --- a/influxdb2_client/Cargo.toml +++ b/influxdb2_client/Cargo.toml @@ -12,7 +12,7 @@ reqwest = { version = "0.11", default-features = false, features = ["stream", "j serde = { version = "1.0", features = ["derive"] } serde_json = "1.0.96" snafu = "0.7" -url = "2.3.1" +url = "2.4.0" uuid = { version = "1", features = ["v4"] } 
[dev-dependencies] # In alphabetical order diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 657da5b335..aec7b077aa 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -91,6 +91,8 @@ tracing = { version = "0.1", features = ["log", "max_level_trace", "release_max_ tracing-core = { version = "0.1" } tracing-log = { version = "0.1" } tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "parking_lot"] } +unicode-bidi = { version = "0.3" } +unicode-normalization = { version = "0.1" } url = { version = "2" } uuid = { version = "1", features = ["v4"] } zstd = { version = "0.12" } @@ -150,6 +152,8 @@ syn-dff4ba8e3ae991db = { package = "syn", version = "1", features = ["extra-trai syn-f595c2ba2a3f28df = { package = "syn", version = "2", features = ["extra-traits", "full", "visit-mut"] } tokio = { version = "1", features = ["full", "test-util", "tracing"] } tokio-stream = { version = "0.1", features = ["fs", "net"] } +unicode-bidi = { version = "0.3" } +unicode-normalization = { version = "0.1" } url = { version = "2" } uuid = { version = "1", features = ["v4"] } From a0686c5f9535618d0016162aeba439bbd010c013 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 Jun 2023 15:50:26 +0000 Subject: [PATCH 7/9] chore(deps): Bump regex from 1.8.3 to 1.8.4 (#7928) Bumps [regex](https://github.com/rust-lang/regex) from 1.8.3 to 1.8.4. - [Release notes](https://github.com/rust-lang/regex/releases) - [Changelog](https://github.com/rust-lang/regex/blob/master/CHANGELOG.md) - [Commits](https://github.com/rust-lang/regex/compare/1.8.3...1.8.4) --- updated-dependencies: - dependency-name: regex dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- arrow_util/Cargo.toml | 2 +- service_grpc_influxrpc/Cargo.toml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 639aa1fb7c..2ff16b977a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4707,9 +4707,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.8.3" +version = "1.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81ca098a9821bd52d6b24fd8b10bd081f47d39c22778cafaa75a2857a62c6390" +checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f" dependencies = [ "aho-corasick", "memchr", diff --git a/arrow_util/Cargo.toml b/arrow_util/Cargo.toml index 6c8614a3e9..2b9ba58274 100644 --- a/arrow_util/Cargo.toml +++ b/arrow_util/Cargo.toml @@ -16,7 +16,7 @@ comfy-table = { version = "6.2", default-features = false } hashbrown = { workspace = true } num-traits = "0.2" once_cell = { version = "1.18", features = ["parking_lot"] } -regex = "1.8.3" +regex = "1.8.4" snafu = "0.7" uuid = "1" workspace-hack = { version = "0.1", path = "../workspace-hack" } diff --git a/service_grpc_influxrpc/Cargo.toml b/service_grpc_influxrpc/Cargo.toml index 112629d22c..9499baba29 100644 --- a/service_grpc_influxrpc/Cargo.toml +++ b/service_grpc_influxrpc/Cargo.toml @@ -25,7 +25,7 @@ arrow = { workspace = true, features = ["prettyprint"] } futures = "0.3" pin-project = "1.1" prost = "0.11" -regex = "1.8.3" +regex = "1.8.4" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0.96" snafu = "0.7" From 566869aa30dcc2568aa8f86dbec1bef352458634 Mon Sep 17 00:00:00 2001 From: Nga Tran Date: Mon, 5 Jun 2023 12:53:09 -0400 Subject: [PATCH 8/9] refactor: replace namespace with database for flight proto (#7910) * refactor: replace namespace with database for flight proto * chore: address review comments --------- Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com> --- .../influxdata/iox/querier/v1/flight.proto | 5 +- influxdb_iox_client/src/client/flight/mod.rs | 12 +- service_grpc_flight/src/lib.rs | 2 +- service_grpc_flight/src/request.rs | 185 ++++++++++++------ 4 files changed, 136 insertions(+), 68 deletions(-) diff --git a/generated_types/protos/influxdata/iox/querier/v1/flight.proto b/generated_types/protos/influxdata/iox/querier/v1/flight.proto index d37040e8aa..adf544b16b 100644 --- a/generated_types/protos/influxdata/iox/querier/v1/flight.proto +++ b/generated_types/protos/influxdata/iox/querier/v1/flight.proto @@ -16,8 +16,9 @@ option go_package = "github.com/influxdata/iox/querier/v1"; * `GetFlightInfo` followed by a `DoGet`). */ message ReadInfo { - // Namespace name. - string namespace_name = 1; + // Database name + // This used to be namespace_name + string database = 1; // Query text (either SQL or InfluxQL, depending on query_type) string sql_query = 2; diff --git a/influxdb_iox_client/src/client/flight/mod.rs b/influxdb_iox_client/src/client/flight/mod.rs index 9d1ae22177..7180543783 100644 --- a/influxdb_iox_client/src/client/flight/mod.rs +++ b/influxdb_iox_client/src/client/flight/mod.rs @@ -185,15 +185,15 @@ impl Client { Ok(self.inner.add_header(key, value)?) } - /// Query the given namespace with the given SQL query, returning + /// Query the given database with the given SQL query, returning /// a struct that can stream Arrow [`RecordBatch`] results. 
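/// /// A hedged usage sketch (the module path, client construction, and error handling here are assumed, not taken from this patch; only the `sql` call matches the signature below):
///
/// ```no_run
/// # async fn example(mut client: influxdb_iox_client::flight::Client) -> Result<(), Box<dyn std::error::Error>> {
/// // Stream `RecordBatch`es for a SQL query against database `my_db`.
/// let mut query_results = client.sql("my_db", "SELECT 1").await?;
/// # Ok(())
/// # }
/// ```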
pub async fn sql( &mut self, - namespace_name: impl Into<String> + Send, + database: impl Into<String> + Send, sql_query: impl Into<String> + Send, ) -> Result { let request = ReadInfo { - namespace_name: namespace_name.into(), + database: database.into(), sql_query: sql_query.into(), query_type: QueryType::Sql.into(), flightsql_command: vec![], @@ -203,15 +203,15 @@ self.do_get_with_read_info(request).await } - /// Query the given namespace with the given InfluxQL query, returning + /// Query the given database with the given InfluxQL query, returning /// a struct that can stream Arrow [`RecordBatch`] results. pub async fn influxql( &mut self, - namespace_name: impl Into<String> + Send, + database: impl Into<String> + Send, influxql_query: impl Into<String> + Send, ) -> Result { let request = ReadInfo { - namespace_name: namespace_name.into(), + database: database.into(), sql_query: influxql_query.into(), query_type: QueryType::InfluxQl.into(), flightsql_command: vec![], diff --git a/service_grpc_flight/src/lib.rs b/service_grpc_flight/src/lib.rs index 0e5cf6cf9a..c29869425a 100644 --- a/service_grpc_flight/src/lib.rs +++ b/service_grpc_flight/src/lib.rs @@ -497,7 +497,7 @@ where }; let request = request?; - let namespace_name = request.namespace_name(); + let namespace_name = request.database(); let query = request.query(); is_debug |= request.is_debug(); diff --git a/service_grpc_flight/src/request.rs b/service_grpc_flight/src/request.rs index 8e8fa5d01f..fc4fe77bd8 100644 --- a/service_grpc_flight/src/request.rs +++ b/service_grpc_flight/src/request.rs @@ -47,11 +47,11 @@ pub type Result = std::result::Result; /// /// ## Example JSON Ticket format /// -/// This runs the SQL "SELECT 1" in namespace `my_db` +/// This runs the SQL "SELECT 1" in database `my_db` /// /// ```json /// { -/// "namespace_name": "my_db", +/// "database": "my_db", /// "sql_query": "SELECT 1;" /// } /// ``` /// /// ```json /// { -/// "namespace_name": "my_db", +/// "database": "my_db", /// "sql_query": "SELECT 1;" /// "query_type": "sql" /// } /// ``` /// /// ```json /// { -/// "namespace_name": "my_db", +/// "database": "my_db", /// "sql_query": "SHOW DATABASES;" /// "query_type": "influxql" /// } /// ``` #[derive(Debug, PartialEq, Clone)] pub struct IoxGetRequest { - namespace_name: String, + database: String, query: RunQuery, is_debug: bool, } @@ -116,9 +116,9 @@ impl Display for RunQuery { impl IoxGetRequest { /// Create a new request to run the specified query - pub fn new(namespace_name: impl Into<String>, query: RunQuery, is_debug: bool) -> Self { + pub fn new(database: impl Into<String>, query: RunQuery, is_debug: bool) -> Self { Self { - namespace_name: namespace_name.into(), + database: database.into(), query, is_debug, } @@ -142,21 +142,21 @@ impl IoxGetRequest { /// Encode the request as a protobuf Ticket pub fn try_encode(self) -> Result { let Self { - namespace_name, + database, query, is_debug, } = self; let read_info = match query { RunQuery::Sql(sql_query) => proto::ReadInfo { - namespace_name, + database, sql_query, query_type: QueryType::Sql.into(), flightsql_command: vec![], is_debug, }, RunQuery::InfluxQL(influxql) => proto::ReadInfo { - namespace_name, + database, // field name is misleading sql_query: influxql, query_type: QueryType::InfluxQl.into(), is_debug, }, RunQuery::FlightSQL(flightsql_command) => proto::ReadInfo { - namespace_name, + database, sql_query: "".into(), query_type: 
QueryType::FlightSqlMessage.into(), flightsql_command: flightsql_command @@ -189,8 +189,8 @@ impl IoxGetRequest { /// This represents the JSON fields #[derive(Deserialize, Debug)] struct ReadInfoJson { - #[serde(alias = "database", alias = "bucket", alias = "bucket-name")] - namespace_name: String, + #[serde(alias = "namespace_name", alias = "bucket", alias = "bucket-name")] + database: String, sql_query: String, // If query type is not supplied, defaults to SQL query_type: Option, @@ -199,7 +199,7 @@ impl IoxGetRequest { } let ReadInfoJson { - namespace_name, + database, sql_query, query_type, is_debug, @@ -221,7 +221,7 @@ impl IoxGetRequest { }; Ok(Self { - namespace_name, + database, query, is_debug, }) @@ -233,7 +233,7 @@ impl IoxGetRequest { let query_type = read_info.query_type(); let proto::ReadInfo { - namespace_name, + database, sql_query, query_type: _, flightsql_command, @@ -241,7 +241,7 @@ impl IoxGetRequest { } = read_info; Ok(Self { - namespace_name, + database, query: match query_type { QueryType::Unspecified | QueryType::Sql => { if !flightsql_command.is_empty() { @@ -277,8 +277,8 @@ impl IoxGetRequest { }) } - pub fn namespace_name(&self) -> &str { - self.namespace_name.as_ref() + pub fn database(&self) -> &str { + self.database.as_ref() } pub fn query(&self) -> &RunQuery { @@ -306,10 +306,10 @@ mod tests { // - assert_eq!(query, "SELECT 1;")); } @@ -321,22 +321,22 @@ } impl TestCase { - fn new_sql(json: &'static str, expected_namespace: &str, query: &str) -> Self { + fn new_sql(json: &'static str, expected_database: &str, query: &str) -> Self { Self { json, expected: IoxGetRequest { - namespace_name: String::from(expected_namespace), + database: String::from(expected_database), query: RunQuery::Sql(String::from(query)), is_debug: false, }, } } - fn new_influxql(json: &'static str, expected_namespace: &str, query: &str) -> Self { + fn new_influxql(json: &'static str, expected_database: &str, query: &str) -> Self { Self { json, expected: IoxGetRequest { - namespace_name: String::from(expected_namespace), + database: String::from(expected_database), query: RunQuery::InfluxQL(String::from(query)), is_debug: false, }, @@ -346,63 +346,130 @@ let cases = vec![ // implicit `query_type` + TestCase::new_sql( + r#"{"database": "my_db", "sql_query": "SELECT 1;"}"#, + "my_db", + "SELECT 1;", + ), TestCase::new_sql( r#"{"namespace_name": "my_db", "sql_query": "SELECT 1;"}"#, "my_db", "SELECT 1;", ), + TestCase::new_sql( + r#"{"bucket": "my_db", "sql_query": "SELECT 1;"}"#, + "my_db", + "SELECT 1;", + ), + TestCase::new_sql( + r#"{"bucket-name": "my_db", "sql_query": "SELECT 1;"}"#, + "my_db", + "SELECT 1;", + ), // explicit query type, sql + TestCase::new_sql( + r#"{"database": "my_db", "sql_query": "SELECT 1;", "query_type": "sql"}"#, + "my_db", + "SELECT 1;", + ), TestCase::new_sql( r#"{"namespace_name": "my_db", "sql_query": "SELECT 1;", "query_type": "sql"}"#, "my_db", "SELECT 1;", ), + TestCase::new_sql( + r#"{"bucket": "my_db", "sql_query": "SELECT 1;", "query_type": "sql"}"#, + "my_db", + "SELECT 1;", + ), + TestCase::new_sql( + r#"{"bucket-name": "my_db", "sql_query": "SELECT 1;", "query_type": "sql"}"#, + "my_db", + "SELECT 1;", + ), // explicit query type null + TestCase::new_sql( + r#"{"database": "my_db", "sql_query": "SELECT 1;", "query_type": null}"#, + "my_db", + "SELECT 1;", + ), TestCase::new_sql( r#"{"namespace_name": "my_db", "sql_query": "SELECT 1;", "query_type": null}"#, "my_db", "SELECT 1;", ), + TestCase::new_sql( + r#"{"bucket": "my_db", 
"sql_query": "SELECT 1;", "query_type": null}"#, + "my_db", + "SELECT 1;", + ), + TestCase::new_sql( + r#"{"bucket-name": "my_db", "sql_query": "SELECT 1;", "query_type": null}"#, + "my_db", + "SELECT 1;", + ), // explicit query type, influxql + TestCase::new_influxql( + r#"{"database": "my_db", "sql_query": "SELECT 1;", "query_type": "influxql"}"#, + "my_db", + "SELECT 1;", + ), + TestCase::new_influxql( + r#"{"namespace_name": "my_db", "sql_query": "SELECT 1;", "query_type": "influxql"}"#, + "my_db", + "SELECT 1;", + ), + TestCase::new_influxql( + r#"{"bucket": "my_db", "sql_query": "SELECT 1;", "query_type": "influxql"}"#, + "my_db", + "SELECT 1;", + ), + TestCase::new_influxql( + r#"{"bucket-name": "my_db", "sql_query": "SELECT 1;", "query_type": "influxql"}"#, + "my_db", + "SELECT 1;", + ), + // explicit query type, influxql on metadata + TestCase::new_influxql( + r#"{"database": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "influxql"}"#, + "my_otherdb", + "SHOW DATABASES;", + ), TestCase::new_influxql( r#"{"namespace_name": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "influxql"}"#, "my_otherdb", "SHOW DATABASES;", ), - TestCase::new_influxql( - r#"{"database": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "influxql"}"#, - "my_otherdb", - "SHOW DATABASES;", - ), - // influxql bucket metadata TestCase::new_influxql( r#"{"bucket": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "influxql"}"#, "my_otherdb", "SHOW DATABASES;", ), - // influxql bucket-name metadata TestCase::new_influxql( r#"{"bucket-name": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "influxql"}"#, "my_otherdb", "SHOW DATABASES;", ), - // sql database metadata + // explicit query type, sql on metadata TestCase::new_sql( - r#"{"database": "my_db", "sql_query": "SELECT 1;", "query_type": "sql"}"#, - "my_db", - "SELECT 1;", + r#"{"database": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "sql"}"#, + "my_otherdb", + "SHOW DATABASES;", ), - // sql bucket metadata TestCase::new_sql( - r#"{"bucket": "my_db", "sql_query": "SELECT 1;", "query_type": "sql"}"#, - "my_db", - "SELECT 1;", + r#"{"namespace_name": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "sql"}"#, + "my_otherdb", + "SHOW DATABASES;", ), - // sql bucket-name metadata TestCase::new_sql( - r#"{"bucket-name": "my_db", "sql_query": "SELECT 1;", "query_type": "sql"}"#, - "my_db", - "SELECT 1;", + r#"{"bucket": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "sql"}"#, + "my_otherdb", + "SHOW DATABASES;", + ), + TestCase::new_sql( + r#"{"bucket-name": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "sql"}"#, + "my_otherdb", + "SHOW DATABASES;", ), ]; @@ -446,7 +513,7 @@ mod tests { #[test] fn proto_ticket_decoding_unspecified() { let ticket = make_proto_ticket(&proto::ReadInfo { - namespace_name: "_".to_string(), + database: "_".to_string(), sql_query: "SELECT 1".to_string(), query_type: QueryType::Unspecified.into(), flightsql_command: vec![], @@ -455,14 +522,14 @@ mod tests { // Reverts to default (unspecified) for invalid query_type enumeration, and thus SQL let ri = IoxGetRequest::try_decode(ticket).unwrap(); - assert_eq!(ri.namespace_name, "_"); + assert_eq!(ri.database, "_"); assert_matches!(ri.query, RunQuery::Sql(query) => assert_eq!(query, "SELECT 1")); } #[test] fn proto_ticket_decoding_sql() { let ticket = make_proto_ticket(&proto::ReadInfo { - namespace_name: "_".to_string(), + database: "_".to_string(), sql_query: "SELECT 1".to_string(), 
query_type: QueryType::Sql.into(), flightsql_command: vec![], @@ -470,14 +537,14 @@ mod tests { }); let ri = IoxGetRequest::try_decode(ticket).unwrap(); - assert_eq!(ri.namespace_name, "_"); + assert_eq!(ri.database, "_"); assert_matches!(ri.query, RunQuery::Sql(query) => assert_eq!(query, "SELECT 1")); } #[test] fn proto_ticket_decoding_influxql() { let ticket = make_proto_ticket(&proto::ReadInfo { - namespace_name: "_".to_string(), + database: "_".to_string(), sql_query: "SELECT 1".to_string(), query_type: QueryType::InfluxQl.into(), flightsql_command: vec![], @@ -485,14 +552,14 @@ mod tests { }); let ri = IoxGetRequest::try_decode(ticket).unwrap(); - assert_eq!(ri.namespace_name, "_"); + assert_eq!(ri.database, "_"); assert_matches!(ri.query, RunQuery::InfluxQL(query) => assert_eq!(query, "SELECT 1")); } #[test] fn proto_ticket_decoding_too_new() { let ticket = make_proto_ticket(&proto::ReadInfo { - namespace_name: "_".to_string(), + database: "_".to_string(), sql_query: "SELECT 1".into(), query_type: 42, // not a known query type flightsql_command: vec![], @@ -501,14 +568,14 @@ mod tests { // Reverts to default (unspecified) for invalid query_type enumeration, and thus SQL let ri = IoxGetRequest::try_decode(ticket).unwrap(); - assert_eq!(ri.namespace_name, "_"); + assert_eq!(ri.database, "_"); assert_matches!(ri.query, RunQuery::Sql(query) => assert_eq!(query, "SELECT 1")); } #[test] fn proto_ticket_decoding_sql_too_many_fields() { let ticket = make_proto_ticket(&proto::ReadInfo { - namespace_name: "_".to_string(), + database: "_".to_string(), sql_query: "SELECT 1".to_string(), query_type: QueryType::Sql.into(), // can't have both sql_query and flightsql @@ -523,7 +590,7 @@ mod tests { #[test] fn proto_ticket_decoding_influxql_too_many_fields() { let ticket = make_proto_ticket(&proto::ReadInfo { - namespace_name: "_".to_string(), + database: "_".to_string(), sql_query: "SELECT 1".to_string(), query_type: QueryType::InfluxQl.into(), // can't have both sql_query and flightsql @@ -538,7 +605,7 @@ mod tests { #[test] fn proto_ticket_decoding_flightsql_too_many_fields() { let ticket = make_proto_ticket(&proto::ReadInfo { - namespace_name: "_".to_string(), + database: "_".to_string(), sql_query: "SELECT 1".to_string(), query_type: QueryType::FlightSqlMessage.into(), // can't have both sql_query and flightsql @@ -564,7 +631,7 @@ mod tests { #[test] fn round_trip_sql() { let request = IoxGetRequest { - namespace_name: "foo_blarg".into(), + database: "foo_blarg".into(), query: RunQuery::Sql("select * from bar".into()), is_debug: false, }; @@ -579,7 +646,7 @@ mod tests { #[test] fn round_trip_sql_is_debug() { let request = IoxGetRequest { - namespace_name: "foo_blarg".into(), + database: "foo_blarg".into(), query: RunQuery::Sql("select * from bar".into()), is_debug: true, }; @@ -594,7 +661,7 @@ mod tests { #[test] fn round_trip_influxql() { let request = IoxGetRequest { - namespace_name: "foo_blarg".into(), + database: "foo_blarg".into(), query: RunQuery::InfluxQL("select * from bar".into()), is_debug: false, }; @@ -613,7 +680,7 @@ mod tests { }); let request = IoxGetRequest { - namespace_name: "foo_blarg".into(), + database: "foo_blarg".into(), query: RunQuery::FlightSQL(cmd), is_debug: false, }; From f571aeb4457b6436fc1e82b7292aa5f54101abe0 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 5 Jun 2023 14:38:59 -0400 Subject: [PATCH 9/9] chore: Update DataFusion pin (#7916) * chore: Update DataFusion pin * chore: Update cargo * fix: update for API changes * fix: Update plans * chore: Update 
for new api * fix: Update plans * chore: Update for API changes more --------- Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com> --- Cargo.lock | 25 ++-- Cargo.toml | 4 +- .../cases/in/date_bin.sql.expected | 122 +++++++++--------- .../cases/in/duplicates_parquet.sql.expected | 21 ++- .../query_tests/cases/in/gapfill.sql.expected | 8 +- iox_query/src/exec/gapfill/mod.rs | 4 +- .../src/logical_optimizer/handle_gapfill.rs | 14 +- .../physical_optimizer/predicate_pushdown.rs | 2 +- .../physical_optimizer/projection_pushdown.rs | 10 +- .../sort/parquet_sortness.rs | 24 ++-- iox_query/src/provider/physical.rs | 3 + iox_query/src/pruning.rs | 2 +- iox_query_influxql/src/plan/planner.rs | 122 ++++++++++++------ parquet_file/src/storage.rs | 2 +- parquet_to_line_protocol/src/lib.rs | 2 +- predicate/src/delete_expr.rs | 2 +- predicate/src/lib.rs | 1 + predicate/src/rpc_predicate/rewrite.rs | 2 +- workspace-hack/Cargo.toml | 7 +- 19 files changed, 217 insertions(+), 160 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2ff16b977a..bbed8ec598 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1382,7 +1382,7 @@ dependencies = [ [[package]] name = "datafusion" version = "25.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7" dependencies = [ "ahash 0.8.3", "arrow", @@ -1431,7 +1431,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "25.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7" dependencies = [ "arrow", "arrow-array", @@ -1445,7 +1445,7 @@ dependencies = [ [[package]] name = "datafusion-execution" version = "25.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7" dependencies = [ "dashmap", "datafusion-common", @@ -1462,18 +1462,21 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "25.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7" dependencies = [ "ahash 0.8.3", "arrow", "datafusion-common", + "lazy_static", "sqlparser", + "strum", + "strum_macros", ] [[package]] name = "datafusion-optimizer" version = "25.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7" dependencies = [ "arrow", "async-trait", @@ -1490,7 +1493,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "25.0.0" -source = 
"git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7" dependencies = [ "ahash 0.8.3", "arrow", @@ -1522,7 +1525,7 @@ dependencies = [ [[package]] name = "datafusion-proto" version = "25.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7" dependencies = [ "arrow", "chrono", @@ -1536,7 +1539,7 @@ dependencies = [ [[package]] name = "datafusion-row" version = "25.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7" dependencies = [ "arrow", "datafusion-common", @@ -1547,7 +1550,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "25.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7" dependencies = [ "arrow", "arrow-schema", @@ -5573,6 +5576,9 @@ name = "strum" version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" +dependencies = [ + "strum_macros", +] [[package]] name = "strum_macros" @@ -6864,6 +6870,7 @@ dependencies = [ "sqlx", "sqlx-core", "sqlx-macros", + "strum", "syn 1.0.109", "syn 2.0.16", "thrift", diff --git a/Cargo.toml b/Cargo.toml index 2bbdc9de13..fe4dad8e3d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -118,8 +118,8 @@ license = "MIT OR Apache-2.0" [workspace.dependencies] arrow = { version = "40.0.0" } arrow-flight = { version = "40.0.0" } -datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ee1019d9585252066ef5b288c84aabebcbd93ca6", default-features = false } -datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ee1019d9585252066ef5b288c84aabebcbd93ca6" } +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "859251b4a20e00c3dfe73eee6b605fcf722687e7", default-features = false } +datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "859251b4a20e00c3dfe73eee6b605fcf722687e7" } hashbrown = { version = "0.13.2" } object_store = { version = "0.5.6" } parquet = { version = "40.0.0" } diff --git a/influxdb_iox/tests/query_tests/cases/in/date_bin.sql.expected b/influxdb_iox/tests/query_tests/cases/in/date_bin.sql.expected index f4ed971ead..dcbde0c76f 100644 --- a/influxdb_iox/tests/query_tests/cases/in/date_bin.sql.expected +++ b/influxdb_iox/tests/query_tests/cases/in/date_bin.sql.expected @@ -1,67 +1,67 @@ -- Test Setup: OneMeasurementTwoSeries -- SQL: select date_bin(INTERVAL '1 month', column1) from (values (timestamp '2022-01-01 00:00:00'), (timestamp '2022-01-01 01:00:00'), (timestamp '2022-01-02 00:00:00'), (timestamp '2022-02-02 00:00:00'), (timestamp '2022-02-15 00:00:00'), 
(timestamp '2022-03-31 00:00:00') ) as sq; -+---------------------------------------------------------------------------+ -| datebin(IntervalMonthDayNano("79228162514264337593543950336"),sq.column1) | -+---------------------------------------------------------------------------+ -| 2022-01-01T00:00:00Z | -| 2022-01-01T00:00:00Z | -| 2022-01-01T00:00:00Z | -| 2022-02-01T00:00:00Z | -| 2022-02-01T00:00:00Z | -| 2022-03-01T00:00:00Z | -+---------------------------------------------------------------------------+ ++----------------------------------------------------------------------------+ +| date_bin(IntervalMonthDayNano("79228162514264337593543950336"),sq.column1) | ++----------------------------------------------------------------------------+ +| 2022-01-01T00:00:00Z | +| 2022-01-01T00:00:00Z | +| 2022-01-01T00:00:00Z | +| 2022-02-01T00:00:00Z | +| 2022-02-01T00:00:00Z | +| 2022-03-01T00:00:00Z | ++----------------------------------------------------------------------------+ -- SQL: select date_bin('1 year', column1) from (values (timestamp '2022-01-01 00:00:00'), (timestamp '2023-01-01 01:00:00'), (timestamp '2022-01-02 00:00:00'), (timestamp '2022-02-02 00:00:00'), (timestamp '2022-02-15 00:00:00'), (timestamp '2022-03-31 00:00:00') ) as sq; -+------------------------------------+ -| datebin(Utf8("1 year"),sq.column1) | -+------------------------------------+ -| 2022-01-01T00:00:00Z | -| 2023-01-01T00:00:00Z | -| 2022-01-01T00:00:00Z | -| 2022-01-01T00:00:00Z | -| 2022-01-01T00:00:00Z | -| 2022-01-01T00:00:00Z | -+------------------------------------+ ++-------------------------------------+ +| date_bin(Utf8("1 year"),sq.column1) | ++-------------------------------------+ +| 2022-01-01T00:00:00Z | +| 2023-01-01T00:00:00Z | +| 2022-01-01T00:00:00Z | +| 2022-01-01T00:00:00Z | +| 2022-01-01T00:00:00Z | +| 2022-01-01T00:00:00Z | ++-------------------------------------+ -- SQL: select date_bin('1 month', column1, '1970-12-31T00:15:00Z') from (values (timestamp '2022-01-01 00:00:00'), (timestamp '2022-01-01 01:00:00'), (timestamp '2022-01-02 00:00:00'), (timestamp '2022-02-02 00:00:00'), (timestamp '2022-02-15 00:00:00'), (timestamp '2022-03-31 00:00:00') ) as sq; -+------------------------------------------------------------------+ -| datebin(Utf8("1 month"),sq.column1,Utf8("1970-12-31T00:15:00Z")) | -+------------------------------------------------------------------+ -| 2021-12-31T00:15:00Z | -| 2021-12-31T00:15:00Z | -| 2021-12-31T00:15:00Z | -| 2022-01-31T00:15:00Z | -| 2022-01-31T00:15:00Z | -| 2022-02-28T00:15:00Z | -+------------------------------------------------------------------+ ++-------------------------------------------------------------------+ +| date_bin(Utf8("1 month"),sq.column1,Utf8("1970-12-31T00:15:00Z")) | ++-------------------------------------------------------------------+ +| 2021-12-31T00:15:00Z | +| 2021-12-31T00:15:00Z | +| 2021-12-31T00:15:00Z | +| 2022-01-31T00:15:00Z | +| 2022-01-31T00:15:00Z | +| 2022-02-28T00:15:00Z | ++-------------------------------------------------------------------+ -- SQL: SELECT DATE_BIN('5 month', '2022-01-01T00:00:00Z'); -+-------------------------------------------------------+ -| datebin(Utf8("5 month"),Utf8("2022-01-01T00:00:00Z")) | -+-------------------------------------------------------+ -| 2021-09-01T00:00:00Z | -+-------------------------------------------------------+ ++--------------------------------------------------------+ +| date_bin(Utf8("5 month"),Utf8("2022-01-01T00:00:00Z")) | 
++--------------------------------------------------------+ +| 2021-09-01T00:00:00Z | ++--------------------------------------------------------+ -- SQL: SELECT DATE_BIN('3 month', '2022-04-01T00:00:00Z', '2021-05-31T00:04:00Z'); -+------------------------------------------------------------------------------------+ -| datebin(Utf8("3 month"),Utf8("2022-04-01T00:00:00Z"),Utf8("2021-05-31T00:04:00Z")) | -+------------------------------------------------------------------------------------+ -| 2022-02-28T00:04:00Z | -+------------------------------------------------------------------------------------+ ++-------------------------------------------------------------------------------------+ +| date_bin(Utf8("3 month"),Utf8("2022-04-01T00:00:00Z"),Utf8("2021-05-31T00:04:00Z")) | ++-------------------------------------------------------------------------------------+ +| 2022-02-28T00:04:00Z | ++-------------------------------------------------------------------------------------+ -- SQL: select date_bin('1 month', timestamp '2000-01-31T00:00:00', timestamp '2000-02-29T00:00:00'); -+----------------------------------------------------------------------------------+ -| datebin(Utf8("1 month"),Utf8("2000-01-31T00:00:00"),Utf8("2000-02-29T00:00:00")) | -+----------------------------------------------------------------------------------+ -| 2000-01-29T00:00:00Z | -+----------------------------------------------------------------------------------+ ++-----------------------------------------------------------------------------------+ +| date_bin(Utf8("1 month"),Utf8("2000-01-31T00:00:00"),Utf8("2000-02-29T00:00:00")) | ++-----------------------------------------------------------------------------------+ +| 2000-01-29T00:00:00Z | ++-----------------------------------------------------------------------------------+ -- SQL: select date_bin('1 month', timestamp '2000-01-31T00:00:00', timestamp '2000-03-29T00:00:00'); -+----------------------------------------------------------------------------------+ -| datebin(Utf8("1 month"),Utf8("2000-01-31T00:00:00"),Utf8("2000-03-29T00:00:00")) | -+----------------------------------------------------------------------------------+ -| 2000-01-29T00:00:00Z | -+----------------------------------------------------------------------------------+ ++-----------------------------------------------------------------------------------+ +| date_bin(Utf8("1 month"),Utf8("2000-01-31T00:00:00"),Utf8("2000-03-29T00:00:00")) | ++-----------------------------------------------------------------------------------+ +| 2000-01-29T00:00:00Z | ++-----------------------------------------------------------------------------------+ -- SQL: SELECT DATE_BIN('3 years 1 months', '2022-09-01 00:00:00Z'); -+----------------------------------------------------------------+ -| datebin(Utf8("3 years 1 months"),Utf8("2022-09-01 00:00:00Z")) | -+----------------------------------------------------------------+ -| 2022-06-01T00:00:00Z | -+----------------------------------------------------------------+ ++-----------------------------------------------------------------+ +| date_bin(Utf8("3 years 1 months"),Utf8("2022-09-01 00:00:00Z")) | ++-----------------------------------------------------------------+ +| 2022-06-01T00:00:00Z | ++-----------------------------------------------------------------+ -- SQL: SELECT * FROM cpu ORDER BY REGION, TIME; +------+--------+----------------------+------+ | idle | region | time | user | @@ -103,14 +103,14 @@ ---------- | plan_type | plan | ---------- -| 
logical_plan | Projection: datebin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z")) AS month, COUNT(cpu.user) | -| | Aggregate: groupBy=[[datebin(IntervalMonthDayNano("79228162514264337593543950336"), cpu.time, TimestampNanosecond(31450500000000000, None)) AS datebin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))]], aggr=[[COUNT(cpu.user)]] | +| logical_plan | Projection: date_bin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z")) AS month, COUNT(cpu.user) | +| | Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("79228162514264337593543950336"), cpu.time, TimestampNanosecond(31450500000000000, None)) AS date_bin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))]], aggr=[[COUNT(cpu.user)]] | | | TableScan: cpu projection=[time, user], full_filters=[cpu.time >= TimestampNanosecond(957528000000000000, None), cpu.time <= TimestampNanosecond(957531540000000000, None)] | -| physical_plan | ProjectionExec: expr=[datebin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))@0 as month, COUNT(cpu.user)@1 as COUNT(cpu.user)] | -| | AggregateExec: mode=FinalPartitioned, gby=[datebin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))@0 as datebin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))], aggr=[COUNT(cpu.user)] | +| physical_plan | ProjectionExec: expr=[date_bin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))@0 as month, COUNT(cpu.user)@1 as COUNT(cpu.user)] | +| | AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))@0 as date_bin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))], aggr=[COUNT(cpu.user)] | | | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "datebin(Utf8(\"1 month\"),cpu.time,Utf8(\"1970-12-31T00:15:00Z\"))", index: 0 }], 4), input_partitions=4 | -| | AggregateExec: mode=Partial, gby=[datebin(79228162514264337593543950336, time@0, 31450500000000000) as datebin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))], aggr=[COUNT(cpu.user)] | +| | RepartitionExec: partitioning=Hash([Column { name: "date_bin(Utf8(\"1 month\"),cpu.time,Utf8(\"1970-12-31T00:15:00Z\"))", index: 0 }], 4), input_partitions=4 | +| | AggregateExec: mode=Partial, gby=[date_bin(79228162514264337593543950336, time@0, 31450500000000000) as date_bin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))], aggr=[COUNT(cpu.user)] | | | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 | | | CoalesceBatchesExec: target_batch_size=8192 | | | FilterExec: time@0 >= 957528000000000000 AND time@0 <= 957531540000000000 | diff --git a/influxdb_iox/tests/query_tests/cases/in/duplicates_parquet.sql.expected b/influxdb_iox/tests/query_tests/cases/in/duplicates_parquet.sql.expected index 20b0c3d3d7..b4d20b99fe 100644 --- a/influxdb_iox/tests/query_tests/cases/in/duplicates_parquet.sql.expected +++ b/influxdb_iox/tests/query_tests/cases/in/duplicates_parquet.sql.expected @@ -96,16 +96,15 @@ ---------- | plan_type | plan | ---------- -| Plan with Metrics | CoalescePartitionsExec, metrics=[elapsed_compute=1.234ms, output_rows=10] | -| | UnionExec, metrics=[elapsed_compute=1.234ms, output_rows=10] | -| | CoalesceBatchesExec: target_batch_size=8192, metrics=[elapsed_compute=1.234ms, output_rows=5] | -| | FilterExec: state@4 = MA, metrics=[elapsed_compute=1.234ms, output_rows=5] | -| | ParquetExec: file_groups={2 groups: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet], [1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, projection=[area, city, 
max_temp, min_temp, state, time], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], predicate=state@4 = MA, pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, metrics=[bytes_scanned=1683, elapsed_compute=1.234ms, num_predicate_creation_errors=0, output_rows=5, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=5, row_groups_pruned=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning_total=1.234ms, time_elapsed_scanning_until_data=1.234ms] | -| | ProjectionExec: expr=[area@1 as area, city@2 as city, max_temp@3 as max_temp, min_temp@4 as min_temp, state@5 as state, time@6 as time], metrics=[elapsed_compute=1.234ms, output_rows=5] | -| | DeduplicateExec: [state@5 ASC,city@2 ASC,time@6 ASC], metrics=[elapsed_compute=1.234ms, num_dupes=2, output_rows=5] | -| | SortPreservingMergeExec: [state@5 ASC,city@2 ASC,time@6 ASC,__chunk_order@0 ASC], metrics=[elapsed_compute=1.234ms, output_rows=7] | -| | CoalesceBatchesExec: target_batch_size=8192, metrics=[elapsed_compute=1.234ms, output_rows=7] | -| | FilterExec: state@5 = MA, metrics=[elapsed_compute=1.234ms, output_rows=7] | -| | ParquetExec: file_groups={2 groups: [[1/1/1/00000000-0000-0000-0000-000000000002.parquet], [1/1/1/00000000-0000-0000-0000-000000000003.parquet]]}, projection=[__chunk_order, area, city, max_temp, min_temp, state, time], output_ordering=[state@5 ASC, city@2 ASC, time@6 ASC, __chunk_order@0 ASC], predicate=state@4 = MA, pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, metrics=[bytes_scanned=1532, elapsed_compute=1.234ms, num_predicate_creation_errors=0, output_rows=7, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=3, row_groups_pruned=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning_total=1.234ms, time_elapsed_scanning_until_data=1.234ms] | +| Plan with Metrics | UnionExec, metrics=[elapsed_compute=1.234ms, output_rows=10] | +| | CoalesceBatchesExec: target_batch_size=8192, metrics=[elapsed_compute=1.234ms, output_rows=5] | +| | FilterExec: state@4 = MA, metrics=[elapsed_compute=1.234ms, output_rows=5] | +| | ParquetExec: file_groups={2 groups: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet], [1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, projection=[area, city, max_temp, min_temp, state, time], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], predicate=state@4 = MA, pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, metrics=[bytes_scanned=1683, elapsed_compute=1.234ms, file_open_errors=0, file_scan_errors=0, num_predicate_creation_errors=0, output_rows=5, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=5, row_groups_pruned=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning_total=1.234ms, time_elapsed_scanning_until_data=1.234ms] | +| | ProjectionExec: expr=[area@1 as area, city@2 as city, max_temp@3 as max_temp, min_temp@4 as min_temp, state@5 as state, time@6 as time], metrics=[elapsed_compute=1.234ms, output_rows=5] | +| | DeduplicateExec: [state@5 ASC,city@2 ASC,time@6 ASC], metrics=[elapsed_compute=1.234ms, num_dupes=2, output_rows=5] | +| | SortPreservingMergeExec: [state@5 ASC,city@2 ASC,time@6 ASC,__chunk_order@0 ASC], metrics=[elapsed_compute=1.234ms, output_rows=7] | +| | 
CoalesceBatchesExec: target_batch_size=8192, metrics=[elapsed_compute=1.234ms, output_rows=7] | +| | FilterExec: state@5 = MA, metrics=[elapsed_compute=1.234ms, output_rows=7] | +| | ParquetExec: file_groups={2 groups: [[1/1/1/00000000-0000-0000-0000-000000000002.parquet], [1/1/1/00000000-0000-0000-0000-000000000003.parquet]]}, projection=[__chunk_order, area, city, max_temp, min_temp, state, time], output_ordering=[state@5 ASC, city@2 ASC, time@6 ASC, __chunk_order@0 ASC], predicate=state@4 = MA, pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, metrics=[bytes_scanned=1532, elapsed_compute=1.234ms, file_open_errors=0, file_scan_errors=0, num_predicate_creation_errors=0, output_rows=7, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=3, row_groups_pruned=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning_total=1.234ms, time_elapsed_scanning_until_data=1.234ms] | | | | ---------- \ No newline at end of file diff --git a/influxdb_iox/tests/query_tests/cases/in/gapfill.sql.expected b/influxdb_iox/tests/query_tests/cases/in/gapfill.sql.expected index 3b33e02a8d..073c498494 100644 --- a/influxdb_iox/tests/query_tests/cases/in/gapfill.sql.expected +++ b/influxdb_iox/tests/query_tests/cases/in/gapfill.sql.expected @@ -24,7 +24,7 @@ ---------- | logical_plan | Projection: date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time) AS minute, COUNT(cpu.user) | | | GapFill: groupBy=[[date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)]], aggr=[[COUNT(cpu.user)]], time_column=date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time), stride=IntervalMonthDayNano("600000000000"), range=Included(TimestampNanosecond(957528000000000000, None))..Included(TimestampNanosecond(957531540000000000, None)) | -| | Aggregate: groupBy=[[datebin(IntervalMonthDayNano("600000000000"), cpu.time) AS date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)]], aggr=[[COUNT(cpu.user)]] | +| | Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("600000000000"), cpu.time) AS date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)]], aggr=[[COUNT(cpu.user)]] | | | TableScan: cpu projection=[time, user], full_filters=[cpu.time >= TimestampNanosecond(957528000000000000, None), cpu.time <= TimestampNanosecond(957531540000000000, None)] | | physical_plan | ProjectionExec: expr=[date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@0 as minute, COUNT(cpu.user)@1 as COUNT(cpu.user)] | | | GapFillExec: group_expr=[date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@0], aggr_expr=[COUNT(cpu.user)@1], stride=600000000000, time_range=Included("957528000000000000")..Included("957531540000000000") | @@ -33,7 +33,7 @@ | | AggregateExec: mode=FinalPartitioned, gby=[date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@0 as date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)], aggr=[COUNT(cpu.user)] | | | CoalesceBatchesExec: target_batch_size=8192 | | | RepartitionExec: partitioning=Hash([Column { name: "date_bin_gapfill(IntervalMonthDayNano(\"600000000000\"),cpu.time)", index: 0 }], 4), input_partitions=4 | -| | AggregateExec: mode=Partial, gby=[datebin(600000000000, time@0) as date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)], aggr=[COUNT(cpu.user)] | +| | AggregateExec: mode=Partial, gby=[date_bin(600000000000, time@0) as date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)], aggr=[COUNT(cpu.user)] 
| | | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 | | | CoalesceBatchesExec: target_batch_size=8192 | | | FilterExec: time@0 >= 957528000000000000 AND time@0 <= 957531540000000000 | @@ -117,7 +117,7 @@ Error during planning: gap-filling query is missing lower time bound ---------- | logical_plan | Projection: cpu.region, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time) AS minute, AVG(cpu.user) AS locf(AVG(cpu.user)) | | | GapFill: groupBy=[[cpu.region, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)]], aggr=[[LOCF(AVG(cpu.user))]], time_column=date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time), stride=IntervalMonthDayNano("600000000000"), range=Included(TimestampNanosecond(957528000000000000, None))..Included(TimestampNanosecond(957531540000000000, None)) | -| | Aggregate: groupBy=[[cpu.region, datebin(IntervalMonthDayNano("600000000000"), cpu.time) AS date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)]], aggr=[[AVG(cpu.user)]] | +| | Aggregate: groupBy=[[cpu.region, date_bin(IntervalMonthDayNano("600000000000"), cpu.time) AS date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)]], aggr=[[AVG(cpu.user)]] | | | TableScan: cpu projection=[region, time, user], full_filters=[cpu.time >= TimestampNanosecond(957528000000000000, None), cpu.time <= TimestampNanosecond(957531540000000000, None)] | | physical_plan | ProjectionExec: expr=[region@0 as region, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@1 as minute, AVG(cpu.user)@2 as locf(AVG(cpu.user))] | | | GapFillExec: group_expr=[region@0, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@1], aggr_expr=[LOCF(AVG(cpu.user)@2)], stride=600000000000, time_range=Included("957528000000000000")..Included("957531540000000000") | @@ -126,7 +126,7 @@ Error during planning: gap-filling query is missing lower time bound | | AggregateExec: mode=FinalPartitioned, gby=[region@0 as region, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@1 as date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)], aggr=[AVG(cpu.user)] | | | CoalesceBatchesExec: target_batch_size=8192 | | | RepartitionExec: partitioning=Hash([Column { name: "region", index: 0 }, Column { name: "date_bin_gapfill(IntervalMonthDayNano(\"600000000000\"),cpu.time)", index: 1 }], 4), input_partitions=4 | -| | AggregateExec: mode=Partial, gby=[region@0 as region, datebin(600000000000, time@1) as date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)], aggr=[AVG(cpu.user)], ordering_mode=PartiallyOrdered | +| | AggregateExec: mode=Partial, gby=[region@0 as region, date_bin(600000000000, time@1) as date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)], aggr=[AVG(cpu.user)], ordering_mode=PartiallyOrdered | | | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 | | | CoalesceBatchesExec: target_batch_size=8192 | | | FilterExec: time@1 >= 957528000000000000 AND time@1 <= 957531540000000000 | diff --git a/iox_query/src/exec/gapfill/mod.rs b/iox_query/src/exec/gapfill/mod.rs index 8d88d2d13d..2f32d32758 100644 --- a/iox_query/src/exec/gapfill/mod.rs +++ b/iox_query/src/exec/gapfill/mod.rs @@ -775,7 +775,7 @@ mod test { - " GapFillExec: group_expr=[date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@0], aggr_expr=[AVG(temps.temp)@1], stride=60000000000, time_range=Included(\"315532800000000000\")..Excluded(\"347155200000000000\")" - " SortExec: 
expr=[date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@0 ASC]" - " AggregateExec: mode=Final, gby=[date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@0 as date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))], aggr=[AVG(temps.temp)]" - - " AggregateExec: mode=Partial, gby=[datebin(60000000000, time@0, 0) as date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))], aggr=[AVG(temps.temp)]" + - " AggregateExec: mode=Partial, gby=[date_bin(60000000000, time@0, 0) as date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))], aggr=[AVG(temps.temp)]" - " EmptyExec: produce_one_row=false" "### ); @@ -805,7 +805,7 @@ mod test { - " GapFillExec: group_expr=[loc@0, date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@1, concat(Utf8(\"zz\"),temps.loc)@2], aggr_expr=[AVG(temps.temp)@3], stride=60000000000, time_range=Included(\"315532800000000000\")..Excluded(\"347155200000000000\")" - " SortExec: expr=[loc@0 ASC,concat(Utf8(\"zz\"),temps.loc)@2 ASC,date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@1 ASC]" - " AggregateExec: mode=Final, gby=[loc@0 as loc, date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@1 as date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\")), concat(Utf8(\"zz\"),temps.loc)@2 as concat(Utf8(\"zz\"),temps.loc)], aggr=[AVG(temps.temp)]" - - " AggregateExec: mode=Partial, gby=[loc@1 as loc, datebin(60000000000, time@0, 0) as date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\")), concat(zz, loc@1) as concat(Utf8(\"zz\"),temps.loc)], aggr=[AVG(temps.temp)]" + - " AggregateExec: mode=Partial, gby=[loc@1 as loc, date_bin(60000000000, time@0, 0) as date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\")), concat(zz, loc@1) as concat(Utf8(\"zz\"),temps.loc)], aggr=[AVG(temps.temp)]" - " EmptyExec: produce_one_row=false" "### ); diff --git a/iox_query/src/logical_optimizer/handle_gapfill.rs b/iox_query/src/logical_optimizer/handle_gapfill.rs index 50be1a52b3..1973877b3a 100644 --- a/iox_query/src/logical_optimizer/handle_gapfill.rs +++ b/iox_query/src/logical_optimizer/handle_gapfill.rs @@ -860,7 +860,7 @@ mod test { @r###" --- - "GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp)]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))" - - " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp)]]" + - " Aggregate: groupBy=[[date_bin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp)]]" - " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)" - " TableScan: temps" "###); @@ -890,7 +890,7 @@ mod test { @r###" --- - "GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time,TimestampNanosecond(7, None))]], aggr=[[AVG(temps.temp)]], 
time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time,TimestampNanosecond(7, None)), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))" - - " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time, TimestampNanosecond(7, None)) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time,TimestampNanosecond(7, None))]], aggr=[[AVG(temps.temp)]]" + - " Aggregate: groupBy=[[date_bin(IntervalDayTime(\"60000\"), temps.time, TimestampNanosecond(7, None)) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time,TimestampNanosecond(7, None))]], aggr=[[AVG(temps.temp)]]" - " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)" - " TableScan: temps" "###); @@ -919,7 +919,7 @@ mod test { @r###" --- - "GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), temps.loc]], aggr=[[AVG(temps.temp)]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))" - - " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), temps.loc]], aggr=[[AVG(temps.temp)]]" + - " Aggregate: groupBy=[[date_bin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), temps.loc]], aggr=[[AVG(temps.temp)]]" - " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)" - " TableScan: temps" "###); @@ -971,7 +971,7 @@ mod test { --- - "Projection: date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), AVG(temps.temp)" - " GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp)]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))" - - " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp)]]" + - " Aggregate: groupBy=[[date_bin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp)]]" - " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)" - " TableScan: temps" "###); @@ -1006,7 +1006,7 @@ mod test { --- - "Projection: date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), AVG(temps.temp) AS locf(AVG(temps.temp)), MIN(temps.temp) AS locf(MIN(temps.temp))" - " GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[LOCF(AVG(temps.temp)), LOCF(MIN(temps.temp))]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))" - - " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), MIN(temps.temp)]]" + - " Aggregate: groupBy=[[date_bin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), MIN(temps.temp)]]" - " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)" - " TableScan: 
temps" "###); @@ -1040,7 +1040,7 @@ mod test { --- - "Projection: date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), MIN(temps.temp) AS locf(MIN(temps.temp)) AS locf_min_temp" - " GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), LOCF(MIN(temps.temp))]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))" - - " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), MIN(temps.temp)]]" + - " Aggregate: groupBy=[[date_bin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), MIN(temps.temp)]]" - " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)" - " TableScan: temps" "###); @@ -1075,7 +1075,7 @@ mod test { --- - "Projection: date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), AVG(temps.temp) AS interpolate(AVG(temps.temp)), MIN(temps.temp) AS interpolate(MIN(temps.temp))" - " GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[INTERPOLATE(AVG(temps.temp)), INTERPOLATE(MIN(temps.temp))]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))" - - " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), MIN(temps.temp)]]" + - " Aggregate: groupBy=[[date_bin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), MIN(temps.temp)]]" - " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)" - " TableScan: temps" "###); diff --git a/iox_query/src/physical_optimizer/predicate_pushdown.rs b/iox_query/src/physical_optimizer/predicate_pushdown.rs index d55eea9ffa..117b5bae25 100644 --- a/iox_query/src/physical_optimizer/predicate_pushdown.rs +++ b/iox_query/src/physical_optimizer/predicate_pushdown.rs @@ -314,7 +314,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: None, + output_ordering: vec![vec![]], infinite_source: false, }; let plan = Arc::new( diff --git a/iox_query/src/physical_optimizer/projection_pushdown.rs b/iox_query/src/physical_optimizer/projection_pushdown.rs index 9e608eceee..4cde9e3c4e 100644 --- a/iox_query/src/physical_optimizer/projection_pushdown.rs +++ b/iox_query/src/physical_optimizer/projection_pushdown.rs @@ -97,11 +97,11 @@ impl PhysicalOptimizerRule for ProjectionPushdown { let output_ordering = child_parquet .base_config() .output_ordering - .as_ref() + .iter() .map(|output_ordering| { project_output_ordering(output_ordering, projection_exec.schema()) }) - .transpose()?; + .collect::<Result<Vec<_>>>()?; let base_config = FileScanConfig { projection: Some(projection), output_ordering, @@ -729,7 +729,7 @@ mod tests { projection: Some(projection), limit: None, table_partition_cols: vec![], - output_ordering: Some(vec![ + output_ordering: vec![vec![ PhysicalSortExpr { expr: expr_col("tag3", &schema_projected), options: Default::default(), }, @@ -742,7 +742,7 @@ mod tests { expr: expr_col("tag2", &schema_projected), options:
Default::default(), }, - ]), + ]], infinite_source: false, }; let inner = ParquetExec::new(base_config, Some(expr_string_cmp("tag1", &schema)), None); @@ -1330,7 +1330,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: None, + output_ordering: vec![vec![]], infinite_source: false, }; let plan = Arc::new(ParquetExec::new(base_config, None, None)); diff --git a/iox_query/src/physical_optimizer/sort/parquet_sortness.rs b/iox_query/src/physical_optimizer/sort/parquet_sortness.rs index 8ae4ce9271..31c2affbab 100644 --- a/iox_query/src/physical_optimizer/sort/parquet_sortness.rs +++ b/iox_query/src/physical_optimizer/sort/parquet_sortness.rs @@ -126,7 +126,7 @@ impl<'a> TreeNodeRewriter for ParquetSortnessRewriter<'a> { }; let base_config = parquet_exec.base_config(); - if base_config.output_ordering.is_none() { + if base_config.output_ordering.is_empty() { // no output ordering requested return Ok(node); } @@ -207,7 +207,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: Some(ordering(["col2", "col1"], &schema)), + output_ordering: vec![ordering(["col2", "col1"], &schema)], infinite_source: false, }; let inner = ParquetExec::new(base_config, None, None); @@ -242,7 +242,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: Some(ordering(["col2", "col1", CHUNK_ORDER_COLUMN_NAME], &schema)), + output_ordering: vec![ordering(["col2", "col1", CHUNK_ORDER_COLUMN_NAME], &schema)], infinite_source: false, }; let inner = ParquetExec::new(base_config, None, None); @@ -278,7 +278,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: Some(ordering(["col2", "col1"], &schema)), + output_ordering: vec![ordering(["col2", "col1"], &schema)], infinite_source: false, }; let inner = ParquetExec::new(base_config, None, None); @@ -320,7 +320,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: Some(ordering(["col2", "col1"], &schema)), + output_ordering: vec![ordering(["col2", "col1"], &schema)], infinite_source: false, }; let inner = ParquetExec::new(base_config, None, None); @@ -355,7 +355,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: Some(ordering(["col1", "col2"], &schema)), + output_ordering: vec![ordering(["col1", "col2"], &schema)], infinite_source: false, }; let inner = ParquetExec::new(base_config, None, None); @@ -390,7 +390,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: None, + output_ordering: vec![vec![]], infinite_source: false, }; let inner = ParquetExec::new(base_config, None, None); @@ -425,7 +425,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: Some(ordering(["col2", "col1"], &schema)), + output_ordering: vec![ordering(["col2", "col1"], &schema)], infinite_source: false, }; let inner = ParquetExec::new(base_config, None, None); @@ -489,7 +489,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: Some(ordering(["col2", "col1"], &schema)), + output_ordering: vec![ordering(["col2", "col1"], &schema)], infinite_source: false, }; let plan = Arc::new(ParquetExec::new(base_config, None, None)); @@ -518,7 +518,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: Some(ordering(["col1", "col2"], &schema)), + output_ordering: vec![ordering(["col1", "col2"], 
&schema)], infinite_source: false, }; let plan = Arc::new(ParquetExec::new(base_config, None, None)); @@ -555,7 +555,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: Some(ordering(["col1", "col2"], &schema)), + output_ordering: vec![ordering(["col1", "col2"], &schema)], infinite_source: false, }; let plan = Arc::new(ParquetExec::new(base_config, None, None)); @@ -593,7 +593,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: Some(ordering(["col2", "col1", CHUNK_ORDER_COLUMN_NAME], &schema)), + output_ordering: vec![ordering(["col2", "col1", CHUNK_ORDER_COLUMN_NAME], &schema)], infinite_source: false, }; let plan_parquet = Arc::new(ParquetExec::new(base_config, None, None)); diff --git a/iox_query/src/provider/physical.rs b/iox_query/src/provider/physical.rs index 48e9fb24ba..4ba7345e7c 100644 --- a/iox_query/src/provider/physical.rs +++ b/iox_query/src/provider/physical.rs @@ -301,6 +301,9 @@ pub fn chunks_to_physical_nodes( is_exact: false, }; + // No sort order is represented by an empty Vec + let output_ordering = vec![output_ordering.unwrap_or_default()]; + let base_config = FileScanConfig { object_store_url, file_schema, diff --git a/iox_query/src/pruning.rs b/iox_query/src/pruning.rs index 21514fba42..5e1b5d0b22 100644 --- a/iox_query/src/pruning.rs +++ b/iox_query/src/pruning.rs @@ -210,7 +210,7 @@ fn get_aggregate(stats: &ColumnStatistics, aggregate: Aggregate) -> Option<&Scal #[cfg(test)] mod test { - use std::sync::Arc; + use std::{ops::Not, sync::Arc}; use datafusion::prelude::{col, lit}; use datafusion_util::lit_dict; diff --git a/iox_query_influxql/src/plan/planner.rs b/iox_query_influxql/src/plan/planner.rs index fe0c8b9ed0..b214085ab0 100644 --- a/iox_query_influxql/src/plan/planner.rs +++ b/iox_query_influxql/src/plan/planner.rs @@ -2592,7 +2592,7 @@ mod test { Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN tag0 > Int32(0) THEN Utf8("tag0") END, CASE WHEN tag1 > Int32(0) THEN Utf8("tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] + Projection: make_array(CASE WHEN tag0 > Int32(0) THEN Utf8("tag0") END, CASE WHEN tag1 > Int32(0) THEN Utf8("tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(all_types.tag0 IS NOT NULL AS UInt64)) AS tag0, SUM(CAST(all_types.tag1 IS NOT NULL AS UInt64)) AS tag1]] [tag0:UInt64;N, tag1:UInt64;N] Filter: all_types.time >= now() - IntervalMonthDayNano("86400000000000") [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] Filter: Boolean(false) [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] @@ -2600,7 +2600,7 @@ mod test { Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - 
Projection: makearray(CASE WHEN cpu > Int32(0) THEN Utf8("cpu") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N] + Projection: make_array(CASE WHEN cpu > Int32(0) THEN Utf8("cpu") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(cpu.cpu IS NOT NULL AS UInt64)) AS cpu, SUM(CAST(cpu.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(cpu.region IS NOT NULL AS UInt64)) AS region]] [cpu:UInt64;N, host:UInt64;N, region:UInt64;N] Filter: cpu.time >= now() - IntervalMonthDayNano("86400000000000") [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] Filter: Boolean(false) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] @@ -2608,7 +2608,7 @@ mod test { Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN bar > Int32(0) THEN Utf8("bar") END, CASE WHEN foo > Int32(0) THEN Utf8("foo") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] + Projection: make_array(CASE WHEN bar > Int32(0) THEN Utf8("bar") END, CASE WHEN foo > Int32(0) THEN Utf8("foo") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(data.bar IS NOT NULL AS UInt64)) AS bar, SUM(CAST(data.foo IS NOT NULL AS UInt64)) AS foo]] [bar:UInt64;N, foo:UInt64;N] Filter: data.time >= now() - IntervalMonthDayNano("86400000000000") [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Filter: data.foo = Dictionary(Int32, Utf8("some_foo")) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] @@ -2616,7 +2616,7 @@ mod test { Projection: Dictionary(Int32, Utf8("disk")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN device > Int32(0) THEN Utf8("device") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N] + Projection: make_array(CASE WHEN device > Int32(0) THEN Utf8("device") END, CASE WHEN host > Int32(0) THEN 
Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(disk.device IS NOT NULL AS UInt64)) AS device, SUM(CAST(disk.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(disk.region IS NOT NULL AS UInt64)) AS region]] [device:UInt64;N, host:UInt64;N, region:UInt64;N] Filter: disk.time >= now() - IntervalMonthDayNano("86400000000000") [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] Filter: Boolean(false) [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] @@ -2624,7 +2624,7 @@ mod test { Projection: Dictionary(Int32, Utf8("diskio")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END, CASE WHEN status > Int32(0) THEN Utf8("status") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N] + Projection: make_array(CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END, CASE WHEN status > Int32(0) THEN Utf8("status") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(diskio.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(diskio.region IS NOT NULL AS UInt64)) AS region, SUM(CAST(diskio.status IS NOT NULL AS UInt64)) AS status]] [host:UInt64;N, region:UInt64;N, status:UInt64;N] Filter: diskio.time >= now() - IntervalMonthDayNano("86400000000000") [bytes_read:Int64;N, bytes_written:Int64;N, host:Dictionary(Int32, Utf8);N, is_local:Boolean;N, read_utilization:Float64;N, region:Dictionary(Int32, Utf8);N, status:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), write_utilization:Float64;N] Filter: Boolean(false) [bytes_read:Int64;N, bytes_written:Int64;N, host:Dictionary(Int32, Utf8);N, is_local:Boolean;N, read_utilization:Float64;N, region:Dictionary(Int32, Utf8);N, status:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), write_utilization:Float64;N] @@ -2632,7 +2632,7 @@ mod test { Projection: Dictionary(Int32, Utf8("merge_00")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN col0 > Int32(0) THEN Utf8("col0") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N] + Projection: make_array(CASE WHEN col0 > Int32(0) THEN Utf8("col0") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(merge_00.col0 IS NOT NULL AS UInt64)) AS col0]] [col0:UInt64;N] Filter: merge_00.time >= now() - IntervalMonthDayNano("86400000000000") [col0:Dictionary(Int32, Utf8);N, col1:Float64;N, 
col2:Boolean;N, col3:Utf8;N, time:Timestamp(Nanosecond, None)] Filter: Boolean(false) [col0:Dictionary(Int32, Utf8);N, col1:Float64;N, col2:Boolean;N, col3:Utf8;N, time:Timestamp(Nanosecond, None)] @@ -2640,7 +2640,7 @@ mod test { Projection: Dictionary(Int32, Utf8("merge_01")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN col1 > Int32(0) THEN Utf8("col1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N] + Projection: make_array(CASE WHEN col1 > Int32(0) THEN Utf8("col1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(merge_01.col1 IS NOT NULL AS UInt64)) AS col1]] [col1:UInt64;N] Filter: merge_01.time >= now() - IntervalMonthDayNano("86400000000000") [col0:Float64;N, col1:Dictionary(Int32, Utf8);N, col2:Utf8;N, col3:Boolean;N, time:Timestamp(Nanosecond, None)] Filter: Boolean(false) [col0:Float64;N, col1:Dictionary(Int32, Utf8);N, col2:Utf8;N, col3:Boolean;N, time:Timestamp(Nanosecond, None)] @@ -2648,7 +2648,7 @@ mod test { Projection: Dictionary(Int32, Utf8("temp_01")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] + Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_01.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_01.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N] Filter: temp_01.time >= now() - IntervalMonthDayNano("86400000000000") [field_f64:Float64;N, field_i64:Int64;N, field_str:Utf8;N, field_u64:UInt64;N, shared_field0:Float64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] Filter: Boolean(false) [field_f64:Float64;N, field_i64:Int64;N, field_str:Utf8;N, field_u64:UInt64;N, shared_field0:Float64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] @@ -2656,7 +2656,7 @@ mod test { Projection: Dictionary(Int32, Utf8("temp_02")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] + Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") 
END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_02.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_02.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N] Filter: temp_02.time >= now() - IntervalMonthDayNano("86400000000000") [shared_field0:Int64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] Filter: Boolean(false) [shared_field0:Int64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] @@ -2664,7 +2664,7 @@ mod test { Projection: Dictionary(Int32, Utf8("temp_03")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] + Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_03.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_03.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N] Filter: temp_03.time >= now() - IntervalMonthDayNano("86400000000000") [shared_field0:Utf8;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] Filter: Boolean(false) [shared_field0:Utf8;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] @@ -2676,70 +2676,70 @@ mod test { Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN tag0 > Int32(0) THEN Utf8("tag0") END, CASE WHEN tag1 > Int32(0) THEN Utf8("tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] + Projection: make_array(CASE WHEN tag0 > Int32(0) THEN Utf8("tag0") END, CASE WHEN tag1 > Int32(0) THEN Utf8("tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(all_types.tag0 IS NOT NULL AS UInt64)) AS tag0, SUM(CAST(all_types.tag1 IS NOT NULL AS UInt64)) AS tag1]] [tag0:UInt64;N, tag1:UInt64;N] Filter: all_types.time > TimestampNanosecond(1337, None) [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, 
None), u64_field:UInt64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN cpu > Int32(0) THEN Utf8("cpu") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N] + Projection: make_array(CASE WHEN cpu > Int32(0) THEN Utf8("cpu") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(cpu.cpu IS NOT NULL AS UInt64)) AS cpu, SUM(CAST(cpu.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(cpu.region IS NOT NULL AS UInt64)) AS region]] [cpu:UInt64;N, host:UInt64;N, region:UInt64;N] Filter: cpu.time > TimestampNanosecond(1337, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN bar > Int32(0) THEN Utf8("bar") END, CASE WHEN foo > Int32(0) THEN Utf8("foo") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] + Projection: make_array(CASE WHEN bar > Int32(0) THEN Utf8("bar") END, CASE WHEN foo > Int32(0) THEN Utf8("foo") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(data.bar IS NOT NULL AS UInt64)) AS bar, SUM(CAST(data.foo IS NOT NULL AS UInt64)) AS foo]] [bar:UInt64;N, foo:UInt64;N] Filter: data.time > TimestampNanosecond(1337, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Projection: Dictionary(Int32, Utf8("disk")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN device > Int32(0) THEN Utf8("device") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N] + Projection: 
make_array(CASE WHEN device > Int32(0) THEN Utf8("device") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(disk.device IS NOT NULL AS UInt64)) AS device, SUM(CAST(disk.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(disk.region IS NOT NULL AS UInt64)) AS region]] [device:UInt64;N, host:UInt64;N, region:UInt64;N] Filter: disk.time > TimestampNanosecond(1337, None) [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] TableScan: disk [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] Projection: Dictionary(Int32, Utf8("diskio")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END, CASE WHEN status > Int32(0) THEN Utf8("status") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N] + Projection: make_array(CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END, CASE WHEN status > Int32(0) THEN Utf8("status") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(diskio.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(diskio.region IS NOT NULL AS UInt64)) AS region, SUM(CAST(diskio.status IS NOT NULL AS UInt64)) AS status]] [host:UInt64;N, region:UInt64;N, status:UInt64;N] Filter: diskio.time > TimestampNanosecond(1337, None) [bytes_read:Int64;N, bytes_written:Int64;N, host:Dictionary(Int32, Utf8);N, is_local:Boolean;N, read_utilization:Float64;N, region:Dictionary(Int32, Utf8);N, status:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), write_utilization:Float64;N] TableScan: diskio [bytes_read:Int64;N, bytes_written:Int64;N, host:Dictionary(Int32, Utf8);N, is_local:Boolean;N, read_utilization:Float64;N, region:Dictionary(Int32, Utf8);N, status:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), write_utilization:Float64;N] Projection: Dictionary(Int32, Utf8("merge_00")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN col0 > Int32(0) THEN Utf8("col0") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N] + Projection: make_array(CASE WHEN col0 > Int32(0) THEN Utf8("col0") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(merge_00.col0 IS NOT NULL AS UInt64)) AS col0]] [col0:UInt64;N] Filter: merge_00.time > TimestampNanosecond(1337, None) [col0:Dictionary(Int32, Utf8);N, col1:Float64;N, col2:Boolean;N, col3:Utf8;N, 
time:Timestamp(Nanosecond, None)] TableScan: merge_00 [col0:Dictionary(Int32, Utf8);N, col1:Float64;N, col2:Boolean;N, col3:Utf8;N, time:Timestamp(Nanosecond, None)] Projection: Dictionary(Int32, Utf8("merge_01")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN col1 > Int32(0) THEN Utf8("col1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N] + Projection: make_array(CASE WHEN col1 > Int32(0) THEN Utf8("col1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(merge_01.col1 IS NOT NULL AS UInt64)) AS col1]] [col1:UInt64;N] Filter: merge_01.time > TimestampNanosecond(1337, None) [col0:Float64;N, col1:Dictionary(Int32, Utf8);N, col2:Utf8;N, col3:Boolean;N, time:Timestamp(Nanosecond, None)] TableScan: merge_01 [col0:Float64;N, col1:Dictionary(Int32, Utf8);N, col2:Utf8;N, col3:Boolean;N, time:Timestamp(Nanosecond, None)] Projection: Dictionary(Int32, Utf8("temp_01")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] + Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_01.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_01.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N] Filter: temp_01.time > TimestampNanosecond(1337, None) [field_f64:Float64;N, field_i64:Int64;N, field_str:Utf8;N, field_u64:UInt64;N, shared_field0:Float64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] TableScan: temp_01 [field_f64:Float64;N, field_i64:Int64;N, field_str:Utf8;N, field_u64:UInt64;N, shared_field0:Float64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] Projection: Dictionary(Int32, Utf8("temp_02")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] + Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] Aggregate: 
groupBy=[[]], aggr=[[SUM(CAST(temp_02.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_02.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N] Filter: temp_02.time > TimestampNanosecond(1337, None) [shared_field0:Int64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] TableScan: temp_02 [shared_field0:Int64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] Projection: Dictionary(Int32, Utf8("temp_03")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] + Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_03.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_03.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N] Filter: temp_03.time > TimestampNanosecond(1337, None) [shared_field0:Utf8;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] TableScan: temp_03 [shared_field0:Utf8;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] @@ -2836,7 +2836,7 @@ mod test { Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time AS time, value AS value [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, value:Float64;N] Sort: time ASC NULLS LAST [time:Timestamp(Nanosecond, None);N, value:Float64;N] Projection: time, AVG(cpu.usage_idle) AS value [time:Timestamp(Nanosecond, None);N, value:Float64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] "###); } @@ -3016,7 +3016,7 @@ mod test { Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, last:Float64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, (selector_last(cpu.usage_idle,cpu.time))[value] AS last [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, last:Float64;N] GapFill: groupBy=[[time]], aggr=[[selector_last(cpu.usage_idle,cpu.time)]], time_column=time, stride=IntervalMonthDayNano("5000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, 
selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("5000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[selector_last(cpu.usage_idle, cpu.time)]] [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("5000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[selector_last(cpu.usage_idle, cpu.time)]] [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] "###); @@ -3025,7 +3025,7 @@ mod test { Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, first:Float64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, (coalesce_struct(selector_first(cpu.usage_idle,cpu.time), Struct({value:Float64(0),time:TimestampNanosecond(0, None)})))[value] AS first [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, first:Float64;N] GapFill: groupBy=[[time]], aggr=[[selector_first(cpu.usage_idle,cpu.time)]], time_column=time, stride=IntervalMonthDayNano("5000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, selector_first(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("5000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[selector_first(cpu.usage_idle, cpu.time)]] [time:Timestamp(Nanosecond, None);N, selector_first(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("5000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[selector_first(cpu.usage_idle, cpu.time)]] [time:Timestamp(Nanosecond, None);N, selector_first(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, 
region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] "###); @@ -3601,7 +3601,7 @@ mod test { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(none)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); @@ -3609,102 +3609,137 @@ mod test { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s, 5s) FILL(none)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(5000000000, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(5000000000, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); } #[test] - fn group_by_time_gapfill() { + fn group_by_time_gapfill_no_bounds() { // No time bounds assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] 
[time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + #[test] + fn group_by_time_gapfill_no_lower_time_bounds() { // No lower time bounds assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time < '2022-10-31T02:02:00Z' GROUP BY TIME(10s)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(TimestampNanosecond(1667181720000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Filter: data.time < TimestampNanosecond(1667181720000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + #[test] + fn group_by_time_gapfill_no_upper_time_bounds() { // No upper time bounds assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time >= '2022-10-31T02:00:00Z' GROUP BY TIME(10s)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Included(TimestampNanosecond(1667181600000000000, None))..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Filter: data.time >= TimestampNanosecond(1667181600000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, 
i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + + #[test] + fn group_by_time_gapfill_default_is_fill_null1() { // Default is FILL(null) assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(10s)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Included(TimestampNanosecond(1667181600000000000, None))..Excluded(TimestampNanosecond(1667181720000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Filter: data.time >= TimestampNanosecond(1667181600000000000, None) AND data.time < TimestampNanosecond(1667181720000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + + #[test] + fn group_by_time_gapfill_default_is_fill_null2() { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N,
foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + + #[test] + fn group_by_time_gapfill_fill_null() { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(null)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + + #[test] + fn group_by_time_gapfill_fill_previous() { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(previous)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[[time]], aggr=[[LOCF(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + + #[test] + fn group_by_time_gapfill_fill_zero() { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(0)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time,
stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + + #[test] + fn group_by_time_gapfill_fill_linear() { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(linear)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[[time]], aggr=[[INTERPOLATE(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + #[test] + fn group_by_time_gapfill_coalesces_the_fill_value() { // Coalesces the fill value, which is a float, to the matching type of a `COUNT` aggregate.
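The `datebin` to `date_bin` churn in these snapshots tracks a function rename in the bumped arrow-datafusion revision; only the name changes, the binning semantics stay the same. As a minimal sketch of what the planner's `date_bin(stride, source, origin)` call computes, assuming a DataFusion build recent enough to expose `date_bin` in SQL (the query and setup below are illustrative, not code from this patch):

    use datafusion::prelude::*;

    #[tokio::main]
    async fn main() -> datafusion::error::Result<()> {
        let ctx = SessionContext::new();
        // date_bin(stride, source, origin) rounds `source` down to the nearest
        // bin boundary of the form origin + k * stride. This is how InfluxQL's
        // GROUP BY TIME(10s, 5s) lowers: the 5s offset becomes the origin.
        let df = ctx
            .sql(
                "SELECT date_bin(
                     INTERVAL '10 seconds',
                     TIMESTAMP '2022-10-31 02:00:13',
                     TIMESTAMP '1970-01-01 00:00:05'
                 ) AS bin",
            )
            .await?;
        // Expected: 2022-10-31T02:00:05, since 02:00:13 falls in [02:00:05, 02:00:15).
        df.show().await?;
        Ok(())
    }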
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(3.2)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(3)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + #[test] + fn group_by_time_gapfill_aggregates_part_of_binary_expression() { // Aggregates as part of a binary expression assert_snapshot!(plan("SELECT COUNT(f64_field) + MEAN(f64_field) FROM data GROUP BY TIME(10s) FILL(3.2)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count_mean:Float64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(3)) + coalesce_struct(AVG(data.f64_field), Float64(3.2)) AS count_mean [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count_mean:Float64;N] GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field), AVG(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N, AVG(data.f64_field):Float64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field), AVG(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N, AVG(data.f64_field):Float64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field), AVG(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N, AVG(data.f64_field):Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); } @@ -3721,6 +3756,10 @@ mod test { Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + + #[test] + fn 
with_limit_or_offset2() { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY foo OFFSET 1"), @r###" Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] Projection: iox::measurement, time, foo, count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] @@ -3731,6 +3770,10 @@ mod test { Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + + #[test] + fn with_limit_or_offset3() { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY foo LIMIT 2 OFFSET 3"), @r###" Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] Projection: iox::measurement, time, foo, count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] @@ -3741,7 +3784,10 @@ mod test { Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + #[test] + fn with_limit_or_offset_errors() { // Fallible // returns an error if LIMIT or OFFSET values exceed i64::MAX @@ -3761,7 +3807,7 @@ mod test { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10u) FILL(none)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); } diff --git a/parquet_file/src/storage.rs b/parquet_file/src/storage.rs index 290326c63d..6e7c32ddf3 100644 --- a/parquet_file/src/storage.rs +++ b/parquet_file/src/storage.rs @@ -135,7 +135,7 @@ impl ParquetExecInput { limit: None, table_partition_cols: vec![], // Parquet files ARE actually sorted but we don't care here since we just construct a `collect` plan. 
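The reshuffling above splits the single long `group_by_time_gapfill` test into one `#[test]` per FILL behavior, so a snapshot mismatch now points at the offending case and the cases can run in parallel. These tests use insta's inline snapshots; a minimal self-contained example of the same pattern, with a made-up function under test:

    // dev-dependency assumed: insta = "1"

    /// Stand-in for the real query planner under test.
    fn plan(query: &str) -> String {
        format!("LogicalPlan for: {query}")
    }

    #[test]
    fn snapshot_example() {
        // The @-literal is the expected output; `cargo insta review` rewrites
        // it in place when the actual output changes.
        insta::assert_snapshot!(plan("SELECT 1"), @"LogicalPlan for: SELECT 1");
    }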
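The two `output_ordering` hunks below are fallout from the same DataFusion bump: `FileScanConfig::output_ordering` appears to have changed from `Option<Vec<PhysicalSortExpr>>` to `Vec<Vec<PhysicalSortExpr>>`, that is, a list of known orderings instead of at most one. A rough sketch of the new shape; the helper name is made up, and the before/after types are inferred from the change rather than verified against the exact revision:

    use datafusion::physical_expr::PhysicalSortExpr;

    // Old field: Option<Vec<PhysicalSortExpr>>, where None meant "no known order".
    // New field: Vec<Vec<PhysicalSortExpr>>, where each inner Vec is one ordering.
    //
    // The patch passes vec![vec![]], a single ordering with no sort keys, in
    // place of the old None, keeping these scans opted out of sort-based
    // optimizations.
    fn unordered_scan_ordering() -> Vec<Vec<PhysicalSortExpr>> {
        vec![vec![]]
    }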
- output_ordering: None, + output_ordering: vec![vec![]], infinite_source: false, }; let exec = ParquetExec::new(base_config, None, None); diff --git a/parquet_to_line_protocol/src/lib.rs b/parquet_to_line_protocol/src/lib.rs index 6845e5255e..a0522d66c2 100644 --- a/parquet_to_line_protocol/src/lib.rs +++ b/parquet_to_line_protocol/src/lib.rs @@ -253,7 +253,7 @@ impl ParquetFileReader { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: None, + output_ordering: vec![vec![]], infinite_source: false, }; diff --git a/predicate/src/delete_expr.rs b/predicate/src/delete_expr.rs index 21571a1cce..de7b191646 100644 --- a/predicate/src/delete_expr.rs +++ b/predicate/src/delete_expr.rs @@ -126,7 +126,7 @@ pub(crate) fn df_to_scalar( #[cfg(test)] mod tests { - use std::sync::Arc; + use std::{ops::Not, sync::Arc}; use arrow::datatypes::Field; use test_helpers::assert_contains; diff --git a/predicate/src/lib.rs b/predicate/src/lib.rs index 9734b40900..b7a79df759 100644 --- a/predicate/src/lib.rs +++ b/predicate/src/lib.rs @@ -34,6 +34,7 @@ use schema::TIME_COLUMN_NAME; use std::{ collections::{BTreeSet, HashSet}, fmt, + ops::Not, }; /// This `Predicate` represents the empty predicate (aka that evaluates to true for all rows). diff --git a/predicate/src/rpc_predicate/rewrite.rs b/predicate/src/rpc_predicate/rewrite.rs index fa4f946cca..69cf116ec1 100644 --- a/predicate/src/rpc_predicate/rewrite.rs +++ b/predicate/src/rpc_predicate/rewrite.rs @@ -255,7 +255,7 @@ fn is_col_op_lit(expr: &Expr) -> Option<&str> { #[cfg(test)] mod tests { - use std::ops::Add; + use std::ops::{Add, Not}; use super::*; use arrow::datatypes::DataType; diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index aec7b077aa..c8370fc133 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -30,9 +30,9 @@ bytes = { version = "1" } chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "serde"] } crossbeam-utils = { version = "0.8" } crypto-common = { version = "0.1", default-features = false, features = ["std"] } -datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ee1019d9585252066ef5b288c84aabebcbd93ca6" } -datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ee1019d9585252066ef5b288c84aabebcbd93ca6", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } -datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ee1019d9585252066ef5b288c84aabebcbd93ca6", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "859251b4a20e00c3dfe73eee6b605fcf722687e7" } +datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "859251b4a20e00c3dfe73eee6b605fcf722687e7", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } +datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "859251b4a20e00c3dfe73eee6b605fcf722687e7", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } digest = { version = "0.10", features = ["mac", "std"] } either = { version = "1" } fixedbitset = { version = "0.4" } @@ -81,6 +81,7 @@ smallvec = { version = "1", default-features = false, features = ["union"] } sqlparser = { version = "0.34", 
features = ["visitor"] } sqlx = { version = "0.6", features = ["json", "postgres", "runtime-tokio-rustls", "sqlite", "tls", "uuid"] } sqlx-core = { version = "0.6", default-features = false, features = ["any", "migrate", "postgres", "runtime-tokio-rustls", "sqlite", "uuid"] } +strum = { version = "0.24", features = ["derive"] } thrift = { version = "0.17" } tokio = { version = "1", features = ["full", "test-util", "tracing"] } tokio-stream = { version = "0.1", features = ["fs", "net"] }