diff --git a/Cargo.lock b/Cargo.lock index fe168dd63e..bbed8ec598 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1382,7 +1382,7 @@ dependencies = [ [[package]] name = "datafusion" version = "25.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7" dependencies = [ "ahash 0.8.3", "arrow", @@ -1431,7 +1431,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "25.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7" dependencies = [ "arrow", "arrow-array", @@ -1445,7 +1445,7 @@ dependencies = [ [[package]] name = "datafusion-execution" version = "25.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7" dependencies = [ "dashmap", "datafusion-common", @@ -1462,18 +1462,21 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "25.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7" dependencies = [ "ahash 0.8.3", "arrow", "datafusion-common", + "lazy_static", "sqlparser", + "strum", + "strum_macros", ] [[package]] name = "datafusion-optimizer" version = "25.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7" dependencies = [ "arrow", "async-trait", @@ -1490,7 +1493,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "25.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7" dependencies = [ "ahash 0.8.3", "arrow", @@ -1522,7 +1525,7 @@ dependencies = [ [[package]] name = "datafusion-proto" version = "25.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7" dependencies = [ "arrow", "chrono", @@ -1536,7 +1539,7 @@ dependencies = [ [[package]] name = "datafusion-row" version = "25.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6" +source = 
"git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7" dependencies = [ "arrow", "datafusion-common", @@ -1547,7 +1550,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "25.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7" dependencies = [ "arrow", "arrow-schema", @@ -1777,9 +1780,9 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "flatbuffers" -version = "23.1.21" +version = "23.5.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77f5399c2c9c50ae9418e522842ad362f61ee48b346ac106807bd355a8a7c619" +checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" dependencies = [ "bitflags", "rustc_version", @@ -1841,9 +1844,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "form_urlencoded" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8" +checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" dependencies = [ "percent-encoding", ] @@ -2388,9 +2391,9 @@ dependencies = [ [[package]] name = "idna" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" dependencies = [ "unicode-bidi", "unicode-normalization", @@ -3297,9 +3300,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.144" +version = "0.2.145" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1" +checksum = "fc86cde3ff845662b8f4ef6cb50ea0e20c524eb3d29ae048287e06a1b3fa6a81" [[package]] name = "libm" @@ -3860,9 +3863,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.17.2" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9670a07f94779e00908f3e686eab508878ebb390ba6e604d3a284c00e8d0487b" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" dependencies = [ "parking_lot_core 0.9.7", ] @@ -4108,9 +4111,9 @@ dependencies = [ [[package]] name = "percent-encoding" -version = "2.2.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" +checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" [[package]] name = "pest" @@ -4707,9 +4710,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.8.3" +version = "1.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81ca098a9821bd52d6b24fd8b10bd081f47d39c22778cafaa75a2857a62c6390" +checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f" dependencies = [ "aho-corasick", "memchr", @@ -5573,6 +5576,9 @@ name = "strum" version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" 
+dependencies = [ + "strum_macros", +] [[package]] name = "strum_macros" @@ -6311,9 +6317,9 @@ checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" [[package]] name = "url" -version = "2.3.1" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643" +checksum = "50bff7831e19200a85b17131d085c25d7811bc4e186efdaf54bbd132994a88cb" dependencies = [ "form_urlencoded", "idna", @@ -6864,6 +6870,7 @@ dependencies = [ "sqlx", "sqlx-core", "sqlx-macros", + "strum", "syn 1.0.109", "syn 2.0.16", "thrift", @@ -6876,6 +6883,8 @@ dependencies = [ "tracing-core", "tracing-log", "tracing-subscriber", + "unicode-bidi", + "unicode-normalization", "url", "uuid 1.3.3", "webpki", diff --git a/Cargo.toml b/Cargo.toml index 2bbdc9de13..fe4dad8e3d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -118,8 +118,8 @@ license = "MIT OR Apache-2.0" [workspace.dependencies] arrow = { version = "40.0.0" } arrow-flight = { version = "40.0.0" } -datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ee1019d9585252066ef5b288c84aabebcbd93ca6", default-features = false } -datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ee1019d9585252066ef5b288c84aabebcbd93ca6" } +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "859251b4a20e00c3dfe73eee6b605fcf722687e7", default-features = false } +datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "859251b4a20e00c3dfe73eee6b605fcf722687e7" } hashbrown = { version = "0.13.2" } object_store = { version = "0.5.6" } parquet = { version = "40.0.0" } diff --git a/arrow_util/Cargo.toml b/arrow_util/Cargo.toml index 1f7bb4b3bd..2b9ba58274 100644 --- a/arrow_util/Cargo.toml +++ b/arrow_util/Cargo.toml @@ -15,8 +15,8 @@ chrono = { version = "0.4", default-features = false } comfy-table = { version = "6.2", default-features = false } hashbrown = { workspace = true } num-traits = "0.2" -once_cell = { version = "1.17", features = ["parking_lot"] } -regex = "1.8.3" +once_cell = { version = "1.18", features = ["parking_lot"] } +regex = "1.8.4" snafu = "0.7" uuid = "1" workspace-hack = { version = "0.1", path = "../workspace-hack" } diff --git a/datafusion_util/Cargo.toml b/datafusion_util/Cargo.toml index 3b70e903c7..4f45f61d15 100644 --- a/datafusion_util/Cargo.toml +++ b/datafusion_util/Cargo.toml @@ -15,7 +15,7 @@ observability_deps = { path = "../observability_deps" } pin-project = "1.1" tokio = { version = "1.28", features = ["parking_lot", "sync"] } tokio-stream = "0.1" -url = "2.2" +url = "2.4" workspace-hack = { version = "0.1", path = "../workspace-hack" } [dev-dependencies] diff --git a/executor/Cargo.toml b/executor/Cargo.toml index e2a73c6529..f1c337fa8b 100644 --- a/executor/Cargo.toml +++ b/executor/Cargo.toml @@ -8,7 +8,7 @@ license.workspace = true [dependencies] futures = "0.3" observability_deps = { path = "../observability_deps" } -once_cell = { version = "1.17", features = ["parking_lot"] } +once_cell = { version = "1.18", features = ["parking_lot"] } parking_lot = "0.12" pin-project = "1.1" tokio = { version = "1.28" } diff --git a/garbage_collector/Cargo.toml b/garbage_collector/Cargo.toml index 6c319dfa02..b0fa415c86 100644 --- a/garbage_collector/Cargo.toml +++ b/garbage_collector/Cargo.toml @@ -28,6 +28,6 @@ bytes = "1.4" data_types = { path = "../data_types" } filetime = "0.2" metric = { path = "../metric" } -once_cell = { version = "1.17", 
features = ["parking_lot"] } +once_cell = { version = "1.18", features = ["parking_lot"] } parquet_file = { path = "../parquet_file" } tempfile = "3" diff --git a/generated_types/protos/influxdata/iox/querier/v1/flight.proto b/generated_types/protos/influxdata/iox/querier/v1/flight.proto index d37040e8aa..adf544b16b 100644 --- a/generated_types/protos/influxdata/iox/querier/v1/flight.proto +++ b/generated_types/protos/influxdata/iox/querier/v1/flight.proto @@ -16,8 +16,9 @@ option go_package = "github.com/influxdata/iox/querier/v1"; * `GetFlightInfo` followed by a `DoGet`). */ message ReadInfo { - // Namespace name. - string namespace_name = 1; + // Database name + // This used to be namespace_name + string database = 1; // Query text (either SQL or InfluxQL, depending on query_type) string sql_query = 2; diff --git a/influxdb2_client/Cargo.toml b/influxdb2_client/Cargo.toml index 335bacbc43..f8df7a436f 100644 --- a/influxdb2_client/Cargo.toml +++ b/influxdb2_client/Cargo.toml @@ -12,12 +12,12 @@ reqwest = { version = "0.11", default-features = false, features = ["stream", "j serde = { version = "1.0", features = ["derive"] } serde_json = "1.0.96" snafu = "0.7" -url = "2.3.1" +url = "2.4.0" uuid = { version = "1", features = ["v4"] } [dev-dependencies] # In alphabetical order mockito = "1.0" -once_cell = { version = "1.17", features = ["parking_lot"] } +once_cell = { version = "1.18", features = ["parking_lot"] } parking_lot = "0.12" tokio = { version = "1.28", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] } test_helpers = { path = "../test_helpers" } diff --git a/influxdb_iox/Cargo.toml b/influxdb_iox/Cargo.toml index 1874731fe0..b9009464af 100644 --- a/influxdb_iox/Cargo.toml +++ b/influxdb_iox/Cargo.toml @@ -64,7 +64,7 @@ itertools = "0.10.5" lazy_static = "1.4.0" libc = { version = "0.2" } num_cpus = "1.15.0" -once_cell = { version = "1.17", features = ["parking_lot"] } +once_cell = { version = "1.18", features = ["parking_lot"] } rustyline = { version = "11.0", default-features = false, features = ["with-file-history"]} serde_json = "1.0.96" snafu = "0.7" diff --git a/influxdb_iox/tests/query_tests/cases/in/date_bin.sql.expected b/influxdb_iox/tests/query_tests/cases/in/date_bin.sql.expected index f4ed971ead..dcbde0c76f 100644 --- a/influxdb_iox/tests/query_tests/cases/in/date_bin.sql.expected +++ b/influxdb_iox/tests/query_tests/cases/in/date_bin.sql.expected @@ -1,67 +1,67 @@ -- Test Setup: OneMeasurementTwoSeries -- SQL: select date_bin(INTERVAL '1 month', column1) from (values (timestamp '2022-01-01 00:00:00'), (timestamp '2022-01-01 01:00:00'), (timestamp '2022-01-02 00:00:00'), (timestamp '2022-02-02 00:00:00'), (timestamp '2022-02-15 00:00:00'), (timestamp '2022-03-31 00:00:00') ) as sq; -+---------------------------------------------------------------------------+ -| datebin(IntervalMonthDayNano("79228162514264337593543950336"),sq.column1) | -+---------------------------------------------------------------------------+ -| 2022-01-01T00:00:00Z | -| 2022-01-01T00:00:00Z | -| 2022-01-01T00:00:00Z | -| 2022-02-01T00:00:00Z | -| 2022-02-01T00:00:00Z | -| 2022-03-01T00:00:00Z | -+---------------------------------------------------------------------------+ ++----------------------------------------------------------------------------+ +| date_bin(IntervalMonthDayNano("79228162514264337593543950336"),sq.column1) | ++----------------------------------------------------------------------------+ +| 2022-01-01T00:00:00Z | +| 2022-01-01T00:00:00Z | +| 
2022-01-01T00:00:00Z | +| 2022-02-01T00:00:00Z | +| 2022-02-01T00:00:00Z | +| 2022-03-01T00:00:00Z | ++----------------------------------------------------------------------------+ -- SQL: select date_bin('1 year', column1) from (values (timestamp '2022-01-01 00:00:00'), (timestamp '2023-01-01 01:00:00'), (timestamp '2022-01-02 00:00:00'), (timestamp '2022-02-02 00:00:00'), (timestamp '2022-02-15 00:00:00'), (timestamp '2022-03-31 00:00:00') ) as sq; -+------------------------------------+ -| datebin(Utf8("1 year"),sq.column1) | -+------------------------------------+ -| 2022-01-01T00:00:00Z | -| 2023-01-01T00:00:00Z | -| 2022-01-01T00:00:00Z | -| 2022-01-01T00:00:00Z | -| 2022-01-01T00:00:00Z | -| 2022-01-01T00:00:00Z | -+------------------------------------+ ++-------------------------------------+ +| date_bin(Utf8("1 year"),sq.column1) | ++-------------------------------------+ +| 2022-01-01T00:00:00Z | +| 2023-01-01T00:00:00Z | +| 2022-01-01T00:00:00Z | +| 2022-01-01T00:00:00Z | +| 2022-01-01T00:00:00Z | +| 2022-01-01T00:00:00Z | ++-------------------------------------+ -- SQL: select date_bin('1 month', column1, '1970-12-31T00:15:00Z') from (values (timestamp '2022-01-01 00:00:00'), (timestamp '2022-01-01 01:00:00'), (timestamp '2022-01-02 00:00:00'), (timestamp '2022-02-02 00:00:00'), (timestamp '2022-02-15 00:00:00'), (timestamp '2022-03-31 00:00:00') ) as sq; -+------------------------------------------------------------------+ -| datebin(Utf8("1 month"),sq.column1,Utf8("1970-12-31T00:15:00Z")) | -+------------------------------------------------------------------+ -| 2021-12-31T00:15:00Z | -| 2021-12-31T00:15:00Z | -| 2021-12-31T00:15:00Z | -| 2022-01-31T00:15:00Z | -| 2022-01-31T00:15:00Z | -| 2022-02-28T00:15:00Z | -+------------------------------------------------------------------+ ++-------------------------------------------------------------------+ +| date_bin(Utf8("1 month"),sq.column1,Utf8("1970-12-31T00:15:00Z")) | ++-------------------------------------------------------------------+ +| 2021-12-31T00:15:00Z | +| 2021-12-31T00:15:00Z | +| 2021-12-31T00:15:00Z | +| 2022-01-31T00:15:00Z | +| 2022-01-31T00:15:00Z | +| 2022-02-28T00:15:00Z | ++-------------------------------------------------------------------+ -- SQL: SELECT DATE_BIN('5 month', '2022-01-01T00:00:00Z'); -+-------------------------------------------------------+ -| datebin(Utf8("5 month"),Utf8("2022-01-01T00:00:00Z")) | -+-------------------------------------------------------+ -| 2021-09-01T00:00:00Z | -+-------------------------------------------------------+ ++--------------------------------------------------------+ +| date_bin(Utf8("5 month"),Utf8("2022-01-01T00:00:00Z")) | ++--------------------------------------------------------+ +| 2021-09-01T00:00:00Z | ++--------------------------------------------------------+ -- SQL: SELECT DATE_BIN('3 month', '2022-04-01T00:00:00Z', '2021-05-31T00:04:00Z'); -+------------------------------------------------------------------------------------+ -| datebin(Utf8("3 month"),Utf8("2022-04-01T00:00:00Z"),Utf8("2021-05-31T00:04:00Z")) | -+------------------------------------------------------------------------------------+ -| 2022-02-28T00:04:00Z | -+------------------------------------------------------------------------------------+ ++-------------------------------------------------------------------------------------+ +| date_bin(Utf8("3 month"),Utf8("2022-04-01T00:00:00Z"),Utf8("2021-05-31T00:04:00Z")) | 
++-------------------------------------------------------------------------------------+ +| 2022-02-28T00:04:00Z | ++-------------------------------------------------------------------------------------+ -- SQL: select date_bin('1 month', timestamp '2000-01-31T00:00:00', timestamp '2000-02-29T00:00:00'); -+----------------------------------------------------------------------------------+ -| datebin(Utf8("1 month"),Utf8("2000-01-31T00:00:00"),Utf8("2000-02-29T00:00:00")) | -+----------------------------------------------------------------------------------+ -| 2000-01-29T00:00:00Z | -+----------------------------------------------------------------------------------+ ++-----------------------------------------------------------------------------------+ +| date_bin(Utf8("1 month"),Utf8("2000-01-31T00:00:00"),Utf8("2000-02-29T00:00:00")) | ++-----------------------------------------------------------------------------------+ +| 2000-01-29T00:00:00Z | ++-----------------------------------------------------------------------------------+ -- SQL: select date_bin('1 month', timestamp '2000-01-31T00:00:00', timestamp '2000-03-29T00:00:00'); -+----------------------------------------------------------------------------------+ -| datebin(Utf8("1 month"),Utf8("2000-01-31T00:00:00"),Utf8("2000-03-29T00:00:00")) | -+----------------------------------------------------------------------------------+ -| 2000-01-29T00:00:00Z | -+----------------------------------------------------------------------------------+ ++-----------------------------------------------------------------------------------+ +| date_bin(Utf8("1 month"),Utf8("2000-01-31T00:00:00"),Utf8("2000-03-29T00:00:00")) | ++-----------------------------------------------------------------------------------+ +| 2000-01-29T00:00:00Z | ++-----------------------------------------------------------------------------------+ -- SQL: SELECT DATE_BIN('3 years 1 months', '2022-09-01 00:00:00Z'); -+----------------------------------------------------------------+ -| datebin(Utf8("3 years 1 months"),Utf8("2022-09-01 00:00:00Z")) | -+----------------------------------------------------------------+ -| 2022-06-01T00:00:00Z | -+----------------------------------------------------------------+ ++-----------------------------------------------------------------+ +| date_bin(Utf8("3 years 1 months"),Utf8("2022-09-01 00:00:00Z")) | ++-----------------------------------------------------------------+ +| 2022-06-01T00:00:00Z | ++-----------------------------------------------------------------+ -- SQL: SELECT * FROM cpu ORDER BY REGION, TIME; +------+--------+----------------------+------+ | idle | region | time | user | @@ -103,14 +103,14 @@ ---------- | plan_type | plan | ---------- -| logical_plan | Projection: datebin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z")) AS month, COUNT(cpu.user) | -| | Aggregate: groupBy=[[datebin(IntervalMonthDayNano("79228162514264337593543950336"), cpu.time, TimestampNanosecond(31450500000000000, None)) AS datebin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))]], aggr=[[COUNT(cpu.user)]] | +| logical_plan | Projection: date_bin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z")) AS month, COUNT(cpu.user) | +| | Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("79228162514264337593543950336"), cpu.time, TimestampNanosecond(31450500000000000, None)) AS date_bin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))]], aggr=[[COUNT(cpu.user)]] | | | TableScan: cpu projection=[time, user], full_filters=[cpu.time 
>= TimestampNanosecond(957528000000000000, None), cpu.time <= TimestampNanosecond(957531540000000000, None)] | -| physical_plan | ProjectionExec: expr=[datebin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))@0 as month, COUNT(cpu.user)@1 as COUNT(cpu.user)] | -| | AggregateExec: mode=FinalPartitioned, gby=[datebin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))@0 as datebin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))], aggr=[COUNT(cpu.user)] | +| physical_plan | ProjectionExec: expr=[date_bin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))@0 as month, COUNT(cpu.user)@1 as COUNT(cpu.user)] | +| | AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))@0 as date_bin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))], aggr=[COUNT(cpu.user)] | | | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "datebin(Utf8(\"1 month\"),cpu.time,Utf8(\"1970-12-31T00:15:00Z\"))", index: 0 }], 4), input_partitions=4 | -| | AggregateExec: mode=Partial, gby=[datebin(79228162514264337593543950336, time@0, 31450500000000000) as datebin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))], aggr=[COUNT(cpu.user)] | +| | RepartitionExec: partitioning=Hash([Column { name: "date_bin(Utf8(\"1 month\"),cpu.time,Utf8(\"1970-12-31T00:15:00Z\"))", index: 0 }], 4), input_partitions=4 | +| | AggregateExec: mode=Partial, gby=[date_bin(79228162514264337593543950336, time@0, 31450500000000000) as date_bin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))], aggr=[COUNT(cpu.user)] | | | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 | | | CoalesceBatchesExec: target_batch_size=8192 | | | FilterExec: time@0 >= 957528000000000000 AND time@0 <= 957531540000000000 | diff --git a/influxdb_iox/tests/query_tests/cases/in/duplicates_parquet.sql.expected b/influxdb_iox/tests/query_tests/cases/in/duplicates_parquet.sql.expected index 20b0c3d3d7..b4d20b99fe 100644 --- a/influxdb_iox/tests/query_tests/cases/in/duplicates_parquet.sql.expected +++ b/influxdb_iox/tests/query_tests/cases/in/duplicates_parquet.sql.expected @@ -96,16 +96,15 @@ ---------- | plan_type | plan | ---------- -| Plan with Metrics | CoalescePartitionsExec, metrics=[elapsed_compute=1.234ms, output_rows=10] | -| | UnionExec, metrics=[elapsed_compute=1.234ms, output_rows=10] | -| | CoalesceBatchesExec: target_batch_size=8192, metrics=[elapsed_compute=1.234ms, output_rows=5] | -| | FilterExec: state@4 = MA, metrics=[elapsed_compute=1.234ms, output_rows=5] | -| | ParquetExec: file_groups={2 groups: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet], [1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, projection=[area, city, max_temp, min_temp, state, time], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], predicate=state@4 = MA, pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, metrics=[bytes_scanned=1683, elapsed_compute=1.234ms, num_predicate_creation_errors=0, output_rows=5, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=5, row_groups_pruned=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning_total=1.234ms, time_elapsed_scanning_until_data=1.234ms] | -| | ProjectionExec: expr=[area@1 as area, city@2 as city, max_temp@3 as max_temp, min_temp@4 as min_temp, state@5 as state, time@6 as time], metrics=[elapsed_compute=1.234ms, output_rows=5] | -| | 
DeduplicateExec: [state@5 ASC,city@2 ASC,time@6 ASC], metrics=[elapsed_compute=1.234ms, num_dupes=2, output_rows=5] | -| | SortPreservingMergeExec: [state@5 ASC,city@2 ASC,time@6 ASC,__chunk_order@0 ASC], metrics=[elapsed_compute=1.234ms, output_rows=7] | -| | CoalesceBatchesExec: target_batch_size=8192, metrics=[elapsed_compute=1.234ms, output_rows=7] | -| | FilterExec: state@5 = MA, metrics=[elapsed_compute=1.234ms, output_rows=7] | -| | ParquetExec: file_groups={2 groups: [[1/1/1/00000000-0000-0000-0000-000000000002.parquet], [1/1/1/00000000-0000-0000-0000-000000000003.parquet]]}, projection=[__chunk_order, area, city, max_temp, min_temp, state, time], output_ordering=[state@5 ASC, city@2 ASC, time@6 ASC, __chunk_order@0 ASC], predicate=state@4 = MA, pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, metrics=[bytes_scanned=1532, elapsed_compute=1.234ms, num_predicate_creation_errors=0, output_rows=7, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=3, row_groups_pruned=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning_total=1.234ms, time_elapsed_scanning_until_data=1.234ms] | +| Plan with Metrics | UnionExec, metrics=[elapsed_compute=1.234ms, output_rows=10] | +| | CoalesceBatchesExec: target_batch_size=8192, metrics=[elapsed_compute=1.234ms, output_rows=5] | +| | FilterExec: state@4 = MA, metrics=[elapsed_compute=1.234ms, output_rows=5] | +| | ParquetExec: file_groups={2 groups: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet], [1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, projection=[area, city, max_temp, min_temp, state, time], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], predicate=state@4 = MA, pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, metrics=[bytes_scanned=1683, elapsed_compute=1.234ms, file_open_errors=0, file_scan_errors=0, num_predicate_creation_errors=0, output_rows=5, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=5, row_groups_pruned=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning_total=1.234ms, time_elapsed_scanning_until_data=1.234ms] | +| | ProjectionExec: expr=[area@1 as area, city@2 as city, max_temp@3 as max_temp, min_temp@4 as min_temp, state@5 as state, time@6 as time], metrics=[elapsed_compute=1.234ms, output_rows=5] | +| | DeduplicateExec: [state@5 ASC,city@2 ASC,time@6 ASC], metrics=[elapsed_compute=1.234ms, num_dupes=2, output_rows=5] | +| | SortPreservingMergeExec: [state@5 ASC,city@2 ASC,time@6 ASC,__chunk_order@0 ASC], metrics=[elapsed_compute=1.234ms, output_rows=7] | +| | CoalesceBatchesExec: target_batch_size=8192, metrics=[elapsed_compute=1.234ms, output_rows=7] | +| | FilterExec: state@5 = MA, metrics=[elapsed_compute=1.234ms, output_rows=7] | +| | ParquetExec: file_groups={2 groups: [[1/1/1/00000000-0000-0000-0000-000000000002.parquet], [1/1/1/00000000-0000-0000-0000-000000000003.parquet]]}, projection=[__chunk_order, area, city, max_temp, min_temp, state, time], output_ordering=[state@5 ASC, city@2 ASC, time@6 ASC, __chunk_order@0 ASC], predicate=state@4 = MA, pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, metrics=[bytes_scanned=1532, elapsed_compute=1.234ms, file_open_errors=0, file_scan_errors=0, num_predicate_creation_errors=0, output_rows=7, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, 
pushdown_eval_time=1.234ms, pushdown_rows_filtered=3, row_groups_pruned=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning_total=1.234ms, time_elapsed_scanning_until_data=1.234ms] | | | | ---------- \ No newline at end of file diff --git a/influxdb_iox/tests/query_tests/cases/in/gapfill.sql.expected b/influxdb_iox/tests/query_tests/cases/in/gapfill.sql.expected index 3b33e02a8d..073c498494 100644 --- a/influxdb_iox/tests/query_tests/cases/in/gapfill.sql.expected +++ b/influxdb_iox/tests/query_tests/cases/in/gapfill.sql.expected @@ -24,7 +24,7 @@ ---------- | logical_plan | Projection: date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time) AS minute, COUNT(cpu.user) | | | GapFill: groupBy=[[date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)]], aggr=[[COUNT(cpu.user)]], time_column=date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time), stride=IntervalMonthDayNano("600000000000"), range=Included(TimestampNanosecond(957528000000000000, None))..Included(TimestampNanosecond(957531540000000000, None)) | -| | Aggregate: groupBy=[[datebin(IntervalMonthDayNano("600000000000"), cpu.time) AS date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)]], aggr=[[COUNT(cpu.user)]] | +| | Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("600000000000"), cpu.time) AS date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)]], aggr=[[COUNT(cpu.user)]] | | | TableScan: cpu projection=[time, user], full_filters=[cpu.time >= TimestampNanosecond(957528000000000000, None), cpu.time <= TimestampNanosecond(957531540000000000, None)] | | physical_plan | ProjectionExec: expr=[date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@0 as minute, COUNT(cpu.user)@1 as COUNT(cpu.user)] | | | GapFillExec: group_expr=[date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@0], aggr_expr=[COUNT(cpu.user)@1], stride=600000000000, time_range=Included("957528000000000000")..Included("957531540000000000") | @@ -33,7 +33,7 @@ | | AggregateExec: mode=FinalPartitioned, gby=[date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@0 as date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)], aggr=[COUNT(cpu.user)] | | | CoalesceBatchesExec: target_batch_size=8192 | | | RepartitionExec: partitioning=Hash([Column { name: "date_bin_gapfill(IntervalMonthDayNano(\"600000000000\"),cpu.time)", index: 0 }], 4), input_partitions=4 | -| | AggregateExec: mode=Partial, gby=[datebin(600000000000, time@0) as date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)], aggr=[COUNT(cpu.user)] | +| | AggregateExec: mode=Partial, gby=[date_bin(600000000000, time@0) as date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)], aggr=[COUNT(cpu.user)] | | | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 | | | CoalesceBatchesExec: target_batch_size=8192 | | | FilterExec: time@0 >= 957528000000000000 AND time@0 <= 957531540000000000 | @@ -117,7 +117,7 @@ Error during planning: gap-filling query is missing lower time bound ---------- | logical_plan | Projection: cpu.region, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time) AS minute, AVG(cpu.user) AS locf(AVG(cpu.user)) | | | GapFill: groupBy=[[cpu.region, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)]], aggr=[[LOCF(AVG(cpu.user))]], time_column=date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time), stride=IntervalMonthDayNano("600000000000"), range=Included(TimestampNanosecond(957528000000000000, 
None))..Included(TimestampNanosecond(957531540000000000, None)) | -| | Aggregate: groupBy=[[cpu.region, datebin(IntervalMonthDayNano("600000000000"), cpu.time) AS date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)]], aggr=[[AVG(cpu.user)]] | +| | Aggregate: groupBy=[[cpu.region, date_bin(IntervalMonthDayNano("600000000000"), cpu.time) AS date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)]], aggr=[[AVG(cpu.user)]] | | | TableScan: cpu projection=[region, time, user], full_filters=[cpu.time >= TimestampNanosecond(957528000000000000, None), cpu.time <= TimestampNanosecond(957531540000000000, None)] | | physical_plan | ProjectionExec: expr=[region@0 as region, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@1 as minute, AVG(cpu.user)@2 as locf(AVG(cpu.user))] | | | GapFillExec: group_expr=[region@0, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@1], aggr_expr=[LOCF(AVG(cpu.user)@2)], stride=600000000000, time_range=Included("957528000000000000")..Included("957531540000000000") | @@ -126,7 +126,7 @@ Error during planning: gap-filling query is missing lower time bound | | AggregateExec: mode=FinalPartitioned, gby=[region@0 as region, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@1 as date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)], aggr=[AVG(cpu.user)] | | | CoalesceBatchesExec: target_batch_size=8192 | | | RepartitionExec: partitioning=Hash([Column { name: "region", index: 0 }, Column { name: "date_bin_gapfill(IntervalMonthDayNano(\"600000000000\"),cpu.time)", index: 1 }], 4), input_partitions=4 | -| | AggregateExec: mode=Partial, gby=[region@0 as region, datebin(600000000000, time@1) as date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)], aggr=[AVG(cpu.user)], ordering_mode=PartiallyOrdered | +| | AggregateExec: mode=Partial, gby=[region@0 as region, date_bin(600000000000, time@1) as date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)], aggr=[AVG(cpu.user)], ordering_mode=PartiallyOrdered | | | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 | | | CoalesceBatchesExec: target_batch_size=8192 | | | FilterExec: time@1 >= 957528000000000000 AND time@1 <= 957531540000000000 | diff --git a/influxdb_iox_client/src/client/flight/mod.rs b/influxdb_iox_client/src/client/flight/mod.rs index 9d1ae22177..7180543783 100644 --- a/influxdb_iox_client/src/client/flight/mod.rs +++ b/influxdb_iox_client/src/client/flight/mod.rs @@ -185,15 +185,15 @@ impl Client { Ok(self.inner.add_header(key, value)?) } - /// Query the given namespace with the given SQL query, returning + /// Query the given database with the given SQL query, returning /// a struct that can stream Arrow [`RecordBatch`] results. pub async fn sql( &mut self, - namespace_name: impl Into + Send, + database: impl Into + Send, sql_query: impl Into + Send, ) -> Result { let request = ReadInfo { - namespace_name: namespace_name.into(), + database: database.into(), sql_query: sql_query.into(), query_type: QueryType::Sql.into(), flightsql_command: vec![], @@ -203,15 +203,15 @@ impl Client { self.do_get_with_read_info(request).await } - /// Query the given namespace with the given InfluxQL query, returning + /// Query the given database with the given InfluxQL query, returning /// a struct that can stream Arrow [`RecordBatch`] results. 
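    // Reviewer note (annotation, not part of this diff): like `sql` above,
    // `influxql` now takes the database name as its first argument. On the
    // wire this maps to `ReadInfo.database`, which keeps protobuf field tag 1
    // from the old `namespace_name`, so the rename stays wire-compatible with
    // existing clients; only the generated field name changes. A hedged usage
    // sketch, assuming an already-connected flight `Client` named `client`
    // and `futures::StreamExt` in scope (names illustrative):
    //
    //     let mut stream = client.sql("my_db", "SELECT * FROM cpu").await?;
    //     while let Some(batch) = stream.next().await {
    //         // each item yields an Arrow RecordBatch (or an error)
    //     }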
pub async fn influxql( &mut self, - namespace_name: impl Into + Send, + database: impl Into + Send, influxql_query: impl Into + Send, ) -> Result { let request = ReadInfo { - namespace_name: namespace_name.into(), + database: database.into(), sql_query: influxql_query.into(), query_type: QueryType::InfluxQl.into(), flightsql_command: vec![], diff --git a/ingester/Cargo.toml b/ingester/Cargo.toml index e7c6fb1ea8..5528c2ba7c 100644 --- a/ingester/Cargo.toml +++ b/ingester/Cargo.toml @@ -17,7 +17,7 @@ crossbeam-utils = "0.8.15" data_types = { version = "0.1.0", path = "../data_types" } datafusion.workspace = true dml = { version = "0.1.0", path = "../dml" } -flatbuffers = "23.1.21" +flatbuffers = "23.5.26" futures = "0.3.28" generated_types = { version = "0.1.0", path = "../generated_types" } hashbrown.workspace = true @@ -29,7 +29,7 @@ metric = { version = "0.1.0", path = "../metric" } mutable_batch = { version = "0.1.0", path = "../mutable_batch" } mutable_batch_pb = { version = "0.1.0", path = "../mutable_batch_pb" } observability_deps = { version = "0.1.0", path = "../observability_deps" } -once_cell = "1.17" +once_cell = "1.18" parking_lot = "0.12.1" parquet_file = { version = "0.1.0", path = "../parquet_file" } pin-project = "1.1.0" diff --git a/iox_query/src/exec/gapfill/mod.rs b/iox_query/src/exec/gapfill/mod.rs index 8d88d2d13d..2f32d32758 100644 --- a/iox_query/src/exec/gapfill/mod.rs +++ b/iox_query/src/exec/gapfill/mod.rs @@ -775,7 +775,7 @@ mod test { - " GapFillExec: group_expr=[date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@0], aggr_expr=[AVG(temps.temp)@1], stride=60000000000, time_range=Included(\"315532800000000000\")..Excluded(\"347155200000000000\")" - " SortExec: expr=[date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@0 ASC]" - " AggregateExec: mode=Final, gby=[date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@0 as date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))], aggr=[AVG(temps.temp)]" - - " AggregateExec: mode=Partial, gby=[datebin(60000000000, time@0, 0) as date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))], aggr=[AVG(temps.temp)]" + - " AggregateExec: mode=Partial, gby=[date_bin(60000000000, time@0, 0) as date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))], aggr=[AVG(temps.temp)]" - " EmptyExec: produce_one_row=false" "### ); @@ -805,7 +805,7 @@ mod test { - " GapFillExec: group_expr=[loc@0, date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@1, concat(Utf8(\"zz\"),temps.loc)@2], aggr_expr=[AVG(temps.temp)@3], stride=60000000000, time_range=Included(\"315532800000000000\")..Excluded(\"347155200000000000\")" - " SortExec: expr=[loc@0 ASC,concat(Utf8(\"zz\"),temps.loc)@2 ASC,date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@1 ASC]" - " AggregateExec: mode=Final, gby=[loc@0 as loc, date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@1 as date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\")), concat(Utf8(\"zz\"),temps.loc)@2 as concat(Utf8(\"zz\"),temps.loc)], aggr=[AVG(temps.temp)]" - - " AggregateExec: mode=Partial, gby=[loc@1 as loc, datebin(60000000000, time@0, 0) as 
date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\")), concat(zz, loc@1) as concat(Utf8(\"zz\"),temps.loc)], aggr=[AVG(temps.temp)]" + - " AggregateExec: mode=Partial, gby=[loc@1 as loc, date_bin(60000000000, time@0, 0) as date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\")), concat(zz, loc@1) as concat(Utf8(\"zz\"),temps.loc)], aggr=[AVG(temps.temp)]" - " EmptyExec: produce_one_row=false" "### ); diff --git a/iox_query/src/logical_optimizer/handle_gapfill.rs b/iox_query/src/logical_optimizer/handle_gapfill.rs index 50be1a52b3..1973877b3a 100644 --- a/iox_query/src/logical_optimizer/handle_gapfill.rs +++ b/iox_query/src/logical_optimizer/handle_gapfill.rs @@ -860,7 +860,7 @@ mod test { @r###" --- - "GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp)]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))" - - " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp)]]" + - " Aggregate: groupBy=[[date_bin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp)]]" - " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)" - " TableScan: temps" "###); @@ -890,7 +890,7 @@ mod test { @r###" --- - "GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time,TimestampNanosecond(7, None))]], aggr=[[AVG(temps.temp)]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time,TimestampNanosecond(7, None)), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))" - - " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time, TimestampNanosecond(7, None)) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time,TimestampNanosecond(7, None))]], aggr=[[AVG(temps.temp)]]" + - " Aggregate: groupBy=[[date_bin(IntervalDayTime(\"60000\"), temps.time, TimestampNanosecond(7, None)) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time,TimestampNanosecond(7, None))]], aggr=[[AVG(temps.temp)]]" - " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)" - " TableScan: temps" "###); @@ -919,7 +919,7 @@ mod test { @r###" --- - "GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), temps.loc]], aggr=[[AVG(temps.temp)]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))" - - " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), temps.loc]], aggr=[[AVG(temps.temp)]]" + - " Aggregate: groupBy=[[date_bin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), temps.loc]], aggr=[[AVG(temps.temp)]]" - " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)" - " TableScan: temps" "###); @@ -971,7 +971,7 @@ mod test { --- - "Projection: date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), AVG(temps.temp)" - " GapFill: 
groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp)]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))" - - " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp)]]" + - " Aggregate: groupBy=[[date_bin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp)]]" - " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)" - " TableScan: temps" "###); @@ -1006,7 +1006,7 @@ mod test { --- - "Projection: date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), AVG(temps.temp) AS locf(AVG(temps.temp)), MIN(temps.temp) AS locf(MIN(temps.temp))" - " GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[LOCF(AVG(temps.temp)), LOCF(MIN(temps.temp))]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))" - - " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), MIN(temps.temp)]]" + - " Aggregate: groupBy=[[date_bin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), MIN(temps.temp)]]" - " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)" - " TableScan: temps" "###); @@ -1040,7 +1040,7 @@ mod test { --- - "Projection: date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), MIN(temps.temp) AS locf(MIN(temps.temp)) AS locf_min_temp" - " GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), LOCF(MIN(temps.temp))]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))" - - " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), MIN(temps.temp)]]" + - " Aggregate: groupBy=[[date_bin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), MIN(temps.temp)]]" - " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)" - " TableScan: temps" "###); @@ -1075,7 +1075,7 @@ mod test { --- - "Projection: date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), AVG(temps.temp) AS interpolate(AVG(temps.temp)), MIN(temps.temp) AS interpolate(MIN(temps.temp))" - " GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[INTERPOLATE(AVG(temps.temp)), INTERPOLATE(MIN(temps.temp))]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))" - - " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), MIN(temps.temp)]]" + - " Aggregate: 
groupBy=[[date_bin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), MIN(temps.temp)]]" - " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)" - " TableScan: temps" "###); diff --git a/iox_query/src/physical_optimizer/predicate_pushdown.rs b/iox_query/src/physical_optimizer/predicate_pushdown.rs index d55eea9ffa..117b5bae25 100644 --- a/iox_query/src/physical_optimizer/predicate_pushdown.rs +++ b/iox_query/src/physical_optimizer/predicate_pushdown.rs @@ -314,7 +314,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: None, + output_ordering: vec![vec![]], infinite_source: false, }; let plan = Arc::new( diff --git a/iox_query/src/physical_optimizer/projection_pushdown.rs b/iox_query/src/physical_optimizer/projection_pushdown.rs index 9e608eceee..4cde9e3c4e 100644 --- a/iox_query/src/physical_optimizer/projection_pushdown.rs +++ b/iox_query/src/physical_optimizer/projection_pushdown.rs @@ -97,11 +97,11 @@ impl PhysicalOptimizerRule for ProjectionPushdown { let output_ordering = child_parquet .base_config() .output_ordering - .as_ref() + .iter() .map(|output_ordering| { project_output_ordering(output_ordering, projection_exec.schema()) }) - .transpose()?; + .collect::>()?; let base_config = FileScanConfig { projection: Some(projection), output_ordering, @@ -729,7 +729,7 @@ mod tests { projection: Some(projection), limit: None, table_partition_cols: vec![], - output_ordering: Some(vec![ + output_ordering: vec![vec![ PhysicalSortExpr { expr: expr_col("tag3", &schema_projected), options: Default::default(), @@ -742,7 +742,7 @@ mod tests { expr: expr_col("tag2", &schema_projected), options: Default::default(), }, - ]), + ]], infinite_source: false, }; let inner = ParquetExec::new(base_config, Some(expr_string_cmp("tag1", &schema)), None); @@ -1330,7 +1330,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: None, + output_ordering: vec![vec![]], infinite_source: false, }; let plan = Arc::new(ParquetExec::new(base_config, None, None)); diff --git a/iox_query/src/physical_optimizer/sort/parquet_sortness.rs b/iox_query/src/physical_optimizer/sort/parquet_sortness.rs index 8ae4ce9271..31c2affbab 100644 --- a/iox_query/src/physical_optimizer/sort/parquet_sortness.rs +++ b/iox_query/src/physical_optimizer/sort/parquet_sortness.rs @@ -126,7 +126,7 @@ impl<'a> TreeNodeRewriter for ParquetSortnessRewriter<'a> { }; let base_config = parquet_exec.base_config(); - if base_config.output_ordering.is_none() { + if base_config.output_ordering.is_empty() { // no output ordering requested return Ok(node); } @@ -207,7 +207,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: Some(ordering(["col2", "col1"], &schema)), + output_ordering: vec![ordering(["col2", "col1"], &schema)], infinite_source: false, }; let inner = ParquetExec::new(base_config, None, None); @@ -242,7 +242,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: Some(ordering(["col2", "col1", CHUNK_ORDER_COLUMN_NAME], &schema)), + output_ordering: vec![ordering(["col2", "col1", CHUNK_ORDER_COLUMN_NAME], &schema)], infinite_source: false, }; let inner = ParquetExec::new(base_config, None, None); @@ -278,7 +278,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: Some(ordering(["col2", "col1"], &schema)), + 
output_ordering: vec![ordering(["col2", "col1"], &schema)], infinite_source: false, }; let inner = ParquetExec::new(base_config, None, None); @@ -320,7 +320,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: Some(ordering(["col2", "col1"], &schema)), + output_ordering: vec![ordering(["col2", "col1"], &schema)], infinite_source: false, }; let inner = ParquetExec::new(base_config, None, None); @@ -355,7 +355,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: Some(ordering(["col1", "col2"], &schema)), + output_ordering: vec![ordering(["col1", "col2"], &schema)], infinite_source: false, }; let inner = ParquetExec::new(base_config, None, None); @@ -390,7 +390,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: None, + output_ordering: vec![vec![]], infinite_source: false, }; let inner = ParquetExec::new(base_config, None, None); @@ -425,7 +425,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: Some(ordering(["col2", "col1"], &schema)), + output_ordering: vec![ordering(["col2", "col1"], &schema)], infinite_source: false, }; let inner = ParquetExec::new(base_config, None, None); @@ -489,7 +489,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: Some(ordering(["col2", "col1"], &schema)), + output_ordering: vec![ordering(["col2", "col1"], &schema)], infinite_source: false, }; let plan = Arc::new(ParquetExec::new(base_config, None, None)); @@ -518,7 +518,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: Some(ordering(["col1", "col2"], &schema)), + output_ordering: vec![ordering(["col1", "col2"], &schema)], infinite_source: false, }; let plan = Arc::new(ParquetExec::new(base_config, None, None)); @@ -555,7 +555,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: Some(ordering(["col1", "col2"], &schema)), + output_ordering: vec![ordering(["col1", "col2"], &schema)], infinite_source: false, }; let plan = Arc::new(ParquetExec::new(base_config, None, None)); @@ -593,7 +593,7 @@ mod tests { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: Some(ordering(["col2", "col1", CHUNK_ORDER_COLUMN_NAME], &schema)), + output_ordering: vec![ordering(["col2", "col1", CHUNK_ORDER_COLUMN_NAME], &schema)], infinite_source: false, }; let plan_parquet = Arc::new(ParquetExec::new(base_config, None, None)); diff --git a/iox_query/src/provider/physical.rs b/iox_query/src/provider/physical.rs index 48e9fb24ba..4ba7345e7c 100644 --- a/iox_query/src/provider/physical.rs +++ b/iox_query/src/provider/physical.rs @@ -301,6 +301,9 @@ pub fn chunks_to_physical_nodes( is_exact: false, }; + // No sort order is represented by an empty Vec + let output_ordering = vec![output_ordering.unwrap_or_default()]; + let base_config = FileScanConfig { object_store_url, file_schema, diff --git a/iox_query/src/pruning.rs b/iox_query/src/pruning.rs index 21514fba42..5e1b5d0b22 100644 --- a/iox_query/src/pruning.rs +++ b/iox_query/src/pruning.rs @@ -210,7 +210,7 @@ fn get_aggregate(stats: &ColumnStatistics, aggregate: Aggregate) -> Option<&Scal #[cfg(test)] mod test { - use std::sync::Arc; + use std::{ops::Not, sync::Arc}; use datafusion::prelude::{col, lit}; use datafusion_util::lit_dict; diff --git a/iox_query_influxql/src/plan/planner.rs b/iox_query_influxql/src/plan/planner.rs index 
bcb9559a06..546c218814 100644 --- a/iox_query_influxql/src/plan/planner.rs +++ b/iox_query_influxql/src/plan/planner.rs @@ -2541,70 +2541,70 @@ mod test { Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN tag0 > Int32(0) THEN Utf8("tag0") END, CASE WHEN tag1 > Int32(0) THEN Utf8("tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] + Projection: make_array(CASE WHEN tag0 > Int32(0) THEN Utf8("tag0") END, CASE WHEN tag1 > Int32(0) THEN Utf8("tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(all_types.tag0 IS NOT NULL AS UInt64)) AS tag0, SUM(CAST(all_types.tag1 IS NOT NULL AS UInt64)) AS tag1]] [tag0:UInt64;N, tag1:UInt64;N] Filter: all_types.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN cpu > Int32(0) THEN Utf8("cpu") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N] + Projection: make_array(CASE WHEN cpu > Int32(0) THEN Utf8("cpu") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(cpu.cpu IS NOT NULL AS UInt64)) AS cpu, SUM(CAST(cpu.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(cpu.region IS NOT NULL AS UInt64)) AS region]] [cpu:UInt64;N, host:UInt64;N, region:UInt64;N] Filter: cpu.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN bar > Int32(0) THEN Utf8("bar") END, CASE WHEN foo > Int32(0) THEN Utf8("foo") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: 
Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] + Projection: make_array(CASE WHEN bar > Int32(0) THEN Utf8("bar") END, CASE WHEN foo > Int32(0) THEN Utf8("foo") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(data.bar IS NOT NULL AS UInt64)) AS bar, SUM(CAST(data.foo IS NOT NULL AS UInt64)) AS foo]] [bar:UInt64;N, foo:UInt64;N] Filter: data.time >= TimestampNanosecond(1672444800000000000, None) AND data.foo = Dictionary(Int32, Utf8("some_foo")) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Projection: Dictionary(Int32, Utf8("disk")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN device > Int32(0) THEN Utf8("device") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N] + Projection: make_array(CASE WHEN device > Int32(0) THEN Utf8("device") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(disk.device IS NOT NULL AS UInt64)) AS device, SUM(CAST(disk.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(disk.region IS NOT NULL AS UInt64)) AS region]] [device:UInt64;N, host:UInt64;N, region:UInt64;N] Filter: disk.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] TableScan: disk [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] Projection: Dictionary(Int32, Utf8("diskio")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END, CASE WHEN status > Int32(0) THEN Utf8("status") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N] + Projection: make_array(CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END, CASE WHEN status > Int32(0) THEN Utf8("status") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N] Aggregate: groupBy=[[]], 
aggr=[[SUM(CAST(diskio.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(diskio.region IS NOT NULL AS UInt64)) AS region, SUM(CAST(diskio.status IS NOT NULL AS UInt64)) AS status]] [host:UInt64;N, region:UInt64;N, status:UInt64;N] Filter: diskio.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [bytes_read:Int64;N, bytes_written:Int64;N, host:Dictionary(Int32, Utf8);N, is_local:Boolean;N, read_utilization:Float64;N, region:Dictionary(Int32, Utf8);N, status:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), write_utilization:Float64;N] TableScan: diskio [bytes_read:Int64;N, bytes_written:Int64;N, host:Dictionary(Int32, Utf8);N, is_local:Boolean;N, read_utilization:Float64;N, region:Dictionary(Int32, Utf8);N, status:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), write_utilization:Float64;N] Projection: Dictionary(Int32, Utf8("merge_00")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN col0 > Int32(0) THEN Utf8("col0") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N] + Projection: make_array(CASE WHEN col0 > Int32(0) THEN Utf8("col0") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(merge_00.col0 IS NOT NULL AS UInt64)) AS col0]] [col0:UInt64;N] Filter: merge_00.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [col0:Dictionary(Int32, Utf8);N, col1:Float64;N, col2:Boolean;N, col3:Utf8;N, time:Timestamp(Nanosecond, None)] TableScan: merge_00 [col0:Dictionary(Int32, Utf8);N, col1:Float64;N, col2:Boolean;N, col3:Utf8;N, time:Timestamp(Nanosecond, None)] Projection: Dictionary(Int32, Utf8("merge_01")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN col1 > Int32(0) THEN Utf8("col1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N] + Projection: make_array(CASE WHEN col1 > Int32(0) THEN Utf8("col1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(merge_01.col1 IS NOT NULL AS UInt64)) AS col1]] [col1:UInt64;N] Filter: merge_01.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [col0:Float64;N, col1:Dictionary(Int32, Utf8);N, col2:Utf8;N, col3:Boolean;N, time:Timestamp(Nanosecond, None)] TableScan: merge_01 [col0:Float64;N, col1:Dictionary(Int32, Utf8);N, col2:Utf8;N, col3:Boolean;N, time:Timestamp(Nanosecond, None)] Projection: Dictionary(Int32, Utf8("temp_01")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] + Projection: make_array(CASE WHEN 
shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_01.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_01.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N] Filter: temp_01.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [field_f64:Float64;N, field_i64:Int64;N, field_str:Utf8;N, field_u64:UInt64;N, shared_field0:Float64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] TableScan: temp_01 [field_f64:Float64;N, field_i64:Int64;N, field_str:Utf8;N, field_u64:UInt64;N, shared_field0:Float64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] Projection: Dictionary(Int32, Utf8("temp_02")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] + Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_02.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_02.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N] Filter: temp_02.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [shared_field0:Int64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] TableScan: temp_02 [shared_field0:Int64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] Projection: Dictionary(Int32, Utf8("temp_03")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] + Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_03.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_03.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N] Filter: temp_03.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [shared_field0:Utf8;N, 
shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] TableScan: temp_03 [shared_field0:Utf8;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] @@ -2615,70 +2615,70 @@ mod test { Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN tag0 > Int32(0) THEN Utf8("tag0") END, CASE WHEN tag1 > Int32(0) THEN Utf8("tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] + Projection: make_array(CASE WHEN tag0 > Int32(0) THEN Utf8("tag0") END, CASE WHEN tag1 > Int32(0) THEN Utf8("tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(all_types.tag0 IS NOT NULL AS UInt64)) AS tag0, SUM(CAST(all_types.tag1 IS NOT NULL AS UInt64)) AS tag1]] [tag0:UInt64;N, tag1:UInt64;N] Filter: all_types.time >= TimestampNanosecond(1338, None) [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN cpu > Int32(0) THEN Utf8("cpu") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N] + Projection: make_array(CASE WHEN cpu > Int32(0) THEN Utf8("cpu") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(cpu.cpu IS NOT NULL AS UInt64)) AS cpu, SUM(CAST(cpu.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(cpu.region IS NOT NULL AS UInt64)) AS region]] [cpu:UInt64;N, host:UInt64;N, region:UInt64;N] Filter: cpu.time >= TimestampNanosecond(1338, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN bar > Int32(0) THEN Utf8("bar") END, CASE WHEN foo > Int32(0) THEN 
Utf8("foo") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] + Projection: make_array(CASE WHEN bar > Int32(0) THEN Utf8("bar") END, CASE WHEN foo > Int32(0) THEN Utf8("foo") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(data.bar IS NOT NULL AS UInt64)) AS bar, SUM(CAST(data.foo IS NOT NULL AS UInt64)) AS foo]] [bar:UInt64;N, foo:UInt64;N] Filter: data.time >= TimestampNanosecond(1338, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Projection: Dictionary(Int32, Utf8("disk")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN device > Int32(0) THEN Utf8("device") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N] + Projection: make_array(CASE WHEN device > Int32(0) THEN Utf8("device") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(disk.device IS NOT NULL AS UInt64)) AS device, SUM(CAST(disk.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(disk.region IS NOT NULL AS UInt64)) AS region]] [device:UInt64;N, host:UInt64;N, region:UInt64;N] Filter: disk.time >= TimestampNanosecond(1338, None) [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] TableScan: disk [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] Projection: Dictionary(Int32, Utf8("diskio")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END, CASE WHEN status > Int32(0) THEN Utf8("status") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N] + Projection: make_array(CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END, CASE WHEN status > Int32(0) THEN Utf8("status") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N] Aggregate: groupBy=[[]], 
aggr=[[SUM(CAST(diskio.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(diskio.region IS NOT NULL AS UInt64)) AS region, SUM(CAST(diskio.status IS NOT NULL AS UInt64)) AS status]] [host:UInt64;N, region:UInt64;N, status:UInt64;N] Filter: diskio.time >= TimestampNanosecond(1338, None) [bytes_read:Int64;N, bytes_written:Int64;N, host:Dictionary(Int32, Utf8);N, is_local:Boolean;N, read_utilization:Float64;N, region:Dictionary(Int32, Utf8);N, status:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), write_utilization:Float64;N] TableScan: diskio [bytes_read:Int64;N, bytes_written:Int64;N, host:Dictionary(Int32, Utf8);N, is_local:Boolean;N, read_utilization:Float64;N, region:Dictionary(Int32, Utf8);N, status:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), write_utilization:Float64;N] Projection: Dictionary(Int32, Utf8("merge_00")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN col0 > Int32(0) THEN Utf8("col0") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N] + Projection: make_array(CASE WHEN col0 > Int32(0) THEN Utf8("col0") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(merge_00.col0 IS NOT NULL AS UInt64)) AS col0]] [col0:UInt64;N] Filter: merge_00.time >= TimestampNanosecond(1338, None) [col0:Dictionary(Int32, Utf8);N, col1:Float64;N, col2:Boolean;N, col3:Utf8;N, time:Timestamp(Nanosecond, None)] TableScan: merge_00 [col0:Dictionary(Int32, Utf8);N, col1:Float64;N, col2:Boolean;N, col3:Utf8;N, time:Timestamp(Nanosecond, None)] Projection: Dictionary(Int32, Utf8("merge_01")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN col1 > Int32(0) THEN Utf8("col1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N] + Projection: make_array(CASE WHEN col1 > Int32(0) THEN Utf8("col1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(merge_01.col1 IS NOT NULL AS UInt64)) AS col1]] [col1:UInt64;N] Filter: merge_01.time >= TimestampNanosecond(1338, None) [col0:Float64;N, col1:Dictionary(Int32, Utf8);N, col2:Utf8;N, col3:Boolean;N, time:Timestamp(Nanosecond, None)] TableScan: merge_01 [col0:Float64;N, col1:Dictionary(Int32, Utf8);N, col2:Utf8;N, col3:Boolean;N, time:Timestamp(Nanosecond, None)] Projection: Dictionary(Int32, Utf8("temp_01")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] + Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN 
Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_01.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_01.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N] Filter: temp_01.time >= TimestampNanosecond(1338, None) [field_f64:Float64;N, field_i64:Int64;N, field_str:Utf8;N, field_u64:UInt64;N, shared_field0:Float64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] TableScan: temp_01 [field_f64:Float64;N, field_i64:Int64;N, field_str:Utf8;N, field_u64:UInt64;N, shared_field0:Float64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] Projection: Dictionary(Int32, Utf8("temp_02")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] + Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_02.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_02.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N] Filter: temp_02.time >= TimestampNanosecond(1338, None) [shared_field0:Int64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] TableScan: temp_02 [shared_field0:Int64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] Projection: Dictionary(Int32, Utf8("temp_03")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N] Filter: tagKey IS NOT NULL [tagKey:Utf8;N] Unnest: tagKey [tagKey:Utf8;N] - Projection: makearray(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] + Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N] Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_03.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_03.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N] Filter: temp_03.time >= TimestampNanosecond(1338, None) [shared_field0:Utf8;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] TableScan: temp_03 [shared_field0:Utf8;N, shared_tag0:Dictionary(Int32, Utf8);N, 
shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] @@ -2774,7 +2774,7 @@ mod test { Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time AS time, value AS value [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, value:Float64;N] Sort: time ASC NULLS LAST [time:Timestamp(Nanosecond, None);N, value:Float64;N] Projection: time, AVG(cpu.usage_idle) AS value [time:Timestamp(Nanosecond, None);N, value:Float64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] "###); } @@ -2954,7 +2954,7 @@ mod test { Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, last:Float64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, (selector_last(cpu.usage_idle,cpu.time))[value] AS last [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, last:Float64;N] GapFill: groupBy=[[time]], aggr=[[selector_last(cpu.usage_idle,cpu.time)]], time_column=time, stride=IntervalMonthDayNano("5000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("5000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[selector_last(cpu.usage_idle, cpu.time)]] [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("5000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[selector_last(cpu.usage_idle, cpu.time)]] [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] "###); @@ -2963,7 +2963,7 @@ mod test { Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, first:Float64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, (coalesce_struct(selector_first(cpu.usage_idle,cpu.time), 
Struct({value:Float64(0),time:TimestampNanosecond(0, None)})))[value] AS first [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, first:Float64;N] GapFill: groupBy=[[time]], aggr=[[selector_first(cpu.usage_idle,cpu.time)]], time_column=time, stride=IntervalMonthDayNano("5000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, selector_first(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("5000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[selector_first(cpu.usage_idle, cpu.time)]] [time:Timestamp(Nanosecond, None);N, selector_first(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("5000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[selector_first(cpu.usage_idle, cpu.time)]] [time:Timestamp(Nanosecond, None);N, selector_first(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] "###); @@ -3539,7 +3539,7 @@ mod test { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(none)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); @@ -3547,102 +3547,137 @@ mod test { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s, 5s) FILL(none)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] - Aggregate: 
groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(5000000000, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(5000000000, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); } #[test] - fn group_by_time_gapfill() { + fn group_by_time_gapfill_no_bounds() { // No time bounds assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + #[test] + fn group_by_time_gapfill_no_lower_time_bounds() { // No lower time bounds assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time < '2022-10-31T02:02:00Z' GROUP BY TIME(10s)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1667181719999999999, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Filter: data.time <= TimestampNanosecond(1667181719999999999, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, 
i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + #[test] + fn group_by_time_gapfill_no_upper_time_bounds() { // No upper time bounds assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time >= '2022-10-31T02:00:00Z' GROUP BY TIME(10s)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Included(TimestampNanosecond(1667181600000000000, None))..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Filter: data.time >= TimestampNanosecond(1667181600000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + #[test] + fn group_by_time_gapfill_default_is_fill_null() { // Default is FILL(null) assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(10s)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Included(TimestampNanosecond(1667181600000000000, None))..Included(TimestampNanosecond(1667181719999999999, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Filter: data.time >= TimestampNanosecond(1667181600000000000, None)
AND data.time <= TimestampNanosecond(1667181719999999999, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + + #[test] + fn group_by_time_gapfill_default_is_fill_null1() { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + + #[test] + fn group_by_time_gapfill_default_is_fill_null2() { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(null)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + + #[test] + fn group_by_time_gapfill_default_is_fill_null3() { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(previous)"), @r###" Sort: time ASC NULLS LAST 
[iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[[time]], aggr=[[LOCF(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + + #[test] + fn group_by_time_gapfill_default_is_fill_null4() { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(0)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + + #[test] + fn group_by_time_gapfill_default_is_fill_null5() { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(linear)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[[time]], aggr=[[INTERPOLATE(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, 
COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + #[test] + fn group_by_time_gapfill_coalesces_the_fill_value() { // Coalesces the fill value, which is a float, to the matching type of a `COUNT` aggregate. assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(3.2)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(3)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + #[test] + fn group_by_time_gapfill_aggregates_part_of_binary_expression() { // Aggregates as part of a binary expression assert_snapshot!(plan("SELECT COUNT(f64_field) + MEAN(f64_field) FROM data GROUP BY TIME(10s) FILL(3.2)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count_mean:Float64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(3)) + coalesce_struct(AVG(data.f64_field), Float64(3.2)) AS count_mean [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count_mean:Float64;N] GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field), AVG(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N, AVG(data.f64_field):Float64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field), AVG(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N, AVG(data.f64_field):Float64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field), AVG(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N, AVG(data.f64_field):Float64;N] TableScan: data [TIME:Boolean;N, 
bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); } @@ -3659,6 +3694,10 @@ mod test { Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + + #[test] + fn with_limit_or_offset2() { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY foo OFFSET 1"), @r###" Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] Projection: iox::measurement, time, foo, count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] @@ -3669,6 +3708,10 @@ mod test { Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + + #[test] + fn with_limit_or_offset3() { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY foo LIMIT 2 OFFSET 3"), @r###" Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] Projection: iox::measurement, time, foo, count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] @@ -3679,7 +3722,10 @@ mod test { Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); + } + #[test] + fn with_limit_or_offset_errors() { // Fallible // returns an error if LIMIT or OFFSET values exceed i64::MAX @@ -3699,7 +3745,7 @@ mod test { assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10u) FILL(none)"), @r###" Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] - Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, 
foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); } diff --git a/logfmt/Cargo.toml b/logfmt/Cargo.toml index 01b6d60b20..e7eceb0476 100644 --- a/logfmt/Cargo.toml +++ b/logfmt/Cargo.toml @@ -12,7 +12,7 @@ tracing-subscriber = "0.3" workspace-hack = { version = "0.1", path = "../workspace-hack" } [dev-dependencies] # In alphabetical order -once_cell = { version = "1.17", features = ["parking_lot"] } +once_cell = { version = "1.18", features = ["parking_lot"] } parking_lot = "0.12" regex = "1" tracing-subscriber = { version = "0.3", features = ["env-filter"] } diff --git a/mutable_batch/src/payload/partition.rs b/mutable_batch/src/payload/partition.rs index 34edb77c5a..066c1aa91a 100644 --- a/mutable_batch/src/payload/partition.rs +++ b/mutable_batch/src/payload/partition.rs @@ -22,12 +22,13 @@ use crate::{ }; /// An error generating a partition key for a row. +#[allow(missing_copy_implementations)] #[derive(Debug, Error, PartialEq, Eq)] pub enum PartitionKeyError { /// The partition template defines a [`Template::TimeFormat`] part, but the /// provided strftime formatter is invalid. - #[error("invalid strftime format in partition template: {0}")] - InvalidStrftime(String), + #[error("invalid strftime format in partition template")] + InvalidStrftime, /// The partition template defines a [`Template::TagValue`] part, but the /// column type is not "tag". @@ -93,7 +94,7 @@ impl<'a> Template<'a> { Utc.timestamp_nanos(t[idx]) .format_with_items(format.clone()) // Cheap clone of refs ) - .map_err(|_| PartitionKeyError::InvalidStrftime(format!("{format:?}")))?; + .map_err(|_| PartitionKeyError::InvalidStrftime)?; out.write_str( Cow::from(utf8_percent_encode( @@ -145,9 +146,18 @@ fn partition_keys<'a>( }) .collect::<Vec<_>>(); + // Track the length of the last yielded partition key, and pre-allocate the + // next partition key string to match it. + // + // In the happy path, keys of consistent sizes are generated and the + // allocations reach a minimum. If the keys are inconsistent, at best a + // subset of allocations are eliminated, and at worst, a few bytes of memory + // are temporarily allocated until the resulting string is shrunk down.
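The comment above describes the allocation-reuse trick that the following hunk implements. A minimal standalone sketch of the same pattern, separate from the diff (the function name and the format! body are hypothetical stand-ins for the real template evaluation):

fn keys_with_reuse(row_count: usize) -> impl Iterator<Item = String> {
    // Small initial guess; corrected as soon as the first key is built.
    let mut last_len = 5;
    (0..row_count).map(move |row| {
        // Pre-allocate to the previous key's length so runs of
        // consistently sized keys append without reallocating.
        let mut key = String::with_capacity(last_len);
        key.push_str(&format!("row-{row}")); // hypothetical stand-in for template parts
        last_len = key.len();
        // Give back any over-allocation if this key came out shorter.
        key.shrink_to_fit();
        key
    })
}

The shrink_to_fit call bounds the wasted bytes at the cost of a possible copy, which is the "at worst" case the comment describes.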
+ let mut last_len = 5; + // Yield a partition key string for each row in `batch` (0..batch.row_count).map(move |idx| { - let mut string = String::new(); + let mut string = String::with_capacity(last_len); // Evaluate each template part for this row for (col_idx, col) in template.iter().enumerate() { @@ -160,6 +170,8 @@ } } + last_len = string.len(); + string.shrink_to_fit(); Ok(string) }) } @@ -474,7 +486,7 @@ mod tests { let ret = partition_keys(&batch, template.parts()).collect::<Result<Vec<_>, _>>(); - assert_matches!(ret, Err(PartitionKeyError::InvalidStrftime(_))); + assert_matches!(ret, Err(PartitionKeyError::InvalidStrftime)); } // These values are arbitrarily chosen when building an input to the @@ -605,7 +617,7 @@ // properties: match ret { Ok(v) => { assert_eq!(v.len(), 1); }, - Err(e) => { assert_matches!(e, PartitionKeyError::InvalidStrftime(_)); }, + Err(e) => { assert_matches!(e, PartitionKeyError::InvalidStrftime); }, } } } diff --git a/parquet_file/src/storage.rs b/parquet_file/src/storage.rs index 290326c63d..6e7c32ddf3 100644 --- a/parquet_file/src/storage.rs +++ b/parquet_file/src/storage.rs @@ -135,7 +135,7 @@ impl ParquetExecInput { limit: None, table_partition_cols: vec![], // Parquet files ARE actually sorted but we don't care here since we just construct a `collect` plan. - output_ordering: None, + output_ordering: vec![vec![]], infinite_source: false, }; let exec = ParquetExec::new(base_config, None, None); diff --git a/parquet_to_line_protocol/src/lib.rs b/parquet_to_line_protocol/src/lib.rs index 6845e5255e..a0522d66c2 100644 --- a/parquet_to_line_protocol/src/lib.rs +++ b/parquet_to_line_protocol/src/lib.rs @@ -253,7 +253,7 @@ impl ParquetFileReader { projection: None, limit: None, table_partition_cols: vec![], - output_ordering: None, + output_ordering: vec![vec![]], infinite_source: false, }; diff --git a/predicate/src/delete_expr.rs b/predicate/src/delete_expr.rs index 21571a1cce..de7b191646 100644 --- a/predicate/src/delete_expr.rs +++ b/predicate/src/delete_expr.rs @@ -126,7 +126,7 @@ pub(crate) fn df_to_scalar( #[cfg(test)] mod tests { - use std::sync::Arc; + use std::{ops::Not, sync::Arc}; use arrow::datatypes::Field; use test_helpers::assert_contains; diff --git a/predicate/src/lib.rs b/predicate/src/lib.rs index 9734b40900..b7a79df759 100644 --- a/predicate/src/lib.rs +++ b/predicate/src/lib.rs @@ -34,6 +34,7 @@ use schema::TIME_COLUMN_NAME; use std::{ collections::{BTreeSet, HashSet}, fmt, + ops::Not, }; /// This `Predicate` represents the empty predicate (aka that evaluates to true for all rows).
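The two predicate files above, and rpc_predicate/rewrite.rs just below, newly import std::ops::Not. Only the import lines are visible in the diff, so the exact call sites are not shown here; a plausible minimal illustration of why the trait must be in scope, assuming the call sites use the .not() method form on DataFusion's Expr (which implements Not to support the !expr operator):

use std::ops::Not;

use datafusion::prelude::{col, lit, Expr};

// The method form requires the `Not` trait in scope; the `!predicate`
// operator form would compile without the import.
fn negate(predicate: Expr) -> Expr {
    predicate.not()
}

fn main() {
    let e = negate(col("tag0").eq(lit("some_foo")));
    println!("{e}"); // displays the negated expression, e.g. NOT tag0 = Utf8("some_foo")
}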
diff --git a/predicate/src/rpc_predicate/rewrite.rs b/predicate/src/rpc_predicate/rewrite.rs index fa4f946cca..69cf116ec1 100644 --- a/predicate/src/rpc_predicate/rewrite.rs +++ b/predicate/src/rpc_predicate/rewrite.rs @@ -255,7 +255,7 @@ fn is_col_op_lit(expr: &Expr) -> Option<&str> { #[cfg(test)] mod tests { - use std::ops::Add; + use std::ops::{Add, Not}; use super::*; use arrow::datatypes::DataType; diff --git a/router/tests/grpc.rs b/router/tests/grpc.rs index f015b543c1..df5cf29cc1 100644 --- a/router/tests/grpc.rs +++ b/router/tests/grpc.rs @@ -973,7 +973,7 @@ async fn test_invalid_strftime_partition_template() { assert_matches!( got, Err(Error::DmlHandler(DmlError::Partition( - PartitionError::Partitioner(PartitionKeyError::InvalidStrftime(_)) + PartitionError::Partitioner(PartitionKeyError::InvalidStrftime) ))) ); diff --git a/service_grpc_flight/src/lib.rs b/service_grpc_flight/src/lib.rs index 0e5cf6cf9a..c29869425a 100644 --- a/service_grpc_flight/src/lib.rs +++ b/service_grpc_flight/src/lib.rs @@ -497,7 +497,7 @@ where }; let request = request?; - let namespace_name = request.namespace_name(); + let namespace_name = request.database(); let query = request.query(); is_debug |= request.is_debug(); diff --git a/service_grpc_flight/src/request.rs b/service_grpc_flight/src/request.rs index 8e8fa5d01f..fc4fe77bd8 100644 --- a/service_grpc_flight/src/request.rs +++ b/service_grpc_flight/src/request.rs @@ -47,11 +47,11 @@ pub type Result<T, E = Error> = std::result::Result<T, E>; /// /// ## Example JSON Ticket format /// -/// This runs the SQL "SELECT 1" in namespace `my_db` +/// This runs the SQL "SELECT 1" in database `my_db` /// /// ```json /// { -/// "namespace_name": "my_db", +/// "database": "my_db", /// "sql_query": "SELECT 1;" /// } /// ``` @@ -60,7 +60,7 @@ pub type Result<T, E = Error> = std::result::Result<T, E>; /// /// ```json /// { -/// "namespace_name": "my_db", +/// "database": "my_db", /// "sql_query": "SELECT 1;", /// "query_type": "sql" /// } /// ``` @@ -70,14 +70,14 @@ pub type Result<T, E = Error> = std::result::Result<T, E>; /// /// ```json /// { -/// "namespace_name": "my_db", +/// "database": "my_db", /// "sql_query": "SHOW DATABASES;", /// "query_type": "influxql" /// } /// ``` #[derive(Debug, PartialEq, Clone)] pub struct IoxGetRequest { - namespace_name: String, + database: String, query: RunQuery, is_debug: bool, } @@ -116,9 +116,9 @@ impl Display for RunQuery { impl IoxGetRequest { /// Create a new request to run the specified query - pub fn new(namespace_name: impl Into<String>, query: RunQuery, is_debug: bool) -> Self { + pub fn new(database: impl Into<String>, query: RunQuery, is_debug: bool) -> Self { Self { - namespace_name: namespace_name.into(), + database: database.into(), query, is_debug, } @@ -142,21 +142,21 @@ impl IoxGetRequest { /// Encode the request as a protobuf Ticket pub fn try_encode(self) -> Result<Ticket> { let Self { - namespace_name, + database, query, is_debug, } = self; let read_info = match query { RunQuery::Sql(sql_query) => proto::ReadInfo { - namespace_name, + database, sql_query, query_type: QueryType::Sql.into(), flightsql_command: vec![], is_debug, }, RunQuery::InfluxQL(influxql) => proto::ReadInfo { - namespace_name, + database, // field name is misleading sql_query: influxql, query_type: QueryType::InfluxQl.into(), @@ -164,7 +164,7 @@ impl IoxGetRequest { is_debug, }, RunQuery::FlightSQL(flightsql_command) => proto::ReadInfo { - namespace_name, + database, sql_query: "".into(), query_type: QueryType::FlightSqlMessage.into(), flightsql_command: flightsql_command @@ -189,8 +189,8 @@ impl IoxGetRequest { /// This 
@@ -189,8 +189,8 @@ impl IoxGetRequest {
        /// This represents the JSON fields
        #[derive(Deserialize, Debug)]
        struct ReadInfoJson {
-            #[serde(alias = "database", alias = "bucket", alias = "bucket-name")]
-            namespace_name: String,
+            #[serde(alias = "namespace_name", alias = "bucket", alias = "bucket-name")]
+            database: String,
            sql_query: String,
            // If query type is not supplied, defaults to SQL
            query_type: Option<String>,
@@ -199,7 +199,7 @@ impl IoxGetRequest {
        }

        let ReadInfoJson {
-            namespace_name,
+            database,
            sql_query,
            query_type,
            is_debug,
@@ -221,7 +221,7 @@ impl IoxGetRequest {
        };

        Ok(Self {
-            namespace_name,
+            database,
            query,
            is_debug,
        })
@@ -233,7 +233,7 @@ impl IoxGetRequest {
        let query_type = read_info.query_type();
        let proto::ReadInfo {
-            namespace_name,
+            database,
            sql_query,
            query_type: _,
            flightsql_command,
@@ -241,7 +241,7 @@ impl IoxGetRequest {
        } = read_info;

        Ok(Self {
-            namespace_name,
+            database,
            query: match query_type {
                QueryType::Unspecified | QueryType::Sql => {
                    if !flightsql_command.is_empty() {
@@ -277,8 +277,8 @@ impl IoxGetRequest {
        })
    }

-    pub fn namespace_name(&self) -> &str {
-        self.namespace_name.as_ref()
+    pub fn database(&self) -> &str {
+        self.database.as_ref()
    }

    pub fn query(&self) -> &RunQuery {
@@ -306,10 +306,10 @@ mod tests {
-        assert_eq!(ri.namespace_name, "my_db");
+        assert_eq!(ri.database, "my_db");
        assert_matches!(ri.query, RunQuery::Sql(query) => assert_eq!(query, "SELECT 1;"));
    }
@@ -321,22 +321,22 @@ mod tests {
    }

    impl TestCase {
-        fn new_sql(json: &'static str, expected_namespace: &str, query: &str) -> Self {
+        fn new_sql(json: &'static str, expected_database: &str, query: &str) -> Self {
            Self {
                json,
                expected: IoxGetRequest {
-                    namespace_name: String::from(expected_namespace),
+                    database: String::from(expected_database),
                    query: RunQuery::Sql(String::from(query)),
                    is_debug: false,
                },
            }
        }

-        fn new_influxql(json: &'static str, expected_namespace: &str, query: &str) -> Self {
+        fn new_influxql(json: &'static str, expected_database: &str, query: &str) -> Self {
            Self {
                json,
                expected: IoxGetRequest {
-                    namespace_name: String::from(expected_namespace),
+                    database: String::from(expected_database),
                    query: RunQuery::InfluxQL(String::from(query)),
                    is_debug: false,
                },
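Before the expanded case table below, a sketch of what these tests exercise end to end: a raw JSON ticket decoding through `IoxGetRequest::try_decode` with the new primary field name. It assumes the surrounding module's types, and that `arrow_flight::Ticket` carries a `Bytes` payload (older arrow-flight releases used `Vec<u8>`, so the `.into()` may differ by version):

```rust
#[test]
fn json_ticket_decodes_with_new_field_name() {
    // A hand-built JSON ticket, as a Go or Python client might send it.
    let ticket = arrow_flight::Ticket {
        ticket: br#"{"database": "my_db", "sql_query": "SELECT 1;"}"#.to_vec().into(),
    };
    let req = IoxGetRequest::try_decode(ticket).expect("JSON ticket should decode");
    // The accessor is renamed along with the field.
    assert_eq!(req.database(), "my_db");
    assert_matches!(req.query(), RunQuery::Sql(q) => assert_eq!(q, "SELECT 1;"));
}
```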
"my_db", "sql_query": "SELECT 1;", "query_type": null}"#, + "my_db", + "SELECT 1;", + ), // explicit query type, influxql + TestCase::new_influxql( + r#"{"database": "my_db", "sql_query": "SELECT 1;", "query_type": "influxql"}"#, + "my_db", + "SELECT 1;", + ), + TestCase::new_influxql( + r#"{"namespace_name": "my_db", "sql_query": "SELECT 1;", "query_type": "influxql"}"#, + "my_db", + "SELECT 1;", + ), + TestCase::new_influxql( + r#"{"bucket": "my_db", "sql_query": "SELECT 1;", "query_type": "influxql"}"#, + "my_db", + "SELECT 1;", + ), + TestCase::new_influxql( + r#"{"bucket-name": "my_db", "sql_query": "SELECT 1;", "query_type": "influxql"}"#, + "my_db", + "SELECT 1;", + ), + // explicit query type, influxql on metadata + TestCase::new_influxql( + r#"{"database": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "influxql"}"#, + "my_otherdb", + "SHOW DATABASES;", + ), TestCase::new_influxql( r#"{"namespace_name": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "influxql"}"#, "my_otherdb", "SHOW DATABASES;", ), - TestCase::new_influxql( - r#"{"database": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "influxql"}"#, - "my_otherdb", - "SHOW DATABASES;", - ), - // influxql bucket metadata TestCase::new_influxql( r#"{"bucket": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "influxql"}"#, "my_otherdb", "SHOW DATABASES;", ), - // influxql bucket-name metadata TestCase::new_influxql( r#"{"bucket-name": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "influxql"}"#, "my_otherdb", "SHOW DATABASES;", ), - // sql database metadata + // explicit query type, sql on metadata TestCase::new_sql( - r#"{"database": "my_db", "sql_query": "SELECT 1;", "query_type": "sql"}"#, - "my_db", - "SELECT 1;", + r#"{"database": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "sql"}"#, + "my_otherdb", + "SHOW DATABASES;", ), - // sql bucket metadata TestCase::new_sql( - r#"{"bucket": "my_db", "sql_query": "SELECT 1;", "query_type": "sql"}"#, - "my_db", - "SELECT 1;", + r#"{"namespace_name": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "sql"}"#, + "my_otherdb", + "SHOW DATABASES;", ), - // sql bucket-name metadata TestCase::new_sql( - r#"{"bucket-name": "my_db", "sql_query": "SELECT 1;", "query_type": "sql"}"#, - "my_db", - "SELECT 1;", + r#"{"bucket": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "sql"}"#, + "my_otherdb", + "SHOW DATABASES;", + ), + TestCase::new_sql( + r#"{"bucket-name": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "sql"}"#, + "my_otherdb", + "SHOW DATABASES;", ), ]; @@ -446,7 +513,7 @@ mod tests { #[test] fn proto_ticket_decoding_unspecified() { let ticket = make_proto_ticket(&proto::ReadInfo { - namespace_name: "_".to_string(), + database: "_".to_string(), sql_query: "SELECT 1".to_string(), query_type: QueryType::Unspecified.into(), flightsql_command: vec![], @@ -455,14 +522,14 @@ mod tests { // Reverts to default (unspecified) for invalid query_type enumeration, and thus SQL let ri = IoxGetRequest::try_decode(ticket).unwrap(); - assert_eq!(ri.namespace_name, "_"); + assert_eq!(ri.database, "_"); assert_matches!(ri.query, RunQuery::Sql(query) => assert_eq!(query, "SELECT 1")); } #[test] fn proto_ticket_decoding_sql() { let ticket = make_proto_ticket(&proto::ReadInfo { - namespace_name: "_".to_string(), + database: "_".to_string(), sql_query: "SELECT 1".to_string(), query_type: QueryType::Sql.into(), flightsql_command: vec![], @@ -470,14 +537,14 @@ mod tests { }); let ri = 

    #[test]
    fn proto_ticket_decoding_sql() {
        let ticket = make_proto_ticket(&proto::ReadInfo {
-            namespace_name: "_".to_string(),
+            database: "_".to_string(),
            sql_query: "SELECT 1".to_string(),
            query_type: QueryType::Sql.into(),
            flightsql_command: vec![],
@@ -470,14 +537,14 @@ mod tests {
        });

        let ri = IoxGetRequest::try_decode(ticket).unwrap();
-        assert_eq!(ri.namespace_name, "_");
+        assert_eq!(ri.database, "_");
        assert_matches!(ri.query, RunQuery::Sql(query) => assert_eq!(query, "SELECT 1"));
    }

    #[test]
    fn proto_ticket_decoding_influxql() {
        let ticket = make_proto_ticket(&proto::ReadInfo {
-            namespace_name: "_".to_string(),
+            database: "_".to_string(),
            sql_query: "SELECT 1".to_string(),
            query_type: QueryType::InfluxQl.into(),
            flightsql_command: vec![],
@@ -485,14 +552,14 @@ mod tests {
        });

        let ri = IoxGetRequest::try_decode(ticket).unwrap();
-        assert_eq!(ri.namespace_name, "_");
+        assert_eq!(ri.database, "_");
        assert_matches!(ri.query, RunQuery::InfluxQL(query) => assert_eq!(query, "SELECT 1"));
    }

    #[test]
    fn proto_ticket_decoding_too_new() {
        let ticket = make_proto_ticket(&proto::ReadInfo {
-            namespace_name: "_".to_string(),
+            database: "_".to_string(),
            sql_query: "SELECT 1".into(),
            query_type: 42, // not a known query type
            flightsql_command: vec![],
@@ -501,14 +568,14 @@ mod tests {
        });

        // Reverts to default (unspecified) for invalid query_type enumeration, and thus SQL
        let ri = IoxGetRequest::try_decode(ticket).unwrap();
-        assert_eq!(ri.namespace_name, "_");
+        assert_eq!(ri.database, "_");
        assert_matches!(ri.query, RunQuery::Sql(query) => assert_eq!(query, "SELECT 1"));
    }

    #[test]
    fn proto_ticket_decoding_sql_too_many_fields() {
        let ticket = make_proto_ticket(&proto::ReadInfo {
-            namespace_name: "_".to_string(),
+            database: "_".to_string(),
            sql_query: "SELECT 1".to_string(),
            query_type: QueryType::Sql.into(),
            // can't have both sql_query and flightsql
@@ -523,7 +590,7 @@ mod tests {
    #[test]
    fn proto_ticket_decoding_influxql_too_many_fields() {
        let ticket = make_proto_ticket(&proto::ReadInfo {
-            namespace_name: "_".to_string(),
+            database: "_".to_string(),
            sql_query: "SELECT 1".to_string(),
            query_type: QueryType::InfluxQl.into(),
            // can't have both sql_query and flightsql
@@ -538,7 +605,7 @@ mod tests {
    #[test]
    fn proto_ticket_decoding_flightsql_too_many_fields() {
        let ticket = make_proto_ticket(&proto::ReadInfo {
-            namespace_name: "_".to_string(),
+            database: "_".to_string(),
            sql_query: "SELECT 1".to_string(),
            query_type: QueryType::FlightSqlMessage.into(),
            // can't have both sql_query and flightsql
@@ -564,7 +631,7 @@ mod tests {
    #[test]
    fn round_trip_sql() {
        let request = IoxGetRequest {
-            namespace_name: "foo_blarg".into(),
+            database: "foo_blarg".into(),
            query: RunQuery::Sql("select * from bar".into()),
            is_debug: false,
        };
@@ -579,7 +646,7 @@ mod tests {
    #[test]
    fn round_trip_sql_is_debug() {
        let request = IoxGetRequest {
-            namespace_name: "foo_blarg".into(),
+            database: "foo_blarg".into(),
            query: RunQuery::Sql("select * from bar".into()),
            is_debug: true,
        };
@@ -594,7 +661,7 @@ mod tests {
    #[test]
    fn round_trip_influxql() {
        let request = IoxGetRequest {
-            namespace_name: "foo_blarg".into(),
+            database: "foo_blarg".into(),
            query: RunQuery::InfluxQL("select * from bar".into()),
            is_debug: false,
        };
@@ -613,7 +680,7 @@ mod tests {
        });

        let request = IoxGetRequest {
-            namespace_name: "foo_blarg".into(),
+            database: "foo_blarg".into(),
            query: RunQuery::FlightSQL(cmd),
            is_debug: false,
        };
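The `round_trip_*` tests above all assert the same property, that encode followed by decode is the identity, which the `PartialEq` and `Clone` derives on `IoxGetRequest` make cheap to state. Condensed into one sketch using only constructors shown in this diff:

```rust
#[test]
fn encode_decode_round_trip() {
    let request = IoxGetRequest::new(
        "foo_blarg",
        RunQuery::Sql("select * from bar".into()),
        false,
    );
    // `try_encode` consumes the request, so keep a clone for the comparison.
    let ticket = request.clone().try_encode().expect("request encodes");
    let roundtripped = IoxGetRequest::try_decode(ticket).expect("ticket decodes");
    assert_eq!(request, roundtripped);
}
```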
serde_json = "1.0.96" snafu = "0.7" diff --git a/test_helpers_end_to_end/Cargo.toml b/test_helpers_end_to_end/Cargo.toml index 17e2518f6c..ec075dda2e 100644 --- a/test_helpers_end_to_end/Cargo.toml +++ b/test_helpers_end_to_end/Cargo.toml @@ -24,7 +24,7 @@ mutable_batch_lp = { path = "../mutable_batch_lp" } mutable_batch_pb = { path = "../mutable_batch_pb" } nix = "0.26" observability_deps = { path = "../observability_deps" } -once_cell = { version = "1.17", features = ["parking_lot"] } +once_cell = { version = "1.18", features = ["parking_lot"] } parking_lot = "0.12" prost = "0.11" rand = "0.8.3" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 657da5b335..c8370fc133 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -30,9 +30,9 @@ bytes = { version = "1" } chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "serde"] } crossbeam-utils = { version = "0.8" } crypto-common = { version = "0.1", default-features = false, features = ["std"] } -datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ee1019d9585252066ef5b288c84aabebcbd93ca6" } -datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ee1019d9585252066ef5b288c84aabebcbd93ca6", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } -datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ee1019d9585252066ef5b288c84aabebcbd93ca6", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "859251b4a20e00c3dfe73eee6b605fcf722687e7" } +datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "859251b4a20e00c3dfe73eee6b605fcf722687e7", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } +datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "859251b4a20e00c3dfe73eee6b605fcf722687e7", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } digest = { version = "0.10", features = ["mac", "std"] } either = { version = "1" } fixedbitset = { version = "0.4" } @@ -81,6 +81,7 @@ smallvec = { version = "1", default-features = false, features = ["union"] } sqlparser = { version = "0.34", features = ["visitor"] } sqlx = { version = "0.6", features = ["json", "postgres", "runtime-tokio-rustls", "sqlite", "tls", "uuid"] } sqlx-core = { version = "0.6", default-features = false, features = ["any", "migrate", "postgres", "runtime-tokio-rustls", "sqlite", "uuid"] } +strum = { version = "0.24", features = ["derive"] } thrift = { version = "0.17" } tokio = { version = "1", features = ["full", "test-util", "tracing"] } tokio-stream = { version = "0.1", features = ["fs", "net"] } @@ -91,6 +92,8 @@ tracing = { version = "0.1", features = ["log", "max_level_trace", "release_max_ tracing-core = { version = "0.1" } tracing-log = { version = "0.1" } tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "parking_lot"] } +unicode-bidi = { version = "0.3" } +unicode-normalization = { version = "0.1" } url = { version = "2" } uuid = { version = "1", features = ["v4"] } zstd = { version = "0.12" } @@ -150,6 +153,8 @@ syn-dff4ba8e3ae991db = { package = "syn", version = "1", features = ["extra-trai syn-f595c2ba2a3f28df = { package = "syn", 
version = "2", features = ["extra-traits", "full", "visit-mut"] } tokio = { version = "1", features = ["full", "test-util", "tracing"] } tokio-stream = { version = "0.1", features = ["fs", "net"] } +unicode-bidi = { version = "0.3" } +unicode-normalization = { version = "0.1" } url = { version = "2" } uuid = { version = "1", features = ["v4"] }