Merge branch 'main' into sgc/issue/7829_time_bounds_3

# Conflicts:
#	iox_query_influxql/src/plan/planner.rs
pull/24376/head
Stuart Carnie 2023-06-06 12:55:43 +10:00
commit 9e2550c933
No known key found for this signature in database
GPG Key ID: 848D9C9718D78B4F
35 changed files with 407 additions and 264 deletions

Cargo.lock generated
View File

@ -1382,7 +1382,7 @@ dependencies = [
[[package]]
name = "datafusion"
version = "25.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7"
dependencies = [
"ahash 0.8.3",
"arrow",
@ -1431,7 +1431,7 @@ dependencies = [
[[package]]
name = "datafusion-common"
version = "25.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7"
dependencies = [
"arrow",
"arrow-array",
@ -1445,7 +1445,7 @@ dependencies = [
[[package]]
name = "datafusion-execution"
version = "25.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7"
dependencies = [
"dashmap",
"datafusion-common",
@ -1462,18 +1462,21 @@ dependencies = [
[[package]]
name = "datafusion-expr"
version = "25.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7"
dependencies = [
"ahash 0.8.3",
"arrow",
"datafusion-common",
"lazy_static",
"sqlparser",
"strum",
"strum_macros",
]
[[package]]
name = "datafusion-optimizer"
version = "25.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7"
dependencies = [
"arrow",
"async-trait",
@ -1490,7 +1493,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
version = "25.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7"
dependencies = [
"ahash 0.8.3",
"arrow",
@ -1522,7 +1525,7 @@ dependencies = [
[[package]]
name = "datafusion-proto"
version = "25.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7"
dependencies = [
"arrow",
"chrono",
@ -1536,7 +1539,7 @@ dependencies = [
[[package]]
name = "datafusion-row"
version = "25.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7"
dependencies = [
"arrow",
"datafusion-common",
@ -1547,7 +1550,7 @@ dependencies = [
[[package]]
name = "datafusion-sql"
version = "25.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=ee1019d9585252066ef5b288c84aabebcbd93ca6#ee1019d9585252066ef5b288c84aabebcbd93ca6"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=859251b4a20e00c3dfe73eee6b605fcf722687e7#859251b4a20e00c3dfe73eee6b605fcf722687e7"
dependencies = [
"arrow",
"arrow-schema",
@ -1777,9 +1780,9 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
[[package]]
name = "flatbuffers"
version = "23.1.21"
version = "23.5.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77f5399c2c9c50ae9418e522842ad362f61ee48b346ac106807bd355a8a7c619"
checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640"
dependencies = [
"bitflags",
"rustc_version",
@ -1841,9 +1844,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "form_urlencoded"
version = "1.1.0"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8"
checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652"
dependencies = [
"percent-encoding",
]
@ -2388,9 +2391,9 @@ dependencies = [
[[package]]
name = "idna"
version = "0.3.0"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6"
checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c"
dependencies = [
"unicode-bidi",
"unicode-normalization",
@ -3297,9 +3300,9 @@ dependencies = [
[[package]]
name = "libc"
version = "0.2.144"
version = "0.2.145"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1"
checksum = "fc86cde3ff845662b8f4ef6cb50ea0e20c524eb3d29ae048287e06a1b3fa6a81"
[[package]]
name = "libm"
@ -3860,9 +3863,9 @@ dependencies = [
[[package]]
name = "once_cell"
version = "1.17.2"
version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9670a07f94779e00908f3e686eab508878ebb390ba6e604d3a284c00e8d0487b"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
dependencies = [
"parking_lot_core 0.9.7",
]
@ -4108,9 +4111,9 @@ dependencies = [
[[package]]
name = "percent-encoding"
version = "2.2.0"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"
checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"
[[package]]
name = "pest"
@ -4707,9 +4710,9 @@ dependencies = [
[[package]]
name = "regex"
version = "1.8.3"
version = "1.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81ca098a9821bd52d6b24fd8b10bd081f47d39c22778cafaa75a2857a62c6390"
checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f"
dependencies = [
"aho-corasick",
"memchr",
@ -5573,6 +5576,9 @@ name = "strum"
version = "0.24.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
dependencies = [
"strum_macros",
]
[[package]]
name = "strum_macros"
@ -6311,9 +6317,9 @@ checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a"
[[package]]
name = "url"
version = "2.3.1"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643"
checksum = "50bff7831e19200a85b17131d085c25d7811bc4e186efdaf54bbd132994a88cb"
dependencies = [
"form_urlencoded",
"idna",
@ -6864,6 +6870,7 @@ dependencies = [
"sqlx",
"sqlx-core",
"sqlx-macros",
"strum",
"syn 1.0.109",
"syn 2.0.16",
"thrift",
@ -6876,6 +6883,8 @@ dependencies = [
"tracing-core",
"tracing-log",
"tracing-subscriber",
"unicode-bidi",
"unicode-normalization",
"url",
"uuid 1.3.3",
"webpki",

View File

@ -118,8 +118,8 @@ license = "MIT OR Apache-2.0"
[workspace.dependencies]
arrow = { version = "40.0.0" }
arrow-flight = { version = "40.0.0" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ee1019d9585252066ef5b288c84aabebcbd93ca6", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ee1019d9585252066ef5b288c84aabebcbd93ca6" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "859251b4a20e00c3dfe73eee6b605fcf722687e7", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "859251b4a20e00c3dfe73eee6b605fcf722687e7" }
hashbrown = { version = "0.13.2" }
object_store = { version = "0.5.6" }
parquet = { version = "40.0.0" }

View File

@ -15,8 +15,8 @@ chrono = { version = "0.4", default-features = false }
comfy-table = { version = "6.2", default-features = false }
hashbrown = { workspace = true }
num-traits = "0.2"
once_cell = { version = "1.17", features = ["parking_lot"] }
regex = "1.8.3"
once_cell = { version = "1.18", features = ["parking_lot"] }
regex = "1.8.4"
snafu = "0.7"
uuid = "1"
workspace-hack = { version = "0.1", path = "../workspace-hack" }

View File

@ -15,7 +15,7 @@ observability_deps = { path = "../observability_deps" }
pin-project = "1.1"
tokio = { version = "1.28", features = ["parking_lot", "sync"] }
tokio-stream = "0.1"
url = "2.2"
url = "2.4"
workspace-hack = { version = "0.1", path = "../workspace-hack" }
[dev-dependencies]

View File

@ -8,7 +8,7 @@ license.workspace = true
[dependencies]
futures = "0.3"
observability_deps = { path = "../observability_deps" }
once_cell = { version = "1.17", features = ["parking_lot"] }
once_cell = { version = "1.18", features = ["parking_lot"] }
parking_lot = "0.12"
pin-project = "1.1"
tokio = { version = "1.28" }

View File

@ -28,6 +28,6 @@ bytes = "1.4"
data_types = { path = "../data_types" }
filetime = "0.2"
metric = { path = "../metric" }
once_cell = { version = "1.17", features = ["parking_lot"] }
once_cell = { version = "1.18", features = ["parking_lot"] }
parquet_file = { path = "../parquet_file" }
tempfile = "3"

View File

@ -16,8 +16,9 @@ option go_package = "github.com/influxdata/iox/querier/v1";
* `GetFlightInfo` followed by a `DoGet`).
*/
message ReadInfo {
// Namespace name.
string namespace_name = 1;
// Database name
// This used to be namespace_name
string database = 1;
// Query text (either SQL or InfluxQL, depending on query_type)
string sql_query = 2;
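
The rename keeps field number 1, so already-encoded requests stay wire-compatible; only the generated field name and its call sites change. A minimal stand-in sketch (the real type is prost-generated; the struct below only models the fields visible in this diff):

```rust
// Stand-in for the prost-generated message (assumption: the real type lives
// in the generated_types crate); only fields visible in this diff are modelled.
#[derive(Default)]
struct ReadInfo {
    /// Field number 1 is unchanged, so renaming `namespace_name` to
    /// `database` does not affect the wire format, only generated code.
    database: String,
    sql_query: String,
    query_type: i32,
    flightsql_command: Vec<u8>,
}

fn main() {
    let request = ReadInfo {
        database: "mydb".into(), // was `namespace_name: ...`
        sql_query: "SELECT * FROM cpu".into(),
        ..Default::default()
    };
    assert_eq!(request.database, "mydb");
    assert!(request.sql_query.starts_with("SELECT"));
    // query_type / flightsql_command stay at their defaults for the sketch.
    let _ = (request.query_type, request.flightsql_command);
}
```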

View File

@ -12,12 +12,12 @@ reqwest = { version = "0.11", default-features = false, features = ["stream", "j
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.96"
snafu = "0.7"
url = "2.3.1"
url = "2.4.0"
uuid = { version = "1", features = ["v4"] }
[dev-dependencies] # In alphabetical order
mockito = "1.0"
once_cell = { version = "1.17", features = ["parking_lot"] }
once_cell = { version = "1.18", features = ["parking_lot"] }
parking_lot = "0.12"
tokio = { version = "1.28", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] }
test_helpers = { path = "../test_helpers" }

View File

@ -64,7 +64,7 @@ itertools = "0.10.5"
lazy_static = "1.4.0"
libc = { version = "0.2" }
num_cpus = "1.15.0"
once_cell = { version = "1.17", features = ["parking_lot"] }
once_cell = { version = "1.18", features = ["parking_lot"] }
rustyline = { version = "11.0", default-features = false, features = ["with-file-history"]}
serde_json = "1.0.96"
snafu = "0.7"

View File

@ -1,67 +1,67 @@
-- Test Setup: OneMeasurementTwoSeries
-- SQL: select date_bin(INTERVAL '1 month', column1) from (values (timestamp '2022-01-01 00:00:00'), (timestamp '2022-01-01 01:00:00'), (timestamp '2022-01-02 00:00:00'), (timestamp '2022-02-02 00:00:00'), (timestamp '2022-02-15 00:00:00'), (timestamp '2022-03-31 00:00:00') ) as sq;
+---------------------------------------------------------------------------+
| datebin(IntervalMonthDayNano("79228162514264337593543950336"),sq.column1) |
+---------------------------------------------------------------------------+
+----------------------------------------------------------------------------+
| date_bin(IntervalMonthDayNano("79228162514264337593543950336"),sq.column1) |
+----------------------------------------------------------------------------+
| 2022-01-01T00:00:00Z |
| 2022-01-01T00:00:00Z |
| 2022-01-01T00:00:00Z |
| 2022-02-01T00:00:00Z |
| 2022-02-01T00:00:00Z |
| 2022-03-01T00:00:00Z |
+---------------------------------------------------------------------------+
+----------------------------------------------------------------------------+
-- SQL: select date_bin('1 year', column1) from (values (timestamp '2022-01-01 00:00:00'), (timestamp '2023-01-01 01:00:00'), (timestamp '2022-01-02 00:00:00'), (timestamp '2022-02-02 00:00:00'), (timestamp '2022-02-15 00:00:00'), (timestamp '2022-03-31 00:00:00') ) as sq;
+------------------------------------+
| datebin(Utf8("1 year"),sq.column1) |
+------------------------------------+
+-------------------------------------+
| date_bin(Utf8("1 year"),sq.column1) |
+-------------------------------------+
| 2022-01-01T00:00:00Z |
| 2023-01-01T00:00:00Z |
| 2022-01-01T00:00:00Z |
| 2022-01-01T00:00:00Z |
| 2022-01-01T00:00:00Z |
| 2022-01-01T00:00:00Z |
+------------------------------------+
+-------------------------------------+
-- SQL: select date_bin('1 month', column1, '1970-12-31T00:15:00Z') from (values (timestamp '2022-01-01 00:00:00'), (timestamp '2022-01-01 01:00:00'), (timestamp '2022-01-02 00:00:00'), (timestamp '2022-02-02 00:00:00'), (timestamp '2022-02-15 00:00:00'), (timestamp '2022-03-31 00:00:00') ) as sq;
+------------------------------------------------------------------+
| datebin(Utf8("1 month"),sq.column1,Utf8("1970-12-31T00:15:00Z")) |
+------------------------------------------------------------------+
+-------------------------------------------------------------------+
| date_bin(Utf8("1 month"),sq.column1,Utf8("1970-12-31T00:15:00Z")) |
+-------------------------------------------------------------------+
| 2021-12-31T00:15:00Z |
| 2021-12-31T00:15:00Z |
| 2021-12-31T00:15:00Z |
| 2022-01-31T00:15:00Z |
| 2022-01-31T00:15:00Z |
| 2022-02-28T00:15:00Z |
+------------------------------------------------------------------+
+-------------------------------------------------------------------+
-- SQL: SELECT DATE_BIN('5 month', '2022-01-01T00:00:00Z');
+-------------------------------------------------------+
| datebin(Utf8("5 month"),Utf8("2022-01-01T00:00:00Z")) |
+-------------------------------------------------------+
+--------------------------------------------------------+
| date_bin(Utf8("5 month"),Utf8("2022-01-01T00:00:00Z")) |
+--------------------------------------------------------+
| 2021-09-01T00:00:00Z |
+-------------------------------------------------------+
+--------------------------------------------------------+
-- SQL: SELECT DATE_BIN('3 month', '2022-04-01T00:00:00Z', '2021-05-31T00:04:00Z');
+------------------------------------------------------------------------------------+
| datebin(Utf8("3 month"),Utf8("2022-04-01T00:00:00Z"),Utf8("2021-05-31T00:04:00Z")) |
+------------------------------------------------------------------------------------+
+-------------------------------------------------------------------------------------+
| date_bin(Utf8("3 month"),Utf8("2022-04-01T00:00:00Z"),Utf8("2021-05-31T00:04:00Z")) |
+-------------------------------------------------------------------------------------+
| 2022-02-28T00:04:00Z |
+------------------------------------------------------------------------------------+
+-------------------------------------------------------------------------------------+
-- SQL: select date_bin('1 month', timestamp '2000-01-31T00:00:00', timestamp '2000-02-29T00:00:00');
+----------------------------------------------------------------------------------+
| datebin(Utf8("1 month"),Utf8("2000-01-31T00:00:00"),Utf8("2000-02-29T00:00:00")) |
+----------------------------------------------------------------------------------+
+-----------------------------------------------------------------------------------+
| date_bin(Utf8("1 month"),Utf8("2000-01-31T00:00:00"),Utf8("2000-02-29T00:00:00")) |
+-----------------------------------------------------------------------------------+
| 2000-01-29T00:00:00Z |
+----------------------------------------------------------------------------------+
+-----------------------------------------------------------------------------------+
-- SQL: select date_bin('1 month', timestamp '2000-01-31T00:00:00', timestamp '2000-03-29T00:00:00');
+----------------------------------------------------------------------------------+
| datebin(Utf8("1 month"),Utf8("2000-01-31T00:00:00"),Utf8("2000-03-29T00:00:00")) |
+----------------------------------------------------------------------------------+
+-----------------------------------------------------------------------------------+
| date_bin(Utf8("1 month"),Utf8("2000-01-31T00:00:00"),Utf8("2000-03-29T00:00:00")) |
+-----------------------------------------------------------------------------------+
| 2000-01-29T00:00:00Z |
+----------------------------------------------------------------------------------+
+-----------------------------------------------------------------------------------+
-- SQL: SELECT DATE_BIN('3 years 1 months', '2022-09-01 00:00:00Z');
+----------------------------------------------------------------+
| datebin(Utf8("3 years 1 months"),Utf8("2022-09-01 00:00:00Z")) |
+----------------------------------------------------------------+
+-----------------------------------------------------------------+
| date_bin(Utf8("3 years 1 months"),Utf8("2022-09-01 00:00:00Z")) |
+-----------------------------------------------------------------+
| 2022-06-01T00:00:00Z |
+----------------------------------------------------------------+
+-----------------------------------------------------------------+
-- SQL: SELECT * FROM cpu ORDER BY REGION, TIME;
+------+--------+----------------------+------+
| idle | region | time | user |
@ -103,14 +103,14 @@
----------
| plan_type | plan |
----------
| logical_plan | Projection: datebin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z")) AS month, COUNT(cpu.user) |
| | Aggregate: groupBy=[[datebin(IntervalMonthDayNano("79228162514264337593543950336"), cpu.time, TimestampNanosecond(31450500000000000, None)) AS datebin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))]], aggr=[[COUNT(cpu.user)]] |
| logical_plan | Projection: date_bin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z")) AS month, COUNT(cpu.user) |
| | Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("79228162514264337593543950336"), cpu.time, TimestampNanosecond(31450500000000000, None)) AS date_bin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))]], aggr=[[COUNT(cpu.user)]] |
| | TableScan: cpu projection=[time, user], full_filters=[cpu.time >= TimestampNanosecond(957528000000000000, None), cpu.time <= TimestampNanosecond(957531540000000000, None)] |
| physical_plan | ProjectionExec: expr=[datebin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))@0 as month, COUNT(cpu.user)@1 as COUNT(cpu.user)] |
| | AggregateExec: mode=FinalPartitioned, gby=[datebin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))@0 as datebin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))], aggr=[COUNT(cpu.user)] |
| physical_plan | ProjectionExec: expr=[date_bin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))@0 as month, COUNT(cpu.user)@1 as COUNT(cpu.user)] |
| | AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))@0 as date_bin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))], aggr=[COUNT(cpu.user)] |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | RepartitionExec: partitioning=Hash([Column { name: "datebin(Utf8(\"1 month\"),cpu.time,Utf8(\"1970-12-31T00:15:00Z\"))", index: 0 }], 4), input_partitions=4 |
| | AggregateExec: mode=Partial, gby=[datebin(79228162514264337593543950336, time@0, 31450500000000000) as datebin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))], aggr=[COUNT(cpu.user)] |
| | RepartitionExec: partitioning=Hash([Column { name: "date_bin(Utf8(\"1 month\"),cpu.time,Utf8(\"1970-12-31T00:15:00Z\"))", index: 0 }], 4), input_partitions=4 |
| | AggregateExec: mode=Partial, gby=[date_bin(79228162514264337593543950336, time@0, 31450500000000000) as date_bin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))], aggr=[COUNT(cpu.user)] |
| | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | FilterExec: time@0 >= 957528000000000000 AND time@0 <= 957531540000000000 |
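
The expected-output changes in this file are purely cosmetic: the DataFusion revision bump renames the built-in's display from `datebin` to `date_bin`, so column headers and plan nodes widen by one character. A minimal reproduction sketch (assumes `datafusion` and `tokio` as dev-dependencies; the exact header text depends on the pinned revision):

```rust
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> datafusion::error::Result<()> {
    let ctx = SessionContext::new();
    // The projected column is now labelled `date_bin(...)`, not `datebin(...)`.
    let df = ctx
        .sql("SELECT date_bin(INTERVAL '1 month', TIMESTAMP '2022-02-15 00:00:00')")
        .await?;
    df.show().await?;
    Ok(())
}
```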

View File

@ -96,16 +96,15 @@
----------
| plan_type | plan |
----------
| Plan with Metrics | CoalescePartitionsExec, metrics=[elapsed_compute=1.234ms, output_rows=10] |
| | UnionExec, metrics=[elapsed_compute=1.234ms, output_rows=10] |
| Plan with Metrics | UnionExec, metrics=[elapsed_compute=1.234ms, output_rows=10] |
| | CoalesceBatchesExec: target_batch_size=8192, metrics=[elapsed_compute=1.234ms, output_rows=5] |
| | FilterExec: state@4 = MA, metrics=[elapsed_compute=1.234ms, output_rows=5] |
| | ParquetExec: file_groups={2 groups: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet], [1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, projection=[area, city, max_temp, min_temp, state, time], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], predicate=state@4 = MA, pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, metrics=[bytes_scanned=1683, elapsed_compute=1.234ms, num_predicate_creation_errors=0, output_rows=5, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=5, row_groups_pruned=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning_total=1.234ms, time_elapsed_scanning_until_data=1.234ms] |
| | ParquetExec: file_groups={2 groups: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet], [1/1/1/00000000-0000-0000-0000-000000000001.parquet]]}, projection=[area, city, max_temp, min_temp, state, time], output_ordering=[state@4 ASC, city@1 ASC, time@5 ASC], predicate=state@4 = MA, pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, metrics=[bytes_scanned=1683, elapsed_compute=1.234ms, file_open_errors=0, file_scan_errors=0, num_predicate_creation_errors=0, output_rows=5, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=5, row_groups_pruned=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning_total=1.234ms, time_elapsed_scanning_until_data=1.234ms] |
| | ProjectionExec: expr=[area@1 as area, city@2 as city, max_temp@3 as max_temp, min_temp@4 as min_temp, state@5 as state, time@6 as time], metrics=[elapsed_compute=1.234ms, output_rows=5] |
| | DeduplicateExec: [state@5 ASC,city@2 ASC,time@6 ASC], metrics=[elapsed_compute=1.234ms, num_dupes=2, output_rows=5] |
| | SortPreservingMergeExec: [state@5 ASC,city@2 ASC,time@6 ASC,__chunk_order@0 ASC], metrics=[elapsed_compute=1.234ms, output_rows=7] |
| | CoalesceBatchesExec: target_batch_size=8192, metrics=[elapsed_compute=1.234ms, output_rows=7] |
| | FilterExec: state@5 = MA, metrics=[elapsed_compute=1.234ms, output_rows=7] |
| | ParquetExec: file_groups={2 groups: [[1/1/1/00000000-0000-0000-0000-000000000002.parquet], [1/1/1/00000000-0000-0000-0000-000000000003.parquet]]}, projection=[__chunk_order, area, city, max_temp, min_temp, state, time], output_ordering=[state@5 ASC, city@2 ASC, time@6 ASC, __chunk_order@0 ASC], predicate=state@4 = MA, pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, metrics=[bytes_scanned=1532, elapsed_compute=1.234ms, num_predicate_creation_errors=0, output_rows=7, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=3, row_groups_pruned=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning_total=1.234ms, time_elapsed_scanning_until_data=1.234ms] |
| | ParquetExec: file_groups={2 groups: [[1/1/1/00000000-0000-0000-0000-000000000002.parquet], [1/1/1/00000000-0000-0000-0000-000000000003.parquet]]}, projection=[__chunk_order, area, city, max_temp, min_temp, state, time], output_ordering=[state@5 ASC, city@2 ASC, time@6 ASC, __chunk_order@0 ASC], predicate=state@4 = MA, pruning_predicate=state_min@0 <= MA AND MA <= state_max@1, metrics=[bytes_scanned=1532, elapsed_compute=1.234ms, file_open_errors=0, file_scan_errors=0, num_predicate_creation_errors=0, output_rows=7, page_index_eval_time=1.234ms, page_index_rows_filtered=0, predicate_evaluation_errors=0, pushdown_eval_time=1.234ms, pushdown_rows_filtered=3, row_groups_pruned=0, time_elapsed_opening=1.234ms, time_elapsed_processing=1.234ms, time_elapsed_scanning_total=1.234ms, time_elapsed_scanning_until_data=1.234ms] |
| | |
----------

View File

@ -24,7 +24,7 @@
----------
| logical_plan | Projection: date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time) AS minute, COUNT(cpu.user) |
| | GapFill: groupBy=[[date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)]], aggr=[[COUNT(cpu.user)]], time_column=date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time), stride=IntervalMonthDayNano("600000000000"), range=Included(TimestampNanosecond(957528000000000000, None))..Included(TimestampNanosecond(957531540000000000, None)) |
| | Aggregate: groupBy=[[datebin(IntervalMonthDayNano("600000000000"), cpu.time) AS date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)]], aggr=[[COUNT(cpu.user)]] |
| | Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("600000000000"), cpu.time) AS date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)]], aggr=[[COUNT(cpu.user)]] |
| | TableScan: cpu projection=[time, user], full_filters=[cpu.time >= TimestampNanosecond(957528000000000000, None), cpu.time <= TimestampNanosecond(957531540000000000, None)] |
| physical_plan | ProjectionExec: expr=[date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@0 as minute, COUNT(cpu.user)@1 as COUNT(cpu.user)] |
| | GapFillExec: group_expr=[date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@0], aggr_expr=[COUNT(cpu.user)@1], stride=600000000000, time_range=Included("957528000000000000")..Included("957531540000000000") |
@ -33,7 +33,7 @@
| | AggregateExec: mode=FinalPartitioned, gby=[date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@0 as date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)], aggr=[COUNT(cpu.user)] |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | RepartitionExec: partitioning=Hash([Column { name: "date_bin_gapfill(IntervalMonthDayNano(\"600000000000\"),cpu.time)", index: 0 }], 4), input_partitions=4 |
| | AggregateExec: mode=Partial, gby=[datebin(600000000000, time@0) as date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)], aggr=[COUNT(cpu.user)] |
| | AggregateExec: mode=Partial, gby=[date_bin(600000000000, time@0) as date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)], aggr=[COUNT(cpu.user)] |
| | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | FilterExec: time@0 >= 957528000000000000 AND time@0 <= 957531540000000000 |
@ -117,7 +117,7 @@ Error during planning: gap-filling query is missing lower time bound
----------
| logical_plan | Projection: cpu.region, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time) AS minute, AVG(cpu.user) AS locf(AVG(cpu.user)) |
| | GapFill: groupBy=[[cpu.region, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)]], aggr=[[LOCF(AVG(cpu.user))]], time_column=date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time), stride=IntervalMonthDayNano("600000000000"), range=Included(TimestampNanosecond(957528000000000000, None))..Included(TimestampNanosecond(957531540000000000, None)) |
| | Aggregate: groupBy=[[cpu.region, datebin(IntervalMonthDayNano("600000000000"), cpu.time) AS date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)]], aggr=[[AVG(cpu.user)]] |
| | Aggregate: groupBy=[[cpu.region, date_bin(IntervalMonthDayNano("600000000000"), cpu.time) AS date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)]], aggr=[[AVG(cpu.user)]] |
| | TableScan: cpu projection=[region, time, user], full_filters=[cpu.time >= TimestampNanosecond(957528000000000000, None), cpu.time <= TimestampNanosecond(957531540000000000, None)] |
| physical_plan | ProjectionExec: expr=[region@0 as region, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@1 as minute, AVG(cpu.user)@2 as locf(AVG(cpu.user))] |
| | GapFillExec: group_expr=[region@0, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@1], aggr_expr=[LOCF(AVG(cpu.user)@2)], stride=600000000000, time_range=Included("957528000000000000")..Included("957531540000000000") |
@ -126,7 +126,7 @@ Error during planning: gap-filling query is missing lower time bound
| | AggregateExec: mode=FinalPartitioned, gby=[region@0 as region, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@1 as date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)], aggr=[AVG(cpu.user)] |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | RepartitionExec: partitioning=Hash([Column { name: "region", index: 0 }, Column { name: "date_bin_gapfill(IntervalMonthDayNano(\"600000000000\"),cpu.time)", index: 1 }], 4), input_partitions=4 |
| | AggregateExec: mode=Partial, gby=[region@0 as region, datebin(600000000000, time@1) as date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)], aggr=[AVG(cpu.user)], ordering_mode=PartiallyOrdered |
| | AggregateExec: mode=Partial, gby=[region@0 as region, date_bin(600000000000, time@1) as date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)], aggr=[AVG(cpu.user)], ordering_mode=PartiallyOrdered |
| | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | FilterExec: time@1 >= 957528000000000000 AND time@1 <= 957531540000000000 |

View File

@ -185,15 +185,15 @@ impl Client {
Ok(self.inner.add_header(key, value)?)
}
/// Query the given namespace with the given SQL query, returning
/// Query the given database with the given SQL query, returning
/// a struct that can stream Arrow [`RecordBatch`] results.
pub async fn sql(
&mut self,
namespace_name: impl Into<String> + Send,
database: impl Into<String> + Send,
sql_query: impl Into<String> + Send,
) -> Result<IOxRecordBatchStream, Error> {
let request = ReadInfo {
namespace_name: namespace_name.into(),
database: database.into(),
sql_query: sql_query.into(),
query_type: QueryType::Sql.into(),
flightsql_command: vec![],
@ -203,15 +203,15 @@ impl Client {
self.do_get_with_read_info(request).await
}
/// Query the given namespace with the given InfluxQL query, returning
/// Query the given database with the given InfluxQL query, returning
/// a struct that can stream Arrow [`RecordBatch`] results.
pub async fn influxql(
&mut self,
namespace_name: impl Into<String> + Send,
database: impl Into<String> + Send,
influxql_query: impl Into<String> + Send,
) -> Result<IOxRecordBatchStream, Error> {
let request = ReadInfo {
namespace_name: namespace_name.into(),
database: database.into(),
sql_query: influxql_query.into(),
query_type: QueryType::InfluxQl.into(),
flightsql_command: vec![],
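
Callers pass the same value as before, now under the database name. A hedged usage sketch, assuming the import path below for this crate's Flight `Client` and that `IOxRecordBatchStream` yields fallible Arrow `RecordBatch` items as a futures stream:

```rust
// Assumed import path for this crate's Flight client; adjust if the real
// module layout differs.
use futures::TryStreamExt;
use influxdb_iox_client::flight::Client;

async fn dump_cpu(client: &mut Client) {
    // First parameter is now the database (formerly namespace) name.
    let mut batches = client
        .sql("mydb", "SELECT * FROM cpu LIMIT 10")
        .await
        .expect("query failed");

    // Assumption: IOxRecordBatchStream implements TryStream<Ok = RecordBatch>.
    while let Some(batch) = batches.try_next().await.expect("stream failed") {
        println!("{} rows", batch.num_rows());
    }
}
```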

View File

@ -17,7 +17,7 @@ crossbeam-utils = "0.8.15"
data_types = { version = "0.1.0", path = "../data_types" }
datafusion.workspace = true
dml = { version = "0.1.0", path = "../dml" }
flatbuffers = "23.1.21"
flatbuffers = "23.5.26"
futures = "0.3.28"
generated_types = { version = "0.1.0", path = "../generated_types" }
hashbrown.workspace = true
@ -29,7 +29,7 @@ metric = { version = "0.1.0", path = "../metric" }
mutable_batch = { version = "0.1.0", path = "../mutable_batch" }
mutable_batch_pb = { version = "0.1.0", path = "../mutable_batch_pb" }
observability_deps = { version = "0.1.0", path = "../observability_deps" }
once_cell = "1.17"
once_cell = "1.18"
parking_lot = "0.12.1"
parquet_file = { version = "0.1.0", path = "../parquet_file" }
pin-project = "1.1.0"

View File

@ -775,7 +775,7 @@ mod test {
- " GapFillExec: group_expr=[date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@0], aggr_expr=[AVG(temps.temp)@1], stride=60000000000, time_range=Included(\"315532800000000000\")..Excluded(\"347155200000000000\")"
- " SortExec: expr=[date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@0 ASC]"
- " AggregateExec: mode=Final, gby=[date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@0 as date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))], aggr=[AVG(temps.temp)]"
- " AggregateExec: mode=Partial, gby=[datebin(60000000000, time@0, 0) as date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))], aggr=[AVG(temps.temp)]"
- " AggregateExec: mode=Partial, gby=[date_bin(60000000000, time@0, 0) as date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))], aggr=[AVG(temps.temp)]"
- " EmptyExec: produce_one_row=false"
"###
);
@ -805,7 +805,7 @@ mod test {
- " GapFillExec: group_expr=[loc@0, date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@1, concat(Utf8(\"zz\"),temps.loc)@2], aggr_expr=[AVG(temps.temp)@3], stride=60000000000, time_range=Included(\"315532800000000000\")..Excluded(\"347155200000000000\")"
- " SortExec: expr=[loc@0 ASC,concat(Utf8(\"zz\"),temps.loc)@2 ASC,date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@1 ASC]"
- " AggregateExec: mode=Final, gby=[loc@0 as loc, date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@1 as date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\")), concat(Utf8(\"zz\"),temps.loc)@2 as concat(Utf8(\"zz\"),temps.loc)], aggr=[AVG(temps.temp)]"
- " AggregateExec: mode=Partial, gby=[loc@1 as loc, datebin(60000000000, time@0, 0) as date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\")), concat(zz, loc@1) as concat(Utf8(\"zz\"),temps.loc)], aggr=[AVG(temps.temp)]"
- " AggregateExec: mode=Partial, gby=[loc@1 as loc, date_bin(60000000000, time@0, 0) as date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\")), concat(zz, loc@1) as concat(Utf8(\"zz\"),temps.loc)], aggr=[AVG(temps.temp)]"
- " EmptyExec: produce_one_row=false"
"###
);

View File

@ -860,7 +860,7 @@ mod test {
@r###"
---
- "GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp)]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))"
- " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp)]]"
- " Aggregate: groupBy=[[date_bin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp)]]"
- " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)"
- " TableScan: temps"
"###);
@ -890,7 +890,7 @@ mod test {
@r###"
---
- "GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time,TimestampNanosecond(7, None))]], aggr=[[AVG(temps.temp)]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time,TimestampNanosecond(7, None)), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))"
- " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time, TimestampNanosecond(7, None)) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time,TimestampNanosecond(7, None))]], aggr=[[AVG(temps.temp)]]"
- " Aggregate: groupBy=[[date_bin(IntervalDayTime(\"60000\"), temps.time, TimestampNanosecond(7, None)) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time,TimestampNanosecond(7, None))]], aggr=[[AVG(temps.temp)]]"
- " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)"
- " TableScan: temps"
"###);
@ -919,7 +919,7 @@ mod test {
@r###"
---
- "GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), temps.loc]], aggr=[[AVG(temps.temp)]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))"
- " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), temps.loc]], aggr=[[AVG(temps.temp)]]"
- " Aggregate: groupBy=[[date_bin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), temps.loc]], aggr=[[AVG(temps.temp)]]"
- " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)"
- " TableScan: temps"
"###);
@ -971,7 +971,7 @@ mod test {
---
- "Projection: date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), AVG(temps.temp)"
- " GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp)]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))"
- " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp)]]"
- " Aggregate: groupBy=[[date_bin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp)]]"
- " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)"
- " TableScan: temps"
"###);
@ -1006,7 +1006,7 @@ mod test {
---
- "Projection: date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), AVG(temps.temp) AS locf(AVG(temps.temp)), MIN(temps.temp) AS locf(MIN(temps.temp))"
- " GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[LOCF(AVG(temps.temp)), LOCF(MIN(temps.temp))]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))"
- " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), MIN(temps.temp)]]"
- " Aggregate: groupBy=[[date_bin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), MIN(temps.temp)]]"
- " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)"
- " TableScan: temps"
"###);
@ -1040,7 +1040,7 @@ mod test {
---
- "Projection: date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), MIN(temps.temp) AS locf(MIN(temps.temp)) AS locf_min_temp"
- " GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), LOCF(MIN(temps.temp))]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))"
- " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), MIN(temps.temp)]]"
- " Aggregate: groupBy=[[date_bin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), MIN(temps.temp)]]"
- " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)"
- " TableScan: temps"
"###);
@ -1075,7 +1075,7 @@ mod test {
---
- "Projection: date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), AVG(temps.temp) AS interpolate(AVG(temps.temp)), MIN(temps.temp) AS interpolate(MIN(temps.temp))"
- " GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[INTERPOLATE(AVG(temps.temp)), INTERPOLATE(MIN(temps.temp))]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))"
- " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), MIN(temps.temp)]]"
- " Aggregate: groupBy=[[date_bin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), MIN(temps.temp)]]"
- " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)"
- " TableScan: temps"
"###);

View File

@ -314,7 +314,7 @@ mod tests {
projection: None,
limit: None,
table_partition_cols: vec![],
output_ordering: None,
output_ordering: vec![vec![]],
infinite_source: false,
};
let plan = Arc::new(

View File

@ -97,11 +97,11 @@ impl PhysicalOptimizerRule for ProjectionPushdown {
let output_ordering = child_parquet
.base_config()
.output_ordering
.as_ref()
.iter()
.map(|output_ordering| {
project_output_ordering(output_ordering, projection_exec.schema())
})
.transpose()?;
.collect::<Result<_>>()?;
let base_config = FileScanConfig {
projection: Some(projection),
output_ordering,
@ -729,7 +729,7 @@ mod tests {
projection: Some(projection),
limit: None,
table_partition_cols: vec![],
output_ordering: Some(vec![
output_ordering: vec![vec![
PhysicalSortExpr {
expr: expr_col("tag3", &schema_projected),
options: Default::default(),
@ -742,7 +742,7 @@ mod tests {
expr: expr_col("tag2", &schema_projected),
options: Default::default(),
},
]),
]],
infinite_source: false,
};
let inner = ParquetExec::new(base_config, Some(expr_string_cmp("tag1", &schema)), None);
@ -1330,7 +1330,7 @@ mod tests {
projection: None,
limit: None,
table_partition_cols: vec![],
output_ordering: None,
output_ordering: vec![vec![]],
infinite_source: false,
};
let plan = Arc::new(ParquetExec::new(base_config, None, None));

View File

@ -126,7 +126,7 @@ impl<'a> TreeNodeRewriter for ParquetSortnessRewriter<'a> {
};
let base_config = parquet_exec.base_config();
if base_config.output_ordering.is_none() {
if base_config.output_ordering.is_empty() {
// no output ordering requested
return Ok(node);
}
@ -207,7 +207,7 @@ mod tests {
projection: None,
limit: None,
table_partition_cols: vec![],
output_ordering: Some(ordering(["col2", "col1"], &schema)),
output_ordering: vec![ordering(["col2", "col1"], &schema)],
infinite_source: false,
};
let inner = ParquetExec::new(base_config, None, None);
@ -242,7 +242,7 @@ mod tests {
projection: None,
limit: None,
table_partition_cols: vec![],
output_ordering: Some(ordering(["col2", "col1", CHUNK_ORDER_COLUMN_NAME], &schema)),
output_ordering: vec![ordering(["col2", "col1", CHUNK_ORDER_COLUMN_NAME], &schema)],
infinite_source: false,
};
let inner = ParquetExec::new(base_config, None, None);
@ -278,7 +278,7 @@ mod tests {
projection: None,
limit: None,
table_partition_cols: vec![],
output_ordering: Some(ordering(["col2", "col1"], &schema)),
output_ordering: vec![ordering(["col2", "col1"], &schema)],
infinite_source: false,
};
let inner = ParquetExec::new(base_config, None, None);
@ -320,7 +320,7 @@ mod tests {
projection: None,
limit: None,
table_partition_cols: vec![],
output_ordering: Some(ordering(["col2", "col1"], &schema)),
output_ordering: vec![ordering(["col2", "col1"], &schema)],
infinite_source: false,
};
let inner = ParquetExec::new(base_config, None, None);
@ -355,7 +355,7 @@ mod tests {
projection: None,
limit: None,
table_partition_cols: vec![],
output_ordering: Some(ordering(["col1", "col2"], &schema)),
output_ordering: vec![ordering(["col1", "col2"], &schema)],
infinite_source: false,
};
let inner = ParquetExec::new(base_config, None, None);
@ -390,7 +390,7 @@ mod tests {
projection: None,
limit: None,
table_partition_cols: vec![],
output_ordering: None,
output_ordering: vec![vec![]],
infinite_source: false,
};
let inner = ParquetExec::new(base_config, None, None);
@ -425,7 +425,7 @@ mod tests {
projection: None,
limit: None,
table_partition_cols: vec![],
output_ordering: Some(ordering(["col2", "col1"], &schema)),
output_ordering: vec![ordering(["col2", "col1"], &schema)],
infinite_source: false,
};
let inner = ParquetExec::new(base_config, None, None);
@ -489,7 +489,7 @@ mod tests {
projection: None,
limit: None,
table_partition_cols: vec![],
output_ordering: Some(ordering(["col2", "col1"], &schema)),
output_ordering: vec![ordering(["col2", "col1"], &schema)],
infinite_source: false,
};
let plan = Arc::new(ParquetExec::new(base_config, None, None));
@ -518,7 +518,7 @@ mod tests {
projection: None,
limit: None,
table_partition_cols: vec![],
output_ordering: Some(ordering(["col1", "col2"], &schema)),
output_ordering: vec![ordering(["col1", "col2"], &schema)],
infinite_source: false,
};
let plan = Arc::new(ParquetExec::new(base_config, None, None));
@ -555,7 +555,7 @@ mod tests {
projection: None,
limit: None,
table_partition_cols: vec![],
output_ordering: Some(ordering(["col1", "col2"], &schema)),
output_ordering: vec![ordering(["col1", "col2"], &schema)],
infinite_source: false,
};
let plan = Arc::new(ParquetExec::new(base_config, None, None));
@ -593,7 +593,7 @@ mod tests {
projection: None,
limit: None,
table_partition_cols: vec![],
output_ordering: Some(ordering(["col2", "col1", CHUNK_ORDER_COLUMN_NAME], &schema)),
output_ordering: vec![ordering(["col2", "col1", CHUNK_ORDER_COLUMN_NAME], &schema)],
infinite_source: false,
};
let plan_parquet = Arc::new(ParquetExec::new(base_config, None, None));

View File

@ -301,6 +301,9 @@ pub fn chunks_to_physical_nodes(
is_exact: false,
};
// No sort order is represented by an empty Vec
let output_ordering = vec![output_ordering.unwrap_or_default()];
let base_config = FileScanConfig {
object_store_url,
file_schema,
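
For call sites that still build an `Option`al ordering, the adaptation is the one-liner shown above; a tiny sketch of that conversion with a stand-in element type:

```rust
fn main() {
    // Stand-in element type for PhysicalSortExpr.
    type SortExpr = &'static str;

    // Some(ordering) becomes a single-entry list; None becomes vec![vec![]],
    // i.e. "no sort order is represented by an empty Vec".
    let maybe_ordering: Option<Vec<SortExpr>> = None;
    let output_ordering: Vec<Vec<SortExpr>> = vec![maybe_ordering.unwrap_or_default()];
    assert_eq!(output_ordering, vec![Vec::<SortExpr>::new()]);
}
```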

View File

@ -210,7 +210,7 @@ fn get_aggregate(stats: &ColumnStatistics, aggregate: Aggregate) -> Option<&Scal
#[cfg(test)]
mod test {
use std::sync::Arc;
use std::{ops::Not, sync::Arc};
use datafusion::prelude::{col, lit};
use datafusion_util::lit_dict;

View File

@ -2541,70 +2541,70 @@ mod test {
Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: makearray(CASE WHEN tag0 > Int32(0) THEN Utf8("tag0") END, CASE WHEN tag1 > Int32(0) THEN Utf8("tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN tag0 > Int32(0) THEN Utf8("tag0") END, CASE WHEN tag1 > Int32(0) THEN Utf8("tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(all_types.tag0 IS NOT NULL AS UInt64)) AS tag0, SUM(CAST(all_types.tag1 IS NOT NULL AS UInt64)) AS tag1]] [tag0:UInt64;N, tag1:UInt64;N]
Filter: all_types.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N]
TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: makearray(CASE WHEN cpu > Int32(0) THEN Utf8("cpu") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
Projection: make_array(CASE WHEN cpu > Int32(0) THEN Utf8("cpu") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(cpu.cpu IS NOT NULL AS UInt64)) AS cpu, SUM(CAST(cpu.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(cpu.region IS NOT NULL AS UInt64)) AS region]] [cpu:UInt64;N, host:UInt64;N, region:UInt64;N]
Filter: cpu.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: makearray(CASE WHEN bar > Int32(0) THEN Utf8("bar") END, CASE WHEN foo > Int32(0) THEN Utf8("foo") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN bar > Int32(0) THEN Utf8("bar") END, CASE WHEN foo > Int32(0) THEN Utf8("foo") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(data.bar IS NOT NULL AS UInt64)) AS bar, SUM(CAST(data.foo IS NOT NULL AS UInt64)) AS foo]] [bar:UInt64;N, foo:UInt64;N]
Filter: data.time >= TimestampNanosecond(1672444800000000000, None) AND data.foo = Dictionary(Int32, Utf8("some_foo")) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Projection: Dictionary(Int32, Utf8("disk")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: makearray(CASE WHEN device > Int32(0) THEN Utf8("device") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
Projection: make_array(CASE WHEN device > Int32(0) THEN Utf8("device") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(disk.device IS NOT NULL AS UInt64)) AS device, SUM(CAST(disk.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(disk.region IS NOT NULL AS UInt64)) AS region]] [device:UInt64;N, host:UInt64;N, region:UInt64;N]
Filter: disk.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
TableScan: disk [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("diskio")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: makearray(CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END, CASE WHEN status > Int32(0) THEN Utf8("status") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
Projection: make_array(CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END, CASE WHEN status > Int32(0) THEN Utf8("status") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(diskio.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(diskio.region IS NOT NULL AS UInt64)) AS region, SUM(CAST(diskio.status IS NOT NULL AS UInt64)) AS status]] [host:UInt64;N, region:UInt64;N, status:UInt64;N]
Filter: diskio.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [bytes_read:Int64;N, bytes_written:Int64;N, host:Dictionary(Int32, Utf8);N, is_local:Boolean;N, read_utilization:Float64;N, region:Dictionary(Int32, Utf8);N, status:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), write_utilization:Float64;N]
TableScan: diskio [bytes_read:Int64;N, bytes_written:Int64;N, host:Dictionary(Int32, Utf8);N, is_local:Boolean;N, read_utilization:Float64;N, region:Dictionary(Int32, Utf8);N, status:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), write_utilization:Float64;N]
Projection: Dictionary(Int32, Utf8("merge_00")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: makearray(CASE WHEN col0 > Int32(0) THEN Utf8("col0") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N]
Projection: make_array(CASE WHEN col0 > Int32(0) THEN Utf8("col0") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(merge_00.col0 IS NOT NULL AS UInt64)) AS col0]] [col0:UInt64;N]
Filter: merge_00.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [col0:Dictionary(Int32, Utf8);N, col1:Float64;N, col2:Boolean;N, col3:Utf8;N, time:Timestamp(Nanosecond, None)]
TableScan: merge_00 [col0:Dictionary(Int32, Utf8);N, col1:Float64;N, col2:Boolean;N, col3:Utf8;N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("merge_01")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: makearray(CASE WHEN col1 > Int32(0) THEN Utf8("col1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N]
Projection: make_array(CASE WHEN col1 > Int32(0) THEN Utf8("col1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(merge_01.col1 IS NOT NULL AS UInt64)) AS col1]] [col1:UInt64;N]
Filter: merge_01.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [col0:Float64;N, col1:Dictionary(Int32, Utf8);N, col2:Utf8;N, col3:Boolean;N, time:Timestamp(Nanosecond, None)]
TableScan: merge_01 [col0:Float64;N, col1:Dictionary(Int32, Utf8);N, col2:Utf8;N, col3:Boolean;N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("temp_01")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: makearray(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_01.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_01.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N]
Filter: temp_01.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [field_f64:Float64;N, field_i64:Int64;N, field_str:Utf8;N, field_u64:UInt64;N, shared_field0:Float64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
TableScan: temp_01 [field_f64:Float64;N, field_i64:Int64;N, field_str:Utf8;N, field_u64:UInt64;N, shared_field0:Float64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("temp_02")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: makearray(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_02.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_02.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N]
Filter: temp_02.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [shared_field0:Int64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
TableScan: temp_02 [shared_field0:Int64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("temp_03")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: makearray(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_03.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_03.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N]
Filter: temp_03.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [shared_field0:Utf8;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
TableScan: temp_03 [shared_field0:Utf8;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
@ -2615,70 +2615,70 @@ mod test {
Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: makearray(CASE WHEN tag0 > Int32(0) THEN Utf8("tag0") END, CASE WHEN tag1 > Int32(0) THEN Utf8("tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN tag0 > Int32(0) THEN Utf8("tag0") END, CASE WHEN tag1 > Int32(0) THEN Utf8("tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(all_types.tag0 IS NOT NULL AS UInt64)) AS tag0, SUM(CAST(all_types.tag1 IS NOT NULL AS UInt64)) AS tag1]] [tag0:UInt64;N, tag1:UInt64;N]
Filter: all_types.time >= TimestampNanosecond(1338, None) [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N]
TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: makearray(CASE WHEN cpu > Int32(0) THEN Utf8("cpu") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
Projection: make_array(CASE WHEN cpu > Int32(0) THEN Utf8("cpu") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(cpu.cpu IS NOT NULL AS UInt64)) AS cpu, SUM(CAST(cpu.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(cpu.region IS NOT NULL AS UInt64)) AS region]] [cpu:UInt64;N, host:UInt64;N, region:UInt64;N]
Filter: cpu.time >= TimestampNanosecond(1338, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: makearray(CASE WHEN bar > Int32(0) THEN Utf8("bar") END, CASE WHEN foo > Int32(0) THEN Utf8("foo") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN bar > Int32(0) THEN Utf8("bar") END, CASE WHEN foo > Int32(0) THEN Utf8("foo") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(data.bar IS NOT NULL AS UInt64)) AS bar, SUM(CAST(data.foo IS NOT NULL AS UInt64)) AS foo]] [bar:UInt64;N, foo:UInt64;N]
Filter: data.time >= TimestampNanosecond(1338, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Projection: Dictionary(Int32, Utf8("disk")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: makearray(CASE WHEN device > Int32(0) THEN Utf8("device") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
Projection: make_array(CASE WHEN device > Int32(0) THEN Utf8("device") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(disk.device IS NOT NULL AS UInt64)) AS device, SUM(CAST(disk.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(disk.region IS NOT NULL AS UInt64)) AS region]] [device:UInt64;N, host:UInt64;N, region:UInt64;N]
Filter: disk.time >= TimestampNanosecond(1338, None) [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
TableScan: disk [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("diskio")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: makearray(CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END, CASE WHEN status > Int32(0) THEN Utf8("status") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
Projection: make_array(CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END, CASE WHEN status > Int32(0) THEN Utf8("status") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(diskio.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(diskio.region IS NOT NULL AS UInt64)) AS region, SUM(CAST(diskio.status IS NOT NULL AS UInt64)) AS status]] [host:UInt64;N, region:UInt64;N, status:UInt64;N]
Filter: diskio.time >= TimestampNanosecond(1338, None) [bytes_read:Int64;N, bytes_written:Int64;N, host:Dictionary(Int32, Utf8);N, is_local:Boolean;N, read_utilization:Float64;N, region:Dictionary(Int32, Utf8);N, status:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), write_utilization:Float64;N]
TableScan: diskio [bytes_read:Int64;N, bytes_written:Int64;N, host:Dictionary(Int32, Utf8);N, is_local:Boolean;N, read_utilization:Float64;N, region:Dictionary(Int32, Utf8);N, status:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), write_utilization:Float64;N]
Projection: Dictionary(Int32, Utf8("merge_00")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: makearray(CASE WHEN col0 > Int32(0) THEN Utf8("col0") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N]
Projection: make_array(CASE WHEN col0 > Int32(0) THEN Utf8("col0") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(merge_00.col0 IS NOT NULL AS UInt64)) AS col0]] [col0:UInt64;N]
Filter: merge_00.time >= TimestampNanosecond(1338, None) [col0:Dictionary(Int32, Utf8);N, col1:Float64;N, col2:Boolean;N, col3:Utf8;N, time:Timestamp(Nanosecond, None)]
TableScan: merge_00 [col0:Dictionary(Int32, Utf8);N, col1:Float64;N, col2:Boolean;N, col3:Utf8;N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("merge_01")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: makearray(CASE WHEN col1 > Int32(0) THEN Utf8("col1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N]
Projection: make_array(CASE WHEN col1 > Int32(0) THEN Utf8("col1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(merge_01.col1 IS NOT NULL AS UInt64)) AS col1]] [col1:UInt64;N]
Filter: merge_01.time >= TimestampNanosecond(1338, None) [col0:Float64;N, col1:Dictionary(Int32, Utf8);N, col2:Utf8;N, col3:Boolean;N, time:Timestamp(Nanosecond, None)]
TableScan: merge_01 [col0:Float64;N, col1:Dictionary(Int32, Utf8);N, col2:Utf8;N, col3:Boolean;N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("temp_01")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: makearray(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_01.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_01.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N]
Filter: temp_01.time >= TimestampNanosecond(1338, None) [field_f64:Float64;N, field_i64:Int64;N, field_str:Utf8;N, field_u64:UInt64;N, shared_field0:Float64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
TableScan: temp_01 [field_f64:Float64;N, field_i64:Int64;N, field_str:Utf8;N, field_u64:UInt64;N, shared_field0:Float64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("temp_02")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: makearray(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_02.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_02.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N]
Filter: temp_02.time >= TimestampNanosecond(1338, None) [shared_field0:Int64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
TableScan: temp_02 [shared_field0:Int64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("temp_03")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: makearray(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_03.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_03.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N]
Filter: temp_03.time >= TimestampNanosecond(1338, None) [shared_field0:Utf8;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
TableScan: temp_03 [shared_field0:Utf8;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
@ -2774,7 +2774,7 @@ mod test {
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time AS time, value AS value [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, value:Float64;N]
Sort: time ASC NULLS LAST [time:Timestamp(Nanosecond, None);N, value:Float64;N]
Projection: time, AVG(cpu.usage_idle) AS value [time:Timestamp(Nanosecond, None);N, value:Float64;N]
Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###);
}
@ -2954,7 +2954,7 @@ mod test {
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, last:Float64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, (selector_last(cpu.usage_idle,cpu.time))[value] AS last [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, last:Float64;N]
GapFill: groupBy=[[time]], aggr=[[selector_last(cpu.usage_idle,cpu.time)]], time_column=time, stride=IntervalMonthDayNano("5000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
Aggregate: groupBy=[[datebin(IntervalMonthDayNano("5000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[selector_last(cpu.usage_idle, cpu.time)]] [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("5000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[selector_last(cpu.usage_idle, cpu.time)]] [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###);
@ -2963,7 +2963,7 @@ mod test {
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, first:Float64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, (coalesce_struct(selector_first(cpu.usage_idle,cpu.time), Struct({value:Float64(0),time:TimestampNanosecond(0, None)})))[value] AS first [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, first:Float64;N]
GapFill: groupBy=[[time]], aggr=[[selector_first(cpu.usage_idle,cpu.time)]], time_column=time, stride=IntervalMonthDayNano("5000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, selector_first(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
Aggregate: groupBy=[[datebin(IntervalMonthDayNano("5000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[selector_first(cpu.usage_idle, cpu.time)]] [time:Timestamp(Nanosecond, None);N, selector_first(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("5000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[selector_first(cpu.usage_idle, cpu.time)]] [time:Timestamp(Nanosecond, None);N, selector_first(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###);
@ -3539,7 +3539,7 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(none)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
@ -3547,102 +3547,137 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s, 5s) FILL(none)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(5000000000, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(5000000000, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
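The `GROUP BY TIME(10s)` and `GROUP BY TIME(10s, 5s)` snapshots above differ only in the origin passed to `date_bin`. A minimal standalone sketch of that bucketing, restated for plain nanosecond integers (the `date_bin_ns` helper and `main` harness are illustrative, not planner code):

```rust
/// Illustrative only: the bucketing that `date_bin(stride, time, origin)`
/// performs in the plans above, restated for i64 nanosecond timestamps.
/// GROUP BY TIME(10s, 5s) shows up as stride = 10s and origin = 5s.
fn date_bin_ns(stride_ns: i64, time_ns: i64, origin_ns: i64) -> i64 {
    // Floor-divide the distance from the origin into whole strides, then step
    // back to that bucket's start. `div_euclid` keeps instants before the
    // origin in the correct bucket as well.
    origin_ns + (time_ns - origin_ns).div_euclid(stride_ns) * stride_ns
}

fn main() {
    let s = 1_000_000_000; // one second in nanoseconds
    // 17s with a 10s stride anchored at 5s falls into the [15s, 25s) bucket.
    assert_eq!(date_bin_ns(10 * s, 17 * s, 5 * s), 15 * s);
    // Without an offset the same instant bins to [10s, 20s).
    assert_eq!(date_bin_ns(10 * s, 17 * s, 0), 10 * s);
}
```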
#[test]
fn group_by_time_gapfill() {
fn group_by_time_gapfill_no_bounds() {
// No time bounds
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
#[test]
fn group_by_time_gapfill_no_lower_time_bounds() {
// No lower time bounds
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time < '2022-10-31T02:02:00Z' GROUP BY TIME(10s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1667181719999999999, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time <= TimestampNanosecond(1667181719999999999, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
#[test]
fn group_by_time_gapfill_no_upper_time_bounds() {
// No upper time bounds
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time >= '2022-10-31T02:00:00Z' GROUP BY TIME(10s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Included(TimestampNanosecond(1667181600000000000, None))..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time >= TimestampNanosecond(1667181600000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
#[test]
fn group_by_time_gapfill_defaul_is_fill_null1() {
// Default is FILL(null)
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(10s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Included(TimestampNanosecond(1667181600000000000, None))..Included(TimestampNanosecond(1667181719999999999, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time >= TimestampNanosecond(1667181600000000000, None) AND data.time <= TimestampNanosecond(1667181719999999999, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
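The four snapshots above cover every combination of missing time bounds, and the `GapFill` range they show follows directly from the `WHERE` clause. A minimal sketch restating that mapping (the `UpperBound` enum and `gapfill_range` helper are illustrative stand-ins, not the planner's types):

```rust
use std::ops::Bound;

/// Illustrative stand-in for the upper end of the gap-fill range: either an
/// explicit timestamp or the open-ended `now()` seen in the plans above.
#[derive(Debug, PartialEq)]
enum UpperBound {
    IncludedNs(i64),
    ExcludedNow,
}

/// Restates the range selection visible in the snapshots: a `time >=` bound
/// becomes `Included(lower)`, otherwise `Unbounded`; an upper bound (already
/// adjusted to the last included nanosecond) becomes `Included(upper)`,
/// otherwise the range runs up to, but excludes, `now()`.
fn gapfill_range(lower_ns: Option<i64>, upper_ns: Option<i64>) -> (Bound<i64>, UpperBound) {
    let start = match lower_ns {
        Some(ts) => Bound::Included(ts),
        None => Bound::Unbounded,
    };
    let end = match upper_ns {
        Some(ts) => UpperBound::IncludedNs(ts),
        None => UpperBound::ExcludedNow,
    };
    (start, end)
}

fn main() {
    // `WHERE time >= '2022-10-31T02:00:00Z'` with no upper bound, as in the
    // test above: Included(1667181600000000000)..Excluded(now()).
    assert_eq!(
        gapfill_range(Some(1_667_181_600_000_000_000), None),
        (
            Bound::Included(1_667_181_600_000_000_000),
            UpperBound::ExcludedNow
        )
    );
}
```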
#[test]
fn group_by_time_gapfill_default_is_fill_null1() {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
#[test]
fn group_by_time_gapfill_default_is_fill_null2() {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(null)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
#[test]
fn group_by_time_gapfill_default_is_fill_null3() {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(previous)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[LOCF(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
#[test]
fn group_by_time_gapfill_default_is_fill_null4() {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(0)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
#[test]
fn group_by_time_gapfill_default_is_fill_null5() {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(linear)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[INTERPOLATE(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
#[test]
fn group_by_time_gapfill_coalesces_the_fill_value() {
// Coalesces the fill value, which is a float, to the matching type of a `COUNT` aggregate.
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(3.2)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(3)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
#[test]
fn group_by_time_gapfill_aggregates_part_of_binary_expression() {
// Aggregates as part of a binary expression
assert_snapshot!(plan("SELECT COUNT(f64_field) + MEAN(f64_field) FROM data GROUP BY TIME(10s) FILL(3.2)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count_mean:Float64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(3)) + coalesce_struct(AVG(data.f64_field), Float64(3.2)) AS count_mean [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count_mean:Float64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field), AVG(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N, AVG(data.f64_field):Float64;N]
Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field), AVG(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N, AVG(data.f64_field):Float64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field), AVG(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N, AVG(data.f64_field):Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
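Across the `FILL(...)` tests above, the variants only change how the aggregate appears inside the `GapFill` node or the final projection. A small sketch summarising that mapping, using illustrative stand-in types rather than the planner's AST:

```rust
/// Illustrative stand-ins for the parsed FILL clause and the behaviour seen in
/// the GapFill plans above; these are not the planner's actual types.
#[derive(Debug)]
enum FillClause {
    None,
    Null,
    Previous,
    Linear,
    Value(f64),
}

#[derive(Debug, PartialEq)]
enum GapFillStrategy {
    /// Missing bins stay NULL: both the default and an explicit FILL(null).
    Null,
    /// LOCF(..): carry the previous value forward (FILL(previous)).
    Locf,
    /// INTERPOLATE(..): linear interpolation (FILL(linear)).
    Interpolate,
    /// coalesce_struct(.., value): substitute a constant, coerced to the
    /// aggregate's type, e.g. FILL(3.2) becomes Int64(3) for COUNT.
    Constant(f64),
}

/// FILL(none) plans no GapFill node at all, hence the `Option`.
fn strategy_for(fill: FillClause) -> Option<GapFillStrategy> {
    match fill {
        FillClause::None => None,
        FillClause::Null => Some(GapFillStrategy::Null),
        FillClause::Previous => Some(GapFillStrategy::Locf),
        FillClause::Linear => Some(GapFillStrategy::Interpolate),
        FillClause::Value(v) => Some(GapFillStrategy::Constant(v)),
    }
}

fn main() {
    assert_eq!(strategy_for(FillClause::Previous), Some(GapFillStrategy::Locf));
    assert_eq!(strategy_for(FillClause::None), None);
}
```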
@ -3659,6 +3694,10 @@ mod test {
Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
#[test]
fn with_limit_or_offset2() {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY foo OFFSET 1"), @r###"
Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
Projection: iox::measurement, time, foo, count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
@ -3669,6 +3708,10 @@ mod test {
Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
#[test]
fn with_limit_or_offset3() {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY foo LIMIT 2 OFFSET 3"), @r###"
Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
Projection: iox::measurement, time, foo, count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
@ -3679,7 +3722,10 @@ mod test {
Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
#[test]
fn with_limit_or_offset_errors() {
// Fallible
// returns an error if LIMIT or OFFSET values exceed i64::MAX
@ -3699,7 +3745,7 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10u) FILL(none)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Aggregate: groupBy=[[datebin(IntervalMonthDayNano("10000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}



@ -12,7 +12,7 @@ tracing-subscriber = "0.3"
workspace-hack = { version = "0.1", path = "../workspace-hack" }
[dev-dependencies] # In alphabetical order
once_cell = { version = "1.17", features = ["parking_lot"] }
once_cell = { version = "1.18", features = ["parking_lot"] }
parking_lot = "0.12"
regex = "1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }


@ -22,12 +22,13 @@ use crate::{
};
/// An error generating a partition key for a row.
#[allow(missing_copy_implementations)]
#[derive(Debug, Error, PartialEq, Eq)]
pub enum PartitionKeyError {
/// The partition template defines a [`Template::TimeFormat`] part, but the
/// provided strftime formatter is invalid.
#[error("invalid strftime format in partition template: {0}")]
InvalidStrftime(String),
#[error("invalid strftime format in partition template")]
InvalidStrftime,
/// The partition template defines a [`Template::TagValue`] part, but the
/// column type is not "tag".
@ -93,7 +94,7 @@ impl<'a> Template<'a> {
Utc.timestamp_nanos(t[idx])
.format_with_items(format.clone()) // Cheap clone of refs
)
.map_err(|_| PartitionKeyError::InvalidStrftime(format!("{format:?}")))?;
.map_err(|_| PartitionKeyError::InvalidStrftime)?;
out.write_str(
Cow::from(utf8_percent_encode(
@ -145,9 +146,18 @@ fn partition_keys<'a>(
})
.collect::<Vec<_>>();
// Track the length of the last yielded partition key, and pre-allocate the
// next partition key string to match it.
//
// In the happy path, keys of consistent sizes are generated and the
// allocations reach a minimum. If the keys are inconsistent, at best a
// subset of allocations is eliminated, and at worst, a few bytes of memory
// are temporarily allocated until the resulting string is shrunk down.
let mut last_len = 5;
// Yield a partition key string for each row in `batch`
(0..batch.row_count).map(move |idx| {
let mut string = String::new();
let mut string = String::with_capacity(last_len);
// Evaluate each template part for this row
for (col_idx, col) in template.iter().enumerate() {
@ -160,6 +170,8 @@ fn partition_keys<'a>(
}
}
last_len = string.len();
string.shrink_to_fit();
Ok(string)
})
}
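The pre-allocation added above can be shown in isolation; this is a minimal sketch of the same capacity-reuse pattern with the per-row template evaluation replaced by a caller-supplied closure (names are illustrative):

```rust
/// Illustrative only: the capacity-reuse pattern added above, with the
/// per-row template evaluation replaced by a caller-supplied closure.
fn keys_with_reused_capacity(
    rows: usize,
    mut build_key: impl FnMut(&mut String, usize),
) -> impl Iterator<Item = String> {
    // Seed with a small guess; afterwards this tracks the previous key's
    // length, so steady-state rows allocate exactly once.
    let mut last_len = 5;
    (0..rows).map(move |idx| {
        let mut key = String::with_capacity(last_len);
        build_key(&mut key, idx);
        last_len = key.len();
        // Give back any over-allocation when a key comes out shorter.
        key.shrink_to_fit();
        key
    })
}

fn main() {
    let keys: Vec<_> = keys_with_reused_capacity(3, |key, idx| {
        key.push_str("2023-06-06|row");
        key.push_str(&idx.to_string());
    })
    .collect();
    assert_eq!(keys[1], "2023-06-06|row1");
}
```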
@ -474,7 +486,7 @@ mod tests {
let ret = partition_keys(&batch, template.parts()).collect::<Result<Vec<_>, _>>();
assert_matches!(ret, Err(PartitionKeyError::InvalidStrftime(_)));
assert_matches!(ret, Err(PartitionKeyError::InvalidStrftime));
}
// These values are arbitrarily chosen when building an input to the
@ -605,7 +617,7 @@ mod tests {
// properties:
match ret {
Ok(v) => { assert_eq!(v.len(), 1); },
Err(e) => { assert_matches!(e, PartitionKeyError::InvalidStrftime(_)); },
Err(e) => { assert_matches!(e, PartitionKeyError::InvalidStrftime); },
}
}
}


@ -135,7 +135,7 @@ impl ParquetExecInput {
limit: None,
table_partition_cols: vec![],
// Parquet files ARE actually sorted but we don't care here since we just construct a `collect` plan.
output_ordering: None,
output_ordering: vec![vec![]],
infinite_source: false,
};
let exec = ParquetExec::new(base_config, None, None);


@ -253,7 +253,7 @@ impl ParquetFileReader {
projection: None,
limit: None,
table_partition_cols: vec![],
output_ordering: None,
output_ordering: vec![vec![]],
infinite_source: false,
};


@ -126,7 +126,7 @@ pub(crate) fn df_to_scalar(
#[cfg(test)]
mod tests {
use std::sync::Arc;
use std::{ops::Not, sync::Arc};
use arrow::datatypes::Field;
use test_helpers::assert_contains;


@ -34,6 +34,7 @@ use schema::TIME_COLUMN_NAME;
use std::{
collections::{BTreeSet, HashSet},
fmt,
ops::Not,
};
/// This `Predicate` represents the empty predicate (aka that evaluates to true for all rows).


@ -255,7 +255,7 @@ fn is_col_op_lit(expr: &Expr) -> Option<&str> {
#[cfg(test)]
mod tests {
use std::ops::Add;
use std::ops::{Add, Not};
use super::*;
use arrow::datatypes::DataType;


@ -973,7 +973,7 @@ async fn test_invalid_strftime_partition_template() {
assert_matches!(
got,
Err(Error::DmlHandler(DmlError::Partition(
PartitionError::Partitioner(PartitionKeyError::InvalidStrftime(_))
PartitionError::Partitioner(PartitionKeyError::InvalidStrftime)
)))
);


@ -497,7 +497,7 @@ where
};
let request = request?;
let namespace_name = request.namespace_name();
let namespace_name = request.database();
let query = request.query();
is_debug |= request.is_debug();


@ -47,11 +47,11 @@ pub type Result<T, E = Error> = std::result::Result<T, E>;
///
/// ## Example JSON Ticket format
///
/// This runs the SQL "SELECT 1" in namespace `my_db`
/// This runs the SQL "SELECT 1" in database `my_db`
///
/// ```json
/// {
/// "namespace_name": "my_db",
/// "database": "my_db",
/// "sql_query": "SELECT 1;"
/// }
/// ```
@ -60,7 +60,7 @@ pub type Result<T, E = Error> = std::result::Result<T, E>;
///
/// ```json
/// {
/// "namespace_name": "my_db",
/// "database": "my_db",
/// "sql_query": "SELECT 1;"
/// "query_type": "sql"
/// }
@ -70,14 +70,14 @@ pub type Result<T, E = Error> = std::result::Result<T, E>;
///
/// ```json
/// {
/// "namespace_name": "my_db",
/// "database": "my_db",
/// "sql_query": "SHOW DATABASES;"
/// "query_type": "influxql"
/// }
/// ```
#[derive(Debug, PartialEq, Clone)]
pub struct IoxGetRequest {
namespace_name: String,
database: String,
query: RunQuery,
is_debug: bool,
}
@ -116,9 +116,9 @@ impl Display for RunQuery {
impl IoxGetRequest {
/// Create a new request to run the specified query
pub fn new(namespace_name: impl Into<String>, query: RunQuery, is_debug: bool) -> Self {
pub fn new(database: impl Into<String>, query: RunQuery, is_debug: bool) -> Self {
Self {
namespace_name: namespace_name.into(),
database: database.into(),
query,
is_debug,
}
@ -142,21 +142,21 @@ impl IoxGetRequest {
/// Encode the request as a protobuf Ticket
pub fn try_encode(self) -> Result<Ticket> {
let Self {
namespace_name,
database,
query,
is_debug,
} = self;
let read_info = match query {
RunQuery::Sql(sql_query) => proto::ReadInfo {
namespace_name,
database,
sql_query,
query_type: QueryType::Sql.into(),
flightsql_command: vec![],
is_debug,
},
RunQuery::InfluxQL(influxql) => proto::ReadInfo {
namespace_name,
database,
// field name is misleading
sql_query: influxql,
query_type: QueryType::InfluxQl.into(),
@ -164,7 +164,7 @@ impl IoxGetRequest {
is_debug,
},
RunQuery::FlightSQL(flightsql_command) => proto::ReadInfo {
namespace_name,
database,
sql_query: "".into(),
query_type: QueryType::FlightSqlMessage.into(),
flightsql_command: flightsql_command
@ -189,8 +189,8 @@ impl IoxGetRequest {
/// This represents the JSON fields
#[derive(Deserialize, Debug)]
struct ReadInfoJson {
#[serde(alias = "database", alias = "bucket", alias = "bucket-name")]
namespace_name: String,
#[serde(alias = "namespace_name", alias = "bucket", alias = "bucket-name")]
database: String,
sql_query: String,
// If query type is not supplied, defaults to SQL
query_type: Option<String>,
@ -199,7 +199,7 @@ impl IoxGetRequest {
}
let ReadInfoJson {
namespace_name,
database,
sql_query,
query_type,
is_debug,
@ -221,7 +221,7 @@ impl IoxGetRequest {
};
Ok(Self {
namespace_name,
database,
query,
is_debug,
})
@ -233,7 +233,7 @@ impl IoxGetRequest {
let query_type = read_info.query_type();
let proto::ReadInfo {
namespace_name,
database,
sql_query,
query_type: _,
flightsql_command,
@ -241,7 +241,7 @@ impl IoxGetRequest {
} = read_info;
Ok(Self {
namespace_name,
database,
query: match query_type {
QueryType::Unspecified | QueryType::Sql => {
if !flightsql_command.is_empty() {
@ -277,8 +277,8 @@ impl IoxGetRequest {
})
}
pub fn namespace_name(&self) -> &str {
self.namespace_name.as_ref()
pub fn database(&self) -> &str {
self.database.as_ref()
}
pub fn query(&self) -> &RunQuery {
@ -306,10 +306,10 @@ mod tests {
// - <https://github.com/influxdata/influxdb-iox-client-go/commit/52f1a1b8d5bb8cc8dc2fe825f4da630ad0b9167c
//
// Do not change this test without having first changed what the Go clients are sending!
let ticket = make_json_ticket(r#"{"namespace_name": "my_db", "sql_query": "SELECT 1;"}"#);
let ticket = make_json_ticket(r#"{"database": "my_db", "sql_query": "SELECT 1;"}"#);
let ri = IoxGetRequest::try_decode(ticket).unwrap();
assert_eq!(ri.namespace_name, "my_db");
assert_eq!(ri.database, "my_db");
assert_matches!(ri.query, RunQuery::Sql(query) => assert_eq!(query, "SELECT 1;"));
}
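The `#[serde(alias = ...)]` attributes above are what keep older clients that still send `namespace_name`, `bucket`, or `bucket-name` working after the rename. A standalone round-trip sketch, assuming `serde` (with the `derive` feature) and `serde_json` as dependencies; `TicketJson` is a stand-in for the crate's `ReadInfoJson`:

```rust
use serde::Deserialize;

/// Standalone stand-in for the `ReadInfoJson` struct above, kept only to show
/// the alias behaviour; the real type lives in this crate and has more fields.
#[derive(Deserialize, Debug)]
struct TicketJson {
    #[serde(alias = "namespace_name", alias = "bucket", alias = "bucket-name")]
    database: String,
    sql_query: String,
}

fn main() {
    // Old clients still sending `namespace_name` (or the bucket spellings) and
    // new clients sending `database` all deserialize into the same field.
    for json in [
        r#"{"database": "my_db", "sql_query": "SELECT 1;"}"#,
        r#"{"namespace_name": "my_db", "sql_query": "SELECT 1;"}"#,
        r#"{"bucket": "my_db", "sql_query": "SELECT 1;"}"#,
        r#"{"bucket-name": "my_db", "sql_query": "SELECT 1;"}"#,
    ] {
        let ticket: TicketJson = serde_json::from_str(json).expect("valid ticket JSON");
        assert_eq!(ticket.database, "my_db");
        assert!(!ticket.sql_query.is_empty());
    }
}
```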
@ -321,22 +321,22 @@ mod tests {
}
impl TestCase {
fn new_sql(json: &'static str, expected_namespace: &str, query: &str) -> Self {
fn new_sql(json: &'static str, expected_database: &str, query: &str) -> Self {
Self {
json,
expected: IoxGetRequest {
namespace_name: String::from(expected_namespace),
database: String::from(expected_database),
query: RunQuery::Sql(String::from(query)),
is_debug: false,
},
}
}
fn new_influxql(json: &'static str, expected_namespace: &str, query: &str) -> Self {
fn new_influxql(json: &'static str, expected_database: &str, query: &str) -> Self {
Self {
json,
expected: IoxGetRequest {
namespace_name: String::from(expected_namespace),
database: String::from(expected_database),
query: RunQuery::InfluxQL(String::from(query)),
is_debug: false,
},
@ -346,63 +346,130 @@ mod tests {
let cases = vec![
// implicit `query_type`
TestCase::new_sql(
r#"{"database": "my_db", "sql_query": "SELECT 1;"}"#,
"my_db",
"SELECT 1;",
),
TestCase::new_sql(
r#"{"namespace_name": "my_db", "sql_query": "SELECT 1;"}"#,
"my_db",
"SELECT 1;",
),
TestCase::new_sql(
r#"{"bucket": "my_db", "sql_query": "SELECT 1;"}"#,
"my_db",
"SELECT 1;",
),
TestCase::new_sql(
r#"{"bucket-name": "my_db", "sql_query": "SELECT 1;"}"#,
"my_db",
"SELECT 1;",
),
// explicit query type, sql
TestCase::new_sql(
r#"{"database": "my_db", "sql_query": "SELECT 1;", "query_type": "sql"}"#,
"my_db",
"SELECT 1;",
),
TestCase::new_sql(
r#"{"namespace_name": "my_db", "sql_query": "SELECT 1;", "query_type": "sql"}"#,
"my_db",
"SELECT 1;",
),
TestCase::new_sql(
r#"{"bucket": "my_db", "sql_query": "SELECT 1;", "query_type": "sql"}"#,
"my_db",
"SELECT 1;",
),
TestCase::new_sql(
r#"{"bucket-name": "my_db", "sql_query": "SELECT 1;", "query_type": "sql"}"#,
"my_db",
"SELECT 1;",
),
// explicit query type null
TestCase::new_sql(
r#"{"database": "my_db", "sql_query": "SELECT 1;", "query_type": null}"#,
"my_db",
"SELECT 1;",
),
TestCase::new_sql(
r#"{"namespace_name": "my_db", "sql_query": "SELECT 1;", "query_type": null}"#,
"my_db",
"SELECT 1;",
),
TestCase::new_sql(
r#"{"bucket": "my_db", "sql_query": "SELECT 1;", "query_type": null}"#,
"my_db",
"SELECT 1;",
),
TestCase::new_sql(
r#"{"bucket-name": "my_db", "sql_query": "SELECT 1;", "query_type": null}"#,
"my_db",
"SELECT 1;",
),
// explicit query type, influxql
TestCase::new_influxql(
r#"{"database": "my_db", "sql_query": "SELECT 1;", "query_type": "influxql"}"#,
"my_db",
"SELECT 1;",
),
TestCase::new_influxql(
r#"{"namespace_name": "my_db", "sql_query": "SELECT 1;", "query_type": "influxql"}"#,
"my_db",
"SELECT 1;",
),
TestCase::new_influxql(
r#"{"bucket": "my_db", "sql_query": "SELECT 1;", "query_type": "influxql"}"#,
"my_db",
"SELECT 1;",
),
TestCase::new_influxql(
r#"{"bucket-name": "my_db", "sql_query": "SELECT 1;", "query_type": "influxql"}"#,
"my_db",
"SELECT 1;",
),
// explicit query type, influxql on metadata
TestCase::new_influxql(
r#"{"database": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "influxql"}"#,
"my_otherdb",
"SHOW DATABASES;",
),
TestCase::new_influxql(
r#"{"namespace_name": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "influxql"}"#,
"my_otherdb",
"SHOW DATABASES;",
),
TestCase::new_influxql(
r#"{"database": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "influxql"}"#,
"my_otherdb",
"SHOW DATABASES;",
),
// influxql bucket metadata
TestCase::new_influxql(
r#"{"bucket": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "influxql"}"#,
"my_otherdb",
"SHOW DATABASES;",
),
// influxql bucket-name metadata
TestCase::new_influxql(
r#"{"bucket-name": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "influxql"}"#,
"my_otherdb",
"SHOW DATABASES;",
),
// sql database metadata
// explicit query type, sql on metadata
TestCase::new_sql(
r#"{"database": "my_db", "sql_query": "SELECT 1;", "query_type": "sql"}"#,
"my_db",
"SELECT 1;",
r#"{"database": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "sql"}"#,
"my_otherdb",
"SHOW DATABASES;",
),
// sql bucket metadata
TestCase::new_sql(
r#"{"bucket": "my_db", "sql_query": "SELECT 1;", "query_type": "sql"}"#,
"my_db",
"SELECT 1;",
r#"{"namespace_name": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "sql"}"#,
"my_otherdb",
"SHOW DATABASES;",
),
// sql bucket-name metadata
TestCase::new_sql(
r#"{"bucket-name": "my_db", "sql_query": "SELECT 1;", "query_type": "sql"}"#,
"my_db",
"SELECT 1;",
r#"{"bucket": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "sql"}"#,
"my_otherdb",
"SHOW DATABASES;",
),
TestCase::new_sql(
r#"{"bucket-name": "my_otherdb", "sql_query": "SHOW DATABASES;", "query_type": "sql"}"#,
"my_otherdb",
"SHOW DATABASES;",
),
];
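// The cases above exercise the four equivalent JSON keys for naming a database —
// `database`, `namespace_name`, `bucket`, and `bucket-name` — across implicit,
// null, explicit SQL, and explicit InfluxQL `query_type` values; all decode to the
// same `database` field on `IoxGetRequest`.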
@ -446,7 +513,7 @@ mod tests {
#[test]
fn proto_ticket_decoding_unspecified() {
let ticket = make_proto_ticket(&proto::ReadInfo {
namespace_name: "<foo>_<bar>".to_string(),
database: "<foo>_<bar>".to_string(),
sql_query: "SELECT 1".to_string(),
query_type: QueryType::Unspecified.into(),
flightsql_command: vec![],
@ -455,14 +522,14 @@ mod tests {
// Reverts to default (unspecified) for invalid query_type enumeration, and thus SQL
let ri = IoxGetRequest::try_decode(ticket).unwrap();
assert_eq!(ri.namespace_name, "<foo>_<bar>");
assert_eq!(ri.database, "<foo>_<bar>");
assert_matches!(ri.query, RunQuery::Sql(query) => assert_eq!(query, "SELECT 1"));
}
#[test]
fn proto_ticket_decoding_sql() {
let ticket = make_proto_ticket(&proto::ReadInfo {
namespace_name: "<foo>_<bar>".to_string(),
database: "<foo>_<bar>".to_string(),
sql_query: "SELECT 1".to_string(),
query_type: QueryType::Sql.into(),
flightsql_command: vec![],
@ -470,14 +537,14 @@ mod tests {
});
let ri = IoxGetRequest::try_decode(ticket).unwrap();
assert_eq!(ri.namespace_name, "<foo>_<bar>");
assert_eq!(ri.database, "<foo>_<bar>");
assert_matches!(ri.query, RunQuery::Sql(query) => assert_eq!(query, "SELECT 1"));
}
#[test]
fn proto_ticket_decoding_influxql() {
let ticket = make_proto_ticket(&proto::ReadInfo {
namespace_name: "<foo>_<bar>".to_string(),
database: "<foo>_<bar>".to_string(),
sql_query: "SELECT 1".to_string(),
query_type: QueryType::InfluxQl.into(),
flightsql_command: vec![],
@ -485,14 +552,14 @@ mod tests {
});
let ri = IoxGetRequest::try_decode(ticket).unwrap();
assert_eq!(ri.namespace_name, "<foo>_<bar>");
assert_eq!(ri.database, "<foo>_<bar>");
assert_matches!(ri.query, RunQuery::InfluxQL(query) => assert_eq!(query, "SELECT 1"));
}
#[test]
fn proto_ticket_decoding_too_new() {
let ticket = make_proto_ticket(&proto::ReadInfo {
namespace_name: "<foo>_<bar>".to_string(),
database: "<foo>_<bar>".to_string(),
sql_query: "SELECT 1".into(),
query_type: 42, // not a known query type
flightsql_command: vec![],
@ -501,14 +568,14 @@ mod tests {
// Reverts to default (unspecified) for invalid query_type enumeration, and thus SQL
let ri = IoxGetRequest::try_decode(ticket).unwrap();
assert_eq!(ri.namespace_name, "<foo>_<bar>");
assert_eq!(ri.database, "<foo>_<bar>");
assert_matches!(ri.query, RunQuery::Sql(query) => assert_eq!(query, "SELECT 1"));
}
#[test]
fn proto_ticket_decoding_sql_too_many_fields() {
let ticket = make_proto_ticket(&proto::ReadInfo {
namespace_name: "<foo>_<bar>".to_string(),
database: "<foo>_<bar>".to_string(),
sql_query: "SELECT 1".to_string(),
query_type: QueryType::Sql.into(),
// can't have both sql_query and flightsql
@ -523,7 +590,7 @@ mod tests {
#[test]
fn proto_ticket_decoding_influxql_too_many_fields() {
let ticket = make_proto_ticket(&proto::ReadInfo {
namespace_name: "<foo>_<bar>".to_string(),
database: "<foo>_<bar>".to_string(),
sql_query: "SELECT 1".to_string(),
query_type: QueryType::InfluxQl.into(),
// can't have both sql_query and flightsql
@ -538,7 +605,7 @@ mod tests {
#[test]
fn proto_ticket_decoding_flightsql_too_many_fields() {
let ticket = make_proto_ticket(&proto::ReadInfo {
namespace_name: "<foo>_<bar>".to_string(),
database: "<foo>_<bar>".to_string(),
sql_query: "SELECT 1".to_string(),
query_type: QueryType::FlightSqlMessage.into(),
// can't have both sql_query and flightsql
@ -564,7 +631,7 @@ mod tests {
#[test]
fn round_trip_sql() {
let request = IoxGetRequest {
namespace_name: "foo_blarg".into(),
database: "foo_blarg".into(),
query: RunQuery::Sql("select * from bar".into()),
is_debug: false,
};
@ -579,7 +646,7 @@ mod tests {
#[test]
fn round_trip_sql_is_debug() {
let request = IoxGetRequest {
namespace_name: "foo_blarg".into(),
database: "foo_blarg".into(),
query: RunQuery::Sql("select * from bar".into()),
is_debug: true,
};
@ -594,7 +661,7 @@ mod tests {
#[test]
fn round_trip_influxql() {
let request = IoxGetRequest {
namespace_name: "foo_blarg".into(),
database: "foo_blarg".into(),
query: RunQuery::InfluxQL("select * from bar".into()),
is_debug: false,
};
@ -613,7 +680,7 @@ mod tests {
});
let request = IoxGetRequest {
namespace_name: "foo_blarg".into(),
database: "foo_blarg".into(),
query: RunQuery::FlightSQL(cmd),
is_debug: false,
};
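For reference, a minimal sketch of how a caller could build a proto ticket using the renamed field. The field names and the `QueryType::Sql.into()` usage come from the diff above; the crate/module paths and the use of prost's `encode_to_vec` are assumptions, not part of this commit:

use generated_types::influxdata::iox::querier::v1 as proto; // assumed crate/module path
use prost::Message;                                          // provides encode_to_vec()

fn make_database_ticket() -> Vec<u8> {
    let read_info = proto::ReadInfo {
        database: "my_db".to_string(),     // renamed from `namespace_name` in this change
        sql_query: "SELECT 1;".to_string(),
        query_type: proto::read_info::QueryType::Sql.into(),
        flightsql_command: vec![],         // must stay empty when sql_query is set
        ..Default::default()
    };
    read_info.encode_to_vec()              // bytes carried in the Flight ticket
}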


@ -25,7 +25,7 @@ arrow = { workspace = true, features = ["prettyprint"] }
futures = "0.3"
pin-project = "1.1"
prost = "0.11"
regex = "1.8.3"
regex = "1.8.4"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.96"
snafu = "0.7"


@ -24,7 +24,7 @@ mutable_batch_lp = { path = "../mutable_batch_lp" }
mutable_batch_pb = { path = "../mutable_batch_pb" }
nix = "0.26"
observability_deps = { path = "../observability_deps" }
once_cell = { version = "1.17", features = ["parking_lot"] }
once_cell = { version = "1.18", features = ["parking_lot"] }
parking_lot = "0.12"
prost = "0.11"
rand = "0.8.3"


@ -30,9 +30,9 @@ bytes = { version = "1" }
chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "serde"] }
crossbeam-utils = { version = "0.8" }
crypto-common = { version = "0.1", default-features = false, features = ["std"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ee1019d9585252066ef5b288c84aabebcbd93ca6" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ee1019d9585252066ef5b288c84aabebcbd93ca6", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "ee1019d9585252066ef5b288c84aabebcbd93ca6", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "859251b4a20e00c3dfe73eee6b605fcf722687e7" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "859251b4a20e00c3dfe73eee6b605fcf722687e7", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "859251b4a20e00c3dfe73eee6b605fcf722687e7", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
digest = { version = "0.10", features = ["mac", "std"] }
either = { version = "1" }
fixedbitset = { version = "0.4" }
@ -81,6 +81,7 @@ smallvec = { version = "1", default-features = false, features = ["union"] }
sqlparser = { version = "0.34", features = ["visitor"] }
sqlx = { version = "0.6", features = ["json", "postgres", "runtime-tokio-rustls", "sqlite", "tls", "uuid"] }
sqlx-core = { version = "0.6", default-features = false, features = ["any", "migrate", "postgres", "runtime-tokio-rustls", "sqlite", "uuid"] }
strum = { version = "0.24", features = ["derive"] }
thrift = { version = "0.17" }
tokio = { version = "1", features = ["full", "test-util", "tracing"] }
tokio-stream = { version = "0.1", features = ["fs", "net"] }
@ -91,6 +92,8 @@ tracing = { version = "0.1", features = ["log", "max_level_trace", "release_max_
tracing-core = { version = "0.1" }
tracing-log = { version = "0.1" }
tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "parking_lot"] }
unicode-bidi = { version = "0.3" }
unicode-normalization = { version = "0.1" }
url = { version = "2" }
uuid = { version = "1", features = ["v4"] }
zstd = { version = "0.12" }
@ -150,6 +153,8 @@ syn-dff4ba8e3ae991db = { package = "syn", version = "1", features = ["extra-trai
syn-f595c2ba2a3f28df = { package = "syn", version = "2", features = ["extra-traits", "full", "visit-mut"] }
tokio = { version = "1", features = ["full", "test-util", "tracing"] }
tokio-stream = { version = "0.1", features = ["fs", "net"] }
unicode-bidi = { version = "0.3" }
unicode-normalization = { version = "0.1" }
url = { version = "2" }
uuid = { version = "1", features = ["v4"] }