Merge remote-tracking branch 'origin/main' into jrb_63_compactor_spans
# Conflicts: # compactor/src/driver.rspull/24376/head
commit
803122e3b4
|
@ -161,8 +161,9 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
|
|||
|
||||
[[package]]
|
||||
name = "arrow"
|
||||
version = "42.0.0"
|
||||
source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/42.0.0_patched#20f6bd7ed730d937abe76ab859088094dee8a5d3"
|
||||
version = "43.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2feeebd77b34b0bc88f224e06d01c27da4733997cc4789a4e056196656cdc59a"
|
||||
dependencies = [
|
||||
"ahash 0.8.3",
|
||||
"arrow-arith",
|
||||
|
@ -182,8 +183,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "arrow-arith"
|
||||
version = "42.0.0"
|
||||
source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/42.0.0_patched#20f6bd7ed730d937abe76ab859088094dee8a5d3"
|
||||
version = "43.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7173f5dc49c0ecb5135f52565af33afd3fdc9a12d13bd6f9973e8b96305e4b2e"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
|
@ -196,8 +198,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "arrow-array"
|
||||
version = "42.0.0"
|
||||
source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/42.0.0_patched#20f6bd7ed730d937abe76ab859088094dee8a5d3"
|
||||
version = "43.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "63d7ea725f7d1f8bb2cffc53ef538557e95fc802e217d5be25122d402e22f3d0"
|
||||
dependencies = [
|
||||
"ahash 0.8.3",
|
||||
"arrow-buffer",
|
||||
|
@ -212,8 +215,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "arrow-buffer"
|
||||
version = "42.0.0"
|
||||
source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/42.0.0_patched#20f6bd7ed730d937abe76ab859088094dee8a5d3"
|
||||
version = "43.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bdbe439e077f484e5000b9e1d47b5e4c0d15f2b311a8f5bcc682553d5d67a722"
|
||||
dependencies = [
|
||||
"half 2.3.1",
|
||||
"num",
|
||||
|
@ -221,8 +225,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "arrow-cast"
|
||||
version = "42.0.0"
|
||||
source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/42.0.0_patched#20f6bd7ed730d937abe76ab859088094dee8a5d3"
|
||||
version = "43.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "93913cc14875770aa1eef5e310765e855effa352c094cb1c7c00607d0f37b4e1"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
|
@ -238,8 +243,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "arrow-csv"
|
||||
version = "42.0.0"
|
||||
source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/42.0.0_patched#20f6bd7ed730d937abe76ab859088094dee8a5d3"
|
||||
version = "43.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ef55b67c55ed877e6fe7b923121c19dae5e31ca70249ea2779a17b58fb0fbd9a"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
|
@ -256,8 +262,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "arrow-data"
|
||||
version = "42.0.0"
|
||||
source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/42.0.0_patched#20f6bd7ed730d937abe76ab859088094dee8a5d3"
|
||||
version = "43.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d4f4f4a3c54614126a71ab91f6631c9743eb4643d6e9318b74191da9dc6e028b"
|
||||
dependencies = [
|
||||
"arrow-buffer",
|
||||
"arrow-schema",
|
||||
|
@ -267,8 +274,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "arrow-flight"
|
||||
version = "42.0.0"
|
||||
source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/42.0.0_patched#20f6bd7ed730d937abe76ab859088094dee8a5d3"
|
||||
version = "43.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f1128a9f099b4e8dc9a67aed274061f3cc95afd8b7aab98f2b44cb8b7b542b71"
|
||||
dependencies = [
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
|
@ -293,8 +301,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "arrow-ipc"
|
||||
version = "42.0.0"
|
||||
source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/42.0.0_patched#20f6bd7ed730d937abe76ab859088094dee8a5d3"
|
||||
version = "43.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d41a3659f984a524ef1c2981d43747b24d8eec78e2425267fcd0ef34ce71cd18"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
|
@ -306,8 +315,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "arrow-json"
|
||||
version = "42.0.0"
|
||||
source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/42.0.0_patched#20f6bd7ed730d937abe76ab859088094dee8a5d3"
|
||||
version = "43.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "10b95faa95a378f56ef32d84cc0104ea998c39ef7cd1faaa6b4cebf8ea92846d"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
|
@ -316,7 +326,7 @@ dependencies = [
|
|||
"arrow-schema",
|
||||
"chrono",
|
||||
"half 2.3.1",
|
||||
"indexmap 1.9.3",
|
||||
"indexmap 2.0.0",
|
||||
"lexical-core",
|
||||
"num",
|
||||
"serde",
|
||||
|
@ -325,8 +335,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "arrow-ord"
|
||||
version = "42.0.0"
|
||||
source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/42.0.0_patched#20f6bd7ed730d937abe76ab859088094dee8a5d3"
|
||||
version = "43.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c68549a4284d9f8b39586afb8d5ff8158b8f0286353a4844deb1d11cf1ba1f26"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
|
@ -339,8 +350,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "arrow-row"
|
||||
version = "42.0.0"
|
||||
source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/42.0.0_patched#20f6bd7ed730d937abe76ab859088094dee8a5d3"
|
||||
version = "43.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0a75a4a757afc301ce010adadff54d79d66140c4282ed3de565f6ccb716a5cf3"
|
||||
dependencies = [
|
||||
"ahash 0.8.3",
|
||||
"arrow-array",
|
||||
|
@ -353,13 +365,15 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "arrow-schema"
|
||||
version = "42.0.0"
|
||||
source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/42.0.0_patched#20f6bd7ed730d937abe76ab859088094dee8a5d3"
|
||||
version = "43.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2bebcb57eef570b15afbcf2d07d813eb476fde9f6dd69c81004d6476c197e87e"
|
||||
|
||||
[[package]]
|
||||
name = "arrow-select"
|
||||
version = "42.0.0"
|
||||
source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/42.0.0_patched#20f6bd7ed730d937abe76ab859088094dee8a5d3"
|
||||
version = "43.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f6e2943fa433a48921e914417173816af64eef61c0a3d448280e6c40a62df221"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
|
@ -370,16 +384,18 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "arrow-string"
|
||||
version = "42.0.0"
|
||||
source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/42.0.0_patched#20f6bd7ed730d937abe76ab859088094dee8a5d3"
|
||||
version = "43.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bbc92ed638851774f6d7af1ad900b92bc1486746497511868b4298fcbcfa35af"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
"num",
|
||||
"regex",
|
||||
"regex-syntax 0.7.3",
|
||||
"regex-syntax 0.7.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1346,7 +1362,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "datafusion"
|
||||
version = "27.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=46752163bd4f30f778850160513e8ca7f15fcf14#46752163bd4f30f778850160513e8ca7f15fcf14"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=04ecaf7405dbbfd43f43acec972f2435ada5ee81#04ecaf7405dbbfd43f43acec972f2435ada5ee81"
|
||||
dependencies = [
|
||||
"ahash 0.8.3",
|
||||
"arrow",
|
||||
|
@ -1394,7 +1410,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "datafusion-common"
|
||||
version = "27.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=46752163bd4f30f778850160513e8ca7f15fcf14#46752163bd4f30f778850160513e8ca7f15fcf14"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=04ecaf7405dbbfd43f43acec972f2435ada5ee81#04ecaf7405dbbfd43f43acec972f2435ada5ee81"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
|
@ -1408,7 +1424,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "datafusion-execution"
|
||||
version = "27.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=46752163bd4f30f778850160513e8ca7f15fcf14#46752163bd4f30f778850160513e8ca7f15fcf14"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=04ecaf7405dbbfd43f43acec972f2435ada5ee81#04ecaf7405dbbfd43f43acec972f2435ada5ee81"
|
||||
dependencies = [
|
||||
"dashmap",
|
||||
"datafusion-common",
|
||||
|
@ -1425,7 +1441,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "datafusion-expr"
|
||||
version = "27.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=46752163bd4f30f778850160513e8ca7f15fcf14#46752163bd4f30f778850160513e8ca7f15fcf14"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=04ecaf7405dbbfd43f43acec972f2435ada5ee81#04ecaf7405dbbfd43f43acec972f2435ada5ee81"
|
||||
dependencies = [
|
||||
"ahash 0.8.3",
|
||||
"arrow",
|
||||
|
@ -1439,7 +1455,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "datafusion-optimizer"
|
||||
version = "27.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=46752163bd4f30f778850160513e8ca7f15fcf14#46752163bd4f30f778850160513e8ca7f15fcf14"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=04ecaf7405dbbfd43f43acec972f2435ada5ee81#04ecaf7405dbbfd43f43acec972f2435ada5ee81"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"async-trait",
|
||||
|
@ -1450,19 +1466,20 @@ dependencies = [
|
|||
"hashbrown 0.14.0",
|
||||
"itertools 0.11.0",
|
||||
"log",
|
||||
"regex-syntax 0.7.3",
|
||||
"regex-syntax 0.7.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-physical-expr"
|
||||
version = "27.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=46752163bd4f30f778850160513e8ca7f15fcf14#46752163bd4f30f778850160513e8ca7f15fcf14"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=04ecaf7405dbbfd43f43acec972f2435ada5ee81#04ecaf7405dbbfd43f43acec972f2435ada5ee81"
|
||||
dependencies = [
|
||||
"ahash 0.8.3",
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-schema",
|
||||
"base64 0.21.2",
|
||||
"blake2",
|
||||
"blake3",
|
||||
"chrono",
|
||||
|
@ -1471,6 +1488,7 @@ dependencies = [
|
|||
"datafusion-row",
|
||||
"half 2.3.1",
|
||||
"hashbrown 0.14.0",
|
||||
"hex",
|
||||
"indexmap 2.0.0",
|
||||
"itertools 0.11.0",
|
||||
"lazy_static",
|
||||
|
@ -1488,7 +1506,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "datafusion-proto"
|
||||
version = "27.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=46752163bd4f30f778850160513e8ca7f15fcf14#46752163bd4f30f778850160513e8ca7f15fcf14"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=04ecaf7405dbbfd43f43acec972f2435ada5ee81#04ecaf7405dbbfd43f43acec972f2435ada5ee81"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"chrono",
|
||||
|
@ -1502,7 +1520,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "datafusion-row"
|
||||
version = "27.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=46752163bd4f30f778850160513e8ca7f15fcf14#46752163bd4f30f778850160513e8ca7f15fcf14"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=04ecaf7405dbbfd43f43acec972f2435ada5ee81#04ecaf7405dbbfd43f43acec972f2435ada5ee81"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"datafusion-common",
|
||||
|
@ -1513,7 +1531,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "datafusion-sql"
|
||||
version = "27.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=46752163bd4f30f778850160513e8ca7f15fcf14#46752163bd4f30f778850160513e8ca7f15fcf14"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=04ecaf7405dbbfd43f43acec972f2435ada5ee81#04ecaf7405dbbfd43f43acec972f2435ada5ee81"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-schema",
|
||||
|
@ -1715,7 +1733,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "ef033ed5e9bad94e55838ca0ca906db0e043f517adda0c8b79c7a8c66c93c1b5"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"rustix 0.38.3",
|
||||
"rustix 0.38.4",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
|
@ -2002,6 +2020,24 @@ version = "0.3.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
||||
|
||||
[[package]]
|
||||
name = "gossip"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"futures",
|
||||
"hashbrown 0.14.0",
|
||||
"metric",
|
||||
"prost",
|
||||
"prost-build",
|
||||
"test_helpers",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"uuid",
|
||||
"workspace-hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grpc-binary-logger"
|
||||
version = "0.1.0"
|
||||
|
@ -2279,7 +2315,7 @@ checksum = "0646026eb1b3eea4cd9ba47912ea5ce9cc07713d105b1a14698f4e6433d348b7"
|
|||
dependencies = [
|
||||
"http",
|
||||
"hyper",
|
||||
"rustls 0.21.3",
|
||||
"rustls 0.21.5",
|
||||
"tokio",
|
||||
"tokio-rustls 0.24.1",
|
||||
]
|
||||
|
@ -3114,7 +3150,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"rustix 0.38.3",
|
||||
"rustix 0.38.4",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
|
@ -3851,8 +3887,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "parquet"
|
||||
version = "42.0.0"
|
||||
source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/42.0.0_patched#20f6bd7ed730d937abe76ab859088094dee8a5d3"
|
||||
version = "43.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec7267a9607c3f955d4d0ac41b88a67cecc0d8d009173ad3da390699a6cb3750"
|
||||
dependencies = [
|
||||
"ahash 0.8.3",
|
||||
"arrow-array",
|
||||
|
@ -4397,7 +4434,7 @@ dependencies = [
|
|||
"itertools 0.11.0",
|
||||
"once_cell",
|
||||
"regex",
|
||||
"regex-syntax 0.7.3",
|
||||
"regex-syntax 0.7.4",
|
||||
"schema",
|
||||
"snafu",
|
||||
"tokio",
|
||||
|
@ -4531,7 +4568,7 @@ dependencies = [
|
|||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-automata 0.3.2",
|
||||
"regex-syntax 0.7.3",
|
||||
"regex-syntax 0.7.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -4551,7 +4588,7 @@ checksum = "83d3daa6976cffb758ec878f108ba0e062a45b2d6ca3a2cca965338855476caf"
|
|||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax 0.7.3",
|
||||
"regex-syntax 0.7.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -4562,9 +4599,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
|
|||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.7.3"
|
||||
version = "0.7.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2ab07dc67230e4a4718e70fd5c20055a4334b121f1f9db8fe63ef39ce9b8c846"
|
||||
checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2"
|
||||
|
||||
[[package]]
|
||||
name = "reqwest"
|
||||
|
@ -4589,7 +4626,7 @@ dependencies = [
|
|||
"once_cell",
|
||||
"percent-encoding",
|
||||
"pin-project-lite",
|
||||
"rustls 0.21.3",
|
||||
"rustls 0.21.5",
|
||||
"rustls-pemfile",
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
@ -4716,9 +4753,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.38.3"
|
||||
version = "0.38.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac5ffa1efe7548069688cd7028f32591853cd7b5b756d41bcffd2353e4fc75b4"
|
||||
checksum = "0a962918ea88d644592894bc6dc55acc6c0956488adcebbfb6e273506b7fd6e5"
|
||||
dependencies = [
|
||||
"bitflags 2.3.3",
|
||||
"errno",
|
||||
|
@ -4741,9 +4778,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "rustls"
|
||||
version = "0.21.3"
|
||||
version = "0.21.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b19faa85ecb5197342b54f987b142fb3e30d0c90da40f80ef4fa9a726e6676ed"
|
||||
checksum = "79ea77c539259495ce8ca47f53e66ae0330a8819f67e23ac96ca02f50e7b7d36"
|
||||
dependencies = [
|
||||
"log",
|
||||
"ring",
|
||||
|
@ -4884,9 +4921,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.100"
|
||||
version = "1.0.102"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0f1e14e89be7aa4c4b78bdbdc9eb5bf8517829a600ae8eaa39a6e1d960b5185c"
|
||||
checksum = "b5062a995d481b2308b6064e9af76011f2921c35f97b0468811ed9f6cd91dfed"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"ryu",
|
||||
|
@ -5767,7 +5804,7 @@ version = "0.24.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081"
|
||||
dependencies = [
|
||||
"rustls 0.21.3",
|
||||
"rustls 0.21.5",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
|
@ -6100,6 +6137,7 @@ dependencies = [
|
|||
"parking_lot 0.12.1",
|
||||
"pin-project",
|
||||
"sysinfo",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"trace",
|
||||
|
@ -6688,6 +6726,7 @@ dependencies = [
|
|||
"hashbrown 0.14.0",
|
||||
"heck",
|
||||
"indexmap 1.9.3",
|
||||
"indexmap 2.0.0",
|
||||
"itertools 0.10.5",
|
||||
"libc",
|
||||
"lock_api",
|
||||
|
@ -6711,11 +6750,11 @@ dependencies = [
|
|||
"rand_core",
|
||||
"regex",
|
||||
"regex-automata 0.3.2",
|
||||
"regex-syntax 0.7.3",
|
||||
"regex-syntax 0.7.4",
|
||||
"reqwest",
|
||||
"ring",
|
||||
"rustix 0.38.3",
|
||||
"rustls 0.21.3",
|
||||
"rustix 0.38.4",
|
||||
"rustls 0.21.5",
|
||||
"scopeguard",
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
|
35
Cargo.toml
35
Cargo.toml
|
@ -17,6 +17,7 @@ members = [
|
|||
"flightsql",
|
||||
"garbage_collector",
|
||||
"generated_types",
|
||||
"gossip",
|
||||
"grpc-binary-logger-proto",
|
||||
"grpc-binary-logger-test-proto",
|
||||
"grpc-binary-logger",
|
||||
|
@ -29,9 +30,9 @@ members = [
|
|||
"influxdb_tsm",
|
||||
"influxdb2_client",
|
||||
"influxrpc_parser",
|
||||
"ingester_query_grpc",
|
||||
"ingester_test_ctx",
|
||||
"ingester",
|
||||
"ingester_query_grpc",
|
||||
"iox_catalog",
|
||||
"iox_data_generator",
|
||||
"iox_query_influxql",
|
||||
|
@ -82,8 +83,8 @@ members = [
|
|||
"trace",
|
||||
"tracker",
|
||||
"trogging",
|
||||
"wal",
|
||||
"wal_inspect",
|
||||
"wal",
|
||||
"workspace-hack",
|
||||
]
|
||||
default-members = ["influxdb_iox"]
|
||||
|
@ -118,13 +119,14 @@ edition = "2021"
|
|||
license = "MIT OR Apache-2.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
arrow = { version = "42.0.0" }
|
||||
arrow-flight = { version = "42.0.0" }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "46752163bd4f30f778850160513e8ca7f15fcf14", default-features = false }
|
||||
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "46752163bd4f30f778850160513e8ca7f15fcf14" }
|
||||
arrow = { version = "43.0.0" }
|
||||
arrow-flight = { version = "43.0.0" }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "04ecaf7405dbbfd43f43acec972f2435ada5ee81", default-features = false }
|
||||
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "04ecaf7405dbbfd43f43acec972f2435ada5ee81" }
|
||||
|
||||
hashbrown = { version = "0.14.0" }
|
||||
object_store = { version = "0.6.0" }
|
||||
parquet = { version = "42.0.0" }
|
||||
parquet = { version = "43.0.0" }
|
||||
tonic = { version = "0.9.2", features = ["tls", "tls-webpki-roots"] }
|
||||
tonic-build = { version = "0.9.2" }
|
||||
tonic-health = { version = "0.9.2" }
|
||||
|
@ -154,22 +156,3 @@ opt-level = 3
|
|||
|
||||
[profile.dev.package.similar]
|
||||
opt-level = 3
|
||||
|
||||
[patch.crates-io]
|
||||
# TODO remove on upgrade to 43.0.0
|
||||
# Use https://github.com/apache/arrow-rs/pull/4467 to get the fix for
|
||||
# https://github.com/apache/arrow-rs/issues/4459
|
||||
parquet = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/42.0.0_patched" }
|
||||
arrow = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/42.0.0_patched" }
|
||||
arrow-buffer = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/42.0.0_patched" }
|
||||
arrow-schema = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/42.0.0_patched" }
|
||||
arrow-data = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/42.0.0_patched" }
|
||||
arrow-array = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/42.0.0_patched" }
|
||||
arrow-select = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/42.0.0_patched" }
|
||||
arrow-cast = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/42.0.0_patched" }
|
||||
arrow-ipc = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/42.0.0_patched" }
|
||||
arrow-row = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/42.0.0_patched" }
|
||||
arrow-arith = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/42.0.0_patched" }
|
||||
arrow-ord = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/42.0.0_patched" }
|
||||
arrow-string = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/42.0.0_patched" }
|
||||
arrow-flight = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/42.0.0_patched" }
|
||||
|
|
|
@ -7,8 +7,8 @@ use datafusion::{
|
|||
execution::context::TaskContext,
|
||||
physical_expr::PhysicalSortExpr,
|
||||
physical_plan::{
|
||||
stream::RecordBatchStreamAdapter, ExecutionPlan, Partitioning, SendableRecordBatchStream,
|
||||
Statistics,
|
||||
stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan,
|
||||
Partitioning, SendableRecordBatchStream, Statistics,
|
||||
},
|
||||
};
|
||||
use schema::SchemaBuilder;
|
||||
|
@ -93,6 +93,16 @@ impl ExecutionPlan for PanicPlan {
|
|||
}
|
||||
}
|
||||
|
||||
impl DisplayAs for PanicPlan {
|
||||
fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match t {
|
||||
DisplayFormatType::Default | DisplayFormatType::Verbose => {
|
||||
write!(f, "PanicPlan")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use data_types::CompactionLevel;
|
||||
|
|
|
@ -5,7 +5,7 @@ use std::{
|
|||
|
||||
use async_trait::async_trait;
|
||||
use backoff::{Backoff, BackoffConfig};
|
||||
use data_types::{ParquetFile, PartitionId};
|
||||
use data_types::{ParquetFile, PartitionId, TransitionPartitionId};
|
||||
use iox_catalog::interface::Catalog;
|
||||
use observability_deps::tracing::warn;
|
||||
|
||||
|
@ -61,7 +61,7 @@ impl CatalogQuerier for Arc<dyn Catalog> {
|
|||
self.repositories()
|
||||
.await
|
||||
.parquet_files()
|
||||
.list_by_partition_not_to_delete(partition_id)
|
||||
.list_by_partition_not_to_delete(&TransitionPartitionId::Deprecated(partition_id))
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,8 +2,8 @@ use std::{fmt::Display, sync::Arc};
|
|||
|
||||
use async_trait::async_trait;
|
||||
use backoff::{Backoff, BackoffConfig};
|
||||
use data_types::{Partition, PartitionId};
|
||||
use iox_catalog::interface::Catalog;
|
||||
use data_types::{Partition, PartitionId, TransitionPartitionId};
|
||||
use iox_catalog::{interface::Catalog, partition_lookup};
|
||||
|
||||
use super::PartitionSource;
|
||||
|
||||
|
@ -33,12 +33,9 @@ impl PartitionSource for CatalogPartitionSource {
|
|||
async fn fetch_by_id(&self, partition_id: PartitionId) -> Option<Partition> {
|
||||
Backoff::new(&self.backoff_config)
|
||||
.retry_all_errors("partition_by_id", || async {
|
||||
self.catalog
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.get_by_id(partition_id)
|
||||
.await
|
||||
let mut repos = self.catalog.repositories().await;
|
||||
let id = TransitionPartitionId::Deprecated(partition_id);
|
||||
partition_lookup(repos.as_mut(), &id).await
|
||||
})
|
||||
.await
|
||||
.expect("retry forever")
|
||||
|
|
|
@ -74,7 +74,7 @@ async fn compact_partition(
|
|||
df_semaphore: Arc<InstrumentedAsyncSemaphore>,
|
||||
components: Arc<Components>,
|
||||
) {
|
||||
info!(partition_id = partition_id.get(), "compact partition",);
|
||||
info!(partition_id = partition_id.get(), timeout = ?partition_timeout, "compact partition",);
|
||||
span.set_metadata("partition_id", partition_id.get().to_string());
|
||||
let scratchpad = components.scratchpad_gen.pad();
|
||||
|
||||
|
|
|
@ -542,7 +542,7 @@ mod tests {
|
|||
|
||||
async fn list_by_partition_not_to_delete(
|
||||
&mut self,
|
||||
partition_id: PartitionId,
|
||||
partition_id: &TransitionPartitionId,
|
||||
) -> iox_catalog::interface::Result<Vec<ParquetFile>> {
|
||||
self.inner
|
||||
.list_by_partition_not_to_delete(partition_id)
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
[package]
|
||||
name = "gossip"
|
||||
version.workspace = true
|
||||
authors.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
async-trait = "0.1.68"
|
||||
futures = "0.3.28"
|
||||
hashbrown.workspace = true
|
||||
metric = { version = "0.1.0", path = "../metric" }
|
||||
prost = "0.11.9"
|
||||
thiserror = "1.0.40"
|
||||
tokio = { version = "1.28.2", features = ["net", "io-util", "time", "rt", "sync", "macros"] }
|
||||
tracing = "0.1.37"
|
||||
uuid = { version = "1.3.3", features = ["v4"] }
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
|
||||
[build-dependencies]
|
||||
prost-build = "0.11.9"
|
||||
|
||||
[dev-dependencies]
|
||||
test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
|
|
@ -0,0 +1,16 @@
|
|||
use std::{error::Error, path::PathBuf};
|
||||
|
||||
use prost_build::Config;
|
||||
|
||||
fn main() -> Result<(), Box<dyn Error>> {
|
||||
let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("proto");
|
||||
let proto = root.join("gossip.proto");
|
||||
|
||||
println!("cargo:rerun-if-changed={}", proto.display());
|
||||
|
||||
Config::new()
|
||||
.bytes(["."])
|
||||
.compile_protos(&[proto], &[root])?;
|
||||
|
||||
Ok(())
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
syntax = "proto3";
|
||||
package influxdata.iox.gossip.v1;
|
||||
option go_package = "github.com/influxdata/iox/gossip/v1";
|
||||
|
||||
// The payload of a single gossip datagram.
|
||||
message Frame {
|
||||
// Per-instance UUID as raw BE bytes.
|
||||
bytes identity = 1;
|
||||
|
||||
// One or more user/control frames packed into a single message.
|
||||
repeated FrameMessage messages = 2;
|
||||
}
|
||||
|
||||
// A single gossip message within a frame.
|
||||
message FrameMessage {
|
||||
// Various user/control message types.
|
||||
oneof payload {
|
||||
Ping ping = 1;
|
||||
Pong pong = 2;
|
||||
|
||||
// User-provided data payload.
|
||||
UserPayload user_data = 3;
|
||||
}
|
||||
}
|
||||
|
||||
message Ping {}
|
||||
message Pong {}
|
||||
|
||||
// An application payload from the caller of the gossip library.
|
||||
message UserPayload {
|
||||
// An opaque user payload - this is handed back to the gossip library user
|
||||
// unmodified.
|
||||
bytes payload = 1;
|
||||
}
|
|
@ -0,0 +1,64 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use tokio::{
|
||||
net::{ToSocketAddrs, UdpSocket},
|
||||
sync::mpsc,
|
||||
};
|
||||
|
||||
use crate::{handle::GossipHandle, reactor::Reactor, Dispatcher};
|
||||
|
||||
/// Gossip subsystem configuration and initialisation.
|
||||
#[derive(Debug)]
|
||||
pub struct Builder<T> {
|
||||
seed_addrs: Vec<String>,
|
||||
dispatcher: T,
|
||||
metric: Arc<metric::Registry>,
|
||||
}
|
||||
|
||||
impl<T> Builder<T> {
|
||||
/// Use `seed_addrs` as seed peer addresses, and dispatch any application
|
||||
/// messages to `dispatcher`.
|
||||
///
|
||||
/// Each address in `seed_addrs` is re-resolved periodically and the first
|
||||
/// resolved IP address is used for peer communication.
|
||||
pub fn new(seed_addrs: Vec<String>, dispatcher: T, metric: Arc<metric::Registry>) -> Self {
|
||||
Self {
|
||||
seed_addrs,
|
||||
dispatcher,
|
||||
metric,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Builder<T>
|
||||
where
|
||||
T: Dispatcher + 'static,
|
||||
{
|
||||
/// Initialise the gossip subsystem using `socket` for communication.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// This call spawns a tokio task, and as such must be called from within a
|
||||
/// tokio runtime.
|
||||
pub fn build(self, socket: UdpSocket) -> GossipHandle {
|
||||
// Obtain a channel to communicate between the actor, and all handles
|
||||
let (tx, rx) = mpsc::channel(1000);
|
||||
|
||||
// Initialise the reactor
|
||||
let reactor = Reactor::new(self.seed_addrs, socket, self.dispatcher, &self.metric);
|
||||
let identity = reactor.identity().clone();
|
||||
|
||||
// Start the message reactor.
|
||||
tokio::spawn(reactor.run(rx));
|
||||
|
||||
GossipHandle::new(tx, identity)
|
||||
}
|
||||
|
||||
/// Bind to the provided socket address and initialise the gossip subsystem.
|
||||
pub async fn bind<A>(self, bind_addr: A) -> Result<GossipHandle, std::io::Error>
|
||||
where
|
||||
A: ToSocketAddrs + Send,
|
||||
{
|
||||
Ok(self.build(UdpSocket::bind(bind_addr).await?))
|
||||
}
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
use async_trait::async_trait;
|
||||
use tracing::warn;
|
||||
|
||||
// Re-export the bytes type to ensure upstream users of this crate are
|
||||
// interacting with the same type.
|
||||
pub use prost::bytes::Bytes;
|
||||
|
||||
/// A delegate abstraction through which the gossip subsystem propagates
|
||||
/// application-level messages received from other peers.
|
||||
#[async_trait]
|
||||
pub trait Dispatcher: Send + Sync {
|
||||
/// Invoked when an application-level payload is received from a peer.
|
||||
///
|
||||
/// This call should not block / should complete quickly to avoid blocking
|
||||
/// the gossip reactor loop - if a long-running job must be started within
|
||||
/// this call, consider spawning a separate task.
|
||||
async fn dispatch(&self, payload: Bytes);
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Dispatcher for tokio::sync::mpsc::Sender<Bytes> {
|
||||
async fn dispatch(&self, payload: Bytes) {
|
||||
if let Err(e) = self.send(payload).await {
|
||||
warn!(error=%e, "error dispatching payload to application handler");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A no-op dispatcher.
|
||||
#[cfg(test)]
|
||||
#[async_trait::async_trait]
|
||||
impl Dispatcher for () {
|
||||
async fn dispatch(&self, _payload: crate::Bytes) {}
|
||||
}
|
|
@ -0,0 +1,73 @@
|
|||
use crate::{Bytes, MAX_USER_PAYLOAD_BYTES};
|
||||
use thiserror::Error;
|
||||
use tokio::sync::{mpsc, oneshot};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::peers::Identity;
|
||||
|
||||
/// An error indicating a send was attempted with a payload that exceeds
|
||||
/// [`MAX_USER_PAYLOAD_BYTES`].
|
||||
#[derive(Error, Debug)]
|
||||
#[error("max allowed payload size exceeded")]
|
||||
#[allow(missing_copy_implementations)]
|
||||
pub struct PayloadSizeError {}
|
||||
|
||||
/// Requests sent to the [`Reactor`] actor task.
|
||||
///
|
||||
/// [`Reactor`]: crate::reactor::Reactor
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum Request {
|
||||
/// Broadcast the given payload to all known peers.
|
||||
Broadcast(Bytes),
|
||||
|
||||
/// Get a snapshot of the peer identities.
|
||||
GetPeers(oneshot::Sender<Vec<Uuid>>),
|
||||
}
|
||||
|
||||
/// A handle to the gossip subsystem.
|
||||
///
|
||||
/// All resources used by the gossip system will be released once this
|
||||
/// [`GossipHandle`] is dropped. To share the handle, wrap it in an [`Arc`].
|
||||
///
|
||||
/// [`Arc`]: std::sync::Arc
|
||||
#[derive(Debug)]
|
||||
pub struct GossipHandle {
|
||||
tx: mpsc::Sender<Request>,
|
||||
identity: Identity,
|
||||
}
|
||||
|
||||
impl GossipHandle {
|
||||
pub(crate) fn new(tx: mpsc::Sender<Request>, identity: Identity) -> Self {
|
||||
Self { tx, identity }
|
||||
}
|
||||
|
||||
/// Return the randomly generated identity of this gossip instance.
|
||||
pub fn identity(&self) -> Uuid {
|
||||
*self.identity
|
||||
}
|
||||
|
||||
/// Broadcast `payload` to all known peers.
|
||||
///
|
||||
/// This is a best-effort operation - peers are not guaranteed to receive
|
||||
/// this broadcast.
|
||||
pub async fn broadcast<T>(&self, payload: T) -> Result<(), PayloadSizeError>
|
||||
where
|
||||
T: Into<Bytes> + Send,
|
||||
{
|
||||
let payload = payload.into();
|
||||
if payload.len() > MAX_USER_PAYLOAD_BYTES {
|
||||
return Err(PayloadSizeError {});
|
||||
}
|
||||
|
||||
self.tx.send(Request::Broadcast(payload)).await.unwrap();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Retrieve a snapshot of the connected peer list.
|
||||
pub async fn get_peers(&self) -> Vec<Uuid> {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
self.tx.send(Request::GetPeers(tx)).await.unwrap();
|
||||
rx.await.unwrap()
|
||||
}
|
||||
}
|
|
@ -0,0 +1,98 @@
|
|||
//! A work-in-progress, simple gossip primitive for metadata distribution
|
||||
//! between IOx nodes.
|
||||
//!
|
||||
//! # Transport
|
||||
//!
|
||||
//! Prefer small payloads where possible, and expect loss of some messages -
|
||||
//! this primitive provides *best effort* delivery.
|
||||
//!
|
||||
//! This implementation sends unicast UDP frames between peers, with support for
|
||||
//! both control frames & user payloads. The maximum message size is 65,507
|
||||
//! bytes ([`MAX_USER_PAYLOAD_BYTES`] for application-level payloads), but a
|
||||
//! packet this large is fragmented into smaller (at most MTU-sized) packets and
|
||||
//! is at greater risk of being dropped due to a lost fragment.
|
||||
//!
|
||||
//! # Security
|
||||
//!
|
||||
//! Messages exchanged between peers are unauthenticated and connectionless -
|
||||
//! it's trivial to forge a message appearing to come from a different peer, or
|
||||
//! include malicious payloads.
|
||||
//!
|
||||
//! The security model of this implementation expects the peers to be running in
|
||||
//! a trusted environment, secure from malicious users.
|
||||
|
||||
#![deny(rustdoc::broken_intra_doc_links, rust_2018_idioms)]
|
||||
#![warn(
|
||||
clippy::clone_on_ref_ptr,
|
||||
clippy::dbg_macro,
|
||||
clippy::explicit_iter_loop,
|
||||
clippy::future_not_send,
|
||||
clippy::todo,
|
||||
clippy::use_self,
|
||||
missing_copy_implementations,
|
||||
missing_debug_implementations,
|
||||
unused_crate_dependencies,
|
||||
missing_docs
|
||||
)]
|
||||
|
||||
mod builder;
|
||||
mod dispatcher;
|
||||
mod handle;
|
||||
mod metric;
|
||||
mod peers;
|
||||
mod proto;
|
||||
mod reactor;
|
||||
pub(crate) mod seed;
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
/// Work around the unused_crate_dependencies false positives for test deps.
|
||||
#[cfg(test)]
|
||||
use test_helpers as _;
|
||||
use workspace_hack as _;
|
||||
|
||||
pub use builder::*;
|
||||
pub use dispatcher::*;
|
||||
pub use handle::*;
|
||||
|
||||
/// The maximum duration of time allotted to performing a DNS resolution against
|
||||
/// a seed/peer address.
|
||||
const RESOLVE_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
|
||||
/// Defines the interval between PING frames sent to all configured seed peers.
|
||||
const SEED_PING_INTERVAL: std::time::Duration = Duration::from_secs(15);
|
||||
|
||||
/// The maximum payload size allowed.
|
||||
///
|
||||
/// Attempting to send a serialised packet (inclusive of control frames/fields)
|
||||
/// in excess of this amount will result in an error.
|
||||
const MAX_FRAME_BYTES: usize = 1024 * 10;
|
||||
|
||||
/// The frame header overhead for user payloads.
|
||||
const USER_PAYLOAD_OVERHEAD: usize = 22;
|
||||
|
||||
/// The maximum allowed byte size of user payloads.
|
||||
///
|
||||
/// Sending payloads of this size is discouraged as it leads to fragmentation of
|
||||
/// the message and increases the chance of the message being undelivered /
|
||||
/// dropped. Smaller is always better for UDP transports!
|
||||
pub const MAX_USER_PAYLOAD_BYTES: usize = MAX_FRAME_BYTES - USER_PAYLOAD_OVERHEAD;
|
||||
|
||||
#[cfg(test)]
|
||||
#[allow(clippy::assertions_on_constants)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_max_msg_size() {
|
||||
assert!(MAX_FRAME_BYTES < 65_536, "cannot exceed UDP maximum");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_max_user_payload_size() {
|
||||
assert_eq!(
|
||||
MAX_USER_PAYLOAD_BYTES, 10_218,
|
||||
"applications may depend on this value not changing"
|
||||
);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
//! Metric newtype wrappers for type safety.
|
||||
//!
|
||||
//! The metrics are easily confused (they're all counters) so have the compiler
|
||||
//! check the right ones are being used in the right places.
|
||||
|
||||
use metric::U64Counter;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct SentFrames(metric::U64Counter);
|
||||
|
||||
impl SentFrames {
|
||||
pub(crate) fn inc(&self, v: usize) {
|
||||
self.0.inc(v as u64)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct ReceivedFrames(metric::U64Counter);
|
||||
|
||||
impl ReceivedFrames {
|
||||
pub(crate) fn inc(&self, v: usize) {
|
||||
self.0.inc(v as u64)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct SentBytes(metric::U64Counter);
|
||||
|
||||
impl SentBytes {
|
||||
pub(crate) fn inc(&self, v: usize) {
|
||||
self.0.inc(v as u64)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct ReceivedBytes(metric::U64Counter);
|
||||
|
||||
impl ReceivedBytes {
|
||||
pub(crate) fn inc(&self, v: usize) {
|
||||
self.0.inc(v as u64)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn new_metrics(
|
||||
metrics: &metric::Registry,
|
||||
) -> (SentFrames, ReceivedFrames, SentBytes, ReceivedBytes) {
|
||||
let metric_frames = metrics.register_metric::<U64Counter>(
|
||||
"gossip_frames",
|
||||
"number of frames sent/received by this node",
|
||||
);
|
||||
let metric_bytes = metrics
|
||||
.register_metric::<U64Counter>("gossip_bytes", "sum of bytes sent/received by this node");
|
||||
|
||||
(
|
||||
SentFrames(metric_frames.recorder(&[("direction", "sent")])),
|
||||
ReceivedFrames(metric_frames.recorder(&[("direction", "received")])),
|
||||
SentBytes(metric_bytes.recorder(&[("direction", "sent")])),
|
||||
ReceivedBytes(metric_bytes.recorder(&[("direction", "received")])),
|
||||
)
|
||||
}
|
|
@ -0,0 +1,244 @@
|
|||
use std::{io, net::SocketAddr};
|
||||
|
||||
use futures::{stream::FuturesUnordered, StreamExt};
|
||||
use hashbrown::{hash_map::RawEntryMut, HashMap};
|
||||
use metric::U64Counter;
|
||||
use prost::bytes::Bytes;
|
||||
use tokio::net::UdpSocket;
|
||||
use tracing::{trace, warn};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{
|
||||
metric::{SentBytes, SentFrames},
|
||||
MAX_FRAME_BYTES,
|
||||
};
|
||||
|
||||
/// A unique generated identity containing 128 bits of randomness (V4 UUID).
|
||||
#[derive(Debug, Eq, Clone)]
|
||||
pub(crate) struct Identity(Bytes, Uuid);
|
||||
|
||||
impl std::ops::Deref for Identity {
|
||||
type Target = Uuid;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.1
|
||||
}
|
||||
}
|
||||
|
||||
impl std::hash::Hash for Identity {
|
||||
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
|
||||
self.0.hash(state);
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for Identity {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
debug_assert!((self.1 == other.1) == (self.0 == other.0));
|
||||
self.0 == other.0
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Identity {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
self.1.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Bytes> for Identity {
|
||||
type Error = uuid::Error;
|
||||
|
||||
fn try_from(value: Bytes) -> Result<Self, Self::Error> {
|
||||
let uuid = Uuid::from_slice(&value)?;
|
||||
Ok(Self(value, uuid))
|
||||
}
|
||||
}
|
||||
|
||||
impl Identity {
|
||||
/// Generate a new random identity.
|
||||
pub(crate) fn new() -> Self {
|
||||
let id = Uuid::new_v4();
|
||||
Self(Bytes::from(id.as_bytes().to_vec()), id)
|
||||
}
|
||||
|
||||
pub(crate) fn as_bytes(&self) -> &Bytes {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
/// A discovered peer within the gossip cluster.
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct Peer {
|
||||
identity: Identity,
|
||||
addr: SocketAddr,
|
||||
}
|
||||
|
||||
impl Peer {
|
||||
pub(crate) async fn send(
|
||||
&self,
|
||||
buf: &[u8],
|
||||
socket: &UdpSocket,
|
||||
frames_sent: &SentFrames,
|
||||
bytes_sent: &SentBytes,
|
||||
) -> Result<usize, io::Error> {
|
||||
// If the frame is larger than the allowed maximum, then the receiver
|
||||
// will truncate the frame when reading the socket.
|
||||
//
|
||||
// Never send frames that will be unprocessable.
|
||||
if buf.len() > MAX_FRAME_BYTES {
|
||||
warn!(
|
||||
n_bytes = buf.len(),
|
||||
"not sending oversized packet - receiver would truncate"
|
||||
);
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
"max frame size exceeded",
|
||||
));
|
||||
}
|
||||
|
||||
let ret = socket.send_to(buf, self.addr).await;
|
||||
match &ret {
|
||||
Ok(n_bytes) => {
|
||||
frames_sent.inc(1);
|
||||
bytes_sent.inc(*n_bytes);
|
||||
trace!(identity=%self.identity, n_bytes, peer_addr=%self.addr, "send frame")
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(error=%e, identity=%self.identity, peer_addr=%self.addr, "frame send error")
|
||||
}
|
||||
}
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
/// The set of currently active/known peers.
|
||||
#[derive(Debug, Default)]
|
||||
pub(crate) struct PeerList {
|
||||
list: HashMap<Identity, Peer>,
|
||||
|
||||
/// The number of known, believed-to-be-healthy peers.
|
||||
metric_peer_count: metric::U64Counter,
|
||||
}
|
||||
|
||||
impl PeerList {
|
||||
/// Initialise the [`PeerList`] with capacity for `cap` number of [`Peer`]
|
||||
/// instances.
|
||||
pub(crate) fn with_capacity(cap: usize, metrics: &metric::Registry) -> Self {
|
||||
let metric_peer_count = metrics
|
||||
.register_metric::<U64Counter>(
|
||||
"gossip_known_peers",
|
||||
"number of likely healthy peers known to this node",
|
||||
)
|
||||
.recorder(&[]);
|
||||
|
||||
Self {
|
||||
list: HashMap::with_capacity(cap),
|
||||
metric_peer_count,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the UUIDs of all known peers.
|
||||
pub(crate) fn peer_uuids(&self) -> Vec<Uuid> {
|
||||
self.list.keys().map(|v| **v).collect()
|
||||
}
|
||||
|
||||
/// Upsert a peer identified by `identity` to the peer list, associating it
|
||||
/// with the provided `peer_addr`.
|
||||
pub(crate) fn upsert(&mut self, identity: &Identity, peer_addr: SocketAddr) -> &mut Peer {
|
||||
let p = match self.list.raw_entry_mut().from_key(identity) {
|
||||
RawEntryMut::Vacant(v) => {
|
||||
self.metric_peer_count.inc(1);
|
||||
v.insert(
|
||||
identity.to_owned(),
|
||||
Peer {
|
||||
addr: peer_addr,
|
||||
identity: identity.to_owned(),
|
||||
},
|
||||
)
|
||||
.1
|
||||
}
|
||||
RawEntryMut::Occupied(v) => v.into_mut(),
|
||||
};
|
||||
|
||||
p.addr = peer_addr;
|
||||
p
|
||||
}
|
||||
|
||||
/// Broadcast `buf` to all known peers over `socket`, returning the number
|
||||
/// of bytes sent in total.
|
||||
pub(crate) async fn broadcast(
|
||||
&self,
|
||||
buf: &[u8],
|
||||
socket: &UdpSocket,
|
||||
frames_sent: &SentFrames,
|
||||
bytes_sent: &SentBytes,
|
||||
) -> usize {
|
||||
self.list
|
||||
.values()
|
||||
.map(|v| v.send(buf, socket, frames_sent, bytes_sent))
|
||||
.collect::<FuturesUnordered<_>>()
|
||||
.fold(0, |acc, res| async move {
|
||||
match res {
|
||||
Ok(n) => acc + n,
|
||||
Err(_) => acc,
|
||||
}
|
||||
})
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::{
|
||||
collections::hash_map::DefaultHasher,
|
||||
hash::{Hash, Hasher},
|
||||
};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_identity_round_trip() {
|
||||
let a = Identity::new();
|
||||
|
||||
let encoded = a.as_bytes().to_owned();
|
||||
let decoded = Identity::try_from(encoded).unwrap();
|
||||
|
||||
assert_eq!(decoded, a);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_identity_length_mismatch() {
|
||||
let v = Bytes::from_static(&[42, 42, 42, 42]);
|
||||
let _ = Identity::try_from(v).expect_err("short ID should fail");
|
||||
|
||||
let v = Bytes::from_static(&[
|
||||
42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
|
||||
]);
|
||||
let _ = Identity::try_from(v).expect_err("long ID should fail");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_identity_eq() {
|
||||
let v = Identity::new();
|
||||
assert_eq!(v.clone(), v);
|
||||
assert_eq!(hash_identity(&v), hash_identity(&v));
|
||||
|
||||
let other = Identity::new();
|
||||
assert_ne!(v, other);
|
||||
assert_ne!(hash_identity(&other), hash_identity(&v));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_identity_display() {
|
||||
let v = Identity::new();
|
||||
let text = v.to_string();
|
||||
|
||||
let uuid = Uuid::try_parse(&text).expect("display impl should output valid uuids");
|
||||
assert_eq!(*v, uuid);
|
||||
}
|
||||
|
||||
fn hash_identity(v: &Identity) -> u64 {
|
||||
let mut h = DefaultHasher::default();
|
||||
v.hash(&mut h);
|
||||
h.finish()
|
||||
}
|
||||
}
|
|
@ -0,0 +1,3 @@
|
|||
//! Proto definitions of gossip message wire types.
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/influxdata.iox.gossip.v1.rs"));
|
|
@ -0,0 +1,455 @@
|
|||
use std::{net::SocketAddr, sync::Arc};
|
||||
|
||||
use prost::{bytes::BytesMut, Message};
|
||||
use tokio::{
|
||||
net::UdpSocket,
|
||||
sync::mpsc::{self},
|
||||
};
|
||||
use tracing::{debug, error, info, trace, warn};
|
||||
|
||||
use crate::{
|
||||
metric::*,
|
||||
peers::{Identity, PeerList},
|
||||
proto::{self, frame_message::Payload, FrameMessage},
|
||||
seed::{seed_ping_task, Seed},
|
||||
Dispatcher, Request, MAX_FRAME_BYTES,
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
enum Error {
|
||||
NoPayload {
|
||||
peer: Identity,
|
||||
addr: SocketAddr,
|
||||
},
|
||||
|
||||
Deserialise {
|
||||
addr: SocketAddr,
|
||||
source: prost::DecodeError,
|
||||
},
|
||||
|
||||
Identity {
|
||||
addr: SocketAddr,
|
||||
},
|
||||
|
||||
Io(std::io::Error),
|
||||
|
||||
MaxSize(usize),
|
||||
}
|
||||
|
||||
impl From<std::io::Error> for Error {
|
||||
fn from(value: std::io::Error) -> Self {
|
||||
Self::Io(value)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct AbortOnDrop(tokio::task::JoinHandle<()>);
|
||||
impl Drop for AbortOnDrop {
|
||||
fn drop(&mut self) {
|
||||
self.0.abort()
|
||||
}
|
||||
}
|
||||
|
||||
/// An event loop for gossip frames processing.
|
||||
///
|
||||
/// This actor task is responsible for driving peer discovery, managing the set
|
||||
/// of known peers and exchanging gossip frames between peers.
|
||||
///
|
||||
/// A user interacts with a [`Reactor`] through a [`GossipHandle`].
|
||||
///
|
||||
/// [`GossipHandle`]: crate::GossipHandle
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct Reactor<T> {
|
||||
dispatch: T,
|
||||
|
||||
/// The random identity of this gossip instance.
|
||||
identity: Identity,
|
||||
|
||||
/// A cached wire frame, used to generate outgoing messages.
|
||||
cached_frame: proto::Frame,
|
||||
/// A re-used buffer for serialising outgoing messages into.
|
||||
serialisation_buf: Vec<u8>,
|
||||
|
||||
/// The immutable list of seed addresses provided by the user, periodically
|
||||
/// pinged.
|
||||
seed_list: Arc<[Seed]>,
|
||||
/// A task that periodically sends PING frames to all seeds, executing in a
|
||||
/// separate task so that DNS resolution does not block the reactor loop.
|
||||
_seed_ping_task: AbortOnDrop,
|
||||
|
||||
/// The set of active peers this node has communicated with and believes to
|
||||
/// be (recently) healthy.
|
||||
///
|
||||
/// Depending on the perceived availability of the seed nodes, this may
|
||||
/// contain less peers than the number of initial seeds.
|
||||
peer_list: PeerList,
|
||||
|
||||
/// The UDP socket used for communication with peers.
|
||||
socket: Arc<UdpSocket>,
|
||||
|
||||
/// The count of frames sent and received.
|
||||
metric_frames_sent: SentFrames,
|
||||
metric_frames_received: ReceivedFrames,
|
||||
|
||||
/// The sum of bytes sent and received.
|
||||
metric_bytes_sent: SentBytes,
|
||||
metric_bytes_received: ReceivedBytes,
|
||||
}
|
||||
|
||||
impl<T> Reactor<T>
|
||||
where
|
||||
T: Dispatcher,
|
||||
{
|
||||
pub(crate) fn new(
|
||||
seed_list: Vec<String>,
|
||||
socket: UdpSocket,
|
||||
dispatch: T,
|
||||
metrics: &metric::Registry,
|
||||
) -> Self {
|
||||
// Generate a unique UUID for this Reactor instance, and cache the wire
|
||||
// representation.
|
||||
let identity = Identity::new();
|
||||
|
||||
let seed_list = seed_list.into_iter().map(Seed::new).collect();
|
||||
let socket = Arc::new(socket);
|
||||
let mut serialisation_buf = Vec::with_capacity(1024);
|
||||
|
||||
// Generate a pre-populated frame header.
|
||||
let mut cached_frame = proto::Frame {
|
||||
identity: identity.as_bytes().clone(),
|
||||
messages: Vec::with_capacity(1),
|
||||
};
|
||||
|
||||
// A ping frame is static over the lifetime of a Reactor instance, so it
|
||||
// can be pre-serialised, cached, and reused for every ping.
|
||||
let cached_ping_frame = {
|
||||
populate_frame(
|
||||
&mut cached_frame,
|
||||
vec![new_payload(Payload::Ping(proto::Ping {}))],
|
||||
&mut serialisation_buf,
|
||||
)
|
||||
.unwrap();
|
||||
serialisation_buf.clone()
|
||||
};
|
||||
|
||||
// Initialise the various metrics with wrappers to help distinguish
|
||||
// between the (very similar) counters.
|
||||
let (metric_frames_sent, metric_frames_received, metric_bytes_sent, metric_bytes_received) =
|
||||
new_metrics(metrics);
|
||||
|
||||
// Spawn a task that periodically pings all known seeds.
|
||||
//
|
||||
// Pinging all seeds announces this node as alive, propagating the
|
||||
// instance UUID, and requesting PONG responses to drive population of
|
||||
// the active peer list.
|
||||
let seed_ping_task = AbortOnDrop(tokio::spawn(seed_ping_task(
|
||||
Arc::clone(&seed_list),
|
||||
Arc::clone(&socket),
|
||||
cached_ping_frame,
|
||||
metric_frames_sent.clone(),
|
||||
metric_bytes_sent.clone(),
|
||||
)));
|
||||
|
||||
Self {
|
||||
dispatch,
|
||||
identity,
|
||||
cached_frame,
|
||||
serialisation_buf,
|
||||
peer_list: PeerList::with_capacity(seed_list.len(), metrics),
|
||||
seed_list,
|
||||
_seed_ping_task: seed_ping_task,
|
||||
socket,
|
||||
metric_frames_sent,
|
||||
metric_frames_received,
|
||||
metric_bytes_sent,
|
||||
metric_bytes_received,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn run(mut self, mut rx: mpsc::Receiver<Request>) {
|
||||
info!(
|
||||
identity = %self.identity,
|
||||
seed_list = ?self.seed_list,
|
||||
"gossip reactor started",
|
||||
);
|
||||
|
||||
loop {
|
||||
tokio::select! {
|
||||
msg = self.read() => {
|
||||
match msg {
|
||||
Ok(()) => {},
|
||||
Err(Error::NoPayload { peer, addr }) => {
|
||||
warn!(%peer, %addr, "message contains no payload");
|
||||
continue;
|
||||
}
|
||||
Err(Error::Deserialise { addr, source }) => {
|
||||
warn!(error=%source, %addr, "error deserialising frame");
|
||||
continue;
|
||||
}
|
||||
Err(Error::Identity { addr }) => {
|
||||
warn!(%addr, "invalid identity value in frame");
|
||||
continue;
|
||||
}
|
||||
Err(Error::Io(error)) => {
|
||||
error!(%error, "i/o error");
|
||||
continue;
|
||||
}
|
||||
Err(Error::MaxSize(_)) => {
|
||||
// Logged at source
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
op = rx.recv() => {
|
||||
match op {
|
||||
None => {
|
||||
info!("stopping gossip reactor");
|
||||
return;
|
||||
}
|
||||
Some(Request::GetPeers(tx)) => {
|
||||
let _ = tx.send(self.peer_list.peer_uuids());
|
||||
},
|
||||
Some(Request::Broadcast(payload)) => {
|
||||
// The user is guaranteed MAX_USER_PAYLOAD_BYTES to
|
||||
// be send-able, so send this frame without packing
|
||||
// others with it for simplicity.
|
||||
populate_frame(
|
||||
&mut self.cached_frame,
|
||||
vec![new_payload(Payload::UserData(proto::UserPayload{payload}))],
|
||||
&mut self.serialisation_buf
|
||||
).expect("size validated in handle at enqueue time");
|
||||
|
||||
self.peer_list.broadcast(
|
||||
&self.serialisation_buf,
|
||||
&self.socket,
|
||||
&self.metric_frames_sent,
|
||||
&self.metric_bytes_sent
|
||||
).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/// Read a gossip frame from the socket and potentially respond.
|
||||
///
|
||||
/// This method waits for a frame to be made available by the OS, enumerates
|
||||
/// the contents, batches any responses to those frames and if non-empty,
|
||||
/// returns the result to the sender of the original frame.
|
||||
///
|
||||
/// Returns the bytes read and bytes sent during execution of this method.
|
||||
async fn read(&mut self) -> Result<(), Error> {
|
||||
// Read a frame into buf.
|
||||
let (bytes_read, frame, peer_addr) = read_frame(&self.socket).await?;
|
||||
self.metric_frames_received.inc(1);
|
||||
self.metric_bytes_received.inc(bytes_read as _);
|
||||
|
||||
// Read the peer identity from the frame
|
||||
let identity =
|
||||
Identity::try_from(frame.identity).map_err(|_| Error::Identity { addr: peer_addr })?;
|
||||
|
||||
// Don't process messages from this node.
|
||||
//
|
||||
// It's expected that all N servers will be included in a peer list,
|
||||
// rather than the N-1 peers to this node. By dropping messages from
|
||||
// this node, pings sent by this node will go unprocessed and therefore
|
||||
// this node will not be added to the active peer list.
|
||||
if identity == self.identity {
|
||||
debug!(%identity, %peer_addr, bytes_read, "dropping frame from self");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Find or create the peer in the peer list.
|
||||
let peer = self.peer_list.upsert(&identity, peer_addr);
|
||||
|
||||
let mut out_messages = Vec::with_capacity(1);
|
||||
for msg in frame.messages {
|
||||
// Extract the payload from the frame message
|
||||
let payload = msg.payload.ok_or_else(|| Error::NoPayload {
|
||||
peer: identity.clone(),
|
||||
addr: peer_addr,
|
||||
})?;
|
||||
|
||||
// Handle the frame message from the peer, optionally returning a
|
||||
// response frame.
|
||||
let response = match payload {
|
||||
Payload::Ping(_) => Some(Payload::Pong(proto::Pong {})),
|
||||
Payload::Pong(_) => {
|
||||
debug!(%identity, %peer_addr, "pong");
|
||||
None
|
||||
}
|
||||
Payload::UserData(data) => {
|
||||
self.dispatch.dispatch(data.payload).await;
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(payload) = response {
|
||||
out_messages.push(new_payload(payload));
|
||||
}
|
||||
}
|
||||
|
||||
// Sometimes no message will be returned to the peer - there's no need
|
||||
// to send an empty frame.
|
||||
if out_messages.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Serialise the frame into the serialisation buffer.
|
||||
populate_frame(
|
||||
&mut self.cached_frame,
|
||||
out_messages,
|
||||
&mut self.serialisation_buf,
|
||||
)?;
|
||||
|
||||
peer.send(
|
||||
&self.serialisation_buf,
|
||||
&self.socket,
|
||||
&self.metric_frames_sent,
|
||||
&self.metric_bytes_sent,
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Return the randomised identity assigned to this instance.
|
||||
pub(crate) fn identity(&self) -> &Identity {
|
||||
&self.identity
|
||||
}
|
||||
}
|
||||
|
||||
/// Wait for a UDP datagram to become ready, and read it entirely into `buf`.
|
||||
async fn recv(socket: &UdpSocket, buf: &mut BytesMut) -> (usize, SocketAddr) {
|
||||
let (n_bytes, addr) = socket
|
||||
.recv_buf_from(buf)
|
||||
.await
|
||||
// Errors here come from libc's recvfrom() or from converting the
// kernel-provided socket structure into Rust's SocketAddr - neither
// should ever happen.
.expect("invalid recvfrom");
|
||||
|
||||
trace!(%addr, n_bytes, "socket read");
|
||||
(n_bytes, addr)
|
||||
}
|
||||
|
||||
/// Wait for a UDP datagram to arrive, and decode it into a gossip Frame.
///
/// A fresh buffer with MAX_FRAME_BYTES of capacity is allocated for each
/// read, as explained below.
async fn read_frame(socket: &UdpSocket) -> Result<(usize, proto::Frame, SocketAddr), Error> {
|
||||
// Pre-allocate a buffer large enough to hold the maximum message size.
|
||||
//
|
||||
// Reading data from a UDP socket silently truncates if there's not enough
|
||||
// buffer space to write the full packet payload (tokio doesn't support
|
||||
// MSG_TRUNC-like flags on reads).
|
||||
let mut buf = BytesMut::with_capacity(MAX_FRAME_BYTES);
|
||||
|
||||
let (n_bytes, addr) = recv(socket, &mut buf).await;
|
||||
|
||||
// Decode the frame, re-using byte arrays from the underlying buffer.
|
||||
match proto::Frame::decode(buf.freeze()) {
|
||||
Ok(frame) => {
|
||||
debug!(?frame, %addr, n_bytes, "read frame");
|
||||
Ok((n_bytes, frame, addr))
|
||||
}
|
||||
Err(e) => Err(Error::Deserialise { addr, source: e }),
|
||||
}
|
||||
}
|
||||
|
||||
/// Given a pre-allocated `frame`, clear and populate it with the provided
|
||||
/// `payload` containing a set of [`FrameMessage`], serialising it to `buf`.
|
||||
fn populate_frame(
|
||||
frame: &mut proto::Frame,
|
||||
payload: Vec<FrameMessage>,
|
||||
buf: &mut Vec<u8>,
|
||||
) -> Result<(), Error> {
|
||||
frame.messages = payload;
|
||||
|
||||
// Reading data from a UDP socket silently truncates the datagram if there
// isn't enough buffer space to hold the full payload. This library
// pre-allocates a MAX_FRAME_BYTES buffer to read packets into, therefore
// no serialised frame may exceed that size.
|
||||
if frame.encoded_len() > MAX_FRAME_BYTES {
|
||||
error!(
|
||||
n_bytes=buf.len(),
|
||||
n_max=%MAX_FRAME_BYTES,
|
||||
"attempted to send frame larger than configured maximum"
|
||||
);
|
||||
return Err(Error::MaxSize(buf.len()));
|
||||
}
|
||||
|
||||
buf.clear();
|
||||
frame.encode(buf).expect("buffer should grow");
|
||||
|
||||
debug_assert!(proto::Frame::decode(crate::Bytes::from(buf.clone())).is_ok());
|
||||
|
||||
Ok(())
|
||||
}
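// A minimal illustrative sketch (not part of this module): pre-serialise a
// PING frame once so callers such as the seed ping task can re-send the same
// bytes without re-encoding on every sweep. Treating `proto::Ping {}` as an
// empty message is an assumption; everything else uses the helpers above.
#[allow(dead_code)]
fn build_ping_frame(identity: &Identity) -> Result<Vec<u8>, Error> {
    let mut frame = proto::Frame {
        identity: identity.as_bytes().clone(),
        messages: vec![],
    };

    let mut buf = Vec::new();
    populate_frame(
        &mut frame,
        vec![new_payload(Payload::Ping(proto::Ping {}))],
        &mut buf,
    )?;

    Ok(buf)
}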
|
||||
|
||||
/// Instantiate a new [`FrameMessage`] from the given [`Payload`].
|
||||
fn new_payload(p: Payload) -> proto::FrameMessage {
|
||||
proto::FrameMessage { payload: Some(p) }
|
||||
}
|
||||
|
||||
/// Send a PING message to `socket`.
|
||||
pub(crate) async fn ping(
|
||||
ping_frame: &[u8],
|
||||
socket: &UdpSocket,
|
||||
addr: SocketAddr,
|
||||
sent_frames: &SentFrames,
|
||||
sent_bytes: &SentBytes,
|
||||
) -> usize {
|
||||
match socket.send_to(ping_frame, &addr).await {
|
||||
Ok(n_bytes) => {
|
||||
debug!(addr = %addr, "ping");
|
||||
sent_frames.inc(1);
|
||||
sent_bytes.inc(n_bytes);
|
||||
n_bytes
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(
|
||||
error=%e,
|
||||
addr = %addr,
|
||||
"ping failed"
|
||||
);
|
||||
0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::{MAX_USER_PAYLOAD_BYTES, USER_PAYLOAD_OVERHEAD};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_user_frame_overhead() {
|
||||
let identity = Identity::new();
|
||||
|
||||
// Generate a pre-populated frame header.
|
||||
let mut frame = proto::Frame {
|
||||
identity: identity.as_bytes().clone(),
|
||||
messages: vec![],
|
||||
};
|
||||
|
||||
let mut buf = Vec::new();
|
||||
populate_frame(
|
||||
&mut frame,
|
||||
vec![new_payload(Payload::UserData(proto::UserPayload {
|
||||
payload: crate::Bytes::new(), // Empty/0-sized
|
||||
}))],
|
||||
&mut buf,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// The proto type should self-report the same size.
|
||||
assert_eq!(buf.len(), frame.encoded_len());
|
||||
|
||||
// The overhead const should be accurate
|
||||
assert_eq!(buf.len(), USER_PAYLOAD_OVERHEAD);
|
||||
|
||||
// The max user payload size should be accurate.
|
||||
assert_eq!(MAX_FRAME_BYTES - buf.len(), MAX_USER_PAYLOAD_BYTES);
|
||||
}
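// A hypothetical pre-flight check (not part of this crate) built from the
// constants exercised above: a user payload larger than
// MAX_USER_PAYLOAD_BYTES can never fit in a frame once
// USER_PAYLOAD_OVERHEAD is added, so it can be rejected before enqueueing.
fn payload_fits(payload: &[u8]) -> bool {
    payload.len() <= MAX_USER_PAYLOAD_BYTES
}

#[test]
fn test_payload_fits_sketch() {
    assert!(payload_fits(&vec![0_u8; MAX_USER_PAYLOAD_BYTES]));
    assert!(!payload_fits(&vec![0_u8; MAX_USER_PAYLOAD_BYTES + 1]));
}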
|
||||
}
|
|
@ -0,0 +1,97 @@
|
|||
use std::{future, net::SocketAddr, sync::Arc};
|
||||
|
||||
use futures::{stream::FuturesUnordered, StreamExt};
|
||||
use tokio::{
|
||||
net::{self, UdpSocket},
|
||||
time::{timeout, MissedTickBehavior},
|
||||
};
|
||||
use tracing::{debug, warn};
|
||||
|
||||
use crate::{
|
||||
metric::{SentBytes, SentFrames},
|
||||
reactor::ping,
|
||||
RESOLVE_TIMEOUT, SEED_PING_INTERVAL,
|
||||
};
|
||||
|
||||
/// The user-provided seed peer address.
///
/// NOTE: the IP/socket address this resolves to may change over the
/// lifetime of the peer, so the raw address string is retained instead of
/// the resolved [`SocketAddr`], and it is re-resolved before each use so
/// that an unreachable peer is retried at its current address.
#[derive(Debug)]
pub(crate) struct Seed(String);
|
||||
|
||||
impl Seed {
|
||||
pub(crate) fn new(addr: String) -> Self {
|
||||
Self(addr)
|
||||
}
|
||||
|
||||
/// Resolve this peer address, returning [`None`] if resolution does not
/// complete within [`RESOLVE_TIMEOUT`].
pub(crate) async fn resolve(&self) -> Option<SocketAddr> {
|
||||
match timeout(RESOLVE_TIMEOUT, resolve(&self.0)).await {
|
||||
Ok(v) => v,
|
||||
Err(_) => {
|
||||
warn!(addr = %self.0, "timeout resolving seed address");
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
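// A small usage sketch (hypothetical seed address, illustrative function):
// because `Seed` stores the raw string, every call to `resolve()` performs a
// fresh lookup, so a seed whose DNS record changes between sweeps is pinged
// at its new address the next time around.
#[allow(dead_code)]
async fn example_resolve_seed() -> Option<SocketAddr> {
    let seed = Seed::new("seed-0.gossip.example.com:7946".to_string());
    // Re-resolving on every call is the point: the returned address may
    // differ from one ping sweep to the next.
    seed.resolve().await
}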
|
||||
|
||||
/// Resolve `addr`, returning the first IP address, if any.
|
||||
async fn resolve(addr: &str) -> Option<SocketAddr> {
|
||||
match net::lookup_host(addr).await.map(|mut v| v.next()) {
|
||||
Ok(Some(v)) => {
|
||||
debug!(%addr, peer=%v, "resolved peer address");
|
||||
Some(v)
|
||||
}
|
||||
Ok(None) => {
|
||||
warn!(%addr, "resolved peer address contains no IPs");
|
||||
None
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(%addr, error=%e, "failed to resolve peer address");
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Block forever, sending `ping_frame` over `socket` to all the entries in
|
||||
/// `seeds`.
|
||||
///
|
||||
/// This method immediately pings all the seeds, and then pings periodically at
|
||||
/// [`SEED_PING_INTERVAL`].
|
||||
pub(super) async fn seed_ping_task(
|
||||
seeds: Arc<[Seed]>,
|
||||
socket: Arc<UdpSocket>,
|
||||
ping_frame: Vec<u8>,
|
||||
sent_frames: SentFrames,
|
||||
sent_bytes: SentBytes,
|
||||
) {
|
||||
let mut interval = tokio::time::interval(SEED_PING_INTERVAL);
|
||||
|
||||
// Don't flood seeds with a burst of PING frames to catch up on missed
// ticks of this timer.
|
||||
interval.set_missed_tick_behavior(MissedTickBehavior::Delay);
|
||||
|
||||
// Start the ping loop, with the first iteration starting immediately.
|
||||
loop {
|
||||
interval.tick().await;
|
||||
|
||||
let bytes_sent = seeds
|
||||
.iter()
|
||||
.map(|seed| async {
|
||||
if let Some(addr) = seed.resolve().await {
|
||||
ping(&ping_frame, &socket, addr, &sent_frames, &sent_bytes).await
|
||||
} else {
|
||||
0
|
||||
}
|
||||
})
|
||||
.collect::<FuturesUnordered<_>>()
|
||||
.fold(0, |acc, x| future::ready(acc + x))
|
||||
.await;
|
||||
|
||||
debug!(bytes_sent, "seed ping sweep complete");
|
||||
}
|
||||
}
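// A minimal sketch (illustrative wrapper, not part of the crate) of how a
// caller might run this task for the life of the process: spawn it as an
// independent tokio task so seed pings keep flowing regardless of how busy
// the main reactor loop is.
#[allow(dead_code)]
fn spawn_seed_pinger(
    seeds: Arc<[Seed]>,
    socket: Arc<UdpSocket>,
    ping_frame: Vec<u8>,
    sent_frames: SentFrames,
    sent_bytes: SentBytes,
) -> tokio::task::JoinHandle<()> {
    // The task never completes on its own, so the join handle is typically
    // just dropped, or aborted at shutdown.
    tokio::spawn(seed_ping_task(
        seeds,
        socket,
        ping_frame,
        sent_frames,
        sent_bytes,
    ))
}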
|
|
@ -0,0 +1,83 @@
|
|||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
use test_helpers::{maybe_start_logging, timeout::FutureTimeout};
|
||||
use tokio::{net::UdpSocket, sync::mpsc};
|
||||
|
||||
use gossip::*;
|
||||
|
||||
/// Assert that starting up a pair of reactors performs the initial peer
/// discovery from a set of seeds, resulting in both peers knowing of one
/// another, and that payloads broadcast by one peer are received by the
/// other.
|
||||
#[tokio::test]
|
||||
async fn test_payload_exchange() {
|
||||
maybe_start_logging();
|
||||
|
||||
let metrics = Arc::new(metric::Registry::default());
|
||||
|
||||
// How long to wait for peer discovery to complete.
|
||||
const TIMEOUT: Duration = Duration::from_secs(5);
|
||||
|
||||
// Bind a UDP socket to a random port
|
||||
let a_socket = UdpSocket::bind("127.0.0.1:0")
|
||||
.await
|
||||
.expect("failed to bind UDP socket");
|
||||
let a_addr = a_socket.local_addr().expect("failed to read local addr");
|
||||
|
||||
// And a socket for the second reactor
|
||||
let b_socket = UdpSocket::bind("127.0.0.1:0")
|
||||
.await
|
||||
.expect("failed to bind UDP socket");
|
||||
let b_addr = b_socket.local_addr().expect("failed to read local addr");
|
||||
|
||||
// Initialise the dispatchers for the reactors
|
||||
let (a_tx, mut a_rx) = mpsc::channel(5);
|
||||
let (b_tx, mut b_rx) = mpsc::channel(5);
|
||||
|
||||
// Initialise both reactors
|
||||
let addrs = vec![a_addr.to_string(), b_addr.to_string()];
|
||||
let a = Builder::new(addrs.clone(), a_tx, Arc::clone(&metrics)).build(a_socket);
|
||||
let b = Builder::new(addrs, b_tx, Arc::clone(&metrics)).build(b_socket);
|
||||
|
||||
// Wait for peer discovery to occur
|
||||
async {
|
||||
loop {
|
||||
if a.get_peers().await.len() == 1 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
.with_timeout_panic(TIMEOUT)
|
||||
.await;
|
||||
|
||||
// Send the payload through peer A
|
||||
let a_payload = Bytes::from_static(b"bananas");
|
||||
a.broadcast(a_payload.clone()).await.unwrap();
|
||||
|
||||
// Assert it was received by peer B
|
||||
let got = b_rx
|
||||
.recv()
|
||||
.with_timeout_panic(TIMEOUT)
|
||||
.await
|
||||
.expect("reactor stopped");
|
||||
assert_eq!(got, a_payload);
|
||||
|
||||
// Do the reverse - send from B to A
|
||||
let b_payload = Bytes::from_static(b"platanos");
|
||||
b.broadcast(b_payload.clone()).await.unwrap();
|
||||
let got = a_rx
|
||||
.recv()
|
||||
.with_timeout_panic(TIMEOUT)
|
||||
.await
|
||||
.expect("reactor stopped");
|
||||
assert_eq!(got, b_payload);
|
||||
|
||||
// Send another payload through peer A (ensuring scratch buffers are
|
||||
// correctly wiped, etc)
|
||||
let a_payload = Bytes::from_static(b"platanos");
|
||||
a.broadcast(a_payload.clone()).await.unwrap();
|
||||
let got = b_rx
|
||||
.recv()
|
||||
.with_timeout_panic(TIMEOUT)
|
||||
.await
|
||||
.expect("reactor stopped");
|
||||
assert_eq!(got, a_payload);
|
||||
}
|
|
@ -16,7 +16,7 @@ parquet_file = { path = "../parquet_file" }
|
|||
object_store = { workspace=true }
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
schema = { path = "../schema" }
|
||||
serde_json = "1.0.100"
|
||||
serde_json = "1.0.102"
|
||||
thiserror = "1.0.43"
|
||||
tokio = { version = "1.29" }
|
||||
tokio-util = { version = "0.7.8" }
|
||||
|
|
|
@ -526,7 +526,7 @@ impl RemoteImporter {
|
|||
let res = repos
|
||||
.partitions()
|
||||
.cas_sort_key(
|
||||
partition.id,
|
||||
&partition.transition_partition_id(),
|
||||
Some(partition.sort_key.clone()),
|
||||
&new_sort_key,
|
||||
)
|
||||
|
|
|
@ -10,7 +10,7 @@ bytes = "1.4"
|
|||
futures = { version = "0.3", default-features = false }
|
||||
reqwest = { version = "0.11", default-features = false, features = ["stream", "json", "rustls-tls"] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0.100"
|
||||
serde_json = "1.0.102"
|
||||
snafu = "0.7"
|
||||
url = "2.4.0"
|
||||
uuid = { version = "1", features = ["v4"] }
|
||||
|
|
|
@ -67,7 +67,7 @@ libc = { version = "0.2" }
|
|||
num_cpus = "1.16.0"
|
||||
once_cell = { version = "1.18", features = ["parking_lot"] }
|
||||
rustyline = { version = "12.0", default-features = false, features = ["with-file-history"]}
|
||||
serde_json = "1.0.100"
|
||||
serde_json = "1.0.102"
|
||||
snafu = "0.7"
|
||||
tempfile = "3.6.0"
|
||||
thiserror = "1.0.43"
|
||||
|
|
|
@ -33,7 +33,6 @@ use object_store::DynObjectStore;
|
|||
use observability_deps::tracing::*;
|
||||
use parquet_file::storage::{ParquetStorage, StorageId};
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
num::NonZeroUsize,
|
||||
path::{Path, PathBuf},
|
||||
str::FromStr,
|
||||
|
@ -436,6 +435,15 @@ impl Config {
|
|||
catalog_dsn.dsn = Some(dsn);
|
||||
};
|
||||
|
||||
// TODO: make num_threads a parameter (other modes have it
|
||||
// configured by a command line)
|
||||
let num_threads =
|
||||
NonZeroUsize::new(num_cpus::get()).unwrap_or_else(|| NonZeroUsize::new(1).unwrap());
|
||||
|
||||
// Allow the compactor to use up to half of the cores by default, but
// always at least one.
|
||||
let compactor_concurrency = NonZeroUsize::new((num_threads.get() / 2).max(1)).unwrap();
|
||||
|
||||
let ingester_addresses =
|
||||
vec![IngesterAddress::from_str(&ingester_grpc_bind_address.to_string()).unwrap()];
|
||||
|
||||
|
@ -487,15 +495,15 @@ impl Config {
|
|||
// parameters are redundant with ingester's
|
||||
let compactor_config = CompactorConfig {
|
||||
compactor_scheduler_config,
|
||||
compaction_partition_concurrency: NonZeroUsize::new(1).unwrap(),
|
||||
compaction_df_concurrency: NonZeroUsize::new(1).unwrap(),
|
||||
compaction_partition_scratchpad_concurrency: NonZeroUsize::new(1).unwrap(),
|
||||
query_exec_thread_count: Some(NonZeroUsize::new(1).unwrap()),
|
||||
compaction_partition_concurrency: compactor_concurrency,
|
||||
compaction_df_concurrency: compactor_concurrency,
|
||||
compaction_partition_scratchpad_concurrency: compactor_concurrency,
|
||||
query_exec_thread_count: Some(num_threads),
|
||||
exec_mem_pool_bytes,
|
||||
max_desired_file_size_bytes: 30_000,
|
||||
max_desired_file_size_bytes: 100 * 1024 * 1024, // 100 MB
|
||||
percentage_max_file_size: 30,
|
||||
split_percentage: 80,
|
||||
partition_timeout_secs: 0,
|
||||
partition_timeout_secs: 30 * 60, // 30 minutes
|
||||
shadow_mode: false,
|
||||
enable_scratchpad: true,
|
||||
ignore_partition_skip_marker: false,
|
||||
|
@ -519,6 +527,8 @@ impl Config {
|
|||
};
|
||||
|
||||
SpecializedConfig {
|
||||
num_threads,
|
||||
|
||||
router_run_config,
|
||||
querier_run_config,
|
||||
|
||||
|
@ -550,6 +560,8 @@ fn ensure_directory_exists(p: &Path) {
|
|||
/// Different run configs for the different services (needed as they
|
||||
/// listen on different ports)
|
||||
struct SpecializedConfig {
|
||||
num_threads: NonZeroUsize,
|
||||
|
||||
router_run_config: RunConfig,
|
||||
querier_run_config: RunConfig,
|
||||
ingester_run_config: RunConfig,
|
||||
|
@ -564,6 +576,7 @@ struct SpecializedConfig {
|
|||
|
||||
pub async fn command(config: Config) -> Result<()> {
|
||||
let SpecializedConfig {
|
||||
num_threads,
|
||||
router_run_config,
|
||||
querier_run_config,
|
||||
ingester_run_config,
|
||||
|
@ -595,20 +608,23 @@ pub async fn command(config: Config) -> Result<()> {
|
|||
// create common state from the router and use it below
|
||||
let common_state = CommonServerState::from_config(router_run_config.clone())?;
|
||||
|
||||
// TODO: make num_threads a parameter (other modes have it
|
||||
// configured by a command line)
|
||||
let num_threads = NonZeroUsize::new(num_cpus::get())
|
||||
.unwrap_or_else(|| NonZeroUsize::new(1).expect("1 is valid"));
|
||||
info!(%num_threads, "Creating shared query executor");
|
||||
|
||||
let parquet_store_real = ParquetStorage::new(Arc::clone(&object_store), StorageId::from("iox"));
|
||||
let parquet_store_scratchpad = ParquetStorage::new(
|
||||
Arc::new(MetricsStore::new(
|
||||
Arc::new(object_store::memory::InMemory::new()),
|
||||
&metrics,
|
||||
"scratchpad",
|
||||
)),
|
||||
StorageId::from("iox_scratchpad"),
|
||||
);
|
||||
let exec = Arc::new(Executor::new_with_config(ExecutorConfig {
|
||||
num_threads,
|
||||
target_query_partitions: num_threads,
|
||||
object_stores: HashMap::from([(
|
||||
parquet_store_real.id(),
|
||||
Arc::clone(parquet_store_real.object_store()),
|
||||
)]),
|
||||
object_stores: [&parquet_store_real, &parquet_store_scratchpad]
|
||||
.into_iter()
|
||||
.map(|store| (store.id(), Arc::clone(store.object_store())))
|
||||
.collect(),
|
||||
metric_registry: Arc::clone(&metrics),
|
||||
mem_pool_size: querier_config.exec_mem_pool_bytes,
|
||||
}));
|
||||
|
@ -636,14 +652,6 @@ pub async fn command(config: Config) -> Result<()> {
|
|||
.expect("failed to start ingester");
|
||||
|
||||
info!("starting compactor");
|
||||
let parquet_store_scratchpad = ParquetStorage::new(
|
||||
Arc::new(MetricsStore::new(
|
||||
Arc::new(object_store::memory::InMemory::new()),
|
||||
&metrics,
|
||||
"scratchpad",
|
||||
)),
|
||||
StorageId::from("iox_scratchpad"),
|
||||
);
|
||||
|
||||
let compactor = create_compactor_server_type(
|
||||
&common_state,
|
||||
|
|
|
@ -1,5 +1,10 @@
|
|||
//! Tests the `influxdb_iox debug` commands
|
||||
use std::{path::Path, time::Duration};
|
||||
use std::{
|
||||
collections::VecDeque,
|
||||
io::Write,
|
||||
path::{Path, PathBuf},
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use arrow_util::assert_batches_sorted_eq;
|
||||
|
@ -47,6 +52,8 @@ async fn test_print_cpu() {
|
|||
/// 3. Start an all-in-one instance from that rebuilt catalog
/// 4. Run a query against it successfully
|
||||
#[tokio::test]
|
||||
// Ignore due to https://github.com/influxdata/influxdb_iox/issues/8203
|
||||
#[ignore]
|
||||
async fn build_catalog() {
|
||||
test_helpers::maybe_start_logging();
|
||||
let database_url = maybe_skip_integration!();
|
||||
|
@ -105,20 +112,18 @@ async fn build_catalog() {
|
|||
|
||||
// We can build a catalog and start up the server and run a query
|
||||
let restarted = RestartedServer::build_catalog_and_start(&table_dir).await;
|
||||
let batches = run_sql_until_non_empty(&restarted, sql, namespace.as_str())
|
||||
.with_timeout(Duration::from_secs(2))
|
||||
.await
|
||||
.expect("timed out waiting for non-empty batches in result");
|
||||
let batches = restarted
|
||||
.run_sql_until_non_empty(sql, namespace.as_str())
|
||||
.await;
|
||||
assert_batches_sorted_eq!(&expected, &batches);
|
||||
|
||||
// We can also rebuild a catalog from just the parquet files
|
||||
let only_parquet_dir = copy_only_parquet_files(&table_dir);
|
||||
let restarted =
|
||||
RestartedServer::build_catalog_and_start(only_parquet_dir.path()).await;
|
||||
let batches = run_sql_until_non_empty(&restarted, sql, namespace.as_str())
|
||||
.with_timeout(Duration::from_secs(2))
|
||||
.await
|
||||
.expect("timed out waiting for non-empty batches in result");
|
||||
let batches = restarted
|
||||
.run_sql_until_non_empty(sql, namespace.as_str())
|
||||
.await;
|
||||
assert_batches_sorted_eq!(&expected, &batches);
|
||||
}
|
||||
.boxed()
|
||||
|
@ -129,23 +134,6 @@ async fn build_catalog() {
|
|||
.await
|
||||
}
|
||||
|
||||
/// Loops forever, running the SQL query against the [`RestartedServer`] given
|
||||
/// until the result is non-empty. Callers are responsible for timing out the
|
||||
/// function.
|
||||
async fn run_sql_until_non_empty(
|
||||
restarted: &RestartedServer,
|
||||
sql: &str,
|
||||
namespace: &str,
|
||||
) -> Vec<RecordBatch> {
|
||||
loop {
|
||||
let batches = restarted.run_sql(sql, namespace).await;
|
||||
if !batches.is_empty() {
|
||||
return batches;
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
}
|
||||
}
|
||||
|
||||
/// An all in one instance, with data directory of `data_dir`
|
||||
struct RestartedServer {
|
||||
all_in_one: ServerFixture,
|
||||
|
@ -183,27 +171,40 @@ impl RestartedServer {
|
|||
println!("target_directory: {data_dir:?}");
|
||||
|
||||
// call `influxdb_iox debug build-catalog <table_dir> <new_data_dir>`
|
||||
Command::cargo_bin("influxdb_iox")
|
||||
let cmd = Command::cargo_bin("influxdb_iox")
|
||||
.unwrap()
|
||||
// use -v to enable logging so we can check the status messages
|
||||
.arg("-v")
|
||||
.arg("-vv")
|
||||
.arg("debug")
|
||||
.arg("build-catalog")
|
||||
.arg(exported_table_dir.as_os_str().to_str().unwrap())
|
||||
.arg(data_dir.path().as_os_str().to_str().unwrap())
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(
|
||||
predicate::str::contains("Beginning catalog / object_store build")
|
||||
.and(predicate::str::contains(
|
||||
"Begin importing files total_files=1",
|
||||
))
|
||||
.and(predicate::str::contains(
|
||||
"Completed importing files total_files=1",
|
||||
)),
|
||||
);
|
||||
.success();
|
||||
|
||||
// debug information to track down https://github.com/influxdata/influxdb_iox/issues/8203
|
||||
println!("***** Begin build-catalog STDOUT ****");
|
||||
std::io::stdout()
|
||||
.write_all(&cmd.get_output().stdout)
|
||||
.unwrap();
|
||||
println!("***** Begin build-catalog STDERR ****");
|
||||
std::io::stdout()
|
||||
.write_all(&cmd.get_output().stderr)
|
||||
.unwrap();
|
||||
println!("***** DONE ****");
|
||||
|
||||
cmd.stdout(
|
||||
predicate::str::contains("Beginning catalog / object_store build")
|
||||
.and(predicate::str::contains(
|
||||
"Begin importing files total_files=1",
|
||||
))
|
||||
.and(predicate::str::contains(
|
||||
"Completed importing files total_files=1",
|
||||
)),
|
||||
);
|
||||
|
||||
println!("Completed rebuild in {data_dir:?}");
|
||||
RecursiveDirPrinter::new().print(data_dir.path());
|
||||
|
||||
// now, start up a new server in all-in-one mode
|
||||
// using the newly built data directory
|
||||
|
@ -215,6 +216,27 @@ impl RestartedServer {
|
|||
data_dir,
|
||||
}
|
||||
}
|
||||
|
||||
/// Runs the SQL query against this server in a loop until results are
/// returned. Panics if no results are produced within 5 seconds.
|
||||
async fn run_sql_until_non_empty(&self, sql: &str, namespace: &str) -> Vec<RecordBatch> {
|
||||
let timeout = Duration::from_secs(5);
|
||||
let loop_sleep = Duration::from_millis(500);
|
||||
let fut = async {
|
||||
loop {
|
||||
let batches = self.run_sql(sql, namespace).await;
|
||||
if !batches.is_empty() {
|
||||
return batches;
|
||||
}
|
||||
tokio::time::sleep(loop_sleep).await;
|
||||
}
|
||||
};
|
||||
|
||||
fut.with_timeout(timeout)
|
||||
.await
|
||||
.expect("timed out waiting for non-empty batches in result")
|
||||
}
|
||||
}
|
||||
|
||||
/// Copies only parquet files from the source directory to a new
|
||||
|
@ -240,3 +262,43 @@ fn copy_only_parquet_files(src: &Path) -> TempDir {
|
|||
}
|
||||
target_dir
|
||||
}
|
||||
|
||||
/// Prints out the contents of the directory recursively
|
||||
/// for debugging.
|
||||
///
|
||||
/// ```text
|
||||
/// RecursiveDirPrinter All files rooted at "/tmp/.tmpvf16r0"
|
||||
/// "/tmp/.tmpvf16r0"
|
||||
/// "/tmp/.tmpvf16r0/catalog.sqlite"
|
||||
/// "/tmp/.tmpvf16r0/object_store"
|
||||
/// "/tmp/.tmpvf16r0/object_store/1"
|
||||
/// "/tmp/.tmpvf16r0/object_store/1/1"
|
||||
/// "/tmp/.tmpvf16r0/object_store/1/1/b862a7e9b329ee6a418cde191198eaeb1512753f19b87a81def2ae6c3d0ed237"
|
||||
/// "/tmp/.tmpvf16r0/object_store/1/1/b862a7e9b329ee6a418cde191198eaeb1512753f19b87a81def2ae6c3d0ed237/d78abef6-6859-48eb-aa62-3518097fbb9b.parquet"
|
||||
/// ```
struct RecursiveDirPrinter {
|
||||
paths: VecDeque<PathBuf>,
|
||||
}
|
||||
|
||||
impl RecursiveDirPrinter {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
paths: VecDeque::new(),
|
||||
}
|
||||
}
|
||||
|
||||
// Print the root, then every file and directory beneath it.
|
||||
fn print(mut self, root: &Path) {
|
||||
println!("RecursiveDirPrinter All files rooted at {root:?}");
|
||||
self.paths.push_back(PathBuf::from(root));
|
||||
|
||||
while let Some(path) = self.paths.pop_front() {
|
||||
println!("{path:?}");
|
||||
if path.is_dir() {
|
||||
for entry in std::fs::read_dir(path).unwrap() {
|
||||
self.paths.push_front(entry.unwrap().path());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -440,6 +440,19 @@ mod influxql {
|
|||
.await;
|
||||
}
|
||||
|
||||
/// Test PERCENTILE functions.
|
||||
#[tokio::test]
|
||||
async fn percentile() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
||||
TestCase {
|
||||
input: "cases/in/percentile.influxql",
|
||||
chunk_stage: ChunkStage::Ingester,
|
||||
}
|
||||
.run()
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn influxql_metadata() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
-- Query tests for influxql percentile
|
||||
-- IOX_SETUP: percentile
|
||||
|
||||
--
|
||||
-- Selectors
|
||||
--
|
||||
SELECT PERCENTILE(usage_idle,50) FROM cpu;
|
||||
SELECT cpu,PERCENTILE(usage_idle,66.667),usage_system FROM cpu;
|
||||
SELECT PERCENTILE(usage_idle,33.333) FROM cpu GROUP BY cpu;
|
||||
SELECT PERCENTILE(usage_idle,90),usage_user FROM cpu WHERE cpu='3';
|
||||
-- 0th percentile doesn't return any rows.
|
||||
SELECT PERCENTILE(usage_idle,0) FROM cpu;
|
||||
|
||||
--
|
||||
-- Aggregators
|
||||
--
|
||||
SELECT PERCENTILE(usage_system, 50), PERCENTILE(usage_system, 90), PERCENTILE(usage_system, 99) FROM cpu;
|
||||
SELECT PERCENTILE(usage_system, 50), PERCENTILE(usage_system, 90), PERCENTILE(usage_system, 99) FROM cpu GROUP BY cpu;
|
||||
SELECT PERCENTILE(usage_system, 50), PERCENTILE(usage_system, 90), PERCENTILE(usage_system, 99) FROM cpu WHERE time >= '1970-01-01 00:00:00' AND time < '1970-01-01 03:00:00' GROUP BY time(1h),cpu;
|
|
@ -0,0 +1,152 @@
|
|||
-- Test Setup: percentile
|
||||
-- InfluxQL: SELECT PERCENTILE(usage_idle,50) FROM cpu;
|
||||
name: cpu
|
||||
+---------------------+------------+
|
||||
| time | percentile |
|
||||
+---------------------+------------+
|
||||
| 1970-01-01T01:10:09 | 49.7047 |
|
||||
+---------------------+------------+
|
||||
-- InfluxQL: SELECT cpu,PERCENTILE(usage_idle,66.667),usage_system FROM cpu;
|
||||
name: cpu
|
||||
+---------------------+-----+------------+--------------+
|
||||
| time | cpu | percentile | usage_system |
|
||||
+---------------------+-----+------------+--------------+
|
||||
| 1970-01-01T01:39:15 | 0 | 66.1469 | 99.8854 |
|
||||
+---------------------+-----+------------+--------------+
|
||||
-- InfluxQL: SELECT PERCENTILE(usage_idle,33.333) FROM cpu GROUP BY cpu;
|
||||
name: cpu
|
||||
tags: cpu=0
|
||||
+---------------------+------------+
|
||||
| time | percentile |
|
||||
+---------------------+------------+
|
||||
| 1970-01-01T01:15:15 | 32.9757 |
|
||||
+---------------------+------------+
|
||||
name: cpu
|
||||
tags: cpu=1
|
||||
+---------------------+------------+
|
||||
| time | percentile |
|
||||
+---------------------+------------+
|
||||
| 1970-01-01T02:13:36 | 32.3062 |
|
||||
+---------------------+------------+
|
||||
name: cpu
|
||||
tags: cpu=2
|
||||
+---------------------+------------+
|
||||
| time | percentile |
|
||||
+---------------------+------------+
|
||||
| 1970-01-01T01:24:22 | 35.0742 |
|
||||
+---------------------+------------+
|
||||
name: cpu
|
||||
tags: cpu=3
|
||||
+---------------------+------------+
|
||||
| time | percentile |
|
||||
+---------------------+------------+
|
||||
| 1970-01-01T01:16:58 | 32.0821 |
|
||||
+---------------------+------------+
|
||||
name: cpu
|
||||
tags: cpu=4
|
||||
+---------------------+------------+
|
||||
| time | percentile |
|
||||
+---------------------+------------+
|
||||
| 1970-01-01T00:42:34 | 32.9685 |
|
||||
+---------------------+------------+
|
||||
-- InfluxQL: SELECT PERCENTILE(usage_idle,90),usage_user FROM cpu WHERE cpu='3';
|
||||
name: cpu
|
||||
+---------------------+------------+------------+
|
||||
| time | percentile | usage_user |
|
||||
+---------------------+------------+------------+
|
||||
| 1970-01-01T00:19:23 | 89.7011 | 34.7815 |
|
||||
+---------------------+------------+------------+
|
||||
-- InfluxQL: SELECT PERCENTILE(usage_idle,0) FROM cpu;
|
||||
+------+------------+
|
||||
| time | percentile |
|
||||
+------+------------+
|
||||
+------+------------+
|
||||
-- InfluxQL: SELECT PERCENTILE(usage_system, 50), PERCENTILE(usage_system, 90), PERCENTILE(usage_system, 99) FROM cpu;
|
||||
name: cpu
|
||||
+---------------------+------------+--------------+--------------+
|
||||
| time | percentile | percentile_1 | percentile_2 |
|
||||
+---------------------+------------+--------------+--------------+
|
||||
| 1970-01-01T00:00:00 | 49.2732 | 89.754 | 99.0822 |
|
||||
+---------------------+------------+--------------+--------------+
|
||||
-- InfluxQL: SELECT PERCENTILE(usage_system, 50), PERCENTILE(usage_system, 90), PERCENTILE(usage_system, 99) FROM cpu GROUP BY cpu;
|
||||
name: cpu
|
||||
tags: cpu=0
|
||||
+---------------------+------------+--------------+--------------+
|
||||
| time | percentile | percentile_1 | percentile_2 |
|
||||
+---------------------+------------+--------------+--------------+
|
||||
| 1970-01-01T00:00:00 | 49.7946 | 90.0001 | 98.8816 |
|
||||
+---------------------+------------+--------------+--------------+
|
||||
name: cpu
|
||||
tags: cpu=1
|
||||
+---------------------+------------+--------------+--------------+
|
||||
| time | percentile | percentile_1 | percentile_2 |
|
||||
+---------------------+------------+--------------+--------------+
|
||||
| 1970-01-01T00:00:00 | 50.148 | 89.4109 | 98.8158 |
|
||||
+---------------------+------------+--------------+--------------+
|
||||
name: cpu
|
||||
tags: cpu=2
|
||||
+---------------------+------------+--------------+--------------+
|
||||
| time | percentile | percentile_1 | percentile_2 |
|
||||
+---------------------+------------+--------------+--------------+
|
||||
| 1970-01-01T00:00:00 | 49.0258 | 89.7425 | 99.2486 |
|
||||
+---------------------+------------+--------------+--------------+
|
||||
name: cpu
|
||||
tags: cpu=3
|
||||
+---------------------+------------+--------------+--------------+
|
||||
| time | percentile | percentile_1 | percentile_2 |
|
||||
+---------------------+------------+--------------+--------------+
|
||||
| 1970-01-01T00:00:00 | 49.2054 | 89.9907 | 99.244 |
|
||||
+---------------------+------------+--------------+--------------+
|
||||
name: cpu
|
||||
tags: cpu=4
|
||||
+---------------------+------------+--------------+--------------+
|
||||
| time | percentile | percentile_1 | percentile_2 |
|
||||
+---------------------+------------+--------------+--------------+
|
||||
| 1970-01-01T00:00:00 | 48.1551 | 89.1691 | 98.9134 |
|
||||
+---------------------+------------+--------------+--------------+
|
||||
-- InfluxQL: SELECT PERCENTILE(usage_system, 50), PERCENTILE(usage_system, 90), PERCENTILE(usage_system, 99) FROM cpu WHERE time >= '1970-01-01 00:00:00' AND time < '1970-01-01 03:00:00' GROUP BY time(1h),cpu;
|
||||
name: cpu
|
||||
tags: cpu=0
|
||||
+---------------------+------------+--------------+--------------+
|
||||
| time | percentile | percentile_1 | percentile_2 |
|
||||
+---------------------+------------+--------------+--------------+
|
||||
| 1970-01-01T00:00:00 | 49.9884 | 89.7541 | 99.1413 |
|
||||
| 1970-01-01T01:00:00 | 47.7725 | 90.8035 | 98.8471 |
|
||||
| 1970-01-01T02:00:00 | 53.5363 | 90.0001 | 98.444 |
|
||||
+---------------------+------------+--------------+--------------+
|
||||
name: cpu
|
||||
tags: cpu=1
|
||||
+---------------------+------------+--------------+--------------+
|
||||
| time | percentile | percentile_1 | percentile_2 |
|
||||
+---------------------+------------+--------------+--------------+
|
||||
| 1970-01-01T00:00:00 | 48.2785 | 88.3004 | 98.7959 |
|
||||
| 1970-01-01T01:00:00 | 51.1512 | 92.2132 | 98.9797 |
|
||||
| 1970-01-01T02:00:00 | 49.6265 | 87.8342 | 98.0481 |
|
||||
+---------------------+------------+--------------+--------------+
|
||||
name: cpu
|
||||
tags: cpu=2
|
||||
+---------------------+------------+--------------+--------------+
|
||||
| time | percentile | percentile_1 | percentile_2 |
|
||||
+---------------------+------------+--------------+--------------+
|
||||
| 1970-01-01T00:00:00 | 50.0065 | 89.5125 | 99.109 |
|
||||
| 1970-01-01T01:00:00 | 47.9867 | 89.5532 | 99.4226 |
|
||||
| 1970-01-01T02:00:00 | 49.4459 | 90.439 | 99.2486 |
|
||||
+---------------------+------------+--------------+--------------+
|
||||
name: cpu
|
||||
tags: cpu=3
|
||||
+---------------------+------------+--------------+--------------+
|
||||
| time | percentile | percentile_1 | percentile_2 |
|
||||
+---------------------+------------+--------------+--------------+
|
||||
| 1970-01-01T00:00:00 | 46.7256 | 90.7002 | 99.3269 |
|
||||
| 1970-01-01T01:00:00 | 50.7717 | 89.2459 | 98.9579 |
|
||||
| 1970-01-01T02:00:00 | 49.6766 | 89.555 | 98.9499 |
|
||||
+---------------------+------------+--------------+--------------+
|
||||
name: cpu
|
||||
tags: cpu=4
|
||||
+---------------------+------------+--------------+--------------+
|
||||
| time | percentile | percentile_1 | percentile_2 |
|
||||
+---------------------+------------+--------------+--------------+
|
||||
| 1970-01-01T00:00:00 | 47.403 | 89.0086 | 98.9134 |
|
||||
| 1970-01-01T01:00:00 | 50.6295 | 89.1826 | 98.9091 |
|
||||
| 1970-01-01T02:00:00 | 46.1348 | 89.2463 | 98.7592 |
|
||||
+---------------------+------------+--------------+--------------+
|
File diff suppressed because it is too large
|
@ -1392,6 +1392,20 @@ pub static SETUPS: Lazy<HashMap<SetupName, SetupSteps>> = Lazy::new(|| {
|
|||
},
|
||||
],
|
||||
),
|
||||
(
|
||||
// Used for percentile function tests for InfluxQL
|
||||
"percentile",
|
||||
vec![
|
||||
Step::RecordNumParquetFiles,
|
||||
Step::WriteLineProtocol(
|
||||
include_str!("data/percentile.lp").to_string()
|
||||
),
|
||||
Step::Persist,
|
||||
Step::WaitForPersisted {
|
||||
expected_increase: 1,
|
||||
},
|
||||
],
|
||||
),
|
||||
(
|
||||
"DuplicateDifferentDomains",
|
||||
(0..2)
|
||||
|
|
|
@ -24,7 +24,7 @@ prost = "0.11"
|
|||
rand = "0.8.3"
|
||||
reqwest = { version = "0.11", default-features = false, features = ["stream", "rustls-tls"] }
|
||||
schema = { path = "../schema" }
|
||||
serde_json = "1.0.100"
|
||||
serde_json = "1.0.102"
|
||||
tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-thread"] }
|
||||
tokio-stream = "0.1.13"
|
||||
thiserror = "1.0.43"
|
||||
|
|
|
@ -928,27 +928,26 @@ mod tests {
|
|||
// Populate the catalog with the namespace / table
|
||||
let (_ns_id, table_id) = populate_catalog(&*catalog, "bananas", "platanos").await;
|
||||
|
||||
let partition_id = catalog
|
||||
let partition = catalog
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.create_or_get("test".into(), table_id)
|
||||
.await
|
||||
.expect("should create")
|
||||
.id;
|
||||
.expect("should create");
|
||||
|
||||
catalog
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.cas_sort_key(partition_id, None, &["terrific"])
|
||||
.cas_sort_key(&partition.transition_partition_id(), None, &["terrific"])
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Read the just-created sort key (None)
|
||||
let fetcher = Arc::new(DeferredLoad::new(
|
||||
Duration::from_nanos(1),
|
||||
SortKeyResolver::new(partition_id, Arc::clone(&catalog), backoff_config.clone())
|
||||
SortKeyResolver::new(partition.id, Arc::clone(&catalog), backoff_config.clone())
|
||||
.fetch(),
|
||||
&metrics,
|
||||
));
|
||||
|
|
|
@ -100,7 +100,11 @@ mod tests {
|
|||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
use assert_matches::assert_matches;
|
||||
use iox_catalog::test_helpers::{arbitrary_namespace, arbitrary_table};
|
||||
use data_types::TransitionPartitionId;
|
||||
use iox_catalog::{
|
||||
partition_lookup,
|
||||
test_helpers::{arbitrary_namespace, arbitrary_table},
|
||||
};
|
||||
|
||||
use super::*;
|
||||
use crate::buffer_tree::table::TableName;
|
||||
|
@ -161,11 +165,9 @@ mod tests {
|
|||
assert_matches!(got.lock().sort_key(), SortKeyState::Provided(None));
|
||||
assert!(got.lock().partition_key.ptr_eq(&callers_partition_key));
|
||||
|
||||
let got = catalog
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.get_by_id(got.lock().partition_id)
|
||||
let mut repos = catalog.repositories().await;
|
||||
let id = TransitionPartitionId::Deprecated(got.lock().partition_id);
|
||||
let got = partition_lookup(repos.as_mut(), &id)
|
||||
.await
|
||||
.unwrap()
|
||||
.expect("partition not created");
|
||||
|
|
|
@ -3,8 +3,8 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use backoff::{Backoff, BackoffConfig};
|
||||
use data_types::PartitionId;
|
||||
use iox_catalog::interface::Catalog;
|
||||
use data_types::{PartitionId, TransitionPartitionId};
|
||||
use iox_catalog::{interface::Catalog, partition_lookup};
|
||||
use schema::sort::SortKey;
|
||||
|
||||
/// A resolver of [`SortKey`] from the catalog for a given [`PartitionId`].
|
||||
|
@ -33,12 +33,9 @@ impl SortKeyResolver {
|
|||
pub(crate) async fn fetch(self) -> Option<SortKey> {
|
||||
Backoff::new(&self.backoff_config)
|
||||
.retry_all_errors("fetch partition sort key", || async {
|
||||
let s = self
|
||||
.catalog
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.get_by_id(self.partition_id)
|
||||
let mut repos = self.catalog.repositories().await;
|
||||
let id = TransitionPartitionId::Deprecated(self.partition_id);
|
||||
let s = partition_lookup(repos.as_mut(), &id)
|
||||
.await?
|
||||
.unwrap_or_else(|| {
|
||||
panic!(
|
||||
|
@ -76,24 +73,27 @@ mod tests {
|
|||
// Populate the catalog with the namespace / table
|
||||
let (_ns_id, table_id) = populate_catalog(&*catalog, NAMESPACE_NAME, TABLE_NAME).await;
|
||||
|
||||
let partition_id = catalog
|
||||
let partition = catalog
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.create_or_get(PARTITION_KEY.into(), table_id)
|
||||
.await
|
||||
.expect("should create")
|
||||
.id;
|
||||
.expect("should create");
|
||||
|
||||
let fetcher =
|
||||
SortKeyResolver::new(partition_id, Arc::clone(&catalog), backoff_config.clone());
|
||||
SortKeyResolver::new(partition.id, Arc::clone(&catalog), backoff_config.clone());
|
||||
|
||||
// Set the sort key
|
||||
let catalog_state = catalog
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.cas_sort_key(partition_id, None, &["uno", "dos", "bananas"])
|
||||
.cas_sort_key(
|
||||
&partition.transition_partition_id(),
|
||||
None,
|
||||
&["uno", "dos", "bananas"],
|
||||
)
|
||||
.await
|
||||
.expect("should update existing partition key");
|
||||
|
||||
|
|
|
@ -745,6 +745,108 @@ mod tests {
|
|||
]
|
||||
);
|
||||
|
||||
/// Ensure partition pruning during query execution also prunes metadata
|
||||
/// frames.
|
||||
///
|
||||
/// Individual frames are fast to serialise, but large numbers of frames can
|
||||
/// add significant query overhead, particularly for queries returning small
|
||||
/// numbers of rows where the metadata becomes a significant portion of the
|
||||
/// response.
|
||||
#[tokio::test]
|
||||
async fn test_partition_metadata_pruning() {
|
||||
let partition_provider = Arc::new(
|
||||
MockPartitionProvider::default()
|
||||
.with_partition(
|
||||
PartitionDataBuilder::new()
|
||||
.with_partition_id(ARBITRARY_PARTITION_ID)
|
||||
.with_partition_key("madrid".into())
|
||||
.build(),
|
||||
)
|
||||
.with_partition(
|
||||
PartitionDataBuilder::new()
|
||||
.with_partition_id(PARTITION2_ID)
|
||||
.with_partition_key("asturias".into())
|
||||
.build(),
|
||||
),
|
||||
);
|
||||
|
||||
// Construct a partition template suitable for pruning on the "region"
|
||||
// tag.
|
||||
let table_provider = Arc::new(MockTableProvider::new(TableMetadata::new_for_testing(
|
||||
ARBITRARY_TABLE_NAME.clone(),
|
||||
test_table_partition_override(vec![TemplatePart::TagValue("region")]),
|
||||
)));
|
||||
|
||||
// Init the buffer tree
|
||||
let buf = BufferTree::new(
|
||||
Arc::new(MockNamespaceNameProvider::new(&**ARBITRARY_NAMESPACE_NAME)),
|
||||
table_provider,
|
||||
partition_provider,
|
||||
Arc::new(MockPostWriteObserver::default()),
|
||||
Arc::new(metric::Registry::default()),
|
||||
);
|
||||
|
||||
// Write to two regions
|
||||
buf.apply(IngestOp::Write(make_write_op(
|
||||
&PartitionKey::from("madrid"),
|
||||
ARBITRARY_NAMESPACE_ID,
|
||||
&ARBITRARY_TABLE_NAME,
|
||||
ARBITRARY_TABLE_ID,
|
||||
0,
|
||||
&format!(
|
||||
r#"{},region=madrid temp=35 4242424242"#,
|
||||
&*ARBITRARY_TABLE_NAME
|
||||
),
|
||||
None,
|
||||
)))
|
||||
.await
|
||||
.expect("failed to perform write");
|
||||
|
||||
buf.apply(IngestOp::Write(make_write_op(
|
||||
&PartitionKey::from("asturias"),
|
||||
ARBITRARY_NAMESPACE_ID,
|
||||
&ARBITRARY_TABLE_NAME,
|
||||
ARBITRARY_TABLE_ID,
|
||||
0,
|
||||
&format!(
|
||||
r#"{},region=asturias temp=35 4242424242"#,
|
||||
&*ARBITRARY_TABLE_NAME
|
||||
),
|
||||
None,
|
||||
)))
|
||||
.await
|
||||
.expect("failed to perform write");
|
||||
|
||||
// Construct a predicate suitable for pruning partitions based on the
|
||||
// region / partition template.
|
||||
let predicate = Some(Predicate::new().with_expr(col("region").eq(lit(
|
||||
ScalarValue::Dictionary(
|
||||
Box::new(DataType::Int32),
|
||||
Box::new(ScalarValue::from("asturias")),
|
||||
),
|
||||
))));
|
||||
|
||||
// Execute the query and count the number of partitions that are
|
||||
// returned (either data, or metadata).
|
||||
let partition_count = buf
|
||||
.query_exec(
|
||||
ARBITRARY_NAMESPACE_ID,
|
||||
ARBITRARY_TABLE_ID,
|
||||
OwnedProjection::default(),
|
||||
None,
|
||||
predicate,
|
||||
)
|
||||
.await
|
||||
.expect("query should succeed")
|
||||
.into_partition_stream()
|
||||
.count()
|
||||
.await;
|
||||
|
||||
// Because the data in the "madrid" partition was pruned out, the
|
||||
// metadata should not be sent either.
|
||||
assert_eq!(partition_count, 1);
|
||||
}
|
||||
|
||||
/// Assert that multiple writes to a single namespace/table result in a
/// single namespace being created, with matching metrics.
|
||||
#[tokio::test]
|
||||
|
|
|
@ -270,7 +270,7 @@ where
|
|||
|
||||
// Gather the partition data from all of the partitions in this table.
|
||||
let span = SpanRecorder::new(span);
|
||||
let partitions = self.partitions().into_iter().map(move |p| {
|
||||
let partitions = self.partitions().into_iter().filter_map(move |p| {
|
||||
let mut span = span.child("partition read");
|
||||
|
||||
let (id, hash_id, completed_persistence_count, data, partition_key) = {
|
||||
|
@ -303,15 +303,24 @@ where
|
|||
})
|
||||
.unwrap_or_default()
|
||||
{
|
||||
return PartitionResponse::new(
|
||||
vec![],
|
||||
id,
|
||||
hash_id,
|
||||
completed_persistence_count,
|
||||
);
|
||||
// This partition will never contain any data that would
|
||||
// form part of the query response.
|
||||
//
|
||||
// Because this is true of buffered data, it is also
|
||||
// true of the persisted data, and therefore sending the
|
||||
// persisted file count metadata is useless because the
|
||||
// querier would never utilise the persisted files as
|
||||
// part of this query.
|
||||
//
|
||||
// This avoids sending O(n) metadata frames for queries
|
||||
// that may only touch one or two actual frames. The N
|
||||
// partition count grows over the lifetime of the
|
||||
// ingester as more partitions are created, and while
|
||||
// fast to serialise individually, the sequentially-sent
|
||||
// N metadata frames add up.
|
||||
return None;
|
||||
}
|
||||
|
||||
// Project the data if necessary
|
||||
PartitionResponse::new(
|
||||
data.into_record_batches(),
|
||||
id,
|
||||
|
@ -323,7 +332,7 @@ where
|
|||
};
|
||||
|
||||
span.ok("read partition data");
|
||||
ret
|
||||
Some(ret)
|
||||
});
|
||||
|
||||
Ok(PartitionStream::new(futures::stream::iter(partitions)))
|
||||
|
|
|
@ -16,7 +16,7 @@ mod tests {
|
|||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
use assert_matches::assert_matches;
|
||||
use data_types::{CompactionLevel, ParquetFile};
|
||||
use data_types::{CompactionLevel, ParquetFile, TransitionPartitionId};
|
||||
use futures::TryStreamExt;
|
||||
use iox_catalog::{
|
||||
interface::{get_schema_by_id, Catalog, SoftDeletedRows},
|
||||
|
@ -243,7 +243,7 @@ mod tests {
|
|||
.repositories()
|
||||
.await
|
||||
.parquet_files()
|
||||
.list_by_partition_not_to_delete(partition_id)
|
||||
.list_by_partition_not_to_delete(&TransitionPartitionId::Deprecated(partition_id))
|
||||
.await
|
||||
.expect("query for parquet files failed");
|
||||
|
||||
|
@ -344,7 +344,7 @@ mod tests {
|
|||
.await
|
||||
.partitions()
|
||||
.cas_sort_key(
|
||||
partition_id,
|
||||
&transition_partition_id,
|
||||
None,
|
||||
&["bananas", "are", "good", "for", "you"],
|
||||
)
|
||||
|
@ -392,7 +392,7 @@ mod tests {
|
|||
.repositories()
|
||||
.await
|
||||
.parquet_files()
|
||||
.list_by_partition_not_to_delete(partition_id)
|
||||
.list_by_partition_not_to_delete(&TransitionPartitionId::Deprecated(partition_id))
|
||||
.await
|
||||
.expect("query for parquet files failed");
|
||||
|
||||
|
|
|
@ -376,7 +376,11 @@ where
|
|||
let mut repos = catalog.repositories().await;
|
||||
match repos
|
||||
.partitions()
|
||||
.cas_sort_key(ctx.partition_id(), old_sort_key.clone(), &new_sort_key_str)
|
||||
.cas_sort_key(
|
||||
&ctx.transition_partition_id(),
|
||||
old_sort_key.clone(),
|
||||
&new_sort_key_str,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(_) => ControlFlow::Break(Ok(())),
|
||||
|
|
|
@ -19,7 +19,7 @@ use tokio::sync::{Semaphore, TryAcquireError};
|
|||
use tonic::{Request, Response, Streaming};
|
||||
use trace::{
|
||||
ctx::SpanContext,
|
||||
span::{Span, SpanExt},
|
||||
span::{Span, SpanExt, SpanRecorder},
|
||||
};
|
||||
|
||||
mod instrumentation;
|
||||
|
@ -175,7 +175,7 @@ where
|
|||
request: Request<Ticket>,
|
||||
) -> Result<Response<Self::DoGetStream>, tonic::Status> {
|
||||
let span_ctx: Option<SpanContext> = request.extensions().get().cloned();
|
||||
let span = span_ctx.child_span("ingester query");
|
||||
let mut query_recorder = SpanRecorder::new(span_ctx.child_span("ingester query"));
|
||||
|
||||
// Acquire and hold a permit for the duration of this request, or return
|
||||
// an error if the existing requests have already exhausted the
|
||||
|
@ -211,16 +211,23 @@ where
|
|||
|
||||
let response = match self
|
||||
.query_handler
|
||||
.query_exec(namespace_id, table_id, projection, span.clone(), predicate)
|
||||
.query_exec(
|
||||
namespace_id,
|
||||
table_id,
|
||||
projection,
|
||||
query_recorder.child_span("query exec"),
|
||||
predicate,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(v) => v,
|
||||
Err(e @ (QueryError::TableNotFound(_, _) | QueryError::NamespaceNotFound(_))) => {
|
||||
debug!(
|
||||
error=%e,
|
||||
%namespace_id,
|
||||
%table_id,
|
||||
"query error, no buffered data found");
|
||||
error=%e,
|
||||
%namespace_id,
|
||||
%table_id,
|
||||
"no buffered data found for query"
|
||||
);
|
||||
|
||||
return Err(e)?;
|
||||
}
|
||||
|
@ -229,11 +236,12 @@ where
|
|||
let output = encode_response(
|
||||
response,
|
||||
self.ingester_id,
|
||||
span,
|
||||
query_recorder.child_span("serialise response"),
|
||||
Arc::clone(&self.query_request_frame_encoding_duration),
|
||||
)
|
||||
.map_err(tonic::Status::from);
|
||||
|
||||
query_recorder.ok("query exec complete - streaming results");
|
||||
Ok(Response::new(Box::pin(output) as Self::DoGetStream))
|
||||
}
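// The span hierarchy introduced above, sketched with only the calls visible
// in this diff: one parent "ingester query" span per request, with child
// spans scoping query execution and response serialisation, so a trace shows
// where time is spent inside a single DoGet call.
//
//     let mut query_recorder = SpanRecorder::new(span_ctx.child_span("ingester query"));
//     let exec_span = query_recorder.child_span("query exec");        // wraps query_exec()
//     let ser_span = query_recorder.child_span("serialise response"); // wraps encode_response()
//     query_recorder.ok("query exec complete - streaming results");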
|
||||
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
-- By default, we often only have 5min to finish our statements. The `CREATE INDEX CONCURRENTLY`,
|
||||
-- however, can take longer.
|
||||
-- IOX_NO_TRANSACTION
|
||||
SET statement_timeout TO '60min';
|
||||
|
||||
-- IOX_STEP_BOUNDARY
|
||||
|
||||
-- IOX_NO_TRANSACTION
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS parquet_file_partition_hash_id_idx
|
||||
ON parquet_file (partition_hash_id)
|
||||
WHERE partition_hash_id IS NOT NULL;
|
|
@ -0,0 +1,3 @@
|
|||
CREATE INDEX IF NOT EXISTS parquet_file_partition_hash_id_idx
|
||||
ON parquet_file (partition_hash_id)
|
||||
WHERE partition_hash_id IS NOT NULL;
|
|
@ -6,7 +6,7 @@ use data_types::{
|
|||
Column, ColumnType, ColumnsByName, CompactionLevel, Namespace, NamespaceId, NamespaceName,
|
||||
NamespaceSchema, NamespaceServiceProtectionLimitsOverride, ParquetFile, ParquetFileId,
|
||||
ParquetFileParams, Partition, PartitionHashId, PartitionId, PartitionKey, SkippedCompaction,
|
||||
Table, TableId, TableSchema, Timestamp,
|
||||
Table, TableId, TableSchema, Timestamp, TransitionPartitionId,
|
||||
};
|
||||
use iox_time::TimeProvider;
|
||||
use snafu::{OptionExt, Snafu};
|
||||
|
@ -80,7 +80,7 @@ pub enum Error {
|
|||
TableNotFound { id: TableId },
|
||||
|
||||
#[snafu(display("partition {} not found", id))]
|
||||
PartitionNotFound { id: PartitionId },
|
||||
PartitionNotFound { id: TransitionPartitionId },
|
||||
|
||||
#[snafu(display(
|
||||
"couldn't create column {} in table {}; limit reached on namespace",
|
||||
|
@ -397,7 +397,7 @@ pub trait PartitionRepo: Send + Sync {
|
|||
/// concurrent writers.
|
||||
async fn cas_sort_key(
|
||||
&mut self,
|
||||
partition_id: PartitionId,
|
||||
partition_id: &TransitionPartitionId,
|
||||
old_sort_key: Option<Vec<String>>,
|
||||
new_sort_key: &[&str],
|
||||
) -> Result<Partition, CasFailure<Vec<String>>>;
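// A usage sketch of the new signature (illustrative only; `repos`,
// `partition`, and `observed_old_key` are assumed to be in hand): callers
// pass the sort key they last observed, and a `CasFailure::ValueMismatch`
// carries the key another writer installed so the caller can merge and
// retry, while `CasFailure::QueryError` is a plain catalog error.
//
//     match repos
//         .partitions()
//         .cas_sort_key(
//             &partition.transition_partition_id(),
//             Some(observed_old_key),
//             &["tag1", "tag2", "time"],
//         )
//         .await
//     {
//         Ok(updated) => { /* sort key written; `updated.sort_key` is authoritative */ }
//         Err(CasFailure::ValueMismatch(current)) => { /* merge with `current` and retry */ }
//         Err(CasFailure::QueryError(e)) => { /* surface the catalog error */ }
//     }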
|
||||
|
@ -483,7 +483,7 @@ pub trait ParquetFileRepo: Send + Sync {
|
|||
/// [`to_delete`](ParquetFile::to_delete).
|
||||
async fn list_by_partition_not_to_delete(
|
||||
&mut self,
|
||||
partition_id: PartitionId,
|
||||
partition_id: &TransitionPartitionId,
|
||||
) -> Result<Vec<ParquetFile>>;
|
||||
|
||||
/// Return the parquet file with the given object store id
|
||||
|
@ -1549,7 +1549,11 @@ pub(crate) mod test_helpers {
|
|||
// test update_sort_key from None to Some
|
||||
repos
|
||||
.partitions()
|
||||
.cas_sort_key(other_partition.id, None, &["tag2", "tag1", "time"])
|
||||
.cas_sort_key(
|
||||
&other_partition.transition_partition_id(),
|
||||
None,
|
||||
&["tag2", "tag1", "time"],
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
|
@ -1557,7 +1561,7 @@ pub(crate) mod test_helpers {
|
|||
let err = repos
|
||||
.partitions()
|
||||
.cas_sort_key(
|
||||
other_partition.id,
|
||||
&other_partition.transition_partition_id(),
|
||||
Some(["bananas".to_string()].to_vec()),
|
||||
&["tag2", "tag1", "tag3 , with comma", "time"],
|
||||
)
|
||||
|
@ -1593,7 +1597,7 @@ pub(crate) mod test_helpers {
|
|||
let err = repos
|
||||
.partitions()
|
||||
.cas_sort_key(
|
||||
other_partition.id,
|
||||
&other_partition.transition_partition_id(),
|
||||
None,
|
||||
&["tag2", "tag1", "tag3 , with comma", "time"],
|
||||
)
|
||||
|
@ -1607,7 +1611,7 @@ pub(crate) mod test_helpers {
|
|||
let err = repos
|
||||
.partitions()
|
||||
.cas_sort_key(
|
||||
other_partition.id,
|
||||
&other_partition.transition_partition_id(),
|
||||
Some(["bananas".to_string()].to_vec()),
|
||||
&["tag2", "tag1", "tag3 , with comma", "time"],
|
||||
)
|
||||
|
@ -1621,7 +1625,7 @@ pub(crate) mod test_helpers {
|
|||
repos
|
||||
.partitions()
|
||||
.cas_sort_key(
|
||||
other_partition.id,
|
||||
&other_partition.transition_partition_id(),
|
||||
Some(
|
||||
["tag2", "tag1", "time"]
|
||||
.into_iter()
|
||||
|
@ -2676,6 +2680,7 @@ pub(crate) mod test_helpers {
|
|||
|
||||
let other_partition_params = ParquetFileParams {
|
||||
partition_id: partition2.id,
|
||||
partition_hash_id: partition2.hash_id().cloned(),
|
||||
object_store_id: Uuid::new_v4(),
|
||||
..parquet_file_params.clone()
|
||||
};
|
||||
|
@ -2687,14 +2692,16 @@ pub(crate) mod test_helpers {
|
|||
|
||||
let files = repos
|
||||
.parquet_files()
|
||||
.list_by_partition_not_to_delete(partition.id)
|
||||
.list_by_partition_not_to_delete(&partition.transition_partition_id())
|
||||
.await
|
||||
.unwrap();
|
||||
// Not asserting against a vector literal, to guard against flakiness due
// to the uncertain ordering of the SQL query results in the postgres impl.
|
||||
assert_eq!(files.len(), 2);
|
||||
assert_matches!(files.iter().find(|f| f.id == parquet_file.id), Some(_));
|
||||
assert_matches!(files.iter().find(|f| f.id == level1_file.id), Some(_));
|
||||
|
||||
let mut file_ids: Vec<_> = files.into_iter().map(|f| f.id).collect();
|
||||
file_ids.sort();
|
||||
let mut expected_ids = vec![parquet_file.id, level1_file.id];
|
||||
expected_ids.sort();
|
||||
assert_eq!(file_ids, expected_ids);
|
||||
|
||||
// remove namespace to avoid it from affecting later tests
|
||||
repos
|
||||
|
|
|
@ -22,7 +22,7 @@ use workspace_hack as _;
|
|||
use crate::interface::{ColumnTypeMismatchSnafu, Error, RepoCollection, Result};
|
||||
use data_types::{
|
||||
partition_template::{NamespacePartitionTemplateOverride, TablePartitionTemplateOverride},
|
||||
ColumnType, NamespaceId, NamespaceSchema, TableSchema,
|
||||
ColumnType, NamespaceId, NamespaceSchema, Partition, TableSchema, TransitionPartitionId,
|
||||
};
|
||||
use mutable_batch::MutableBatch;
|
||||
use std::{borrow::Cow, collections::HashMap};
|
||||
|
@ -67,6 +67,27 @@ impl TableScopedError {
|
|||
}
|
||||
}
|
||||
|
||||
/// Look up a partition in the catalog by either database-assigned ID or deterministic hash ID.
|
||||
///
|
||||
/// The existence of this function should be temporary; it can be removed
/// once all partition lookups use only the deterministic hash ID.
|
||||
pub async fn partition_lookup<R>(
|
||||
repos: &mut R,
|
||||
id: &TransitionPartitionId,
|
||||
) -> Result<Option<Partition>, Error>
|
||||
where
|
||||
R: RepoCollection + ?Sized,
|
||||
{
|
||||
match id {
|
||||
TransitionPartitionId::Deprecated(partition_id) => {
|
||||
repos.partitions().get_by_id(*partition_id).await
|
||||
}
|
||||
TransitionPartitionId::Deterministic(partition_hash_id) => {
|
||||
repos.partitions().get_by_hash_id(partition_hash_id).await
|
||||
}
|
||||
}
|
||||
}
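// A short usage sketch (illustrative helper, not part of the crate): derive
// the identifier from whichever form the `Partition` row carries via
// `transition_partition_id()`, then look the row back up through the same
// repositories handle.
pub async fn partition_exists<R>(repos: &mut R, partition: &Partition) -> Result<bool, Error>
where
    R: RepoCollection + ?Sized,
{
    let id = partition.transition_partition_id();
    // `Ok(None)` means the catalog does not know this partition.
    Ok(partition_lookup(repos, &id).await?.is_some())
}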
|
||||
|
||||
/// Given an iterator of `(table_name, batch)` to validate, this function
|
||||
/// ensures all the columns within `batch` match the existing schema for
|
||||
/// `table_name` in `schema`. If the column does not already exist in `schema`,
|
||||
|
|
|
@ -19,7 +19,7 @@ use data_types::{
|
|||
Column, ColumnId, ColumnType, CompactionLevel, Namespace, NamespaceId, NamespaceName,
|
||||
NamespaceServiceProtectionLimitsOverride, ParquetFile, ParquetFileId, ParquetFileParams,
|
||||
Partition, PartitionHashId, PartitionId, PartitionKey, SkippedCompaction, Table, TableId,
|
||||
Timestamp,
|
||||
Timestamp, TransitionPartitionId,
|
||||
};
|
||||
use iox_time::{SystemProvider, TimeProvider};
|
||||
use snafu::ensure;
|
||||
|
@ -625,20 +625,26 @@ impl PartitionRepo for MemTxn {
|
|||
|
||||
async fn cas_sort_key(
|
||||
&mut self,
|
||||
partition_id: PartitionId,
|
||||
partition_id: &TransitionPartitionId,
|
||||
old_sort_key: Option<Vec<String>>,
|
||||
new_sort_key: &[&str],
|
||||
) -> Result<Partition, CasFailure<Vec<String>>> {
|
||||
let stage = self.stage();
|
||||
let old_sort_key = old_sort_key.unwrap_or_default();
|
||||
match stage.partitions.iter_mut().find(|p| p.id == partition_id) {
|
||||
|
||||
match stage.partitions.iter_mut().find(|p| match partition_id {
|
||||
TransitionPartitionId::Deterministic(hash_id) => {
|
||||
p.hash_id().map_or(false, |h| h == hash_id)
|
||||
}
|
||||
TransitionPartitionId::Deprecated(id) => p.id == *id,
|
||||
}) {
|
||||
Some(p) if p.sort_key == old_sort_key => {
|
||||
p.sort_key = new_sort_key.iter().map(|s| s.to_string()).collect();
|
||||
Ok(p.clone())
|
||||
}
|
||||
Some(p) => return Err(CasFailure::ValueMismatch(p.sort_key.clone())),
|
||||
None => Err(CasFailure::QueryError(Error::PartitionNotFound {
|
||||
id: partition_id,
|
||||
id: partition_id.clone(),
|
||||
})),
|
||||
}
|
||||
}
|
||||
|
@ -844,14 +850,20 @@ impl ParquetFileRepo for MemTxn {
|
|||
|
||||
async fn list_by_partition_not_to_delete(
|
||||
&mut self,
|
||||
partition_id: PartitionId,
|
||||
partition_id: &TransitionPartitionId,
|
||||
) -> Result<Vec<ParquetFile>> {
|
||||
let stage = self.stage();
|
||||
|
||||
Ok(stage
|
||||
.parquet_files
|
||||
.iter()
|
||||
.filter(|f| f.partition_id == partition_id && f.to_delete.is_none())
|
||||
.filter(|f| match partition_id {
|
||||
TransitionPartitionId::Deterministic(hash_id) => {
|
||||
f.partition_hash_id.as_ref().map_or(false, |h| h == hash_id)
|
||||
}
|
||||
TransitionPartitionId::Deprecated(id) => f.partition_id == *id,
|
||||
})
|
||||
.filter(|f| f.to_delete.is_none())
|
||||
.cloned()
|
||||
.collect())
|
||||
}
|
||||
|
@ -962,7 +974,9 @@ async fn create_parquet_file(
|
|||
.partitions
|
||||
.iter_mut()
|
||||
.find(|p| p.id == partition_id)
|
||||
.ok_or(Error::PartitionNotFound { id: partition_id })?;
|
||||
.ok_or(Error::PartitionNotFound {
|
||||
id: TransitionPartitionId::Deprecated(partition_id),
|
||||
})?;
|
||||
partition.new_file_at = Some(created_at);
|
||||
}
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ use data_types::{
Column, ColumnType, CompactionLevel, Namespace, NamespaceId, NamespaceName,
NamespaceServiceProtectionLimitsOverride, ParquetFile, ParquetFileId, ParquetFileParams,
Partition, PartitionHashId, PartitionId, PartitionKey, SkippedCompaction, Table, TableId,
Timestamp,
Timestamp, TransitionPartitionId,
};
use iox_time::{SystemProvider, TimeProvider};
use metric::{DurationHistogram, Metric};
@ -174,7 +174,7 @@ decorate!(
"partition_get_by_hash_id" = get_by_hash_id(&mut self, partition_hash_id: &PartitionHashId) -> Result<Option<Partition>>;
"partition_list_by_table_id" = list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>>;
"partition_list_ids" = list_ids(&mut self) -> Result<Vec<PartitionId>>;
"partition_update_sort_key" = cas_sort_key(&mut self, partition_id: PartitionId, old_sort_key: Option<Vec<String>>, new_sort_key: &[&str]) -> Result<Partition, CasFailure<Vec<String>>>;
"partition_update_sort_key" = cas_sort_key(&mut self, partition_id: &TransitionPartitionId, old_sort_key: Option<Vec<String>>, new_sort_key: &[&str]) -> Result<Partition, CasFailure<Vec<String>>>;
"partition_record_skipped_compaction" = record_skipped_compaction(&mut self, partition_id: PartitionId, reason: &str, num_files: usize, limit_num_files: usize, limit_num_files_first_in_partition: usize, estimated_bytes: u64, limit_bytes: u64) -> Result<()>;
"partition_list_skipped_compactions" = list_skipped_compactions(&mut self) -> Result<Vec<SkippedCompaction>>;
"partition_delete_skipped_compactions" = delete_skipped_compactions(&mut self, partition_id: PartitionId) -> Result<Option<SkippedCompaction>>;
@ -193,7 +193,7 @@ decorate!(
"parquet_list_by_namespace_not_to_delete" = list_by_namespace_not_to_delete(&mut self, namespace_id: NamespaceId) -> Result<Vec<ParquetFile>>;
"parquet_list_by_table_not_to_delete" = list_by_table_not_to_delete(&mut self, table_id: TableId) -> Result<Vec<ParquetFile>>;
"parquet_delete_old_ids_only" = delete_old_ids_only(&mut self, older_than: Timestamp) -> Result<Vec<ParquetFileId>>;
"parquet_list_by_partition_not_to_delete" = list_by_partition_not_to_delete(&mut self, partition_id: PartitionId) -> Result<Vec<ParquetFile>>;
"parquet_list_by_partition_not_to_delete" = list_by_partition_not_to_delete(&mut self, partition_id: &TransitionPartitionId) -> Result<Vec<ParquetFile>>;
"parquet_get_by_object_store_id" = get_by_object_store_id(&mut self, object_store_id: Uuid) -> Result<Option<ParquetFile>>;
"parquet_exists_by_object_store_id_batch" = exists_by_object_store_id_batch(&mut self, object_store_ids: Vec<Uuid>) -> Result<Vec<Uuid>>;
"parquet_create_upgrade_delete" = create_upgrade_delete(&mut self, delete: &[ParquetFileId], upgrade: &[ParquetFileId], create: &[ParquetFileParams], target_level: CompactionLevel) -> Result<Vec<ParquetFileId>>;
@ -23,7 +23,7 @@ use data_types::{
Column, ColumnType, CompactionLevel, Namespace, NamespaceId, NamespaceName,
NamespaceServiceProtectionLimitsOverride, ParquetFile, ParquetFileId, ParquetFileParams,
Partition, PartitionHashId, PartitionId, PartitionKey, SkippedCompaction, Table, TableId,
Timestamp,
Timestamp, TransitionPartitionId,
};
use iox_time::{SystemProvider, TimeProvider};
use observability_deps::tracing::{debug, info, warn};
@ -1153,24 +1153,38 @@ WHERE table_id = $1;
/// round trips to service a transaction in the happy path).
async fn cas_sort_key(
&mut self,
partition_id: PartitionId,
partition_id: &TransitionPartitionId,
old_sort_key: Option<Vec<String>>,
new_sort_key: &[&str],
) -> Result<Partition, CasFailure<Vec<String>>> {
let old_sort_key = old_sort_key.unwrap_or_default();
let res = sqlx::query_as::<_, Partition>(
r#"
// This `match` will go away when all partitions have hash IDs in the database.
let query = match partition_id {
TransitionPartitionId::Deterministic(hash_id) => sqlx::query_as::<_, Partition>(
r#"
UPDATE partition
SET sort_key = $1
WHERE hash_id = $2 AND sort_key = $3
RETURNING id, hash_id, table_id, partition_key, sort_key, new_file_at;
"#,
)
.bind(new_sort_key) // $1
.bind(hash_id) // $2
.bind(&old_sort_key), // $3
TransitionPartitionId::Deprecated(id) => sqlx::query_as::<_, Partition>(
r#"
UPDATE partition
SET sort_key = $1
WHERE id = $2 AND sort_key = $3
RETURNING id, hash_id, table_id, partition_key, sort_key, new_file_at;
"#,
)
.bind(new_sort_key) // $1
.bind(partition_id) // $2
.bind(&old_sort_key) // $3
.fetch_one(&mut self.inner)
.await;
)
.bind(new_sort_key) // $1
.bind(id) // $2
.bind(&old_sort_key), // $3
};

let res = query.fetch_one(&mut self.inner).await;

let partition = match res {
Ok(v) => v,
@ -1187,11 +1201,11 @@ RETURNING id, hash_id, table_id, partition_key, sort_key, new_file_at;
// NOTE: this is racy, but documented - this might return "Sort
// key differs! Old key: <old sort key you provided>"
return Err(CasFailure::ValueMismatch(
PartitionRepo::get_by_id(self, partition_id)
crate::partition_lookup(self, partition_id)
.await
.map_err(CasFailure::QueryError)?
.ok_or(CasFailure::QueryError(Error::PartitionNotFound {
id: partition_id,
id: partition_id.clone(),
}))?
.sort_key,
));
@ -1458,10 +1472,23 @@ RETURNING id;

async fn list_by_partition_not_to_delete(
&mut self,
partition_id: PartitionId,
partition_id: &TransitionPartitionId,
) -> Result<Vec<ParquetFile>> {
sqlx::query_as::<_, ParquetFile>(
r#"
// This `match` will go away when all partitions have hash IDs in the database.
let query = match partition_id {
TransitionPartitionId::Deterministic(hash_id) => sqlx::query_as::<_, ParquetFile>(
r#"
SELECT id, namespace_id, table_id, partition_id, partition_hash_id, object_store_id, min_time,
max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set,
max_l0_created_at
FROM parquet_file
WHERE parquet_file.partition_hash_id = $1
AND parquet_file.to_delete IS NULL;
"#,
)
.bind(hash_id), // $1
TransitionPartitionId::Deprecated(id) => sqlx::query_as::<_, ParquetFile>(
r#"
SELECT id, namespace_id, table_id, partition_id, partition_hash_id, object_store_id, min_time,
max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set,
max_l0_created_at
@ -1469,11 +1496,14 @@ FROM parquet_file
WHERE parquet_file.partition_id = $1
AND parquet_file.to_delete IS NULL;
"#,
)
.bind(partition_id) // $1
.fetch_all(&mut self.inner)
.await
.map_err(|e| Error::SqlxError { source: e })
)
.bind(id), // $1
};

query
.fetch_all(&mut self.inner)
.await
.map_err(|e| Error::SqlxError { source: e })
}

async fn get_by_object_store_id(
@ -21,7 +21,7 @@ use data_types::{
|
|||
Column, ColumnId, ColumnSet, ColumnType, CompactionLevel, Namespace, NamespaceId,
|
||||
NamespaceName, NamespaceServiceProtectionLimitsOverride, ParquetFile, ParquetFileId,
|
||||
ParquetFileParams, Partition, PartitionHashId, PartitionId, PartitionKey, SkippedCompaction,
|
||||
Table, TableId, Timestamp,
|
||||
Table, TableId, Timestamp, TransitionPartitionId,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashSet;
|
||||
|
@ -952,24 +952,39 @@ WHERE table_id = $1;
|
|||
/// round trips to service a transaction in the happy path).
|
||||
async fn cas_sort_key(
|
||||
&mut self,
|
||||
partition_id: PartitionId,
|
||||
partition_id: &TransitionPartitionId,
|
||||
old_sort_key: Option<Vec<String>>,
|
||||
new_sort_key: &[&str],
|
||||
) -> Result<Partition, CasFailure<Vec<String>>> {
|
||||
let old_sort_key = old_sort_key.unwrap_or_default();
|
||||
let res = sqlx::query_as::<_, PartitionPod>(
|
||||
r#"
|
||||
|
||||
// This `match` will go away when all partitions have hash IDs in the database.
|
||||
let query = match partition_id {
|
||||
TransitionPartitionId::Deterministic(hash_id) => sqlx::query_as::<_, PartitionPod>(
|
||||
r#"
|
||||
UPDATE partition
|
||||
SET sort_key = $1
|
||||
WHERE hash_id = $2 AND sort_key = $3
|
||||
RETURNING id, hash_id, table_id, partition_key, sort_key, new_file_at;
|
||||
"#,
|
||||
)
|
||||
.bind(Json(new_sort_key)) // $1
|
||||
.bind(hash_id) // $2
|
||||
.bind(Json(&old_sort_key)), // $3
|
||||
TransitionPartitionId::Deprecated(id) => sqlx::query_as::<_, PartitionPod>(
|
||||
r#"
|
||||
UPDATE partition
|
||||
SET sort_key = $1
|
||||
WHERE id = $2 AND sort_key = $3
|
||||
RETURNING id, hash_id, table_id, partition_key, sort_key, new_file_at;
|
||||
"#,
|
||||
)
|
||||
.bind(Json(new_sort_key)) // $1
|
||||
.bind(partition_id) // $2
|
||||
.bind(Json(&old_sort_key)) // $3
|
||||
.fetch_one(self.inner.get_mut())
|
||||
.await;
|
||||
)
|
||||
.bind(Json(new_sort_key)) // $1
|
||||
.bind(id) // $2
|
||||
.bind(Json(&old_sort_key)), // $3
|
||||
};
|
||||
|
||||
let res = query.fetch_one(self.inner.get_mut()).await;
|
||||
|
||||
let partition = match res {
|
||||
Ok(v) => v,
|
||||
|
@ -986,11 +1001,11 @@ RETURNING id, hash_id, table_id, partition_key, sort_key, new_file_at;
|
|||
// NOTE: this is racy, but documented - this might return "Sort
|
||||
// key differs! Old key: <old sort key you provided>"
|
||||
return Err(CasFailure::ValueMismatch(
|
||||
PartitionRepo::get_by_id(self, partition_id)
|
||||
crate::partition_lookup(self, partition_id)
|
||||
.await
|
||||
.map_err(CasFailure::QueryError)?
|
||||
.ok_or(CasFailure::QueryError(Error::PartitionNotFound {
|
||||
id: partition_id,
|
||||
id: partition_id.clone(),
|
||||
}))?
|
||||
.sort_key,
|
||||
));
|
||||
|
@ -1323,10 +1338,23 @@ RETURNING id;
|
|||
|
||||
async fn list_by_partition_not_to_delete(
|
||||
&mut self,
|
||||
partition_id: PartitionId,
|
||||
partition_id: &TransitionPartitionId,
|
||||
) -> Result<Vec<ParquetFile>> {
|
||||
Ok(sqlx::query_as::<_, ParquetFilePod>(
|
||||
r#"
|
||||
// This `match` will go away when all partitions have hash IDs in the database.
|
||||
let query = match partition_id {
|
||||
TransitionPartitionId::Deterministic(hash_id) => sqlx::query_as::<_, ParquetFilePod>(
|
||||
r#"
|
||||
SELECT id, namespace_id, table_id, partition_id, partition_hash_id, object_store_id, min_time,
|
||||
max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set,
|
||||
max_l0_created_at
|
||||
FROM parquet_file
|
||||
WHERE parquet_file.partition_hash_id = $1
|
||||
AND parquet_file.to_delete IS NULL;
|
||||
"#,
|
||||
)
|
||||
.bind(hash_id), // $1
|
||||
TransitionPartitionId::Deprecated(id) => sqlx::query_as::<_, ParquetFilePod>(
|
||||
r#"
|
||||
SELECT id, namespace_id, table_id, partition_id, partition_hash_id, object_store_id, min_time,
|
||||
max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set,
|
||||
max_l0_created_at
|
||||
|
@ -1334,14 +1362,17 @@ FROM parquet_file
|
|||
WHERE parquet_file.partition_id = $1
|
||||
AND parquet_file.to_delete IS NULL;
|
||||
"#,
|
||||
)
|
||||
.bind(partition_id) // $1
|
||||
.fetch_all(self.inner.get_mut())
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })?
|
||||
.into_iter()
|
||||
.map(Into::into)
|
||||
.collect())
|
||||
)
|
||||
.bind(id), // $1
|
||||
};
|
||||
|
||||
Ok(query
|
||||
.fetch_all(self.inner.get_mut())
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })?
|
||||
.into_iter()
|
||||
.map(Into::into)
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn get_by_object_store_id(
|
||||
|
|
|
@ -23,7 +23,7 @@ rand = { version = "0.8.3", features = ["small_rng"] }
regex = "1.9"
schema = { path = "../schema" }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.100"
serde_json = "1.0.102"
snafu = "0.7"
tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] }
toml = "0.7.6"
@ -27,7 +27,7 @@ use datafusion::{
physical_plan::{
expressions::Column,
metrics::{BaselineMetrics, ExecutionPlanMetricsSet},
DisplayFormatType, Distribution, ExecutionPlan, Partitioning, PhysicalExpr,
DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, PhysicalExpr,
SendableRecordBatchStream, Statistics,
},
prelude::Expr,
@ -534,6 +534,12 @@ impl ExecutionPlan for GapFillExec {
)?))
}

fn statistics(&self) -> Statistics {
Statistics::default()
}
}

impl DisplayAs for GapFillExec {
fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match t {
DisplayFormatType::Default | DisplayFormatType::Verbose => {
@ -566,10 +572,6 @@ impl ExecutionPlan for GapFillExec {
}
}
}

fn statistics(&self) -> Statistics {
Statistics::default()
}
}

#[cfg(test)]
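// Throughout this diff, `fmt_as` moves off each `ExecutionPlan` impl and into a
// dedicated `DisplayAs` impl, matching the DataFusion upgrade in which plan
// display became a separate trait. A minimal sketch of the new shape for a
// hypothetical node (`MyExec` is illustrative, not part of this codebase, and
// the snippet assumes a DataFusion version whose `physical_plan::DisplayAs`
// has exactly this signature):
use std::fmt;

use datafusion::physical_plan::{DisplayAs, DisplayFormatType};

struct MyExec {
    name: String,
}

impl DisplayAs for MyExec {
    fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match t {
            DisplayFormatType::Default | DisplayFormatType::Verbose => {
                write!(f, "MyExec: name={}", self.name)
            }
        }
    }
}

// Tiny adapter so the sketch prints on its own; real plans are rendered via
// DataFusion's `displayable(...)` helpers instead.
impl fmt::Display for MyExec {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.fmt_as(DisplayFormatType::Default, f)
    }
}

fn main() {
    let exec = MyExec { name: "demo".to_string() };
    println!("{exec}"); // MyExec: name=demo
}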
@ -54,8 +54,8 @@ use datafusion::{
|
|||
physical_plan::{
|
||||
expressions::PhysicalSortExpr,
|
||||
metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet},
|
||||
DisplayFormatType, Distribution, ExecutionPlan, Partitioning, SendableRecordBatchStream,
|
||||
Statistics,
|
||||
DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning,
|
||||
SendableRecordBatchStream, Statistics,
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -272,14 +272,6 @@ impl ExecutionPlan for NonNullCheckerExec {
|
|||
Ok(AdapterStream::adapt(self.schema(), rx, handle))
|
||||
}
|
||||
|
||||
fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match t {
|
||||
DisplayFormatType::Default | DisplayFormatType::Verbose => {
|
||||
write!(f, "NonNullCheckerExec")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn metrics(&self) -> Option<MetricsSet> {
|
||||
Some(self.metrics.clone_inner())
|
||||
}
|
||||
|
@ -290,6 +282,16 @@ impl ExecutionPlan for NonNullCheckerExec {
|
|||
}
|
||||
}
|
||||
|
||||
impl DisplayAs for NonNullCheckerExec {
|
||||
fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match t {
|
||||
DisplayFormatType::Default | DisplayFormatType::Verbose => {
|
||||
write!(f, "NonNullCheckerExec")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn check_for_nulls(
|
||||
mut input_stream: SendableRecordBatchStream,
|
||||
schema: SchemaRef,
|
||||
|
|
|
@ -345,7 +345,7 @@ mod tests {
|
|||
physical_plan::{
|
||||
expressions::PhysicalSortExpr,
|
||||
metrics::{Count, Time, Timestamp},
|
||||
Metric,
|
||||
DisplayAs, Metric,
|
||||
},
|
||||
};
|
||||
use std::{collections::BTreeMap, str::FromStr, sync::Arc, time::Duration};
|
||||
|
@ -679,7 +679,9 @@ mod tests {
|
|||
fn metrics(&self) -> Option<MetricsSet> {
|
||||
self.metrics.clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl DisplayAs for TestExec {
|
||||
fn fmt_as(&self, _t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "TestExec - {}", self.name)
|
||||
}
|
||||
|
|
|
@ -30,7 +30,6 @@ use arrow::{
|
|||
error::ArrowError,
|
||||
record_batch::RecordBatch,
|
||||
};
|
||||
use datafusion::error::DataFusionError;
|
||||
use datafusion::{
|
||||
common::{DFSchemaRef, ToDFSchema},
|
||||
error::{DataFusionError as Error, Result},
|
||||
|
@ -43,6 +42,7 @@ use datafusion::{
|
|||
Statistics,
|
||||
},
|
||||
};
|
||||
use datafusion::{error::DataFusionError, physical_plan::DisplayAs};
|
||||
|
||||
use datafusion_util::{watch::WatchedTask, AdapterStream};
|
||||
use observability_deps::tracing::debug;
|
||||
|
@ -247,14 +247,6 @@ impl ExecutionPlan for SchemaPivotExec {
|
|||
Ok(AdapterStream::adapt(self.schema(), rx, handle))
|
||||
}
|
||||
|
||||
fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match t {
|
||||
DisplayFormatType::Default | DisplayFormatType::Verbose => {
|
||||
write!(f, "SchemaPivotExec")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn metrics(&self) -> Option<MetricsSet> {
|
||||
Some(self.metrics.clone_inner())
|
||||
}
|
||||
|
@ -265,6 +257,16 @@ impl ExecutionPlan for SchemaPivotExec {
|
|||
}
|
||||
}
|
||||
|
||||
impl DisplayAs for SchemaPivotExec {
|
||||
fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match t {
|
||||
DisplayFormatType::Default | DisplayFormatType::Verbose => {
|
||||
write!(f, "SchemaPivotExec")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Algorithm: for each column we haven't seen a value for yet,
|
||||
// check each input row;
|
||||
//
|
||||
|
|
|
@ -67,8 +67,8 @@ use datafusion::{
|
|||
physical_plan::{
|
||||
expressions::PhysicalSortExpr,
|
||||
metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet, RecordOutput},
|
||||
ColumnarValue, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, PhysicalExpr,
|
||||
SendableRecordBatchStream, Statistics,
|
||||
ColumnarValue, DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning,
|
||||
PhysicalExpr, SendableRecordBatchStream, Statistics,
|
||||
},
|
||||
scalar::ScalarValue,
|
||||
};
|
||||
|
@ -267,14 +267,6 @@ impl ExecutionPlan for StreamSplitExec {
|
|||
}
|
||||
}
|
||||
|
||||
fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match t {
|
||||
DisplayFormatType::Default | DisplayFormatType::Verbose => {
|
||||
write!(f, "StreamSplitExec")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn metrics(&self) -> Option<MetricsSet> {
|
||||
Some(self.metrics.clone_inner())
|
||||
}
|
||||
|
@ -286,6 +278,16 @@ impl ExecutionPlan for StreamSplitExec {
|
|||
}
|
||||
}
|
||||
|
||||
impl DisplayAs for StreamSplitExec {
|
||||
fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match t {
|
||||
DisplayFormatType::Default | DisplayFormatType::Verbose => {
|
||||
write!(f, "StreamSplitExec")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl StreamSplitExec {
|
||||
/// if in State::New, sets up the output running and sets self.state --> `Running`
|
||||
fn start_if_needed(&self, context: Arc<TaskContext>) -> Result<()> {
|
||||
|
|
|
@ -7,7 +7,7 @@ use datafusion::{
|
|||
DFSchema,
|
||||
},
|
||||
error::Result,
|
||||
logical_expr::{Between, BinaryExpr, LogicalPlan, Operator},
|
||||
logical_expr::{expr::Alias, Between, BinaryExpr, LogicalPlan, Operator},
|
||||
optimizer::utils::split_conjunction,
|
||||
prelude::{Column, Expr},
|
||||
};
|
||||
|
@ -79,7 +79,7 @@ impl TreeNodeVisitor for TimeRangeVisitor {
|
|||
fn unwrap_alias(mut e: &Expr) -> &Expr {
|
||||
loop {
|
||||
match e {
|
||||
Expr::Alias(inner, _) => e = inner.as_ref(),
|
||||
Expr::Alias(Alias { expr, .. }) => e = expr.as_ref(),
|
||||
e => break e,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -434,7 +434,7 @@ mod tests {
|
|||
logical_expr::Operator,
|
||||
physical_plan::{
|
||||
expressions::{BinaryExpr, Literal},
|
||||
PhysicalExpr, Statistics,
|
||||
DisplayAs, PhysicalExpr, Statistics,
|
||||
},
|
||||
scalar::ScalarValue,
|
||||
};
|
||||
|
@ -1695,6 +1695,12 @@ mod tests {
|
|||
unimplemented!()
|
||||
}
|
||||
|
||||
fn statistics(&self) -> datafusion::physical_plan::Statistics {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
impl DisplayAs for TestExec {
|
||||
fn fmt_as(
|
||||
&self,
|
||||
_t: datafusion::physical_plan::DisplayFormatType,
|
||||
|
@ -1702,9 +1708,5 @@ mod tests {
|
|||
) -> std::fmt::Result {
|
||||
write!(f, "Test")
|
||||
}
|
||||
|
||||
fn statistics(&self) -> datafusion::physical_plan::Statistics {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,8 +20,8 @@ use datafusion::{
|
|||
metrics::{
|
||||
self, BaselineMetrics, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet, RecordOutput,
|
||||
},
|
||||
DisplayFormatType, Distribution, ExecutionPlan, Partitioning, SendableRecordBatchStream,
|
||||
Statistics,
|
||||
DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning,
|
||||
SendableRecordBatchStream, Statistics,
|
||||
},
|
||||
};
|
||||
use futures::StreamExt;
|
||||
|
@ -267,15 +267,6 @@ impl ExecutionPlan for DeduplicateExec {
|
|||
vec![Distribution::SinglePartition]
|
||||
}
|
||||
|
||||
fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match t {
|
||||
DisplayFormatType::Default | DisplayFormatType::Verbose => {
|
||||
let expr: Vec<String> = self.sort_keys.iter().map(|e| e.to_string()).collect();
|
||||
write!(f, "DeduplicateExec: [{}]", expr.join(","))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn metrics(&self) -> Option<MetricsSet> {
|
||||
Some(self.metrics.clone_inner())
|
||||
}
|
||||
|
@ -289,6 +280,17 @@ impl ExecutionPlan for DeduplicateExec {
|
|||
}
|
||||
}
|
||||
|
||||
impl DisplayAs for DeduplicateExec {
|
||||
fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match t {
|
||||
DisplayFormatType::Default | DisplayFormatType::Verbose => {
|
||||
let expr: Vec<String> = self.sort_keys.iter().map(|e| e.to_string()).collect();
|
||||
write!(f, "DeduplicateExec: [{}]", expr.join(","))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn deduplicate(
|
||||
mut input_stream: SendableRecordBatchStream,
|
||||
sort_keys: Vec<PhysicalSortExpr>,
|
||||
|
@ -1222,4 +1224,10 @@ mod test {
|
|||
Statistics::default()
|
||||
}
|
||||
}
|
||||
|
||||
impl DisplayAs for DummyExec {
|
||||
fn fmt_as(&self, _t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "DummyExec")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -14,7 +14,7 @@ use datafusion::{
|
|||
expressions::{Column, PhysicalSortExpr},
|
||||
memory::MemoryStream,
|
||||
metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet},
|
||||
ColumnStatistics, DisplayFormatType, ExecutionPlan, Partitioning,
|
||||
ColumnStatistics, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning,
|
||||
SendableRecordBatchStream, Statistics,
|
||||
},
|
||||
scalar::ScalarValue,
|
||||
|
@ -234,6 +234,16 @@ impl ExecutionPlan for RecordBatchesExec {
|
|||
Ok(adapter)
|
||||
}
|
||||
|
||||
fn metrics(&self) -> Option<MetricsSet> {
|
||||
Some(self.metrics.clone_inner())
|
||||
}
|
||||
|
||||
fn statistics(&self) -> Statistics {
|
||||
self.statistics.clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl DisplayAs for RecordBatchesExec {
|
||||
fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let total_groups = self.chunks.len();
|
||||
|
||||
|
@ -258,12 +268,4 @@ impl ExecutionPlan for RecordBatchesExec {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn metrics(&self) -> Option<MetricsSet> {
|
||||
Some(self.metrics.clone_inner())
|
||||
}
|
||||
|
||||
fn statistics(&self) -> Statistics {
|
||||
self.statistics.clone()
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,7 +20,7 @@ predicate = { path = "../predicate" }
query_functions = { path = "../query_functions" }
regex = "1"
schema = { path = "../schema" }
serde_json = "1.0.100"
serde_json = "1.0.102"
thiserror = "1.0"
workspace-hack = { version = "0.1", path = "../workspace-hack" }
@ -0,0 +1,24 @@
//! User defined aggregate functions implementing influxQL features.

use datafusion::logical_expr::{
AccumulatorFactoryFunction, AggregateUDF, ReturnTypeFunction, StateTypeFunction,
};
use once_cell::sync::Lazy;
use std::sync::Arc;

mod percentile;

/// Definition of the `PERCENTILE` user-defined aggregate function.
pub(crate) static PERCENTILE: Lazy<Arc<AggregateUDF>> = Lazy::new(|| {
let return_type: ReturnTypeFunction = Arc::new(percentile::return_type);
let accumulator: AccumulatorFactoryFunction = Arc::new(percentile::accumulator);
let state_type: StateTypeFunction = Arc::new(percentile::state_type);

Arc::new(AggregateUDF::new(
percentile::NAME,
&percentile::SIGNATURE,
&return_type,
&accumulator,
&state_type,
))
});
@ -0,0 +1,156 @@
|
|||
use crate::error;
|
||||
use arrow::array::{as_list_array, Array, ArrayRef, Float64Array, Int64Array};
|
||||
use arrow::datatypes::{DataType, Field};
|
||||
use datafusion::common::{downcast_value, DataFusionError, Result, ScalarValue};
|
||||
use datafusion::logical_expr::{Accumulator, Signature, TypeSignature, Volatility};
|
||||
use once_cell::sync::Lazy;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// The name of the percentile aggregate function.
|
||||
pub(super) const NAME: &str = "percentile";
|
||||
|
||||
/// Valid signatures for the percentile aggregate function.
|
||||
pub(super) static SIGNATURE: Lazy<Signature> = Lazy::new(|| {
|
||||
Signature::one_of(
|
||||
crate::NUMERICS
|
||||
.iter()
|
||||
.flat_map(|dt| {
|
||||
[
|
||||
TypeSignature::Exact(vec![dt.clone(), DataType::Int64]),
|
||||
TypeSignature::Exact(vec![dt.clone(), DataType::Float64]),
|
||||
]
|
||||
})
|
||||
.collect(),
|
||||
Volatility::Immutable,
|
||||
)
|
||||
});
|
||||
|
||||
/// Calculate the return type given the function signature. Percentile
|
||||
/// always returns the same type as the input column.
|
||||
pub(super) fn return_type(signature: &[DataType]) -> Result<Arc<DataType>> {
|
||||
Ok(Arc::new(signature[0].clone()))
|
||||
}
|
||||
|
||||
/// Create a new accumulator for the data type.
|
||||
pub(super) fn accumulator(dt: &DataType) -> Result<Box<dyn Accumulator>> {
|
||||
Ok(Box::new(PercentileAccumulator::new(dt.clone())))
|
||||
}
|
||||
|
||||
/// Calculate the intermediate merge state for the aggregator.
|
||||
pub(super) fn state_type(dt: &DataType) -> Result<Arc<Vec<DataType>>> {
|
||||
Ok(Arc::new(vec![
|
||||
DataType::List(Arc::new(Field::new("state", dt.clone(), false))),
|
||||
DataType::Float64,
|
||||
]))
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct PercentileAccumulator {
|
||||
data_type: DataType,
|
||||
data: Vec<ScalarValue>,
|
||||
percentile: Option<f64>,
|
||||
}
|
||||
|
||||
impl PercentileAccumulator {
|
||||
fn new(data_type: DataType) -> Self {
|
||||
Self {
|
||||
data_type,
|
||||
data: vec![],
|
||||
percentile: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn update(&mut self, array: ArrayRef) -> Result<()> {
|
||||
let array = Arc::clone(&array);
|
||||
assert_eq!(array.data_type(), &self.data_type);
|
||||
|
||||
let nulls = array.nulls();
|
||||
let null_len = nulls.map_or(0, |nb| nb.null_count());
|
||||
self.data.reserve(array.len() - null_len);
|
||||
for idx in 0..array.len() {
|
||||
if nulls.map_or(true, |nb| nb.is_valid(idx)) {
|
||||
self.data.push(ScalarValue::try_from_array(&array, idx)?)
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn set_percentile(&mut self, array: ArrayRef) -> Result<()> {
|
||||
if self.percentile.is_none() {
|
||||
self.percentile = match array.data_type() {
|
||||
DataType::Int64 => Some(downcast_value!(array, Int64Array).value(0) as f64),
|
||||
DataType::Float64 => Some(downcast_value!(array, Float64Array).value(0)),
|
||||
dt => {
|
||||
return error::internal(format!(
|
||||
"invalid data type ({dt}) for PERCENTILE n argument"
|
||||
))
|
||||
}
|
||||
};
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Accumulator for PercentileAccumulator {
|
||||
fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
|
||||
assert_eq!(values.len(), 2);
|
||||
|
||||
self.set_percentile(Arc::clone(&values[1]))?;
|
||||
self.update(Arc::clone(&values[0]))
|
||||
}
|
||||
|
||||
fn evaluate(&self) -> Result<ScalarValue> {
|
||||
let idx = self
|
||||
.percentile
|
||||
.and_then(|n| percentile_idx(self.data.len(), n));
|
||||
if idx.is_none() {
|
||||
return Ok(ScalarValue::Null);
|
||||
}
|
||||
|
||||
let array = ScalarValue::iter_to_array(self.data.clone())?;
|
||||
let indices = arrow::compute::sort_to_indices(&array, None, None)?;
|
||||
let array_idx = indices.value(idx.unwrap());
|
||||
ScalarValue::try_from_array(&array, array_idx as usize)
|
||||
}
|
||||
|
||||
fn size(&self) -> usize {
|
||||
std::mem::size_of::<Option<f64>>()
|
||||
+ std::mem::size_of::<DataType>()
|
||||
+ ScalarValue::size_of_vec(&self.data)
|
||||
}
|
||||
|
||||
fn state(&self) -> Result<Vec<ScalarValue>> {
|
||||
Ok(vec![
|
||||
ScalarValue::new_list(Some(self.data.clone()), self.data_type.clone()),
|
||||
ScalarValue::Float64(self.percentile),
|
||||
])
|
||||
}
|
||||
|
||||
fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
|
||||
assert_eq!(states.len(), 2);
|
||||
|
||||
self.set_percentile(Arc::clone(&states[1]))?;
|
||||
|
||||
let array = Arc::clone(&states[0]);
|
||||
let list_array = as_list_array(&array);
|
||||
for idx in 0..list_array.len() {
|
||||
self.update(list_array.value(idx))?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculate the location in an ordered list of len items where the
/// location of the item at the given percentile would be found.
///
/// This uses the same algorithm as the original influxdb implementation
/// of percentile as can be found in
/// <https://github.com/influxdata/influxdb/blob/75a8bcfae2af7b0043933be9f96b98c0741ceee3/influxql/query/call_iterator.go#L1087>.
fn percentile_idx(len: usize, percentile: f64) -> Option<usize> {
match TryInto::<usize>::try_into(
(((len as f64) * percentile / 100.0 + 0.5).floor() as isize) - 1,
) {
Ok(idx) if idx < len => Some(idx),
_ => None,
}
}
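// Worked example of the index formula above. The helper is crate-private, so
// it is copied verbatim here to make the example run on its own; the
// assertions are illustrative, not taken from the repository's tests.
// For 10 sorted values at the 50th percentile:
//   floor(10 * 50 / 100 + 0.5) - 1 = floor(5.5) - 1 = 4,
// i.e. the 5th-smallest value is selected; an index that falls outside the
// list (for example any percentile of an empty list) yields `None`.
fn percentile_idx(len: usize, percentile: f64) -> Option<usize> {
    match TryInto::<usize>::try_into(
        (((len as f64) * percentile / 100.0 + 0.5).floor() as isize) - 1,
    ) {
        Ok(idx) if idx < len => Some(idx),
        _ => None,
    }
}

fn main() {
    assert_eq!(percentile_idx(10, 50.0), Some(4));
    assert_eq!(percentile_idx(10, 90.0), Some(8));
    assert_eq!(percentile_idx(0, 50.0), None);
    println!("percentile_idx examples hold");
}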
@ -58,7 +58,7 @@ pub(crate) mod map {
|
|||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::plan::error::map::PlannerError;
|
||||
use crate::error::map::PlannerError;
|
||||
|
||||
#[test]
|
||||
fn test_planner_error_display() {
|
|
@ -17,7 +17,9 @@ use datafusion::datasource::provider_as_source;
|
|||
use datafusion::execution::context::{SessionState, TaskContext};
|
||||
use datafusion::logical_expr::{AggregateUDF, LogicalPlan, ScalarUDF, TableSource};
|
||||
use datafusion::physical_expr::PhysicalSortExpr;
|
||||
use datafusion::physical_plan::{Partitioning, SendableRecordBatchStream};
|
||||
use datafusion::physical_plan::{
|
||||
DisplayAs, DisplayFormatType, Partitioning, SendableRecordBatchStream,
|
||||
};
|
||||
use datafusion::{
|
||||
error::{DataFusionError, Result},
|
||||
physical_plan::ExecutionPlan,
|
||||
|
@ -78,7 +80,7 @@ struct SchemaExec {
|
|||
|
||||
impl Debug for SchemaExec {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "SchemaExec")
|
||||
self.fmt_as(DisplayFormatType::Default, f)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -100,7 +102,7 @@ impl ExecutionPlan for SchemaExec {
|
|||
}
|
||||
|
||||
fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
|
||||
unimplemented!()
|
||||
vec![Arc::clone(&self.input)]
|
||||
}
|
||||
|
||||
fn with_new_children(
|
||||
|
@ -123,6 +125,16 @@ impl ExecutionPlan for SchemaExec {
|
|||
}
|
||||
}
|
||||
|
||||
impl DisplayAs for SchemaExec {
|
||||
fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match t {
|
||||
DisplayFormatType::Default | DisplayFormatType::Verbose => {
|
||||
write!(f, "SchemaExec")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Create plans for running InfluxQL queries against databases
|
||||
#[derive(Debug, Default)]
|
||||
pub struct InfluxQLQueryPlanner {}
|
||||
|
|
|
@ -12,8 +12,17 @@
unused_crate_dependencies
)]

use arrow::datatypes::DataType;

// Workaround for "unused crate" lint false positives.
use workspace_hack as _;

mod aggregate;
mod error;
pub mod frontend;
pub mod plan;
mod window;

/// A list of the numeric types supported by InfluxQL that can be be used
/// as input to user-defined functions.
static NUMERICS: &[DataType] = &[DataType::Int64, DataType::UInt64, DataType::Float64];
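// Pulling together the `NUMERICS` list above with the `SIGNATURE` added in
// `aggregate/percentile.rs` elsewhere in this diff: PERCENTILE accepts any
// numeric value column paired with an Int64 or Float64 `n` argument. A
// self-contained restatement for reference (assumes the arrow/datafusion
// versions used on this branch; `percentile_signature` is a local name for
// illustration only):
use arrow::datatypes::DataType;
use datafusion::logical_expr::{Signature, TypeSignature, Volatility};

static NUMERICS: &[DataType] = &[DataType::Int64, DataType::UInt64, DataType::Float64];

fn percentile_signature() -> Signature {
    Signature::one_of(
        NUMERICS
            .iter()
            .flat_map(|dt| {
                [
                    TypeSignature::Exact(vec![dt.clone(), DataType::Int64]),
                    TypeSignature::Exact(vec![dt.clone(), DataType::Float64]),
                ]
            })
            .collect(),
        Volatility::Immutable,
    )
}

fn main() {
    // Six accepted argument-type combinations: 3 value types x 2 `n` types.
    println!("{:?}", percentile_signature());
}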
@ -1,8 +1,9 @@
|
|||
use crate::error;
|
||||
use crate::plan::field::field_by_name;
|
||||
use crate::plan::field_mapper::map_type;
|
||||
use crate::plan::ir::DataSource;
|
||||
use crate::plan::var_ref::influx_type_to_var_ref_data_type;
|
||||
use crate::plan::{error, SchemaProvider};
|
||||
use crate::plan::SchemaProvider;
|
||||
use datafusion::common::Result;
|
||||
use influxdb_influxql_parser::expression::{
|
||||
Binary, BinaryOperator, Call, Expr, VarRef, VarRefDataType,
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
//! Defines data structures which represent an InfluxQL
|
||||
//! statement after it has been processed
|
||||
|
||||
use crate::error;
|
||||
use crate::plan::rewriter::ProjectionType;
|
||||
use datafusion::common::Result;
|
||||
use influxdb_influxql_parser::common::{
|
||||
|
@ -17,7 +18,7 @@ use schema::{InfluxColumnType, Schema};
|
|||
use std::collections::HashSet;
|
||||
use std::fmt::{Display, Formatter};
|
||||
|
||||
use super::{error, SchemaProvider};
|
||||
use super::SchemaProvider;
|
||||
|
||||
/// A set of tag keys.
|
||||
pub(super) type TagSet = HashSet<String>;
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
mod error;
|
||||
mod expr_type_evaluator;
|
||||
mod field;
|
||||
mod field_mapper;
|
||||
|
|
|
@ -1,8 +1,11 @@
|
|||
mod select;
|
||||
|
||||
use crate::aggregate::PERCENTILE;
|
||||
use crate::error;
|
||||
use crate::plan::ir::{DataSource, Field, Interval, Select, SelectQuery};
|
||||
use crate::plan::planner::select::{
|
||||
fields_to_exprs_no_nulls, make_tag_key_column_meta, plan_with_sort, ProjectionInfo,
|
||||
fields_to_exprs_no_nulls, make_tag_key_column_meta, plan_with_sort, ProjectionInfo, Selector,
|
||||
SelectorWindowOrderBy,
|
||||
};
|
||||
use crate::plan::planner_time_range_expression::time_range_to_df_expr;
|
||||
use crate::plan::rewriter::{find_table_names, rewrite_statement, ProjectionType};
|
||||
|
@ -16,7 +19,8 @@ use crate::plan::udf::{
|
|||
};
|
||||
use crate::plan::util::{binary_operator_to_df_operator, rebase_expr, Schemas};
|
||||
use crate::plan::var_ref::var_ref_data_type_to_data_type;
|
||||
use crate::plan::{error, planner_rewrite_expression, udf, util_copy};
|
||||
use crate::plan::{planner_rewrite_expression, udf, util_copy};
|
||||
use crate::window::PERCENT_ROW_NUMBER;
|
||||
use arrow::array::{StringBuilder, StringDictionaryBuilder};
|
||||
use arrow::datatypes::{DataType, Field as ArrowField, Int32Type, Schema as ArrowSchema};
|
||||
use arrow::record_batch::RecordBatch;
|
||||
|
@ -25,7 +29,7 @@ use datafusion::catalog::TableReference;
|
|||
use datafusion::common::tree_node::{TreeNode, VisitRecursion};
|
||||
use datafusion::common::{DFSchema, DFSchemaRef, Result, ScalarValue, ToDFSchema};
|
||||
use datafusion::datasource::{provider_as_source, MemTable};
|
||||
use datafusion::logical_expr::expr::ScalarFunction;
|
||||
use datafusion::logical_expr::expr::{Alias, ScalarFunction};
|
||||
use datafusion::logical_expr::expr_rewriter::normalize_col;
|
||||
use datafusion::logical_expr::logical_plan::builder::project;
|
||||
use datafusion::logical_expr::logical_plan::Analyze;
|
||||
|
@ -47,6 +51,7 @@ use influxdb_influxql_parser::explain::{ExplainOption, ExplainStatement};
|
|||
use influxdb_influxql_parser::expression::walk::{walk_expr, walk_expression, Expression};
|
||||
use influxdb_influxql_parser::expression::{
|
||||
Binary, Call, ConditionalBinary, ConditionalExpression, ConditionalOperator, VarRef,
|
||||
VarRefDataType,
|
||||
};
|
||||
use influxdb_influxql_parser::functions::{
|
||||
is_aggregate_function, is_now_function, is_scalar_math_function,
|
||||
|
@ -762,16 +767,19 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
group_by_tag_set: &[&str],
|
||||
) -> Result<LogicalPlan> {
|
||||
match ctx.projection_type {
|
||||
ProjectionType::Raw => self.project_select_raw(input, fields),
|
||||
ProjectionType::RawDistinct => self.project_select_raw_distinct(input, fields),
|
||||
ProjectionType::Aggregate | ProjectionType::Selector{..} => self.project_select_aggregate(ctx, input, fields, group_by_tag_set),
|
||||
ProjectionType::Window => self.project_select_window(ctx, input, fields, group_by_tag_set),
|
||||
ProjectionType::WindowAggregate => self.project_select_window_aggregate(ctx, input, fields, group_by_tag_set),
|
||||
ProjectionType::WindowAggregateMixed => error::not_implemented("mixed window-aggregate and aggregate columns, such as DIFFERENCE(MEAN(col)), MEAN(col)"),
|
||||
ProjectionType::TopBottomSelector => self.project_select_top_bottom_selector(ctx, input, fields, group_by_tag_set),
|
||||
ProjectionType::Raw => self.project_select_raw(input, fields),
|
||||
ProjectionType::RawDistinct => self.project_select_raw_distinct(input, fields),
|
||||
ProjectionType::Aggregate => self.project_select_aggregate(ctx, input, fields, group_by_tag_set),
|
||||
ProjectionType::Window => self.project_select_window(ctx, input, fields, group_by_tag_set),
|
||||
ProjectionType::WindowAggregate => self.project_select_window_aggregate(ctx, input, fields, group_by_tag_set),
|
||||
ProjectionType::WindowAggregateMixed => error::not_implemented("mixed window-aggregate and aggregate columns, such as DIFFERENCE(MEAN(col)), MEAN(col)"),
|
||||
ProjectionType::Selector{..} => self.project_select_selector(ctx, input, fields, group_by_tag_set),
|
||||
ProjectionType::TopBottomSelector => self.project_select_top_bottom_selector(ctx, input, fields, group_by_tag_set),
|
||||
}
|
||||
}
|
||||
|
||||
/// Plan "Raw" SELECT queriers, These are queries that have no grouping
|
||||
/// and call only scalar functions.
|
||||
fn project_select_raw(&self, input: LogicalPlan, fields: &[Field]) -> Result<LogicalPlan> {
|
||||
let schemas = Schemas::new(input.schema())?;
|
||||
|
||||
|
@ -782,6 +790,8 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
project(input, select_exprs)
|
||||
}
|
||||
|
||||
/// Plan "RawDistinct" SELECT queriers, These are queries that have no grouping
|
||||
/// and call only scalar functions, but output only distinct rows.
|
||||
fn project_select_raw_distinct(
|
||||
&self,
|
||||
input: LogicalPlan,
|
||||
|
@ -803,7 +813,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
|
||||
// Take ownership of the alias, so we don't reallocate, and temporarily place a literal
|
||||
// `NULL` in its place.
|
||||
let Expr::Alias(_, alias) = std::mem::replace(&mut select_exprs[time_column_index], lit(ScalarValue::Null)) else {
|
||||
let Expr::Alias(Alias{name: alias, ..}) = std::mem::replace(&mut select_exprs[time_column_index], lit(ScalarValue::Null)) else {
|
||||
return error::internal("time column is not an alias")
|
||||
};
|
||||
|
||||
|
@ -815,6 +825,8 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
LogicalPlanBuilder::from(plan).distinct()?.build()
|
||||
}
|
||||
|
||||
/// Plan "Aggregate" SELECT queries. These are queries that use one or
|
||||
/// more aggregate (but not window) functions.
|
||||
fn project_select_aggregate(
|
||||
&self,
|
||||
ctx: &Context<'_>,
|
||||
|
@ -834,6 +846,8 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
project(plan, select_exprs)
|
||||
}
|
||||
|
||||
/// Plan "Window" SELECT queries. These are queries that use one or
|
||||
/// more window functions.
|
||||
fn project_select_window(
|
||||
&self,
|
||||
ctx: &Context<'_>,
|
||||
|
@ -870,6 +884,8 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Plan "WindowAggregate" SELECT queries. These are queries that use
|
||||
/// a combination of window and nested aggregate functions.
|
||||
fn project_select_window_aggregate(
|
||||
&self,
|
||||
ctx: &Context<'_>,
|
||||
|
@ -908,6 +924,102 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Plan the execution of SELECT queries that have the Selector projection
|
||||
/// type. These a queries that include a single FIRST, LAST, MAX, MIN,
|
||||
/// PERCENTILE, or SAMPLE function call, possibly requesting additional
|
||||
/// tags or fields.
|
||||
///
|
||||
/// N.B SAMPLE is not yet implemented.
|
||||
fn project_select_selector(
|
||||
&self,
|
||||
ctx: &Context<'_>,
|
||||
input: LogicalPlan,
|
||||
fields: &[Field],
|
||||
group_by_tag_set: &[&str],
|
||||
) -> Result<LogicalPlan> {
|
||||
let schemas = Schemas::new(input.schema())?;
|
||||
|
||||
let (selector_index, field_key, plan) = match Selector::find_enumerated(fields)? {
|
||||
(_, Selector::First { .. })
|
||||
| (_, Selector::Last { .. })
|
||||
| (_, Selector::Max { .. })
|
||||
| (_, Selector::Min { .. }) => {
|
||||
// The FIRST, LAST, MAX & MIN selectors are implmented as specialised
|
||||
// forms of the equivilent aggregate implementaiion.
|
||||
return self.project_select_aggregate(ctx, input, fields, group_by_tag_set);
|
||||
}
|
||||
(idx, Selector::Percentile { field_key, n }) => {
|
||||
let window_perc_row = Expr::WindowFunction(WindowFunction::new(
|
||||
PERCENT_ROW_NUMBER.clone(),
|
||||
vec![lit(n)],
|
||||
window_partition_by(ctx, input.schema(), group_by_tag_set),
|
||||
vec![field_key.as_expr().sort(true, false), ctx.time_sort_expr()],
|
||||
WindowFrame {
|
||||
units: WindowFrameUnits::Rows,
|
||||
start_bound: WindowFrameBound::Preceding(ScalarValue::Null),
|
||||
end_bound: WindowFrameBound::Following(ScalarValue::Null),
|
||||
},
|
||||
));
|
||||
let perc_row_column_name = window_perc_row.display_name()?;
|
||||
|
||||
let window_row = Expr::WindowFunction(WindowFunction::new(
|
||||
window_function::WindowFunction::BuiltInWindowFunction(
|
||||
window_function::BuiltInWindowFunction::RowNumber,
|
||||
),
|
||||
vec![],
|
||||
window_partition_by(ctx, input.schema(), group_by_tag_set),
|
||||
vec![field_key.as_expr().sort(true, false), ctx.time_sort_expr()],
|
||||
WindowFrame {
|
||||
units: WindowFrameUnits::Rows,
|
||||
start_bound: WindowFrameBound::Preceding(ScalarValue::Null),
|
||||
end_bound: WindowFrameBound::Following(ScalarValue::Null),
|
||||
},
|
||||
));
|
||||
let row_column_name = window_row.display_name()?;
|
||||
|
||||
let filter_expr = binary_expr(
|
||||
col(perc_row_column_name.clone()),
|
||||
Operator::Eq,
|
||||
col(row_column_name.clone()),
|
||||
);
|
||||
let plan = LogicalPlanBuilder::from(input)
|
||||
.filter(field_key.as_expr().is_not_null())?
|
||||
.window(vec![
|
||||
window_perc_row.alias(perc_row_column_name),
|
||||
window_row.alias(row_column_name),
|
||||
])?
|
||||
.filter(filter_expr)?
|
||||
.build()?;
|
||||
|
||||
(idx, field_key, plan)
|
||||
}
|
||||
(_, Selector::Sample { field_key: _, n: _ }) => {
|
||||
return error::not_implemented("sample selector function")
|
||||
}
|
||||
|
||||
(_, s) => {
|
||||
return error::internal(format!(
|
||||
"unsupported selector function for ProjectionSelector {s}"
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
let mut fields_vec = fields.to_vec();
|
||||
fields_vec[selector_index].expr = IQLExpr::VarRef(VarRef {
|
||||
name: field_key.clone(),
|
||||
data_type: None,
|
||||
});
|
||||
|
||||
// Transform InfluxQL AST field expressions to a list of DataFusion expressions.
|
||||
let select_exprs = self.field_list_to_exprs(&plan, fields_vec.as_slice(), &schemas)?;
|
||||
|
||||
// Wrap the plan in a `LogicalPlan::Projection` from the select expressions
|
||||
project(plan, select_exprs)
|
||||
}
|
||||
|
||||
/// Plan the execution of "TopBottomSelector" SELECT queries. These are
|
||||
/// queries that use the TOP or BOTTOM functions to select a number of
|
||||
/// rows from the ends of a partition..
|
||||
fn project_select_top_bottom_selector(
|
||||
&self,
|
||||
ctx: &Context<'_>,
|
||||
|
@ -917,87 +1029,68 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
) -> Result<LogicalPlan> {
|
||||
let schemas = Schemas::new(input.schema())?;
|
||||
|
||||
let (selector_index, call) = fields
|
||||
.iter()
|
||||
.enumerate()
|
||||
.find_map(|(idx, f)| match &f.expr {
|
||||
IQLExpr::Call(c) if c.name == "top" || c.name == "bottom" => Some((idx, c.clone())),
|
||||
_ => None,
|
||||
})
|
||||
.ok_or(error::map::internal(
|
||||
"ProjectionTopBottomSelector used without top or bottom field",
|
||||
))?;
|
||||
let (selector_index, is_bottom, field_key, tag_keys, narg) =
|
||||
match Selector::find_enumerated(fields)? {
|
||||
(
|
||||
idx,
|
||||
Selector::Bottom {
|
||||
field_key,
|
||||
tag_keys,
|
||||
n,
|
||||
},
|
||||
) => (idx, true, field_key, tag_keys, n),
|
||||
(
|
||||
idx,
|
||||
Selector::Top {
|
||||
field_key,
|
||||
tag_keys,
|
||||
n,
|
||||
},
|
||||
) => (idx, false, field_key, tag_keys, n),
|
||||
(_, s) => {
|
||||
return error::internal(format!(
|
||||
"ProjectionTopBottomSelector used with unexpected selector function: {s}"
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
// Find the selector parameters.
|
||||
let is_bottom = call.name == "bottom";
|
||||
let [field, tag_keys @ .., narg] = call.args.as_slice() else {
|
||||
return error::internal(format!(
|
||||
"invalid number of arguments for {}: expected 2 or more, got {}",
|
||||
call.name,
|
||||
call.args.len()
|
||||
));
|
||||
};
|
||||
let field = if let IQLExpr::VarRef(v) = field {
|
||||
Field {
|
||||
expr: IQLExpr::VarRef(v.clone()),
|
||||
name: v.name.clone().take(),
|
||||
data_type: None,
|
||||
}
|
||||
let mut fields_vec = fields.to_vec();
|
||||
fields_vec[selector_index].expr = IQLExpr::VarRef(VarRef {
|
||||
name: field_key.clone(),
|
||||
data_type: None,
|
||||
});
|
||||
let order_by = if is_bottom {
|
||||
SelectorWindowOrderBy::FieldAsc(field_key)
|
||||
} else {
|
||||
return error::internal(format!(
|
||||
"invalid expression for {} field argument, {field}",
|
||||
call.name,
|
||||
));
|
||||
};
|
||||
let n = if let IQLExpr::Literal(Literal::Integer(v)) = narg {
|
||||
*v
|
||||
} else {
|
||||
return error::internal(format!(
|
||||
"invalid expression for {} n argument, {narg}",
|
||||
call.name
|
||||
));
|
||||
SelectorWindowOrderBy::FieldDesc(field_key)
|
||||
};
|
||||
|
||||
let mut internal_group_by = group_by_tag_set.to_vec();
|
||||
let mut fields_vec = fields.to_vec();
|
||||
for (i, tag_key) in tag_keys.iter().enumerate() {
|
||||
if let IQLExpr::VarRef(v) = &tag_key {
|
||||
fields_vec.insert(
|
||||
selector_index + i + 1,
|
||||
Field {
|
||||
expr: IQLExpr::VarRef(v.clone()),
|
||||
name: v.name.clone().take(),
|
||||
data_type: None,
|
||||
},
|
||||
);
|
||||
internal_group_by.push(v.name.as_ref());
|
||||
} else {
|
||||
return error::internal(format!(
|
||||
"invalid expression for {} tag_keys argument, {}",
|
||||
call.name, &tag_key
|
||||
));
|
||||
}
|
||||
fields_vec.insert(
|
||||
selector_index + i + 1,
|
||||
Field {
|
||||
expr: IQLExpr::VarRef(VarRef {
|
||||
name: (*tag_key).clone(),
|
||||
data_type: Some(VarRefDataType::Tag),
|
||||
}),
|
||||
name: (*tag_key).clone().take(),
|
||||
data_type: None,
|
||||
},
|
||||
);
|
||||
internal_group_by.push(*tag_key);
|
||||
}
|
||||
|
||||
// Transform InfluxQL AST field expressions to a list of DataFusion expressions.
|
||||
let select_exprs = self.field_list_to_exprs(&input, fields_vec.as_slice(), &schemas)?;
|
||||
|
||||
let plan = if !tag_keys.is_empty() {
|
||||
self.select_first(
|
||||
ctx,
|
||||
input,
|
||||
&schemas,
|
||||
&field,
|
||||
is_bottom,
|
||||
internal_group_by.as_slice(),
|
||||
1,
|
||||
)?
|
||||
self.select_first(ctx, input, order_by, internal_group_by.as_slice(), 1)?
|
||||
} else {
|
||||
input
|
||||
};
|
||||
|
||||
let plan =
|
||||
self.select_first(ctx, plan, &schemas, &field, is_bottom, group_by_tag_set, n)?;
|
||||
let plan = self.select_first(ctx, plan, order_by, group_by_tag_set, narg)?;
|
||||
|
||||
// Wrap the plan in a `LogicalPlan::Projection` from the select expressions
|
||||
project(plan, select_exprs)
|
||||
|
@ -1056,7 +1149,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
continue;
|
||||
}
|
||||
let (expr, out_name) = match expr.clone() {
|
||||
Expr::Alias(expr, out_name) => (*expr, out_name),
|
||||
Expr::Alias(Alias {
|
||||
expr,
|
||||
name: out_name,
|
||||
}) => (*expr, out_name),
|
||||
_ => {
|
||||
return error::internal("other field is not aliased");
|
||||
}
|
||||
|
@ -1102,7 +1198,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
let time_column = {
|
||||
// Take ownership of the alias, so we don't reallocate, and temporarily place a literal
|
||||
// `NULL` in its place.
|
||||
let Expr::Alias(_, alias) = std::mem::replace(&mut select_exprs[time_column_index], lit(ScalarValue::Null)) else {
|
||||
let Expr::Alias(Alias{name: alias, ..}) = std::mem::replace(&mut select_exprs[time_column_index], lit(ScalarValue::Null)) else {
|
||||
return error::internal("time column is not an alias")
|
||||
};
|
||||
|
||||
|
@ -1281,42 +1377,32 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
Ok((plan, select_exprs))
|
||||
}
|
||||
|
||||
/// Generate a plan to select the first n rows from each partition in the input data sorted by the requested field.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
/// Generate a plan to select the first n rows from each partition in
|
||||
/// the input data, optionally sorted by the requested field.
|
||||
fn select_first(
|
||||
&self,
|
||||
ctx: &Context<'_>,
|
||||
input: LogicalPlan,
|
||||
schemas: &Schemas,
|
||||
field: &Field,
|
||||
asc: bool,
|
||||
order_by: SelectorWindowOrderBy<'_>,
|
||||
group_by_tags: &[&str],
|
||||
count: i64,
|
||||
) -> Result<LogicalPlan> {
|
||||
let mut group_by =
|
||||
fields_to_exprs_no_nulls(input.schema(), group_by_tags).collect::<Vec<_>>();
|
||||
if let Some(i) = ctx.interval {
|
||||
let stride = lit(ScalarValue::new_interval_mdn(0, 0, i.duration));
|
||||
let offset = i.offset.unwrap_or_default();
|
||||
|
||||
group_by.push(date_bin(
|
||||
stride,
|
||||
"time".as_expr(),
|
||||
lit(ScalarValue::TimestampNanosecond(Some(offset), None)),
|
||||
));
|
||||
}
|
||||
|
||||
let field_sort_expr = self
|
||||
.field_to_df_expr(field, &input, schemas)?
|
||||
.sort(asc, false);
|
||||
let order_by_exprs = match order_by {
|
||||
SelectorWindowOrderBy::FieldAsc(id) => {
|
||||
vec![id.as_expr().sort(true, false), ctx.time_sort_expr()]
|
||||
}
|
||||
SelectorWindowOrderBy::FieldDesc(id) => {
|
||||
vec![id.as_expr().sort(false, false), ctx.time_sort_expr()]
|
||||
}
|
||||
};
|
||||
|
||||
let window_expr = Expr::WindowFunction(WindowFunction::new(
|
||||
window_function::WindowFunction::BuiltInWindowFunction(
|
||||
window_function::BuiltInWindowFunction::RowNumber,
|
||||
),
|
||||
Vec::<Expr>::new(),
|
||||
group_by,
|
||||
vec![field_sort_expr, ctx.time_sort_expr()],
|
||||
window_partition_by(ctx, input.schema(), group_by_tags),
|
||||
order_by_exprs,
|
||||
WindowFrame {
|
||||
units: WindowFrameUnits::Rows,
|
||||
start_bound: WindowFrameBound::Preceding(ScalarValue::Null),
|
||||
|
@ -1855,6 +1941,21 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
None,
|
||||
)))
|
||||
}
|
||||
"percentile" => {
|
||||
let expr = self.expr_to_df_expr(scope, &args[0], schemas)?;
|
||||
if let Expr::Literal(ScalarValue::Null) = expr {
|
||||
return Ok(expr);
|
||||
}
|
||||
|
||||
check_arg_count(name, args, 2)?;
|
||||
let nexpr = self.expr_to_df_expr(scope, &args[1], schemas)?;
|
||||
Ok(Expr::AggregateUDF(expr::AggregateUDF::new(
|
||||
PERCENTILE.clone(),
|
||||
vec![expr, nexpr],
|
||||
None,
|
||||
None,
|
||||
)))
|
||||
}
|
||||
name @ ("first" | "last" | "min" | "max") => {
|
||||
let expr = self.expr_to_df_expr(scope, &args[0], schemas)?;
|
||||
if let Expr::Literal(ScalarValue::Null) = expr {
|
||||
|
@ -2726,7 +2827,7 @@ fn build_gap_fill_node(
|
|||
fill_strategy: FillStrategy,
|
||||
) -> Result<LogicalPlan> {
|
||||
let (expr, alias) = match time_column {
|
||||
Expr::Alias(expr, alias) => (expr.as_ref(), alias),
|
||||
Expr::Alias(Alias { expr, name: alias }) => (expr.as_ref(), alias),
|
||||
_ => return error::internal("expected time column to have an alias function"),
|
||||
};
|
||||
|
||||
|
@ -3063,6 +3164,26 @@ fn find_var_refs(select: &Select) -> BTreeSet<&VarRef> {
|
|||
var_refs
|
||||
}
|
||||
|
||||
/// Calculate the partitioning for window functions.
|
||||
fn window_partition_by(
|
||||
ctx: &Context<'_>,
|
||||
schema: &DFSchemaRef,
|
||||
group_by_tags: &[&str],
|
||||
) -> Vec<Expr> {
|
||||
let mut parition_by = fields_to_exprs_no_nulls(schema, group_by_tags).collect::<Vec<_>>();
|
||||
if let Some(i) = ctx.interval {
|
||||
let stride = lit(ScalarValue::new_interval_mdn(0, 0, i.duration));
|
||||
let offset = i.offset.unwrap_or_default();
|
||||
|
||||
parition_by.push(date_bin(
|
||||
stride,
|
||||
"time".as_expr(),
|
||||
lit(ScalarValue::TimestampNanosecond(Some(offset), None)),
|
||||
));
|
||||
}
|
||||
parition_by
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
@ -3689,10 +3810,10 @@ mod test {
|
|||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
|
||||
// aggregate with repeated table
|
||||
// selector with repeated table
|
||||
//
|
||||
// ⚠️ Important
|
||||
// The aggregate must be applied to the UNION of all instances of the cpu table
|
||||
// The selector must be applied to the UNION of all instances of the cpu table
|
||||
assert_snapshot!(plan("SELECT last(usage_idle) FROM cpu, cpu"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, last:Float64;N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, (selector_last(usage_idle,time))[time] AS time, (selector_last(usage_idle,time))[value] AS last [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, last:Float64;N]
|
||||
|
@ -3707,7 +3828,7 @@ mod test {
|
|||
// different tables for each subquery
|
||||
//
|
||||
// ⚠️ Important
|
||||
// The aggregate must be applied independently for each unique table
|
||||
// The selector must be applied independently for each unique table
|
||||
assert_snapshot!(plan("SELECT last(value) FROM (SELECT usage_idle AS value FROM cpu), (SELECT bytes_free AS value FROM disk)"), @r###"
|
||||
Sort: iox::measurement ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, last:Float64;N]
|
||||
Union [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, last:Float64;N]
|
||||
|
@ -4062,39 +4183,91 @@ mod test {
|
|||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_percentile() {
|
||||
assert_snapshot!(plan("SELECT percentile(usage_idle,50),usage_system FROM cpu"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), percentile:Float64;N, usage_system:Float64;N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, cpu.usage_idle AS percentile, cpu.usage_system AS usage_system [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), percentile:Float64;N, usage_system:Float64;N]
|
||||
Filter: percent_row_number(Float64(50)) ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING = ROW_NUMBER() ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, percent_row_number(Float64(50)) ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING:UInt64;N, ROW_NUMBER() ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING:UInt64;N]
|
||||
WindowAggr: windowExpr=[[percent_row_number(Float64(50)) ORDER BY [cpu.usage_idle ASC NULLS LAST, cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS percent_row_number(Float64(50)) ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, ROW_NUMBER() ORDER BY [cpu.usage_idle ASC NULLS LAST, cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS ROW_NUMBER() ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, percent_row_number(Float64(50)) ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING:UInt64;N, ROW_NUMBER() ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING:UInt64;N]
|
||||
Filter: cpu.usage_idle IS NOT NULL [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
|
||||
assert_snapshot!(plan("SELECT percentile(usage_idle,50),usage_system FROM cpu WHERE time >= 0 AND time < 60000000000 GROUP BY cpu"), @r###"
|
||||
Sort: cpu ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, percentile:Float64;N, usage_system:Float64;N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, cpu.cpu AS cpu, cpu.usage_idle AS percentile, cpu.usage_system AS usage_system [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, percentile:Float64;N, usage_system:Float64;N]
|
||||
Filter: percent_row_number(Float64(50)) PARTITION BY [cpu] ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING = ROW_NUMBER() PARTITION BY [cpu] ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, percent_row_number(Float64(50)) PARTITION BY [cpu] ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING:UInt64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING:UInt64;N]
|
||||
WindowAggr: windowExpr=[[percent_row_number(Float64(50)) PARTITION BY [cpu.cpu] ORDER BY [cpu.usage_idle ASC NULLS LAST, cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS percent_row_number(Float64(50)) PARTITION BY [cpu] ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, ROW_NUMBER() PARTITION BY [cpu.cpu] ORDER BY [cpu.usage_idle ASC NULLS LAST, cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS ROW_NUMBER() PARTITION BY [cpu] ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, percent_row_number(Float64(50)) PARTITION BY [cpu] ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING:UInt64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING:UInt64;N]
|
||||
Filter: cpu.usage_idle IS NOT NULL [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
Filter: cpu.time >= TimestampNanosecond(0, None) AND cpu.time <= TimestampNanosecond(59999999999, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
|
||||
assert_snapshot!(plan("SELECT percentile(usage_idle,50), percentile(usage_idle,90) FROM cpu"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), percentile:Float64;N, percentile_1:Float64;N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, percentile(cpu.usage_idle,Int64(50)) AS percentile, percentile(cpu.usage_idle,Int64(90)) AS percentile_1 [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), percentile:Float64;N, percentile_1:Float64;N]
|
||||
Aggregate: groupBy=[[]], aggr=[[percentile(cpu.usage_idle, Int64(50)), percentile(cpu.usage_idle, Int64(90))]] [percentile(cpu.usage_idle,Int64(50)):Float64;N, percentile(cpu.usage_idle,Int64(90)):Float64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
|
||||
assert_snapshot!(plan("SELECT percentile(usage_idle,50), percentile(usage_idle,90) FROM cpu GROUP BY cpu"), @r###"
|
||||
Sort: cpu ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, percentile:Float64;N, percentile_1:Float64;N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, cpu.cpu AS cpu, percentile(cpu.usage_idle,Int64(50)) AS percentile, percentile(cpu.usage_idle,Int64(90)) AS percentile_1 [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, percentile:Float64;N, percentile_1:Float64;N]
|
||||
Aggregate: groupBy=[[cpu.cpu]], aggr=[[percentile(cpu.usage_idle, Int64(50)), percentile(cpu.usage_idle, Int64(90))]] [cpu:Dictionary(Int32, Utf8);N, percentile(cpu.usage_idle,Int64(50)):Float64;N, percentile(cpu.usage_idle,Int64(90)):Float64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
|
||||
assert_snapshot!(plan("SELECT percentile(usage_idle,50), percentile(usage_idle,90) FROM cpu GROUP BY cpu"), @r###"
|
||||
Sort: cpu ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, percentile:Float64;N, percentile_1:Float64;N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, cpu.cpu AS cpu, percentile(cpu.usage_idle,Int64(50)) AS percentile, percentile(cpu.usage_idle,Int64(90)) AS percentile_1 [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, percentile:Float64;N, percentile_1:Float64;N]
|
||||
Aggregate: groupBy=[[cpu.cpu]], aggr=[[percentile(cpu.usage_idle, Int64(50)), percentile(cpu.usage_idle, Int64(90))]] [cpu:Dictionary(Int32, Utf8);N, percentile(cpu.usage_idle,Int64(50)):Float64;N, percentile(cpu.usage_idle,Int64(90)):Float64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
|
||||
assert_snapshot!(plan("SELECT percentile(usage_idle,50), percentile(usage_idle,90) FROM cpu WHERE time >= 0 AND time < 60000000000 GROUP BY time(10s), cpu"), @r###"
|
||||
Sort: cpu ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, cpu:Dictionary(Int32, Utf8);N, percentile:Float64;N, percentile_1:Float64;N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, cpu.cpu AS cpu, percentile(cpu.usage_idle,Int64(50)) AS percentile, percentile(cpu.usage_idle,Int64(90)) AS percentile_1 [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, cpu:Dictionary(Int32, Utf8);N, percentile:Float64;N, percentile_1:Float64;N]
|
||||
GapFill: groupBy=[time, cpu.cpu], aggr=[[percentile(cpu.usage_idle,Int64(50)), percentile(cpu.usage_idle,Int64(90))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Included(Literal(TimestampNanosecond(0, None)))..Included(Literal(TimestampNanosecond(59999999999, None))) [time:Timestamp(Nanosecond, None);N, cpu:Dictionary(Int32, Utf8);N, percentile(cpu.usage_idle,Int64(50)):Float64;N, percentile(cpu.usage_idle,Int64(90)):Float64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time, cpu.cpu]], aggr=[[percentile(cpu.usage_idle, Int64(50)), percentile(cpu.usage_idle, Int64(90))]] [time:Timestamp(Nanosecond, None);N, cpu:Dictionary(Int32, Utf8);N, percentile(cpu.usage_idle,Int64(50)):Float64;N, percentile(cpu.usage_idle,Int64(90)):Float64;N]
|
||||
Filter: cpu.time >= TimestampNanosecond(0, None) AND cpu.time <= TimestampNanosecond(59999999999, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_top() {
|
||||
assert_snapshot!(plan("SELECT top(usage_idle,10) FROM cpu"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), top:Float64;N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, cpu.usage_idle AS top [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), top:Float64;N]
|
||||
Filter: ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW <= Int64(10) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
Filter: ROW_NUMBER() ORDER BY [usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW <= Int64(10) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() ORDER BY [usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [cpu.usage_idle DESC NULLS LAST, cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS ROW_NUMBER() ORDER BY [usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() ORDER BY [usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
|
||||
assert_snapshot!(plan("SELECT top(usage_idle,10),cpu FROM cpu"), @r###"
|
||||
Sort: time ASC NULLS LAST, cpu ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), top:Float64;N, cpu:Dictionary(Int32, Utf8);N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, cpu.usage_idle AS top, cpu.cpu AS cpu [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), top:Float64;N, cpu:Dictionary(Int32, Utf8);N]
|
||||
Filter: ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW <= Int64(10) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
Filter: ROW_NUMBER() ORDER BY [usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW <= Int64(10) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() ORDER BY [usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [cpu.usage_idle DESC NULLS LAST, cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS ROW_NUMBER() ORDER BY [usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() ORDER BY [usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
|
||||
assert_snapshot!(plan("SELECT top(usage_idle,10) FROM cpu GROUP BY cpu"), @r###"
|
||||
Sort: cpu ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, top:Float64;N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, cpu.cpu AS cpu, cpu.usage_idle AS top [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, top:Float64;N]
|
||||
Filter: ROW_NUMBER() PARTITION BY [cpu] ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW <= Int64(10) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [cpu.cpu] ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS ROW_NUMBER() PARTITION BY [cpu] ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
Filter: ROW_NUMBER() PARTITION BY [cpu] ORDER BY [usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW <= Int64(10) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [cpu.cpu] ORDER BY [cpu.usage_idle DESC NULLS LAST, cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS ROW_NUMBER() PARTITION BY [cpu] ORDER BY [usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
|
||||
assert_snapshot!(plan("SELECT top(usage_idle,cpu,10) FROM cpu"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), top:Float64;N, cpu:Dictionary(Int32, Utf8);N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, cpu.usage_idle AS top, cpu.cpu AS cpu [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), top:Float64;N, cpu:Dictionary(Int32, Utf8);N]
|
||||
Filter: ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW <= Int64(10) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N, ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N, ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
Filter: ROW_NUMBER() PARTITION BY [cpu] ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW <= Int64(1) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [cpu.cpu] ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS ROW_NUMBER() PARTITION BY [cpu] ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [cpu.usage_idle AS usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
Filter: ROW_NUMBER() ORDER BY [usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW <= Int64(10) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N, ROW_NUMBER() ORDER BY [usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [cpu.usage_idle DESC NULLS LAST, cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS ROW_NUMBER() ORDER BY [usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N, ROW_NUMBER() ORDER BY [usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
Filter: ROW_NUMBER() PARTITION BY [cpu] ORDER BY [usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW <= Int64(1) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [cpu.cpu] ORDER BY [cpu.usage_idle DESC NULLS LAST, cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS ROW_NUMBER() PARTITION BY [cpu] ORDER BY [usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [usage_idle DESC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
}
|
||||
|
@ -4104,34 +4277,34 @@ mod test {
|
|||
assert_snapshot!(plan("SELECT bottom(usage_idle,10) FROM cpu"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bottom:Float64;N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, cpu.usage_idle AS bottom [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bottom:Float64;N]
|
||||
Filter: ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW <= Int64(10) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
Filter: ROW_NUMBER() ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW <= Int64(10) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [cpu.usage_idle ASC NULLS LAST, cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS ROW_NUMBER() ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
|
||||
assert_snapshot!(plan("SELECT bottom(usage_idle,10),cpu FROM cpu"), @r###"
|
||||
Sort: time ASC NULLS LAST, cpu ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bottom:Float64;N, cpu:Dictionary(Int32, Utf8);N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, cpu.usage_idle AS bottom, cpu.cpu AS cpu [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bottom:Float64;N, cpu:Dictionary(Int32, Utf8);N]
|
||||
Filter: ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW <= Int64(10) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
Filter: ROW_NUMBER() ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW <= Int64(10) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [cpu.usage_idle ASC NULLS LAST, cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS ROW_NUMBER() ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
|
||||
assert_snapshot!(plan("SELECT bottom(usage_idle,10) FROM cpu GROUP BY cpu"), @r###"
|
||||
Sort: cpu ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, bottom:Float64;N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, cpu.cpu AS cpu, cpu.usage_idle AS bottom [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, bottom:Float64;N]
|
||||
Filter: ROW_NUMBER() PARTITION BY [cpu] ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW <= Int64(10) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [cpu.cpu] ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS ROW_NUMBER() PARTITION BY [cpu] ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
Filter: ROW_NUMBER() PARTITION BY [cpu] ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW <= Int64(10) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [cpu.cpu] ORDER BY [cpu.usage_idle ASC NULLS LAST, cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS ROW_NUMBER() PARTITION BY [cpu] ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
|
||||
assert_snapshot!(plan("SELECT bottom(usage_idle,cpu,10) FROM cpu"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bottom:Float64;N, cpu:Dictionary(Int32, Utf8);N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, cpu.usage_idle AS bottom, cpu.cpu AS cpu [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bottom:Float64;N, cpu:Dictionary(Int32, Utf8);N]
|
||||
Filter: ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW <= Int64(10) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N, ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N, ROW_NUMBER() ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
Filter: ROW_NUMBER() PARTITION BY [cpu] ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW <= Int64(1) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [cpu.cpu] ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS ROW_NUMBER() PARTITION BY [cpu] ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [cpu.usage_idle AS usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
Filter: ROW_NUMBER() ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW <= Int64(10) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N, ROW_NUMBER() ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [cpu.usage_idle ASC NULLS LAST, cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS ROW_NUMBER() ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N, ROW_NUMBER() ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
Filter: ROW_NUMBER() PARTITION BY [cpu] ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW <= Int64(1) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [cpu.cpu] ORDER BY [cpu.usage_idle ASC NULLS LAST, cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS ROW_NUMBER() PARTITION BY [cpu] ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, ROW_NUMBER() PARTITION BY [cpu] ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:UInt64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
}
|
||||
|
|
|
@ -1,13 +1,17 @@
|
|||
use crate::error;
|
||||
use crate::plan::ir::Field;
|
||||
use arrow::datatypes::DataType;
|
||||
use datafusion::common::{DFSchemaRef, Result};
|
||||
use datafusion::logical_expr::{Expr, LogicalPlan, LogicalPlanBuilder};
|
||||
use datafusion_util::AsExpr;
|
||||
use generated_types::influxdata::iox::querier::v1::influx_ql_metadata::TagKeyColumn;
|
||||
use influxdb_influxql_parser::expression::{Expr as IQLExpr, VarRef, VarRefDataType};
|
||||
use influxdb_influxql_parser::expression::{Call, Expr as IQLExpr, VarRef, VarRefDataType};
|
||||
use influxdb_influxql_parser::identifier::Identifier;
|
||||
use influxdb_influxql_parser::literal::Literal;
|
||||
use itertools::Itertools;
|
||||
use schema::{InfluxColumnType, INFLUXQL_MEASUREMENT_COLUMN_NAME};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fmt::{Display, Formatter};
|
||||
|
||||
pub(super) fn make_tag_key_column_meta(
|
||||
fields: &[Field],
|
||||
|
@ -198,3 +202,236 @@ fn find_tag_and_unknown_columns(fields: &[Field]) -> impl Iterator<Item = &str>
|
|||
_ => None,
|
||||
})
|
||||
}
|
||||
|
||||
/// The selector function that has been specified for use with a selector
|
||||
/// projection type.
|
||||
#[derive(Debug)]
|
||||
pub(super) enum Selector<'a> {
|
||||
Bottom {
|
||||
field_key: &'a Identifier,
|
||||
tag_keys: Vec<&'a Identifier>,
|
||||
n: i64,
|
||||
},
|
||||
First {
|
||||
field_key: &'a Identifier,
|
||||
},
|
||||
Last {
|
||||
field_key: &'a Identifier,
|
||||
},
|
||||
Max {
|
||||
field_key: &'a Identifier,
|
||||
},
|
||||
Min {
|
||||
field_key: &'a Identifier,
|
||||
},
|
||||
Percentile {
|
||||
field_key: &'a Identifier,
|
||||
n: f64,
|
||||
},
|
||||
Sample {
|
||||
field_key: &'a Identifier,
|
||||
n: i64,
|
||||
},
|
||||
Top {
|
||||
field_key: &'a Identifier,
|
||||
tag_keys: Vec<&'a Identifier>,
|
||||
n: i64,
|
||||
},
|
||||
}
|
||||
|
||||
impl<'a> Selector<'a> {
|
||||
/// Find the selector function, with its location, in the specified field list.
|
||||
pub(super) fn find_enumerated(fields: &'a [Field]) -> Result<(usize, Self)> {
|
||||
fields
|
||||
.iter()
|
||||
.enumerate()
|
||||
.find_map(|(idx, f)| match &f.expr {
|
||||
IQLExpr::Call(c) => Some((idx, c)),
|
||||
_ => None,
|
||||
})
|
||||
.map(|(idx, c)| {
|
||||
Ok((
|
||||
idx,
|
||||
match c.name.as_str() {
|
||||
"bottom" => Self::bottom(c),
|
||||
"first" => Self::first(c),
|
||||
"last" => Self::last(c),
|
||||
"max" => Self::max(c),
|
||||
"min" => Self::min(c),
|
||||
"percentile" => Self::percentile(c),
|
||||
"sample" => Self::sample(c),
|
||||
"top" => Self::top(c),
|
||||
name => error::internal(format!("unexpected selector function: {name}")),
|
||||
}?,
|
||||
))
|
||||
})
|
||||
.ok_or_else(|| error::map::internal("expected Call expression"))?
|
||||
}
|
||||
|
||||
fn bottom(call: &'a Call) -> Result<Self> {
|
||||
let [field_key, tag_keys @ .., narg] = call.args.as_slice() else {
|
||||
return error::internal(format!(
|
||||
"invalid number of arguments for bottom: expected 2 or more, got {}",
|
||||
call.args.len()
|
||||
));
|
||||
};
|
||||
let tag_keys: Result<Vec<_>> = tag_keys.iter().map(Self::identifier).collect();
|
||||
Ok(Self::Bottom {
|
||||
field_key: Self::identifier(field_key)?,
|
||||
tag_keys: tag_keys?,
|
||||
n: Self::literal_int(narg)?,
|
||||
})
|
||||
}
|
||||
|
||||
fn first(call: &'a Call) -> Result<Self> {
|
||||
if call.args.len() != 1 {
|
||||
return error::internal(format!(
|
||||
"invalid number of arguments for first: expected 1, got {}",
|
||||
call.args.len()
|
||||
));
|
||||
}
|
||||
Ok(Self::First {
|
||||
field_key: Self::identifier(call.args.get(0).unwrap())?,
|
||||
})
|
||||
}
|
||||
|
||||
fn last(call: &'a Call) -> Result<Self> {
|
||||
if call.args.len() != 1 {
|
||||
return error::internal(format!(
|
||||
"invalid number of arguments for last: expected 1, got {}",
|
||||
call.args.len()
|
||||
));
|
||||
}
|
||||
Ok(Self::Last {
|
||||
field_key: Self::identifier(call.args.get(0).unwrap())?,
|
||||
})
|
||||
}
|
||||
|
||||
fn max(call: &'a Call) -> Result<Self> {
|
||||
if call.args.len() != 1 {
|
||||
return error::internal(format!(
|
||||
"invalid number of arguments for max: expected 1, got {}",
|
||||
call.args.len()
|
||||
));
|
||||
}
|
||||
Ok(Self::Max {
|
||||
field_key: Self::identifier(call.args.get(0).unwrap())?,
|
||||
})
|
||||
}
|
||||
|
||||
fn min(call: &'a Call) -> Result<Self> {
|
||||
if call.args.len() != 1 {
|
||||
return error::internal(format!(
|
||||
"invalid number of arguments for min: expected 1, got {}",
|
||||
call.args.len()
|
||||
));
|
||||
}
|
||||
Ok(Self::Min {
|
||||
field_key: Self::identifier(call.args.get(0).unwrap())?,
|
||||
})
|
||||
}
|
||||
|
||||
fn percentile(call: &'a Call) -> Result<Self> {
|
||||
if call.args.len() != 2 {
|
||||
return error::internal(format!(
|
||||
"invalid number of arguments for min: expected 1, got {}",
|
||||
call.args.len()
|
||||
));
|
||||
}
|
||||
Ok(Self::Percentile {
|
||||
field_key: Self::identifier(call.args.get(0).unwrap())?,
|
||||
n: Self::literal_num(call.args.get(1).unwrap())?,
|
||||
})
|
||||
}
|
||||
|
||||
fn sample(call: &'a Call) -> Result<Self> {
|
||||
if call.args.len() != 2 {
|
||||
return error::internal(format!(
|
||||
"invalid number of arguments for min: expected 1, got {}",
|
||||
call.args.len()
|
||||
));
|
||||
}
|
||||
Ok(Self::Sample {
|
||||
field_key: Self::identifier(call.args.get(0).unwrap())?,
|
||||
n: Self::literal_int(call.args.get(1).unwrap())?,
|
||||
})
|
||||
}
|
||||
|
||||
fn top(call: &'a Call) -> Result<Self> {
|
||||
let [field_key, tag_keys @ .., narg] = call.args.as_slice() else {
|
||||
return error::internal(format!(
|
||||
"invalid number of arguments for top: expected 2 or more, got {}",
|
||||
call.args.len()
|
||||
));
|
||||
};
|
||||
let tag_keys: Result<Vec<_>> = tag_keys.iter().map(Self::identifier).collect();
|
||||
Ok(Self::Top {
|
||||
field_key: Self::identifier(field_key)?,
|
||||
tag_keys: tag_keys?,
|
||||
n: Self::literal_int(narg)?,
|
||||
})
|
||||
}
|
||||
|
||||
fn identifier(expr: &'a IQLExpr) -> Result<&'a Identifier> {
|
||||
match expr {
|
||||
IQLExpr::VarRef(v) => Ok(&v.name),
|
||||
e => error::internal(format!("invalid column identifier: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
fn literal_int(expr: &'a IQLExpr) -> Result<i64> {
|
||||
match expr {
|
||||
IQLExpr::Literal(Literal::Integer(n)) => Ok(*n),
|
||||
e => error::internal(format!("invalid integer literal: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
fn literal_num(expr: &'a IQLExpr) -> Result<f64> {
|
||||
match expr {
|
||||
IQLExpr::Literal(Literal::Integer(n)) => Ok(*n as f64),
|
||||
IQLExpr::Literal(Literal::Float(n)) => Ok(*n),
|
||||
e => error::internal(format!("invalid integer literal: {}", e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Display for Selector<'a> {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
|
||||
match self {
|
||||
Self::Bottom {
|
||||
field_key,
|
||||
tag_keys,
|
||||
n,
|
||||
} => {
|
||||
write!(f, "bottom({field_key}")?;
|
||||
for tag_key in tag_keys {
|
||||
write!(f, ", {tag_key}")?;
|
||||
}
|
||||
write!(f, ", {n})")
|
||||
}
|
||||
Self::First { field_key } => write!(f, "first({field_key})"),
|
||||
Self::Last { field_key } => write!(f, "last({field_key})"),
|
||||
Self::Max { field_key } => write!(f, "max({field_key})"),
|
||||
Self::Min { field_key } => write!(f, "min({field_key})"),
|
||||
Self::Percentile { field_key, n } => write!(f, "percentile({field_key}, {n})"),
|
||||
Self::Sample { field_key, n } => write!(f, "sample({field_key}, {n})"),
|
||||
Self::Top {
|
||||
field_key,
|
||||
tag_keys,
|
||||
n,
|
||||
} => {
|
||||
write!(f, "top({field_key}")?;
|
||||
for tag_key in tag_keys {
|
||||
write!(f, ", {tag_key}")?;
|
||||
}
|
||||
write!(f, ", {n})")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
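A brief usage sketch (illustrative only; the field list and expected values below are hypothetical): find_enumerated reports the selector's position within the projection, and the Display impl above renders it back in InfluxQL-like form.
// let fields: Vec<Field> = /* parsed from `SELECT percentile(usage_idle, 50), usage_system` */;
// let (idx, selector) = Selector::find_enumerated(&fields)?;
// assert_eq!(idx, 0);
// assert_eq!(selector.to_string(), "percentile(usage_idle, 50)");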
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub(super) enum SelectorWindowOrderBy<'a> {
|
||||
FieldAsc(&'a Identifier),
|
||||
FieldDesc(&'a Identifier),
|
||||
}
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
use crate::error;
|
||||
use crate::plan::expr_type_evaluator::TypeEvaluator;
|
||||
use crate::plan::field::{field_by_name, field_name};
|
||||
use crate::plan::field_mapper::{field_and_dimensions, FieldTypeMap};
|
||||
use crate::plan::ir::{DataSource, Field, Interval, Select, SelectQuery, TagSet};
|
||||
use crate::plan::var_ref::{influx_type_to_var_ref_data_type, var_ref_data_type_to_influx_type};
|
||||
use crate::plan::{error, util, SchemaProvider};
|
||||
use crate::plan::{util, SchemaProvider};
|
||||
use datafusion::common::{DataFusionError, Result};
|
||||
use influxdb_influxql_parser::common::{MeasurementName, QualifiedMeasurementName, WhereClause};
|
||||
use influxdb_influxql_parser::expression::walk::{
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
//! APIs for testing.
|
||||
#![cfg(test)]
|
||||
|
||||
use crate::plan::{error, SchemaProvider};
|
||||
use crate::error;
|
||||
use crate::plan::SchemaProvider;
|
||||
use chrono::{DateTime, NaiveDate, Utc};
|
||||
use datafusion::common::Result as DataFusionResult;
|
||||
use datafusion::datasource::empty::EmptyTable;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
use crate::plan::error;
|
||||
use crate::{error, NUMERICS};
|
||||
use arrow::array::{Array, ArrayRef, Int64Array};
|
||||
use arrow::datatypes::{DataType, TimeUnit};
|
||||
use datafusion::common::{downcast_value, DataFusionError, Result, ScalarValue};
|
||||
|
@ -10,10 +10,6 @@ use once_cell::sync::Lazy;
|
|||
use std::mem::replace;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// A list of the numeric types supported by InfluxQL that can be used
|
||||
/// as input to user-defined aggregate functions.
|
||||
pub(crate) static NUMERICS: &[DataType] = &[DataType::Int64, DataType::UInt64, DataType::Float64];
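A minimal sketch, not part of this diff, of how a type list such as NUMERICS can be expanded into an accepted signature set; it reuses the same one_of/Exact combinators that appear for percent_row_number later in this change.
// let numeric_unary = Signature::one_of(
//     NUMERICS
//         .iter()
//         .map(|dt| TypeSignature::Exact(vec![dt.clone()]))
//         .collect(),
//     Volatility::Immutable,
// );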
|
||||
|
||||
/// Name of the `MOVING_AVERAGE` user-defined aggregate function.
|
||||
pub(crate) const MOVING_AVERAGE_NAME: &str = "moving_average";
|
||||
|
||||
|
|
|
@ -5,9 +5,8 @@
|
|||
//! call information as the InfluxQL AST. These expressions are then
|
||||
//! rewritten at a later stage of planning, with more context available.
|
||||
|
||||
use crate::plan::error;
|
||||
use crate::plan::udaf::NUMERICS;
|
||||
use crate::plan::util_copy::find_exprs_in_exprs;
|
||||
use crate::{error, NUMERICS};
|
||||
use arrow::datatypes::DataType;
|
||||
use datafusion::logical_expr::{
|
||||
Expr, ReturnTypeFunction, ScalarFunctionImplementation, ScalarUDF, Signature, TypeSignature,
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
use crate::plan::{error, util_copy};
|
||||
use crate::error;
|
||||
use crate::plan::util_copy;
|
||||
use arrow::datatypes::{DataType, TimeUnit};
|
||||
use datafusion::common::tree_node::{TreeNode, VisitRecursion};
|
||||
use datafusion::common::{DFSchemaRef, Result};
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
use datafusion::common::tree_node::{TreeNode, VisitRecursion};
|
||||
use datafusion::common::Result;
|
||||
use datafusion::logical_expr::expr::{
|
||||
AggregateUDF, InList, InSubquery, Placeholder, ScalarFunction, ScalarUDF,
|
||||
AggregateUDF, Alias, InList, InSubquery, Placeholder, ScalarFunction, ScalarUDF,
|
||||
};
|
||||
use datafusion::logical_expr::{
|
||||
expr::{
|
||||
|
@ -104,10 +104,13 @@ where
|
|||
filter: filter.clone(),
|
||||
order_by: order_by.clone(),
|
||||
})),
|
||||
Expr::Alias(nested_expr, alias_name) => Ok(Expr::Alias(
|
||||
Box::new(clone_with_replacement(nested_expr, replacement_fn)?),
|
||||
alias_name.clone(),
|
||||
)),
|
||||
Expr::Alias(Alias {
|
||||
expr: nested_expr,
|
||||
name: alias_name,
|
||||
}) => Ok(Expr::Alias(Alias {
|
||||
expr: Box::new(clone_with_replacement(nested_expr, replacement_fn)?),
|
||||
name: alias_name.clone(),
|
||||
})),
|
||||
Expr::Between(Between {
|
||||
expr,
|
||||
negated,
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
//! User-defined window functions implementing InfluxQL features.
|
||||
|
||||
use datafusion::logical_expr::{
|
||||
PartitionEvaluatorFactory, ReturnTypeFunction, WindowFunction, WindowUDF,
|
||||
};
|
||||
use once_cell::sync::Lazy;
|
||||
use std::sync::Arc;
|
||||
|
||||
mod percent_row_number;
|
||||
|
||||
/// Definition of the `PERCENT_ROW_NUMBER` user-defined window function.
|
||||
pub(crate) static PERCENT_ROW_NUMBER: Lazy<WindowFunction> = Lazy::new(|| {
|
||||
let return_type: ReturnTypeFunction = Arc::new(percent_row_number::return_type);
|
||||
let partition_evaluator_factory: PartitionEvaluatorFactory =
|
||||
Arc::new(percent_row_number::partition_evaluator_factory);
|
||||
|
||||
WindowFunction::WindowUDF(Arc::new(WindowUDF::new(
|
||||
percent_row_number::NAME,
|
||||
&percent_row_number::SIGNATURE,
|
||||
&return_type,
|
||||
&partition_evaluator_factory,
|
||||
)))
|
||||
});
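For reference, the plan snapshots earlier in this change show how the planner consumes this definition: percent_row_number is evaluated alongside ROW_NUMBER over the same ordering, and the two are compared in a filter predicate such as:
// percent_row_number(Float64(50)) ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST]
//     ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
//   = ROW_NUMBER() ORDER BY [usage_idle ASC NULLS LAST, time ASC NULLS LAST]
//     ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING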
|
|
@ -0,0 +1,96 @@
|
|||
use crate::error;
|
||||
use arrow::array::{Array, ArrayRef, Float64Array, Int64Array, UInt64Array};
|
||||
use arrow::datatypes::DataType;
|
||||
use datafusion::common::{downcast_value, DataFusionError, Result};
|
||||
use datafusion::logical_expr::{PartitionEvaluator, Signature, TypeSignature, Volatility};
|
||||
use once_cell::sync::Lazy;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// The name of the percent_row_number window function.
|
||||
pub(super) const NAME: &str = "percent_row_number";
|
||||
|
||||
/// Valid signatures for the percent_row_number window function.
|
||||
pub(super) static SIGNATURE: Lazy<Signature> = Lazy::new(|| {
|
||||
Signature::one_of(
|
||||
vec![
|
||||
TypeSignature::Exact(vec![DataType::Int64]),
|
||||
TypeSignature::Exact(vec![DataType::Float64]),
|
||||
],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
});
|
||||
|
||||
/// Calculate the return type given the function signature. Percent_row_number
|
||||
/// always returns a UInt64.
|
||||
pub(super) fn return_type(_: &[DataType]) -> Result<Arc<DataType>> {
|
||||
Ok(Arc::new(DataType::UInt64))
|
||||
}
|
||||
|
||||
/// Create a new partition evaluator for the percent_row_number window function.
|
||||
pub(super) fn partition_evaluator_factory() -> Result<Box<dyn PartitionEvaluator>> {
|
||||
Ok(Box::new(PercentRowNumberPartitionEvaluator {}))
|
||||
}
|
||||
|
||||
/// PartitionEvaluator which returns the row number at which the nth
|
||||
/// percentile of the data will occur.
|
||||
///
|
||||
/// This evaluator calculates the row_number across the entire partition;
|
||||
/// any data that should not be included must be filtered out before
|
||||
/// evaluating the window function.
|
||||
#[derive(Debug)]
|
||||
struct PercentRowNumberPartitionEvaluator {}
|
||||
|
||||
impl PartitionEvaluator for PercentRowNumberPartitionEvaluator {
|
||||
fn evaluate_all(&mut self, values: &[ArrayRef], num_rows: usize) -> Result<Arc<dyn Array>> {
|
||||
assert_eq!(values.len(), 1);
|
||||
|
||||
let array = Arc::clone(&values[0]);
|
||||
let mut builder = UInt64Array::builder(array.len());
|
||||
match array.data_type() {
|
||||
DataType::Int64 => builder.extend(downcast_value!(array, Int64Array).iter().map(|o| {
|
||||
o.and_then(|v| percentile_idx(num_rows, v as f64).map(|v| v as u64))
|
||||
.or(Some(0))
|
||||
})),
|
||||
DataType::Float64 => {
|
||||
builder.extend(downcast_value!(array, Float64Array).iter().map(|o| {
|
||||
o.and_then(|v| percentile_idx(num_rows, v).map(|v| v as u64))
|
||||
.or(Some(0))
|
||||
}))
|
||||
}
|
||||
dt => {
|
||||
return error::internal(format!(
|
||||
"invalid data type ({dt}) for PERCENTILE n argument"
|
||||
))
|
||||
}
|
||||
};
|
||||
Ok(Arc::new(builder.finish()))
|
||||
}
|
||||
|
||||
fn supports_bounded_execution(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn uses_window_frame(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn include_rank(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
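A minimal test sketch (illustrative only, not part of the change) of the behaviour described above: the single input column is the percentile argument, and every output row carries the 1-based row number at which that percentile falls.
#[cfg(test)]
mod evaluate_all_example {
    use super::*;

    #[test]
    fn constant_percentile_argument() {
        // floor(4 * 50 / 100 + 0.5) = 2, so each of the 4 rows maps to row 2.
        let mut eval = PercentRowNumberPartitionEvaluator {};
        let arg: ArrayRef = Arc::new(Int64Array::from(vec![50_i64; 4]));
        let result = eval.evaluate_all(&[arg], 4).unwrap();
        let rows = result.as_any().downcast_ref::<UInt64Array>().unwrap();
        assert_eq!(rows.iter().collect::<Vec<_>>(), vec![Some(2_u64); 4]);
    }
}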
|
||||
|
||||
/// Calculate the position in an ordered list of len items at which the
|
||||
/// item for the given percentile would be found.
|
||||
///
|
||||
/// Note that row numbers are 1-based so this returns values in the
|
||||
/// range \[1,len\].
|
||||
///
|
||||
/// This uses the same algorithm as the original InfluxDB implementation
|
||||
/// of percentile as can be found in
|
||||
/// <https://github.com/influxdata/influxdb/blob/75a8bcfae2af7b0043933be9f96b98c0741ceee3/influxql/query/call_iterator.go#L1087>.
|
||||
fn percentile_idx(len: usize, percentile: f64) -> Option<usize> {
|
||||
match TryInto::<usize>::try_into(((len as f64) * percentile / 100.0 + 0.5).floor() as isize) {
|
||||
Ok(idx) if 0 < idx && idx <= len => Some(idx),
|
||||
_ => None,
|
||||
}
|
||||
}
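
// Illustrative sketch (editorial, not part of the change above): a few worked
// values for `percentile_idx`, following the formula above,
// idx = floor(len * percentile / 100 + 0.5), accepted only when it lands
// strictly between 0 and `len`.
#[cfg(test)]
mod percentile_idx_examples {
    use super::percentile_idx;

    #[test]
    fn worked_examples() {
        // 50th percentile of 10 rows: floor(10 * 0.5 + 0.5) = 5 -> row 5.
        assert_eq!(percentile_idx(10, 50.0), Some(5));
        // 90th percentile of 100 rows: floor(100 * 0.9 + 0.5) = 90 -> row 90.
        assert_eq!(percentile_idx(100, 90.0), Some(90));
        // 1st percentile of 10 rows floors to 0, which is out of range.
        assert_eq!(percentile_idx(10, 1.0), None);
    }
}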

@ -328,7 +328,11 @@ impl TestTable

        let partition = repos
            .partitions()
            .cas_sort_key(partition.id, None, sort_key)
            .cas_sort_key(
                &TransitionPartitionId::Deprecated(partition.id),
                None,
                sort_key,
            )
            .await
            .unwrap();

@ -452,7 +456,7 @@ impl TestPartition
            .await
            .partitions()
            .cas_sort_key(
                self.partition.id,
                &TransitionPartitionId::Deprecated(self.partition.id),
                Some(old_sort_key),
                &sort_key.to_columns().collect::<Vec<_>>(),
            )

@ -786,7 +790,7 @@ async fn update_catalog_sort_key_if_needed(
        );
        partitions_catalog
            .cas_sort_key(
                partition_id,
                &TransitionPartitionId::Deprecated(partition_id),
                Some(
                    catalog_sort_key
                        .to_columns()

@ -803,7 +807,11 @@ async fn update_catalog_sort_key_if_needed(
        let new_columns = sort_key.to_columns().collect::<Vec<_>>();
        debug!("Updating sort key from None to {:?}", &new_columns);
        partitions_catalog
            .cas_sort_key(partition_id, None, &new_columns)
            .cas_sort_key(
                &TransitionPartitionId::Deprecated(partition_id),
                None,
                &new_columns,
            )
            .await
            .unwrap();
    }
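
All four hunks above are the same call-site migration: `cas_sort_key` now identifies the partition with a `&TransitionPartitionId` rather than a bare `PartitionId`. A minimal sketch of the new call shape, assuming `repos`, a `partition_id: PartitionId`, and `new_columns` as in the hunks above:

    // Wrap the catalog row id in the deprecated, id-based variant before the CAS call.
    let id = TransitionPartitionId::Deprecated(partition_id);
    repos
        .partitions()
        .cas_sort_key(&id, None, &new_columns)
        .await
        .unwrap();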

@ -38,7 +38,7 @@ log = "0.4"
parking_lot = "0.12"
reqwest = { version = "0.11", default-features = false, features = ["json", "rustls-tls"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.100"
serde_json = "1.0.102"
serde_urlencoded = "0.7.0"
snafu = "0.7"
tokio = { version = "1.29", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] }

@ -488,7 +488,7 @@ impl TreeNodeVisitor for RowBasedVisitor

    fn pre_visit(&mut self, expr: &Expr) -> Result<VisitRecursion, DataFusionError> {
        match expr {
            Expr::Alias(_, _)
            Expr::Alias(_)
            | Expr::Between { .. }
            | Expr::BinaryExpr { .. }
            | Expr::Case { .. }

@ -13,10 +13,10 @@ use cache_system::{
};
use data_types::{
    partition_template::{build_column_values, ColumnValue},
    ColumnId, Partition, PartitionId,
    ColumnId, Partition, PartitionId, TransitionPartitionId,
};
use datafusion::scalar::ScalarValue;
use iox_catalog::interface::Catalog;
use iox_catalog::{interface::Catalog, partition_lookup};
use iox_query::chunk_statistics::{ColumnRange, ColumnRanges};
use iox_time::TimeProvider;
use observability_deps::tracing::debug;

@ -66,12 +66,9 @@ impl PartitionCache
            async move {
                let partition = Backoff::new(&backoff_config)
                    .retry_all_errors("get partition_key", || async {
                        catalog
                            .repositories()
                            .await
                            .partitions()
                            .get_by_id(partition_id)
                            .await
                        let mut repos = catalog.repositories().await;
                        let id = TransitionPartitionId::Deprecated(partition_id);
                        partition_lookup(repos.as_mut(), &id).await
                    })
                    .await
                    .expect("retry forever")?;
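
The second hunk replaces the chained `get_by_id` call with the new `partition_lookup` helper; in isolation the lookup looks roughly like this (a sketch, assuming `catalog` and `partition_id` as above):

    // Resolve the partition through the id-based (deprecated) identifier form.
    let mut repos = catalog.repositories().await;
    let id = TransitionPartitionId::Deprecated(partition_id);
    let partition = partition_lookup(repos.as_mut(), &id).await;
    // Retry and error handling stay with the caller, as in the Backoff loop above.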

@ -3,6 +3,7 @@ use arrow::{datatypes::SchemaRef, error::Result as ArrowResult, record_batch::Re
use async_trait::async_trait;
use data_types::NamespaceId;
use datafusion::error::DataFusionError;
use datafusion::physical_plan::{DisplayAs, DisplayFormatType};
use datafusion::{
    catalog::schema::SchemaProvider,
    datasource::TableProvider,

@ -140,9 +141,7 @@ struct SystemTableExecutionPlan<T> {

impl<T> std::fmt::Debug for SystemTableExecutionPlan<T> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("SystemTableExecutionPlan")
            .field("projection", &self.projection)
            .finish()
        self.fmt_as(DisplayFormatType::Default, f)
    }
}

@ -192,6 +191,17 @@ impl<T: IoxSystemTable + 'static> ExecutionPlan for SystemTableExecutionPlan<T>
    }
}

impl<T> DisplayAs for SystemTableExecutionPlan<T> {
    fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match t {
            DisplayFormatType::Default | DisplayFormatType::Verbose => f
                .debug_struct("SystemTableExecutionPlan")
                .field("projection", &self.projection)
                .finish(),
        }
    }
}

struct SystemTableStream {
    projected_schema: SchemaRef,
    projection: Option<Vec<usize>>,
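
The added `DisplayAs` impl follows the pattern the upgraded DataFusion appears to expect for `ExecutionPlan` types: the human-readable form lives in `fmt_as`, and `Debug` simply delegates to it. A standalone sketch of the same pattern on a hypothetical `MyPlan` type (the name and field are illustrative, not from this diff):

    use datafusion::physical_plan::{DisplayAs, DisplayFormatType};

    /// Hypothetical plan node, used only to illustrate the delegation pattern.
    struct MyPlan {
        projection: Option<Vec<usize>>,
    }

    impl DisplayAs for MyPlan {
        fn fmt_as(&self, _t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            write!(f, "MyPlan: projection={:?}", self.projection)
        }
    }

    impl std::fmt::Debug for MyPlan {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            // Keep Debug output and the plan display consistent by delegating.
            self.fmt_as(DisplayFormatType::Default, f)
        }
    }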

@ -11,7 +11,7 @@ chrono = { version = "0.4", default-features = false }
datafusion = { workspace = true }
once_cell = "1"
regex = "1"
regex-syntax = "0.7.3"
regex-syntax = "0.7.4"
schema = { path = "../schema" }
snafu = "0.7"
workspace-hack = { version = "0.1", path = "../workspace-hack" }

@ -18,7 +18,7 @@
// Workaround for "unused crate" lint false positives.
use workspace_hack as _;

use data_types::{PartitionId, TableId};
use data_types::{PartitionId, TableId, TransitionPartitionId};
use generated_types::influxdata::iox::catalog::v1::*;
use iox_catalog::interface::{Catalog, SoftDeletedRows};
use observability_deps::tracing::*;

@ -47,11 +47,11 @@ impl catalog_service_server::CatalogService for CatalogService {
    ) -> Result<Response<GetParquetFilesByPartitionIdResponse>, Status> {
        let mut repos = self.catalog.repositories().await;
        let req = request.into_inner();
        let partition_id = PartitionId::new(req.partition_id);
        let partition_id = TransitionPartitionId::Deprecated(PartitionId::new(req.partition_id));

        let parquet_files = repos
            .parquet_files()
            .list_by_partition_not_to_delete(partition_id)
            .list_by_partition_not_to_delete(&partition_id)
            .await
            .map_err(|e| {
                warn!(error=%e, %req.partition_id, "failed to get parquet_files for partition");

@ -26,7 +26,7 @@ bytes = "1.4"
futures = "0.3"
prost = "0.11"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.100"
serde_json = "1.0.102"
snafu = "0.7"
tokio = { version = "1.29", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] }
tonic = { workspace = true }

@ -30,7 +30,7 @@ pin-project = "1.1"
prost = "0.11"
regex = "1.9.1"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.100"
serde_json = "1.0.102"
snafu = "0.7"
tokio-stream = { version = "0.1", features = ["net"] }
tonic = { workspace = true }

@ -22,5 +22,6 @@ workspace-hack = { version = "0.1", path = "../workspace-hack" }
sysinfo = "0.29.4"

[dev-dependencies]
tempfile = "3.6.0"
# Need the multi-threaded executor for testing
tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-thread", "time"] }

@ -27,8 +27,10 @@ pub struct DiskSpaceMetrics {
impl DiskSpaceMetrics {
    /// Create a new [`DiskSpaceMetrics`], returning [`None`] if no disk can be
    /// found for the specified `directory`.
    pub fn new(mut directory: PathBuf, registry: &metric::Registry) -> Option<Self> {
    pub fn new(directory: PathBuf, registry: &metric::Registry) -> Option<Self> {
        let path: Cow<'static, str> = Cow::from(directory.display().to_string());
        let mut directory = directory.canonicalize().ok()?;

        let attributes = Attributes::from([("path", path)]);

        let available_disk_space = registry

@ -49,6 +51,7 @@ impl DiskSpaceMetrics {
        let system = System::new_with_specifics(RefreshKind::new().with_disks_list());

        // Resolve the mount point once.
        // The directory path may be `/path/to/dir` and the mount point is `/`.
        let disk_idx = loop {
            if let Some((idx, _disk)) = system
                .disks()

@ -99,15 +102,27 @@ mod tests {
    use std::{sync::Arc, time::Instant};

    use metric::Metric;
    use tempfile::tempdir_in;

    use super::*;

    #[tokio::test]
    async fn test_metrics() {
        let tmp_dir = tempdir_in(".").ok().unwrap();
        let path = tmp_dir.path().display().to_string();
        // TempDir creates a directory in the current directory, so test the relative path (if possible).
        let path = match path.find("/./") {
            Some(index) => &path[index + 3..],
            None => &path[..],
        };

        let pathbuf = PathBuf::from(path);
        let metric_label: Cow<'static, str> = path.to_string().into();

        let registry = Arc::new(metric::Registry::new());

        let _handle = tokio::spawn(
            DiskSpaceMetrics::new(PathBuf::from("/"), &registry)
            DiskSpaceMetrics::new(pathbuf, &registry)
                .expect("root always exists")
                .run(),
        );

@ -124,14 +139,14 @@ mod tests {
        let recorded_free_metric = registry
            .get_instrument::<Metric<U64Gauge>>("disk_space_free")
            .expect("metric should exist")
            .get_observer(&Attributes::from(&[("path", "/")]))
            .get_observer(&Attributes::from([("path", metric_label.clone())]))
            .expect("metric should have labels")
            .fetch();

        let recorded_total_metric = registry
            .get_instrument::<Metric<U64Gauge>>("disk_capacity_total")
            .expect("metric should exist")
            .get_observer(&Attributes::from(&[("path", "/")]))
            .get_observer(&Attributes::from([("path", metric_label.clone())]))
            .expect("metric should have labels")
            .fetch();
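
One detail worth noting in the hunks above: the gauges are labelled with the path exactly as it was passed in (its `Display` form is captured before `canonicalize()`), so callers read them back under that same label. A sketch reusing only calls shown above, with an assumed `./data` directory and the imports of the test module:

    // Start the background metrics loop for a relative directory.
    let registry = Arc::new(metric::Registry::new());
    let _handle = tokio::spawn(
        DiskSpaceMetrics::new(PathBuf::from("./data"), &registry)
            .expect("directory should map to a disk")
            .run(),
    );

    // Read the gauge back under the requested path, not the canonicalized mount point.
    let free = registry
        .get_instrument::<Metric<U64Gauge>>("disk_space_free")
        .expect("metric should exist")
        .get_observer(&Attributes::from([("path", Cow::from("./data"))]))
        .expect("metric should have labels")
        .fetch();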

@ -17,11 +17,11 @@ license.workspace = true
### BEGIN HAKARI SECTION
[dependencies]
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
arrow = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/42.0.0_patched", features = ["dyn_cmp_dict", "prettyprint"] }
arrow-array = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/42.0.0_patched", default-features = false, features = ["chrono-tz"] }
arrow-flight = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/42.0.0_patched", features = ["flight-sql-experimental"] }
arrow-ord = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/42.0.0_patched", default-features = false, features = ["dyn_cmp_dict"] }
arrow-string = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/42.0.0_patched", default-features = false, features = ["dyn_cmp_dict"] }
arrow = { version = "43", features = ["dyn_cmp_dict", "prettyprint"] }
arrow-array = { version = "43", default-features = false, features = ["chrono-tz"] }
arrow-flight = { version = "43", features = ["flight-sql-experimental"] }
arrow-ord = { version = "43", default-features = false, features = ["dyn_cmp_dict"] }
arrow-string = { version = "43", default-features = false, features = ["dyn_cmp_dict"] }
base64-594e8ee84c453af0 = { package = "base64", version = "0.13" }
base64-647d43efb71741da = { package = "base64", version = "0.21" }
bitflags = { version = "1" }

@ -30,9 +30,9 @@ bytes = { version = "1" }
chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "serde"] }
crossbeam-utils = { version = "0.8" }
crypto-common = { version = "0.1", default-features = false, features = ["std"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "46752163bd4f30f778850160513e8ca7f15fcf14" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "46752163bd4f30f778850160513e8ca7f15fcf14", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "46752163bd4f30f778850160513e8ca7f15fcf14", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "04ecaf7405dbbfd43f43acec972f2435ada5ee81" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "04ecaf7405dbbfd43f43acec972f2435ada5ee81", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "04ecaf7405dbbfd43f43acec972f2435ada5ee81", default-features = false, features = ["crypto_expressions", "encoding_expressions", "regex_expressions", "unicode_expressions"] }
digest = { version = "0.10", features = ["mac", "std"] }
either = { version = "1" }
fixedbitset = { version = "0.4" }

@ -47,7 +47,8 @@ futures-task = { version = "0.3", default-features = false, features = ["std"] }
futures-util = { version = "0.3", features = ["channel", "io", "sink"] }
getrandom = { version = "0.2", default-features = false, features = ["std"] }
hashbrown = { version = "0.14", features = ["raw"] }
indexmap = { version = "1", default-features = false, features = ["std"] }
indexmap-dff4ba8e3ae991db = { package = "indexmap", version = "1", default-features = false, features = ["std"] }
indexmap-f595c2ba2a3f28df = { package = "indexmap", version = "2" }
itertools = { version = "0.10" }
libc = { version = "0.2", features = ["extra_traits"] }
lock_api = { version = "0.4", features = ["arc_lock"] }

@ -59,7 +60,7 @@ num-traits = { version = "0.2", features = ["i128", "libm"] }
object_store = { version = "0.6", default-features = false, features = ["aws", "azure", "gcp"] }
once_cell = { version = "1", features = ["parking_lot"] }
parking_lot = { version = "0.12", features = ["arc_lock"] }
parquet = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/42.0.0_patched", features = ["experimental", "object_store"] }
parquet = { version = "43", features = ["experimental", "object_store"] }
petgraph = { version = "0.6" }
phf_shared = { version = "0.11" }
predicates = { version = "3" }

@ -122,7 +123,7 @@ futures-util = { version = "0.3", features = ["channel", "io", "sink"] }
getrandom = { version = "0.2", default-features = false, features = ["std"] }
hashbrown = { version = "0.14", features = ["raw"] }
heck = { version = "0.4", features = ["unicode"] }
indexmap = { version = "1", default-features = false, features = ["std"] }
indexmap-dff4ba8e3ae991db = { package = "indexmap", version = "1", default-features = false, features = ["std"] }
itertools = { version = "0.10" }
libc = { version = "0.2", features = ["extra_traits"] }
lock_api = { version = "0.4", features = ["arc_lock"] }