commit 27977299ad
Merge branch 'main' into dom/perf-sparse-reup
@@ -43,9 +43,9 @@ dependencies = [

[[package]]
name = "aho-corasick"
version = "1.0.1"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67fc08ce920c31afb70f013dcce1bfc3a3195de6a228474e45e1f145b36f8d04"
checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41"
dependencies = [
"memchr",
]
@@ -67,9 +67,9 @@ dependencies = [

[[package]]
name = "allocator-api2"
version = "0.2.14"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4f263788a35611fba42eb41ff811c5d0360c58b97402570312a350736e2542e"
checksum = "56fc6cf8dc8c4158eed8649f9b8b0ea1518eb62b544fe9490d66fa0b349eafe9"

[[package]]
name = "android-tzdata"
@@ -155,9 +155,9 @@ checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545"

[[package]]
name = "arrayvec"
version = "0.7.2"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
checksum = "8868f09ff8cea88b079da74ae569d9b8c62a23c68c746240b704ee6f7525c89c"

[[package]]
name = "arrow"
@@ -494,7 +494,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]

[[package]]
@@ -505,7 +505,7 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]

[[package]]
@@ -644,9 +644,9 @@ dependencies = [

[[package]]
name = "blake3"
version = "1.3.3"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42ae2468a89544a466886840aa467a25b766499f4f04bf7d9fcd10ecee9fccef"
checksum = "729b71f35bd3fa1a4c86b85d32c8b9069ea7fe14f7a53cfabb65f62d4265b888"
dependencies = [
"arrayref",
"arrayvec",
@@ -688,9 +688,9 @@ dependencies = [

[[package]]
name = "bstr"
version = "1.4.0"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3d4260bcc2e8fc9df1eac4919a720effeb63a3f0952f5bf4944adfa18897f09"
checksum = "a246e68bb43f6cd9db24bea052a53e40405417c5fb372e3d1a8a7f770a564ef5"
dependencies = [
"memchr",
"once_cell",
@@ -700,9 +700,9 @@ dependencies = [

[[package]]
name = "bumpalo"
version = "3.12.2"
version = "3.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c6ed94e98ecff0c12dd1b04c15ec0d7d9458ca8fe806cea6f12954efe74c63b"
checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"

[[package]]
name = "bytemuck"
@@ -902,7 +902,7 @@ dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]

[[package]]
@@ -1036,9 +1036,9 @@ dependencies = [

[[package]]
name = "console"
version = "0.15.6"
version = "0.15.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0525278dce688103060006713371cedbad27186c7d913f33d866b498da0f595"
checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8"
dependencies = [
"encode_unicode",
"lazy_static",
@@ -1107,9 +1107,9 @@ dependencies = [

[[package]]
name = "constant_time_eq"
version = "0.2.5"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13418e745008f7349ec7e449155f419a61b92b58a99cc3616942b926825ec76b"
checksum = "21a53c0a4d288377e7415b53dcfc3c04da5cdc2cc95c8d5ac178b58f0b861ad6"

[[package]]
name = "core-foundation-sys"
@@ -1128,9 +1128,9 @@ dependencies = [

[[package]]
name = "cpufeatures"
version = "0.2.7"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e4c1eaa2012c47becbbad2ab175484c2a84d1185b566fb2cc5b8707343dfe58"
checksum = "03e69e28e9f7f77debdedbaafa2866e1de9ba56df55a8bd7cfc724c25a09987c"
dependencies = [
"libc",
]
@@ -1238,14 +1238,14 @@ dependencies = [

[[package]]
name = "crossbeam-epoch"
version = "0.9.14"
version = "0.9.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695"
checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7"
dependencies = [
"autocfg",
"cfg-if",
"crossbeam-utils",
"memoffset 0.8.0",
"memoffset 0.9.0",
"scopeguard",
]

@@ -1286,9 +1286,9 @@ dependencies = [

[[package]]
name = "csv"
version = "1.2.1"
version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b015497079b9a9d69c02ad25de6c0a6edef051ea6360a327d0bd05802ef64ad"
checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086"
dependencies = [
"csv-core",
"itoa",
@@ -1325,7 +1325,7 @@ dependencies = [
"hashbrown 0.12.3",
"lock_api",
"once_cell",
"parking_lot_core 0.9.7",
"parking_lot_core 0.9.8",
]

[[package]]
@@ -1355,7 +1355,7 @@ dependencies = [
[[package]]
name = "datafusion"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"ahash 0.8.3",
"arrow",
@@ -1377,7 +1377,7 @@ dependencies = [
"flate2",
"futures",
"glob",
"hashbrown 0.13.2",
"hashbrown 0.14.0",
"indexmap",
"itertools",
"lazy_static",
@@ -1393,7 +1393,6 @@ dependencies = [
"sqlparser",
"tempfile",
"tokio",
"tokio-stream",
"tokio-util",
"url",
"uuid",
@@ -1404,7 +1403,7 @@ dependencies = [
[[package]]
name = "datafusion-common"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"arrow",
"arrow-array",
@@ -1418,12 +1417,12 @@ dependencies = [
[[package]]
name = "datafusion-execution"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"dashmap",
"datafusion-common",
"datafusion-expr",
"hashbrown 0.13.2",
"hashbrown 0.14.0",
"log",
"object_store",
"parking_lot 0.12.1",
@@ -1435,7 +1434,7 @@ dependencies = [
[[package]]
name = "datafusion-expr"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"ahash 0.8.3",
"arrow",
@@ -1449,7 +1448,7 @@ dependencies = [
[[package]]
name = "datafusion-optimizer"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"arrow",
"async-trait",
@@ -1457,7 +1456,7 @@ dependencies = [
"datafusion-common",
"datafusion-expr",
"datafusion-physical-expr",
"hashbrown 0.13.2",
"hashbrown 0.14.0",
"itertools",
"log",
"regex-syntax 0.7.2",
@@ -1466,7 +1465,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"ahash 0.8.3",
"arrow",
@@ -1480,7 +1479,7 @@ dependencies = [
"datafusion-expr",
"datafusion-row",
"half 2.2.1",
"hashbrown 0.13.2",
"hashbrown 0.14.0",
"indexmap",
"itertools",
"lazy_static",
@@ -1498,7 +1497,7 @@ dependencies = [
[[package]]
name = "datafusion-proto"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"arrow",
"chrono",
@@ -1512,7 +1511,7 @@ dependencies = [
[[package]]
name = "datafusion-row"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"arrow",
"datafusion-common",
@@ -1523,7 +1522,7 @@ dependencies = [
[[package]]
name = "datafusion-sql"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"arrow",
"arrow-schema",
@@ -1891,7 +1890,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]

[[package]]
@@ -1994,9 +1993,9 @@ dependencies = [

[[package]]
name = "gimli"
version = "0.27.2"
version = "0.27.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4"
checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e"

[[package]]
name = "glob"
@@ -2112,9 +2111,6 @@ name = "hashbrown"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e"
dependencies = [
"ahash 0.8.3",
]

[[package]]
name = "hashbrown"
@@ -2128,11 +2124,11 @@ dependencies = [

[[package]]
name = "hashlink"
version = "0.8.2"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0761a1b9491c4f2e3d66aa0f62d0fba0af9a0e2852e4d48ea506632a4b56e6aa"
checksum = "312f66718a2d7789ffef4f4b7b213138ed9f1eb3aa1d0d82fc99f88fb3ffd26f"
dependencies = [
"hashbrown 0.13.2",
"hashbrown 0.14.0",
]

[[package]]
@@ -2300,7 +2296,7 @@ dependencies = [
"hyper",
"rustls 0.21.2",
"tokio",
"tokio-rustls 0.24.0",
"tokio-rustls 0.24.1",
]

[[package]]
@@ -2317,9 +2313,9 @@ dependencies = [

[[package]]
name = "iana-time-zone"
version = "0.1.56"
version = "0.1.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0722cd7114b7de04316e7ea5456a0bbb20e4adb46fd27a3697adb812cff0f37c"
checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613"
dependencies = [
"android_system_properties",
"core-foundation-sys",
@@ -3134,9 +3130,9 @@ dependencies = [

[[package]]
name = "js-sys"
version = "0.3.63"
version = "0.3.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f37a4a5928311ac501dee68b3c7613a1037d0edb30c8e5427bd832d55d1b790"
checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a"
dependencies = [
"wasm-bindgen",
]
@@ -3242,9 +3238,9 @@ checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"

[[package]]
name = "linux-raw-sys"
version = "0.3.7"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ece97ea872ece730aed82664c424eb4c8291e1ff2480247ccf7409044bc6479f"
checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519"

[[package]]
name = "lock_api"
@@ -3355,9 +3351,9 @@ dependencies = [

[[package]]
name = "memoffset"
version = "0.8.0"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1"
checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c"
dependencies = [
"autocfg",
]
@@ -3413,14 +3409,13 @@ dependencies = [

[[package]]
name = "mio"
version = "0.8.6"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b9d9a46eff5b4ff64b45a9e316a6d1e0bc719ef429cbec4dc630684212bfdf9"
checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2"
dependencies = [
"libc",
"log",
"wasi",
"windows-sys 0.45.0",
"windows-sys 0.48.0",
]

[[package]]
@@ -3667,9 +3662,9 @@ dependencies = [

[[package]]
name = "object"
version = "0.30.3"
version = "0.30.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea86265d3d3dcb6a27fc51bd29a4bf387fae9d2986b823079d4986af253eb439"
checksum = "03b4680b86d9cfafba8fc491dc9b6df26b68cf40e9e6cd73909194759a63c385"
dependencies = [
"memchr",
]
@@ -3734,7 +3729,7 @@ version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
dependencies = [
"parking_lot_core 0.9.7",
"parking_lot_core 0.9.8",
]

[[package]]
@@ -3804,7 +3799,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
dependencies = [
"lock_api",
"parking_lot_core 0.9.7",
"parking_lot_core 0.9.8",
]

[[package]]
@@ -3823,15 +3818,15 @@ dependencies = [

[[package]]
name = "parking_lot_core"
version = "0.9.7"
version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521"
checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447"
dependencies = [
"cfg-if",
"libc",
"redox_syscall 0.2.16",
"redox_syscall 0.3.5",
"smallvec",
"windows-sys 0.45.0",
"windows-targets 0.48.0",
]

[[package]]
@@ -4013,7 +4008,7 @@ dependencies = [
"pest_meta",
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]

[[package]]
@@ -4092,7 +4087,7 @@ checksum = "39407670928234ebc5e6e580247dd567ad73a3578460c5990f9503df207e8f07"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]

[[package]]
@@ -4225,9 +4220,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068"

[[package]]
name = "proc-macro2"
version = "1.0.58"
version = "1.0.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa1fb82fc0c281dd9671101b66b771ebbe1eaf967b96ac8740dcba4b70005ca8"
checksum = "dec2b086b7a862cf4de201096214fa870344cf922b2b30c167badb3af3195406"
dependencies = [
"unicode-ident",
]
@@ -4417,9 +4412,9 @@ dependencies = [

[[package]]
name = "quote"
version = "1.0.27"
version = "1.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f4f29d145265ec1c483c7c654450edde0bfe043d3938d6972630663356d9500"
checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488"
dependencies = [
"proc-macro2",
]
@@ -4575,7 +4570,7 @@ dependencies = [
"serde_json",
"serde_urlencoded",
"tokio",
"tokio-rustls 0.24.0",
"tokio-rustls 0.24.1",
"tokio-util",
"tower-service",
"url",
@@ -4836,7 +4831,7 @@ checksum = "d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]

[[package]]
@@ -5060,9 +5055,9 @@ dependencies = [

[[package]]
name = "sha2"
version = "0.10.6"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0"
checksum = "479fb9d862239e610720565ca91403019f2f00410f1864c5aa7479b950a76ed8"
dependencies = [
"cfg-if",
"cpufeatures",
@@ -5389,9 +5384,9 @@ dependencies = [

[[package]]
name = "subtle"
version = "2.4.1"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601"
checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"

[[package]]
name = "symbolic-common"
@@ -5429,9 +5424,9 @@ dependencies = [

[[package]]
name = "syn"
version = "2.0.16"
version = "2.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6f671d4b5ffdb8eadec19c0ae67fe2639df8684bd7bc4b83d986b8db549cf01"
checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e"
dependencies = [
"proc-macro2",
"quote",
@@ -5540,7 +5535,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]

[[package]]
@@ -5668,7 +5663,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]

[[package]]
@@ -5684,9 +5679,9 @@ dependencies = [

[[package]]
name = "tokio-rustls"
version = "0.24.0"
version = "0.24.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0d409377ff5b1e3ca6437aa86c1eb7d40c134bfec254e44c830defa92669db5"
checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081"
dependencies = [
"rustls 0.21.2",
"tokio",
@@ -5741,9 +5736,9 @@ dependencies = [

[[package]]
name = "toml_edit"
version = "0.19.9"
version = "0.19.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92d964908cec0d030b812013af25a0e57fddfadb1e066ecc6681d86253129d4f"
checksum = "2380d56e8670370eee6566b0bfd4265f65b3f432e8c6d85623f728d4fa31f739"
dependencies = [
"indexmap",
"serde",
@@ -5775,13 +5770,13 @@ dependencies = [
"prost",
"rustls-pemfile",
"tokio",
"tokio-rustls 0.24.0",
"tokio-rustls 0.24.1",
"tokio-stream",
"tower",
"tower-layer",
"tower-service",
"tracing",
"webpki-roots 0.23.0",
"webpki-roots 0.23.1",
]

[[package]]
@@ -5942,7 +5937,7 @@ checksum = "0f57e3ca2a01450b1a921183a9c9cbfda207fd822cef4ccb00a65402cbba7a74"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]

[[package]]
@@ -6073,9 +6068,9 @@ checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460"

[[package]]
name = "unicode-ident"
version = "1.0.8"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"
checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0"

[[package]]
name = "unicode-normalization"
@@ -6219,11 +6214,10 @@ dependencies = [

[[package]]
name = "want"
version = "0.3.0"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ce8a968cb1cd110d136ff8b819a556d6fb6d919363c61534f6860c7eb172ba0"
checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e"
dependencies = [
"log",
"try-lock",
]

@@ -6235,9 +6229,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"

[[package]]
name = "wasm-bindgen"
version = "0.2.86"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bba0e8cb82ba49ff4e229459ff22a191bbe9a1cb3a341610c9c33efc27ddf73"
checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
@@ -6245,24 +6239,24 @@ dependencies = [

[[package]]
name = "wasm-bindgen-backend"
version = "0.2.86"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19b04bc93f9d6bdee709f6bd2118f57dd6679cf1176a1af464fca3ab0d66d8fb"
checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd"
dependencies = [
"bumpalo",
"log",
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
"wasm-bindgen-shared",
]

[[package]]
name = "wasm-bindgen-futures"
version = "0.4.36"
version = "0.4.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d1985d03709c53167ce907ff394f5316aa22cb4e12761295c5dc57dacb6297e"
checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03"
dependencies = [
"cfg-if",
"js-sys",
@@ -6272,9 +6266,9 @@ dependencies = [

[[package]]
name = "wasm-bindgen-macro"
version = "0.2.86"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14d6b024f1a526bb0234f52840389927257beb670610081360e5a03c5df9c258"
checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
@@ -6282,22 +6276,22 @@ dependencies = [

[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.86"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e128beba882dd1eb6200e1dc92ae6c5dbaa4311aa7bb211ca035779e5efc39f8"
checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]

[[package]]
name = "wasm-bindgen-shared"
version = "0.2.86"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed9d5b4305409d1fc9482fee2d7f9bcbf24b3972bf59817ef757e23982242a93"
checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1"

[[package]]
name = "wasm-streams"
@@ -6314,9 +6308,9 @@ dependencies = [

[[package]]
name = "web-sys"
version = "0.3.63"
version = "0.3.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3bdd9ef4e984da1187bf8110c5cf5b845fbc87a23602cdf912386a76fcd3a7c2"
checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b"
dependencies = [
"js-sys",
"wasm-bindgen",
@@ -6343,9 +6337,9 @@ dependencies = [

[[package]]
name = "webpki-roots"
version = "0.23.0"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa54963694b65584e170cf5dc46aeb4dcaa5584e652ff5f3952e56d66aff0125"
checksum = "b03058f88386e5ff5310d9111d53f48b17d732b401aeb83a8d5190f2ac459338"
dependencies = [
"rustls-webpki",
]
@@ -6545,9 +6539,9 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"

[[package]]
name = "winnow"
version = "0.4.6"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61de7bac303dc551fe038e2b3cef0f571087a47571ea6e79a87692ac99b99699"
checksum = "ca0ace3845f0d96209f0375e6d367e3eb87eb65d27d445bdc9f1843a26f39448"
dependencies = [
"memchr",
]
@@ -6596,7 +6590,7 @@ dependencies = [
"futures-task",
"futures-util",
"getrandom",
"hashbrown 0.13.2",
"hashbrown 0.14.0",
"heck",
"indexmap",
"io-lifetimes",
@@ -6639,7 +6633,7 @@ dependencies = [
"sqlx-macros",
"strum",
"syn 1.0.109",
"syn 2.0.16",
"syn 2.0.18",
"thrift",
"tokio",
"tokio-stream",
@@ -6656,7 +6650,6 @@ dependencies = [
"uuid",
"webpki",
"winapi",
"windows-sys 0.45.0",
"windows-sys 0.48.0",
"zstd",
"zstd-safe",
@@ -118,8 +118,8 @@ license = "MIT OR Apache-2.0"
[workspace.dependencies]
arrow = { version = "41.0.0" }
arrow-flight = { version = "41.0.0" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "91e75d7e6303c1a7331e8e90eaad9b095ace929b", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "91e75d7e6303c1a7331e8e90eaad9b095ace929b" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "84e49771b7403b3d313d8493b61d2d58dcdd7514", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "84e49771b7403b3d313d8493b61d2d58dcdd7514" }
hashbrown = { version = "0.14.0" }
object_store = { version = "0.6.0" }
parquet = { version = "41.0.0" }
@@ -206,6 +206,19 @@ pub struct CompactorConfig {
    )]
    pub shadow_mode: bool,

    /// Enable scratchpad.
    ///
    /// This allows disabling the scratchpad in production.
    ///
    /// Disabling this is useful for testing performance and memory consequences of the scratchpad.
    #[clap(
        long = "compaction-enable-scratchpad",
        env = "INFLUXDB_IOX_COMPACTION_ENABLE_SCRATCHPAD",
        default_value = "true",
        action
    )]
    pub enable_scratchpad: bool,

    /// Ignores "partition marked w/ error and shall be skipped" entries in the catalog.
    ///
    /// This is mostly useful for debugging.
@@ -402,7 +402,7 @@ fn make_parquet_files_sink(config: &Config) -> Arc<dyn ParquetFilesSink> {
}

fn make_scratchpad_gen(config: &Config) -> Arc<dyn ScratchpadGen> {
    if config.simulate_without_object_store {
    if config.simulate_without_object_store || !config.enable_scratchpad {
        Arc::new(NoopScratchpadGen::new())
    } else {
        let scratchpad_store_output = if config.shadow_mode {
@@ -28,6 +28,7 @@ pub fn log_config(config: &Config) {
        partition_timeout,
        partitions_source,
        shadow_mode,
        enable_scratchpad,
        ignore_partition_skip_marker,
        shard_config,
        min_num_l1_files_to_compact,
@@ -73,6 +74,7 @@ pub fn log_config(config: &Config) {
        partition_timeout_secs=partition_timeout.as_secs_f32(),
        %partitions_source,
        shadow_mode,
        enable_scratchpad,
        ignore_partition_skip_marker,
        ?shard_cfg_n_shards,
        ?shard_cfg_shard_id,
@@ -91,6 +91,13 @@ pub struct Config {
    /// This is mostly useful for debugging.
    pub shadow_mode: bool,

    /// Enable scratchpad.
    ///
    /// Enabled by default; if this is set to false, the compactor will not use the scratchpad.
    ///
    /// This is useful for disabling the scratchpad in production to evaluate the performance & memory impacts.
    pub enable_scratchpad: bool,

    /// Ignores "partition marked w/ error and shall be skipped" entries in the catalog.
    ///
    /// This is mostly useful for debugging.
@@ -0,0 +1,351 @@
//! layout tests related to the size L1/L2 files achieve when the L0 size is small.
//!
//! The intent of these tests is to ensure that when L0s are arriving in a normal/leading edge pattern,
//! even if they're quite small (10KB) the L1 & L2 files should still be accumulated to a reasonable size.
//!
//! Accumulating large L1/L2 is generally easier when cleaning up a backlogged partition with many L0s,
//! so these tests try to mimic the more challenging scenario of a steady stream of small L0s.
//! The steady stream of L0s can be partially simulated by setting the max files per plan to a small number,
//! and putting just a few files in the test case.

use data_types::CompactionLevel;
use iox_time::Time;

use crate::layouts::{layout_setup_builder, parquet_builder, run_layout_scenario, ONE_MB};

const MAX_DESIRED_FILE_SIZE: u64 = 100 * ONE_MB;

// Mimic small L0 files trickling when they overlap in time (by a minor amount, as is common)
// In this case, all L1 and L0 files can fit in a single compaction run.
#[tokio::test]
async fn small_l1_plus_overlapping_l0s_single_run() {
    test_helpers::maybe_start_logging();

    let setup = layout_setup_builder()
        .await
        .with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
        .with_max_num_files_per_plan(4) // artificially limit to 4 / plan to simulate a steady stream of small files compacted as they come in.
        .build()
        .await;

    let size = 10 * 1024;

    // Create 1 L1 file that mimics the output from a previous L0 compaction
    setup
        .partition
        .create_parquet_file(
            parquet_builder()
                .with_min_time(0)
                .with_max_time(10)
                .with_compaction_level(CompactionLevel::FileNonOverlapped)
                .with_max_l0_created_at(Time::from_timestamp_nanos(11))
                .with_file_size_bytes(size * 4_u64),
        )
        .await;

    // Create 3 L0 files, slightly overlapping in time.
    // Note the first L0 slightly overlaps the L1, as would happen if this slightly overlapping pattern occurred
    // in the files that (we're pretending) were compacted into that L1.
    for i in 1..=3 {
        setup
            .partition
            .create_parquet_file(
                parquet_builder()
                    .with_min_time(i * 10)
                    .with_max_time((i + 1) * 10)
                    .with_compaction_level(CompactionLevel::Initial)
                    .with_max_l0_created_at(Time::from_timestamp_nanos((i + 1) * 10 + 1))
                    .with_file_size_bytes(size),
            )
            .await;
    }

    // Required behavior:
    // 1. (achieved) all files compacted to a single L1 file
    // Desired behavior:
    // 1. (achieved) only one compaction is performed to compact them
    insta::assert_yaml_snapshot!(
        run_layout_scenario(&setup).await,
        @r###"
    ---
    - "**** Input Files "
    - "L0 "
    - "L0.2[10,20] 21ns 10kb |--------L0.2--------| "
    - "L0.3[20,30] 31ns 10kb |--------L0.3--------| "
    - "L0.4[30,40] 41ns 10kb |--------L0.4--------| "
    - "L1 "
    - "L1.1[0,10] 11ns 40kb |--------L1.1--------| "
    - "**** Simulation run 0, type=compact(TotalSizeLessThanMaxCompactSize). 4 Input Files, 70kb total:"
    - "L0 "
    - "L0.4[30,40] 41ns 10kb |--------L0.4--------| "
    - "L0.3[20,30] 31ns 10kb |--------L0.3--------| "
    - "L0.2[10,20] 21ns 10kb |--------L0.2--------| "
    - "L1 "
    - "L1.1[0,10] 11ns 40kb |--------L1.1--------| "
    - "**** 1 Output Files (parquet_file_id not yet assigned), 70kb total:"
    - "L1, all files 70kb "
    - "L1.?[0,40] 41ns |------------------------------------------L1.?------------------------------------------|"
    - "Committing partition 1:"
    - "  Soft Deleting 4 files: L1.1, L0.2, L0.3, L0.4"
    - "  Creating 1 files"
    - "**** Final Output Files (70kb written)"
    - "L1, all files 70kb "
    - "L1.5[0,40] 41ns |------------------------------------------L1.5------------------------------------------|"
    "###
    );
}

// Mimic small L0 files trickling when they overlap in time (by a minor amount, as is common)
// In this case, all L1 and L0 files do not fit in a single compaction run.
#[tokio::test]
async fn small_l1_plus_overlapping_l0s_two_runs() {
    test_helpers::maybe_start_logging();

    let setup = layout_setup_builder()
        .await
        .with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
        .with_max_num_files_per_plan(4) // artificially limit to 4 / plan to simulate a steady stream of small files compacted as they come in.
        .build()
        .await;

    let size = 10 * 1024;

    // Create 1 L1 file that mimics the output from a previous L0 compaction
    setup
        .partition
        .create_parquet_file(
            parquet_builder()
                .with_min_time(0)
                .with_max_time(10)
                .with_compaction_level(CompactionLevel::FileNonOverlapped)
                .with_max_l0_created_at(Time::from_timestamp_nanos(11))
                .with_file_size_bytes(size * 4_u64),
        )
        .await;

    // Create 4 L0 files, slightly overlapping in time
    // Note the first L0 slightly overlaps the L1, as would happen if this slightly overlapping pattern occurred
    // in the files that (we're pretending) were compacted into that L1.
    for i in 1..=4 {
        setup
            .partition
            .create_parquet_file(
                parquet_builder()
                    .with_min_time(i * 10)
                    .with_max_time((i + 1) * 10)
                    .with_compaction_level(CompactionLevel::Initial)
                    .with_max_l0_created_at(Time::from_timestamp_nanos((i + 1) * 10 + 1))
                    .with_file_size_bytes(size),
            )
            .await;
    }

    // Required behavior:
    // 1. (achieved) all files compacted to a single L1 file
    // Desired behavior:
    // 1. (not achieved) It may be preferable that the first compaction include the last L1 and as many L0s as are allowed (3).
    //    This does not happen. Instead, the first compaction is the four L0's that are later combined with the L1.
    //    This is not necessarily bad; actually, it's better for write amplification. But it might hint at the possibility of
    //    compaction sequences that never get around to coming back and picking up the L1.
    //    So the current behavior is noteworthy and unclear if it's 'good' or 'bad'.
    insta::assert_yaml_snapshot!(
        run_layout_scenario(&setup).await,
        @r###"
    ---
    - "**** Input Files "
    - "L0 "
    - "L0.2[10,20] 21ns 10kb |------L0.2------| "
    - "L0.3[20,30] 31ns 10kb |------L0.3------| "
    - "L0.4[30,40] 41ns 10kb |------L0.4------| "
    - "L0.5[40,50] 51ns 10kb |------L0.5------|"
    - "L1 "
    - "L1.1[0,10] 11ns 40kb |------L1.1------| "
    - "**** Simulation run 0, type=compact(ManySmallFiles). 4 Input Files, 40kb total:"
    - "L0, all files 10kb "
    - "L0.2[10,20] 21ns |--------L0.2--------| "
    - "L0.3[20,30] 31ns |--------L0.3--------| "
    - "L0.4[30,40] 41ns |--------L0.4--------| "
    - "L0.5[40,50] 51ns |--------L0.5--------| "
    - "**** 1 Output Files (parquet_file_id not yet assigned), 40kb total:"
    - "L0, all files 40kb "
    - "L0.?[10,50] 51ns |------------------------------------------L0.?------------------------------------------|"
    - "Committing partition 1:"
    - "  Soft Deleting 4 files: L0.2, L0.3, L0.4, L0.5"
    - "  Creating 1 files"
    - "**** Simulation run 1, type=compact(TotalSizeLessThanMaxCompactSize). 2 Input Files, 80kb total:"
    - "L0, all files 40kb "
    - "L0.6[10,50] 51ns |---------------------------------L0.6---------------------------------|"
    - "L1, all files 40kb "
    - "L1.1[0,10] 11ns |------L1.1------| "
    - "**** 1 Output Files (parquet_file_id not yet assigned), 80kb total:"
    - "L1, all files 80kb "
    - "L1.?[0,50] 51ns |------------------------------------------L1.?------------------------------------------|"
    - "Committing partition 1:"
    - "  Soft Deleting 2 files: L1.1, L0.6"
    - "  Creating 1 files"
    - "**** Final Output Files (120kb written)"
    - "L1, all files 80kb "
    - "L1.7[0,50] 51ns |------------------------------------------L1.7------------------------------------------|"
    "###
    );
}

// Mimic small L0 files trickling when they do NOT overlap in time (i.e. they have gaps between them)
// In this case, all L1 and L0 files can fit in a single compaction run.
#[tokio::test]
async fn small_l1_plus_nonoverlapping_l0s_single_run() {
    test_helpers::maybe_start_logging();

    let setup = layout_setup_builder()
        .await
        .with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
        .with_max_num_files_per_plan(4) // artificially limit to 4 / plan to simulate a steady stream of small files compacted as they come in.
        .build()
        .await;

    let size = 10 * 1024;

    // Create 1 L1 file that mimics the output from a previous L0 compaction
    setup
        .partition
        .create_parquet_file(
            parquet_builder()
                .with_min_time(0)
                .with_max_time(9)
                .with_compaction_level(CompactionLevel::FileNonOverlapped)
                .with_max_l0_created_at(Time::from_timestamp_nanos(11))
                .with_file_size_bytes(size * 4_u64),
        )
        .await;

    // Create 3 L0 files, not overlapping in time, and not overlapping the L1.
    for i in 1..=3 {
        setup
            .partition
            .create_parquet_file(
                parquet_builder()
                    .with_min_time(i * 10 + 1)
                    .with_max_time((i + 1) * 10 - 1)
                    .with_compaction_level(CompactionLevel::Initial)
                    .with_max_l0_created_at(Time::from_timestamp_nanos((i + 1) * 10 + 1))
                    .with_file_size_bytes(size),
            )
            .await;
    }

    // Required behavior:
    // 1. (not achieved) all files compacted to a single L1 file
    //    The assumption is: since it didn't combine the old L1 with the new one, it will never grow L1.1.
    //    It will eventually compact L1.1 with other L1s to make an L2, but if the write pattern continues
    //    with tiny L0s, the resulting L2 will be N * the L1 size (where N is the number of L1s compacted
    //    into the L2).
    insta::assert_yaml_snapshot!(
        run_layout_scenario(&setup).await,
        @r###"
    ---
    - "**** Input Files "
    - "L0 "
    - "L0.2[11,19] 21ns 10kb |------L0.2------| "
    - "L0.3[21,29] 31ns 10kb |------L0.3------| "
    - "L0.4[31,39] 41ns 10kb |------L0.4------| "
    - "L1 "
    - "L1.1[0,9] 11ns 40kb |-------L1.1-------| "
    - "**** Simulation run 0, type=compact(TotalSizeLessThanMaxCompactSize). 3 Input Files, 30kb total:"
    - "L0, all files 10kb "
    - "L0.4[31,39] 41ns |---------L0.4----------| "
    - "L0.3[21,29] 31ns |---------L0.3----------| "
    - "L0.2[11,19] 21ns |---------L0.2----------| "
    - "**** 1 Output Files (parquet_file_id not yet assigned), 30kb total:"
    - "L1, all files 30kb "
    - "L1.?[11,39] 41ns |------------------------------------------L1.?------------------------------------------|"
    - "Committing partition 1:"
    - "  Soft Deleting 3 files: L0.2, L0.3, L0.4"
    - "  Creating 1 files"
    - "**** Final Output Files (30kb written)"
    - "L1 "
    - "L1.1[0,9] 11ns 40kb |-------L1.1-------| "
    - "L1.5[11,39] 41ns 30kb |-----------------------------L1.5-----------------------------| "
    "###
    );
}

// Mimic small L0 files trickling when they do NOT overlap in time (i.e. they have gaps between them)
// In this case, all L1 and L0 files do not fit in a single compaction run.
#[tokio::test]
async fn small_l1_plus_nonoverlapping_l0s_two_runs() {
    test_helpers::maybe_start_logging();

    let setup = layout_setup_builder()
        .await
        .with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
        .with_max_num_files_per_plan(4) // artificially limit to 4 / plan to simulate a steady stream of small files compacted as they come in.
        .build()
        .await;

    let size = 10 * 1024;

    // Create 1 L1 file that mimics the output from a previous L0 compaction
    setup
        .partition
        .create_parquet_file(
            parquet_builder()
                .with_min_time(0)
                .with_max_time(10)
                .with_compaction_level(CompactionLevel::FileNonOverlapped)
                .with_max_l0_created_at(Time::from_timestamp_nanos(11))
                .with_file_size_bytes(size * 4_u64),
        )
        .await;

    // Create 4 L0 files, not overlapping in time, and not overlapping the L1.
    for i in 1..=4 {
        setup
            .partition
            .create_parquet_file(
                parquet_builder()
                    .with_min_time(i * 10 + 1)
                    .with_max_time((i + 1) * 10 - 1)
                    .with_compaction_level(CompactionLevel::Initial)
                    .with_max_l0_created_at(Time::from_timestamp_nanos((i + 1) * 10 + 1))
                    .with_file_size_bytes(size),
            )
            .await;
    }

    // Required behavior:
    // 1. (not achieved) all files compacted to a single L1 file
    //    The assumption is: since it didn't combine the old L1 with the new one, it will never grow L1.1.
    //    It will eventually compact L1.1 with other L1s to make an L2, but if the write pattern continues
    //    with tiny L0s, the resulting L2 will be N * the L1 size (where N is the number of L1s compacted
    //    into the L2).
    insta::assert_yaml_snapshot!(
        run_layout_scenario(&setup).await,
        @r###"
    ---
    - "**** Input Files "
    - "L0 "
    - "L0.2[11,19] 21ns 10kb |----L0.2----| "
    - "L0.3[21,29] 31ns 10kb |----L0.3----| "
    - "L0.4[31,39] 41ns 10kb |----L0.4----| "
    - "L0.5[41,49] 51ns 10kb |----L0.5----| "
    - "L1 "
    - "L1.1[0,10] 11ns 40kb |------L1.1------| "
    - "**** Simulation run 0, type=compact(TotalSizeLessThanMaxCompactSize). 4 Input Files, 40kb total:"
    - "L0, all files 10kb "
    - "L0.5[41,49] 51ns |------L0.5------| "
    - "L0.4[31,39] 41ns |------L0.4------| "
    - "L0.3[21,29] 31ns |------L0.3------| "
    - "L0.2[11,19] 21ns |------L0.2------| "
    - "**** 1 Output Files (parquet_file_id not yet assigned), 40kb total:"
    - "L1, all files 40kb "
    - "L1.?[11,49] 51ns |------------------------------------------L1.?------------------------------------------|"
    - "Committing partition 1:"
    - "  Soft Deleting 4 files: L0.2, L0.3, L0.4, L0.5"
    - "  Creating 1 files"
    - "**** Final Output Files (40kb written)"
    - "L1, all files 40kb "
    - "L1.1[0,10] 11ns |------L1.1------| "
    - "L1.6[11,49] 51ns |-------------------------------L1.6--------------------------------| "
    "###
    );
}
@@ -48,6 +48,7 @@
//! ```text
//! - L0.?[300,350] 5kb |-L0.3-|
//! ```
mod accumulated_size;
mod backfill;
mod common_use_cases;
mod core;
@@ -142,6 +142,7 @@ impl TestSetupBuilder<false> {
                threshold: PARTITION_THRESHOLD,
            },
            shadow_mode: false,
            enable_scratchpad: true,
            ignore_partition_skip_marker: false,
            shard_config: None,
            min_num_l1_files_to_compact: MIN_NUM_L1_FILES_TO_COMPACT,
@@ -500,6 +500,11 @@ impl PartitionKey {
    pub fn ptr_eq(&self, other: &Self) -> bool {
        Arc::ptr_eq(&self.0, &other.0)
    }

    /// Returns underlying string.
    pub fn inner(&self) -> &str {
        &self.0
    }
}

impl Display for PartitionKey {
@@ -327,6 +327,13 @@ pub enum OrderByClause {
    Descending,
}

impl OrderByClause {
    /// Return `true` if the order by clause is ascending.
    pub fn is_ascending(self) -> bool {
        matches!(self, Self::Ascending)
    }
}

impl Display for OrderByClause {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(
@@ -260,10 +260,7 @@ pub fn parse_conditional_expression(input: &str) -> Result<ConditionalExpression
    let mut i: &str = input;

    // Consume whitespace from the input
    i = match ws0(i) {
        Ok((i1, _)) => i1,
        _ => unreachable!("ws0 is infallible"),
    };
    (i, _) = ws0(i).expect("ws0 is infallible");

    if i.is_empty() {
        return Err(ParseError {
@@ -293,10 +290,7 @@ pub fn parse_conditional_expression(input: &str) -> Result<ConditionalExpression
    };

    // Consume remaining whitespace from the input
    i = match ws0(i) {
        Ok((i1, _)) => i1,
        _ => unreachable!("ws0 is infallible"),
    };
    (i, _) = ws0(i).expect("ws0 is infallible");

    if !i.is_empty() {
        return Err(ParseError {
@@ -69,10 +69,7 @@ pub fn parse_statements(input: &str) -> ParseResult {

    loop {
        // Consume whitespace from the input
        i = match ws0(i) {
            Ok((i1, _)) => i1,
            _ => unreachable!("ws0 is infallible"),
        };
        (i, _) = ws0(i).expect("ws0 is infallible");

        if eof::<_, nom::error::Error<_>>(i).is_ok() {
            return Ok(res);
@@ -390,12 +390,9 @@ impl TimeRange {
}

/// Simplifies an InfluxQL duration `expr` to a nanosecond interval represented as an `i64`.
pub fn duration_expr_to_nanoseconds(expr: &Expr) -> Result<i64, ExprError> {
    let ctx = ReduceContext::default();
    match reduce_expr(&ctx, expr)? {
        Expr::Literal(Literal::Duration(v)) => Ok(*v),
        Expr::Literal(Literal::Float(v)) => Ok(v as i64),
        Expr::Literal(Literal::Integer(v)) => Ok(v),
pub fn duration_expr_to_nanoseconds(ctx: &ReduceContext, expr: &Expr) -> Result<i64, ExprError> {
    match reduce_time_expr(ctx, expr)? {
        Expr::Literal(Literal::Timestamp(v)) => Ok(v.timestamp_nanos()),
        _ => error::expr("invalid duration expression"),
    }
}
@@ -444,7 +441,7 @@ pub struct ReduceContext {
    pub tz: Option<chrono_tz::Tz>,
}

/// Simplify the time range expression.
/// Simplify the time range expression and return a literal [timestamp](Timestamp).
fn reduce_time_expr(ctx: &ReduceContext, expr: &Expr) -> ExprResult {
    match reduce_expr(ctx, expr)? {
        expr @ Expr::Literal(Literal::Timestamp(_)) => Ok(expr),
@@ -732,21 +729,26 @@ mod test {
    use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Offset, Utc};
    use test_helpers::assert_error;

    /// Return a `ReduceContext` with a value of
    /// now set to `2023-01-01T00:00:00Z` / `1672531200000000000`
    /// and no timezone.
    fn reduce_context() -> ReduceContext {
        ReduceContext {
            now: Some(Timestamp::from_utc(
                NaiveDateTime::new(
                    NaiveDate::from_ymd_opt(2023, 1, 1).unwrap(),
                    NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
                ),
                Utc.fix(),
            )),
            tz: None,
        }
    }

    #[test]
    fn test_split_cond() {
        fn split_exprs(s: &str) -> Result<(Option<ConditionalExpression>, TimeRange), ExprError> {
            // 2023-01-01T00:00:00Z == 1672531200000000000
            let ctx = ReduceContext {
                now: Some(Timestamp::from_utc(
                    NaiveDateTime::new(
                        NaiveDate::from_ymd_opt(2023, 1, 1).unwrap(),
                        NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
                    ),
                    Utc.fix(),
                )),
                tz: None,
            };

            let ctx = reduce_context();
            let cond: ConditionalExpression = s.parse().unwrap();
            split_cond(&ctx, &cond)
        }
@@ -1014,13 +1016,14 @@ mod test {
    #[test]
    fn test_expr_to_duration() {
        fn parse(s: &str) -> Result<i64, ExprError> {
            let ctx = reduce_context();
            let expr = s
                .parse::<ConditionalExpression>()
                .unwrap()
                .expr()
                .unwrap()
                .clone();
            duration_expr_to_nanoseconds(&expr)
            duration_expr_to_nanoseconds(&ctx, &expr)
        }

        let cases = vec![
@@ -1029,6 +1032,8 @@ mod test {
            ("5d10ms", 432_000_010_000_000),
            ("-2d10ms", -172800010000000),
            ("-2d10ns", -172800000000010),
            ("now()", 1672531200000000000),
            ("'2023-01-01T00:00:00Z'", 1672531200000000000),
        ];

        for (interval_str, exp) in cases {
@@ -495,6 +495,7 @@ impl Config {
            partition_timeout_secs: 0,
            partition_filter: None,
            shadow_mode: false,
            enable_scratchpad: true,
            ignore_partition_skip_marker: false,
            shard_count: None,
            shard_id: None,
@@ -20,6 +20,6 @@
| | DeduplicateExec: [tag1@2 ASC,tag2@3 ASC,tag3@4 ASC,tag4@5 ASC,time@6 ASC] |
| | SortPreservingMergeExec: [tag1@2 ASC,tag2@3 ASC,tag3@4 ASC,tag4@5 ASC,time@6 ASC,__chunk_order@0 ASC] |
| | SortExec: expr=[tag1@2 ASC,tag2@3 ASC,tag3@4 ASC,tag4@5 ASC,time@6 ASC,__chunk_order@0 ASC] |
| | ParquetExec: file_groups={4 groups: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet, 1/1/1/00000000-0000-0000-0000-000000000001.parquet, 1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/00000000-0000-0000-0000-000000000003.parquet, 1/1/1/00000000-0000-0000-0000-000000000004.parquet, 1/1/1/00000000-0000-0000-0000-000000000005.parquet, 1/1/1/00000000-0000-0000-0000-000000000006.parquet, 1/1/1/00000000-0000-0000-0000-000000000007.parquet, 1/1/1/00000000-0000-0000-0000-000000000008.parquet, 1/1/1/00000000-0000-0000-0000-000000000009.parquet, 1/1/1/00000000-0000-0000-0000-00000000000a.parquet, 1/1/1/00000000-0000-0000-0000-00000000000b.parquet, 1/1/1/00000000-0000-0000-0000-00000000000c.parquet], [1/1/1/00000000-0000-0000-0000-00000000000d.parquet, 1/1/1/00000000-0000-0000-0000-00000000000e.parquet, 1/1/1/00000000-0000-0000-0000-00000000000f.parquet, 1/1/1/00000000-0000-0000-0000-000000000010.parquet, 1/1/1/00000000-0000-0000-0000-000000000011.parquet, 1/1/1/00000000-0000-0000-0000-000000000012.parquet, 1/1/1/00000000-0000-0000-0000-000000000013.parquet, 1/1/1/00000000-0000-0000-0000-000000000014.parquet, 1/1/1/00000000-0000-0000-0000-000000000015.parquet, 1/1/1/00000000-0000-0000-0000-000000000016.parquet, 1/1/1/00000000-0000-0000-0000-000000000017.parquet, 1/1/1/00000000-0000-0000-0000-000000000018.parquet, 1/1/1/00000000-0000-0000-0000-000000000019.parquet], [1/1/1/00000000-0000-0000-0000-00000000001a.parquet, 1/1/1/00000000-0000-0000-0000-00000000001b.parquet, 1/1/1/00000000-0000-0000-0000-00000000001c.parquet, 1/1/1/00000000-0000-0000-0000-00000000001d.parquet, 1/1/1/00000000-0000-0000-0000-00000000001e.parquet, 1/1/1/00000000-0000-0000-0000-00000000001f.parquet, 1/1/1/00000000-0000-0000-0000-000000000020.parquet, 1/1/1/00000000-0000-0000-0000-000000000021.parquet, 1/1/1/00000000-0000-0000-0000-000000000022.parquet, 1/1/1/00000000-0000-0000-0000-000000000023.parquet, 1/1/1/00000000-0000-0000-0000-000000000024.parquet, 1/1/1/00000000-0000-0000-0000-000000000025.parquet], [1/1/1/00000000-0000-0000-0000-000000000026.parquet, 1/1/1/00000000-0000-0000-0000-000000000027.parquet, 1/1/1/00000000-0000-0000-0000-000000000028.parquet, 1/1/1/00000000-0000-0000-0000-000000000029.parquet, 1/1/1/00000000-0000-0000-0000-00000000002a.parquet, 1/1/1/00000000-0000-0000-0000-00000000002b.parquet, 1/1/1/00000000-0000-0000-0000-00000000002c.parquet, 1/1/1/00000000-0000-0000-0000-00000000002d.parquet, 1/1/1/00000000-0000-0000-0000-00000000002e.parquet, 1/1/1/00000000-0000-0000-0000-00000000002f.parquet, 1/1/1/00000000-0000-0000-0000-000000000030.parquet, 1/1/1/00000000-0000-0000-0000-000000000031.parquet]]}, projection=[__chunk_order, f1, tag1, tag2, tag3, tag4, time] |
| | ParquetExec: file_groups={4 groups: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet, 1/1/1/00000000-0000-0000-0000-000000000001.parquet, 1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/00000000-0000-0000-0000-000000000003.parquet, 1/1/1/00000000-0000-0000-0000-000000000004.parquet, ...], [1/1/1/00000000-0000-0000-0000-000000000005.parquet, 1/1/1/00000000-0000-0000-0000-000000000006.parquet, 1/1/1/00000000-0000-0000-0000-000000000007.parquet, 1/1/1/00000000-0000-0000-0000-000000000008.parquet, 1/1/1/00000000-0000-0000-0000-000000000009.parquet, ...], [1/1/1/00000000-0000-0000-0000-00000000000a.parquet, 1/1/1/00000000-0000-0000-0000-00000000000b.parquet, 1/1/1/00000000-0000-0000-0000-00000000000c.parquet, 1/1/1/00000000-0000-0000-0000-00000000000d.parquet, 1/1/1/00000000-0000-0000-0000-00000000000e.parquet, ...], [1/1/1/00000000-0000-0000-0000-00000000000f.parquet, 1/1/1/00000000-0000-0000-0000-000000000010.parquet, 1/1/1/00000000-0000-0000-0000-000000000011.parquet, 1/1/1/00000000-0000-0000-0000-000000000012.parquet, 1/1/1/00000000-0000-0000-0000-000000000013.parquet, ...]]}, projection=[__chunk_order, f1, tag1, tag2, tag3, tag4, time] |
@ -17,7 +17,7 @@ extensions_options! {
    /// [^iox_part]: "IOx partition" refers to a partition within the IOx catalog, i.e. a partition within the
    ///     primary key space. This is NOT the same as a DataFusion partition which refers to a stream
    ///     within the physical plan data flow.
    pub max_dedup_partition_split: usize, default = 100
    pub max_dedup_partition_split: usize, default = 10_000

    /// When splitting de-duplicate operations based on time-based overlaps, this is the maximum number of groups
    /// that should be considered. If there are more groups, the split will NOT be performed.
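The option above caps how many groups a de-duplication operation may be split into before the planner gives up on splitting. As a minimal sketch only (hypothetical names, not the IOx implementation), a limit of this kind is typically applied as an all-or-nothing gate:

```rust
/// Illustrative only: exceed the limit and no split is performed at all.
fn should_split(num_groups: usize, max_split: usize) -> bool {
    num_groups <= max_split
}

fn main() {
    // With the new default of 10_000, a plan covering 2_500 groups is still
    // eligible for splitting; under the old default of 100 it was not.
    assert!(should_split(2_500, 10_000));
    assert!(!should_split(2_500, 100));
}
```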
@ -33,7 +33,7 @@ use crate::{
use arrow::record_batch::RecordBatch;
use async_trait::async_trait;
use datafusion::{
    catalog::catalog::CatalogProvider,
    catalog::CatalogProvider,
    execution::{
        context::{QueryPlanner, SessionState, TaskContext},
        memory_pool::MemoryPool,
@ -1,8 +1,11 @@
use datafusion::{
    common::{tree_node::TreeNodeRewriter, DFSchema},
    error::DataFusionError,
    logical_expr::{expr::ScalarUDF, utils::from_plan, LogicalPlan, Operator},
    optimizer::{utils::rewrite_preserving_name, OptimizerConfig, OptimizerRule},
    logical_expr::{
        expr::ScalarUDF, expr_rewriter::rewrite_preserving_name, utils::from_plan, LogicalPlan,
        Operator,
    },
    optimizer::{OptimizerConfig, OptimizerRule},
    prelude::{binary_expr, lit, Expr},
    scalar::ScalarValue,
};
@ -2,10 +2,11 @@ use std::sync::Arc;

use arrow::datatypes::SchemaRef;
use datafusion::{
    datasource::physical_plan::ParquetExec,
    error::DataFusionError,
    physical_plan::{
        empty::EmptyExec, file_format::ParquetExec, union::UnionExec, visit_execution_plan,
        ExecutionPlan, ExecutionPlanVisitor,
        empty::EmptyExec, union::UnionExec, visit_execution_plan, ExecutionPlan,
        ExecutionPlanVisitor,
    },
};
use observability_deps::tracing::debug;
@ -3,6 +3,7 @@ use std::{collections::HashSet, sync::Arc};
use datafusion::{
    common::tree_node::{RewriteRecursion, Transformed, TreeNode, TreeNodeRewriter},
    config::ConfigOptions,
    datasource::physical_plan::ParquetExec,
    error::{DataFusionError, Result},
    logical_expr::Operator,
    physical_expr::{split_conjunction, utils::collect_columns},
@ -10,7 +11,6 @@ use datafusion::{
    physical_plan::{
        empty::EmptyExec,
        expressions::{BinaryExpr, Column},
        file_format::ParquetExec,
        filter::FilterExec,
        union::UnionExec,
        ExecutionPlan, PhysicalExpr,
@ -165,11 +165,11 @@ mod tests {
    use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
    use datafusion::{
        datasource::object_store::ObjectStoreUrl,
        datasource::physical_plan::FileScanConfig,
        logical_expr::Operator,
        physical_expr::PhysicalSortExpr,
        physical_plan::{
            expressions::{BinaryExpr, Column, Literal},
            file_format::FileScanConfig,
            PhysicalExpr, Statistics,
        },
        scalar::ScalarValue,
@ -7,6 +7,7 @@ use arrow::datatypes::SchemaRef;
use datafusion::{
    common::tree_node::{Transformed, TreeNode},
    config::ConfigOptions,
    datasource::physical_plan::{FileScanConfig, ParquetExec},
    error::{DataFusionError, Result},
    physical_expr::{
        utils::{collect_columns, reassign_predicate_columns},
@ -16,7 +17,6 @@ use datafusion::{
    physical_plan::{
        empty::EmptyExec,
        expressions::Column,
        file_format::{FileScanConfig, ParquetExec},
        filter::FilterExec,
        projection::ProjectionExec,
        sorts::{sort::SortExec, sort_preserving_merge::SortPreservingMergeExec},
@ -3,14 +3,11 @@ use std::sync::Arc;
use datafusion::{
    common::tree_node::{RewriteRecursion, Transformed, TreeNode, TreeNodeRewriter},
    config::ConfigOptions,
    datasource::physical_plan::{FileScanConfig, ParquetExec},
    error::Result,
    physical_expr::{PhysicalSortExpr, PhysicalSortRequirement},
    physical_optimizer::PhysicalOptimizerRule,
    physical_plan::{
        file_format::{FileScanConfig, ParquetExec},
        sorts::sort::SortExec,
        ExecutionPlan,
    },
    physical_plan::{sorts::sort::SortExec, ExecutionPlan},
};
use observability_deps::tracing::warn;

@ -6,14 +6,15 @@ use crate::{
};
use arrow::datatypes::{DataType, Fields, Schema as ArrowSchema, SchemaRef};
use datafusion::{
    datasource::{listing::PartitionedFile, object_store::ObjectStoreUrl},
    datasource::{
        listing::PartitionedFile,
        object_store::ObjectStoreUrl,
        physical_plan::{FileScanConfig, ParquetExec},
    },
    physical_expr::PhysicalSortExpr,
    physical_plan::{
        empty::EmptyExec,
        expressions::Column,
        file_format::{FileScanConfig, ParquetExec},
        union::UnionExec,
        ColumnStatistics, ExecutionPlan, Statistics,
        empty::EmptyExec, expressions::Column, union::UnionExec, ColumnStatistics, ExecutionPlan,
        Statistics,
    },
    scalar::ScalarValue,
};
@ -26,8 +26,8 @@ use datafusion::error::DataFusionError;
use datafusion::execution::context::SessionState;
use datafusion::logical_expr::Expr;
use datafusion::physical_plan::ExecutionPlan;
use datafusion::{catalog::catalog::CatalogProvider, physical_plan::displayable};
use datafusion::{catalog::schema::SchemaProvider, logical_expr::LogicalPlan};
use datafusion::{catalog::CatalogProvider, physical_plan::displayable};
use datafusion::{
    datasource::{object_store::ObjectStoreUrl, TableProvider, TableType},
    physical_plan::{ColumnStatistics, Statistics as DataFusionStatistics},
@ -33,6 +33,18 @@ pub(super) struct Select {
    /// The projection type of the selection.
    pub(super) projection_type: ProjectionType,

    /// The interval derived from the arguments to the `TIME` function
    /// when a `GROUP BY` clause is declared with `TIME`.
    pub(super) interval: Option<Interval>,

    /// The number of additional intervals that must be read
    /// for queries that group by time and use window functions such as
    /// `DIFFERENCE` or `DERIVATIVE`. This ensures data for the first
    /// window is available.
    ///
    /// See: <https://github.com/influxdata/influxdb/blob/f365bb7e3a9c5e227dbf66d84adf674d3d127176/query/compile.go#L50>
    pub(super) extra_intervals: usize,

    /// Projection clause of the selection.
    pub(super) fields: Vec<Field>,

@ -194,3 +206,15 @@ impl Display for Field {
        write!(f, " AS {}", self.name)
    }
}

/// Represents the interval duration and offset
/// derived from the `TIME` function when specified
/// in a `GROUP BY` clause.
#[derive(Debug, Clone, Copy)]
pub(super) struct Interval {
    /// The nanosecond duration of the interval
    pub duration: i64,

    /// The nanosecond offset of the interval.
    pub offset: Option<i64>,
}
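To make the units concrete, here is an illustration (not code from this change) of the values such a struct would carry for a clause like `GROUP BY time(10s, 5s)`; both fields are nanoseconds, per the doc comments above. The struct is mirrored locally so the snippet is self-contained:

```rust
// Illustrative only: a standalone mirror of the `Interval` struct above.
#[derive(Debug, Clone, Copy)]
struct Interval {
    duration: i64,       // nanoseconds
    offset: Option<i64>, // nanoseconds
}

fn main() {
    // Hypothetical value for `GROUP BY time(10s, 5s)`.
    let interval = Interval {
        duration: 10_000_000_000,    // 10s in nanoseconds
        offset: Some(5_000_000_000), // 5s offset in nanoseconds
    };
    assert_eq!(interval.duration / 1_000_000_000, 10);
    assert_eq!(interval.offset, Some(5_000_000_000));
    println!("{interval:?}");
}
```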
@ -1,10 +1,10 @@
mod select;

use crate::plan::ir::{DataSource, Field, Select, SelectQuery};
use crate::plan::ir::{DataSource, Field, Interval, Select, SelectQuery};
use crate::plan::planner::select::{
    fields_to_exprs_no_nulls, make_tag_key_column_meta, plan_with_sort, ProjectionInfo,
};
use crate::plan::planner_time_range_expression::{expr_to_df_interval_dt, time_range_to_df_expr};
use crate::plan::planner_time_range_expression::time_range_to_df_expr;
use crate::plan::rewriter::{find_table_names, rewrite_statement, ProjectionType};
use crate::plan::util::{binary_operator_to_df_operator, rebase_expr, Schemas};
use crate::plan::var_ref::var_ref_data_type_to_data_type;
@ -50,9 +50,7 @@ use influxdb_influxql_parser::show_measurements::{
use influxdb_influxql_parser::show_tag_keys::ShowTagKeysStatement;
use influxdb_influxql_parser::show_tag_values::{ShowTagValuesStatement, WithKeyClause};
use influxdb_influxql_parser::simple_from_clause::ShowFromClause;
use influxdb_influxql_parser::time_range::{
    duration_expr_to_nanoseconds, split_cond, ReduceContext, TimeRange,
};
use influxdb_influxql_parser::time_range::{split_cond, ReduceContext, TimeRange};
use influxdb_influxql_parser::timestamp::Timestamp;
use influxdb_influxql_parser::{
    common::{MeasurementName, WhereClause},
@ -130,22 +128,43 @@ enum ExprScope {
    Projection,
}

/// State used to inform the planner.
/// State used to inform the planner, which is derived for the
/// root `SELECT` and subqueries.
#[allow(dead_code)]
#[derive(Debug, Default, Clone)]
struct Context<'a> {
    /// The name of the table used as the data source for the current query.
    table_name: &'a str,
    projection_type: ProjectionType,
    tz: Option<Tz>,

    // WHERE
    order_by: OrderByClause,

    /// The column alias for the `time` column.
    ///
    /// # NOTE
    ///
    /// The time column can only be aliased for the root query.
    time_alias: &'a str,

    /// The filter predicate for the query, without `time`.
    condition: Option<&'a ConditionalExpression>,

    /// The time range of the query
    time_range: TimeRange,

    // GROUP BY information
    group_by: Option<&'a GroupByClause>,
    fill: Option<FillClause>,

    /// Interval of the `TIME` function found in the `GROUP BY` clause.
    interval: Option<Interval>,

    /// How many additional window intervals must be retrieved, when grouping
    /// by time, to ensure window functions like `difference` have sufficient
    /// data for the first window of the `time_range`.
    extra_intervals: usize,

    /// The set of tags specified in the top-level `SELECT` statement
    /// which represent the tag set used for grouping output.
    root_group_by_tags: &'a [&'a str],
@ -161,10 +180,14 @@ impl<'a> Context<'a> {
            table_name,
            projection_type: select.projection_type,
            tz: select.timezone,
            order_by: select.order_by.unwrap_or_default(),
            time_alias: &select.fields[0].name,
            condition: select.condition.as_ref(),
            time_range: select.time_range,
            group_by: select.group_by.as_ref(),
            fill: select.fill,
            interval: select.interval,
            extra_intervals: select.extra_intervals,
            root_group_by_tags,
        }
    }
@ -176,6 +199,9 @@ impl<'a> Context<'a> {
            table_name: self.table_name,
            projection_type: select.projection_type,
            tz: select.timezone,
            order_by: self.order_by,
            // time is never aliased in subqueries
            time_alias: "time",
            condition: select.condition.as_ref(),
            // Subqueries should be restricted by the time range of the parent
            //
@ -183,10 +209,165 @@ impl<'a> Context<'a> {
            time_range: select.time_range.intersected(self.time_range),
            group_by: select.group_by.as_ref(),
            fill: select.fill,
            interval: select.interval,
            extra_intervals: select.extra_intervals,
            root_group_by_tags: self.root_group_by_tags,
        }
    }

    /// Return a [`Expr::Sort`] expression for the `time` column.
    #[allow(dead_code)]
    fn time_sort_expr(&self) -> Expr {
        self.time_alias.as_expr().sort(
            match self.order_by {
                OrderByClause::Ascending => true,
                OrderByClause::Descending => false,
            },
            false,
        )
    }

    /// Returns true if the current context has an extended
    /// time range to provide leading data for window functions
    /// to produce the result for the first window.
    #[allow(dead_code)]
    fn has_extended_time_range(&self) -> bool {
        self.extra_intervals > 0 && self.interval.is_some()
    }

    /// Return the time range of the context, including any
    /// additional intervals required for window functions like
    /// `difference` or `moving_average`, when the query contains a
    /// `GROUP BY TIME` clause.
    ///
    /// # NOTE
    ///
    /// This function accounts for a bug in InfluxQL OG that only reads
    /// a single interval, rather than the number required based on the
    /// window function.
    ///
    /// # EXPECTED
    ///
    /// For InfluxQL OG, the likely intended behaviour of the extra intervals
    /// was to ensure a minimum number of windows were calculated to ensure
    /// there was sufficient data for the lower time bound specified
    /// in the `WHERE` clause, or upper time bound when ordering by `time`
    /// in descending order.
    ///
    /// For example, the following InfluxQL query calculates the `moving_average`
    /// of the `mean` of the `writes` field over 3 intervals. The interval
    /// is 10 seconds, as specified by the `GROUP BY time(10s)` clause.
    ///
    /// ```sql
    /// SELECT moving_average(mean(writes), 3)
    /// FROM diskio
    /// WHERE time >= '2020-06-11T16:53:00Z' AND time < '2020-06-11T16:55:00Z'
    /// GROUP BY time(10s)
    /// ```
    ///
    /// The intended output was supposed to include the first window of the time
    /// bounds, or `'2020-06-11T16:53:00Z'`:
    ///
    /// ```text
    /// name: diskio
    /// time                  moving_average
    /// ----                  --------------
    /// 2020-06-11T16:53:00Z  5592529.333333333
    /// 2020-06-11T16:53:10Z  5592677.333333333
    /// ...
    /// 2020-06-11T16:54:10Z  5593513.333333333
    /// 2020-06-11T16:54:20Z  5593612.333333333
    /// ```
    ///
    /// however, the actual output starts at `2020-06-11T16:53:10Z`.
    ///
    /// # BUG
    ///
    /// During compilation of the query, InfluxQL OG determines the `ExtraIntervals`
    /// required for the `moving_average` function, which in the example is `3` ([source][1]):
    ///
    /// ```go
    /// if c.global.ExtraIntervals < int(arg1.Val) {
    ///     c.global.ExtraIntervals = int(arg1.Val)
    /// }
    /// ```
    ///
    /// `arg1.Val` is the second argument from the example InfluxQL query, or `3`.
    ///
    /// When preparing the query for execution, the time range is adjusted by the
    /// `ExtraIntervals` determined during compilation ([source][2]):
    ///
    /// ```go
    /// // Modify the time range if there are extra intervals and an interval.
    /// if !c.Interval.IsZero() && c.ExtraIntervals > 0 {
    ///     if c.Ascending {
    ///         newTime := timeRange.Min.Add(time.Duration(-c.ExtraIntervals) * c.Interval.Duration)
    ///         if !newTime.Before(time.Unix(0, influxql.MinTime).UTC()) {
    ///             timeRange.Min = newTime
    /// ```
    ///
    /// In this case `timeRange.Min` will be adjusted from `2020-06-11T16:53:00Z` to
    /// `2020-06-11T16:52:30Z`, as `ExtraIntervals` is `3` and `Interval.Duration` is `10s`.
    ///
    /// The first issue is that the adjusted `timeRange` is only used to determine which
    /// shards to read per the following ([source][3]):
    ///
    /// ```go
    /// // Create an iterator creator based on the shards in the cluster.
    /// shards, err := shardMapper.MapShards(c.stmt.Sources, timeRange, sopt)
    /// ```
    ///
    /// The options used to configure query execution, constructed later in the function,
    /// use the time range from the compiled statement ([source][4]):
    ///
    /// ```go
    /// opt.StartTime, opt.EndTime = c.TimeRange.MinTimeNano(), c.TimeRange.MaxTimeNano()
    /// ```
    ///
    /// Specifically, `opt.StartTime` would be `2020-06-11T16:53:00Z` (`1591894380000000000`).
    ///
    /// Finally, when constructing the physical operator to compute the `moving_average`,
    /// the `StartTime`, or `EndTime` for descending queries, is adjusted by the single
    /// interval of `10s` ([source][5]):
    ///
    /// ```go
    /// if !opt.Interval.IsZero() {
    ///     if opt.Ascending {
    ///         opt.StartTime -= int64(opt.Interval.Duration)
    /// ```
    ///
    /// [1]: https://github.com/influxdata/influxdb/blob/f365bb7e3a9c5e227dbf66d84adf674d3d127176/query/compile.go#L592-L594
    /// [2]: https://github.com/influxdata/influxdb/blob/f365bb7e3a9c5e227dbf66d84adf674d3d127176/query/compile.go#L1153-L1158
    /// [3]: https://github.com/influxdata/influxdb/blob/f365bb7e3a9c5e227dbf66d84adf674d3d127176/query/compile.go#L1172-L1173
    /// [4]: https://github.com/influxdata/influxdb/blob/f365bb7e3a9c5e227dbf66d84adf674d3d127176/query/compile.go#L1198
    /// [5]: https://github.com/influxdata/influxdb/blob/f365bb7e3a9c5e227dbf66d84adf674d3d127176/query/select.go#L259-L261
    #[allow(dead_code)]
    fn extended_time_range(&self) -> TimeRange {
        // As described in the function docs, extra_intervals is either
        // 1 or 0 to match InfluxQL OG behaviour.
        match (self.extra_intervals.min(1), self.interval) {
            (count @ 1.., Some(interval)) => {
                if self.order_by.is_ascending() {
                    TimeRange {
                        lower: self
                            .time_range
                            .lower
                            .map(|v| v - (count as i64 * interval.duration)),
                        upper: self.time_range.upper,
                    }
                } else {
                    TimeRange {
                        lower: self.time_range.lower,
                        upper: self
                            .time_range
                            .upper
                            .map(|v| v + (count as i64 * interval.duration)),
                    }
                }
            }
            _ => self.time_range,
        }
    }

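A worked example of the clamping above, using the timestamps quoted in the doc comment (illustrative arithmetic only, not code from the change): with `extra_intervals = 3` and a 10s interval, the count is clamped to 1, so the ascending lower bound moves back by exactly one interval.

```rust
fn main() {
    // 2020-06-11T16:53:00Z, the lower bound quoted in the doc comment above.
    let lower_ns: i64 = 1_591_894_380_000_000_000;
    let interval_ns: i64 = 10_000_000_000; // GROUP BY time(10s)
    let extra_intervals: usize = 3; // moving_average(..., 3)

    // Clamped to a single interval, mirroring the `.min(1)` in the function above.
    let count = extra_intervals.min(1) as i64;
    let extended_lower = lower_ns - count * interval_ns;

    // 2020-06-11T16:52:50Z: the window is extended by one 10s interval, not three.
    assert_eq!(extended_lower, 1_591_894_370_000_000_000);
}
```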
    /// Returns the combined `GROUP BY` tags clause from the root
    /// and current statement. The list is sorted and guaranteed to be unique.
    fn group_by_tags(&self) -> Vec<&str> {
@ -210,7 +391,9 @@ impl<'a> Context<'a> {
    fn is_aggregate(&self) -> bool {
        matches!(
            self.projection_type,
            ProjectionType::Aggregate | ProjectionType::Selector { .. }
            ProjectionType::Aggregate
                | ProjectionType::WindowAggregate
                | ProjectionType::Selector { .. }
        )
    }

@ -328,6 +511,9 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
            is_projected,
        } = ProjectionInfo::new(&select.fields, &group_by_tags);

        let order_by = select.order_by.unwrap_or_default();
        let time_alias = fields[0].name.as_str();

        let table_names = find_table_names(select);
        let sort_by_measurement = table_names.len() > 1;
        let mut plans = Vec::new();
@ -412,14 +598,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
            },
        )?;

        // the sort planner node must refer to the time column using
        // the alias that was specified
        let time_alias = fields[0].name.as_str();
        let time_sort_expr = time_alias.as_expr().sort(
            match select.order_by {
                // Default behaviour is to sort by time in ascending order if there is no ORDER BY
                None | Some(OrderByClause::Ascending) => true,
                Some(OrderByClause::Descending) => false,
            match order_by {
                OrderByClause::Ascending => true,
                OrderByClause::Descending => false,
            },
            false,
        );
@ -465,10 +647,9 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
        let time_alias = fields[0].name.as_str();

        let time_sort_expr = time_alias.as_expr().sort(
            match select.order_by {
                // Default behaviour is to sort by time in ascending order if there is no ORDER BY
                None | Some(OrderByClause::Ascending) => true,
                Some(OrderByClause::Descending) => false,
            match ctx.order_by {
                OrderByClause::Ascending => true,
                OrderByClause::Descending => false,
            },
            false,
        );
@ -638,13 +819,9 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
        // 1. is binning by time, project the column using the `DATE_BIN` function,
        // 2. is a single-selector query, project the `time` field of the selector aggregate,
        // 3. otherwise, project the Unix epoch (0)
        select_exprs[time_column_index] = if let Some(dim) = ctx.group_by.and_then(|gb| gb.time_dimension()) {
            let stride = expr_to_df_interval_dt(&dim.interval)?;
            let offset = if let Some(offset) = &dim.offset {
                duration_expr_to_nanoseconds(offset).map_err(error::map::expr_error)?
            } else {
                0
            };
        select_exprs[time_column_index] = if let Some(i) = ctx.interval {
            let stride = lit(ScalarValue::new_interval_mdn(0, 0, i.duration));
            let offset = i.offset.map_or(0, |v| v);

            date_bin(
                stride,
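For intuition on what the `date_bin` projection computes, here is a small sketch of the underlying arithmetic of fixed-stride binning (an illustration only, not DataFusion's implementation): each timestamp is snapped down to the start of its stride-aligned window, shifted by the offset.

```rust
// Illustrative only: fixed-nanosecond-stride binning.
fn bin(ts: i64, stride: i64, offset: i64) -> i64 {
    ts - (ts - offset).rem_euclid(stride)
}

fn main() {
    // 2020-06-11T16:53:07Z with a 10s stride and zero offset bins to 16:53:00Z.
    assert_eq!(
        bin(1_591_894_387_000_000_000, 10_000_000_000, 0),
        1_591_894_380_000_000_000
    );
}
```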
@ -2535,7 +2712,7 @@ mod test {
    }

    #[test]
    fn test_show_tag_keys() {
    fn test_show_tag_keys_1() {
        assert_snapshot!(plan("SHOW TAG KEYS"), @"TableScan: tag_keys [iox::measurement:Dictionary(Int32, Utf8), tagKey:Dictionary(Int32, Utf8)]");
        assert_snapshot!(plan("SHOW TAG KEYS LIMIT 1 OFFSET 2"), @r###"
        Sort: tag_keys.iox::measurement ASC NULLS LAST, tag_keys.tagKey ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), tagKey:Dictionary(Int32, Utf8)]
@ -2544,150 +2721,158 @@ mod test {
|
|||
WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [tag_keys.iox::measurement] ORDER BY [tag_keys.tagKey ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS iox::row]] [iox::measurement:Dictionary(Int32, Utf8), tagKey:Dictionary(Int32, Utf8), iox::row:UInt64;N]
|
||||
TableScan: tag_keys [iox::measurement:Dictionary(Int32, Utf8), tagKey:Dictionary(Int32, Utf8)]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_show_tag_keys_2() {
|
||||
assert_snapshot!(plan("SHOW TAG KEYS WHERE foo = 'some_foo'"), @r###"
|
||||
Sort: iox::measurement ASC NULLS LAST, tagKey ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
|
||||
Union [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
|
||||
Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
|
||||
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
|
||||
Unnest: tagKey [tagKey:Utf8;N]
|
||||
Projection: make_array(CASE WHEN tag0 > Int32(0) THEN Utf8("tag0") END, CASE WHEN tag1 > Int32(0) THEN Utf8("tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
|
||||
Projection: make_array(CASE WHEN tag0 > Int32(0) THEN Utf8("tag0") END, CASE WHEN tag1 > Int32(0) THEN Utf8("tag1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
|
||||
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(all_types.tag0 IS NOT NULL AS UInt64)) AS tag0, SUM(CAST(all_types.tag1 IS NOT NULL AS UInt64)) AS tag1]] [tag0:UInt64;N, tag1:UInt64;N]
|
||||
Filter: all_types.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N]
|
||||
TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
|
||||
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
|
||||
Unnest: tagKey [tagKey:Utf8;N]
|
||||
Projection: make_array(CASE WHEN cpu > Int32(0) THEN Utf8("cpu") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
|
||||
Projection: make_array(CASE WHEN cpu > Int32(0) THEN Utf8("cpu") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
|
||||
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(cpu.cpu IS NOT NULL AS UInt64)) AS cpu, SUM(CAST(cpu.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(cpu.region IS NOT NULL AS UInt64)) AS region]] [cpu:UInt64;N, host:UInt64;N, region:UInt64;N]
|
||||
Filter: cpu.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
|
||||
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
|
||||
Unnest: tagKey [tagKey:Utf8;N]
|
||||
Projection: make_array(CASE WHEN bar > Int32(0) THEN Utf8("bar") END, CASE WHEN foo > Int32(0) THEN Utf8("foo") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
|
||||
Projection: make_array(CASE WHEN bar > Int32(0) THEN Utf8("bar") END, CASE WHEN foo > Int32(0) THEN Utf8("foo") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
|
||||
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(data.bar IS NOT NULL AS UInt64)) AS bar, SUM(CAST(data.foo IS NOT NULL AS UInt64)) AS foo]] [bar:UInt64;N, foo:UInt64;N]
|
||||
Filter: data.time >= TimestampNanosecond(1672444800000000000, None) AND data.foo = Dictionary(Int32, Utf8("some_foo")) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Projection: Dictionary(Int32, Utf8("disk")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
|
||||
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
|
||||
Unnest: tagKey [tagKey:Utf8;N]
|
||||
Projection: make_array(CASE WHEN device > Int32(0) THEN Utf8("device") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
|
||||
Projection: make_array(CASE WHEN device > Int32(0) THEN Utf8("device") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
|
||||
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(disk.device IS NOT NULL AS UInt64)) AS device, SUM(CAST(disk.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(disk.region IS NOT NULL AS UInt64)) AS region]] [device:UInt64;N, host:UInt64;N, region:UInt64;N]
|
||||
Filter: disk.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
|
||||
TableScan: disk [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
|
||||
Projection: Dictionary(Int32, Utf8("diskio")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
|
||||
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
|
||||
Unnest: tagKey [tagKey:Utf8;N]
|
||||
Projection: make_array(CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END, CASE WHEN status > Int32(0) THEN Utf8("status") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
|
||||
Projection: make_array(CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END, CASE WHEN status > Int32(0) THEN Utf8("status") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
|
||||
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(diskio.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(diskio.region IS NOT NULL AS UInt64)) AS region, SUM(CAST(diskio.status IS NOT NULL AS UInt64)) AS status]] [host:UInt64;N, region:UInt64;N, status:UInt64;N]
|
||||
Filter: diskio.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [bytes_read:Int64;N, bytes_written:Int64;N, host:Dictionary(Int32, Utf8);N, is_local:Boolean;N, read_utilization:Float64;N, region:Dictionary(Int32, Utf8);N, status:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), write_utilization:Float64;N]
|
||||
TableScan: diskio [bytes_read:Int64;N, bytes_written:Int64;N, host:Dictionary(Int32, Utf8);N, is_local:Boolean;N, read_utilization:Float64;N, region:Dictionary(Int32, Utf8);N, status:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), write_utilization:Float64;N]
|
||||
Projection: Dictionary(Int32, Utf8("merge_00")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
|
||||
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
|
||||
Unnest: tagKey [tagKey:Utf8;N]
|
||||
Projection: make_array(CASE WHEN col0 > Int32(0) THEN Utf8("col0") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N]
|
||||
Projection: make_array(CASE WHEN col0 > Int32(0) THEN Utf8("col0") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
|
||||
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(merge_00.col0 IS NOT NULL AS UInt64)) AS col0]] [col0:UInt64;N]
|
||||
Filter: merge_00.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [col0:Dictionary(Int32, Utf8);N, col1:Float64;N, col2:Boolean;N, col3:Utf8;N, time:Timestamp(Nanosecond, None)]
|
||||
TableScan: merge_00 [col0:Dictionary(Int32, Utf8);N, col1:Float64;N, col2:Boolean;N, col3:Utf8;N, time:Timestamp(Nanosecond, None)]
|
||||
Projection: Dictionary(Int32, Utf8("merge_01")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
|
||||
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
|
||||
Unnest: tagKey [tagKey:Utf8;N]
|
||||
Projection: make_array(CASE WHEN col1 > Int32(0) THEN Utf8("col1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N]
|
||||
Projection: make_array(CASE WHEN col1 > Int32(0) THEN Utf8("col1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
|
||||
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(merge_01.col1 IS NOT NULL AS UInt64)) AS col1]] [col1:UInt64;N]
|
||||
Filter: merge_01.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [col0:Float64;N, col1:Dictionary(Int32, Utf8);N, col2:Utf8;N, col3:Boolean;N, time:Timestamp(Nanosecond, None)]
|
||||
TableScan: merge_01 [col0:Float64;N, col1:Dictionary(Int32, Utf8);N, col2:Utf8;N, col3:Boolean;N, time:Timestamp(Nanosecond, None)]
|
||||
Projection: Dictionary(Int32, Utf8("temp_01")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
|
||||
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
|
||||
Unnest: tagKey [tagKey:Utf8;N]
|
||||
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
|
||||
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
|
||||
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_01.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_01.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N]
|
||||
Filter: temp_01.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [field_f64:Float64;N, field_i64:Int64;N, field_str:Utf8;N, field_u64:UInt64;N, shared_field0:Float64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
|
||||
TableScan: temp_01 [field_f64:Float64;N, field_i64:Int64;N, field_str:Utf8;N, field_u64:UInt64;N, shared_field0:Float64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
|
||||
Projection: Dictionary(Int32, Utf8("temp_02")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
|
||||
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
|
||||
Unnest: tagKey [tagKey:Utf8;N]
|
||||
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
|
||||
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
|
||||
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_02.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_02.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N]
|
||||
Filter: temp_02.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [shared_field0:Int64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
|
||||
TableScan: temp_02 [shared_field0:Int64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
|
||||
Projection: Dictionary(Int32, Utf8("temp_03")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
|
||||
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
|
||||
Unnest: tagKey [tagKey:Utf8;N]
|
||||
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
|
||||
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
|
||||
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_03.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_03.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N]
|
||||
Filter: temp_03.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [shared_field0:Utf8;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
|
||||
TableScan: temp_03 [shared_field0:Utf8;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_show_tag_keys_3() {
|
||||
assert_snapshot!(plan("SHOW TAG KEYS WHERE time > 1337"), @r###"
|
||||
Sort: iox::measurement ASC NULLS LAST, tagKey ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
|
||||
Union [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
|
||||
Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
|
||||
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
|
||||
Unnest: tagKey [tagKey:Utf8;N]
|
||||
Projection: make_array(CASE WHEN tag0 > Int32(0) THEN Utf8("tag0") END, CASE WHEN tag1 > Int32(0) THEN Utf8("tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
|
||||
Projection: make_array(CASE WHEN tag0 > Int32(0) THEN Utf8("tag0") END, CASE WHEN tag1 > Int32(0) THEN Utf8("tag1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
|
||||
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(all_types.tag0 IS NOT NULL AS UInt64)) AS tag0, SUM(CAST(all_types.tag1 IS NOT NULL AS UInt64)) AS tag1]] [tag0:UInt64;N, tag1:UInt64;N]
|
||||
Filter: all_types.time >= TimestampNanosecond(1338, None) [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N]
|
||||
TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
|
||||
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
|
||||
Unnest: tagKey [tagKey:Utf8;N]
|
||||
Projection: make_array(CASE WHEN cpu > Int32(0) THEN Utf8("cpu") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
|
||||
Projection: make_array(CASE WHEN cpu > Int32(0) THEN Utf8("cpu") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
|
||||
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(cpu.cpu IS NOT NULL AS UInt64)) AS cpu, SUM(CAST(cpu.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(cpu.region IS NOT NULL AS UInt64)) AS region]] [cpu:UInt64;N, host:UInt64;N, region:UInt64;N]
|
||||
Filter: cpu.time >= TimestampNanosecond(1338, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
|
||||
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
|
||||
Unnest: tagKey [tagKey:Utf8;N]
|
||||
Projection: make_array(CASE WHEN bar > Int32(0) THEN Utf8("bar") END, CASE WHEN foo > Int32(0) THEN Utf8("foo") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
|
||||
Projection: make_array(CASE WHEN bar > Int32(0) THEN Utf8("bar") END, CASE WHEN foo > Int32(0) THEN Utf8("foo") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
|
||||
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(data.bar IS NOT NULL AS UInt64)) AS bar, SUM(CAST(data.foo IS NOT NULL AS UInt64)) AS foo]] [bar:UInt64;N, foo:UInt64;N]
|
||||
Filter: data.time >= TimestampNanosecond(1338, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Projection: Dictionary(Int32, Utf8("disk")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
|
||||
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
|
||||
Unnest: tagKey [tagKey:Utf8;N]
|
||||
Projection: make_array(CASE WHEN device > Int32(0) THEN Utf8("device") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
|
||||
Projection: make_array(CASE WHEN device > Int32(0) THEN Utf8("device") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
|
||||
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(disk.device IS NOT NULL AS UInt64)) AS device, SUM(CAST(disk.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(disk.region IS NOT NULL AS UInt64)) AS region]] [device:UInt64;N, host:UInt64;N, region:UInt64;N]
|
||||
Filter: disk.time >= TimestampNanosecond(1338, None) [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
|
||||
TableScan: disk [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
|
||||
Projection: Dictionary(Int32, Utf8("diskio")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
|
||||
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
|
||||
Unnest: tagKey [tagKey:Utf8;N]
|
||||
Projection: make_array(CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END, CASE WHEN status > Int32(0) THEN Utf8("status") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
|
||||
Projection: make_array(CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END, CASE WHEN status > Int32(0) THEN Utf8("status") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
|
||||
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(diskio.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(diskio.region IS NOT NULL AS UInt64)) AS region, SUM(CAST(diskio.status IS NOT NULL AS UInt64)) AS status]] [host:UInt64;N, region:UInt64;N, status:UInt64;N]
|
||||
Filter: diskio.time >= TimestampNanosecond(1338, None) [bytes_read:Int64;N, bytes_written:Int64;N, host:Dictionary(Int32, Utf8);N, is_local:Boolean;N, read_utilization:Float64;N, region:Dictionary(Int32, Utf8);N, status:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), write_utilization:Float64;N]
|
||||
TableScan: diskio [bytes_read:Int64;N, bytes_written:Int64;N, host:Dictionary(Int32, Utf8);N, is_local:Boolean;N, read_utilization:Float64;N, region:Dictionary(Int32, Utf8);N, status:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), write_utilization:Float64;N]
|
||||
Projection: Dictionary(Int32, Utf8("merge_00")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
|
||||
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
|
||||
Unnest: tagKey [tagKey:Utf8;N]
|
||||
Projection: make_array(CASE WHEN col0 > Int32(0) THEN Utf8("col0") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N]
|
||||
Projection: make_array(CASE WHEN col0 > Int32(0) THEN Utf8("col0") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
|
||||
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(merge_00.col0 IS NOT NULL AS UInt64)) AS col0]] [col0:UInt64;N]
|
||||
Filter: merge_00.time >= TimestampNanosecond(1338, None) [col0:Dictionary(Int32, Utf8);N, col1:Float64;N, col2:Boolean;N, col3:Utf8;N, time:Timestamp(Nanosecond, None)]
|
||||
TableScan: merge_00 [col0:Dictionary(Int32, Utf8);N, col1:Float64;N, col2:Boolean;N, col3:Utf8;N, time:Timestamp(Nanosecond, None)]
|
||||
Projection: Dictionary(Int32, Utf8("merge_01")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
|
||||
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
|
||||
Unnest: tagKey [tagKey:Utf8;N]
|
||||
Projection: make_array(CASE WHEN col1 > Int32(0) THEN Utf8("col1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N]
|
||||
Projection: make_array(CASE WHEN col1 > Int32(0) THEN Utf8("col1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
|
||||
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(merge_01.col1 IS NOT NULL AS UInt64)) AS col1]] [col1:UInt64;N]
|
||||
Filter: merge_01.time >= TimestampNanosecond(1338, None) [col0:Float64;N, col1:Dictionary(Int32, Utf8);N, col2:Utf8;N, col3:Boolean;N, time:Timestamp(Nanosecond, None)]
|
||||
TableScan: merge_01 [col0:Float64;N, col1:Dictionary(Int32, Utf8);N, col2:Utf8;N, col3:Boolean;N, time:Timestamp(Nanosecond, None)]
|
||||
Projection: Dictionary(Int32, Utf8("temp_01")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
|
||||
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
|
||||
Unnest: tagKey [tagKey:Utf8;N]
|
||||
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
|
||||
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
|
||||
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_01.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_01.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N]
|
||||
Filter: temp_01.time >= TimestampNanosecond(1338, None) [field_f64:Float64;N, field_i64:Int64;N, field_str:Utf8;N, field_u64:UInt64;N, shared_field0:Float64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
|
||||
TableScan: temp_01 [field_f64:Float64;N, field_i64:Int64;N, field_str:Utf8;N, field_u64:UInt64;N, shared_field0:Float64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
|
||||
Projection: Dictionary(Int32, Utf8("temp_02")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
|
||||
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
|
||||
Unnest: tagKey [tagKey:Utf8;N]
|
||||
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
|
||||
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
|
||||
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_02.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_02.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N]
|
||||
Filter: temp_02.time >= TimestampNanosecond(1338, None) [shared_field0:Int64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
|
||||
TableScan: temp_02 [shared_field0:Int64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
|
||||
Projection: Dictionary(Int32, Utf8("temp_03")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
|
||||
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
|
||||
Unnest: tagKey [tagKey:Utf8;N]
|
||||
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
|
||||
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
|
||||
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_03.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_03.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N]
|
||||
Filter: temp_03.time >= TimestampNanosecond(1338, None) [shared_field0:Utf8;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
|
||||
TableScan: temp_03 [shared_field0:Utf8;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
|
||||
|
@ -2695,7 +2880,7 @@ mod test {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn test_show_tag_values() {
|
||||
fn test_show_tag_values_1() {
|
||||
assert_snapshot!(plan("SHOW TAG VALUES WITH KEY = bar"), @r###"
|
||||
Sort: iox::measurement ASC NULLS LAST, key ASC NULLS LAST, value ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), key:Dictionary(Int32, Utf8), value:Dictionary(Int32, Utf8);N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, Dictionary(Int32, Utf8("bar")) AS key, data.bar AS value [iox::measurement:Dictionary(Int32, Utf8), key:Dictionary(Int32, Utf8), value:Dictionary(Int32, Utf8);N]
|
||||
|
@ -2704,6 +2889,10 @@ mod test {
|
|||
Filter: data.time >= TimestampNanosecond(1672444800000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_show_tag_values_2() {
|
||||
assert_snapshot!(plan("SHOW TAG VALUES WITH KEY = bar LIMIT 1 OFFSET 2"), @r###"
|
||||
Sort: iox::measurement ASC NULLS LAST, key ASC NULLS LAST, value ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), key:Dictionary(Int32, Utf8), value:Dictionary(Int32, Utf8);N]
|
||||
Projection: iox::measurement, key, value [iox::measurement:Dictionary(Int32, Utf8), key:Dictionary(Int32, Utf8), value:Dictionary(Int32, Utf8);N]
|
||||
|
@ -2716,6 +2905,10 @@ mod test {
|
|||
Filter: data.time >= TimestampNanosecond(1672444800000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_show_tag_values_3() {
|
||||
assert_snapshot!(plan("SHOW TAG VALUES WITH KEY = bar WHERE foo = 'some_foo'"), @r###"
|
||||
Sort: iox::measurement ASC NULLS LAST, key ASC NULLS LAST, value ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), key:Dictionary(Int32, Utf8), value:Dictionary(Int32, Utf8);N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, Dictionary(Int32, Utf8("bar")) AS key, data.bar AS value [iox::measurement:Dictionary(Int32, Utf8), key:Dictionary(Int32, Utf8), value:Dictionary(Int32, Utf8);N]
|
||||
|
@ -2724,6 +2917,10 @@ mod test {
|
|||
Filter: data.time >= TimestampNanosecond(1672444800000000000, None) AND data.foo = Dictionary(Int32, Utf8("some_foo")) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_show_tag_values_4() {
|
||||
assert_snapshot!(plan("SHOW TAG VALUES WITH KEY = bar WHERE time > 1337"), @r###"
|
||||
Sort: iox::measurement ASC NULLS LAST, key ASC NULLS LAST, value ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), key:Dictionary(Int32, Utf8), value:Dictionary(Int32, Utf8);N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, Dictionary(Int32, Utf8("bar")) AS key, data.bar AS value [iox::measurement:Dictionary(Int32, Utf8), key:Dictionary(Int32, Utf8), value:Dictionary(Int32, Utf8);N]
|
||||
|
@ -2784,7 +2981,8 @@ mod test {
|
|||
Sort: time ASC NULLS LAST [time:Timestamp(Nanosecond, None);N, value:Float64;N]
|
||||
Projection: time, AVG(cpu.usage_idle) AS value [time:Timestamp(Nanosecond, None);N, value:Float64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
}
|
||||
|
||||
|
@@ -2962,18 +3160,20 @@ mod test {
assert_snapshot!(plan("SELECT LAST(usage_idle) FROM cpu GROUP BY TIME(5s)"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, last:Float64;N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, (selector_last(cpu.usage_idle,cpu.time))[value] AS last [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, last:Float64;N]
|
||||
GapFill: groupBy=[[time]], aggr=[[selector_last(cpu.usage_idle,cpu.time)]], time_column=time, stride=IntervalMonthDayNano("5000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
|
||||
GapFill: groupBy=[[time]], aggr=[[selector_last(cpu.usage_idle,cpu.time)]], time_column=time, stride=IntervalMonthDayNano("5000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("5000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[selector_last(cpu.usage_idle, cpu.time)]] [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
|
||||
// aggregate query, grouping by time with gap filling
|
||||
assert_snapshot!(plan("SELECT FIRST(usage_idle) FROM cpu GROUP BY TIME(5s) FILL(0)"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, first:Float64;N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, (coalesce_struct(selector_first(cpu.usage_idle,cpu.time), Struct({value:Float64(0),time:TimestampNanosecond(0, None)})))[value] AS first [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, first:Float64;N]
|
||||
GapFill: groupBy=[[time]], aggr=[[selector_first(cpu.usage_idle,cpu.time)]], time_column=time, stride=IntervalMonthDayNano("5000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, selector_first(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
|
||||
GapFill: groupBy=[[time]], aggr=[[selector_first(cpu.usage_idle,cpu.time)]], time_column=time, stride=IntervalMonthDayNano("5000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, selector_first(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("5000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[selector_first(cpu.usage_idle, cpu.time)]] [time:Timestamp(Nanosecond, None);N, selector_first(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
|
||||
// aggregate query, as we're specifying multiple selectors or aggregates
|
||||
|
@@ -3549,7 +3749,8 @@ mod test {
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
"###);
|
||||
|
||||
// supports offset parameter
|
||||
|
@@ -3557,7 +3758,8 @@ mod test {
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(5000000000, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
"###);
|
||||
}
|
||||
|
||||
|
@@ -3567,9 +3769,10 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s)"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
"###);
|
||||
}
|
||||
|
||||
|
@@ -3592,9 +3795,9 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time >= '2022-10-31T02:00:00Z' GROUP BY TIME(10s)"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Included(TimestampNanosecond(1667181600000000000, None))..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Included(TimestampNanosecond(1667181600000000000, None))..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Filter: data.time >= TimestampNanosecond(1667181600000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Filter: data.time >= TimestampNanosecond(1667181600000000000, None) AND data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
"###);
|
||||
}
|
||||
|
@@ -3617,9 +3820,10 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s)"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
"###);
|
||||
}
|
||||
|
||||
|
@@ -3628,9 +3832,10 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(null)"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
"###);
|
||||
}
|
||||
|
||||
|
@@ -3639,9 +3844,10 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(previous)"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
GapFill: groupBy=[[time]], aggr=[[LOCF(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
GapFill: groupBy=[[time]], aggr=[[LOCF(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
"###);
|
||||
}
|
||||
|
||||
|
@@ -3650,9 +3856,10 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(0)"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
"###);
|
||||
}
|
||||
|
||||
|
@@ -3661,9 +3868,10 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(linear)"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
GapFill: groupBy=[[time]], aggr=[[INTERPOLATE(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
GapFill: groupBy=[[time]], aggr=[[INTERPOLATE(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
"###);
|
||||
}
|
||||
|
||||
|
@@ -3673,9 +3881,10 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(3.2)"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(3)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
"###);
|
||||
}
|
||||
|
||||
|
@@ -3685,9 +3894,10 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) + MEAN(f64_field) FROM data GROUP BY TIME(10s) FILL(3.2)"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count_mean:Float64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(3)) + coalesce_struct(AVG(data.f64_field), Float64(3.2)) AS count_mean [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count_mean:Float64;N]
|
||||
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field), AVG(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N, AVG(data.f64_field):Float64;N]
|
||||
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field), AVG(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N, AVG(data.f64_field):Float64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field), AVG(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N, AVG(data.f64_field):Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
"###);
|
||||
}
|
||||
|
||||
|
@@ -3755,7 +3965,8 @@ mod test {
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
"###);
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -1,20 +1,8 @@
//! APIs for transforming InfluxQL [expressions][influxdb_influxql_parser::expression::Expr].
|
||||
use crate::plan::error;
|
||||
use datafusion::common::{Result, ScalarValue};
|
||||
use datafusion::common::ScalarValue;
|
||||
use datafusion::logical_expr::{lit, Expr as DFExpr};
|
||||
use datafusion_util::AsExpr;
|
||||
use influxdb_influxql_parser::expression::Expr;
|
||||
use influxdb_influxql_parser::time_range::{duration_expr_to_nanoseconds, TimeRange};
|
||||
|
||||
type ExprResult = Result<DFExpr>;
|
||||
|
||||
/// Simplifies `expr` to an InfluxQL duration and returns a DataFusion interval.
|
||||
///
|
||||
/// Returns an error if `expr` is not a duration expression.
|
||||
pub(super) fn expr_to_df_interval_dt(expr: &Expr) -> ExprResult {
|
||||
let ns = duration_expr_to_nanoseconds(expr).map_err(error::map::expr_error)?;
|
||||
Ok(lit(ScalarValue::new_interval_mdn(0, 0, ns)))
|
||||
}
|
||||
use influxdb_influxql_parser::time_range::TimeRange;
|
||||
|
||||
fn lower_bound_to_df_expr(v: Option<i64>) -> Option<DFExpr> {
|
||||
v.map(|ts| {
|
||||
|
|
|
@@ -1,7 +1,7 @@
use crate::plan::expr_type_evaluator::TypeEvaluator;
|
||||
use crate::plan::field::{field_by_name, field_name};
|
||||
use crate::plan::field_mapper::{field_and_dimensions, FieldTypeMap};
|
||||
use crate::plan::ir::{DataSource, Field, Select, SelectQuery, TagSet};
|
||||
use crate::plan::ir::{DataSource, Field, Interval, Select, SelectQuery, TagSet};
|
||||
use crate::plan::var_ref::{influx_type_to_var_ref_data_type, var_ref_data_type_to_influx_type};
|
||||
use crate::plan::{error, util, SchemaProvider};
|
||||
use datafusion::common::{DataFusionError, Result};
|
||||
|
@@ -19,7 +19,9 @@ use influxdb_influxql_parser::select::{
Dimension, FillClause, FromMeasurementClause, GroupByClause, MeasurementSelection,
|
||||
SelectStatement,
|
||||
};
|
||||
use influxdb_influxql_parser::time_range::{split_cond, ReduceContext, TimeRange};
|
||||
use influxdb_influxql_parser::time_range::{
|
||||
duration_expr_to_nanoseconds, split_cond, ReduceContext, TimeRange,
|
||||
};
|
||||
use influxdb_influxql_parser::timestamp::Timestamp;
|
||||
use itertools::Itertools;
|
||||
use schema::InfluxColumnType;
|
||||
|
@@ -100,21 +102,35 @@ impl RewriteSelect {
let (fields, group_by) = self.expand_projection(s, stmt, &from, &tag_set)?;
let condition = self.condition_resolve_types(s, stmt, &from)?;

let now = Timestamp::from(s.execution_props().query_execution_start_time);
let rc = ReduceContext {
now: Some(now),
tz: stmt.timezone.map(|tz| *tz),
};

let interval = self.find_interval_offset(&rc, group_by.as_ref())?;

let (condition, time_range) = match condition {
Some(where_clause) => {
let rc = ReduceContext {
now: Some(Timestamp::from(
s.execution_props().query_execution_start_time,
)),
tz: stmt.timezone.map(|tz| *tz),
};
split_cond(&rc, &where_clause).map_err(error::map::expr_error)?
}
Some(where_clause) => split_cond(&rc, &where_clause).map_err(error::map::expr_error)?,
None => (None, TimeRange::default()),
};

let SelectStatementInfo { projection_type } =
select_statement_info(&fields, &group_by, stmt.fill)?;
// If the interval is non-zero and there is no upper bound, default to `now`
// for compatibility with InfluxQL OG.
//
// See: https://github.com/influxdata/influxdb/blob/f365bb7e3a9c5e227dbf66d84adf674d3d127176/query/compile.go#L172-L179
let time_range = match (interval, time_range.upper) {
(Some(interval), None) if interval.duration > 0 => TimeRange {
lower: time_range.lower,
upper: Some(now.timestamp_nanos()),
},
_ => time_range,
};

let SelectStatementInfo {
projection_type,
extra_intervals,
} = select_statement_info(&fields, &group_by, stmt.fill)?;

// Following InfluxQL OG behaviour, if this is a subquery, and the fill strategy equates
// to `FILL(null)`, switch to `FILL(none)`.
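For illustration (not part of the diff): the match above is why the plan snapshots now show a GapFill range ending in Included(TimestampNanosecond(1672531200000000000, None)) instead of Excluded(now()). A minimal standalone sketch of that rule, using simplified stand-ins for TimeRange and Interval rather than the crate's own types:

// Illustrative sketch only: cap an open-ended time range at the query start
// time when a non-zero GROUP BY TIME interval is present.
#[derive(Debug, PartialEq)]
struct TimeRange {
    lower: Option<i64>,
    upper: Option<i64>,
}

struct Interval {
    duration: i64,
}

fn default_upper_bound(interval: Option<&Interval>, time_range: TimeRange, now_ns: i64) -> TimeRange {
    match (interval, time_range.upper) {
        // Non-zero stride and no explicit upper bound: default the upper bound to "now".
        (Some(interval), None) if interval.duration > 0 => TimeRange {
            lower: time_range.lower,
            upper: Some(now_ns),
        },
        _ => time_range,
    }
}

fn main() {
    let capped = default_upper_bound(
        Some(&Interval { duration: 10_000_000_000 }),
        TimeRange { lower: Some(0), upper: None },
        1_672_531_200_000_000_000,
    );
    assert_eq!(capped.upper, Some(1_672_531_200_000_000_000));
}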
|
||||
|
@@ -131,6 +147,8 @@ impl RewriteSelect {
|
||||
Ok(Select {
projection_type,
interval,
extra_intervals,
fields,
from,
condition,
|
||||
|
@@ -388,6 +406,29 @@ impl RewriteSelect {
Ok(Some(where_clause))
}
}

/// Return the interval value of the `GROUP BY` clause if it specifies a `TIME`.
fn find_interval_offset(
&self,
ctx: &ReduceContext,
group_by: Option<&GroupByClause>,
) -> Result<Option<Interval>> {
Ok(
if let Some(td) = group_by.and_then(|v| v.time_dimension()) {
let duration = duration_expr_to_nanoseconds(ctx, &td.interval)
.map_err(error::map::expr_error)?;
let offset = td
.offset
.as_ref()
.map(|o| duration_expr_to_nanoseconds(ctx, o))
.transpose()
.map_err(error::map::expr_error)?;
Some(Interval { duration, offset })
} else {
None
},
)
}
}
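As a quick illustration of what this yields (simplified stand-in for Interval, values in nanoseconds): GROUP BY TIME(10s, 5s) reduces to a 10 s stride with a 5 s offset, which matches the stride and origin visible in the date_bin(IntervalMonthDayNano("10000000000"), ..., TimestampNanosecond(5000000000, None)) lines of the plan snapshots above:

// Illustrative sketch only: GROUP BY TIME(10s, 5s) expressed in nanoseconds.
struct Interval {
    duration: i64,
    offset: Option<i64>,
}

fn main() {
    const NS_PER_SEC: i64 = 1_000_000_000;
    let interval = Interval {
        duration: 10 * NS_PER_SEC,
        offset: Some(5 * NS_PER_SEC),
    };
    assert_eq!(interval.duration, 10_000_000_000);
    assert_eq!(interval.offset, Some(5_000_000_000));
}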
|
||||
|
||||
/// Ensures the `time` column is presented consistently across all `SELECT` queries.
|
||||
|
@@ -865,12 +906,30 @@ macro_rules! lit_string {
};
}

/// Set the `extra_intervals` field of [`FieldChecker`] if it is
/// less than the proposed new value.
macro_rules! set_extra_intervals {
($SELF:expr, $NEW:expr) => {
if $SELF.extra_intervals < $NEW as usize {
$SELF.extra_intervals = $NEW as usize
}
};
}

/// Checks a number of expectations for the fields of a [`SelectStatement`].
#[derive(Default)]
struct FieldChecker {
/// `true` if the statement contains a `GROUP BY TIME` clause.
has_group_by_time: bool,

/// The number of additional intervals that must be read
/// for queries that group by time and use window functions such as
/// `DIFFERENCE` or `DERIVATIVE`. This ensures data for the first
/// window is available.
///
/// See: <https://github.com/influxdata/influxdb/blob/f365bb7e3a9c5e227dbf66d84adf674d3d127176/query/compile.go#L50>
extra_intervals: usize,

/// `true` if the interval was inherited by a parent.
/// If this is set, then an interval that was inherited will not cause
/// a query that shouldn't have an interval to fail.
|
||||
|
@@ -888,6 +947,9 @@ struct FieldChecker {
/// Accumulator for the number of aggregate or window expressions for the statement.
|
||||
aggregate_count: usize,
|
||||
|
||||
/// Accumulator for the number of window expressions for the statement.
|
||||
window_count: usize,
|
||||
|
||||
/// Accumulator for the number of selector expressions for the statement.
|
||||
selector_count: usize,
|
||||
}
|
||||
|
@@ -942,7 +1004,7 @@ impl FieldChecker {
|
||||
// Validate we are using a selector or raw query if non-aggregate fields are projected.
|
||||
if self.has_non_aggregate_fields {
|
||||
if self.aggregate_count > 0 {
|
||||
if self.window_aggregate_count() > 0 {
|
||||
return error::query("mixing aggregate and non-aggregate columns is not supported");
|
||||
} else if self.selector_count > 1 {
|
||||
return error::query(
|
||||
|
@@ -954,26 +1016,37 @@ impl FieldChecker {
// By this point the statement is valid, so lets
|
||||
// determine the projection type
|
||||
|
||||
if self.has_top_bottom {
|
||||
Ok(ProjectionType::TopBottomSelector)
|
||||
Ok(if self.has_top_bottom {
|
||||
ProjectionType::TopBottomSelector
|
||||
} else if self.has_group_by_time {
|
||||
Ok(ProjectionType::Aggregate)
|
||||
if self.window_count > 0 {
|
||||
ProjectionType::WindowAggregate
|
||||
} else {
|
||||
ProjectionType::Aggregate
|
||||
}
|
||||
} else if self.has_distinct {
|
||||
Ok(ProjectionType::RawDistinct)
|
||||
ProjectionType::RawDistinct
|
||||
} else if self.selector_count == 1 && self.aggregate_count == 0 {
|
||||
Ok(ProjectionType::Selector {
|
||||
ProjectionType::Selector {
|
||||
has_fields: self.has_non_aggregate_fields,
|
||||
})
|
||||
}
|
||||
} else if self.selector_count > 1 || self.aggregate_count > 0 {
|
||||
Ok(ProjectionType::Aggregate)
|
||||
ProjectionType::Aggregate
|
||||
} else if self.window_count > 0 {
|
||||
ProjectionType::Window
|
||||
} else {
|
||||
Ok(ProjectionType::Raw)
|
||||
}
|
||||
ProjectionType::Raw
|
||||
})
|
||||
}
|
||||
|
||||
/// The total number of functions observed.
|
||||
fn function_count(&self) -> usize {
|
||||
self.aggregate_count + self.selector_count
|
||||
self.window_aggregate_count() + self.selector_count
|
||||
}
|
||||
|
||||
/// The total number of window and aggregate functions observed.
|
||||
fn window_aggregate_count(&self) -> usize {
|
||||
self.aggregate_count + self.window_count
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -1195,9 +1268,12 @@ impl FieldChecker {
}
|
||||
|
||||
fn check_derivative(&mut self, name: &str, args: &[Expr]) -> Result<()> {
|
||||
self.inc_aggregate_count();
|
||||
self.inc_window_count();
|
||||
|
||||
check_exp_args!(name, 1, 2, args);
|
||||
|
||||
set_extra_intervals!(self, 1);
|
||||
|
||||
match args.get(1) {
|
||||
Some(Expr::Literal(Literal::Duration(d))) if **d <= 0 => {
|
||||
return error::query(format!("duration argument must be positive, got {d}"))
|
||||
|
@@ -1214,9 +1290,11 @@ impl FieldChecker {
}
|
||||
|
||||
fn check_elapsed(&mut self, name: &str, args: &[Expr]) -> Result<()> {
|
||||
self.inc_aggregate_count();
|
||||
self.inc_window_count();
|
||||
check_exp_args!(name, 1, 2, args);
|
||||
|
||||
set_extra_intervals!(self, 1);
|
||||
|
||||
match args.get(1) {
|
||||
Some(Expr::Literal(Literal::Duration(d))) if **d <= 0 => {
|
||||
return error::query(format!("duration argument must be positive, got {d}"))
|
||||
|
@@ -1233,9 +1311,11 @@ impl FieldChecker {
}
|
||||
|
||||
fn check_difference(&mut self, name: &str, args: &[Expr]) -> Result<()> {
|
||||
self.inc_aggregate_count();
|
||||
self.inc_window_count();
|
||||
check_exp_args!(name, 1, args);
|
||||
|
||||
set_extra_intervals!(self, 1);
|
||||
|
||||
self.check_nested_symbol(name, &args[0])
|
||||
}
|
||||
|
||||
|
@@ -1243,11 +1323,13 @@ impl FieldChecker {
self.inc_aggregate_count();
|
||||
check_exp_args!("cumulative_sum", 1, args);
|
||||
|
||||
set_extra_intervals!(self, 1);
|
||||
|
||||
self.check_nested_symbol("cumulative_sum", &args[0])
|
||||
}
|
||||
|
||||
fn check_moving_average(&mut self, args: &[Expr]) -> Result<()> {
|
||||
self.inc_aggregate_count();
|
||||
self.inc_window_count();
|
||||
check_exp_args!("moving_average", 2, args);
|
||||
|
||||
let v = lit_integer!("moving_average", args, 1);
|
||||
|
@@ -1257,11 +1339,13 @@ impl FieldChecker {
));
|
||||
}
|
||||
|
||||
set_extra_intervals!(self, v);
|
||||
|
||||
self.check_nested_symbol("moving_average", &args[0])
|
||||
}
|
||||
|
||||
fn check_exponential_moving_average(&mut self, name: &str, args: &[Expr]) -> Result<()> {
|
||||
self.inc_aggregate_count();
|
||||
self.inc_window_count();
|
||||
check_exp_args!(name, 2, 4, args);
|
||||
|
||||
let v = lit_integer!(name, args, 1);
|
||||
|
@@ -1269,6 +1353,8 @@ impl FieldChecker {
return error::query(format!("{name} period must be greater than 1, got {v}"));
|
||||
}
|
||||
|
||||
set_extra_intervals!(self, v);
|
||||
|
||||
if let Some(v) = lit_integer!(name, args, 2?) {
|
||||
match (v, name) {
|
||||
(v, "triple_exponential_derivative") if v < 1 && v != -1 => {
|
||||
|
@@ -1299,7 +1385,7 @@ impl FieldChecker {
}
|
||||
|
||||
fn check_kaufmans(&mut self, name: &str, args: &[Expr]) -> Result<()> {
|
||||
self.inc_aggregate_count();
|
||||
self.inc_window_count();
|
||||
check_exp_args!(name, 2, 3, args);
|
||||
|
||||
let v = lit_integer!(name, args, 1);
|
||||
|
@@ -1307,6 +1393,8 @@ impl FieldChecker {
return error::query(format!("{name} period must be greater than 1, got {v}"));
|
||||
}
|
||||
|
||||
set_extra_intervals!(self, v);
|
||||
|
||||
if let Some(v) = lit_integer!(name, args, 2?) {
|
||||
if v < 0 && v != -1 {
|
||||
return error::query(format!(
|
||||
|
@@ -1319,7 +1407,7 @@ impl FieldChecker {
}
|
||||
|
||||
fn check_chande_momentum_oscillator(&mut self, name: &str, args: &[Expr]) -> Result<()> {
|
||||
self.inc_aggregate_count();
|
||||
self.inc_window_count();
|
||||
check_exp_args!(name, 2, 4, args);
|
||||
|
||||
let v = lit_integer!(name, args, 1);
|
||||
|
@@ -1327,6 +1415,8 @@ impl FieldChecker {
return error::query(format!("{name} period must be greater than 1, got {v}"));
|
||||
}
|
||||
|
||||
set_extra_intervals!(self, v);
|
||||
|
||||
if let Some(v) = lit_integer!(name, args, 2?) {
|
||||
if v < 0 && v != -1 {
|
||||
return error::query(format!(
|
||||
|
@@ -1401,11 +1491,16 @@ impl FieldChecker {
}
|
||||
}
|
||||
|
||||
/// Increments the function call count
|
||||
/// Increments the aggregate function call count
|
||||
fn inc_aggregate_count(&mut self) {
|
||||
self.aggregate_count += 1
|
||||
}
|
||||
|
||||
/// Increments the window function call count
|
||||
fn inc_window_count(&mut self) {
|
||||
self.window_count += 1
|
||||
}
|
||||
|
||||
fn inc_selector_count(&mut self) {
|
||||
self.selector_count += 1
|
||||
}
|
||||
|
@@ -1453,6 +1548,10 @@ pub(crate) enum ProjectionType {
/// A query that projects one or more aggregate functions or
|
||||
/// two or more selector functions.
|
||||
Aggregate,
|
||||
/// A query that projects one or more window functions.
|
||||
Window,
|
||||
/// A query that projects a combination of window and nested aggregate functions.
|
||||
WindowAggregate,
|
||||
/// A query that projects a single selector function,
|
||||
/// such as `last` or `first`.
|
||||
Selector {
|
||||
|
@@ -1468,7 +1567,11 @@
#[derive(Default, Debug, Copy, Clone)]
|
||||
struct SelectStatementInfo {
|
||||
/// Identifies the projection type for the `SELECT` query.
|
||||
pub projection_type: ProjectionType,
|
||||
projection_type: ProjectionType,
|
||||
/// Copied from [extra_intervals](FieldChecker::extra_intervals)
|
||||
///
|
||||
/// [See also](Select::extra_intervals).
|
||||
extra_intervals: usize,
|
||||
}
|
||||
|
||||
/// Gather information about the semantics of a [`SelectStatement`] and verify
|
||||
|
@@ -1518,8 +1621,14 @@ fn select_statement_info(
};
|
||||
|
||||
let projection_type = fc.check_fields(fields, fill)?;
|
||||
let FieldChecker {
|
||||
extra_intervals, ..
|
||||
} = fc;
|
||||
|
||||
Ok(SelectStatementInfo { projection_type })
|
||||
Ok(SelectStatementInfo {
|
||||
projection_type,
|
||||
extra_intervals,
|
||||
})
|
||||
}
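A small standalone sketch of how the `extra_intervals` value returned here accumulates: every window function proposes a count and the checker keeps the maximum, mirroring the `set_extra_intervals!` macro above. The struct below is a simplified stand-in, not the crate's `FieldChecker`:

// Illustrative sketch only: keep the largest proposed number of extra intervals.
#[derive(Default)]
struct Checker {
    extra_intervals: usize,
}

impl Checker {
    fn set_extra_intervals(&mut self, new: usize) {
        if self.extra_intervals < new {
            self.extra_intervals = new;
        }
    }
}

fn main() {
    let mut c = Checker::default();
    c.set_extra_intervals(1); // e.g. DIFFERENCE
    c.set_extra_intervals(4); // e.g. MOVING_AVERAGE(.., 4)
    c.set_extra_intervals(1);
    assert_eq!(c.extra_intervals, 4);
}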
|
||||
|
||||
#[cfg(test)]
|
||||
|
@@ -1591,6 +1700,22 @@ mod test {
ProjectionType::Selector { has_fields: false }
|
||||
);
|
||||
|
||||
// updates extra_intervals
|
||||
let info = select_statement_info(&parse_select("SELECT difference(foo) FROM cpu")).unwrap();
|
||||
assert_matches!(info.projection_type, ProjectionType::Window);
|
||||
assert_matches!(info.extra_intervals, 1);
|
||||
// derives extra intervals from the window function
|
||||
let info =
|
||||
select_statement_info(&parse_select("SELECT moving_average(foo, 5) FROM cpu")).unwrap();
|
||||
assert_matches!(info.projection_type, ProjectionType::Window);
|
||||
assert_matches!(info.extra_intervals, 5);
|
||||
// uses the maximum extra intervals
|
||||
let info = select_statement_info(&parse_select(
|
||||
"SELECT difference(foo), moving_average(foo, 4) FROM cpu",
|
||||
))
|
||||
.unwrap();
|
||||
assert_matches!(info.extra_intervals, 4);
|
||||
|
||||
let info = select_statement_info(&parse_select("SELECT last(foo), bar FROM cpu")).unwrap();
|
||||
assert_matches!(
|
||||
info.projection_type,
|
||||
|
@@ -1610,6 +1735,12 @@
let info = select_statement_info(&parse_select("SELECT count(foo) FROM cpu")).unwrap();
|
||||
assert_matches!(info.projection_type, ProjectionType::Aggregate);
|
||||
|
||||
let info = select_statement_info(&parse_select(
|
||||
"SELECT difference(count(foo)) FROM cpu GROUP BY TIME(10s)",
|
||||
))
|
||||
.unwrap();
|
||||
assert_matches!(info.projection_type, ProjectionType::WindowAggregate);
|
||||
|
||||
let info = select_statement_info(&parse_select("SELECT top(foo, 3) FROM cpu")).unwrap();
|
||||
assert_matches!(info.projection_type, ProjectionType::TopBottomSelector);
|
||||
}
|
||||
|
@@ -2216,6 +2347,26 @@ mod test {
stmt.to_string(),
|
||||
"SELECT time::timestamp AS time, host::tag AS host, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu GROUP BY cpu::tag, host::tag, region::tag"
|
||||
);
|
||||
|
||||
//
|
||||
// TIME
|
||||
//
|
||||
|
||||
// Explicitly adds an upper bound for the time-range for aggregate queries
|
||||
let stmt = parse_select("SELECT mean(usage_idle) FROM cpu WHERE time >= '2022-04-09T12:13:14Z' GROUP BY TIME(30s)");
|
||||
let stmt = rewrite_select_statement(&namespace, &stmt).unwrap();
|
||||
assert_eq!(
|
||||
stmt.to_string(),
|
||||
"SELECT time::timestamp AS time, mean(usage_idle::float) AS mean FROM cpu WHERE time >= 1649506394000000000 AND time <= 1672531200000000000 GROUP BY TIME(30s)"
|
||||
);
|
||||
|
||||
// Does not add an upper bound time range if already specified
|
||||
let stmt = parse_select("SELECT mean(usage_idle) FROM cpu WHERE time >= '2022-04-09T12:13:14Z' AND time < '2022-04-10T12:00:00Z' GROUP BY TIME(30s)");
|
||||
let stmt = rewrite_select_statement(&namespace, &stmt).unwrap();
|
||||
assert_eq!(
|
||||
stmt.to_string(),
|
||||
"SELECT time::timestamp AS time, mean(usage_idle::float) AS mean FROM cpu WHERE time >= 1649506394000000000 AND time <= 1649591999999999999 GROUP BY TIME(30s)"
|
||||
);
|
||||
}
|
||||
|
||||
/// Uncategorized fallible cases
|
||||
|
|
|
@@ -225,6 +225,7 @@ pub async fn create_compactor_server_type(
partition_timeout: Duration::from_secs(compactor_config.partition_timeout_secs),
|
||||
partitions_source,
|
||||
shadow_mode: compactor_config.shadow_mode,
|
||||
enable_scratchpad: compactor_config.enable_scratchpad,
|
||||
ignore_partition_skip_marker: compactor_config.ignore_partition_skip_marker,
|
||||
shard_config,
|
||||
min_num_l1_files_to_compact: compactor_config.min_num_l1_files_to_compact,
|
||||
|
|
|
@@ -83,7 +83,7 @@ impl ParquetChunk {
///
|
||||
/// See [`ParquetExecInput`] for more information.
|
||||
///
|
||||
/// [`ParquetExec`]: datafusion::physical_plan::file_format::ParquetExec
|
||||
/// [`ParquetExec`]: datafusion::datasource::physical_plan::ParquetExec
|
||||
pub fn parquet_exec_input(&self) -> ParquetExecInput {
|
||||
let path: ParquetFilePath = self.parquet_file.as_ref().into();
|
||||
self.store.parquet_exec_input(&path, self.file_size_bytes())
|
||||
|
|
|
@@ -13,13 +13,14 @@ use arrow::{
use bytes::Bytes;
|
||||
use data_types::PartitionId;
|
||||
use datafusion::{
|
||||
datasource::{listing::PartitionedFile, object_store::ObjectStoreUrl},
|
||||
datasource::{
|
||||
listing::PartitionedFile,
|
||||
object_store::ObjectStoreUrl,
|
||||
physical_plan::{FileScanConfig, ParquetExec},
|
||||
},
|
||||
error::DataFusionError,
|
||||
execution::memory_pool::MemoryPool,
|
||||
physical_plan::{
|
||||
file_format::{FileScanConfig, ParquetExec},
|
||||
ExecutionPlan, SendableRecordBatchStream, Statistics,
|
||||
},
|
||||
physical_plan::{ExecutionPlan, SendableRecordBatchStream, Statistics},
|
||||
prelude::SessionContext,
|
||||
};
|
||||
use datafusion_util::config::{iox_session_config, register_iox_object_store};
|
||||
|
@@ -93,7 +94,7 @@ impl std::fmt::Display for StorageId {
/// The files shall be grouped by [`object_store_url`](Self::object_store_url). For each object store, you shall
|
||||
/// create one [`ParquetExec`] and put each file into its own "file group".
|
||||
///
|
||||
/// [`ParquetExec`]: datafusion::physical_plan::file_format::ParquetExec
|
||||
/// [`ParquetExec`]: datafusion::datasource::physical_plan::ParquetExec
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ParquetExecInput {
|
||||
/// Store where the file is located.
|
||||
|
@@ -286,7 +287,7 @@ impl ParquetStorage {
///
|
||||
/// See [`ParquetExecInput`] for more information.
|
||||
///
|
||||
/// [`ParquetExec`]: datafusion::physical_plan::file_format::ParquetExec
|
||||
/// [`ParquetExec`]: datafusion::datasource::physical_plan::ParquetExec
|
||||
pub fn parquet_exec_input(&self, path: &ParquetFilePath, file_size: usize) -> ParquetExecInput {
|
||||
ParquetExecInput {
|
||||
object_store_url: ObjectStoreUrl::parse(format!("iox://{}/", self.id))
|
||||
|
|
|
@@ -23,16 +23,13 @@ use datafusion::{
file_format::{parquet::ParquetFormat, FileFormat},
|
||||
listing::PartitionedFile,
|
||||
object_store::ObjectStoreUrl,
|
||||
physical_plan::{FileScanConfig, ParquetExec},
|
||||
},
|
||||
execution::{
|
||||
context::{SessionState, TaskContext},
|
||||
runtime_env::RuntimeEnv,
|
||||
},
|
||||
physical_plan::{
|
||||
execute_stream,
|
||||
file_format::{FileScanConfig, ParquetExec},
|
||||
SendableRecordBatchStream, Statistics,
|
||||
},
|
||||
physical_plan::{execute_stream, SendableRecordBatchStream, Statistics},
|
||||
prelude::SessionContext,
|
||||
};
|
||||
use datafusion_util::config::{iox_session_config, register_iox_object_store};
|
||||
|
|
|
@@ -11,7 +11,11 @@ use cache_system::{
loader::{metrics::MetricsLoader, FunctionLoader},
|
||||
resource_consumption::FunctionEstimator,
|
||||
};
|
||||
use data_types::{ColumnId, PartitionId};
|
||||
use data_types::{
|
||||
partition_template::{build_column_values, ColumnValue},
|
||||
ColumnId, Partition, PartitionId,
|
||||
};
|
||||
use datafusion::scalar::ScalarValue;
|
||||
use iox_catalog::interface::Catalog;
|
||||
use iox_time::TimeProvider;
|
||||
use schema::sort::SortKey;
|
||||
|
@@ -70,11 +74,7 @@ impl PartitionCache {
.await
|
||||
.expect("retry forever")?;
|
||||
|
||||
let sort_key = partition.sort_key().map(|sort_key| {
|
||||
Arc::new(PartitionSortKey::new(sort_key, &extra.column_id_map_rev))
|
||||
});
|
||||
|
||||
Some(CachedPartition { sort_key })
|
||||
Some(CachedPartition::new(partition, &extra))
|
||||
}
|
||||
});
|
||||
let loader = Arc::new(MetricsLoader::new(
|
||||
|
@@ -144,14 +144,118 @@ impl PartitionCache {
.await
.and_then(|p| p.sort_key)
}

/// Get known column ranges.
#[allow(dead_code)]
pub async fn column_ranges(
&self,
cached_table: Arc<CachedTable>,
partition_id: PartitionId,
span: Option<Span>,
) -> Option<ColumnRanges> {
self.cache
.get(partition_id, (cached_table, span))
.await
.map(|p| p.column_ranges)
}
}

/// Represents known min/max values for a specific column.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ColumnRange {
pub min_value: Arc<ScalarValue>,
pub max_value: Arc<ScalarValue>,
}

/// Represents the known min/max values for a subset (not all) of the columns in a partition.
///
/// The values may not actually occur in any row.
///
/// These ranges apply to ALL rows (esp. in ALL files and ingester chunks) within a given partition.
pub type ColumnRanges = Arc<HashMap<Arc<str>, ColumnRange>>;

#[derive(Debug, Clone)]
struct CachedPartition {
sort_key: Option<Arc<PartitionSortKey>>,
column_ranges: ColumnRanges,
}
|
||||
impl CachedPartition {
|
||||
fn new(partition: Partition, table: &CachedTable) -> Self {
|
||||
let sort_key = partition
|
||||
.sort_key()
|
||||
.map(|sort_key| Arc::new(PartitionSortKey::new(sort_key, &table.column_id_map_rev)));
|
||||
|
||||
let mut column_ranges =
|
||||
build_column_values(&table.partition_template, partition.partition_key.inner())
|
||||
.filter_map(|(col, val)| {
|
||||
// resolve column name to already existing Arc for cheaper storage
|
||||
let col = Arc::clone(table.column_id_map_rev.get_key_value(col)?.0);
|
||||
|
||||
let range = match val {
|
||||
ColumnValue::Identity(s) => {
|
||||
let s = Arc::new(ScalarValue::from(s.as_ref()));
|
||||
ColumnRange {
|
||||
min_value: Arc::clone(&s),
|
||||
max_value: s,
|
||||
}
|
||||
}
|
||||
ColumnValue::Prefix(p) => {
|
||||
if p.is_empty() {
|
||||
// full range => value is useless
|
||||
return None;
|
||||
}
|
||||
|
||||
// If the partition only has a prefix of the tag value (it was truncated), then form a conservative
// range:
//
// # Minimum
// Use the prefix itself.
//
// Note that the minimum is inclusive.
//
// All values in the partition are either:
// - identical to the prefix, in which case they are included by the inclusive minimum
// - of the form `"<prefix><s>"`, and it holds that `"<prefix><s>" > "<prefix>"` for all
//   strings `"<s>"`.
//
// # Maximum
// Use `"<prefix_excluding_last_char><char::max>"`.
//
// Note that the maximum is inclusive.
//
// All strings in this partition must be smaller than this constructed maximum, because
// string comparison is front-to-back and `"<prefix_excluding_last_char><char::max>" > "<prefix>"`.
|
||||
|
||||
let min_value = Arc::new(ScalarValue::from(p.as_ref()));
|
||||
|
||||
let mut chars = p.as_ref().chars().collect::<Vec<_>>();
|
||||
*chars.last_mut().expect("checked that prefix is not empty") =
|
||||
std::char::MAX;
|
||||
let max_value = Arc::new(ScalarValue::from(
|
||||
chars.into_iter().collect::<String>().as_str(),
|
||||
));
|
||||
|
||||
ColumnRange {
|
||||
min_value,
|
||||
max_value,
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Some((col, range))
|
||||
})
|
||||
.collect::<HashMap<_, _>>();
|
||||
column_ranges.shrink_to_fit();
|
||||
|
||||
Self {
|
||||
sort_key,
|
||||
column_ranges: Arc::new(column_ranges),
|
||||
}
|
||||
}
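A self-contained sketch of the prefix handling described in the comments above: a truncated prefix becomes the conservative range [prefix, prefix_with_last_char_set_to_char::MAX], while an empty prefix yields no usable range. The helper name is hypothetical and plain Strings stand in for ScalarValues:

// Illustrative sketch only: conservative (min, max) string bounds for a
// partition key value that may be a truncated prefix of the real tag value.
fn prefix_column_range(prefix: &str) -> Option<(String, String)> {
    if prefix.is_empty() {
        // A fully truncated prefix covers the whole domain, so no useful range.
        return None;
    }
    let min = prefix.to_owned();
    // Replace the last character with char::MAX; every string starting with
    // `prefix` compares less than or equal to this bound.
    let mut chars: Vec<char> = prefix.chars().collect();
    *chars.last_mut().expect("prefix is not empty") = char::MAX;
    let max: String = chars.into_iter().collect();
    Some((min, max))
}

fn main() {
    let (min, max) = prefix_column_range("v2").unwrap();
    assert_eq!(min, "v2");
    assert!(max.as_str() > "v2");
    assert!("v2zzz" >= min.as_str() && "v2zzz" <= max.as_str());
    assert_eq!(prefix_column_range(""), None);
}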
|
||||
|
||||
/// RAM-bytes EXCLUDING `self`.
|
||||
fn size(&self) -> usize {
|
||||
// Arc content
|
||||
|
@@ -159,6 +263,13 @@ impl CachedPartition {
.as_ref()
|
||||
.map(|sk| sk.size())
|
||||
.unwrap_or_default()
|
||||
+ std::mem::size_of::<HashMap<Arc<str>, ColumnRange>>()
|
||||
+ (self.column_ranges.capacity() * std::mem::size_of::<(Arc<str>, ColumnRange)>())
|
||||
+ self
|
||||
.column_ranges
|
||||
.iter()
|
||||
.map(|(col, range)| col.len() + range.min_value.size() + range.max_value.size())
|
||||
.sum::<usize>()
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -206,6 +317,9 @@ mod tests {
use super::*;
|
||||
use crate::cache::{ram::test_util::test_ram_pool, test_util::assert_histogram_metric_count};
|
||||
use data_types::{partition_template::TablePartitionTemplateOverride, ColumnType};
|
||||
use generated_types::influxdata::iox::partition_template::v1::{
|
||||
template_part::Part, PartitionTemplate, TemplatePart,
|
||||
};
|
||||
use iox_tests::TestCatalog;
|
||||
use schema::{Schema, SchemaBuilder};
|
||||
|
||||
|
@@ -294,6 +408,198 @@ mod tests {
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_column_ranges() {
|
||||
let catalog = TestCatalog::new();
|
||||
|
||||
let ns = catalog.create_namespace_1hr_retention("ns").await;
|
||||
let t = ns
|
||||
.create_table_with_partition_template(
|
||||
"table",
|
||||
Some(PartitionTemplate {
|
||||
parts: vec![
|
||||
TemplatePart {
|
||||
part: Some(Part::TagValue(String::from("tag2"))),
|
||||
},
|
||||
TemplatePart {
|
||||
part: Some(Part::TagValue(String::from("tag1"))),
|
||||
},
|
||||
],
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
let c1 = t.create_column("tag1", ColumnType::Tag).await;
|
||||
let c2 = t.create_column("tag2", ColumnType::Tag).await;
|
||||
let c3 = t.create_column("tag3", ColumnType::Tag).await;
|
||||
let c4 = t.create_column("time", ColumnType::Time).await;
|
||||
|
||||
// See `data_types::partition_template` for the template language.
|
||||
// Two normal values.
|
||||
let p1 = t.create_partition("v1|v2").await.partition.clone();
|
||||
// 2nd part is NULL
|
||||
let p2 = t.create_partition("v1|!").await.partition.clone();
|
||||
// 2nd part is empty
|
||||
let p3 = t.create_partition("v1|^").await.partition.clone();
|
||||
// 2nd part is truncated (i.e. the original value was longer)
|
||||
let p4 = t.create_partition("v1|v2#").await.partition.clone();
|
||||
// 2nd part is truncated to empty string
|
||||
let p5 = t.create_partition("v1|#").await.partition.clone();
|
||||
let cached_table = Arc::new(CachedTable {
|
||||
id: t.table.id,
|
||||
schema: schema(),
|
||||
column_id_map: HashMap::from([
|
||||
(c1.column.id, Arc::from(c1.column.name.clone())),
|
||||
(c2.column.id, Arc::from(c2.column.name.clone())),
|
||||
(c3.column.id, Arc::from(c3.column.name.clone())),
|
||||
(c4.column.id, Arc::from(c4.column.name.clone())),
|
||||
]),
|
||||
column_id_map_rev: HashMap::from([
|
||||
(Arc::from(c1.column.name.clone()), c1.column.id),
|
||||
(Arc::from(c2.column.name.clone()), c2.column.id),
|
||||
(Arc::from(c3.column.name.clone()), c3.column.id),
|
||||
(Arc::from(c4.column.name.clone()), c4.column.id),
|
||||
]),
|
||||
primary_key_column_ids: [c1.column.id, c2.column.id, c3.column.id, c4.column.id].into(),
|
||||
partition_template: t.table.partition_template.clone(),
|
||||
});
|
||||
|
||||
let cache = PartitionCache::new(
|
||||
catalog.catalog(),
|
||||
BackoffConfig::default(),
|
||||
catalog.time_provider(),
|
||||
&catalog.metric_registry(),
|
||||
test_ram_pool(),
|
||||
true,
|
||||
);
|
||||
|
||||
let ranges1a = cache
|
||||
.column_ranges(Arc::clone(&cached_table), p1.id, None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
ranges1a.as_ref(),
|
||||
&HashMap::from([
|
||||
(
|
||||
Arc::from("tag1"),
|
||||
ColumnRange {
|
||||
min_value: Arc::new(ScalarValue::from("v2")),
|
||||
max_value: Arc::new(ScalarValue::from("v2"))
|
||||
}
|
||||
),
|
||||
(
|
||||
Arc::from("tag2"),
|
||||
ColumnRange {
|
||||
min_value: Arc::new(ScalarValue::from("v1")),
|
||||
max_value: Arc::new(ScalarValue::from("v1"))
|
||||
}
|
||||
),
|
||||
]),
|
||||
);
|
||||
assert!(Arc::ptr_eq(
|
||||
&ranges1a.get("tag1").unwrap().min_value,
|
||||
&ranges1a.get("tag1").unwrap().max_value,
|
||||
));
|
||||
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 1);
|
||||
|
||||
let ranges2 = cache
|
||||
.column_ranges(Arc::clone(&cached_table), p2.id, None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
ranges2.as_ref(),
|
||||
&HashMap::from([(
|
||||
Arc::from("tag2"),
|
||||
ColumnRange {
|
||||
min_value: Arc::new(ScalarValue::from("v1")),
|
||||
max_value: Arc::new(ScalarValue::from("v1"))
|
||||
}
|
||||
),]),
|
||||
);
|
||||
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 2);
|
||||
|
||||
let ranges3 = cache
|
||||
.column_ranges(Arc::clone(&cached_table), p3.id, None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
ranges3.as_ref(),
|
||||
&HashMap::from([
|
||||
(
|
||||
Arc::from("tag1"),
|
||||
ColumnRange {
|
||||
min_value: Arc::new(ScalarValue::from("")),
|
||||
max_value: Arc::new(ScalarValue::from(""))
|
||||
}
|
||||
),
|
||||
(
|
||||
Arc::from("tag2"),
|
||||
ColumnRange {
|
||||
min_value: Arc::new(ScalarValue::from("v1")),
|
||||
max_value: Arc::new(ScalarValue::from("v1"))
|
||||
}
|
||||
),
|
||||
]),
|
||||
);
|
||||
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 3);
|
||||
|
||||
let ranges4 = cache
|
||||
.column_ranges(Arc::clone(&cached_table), p4.id, None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
ranges4.as_ref(),
|
||||
&HashMap::from([
|
||||
(
|
||||
Arc::from("tag1"),
|
||||
ColumnRange {
|
||||
min_value: Arc::new(ScalarValue::from("v2")),
|
||||
max_value: Arc::new(ScalarValue::from("v\u{10FFFF}"))
|
||||
}
|
||||
),
|
||||
(
|
||||
Arc::from("tag2"),
|
||||
ColumnRange {
|
||||
min_value: Arc::new(ScalarValue::from("v1")),
|
||||
max_value: Arc::new(ScalarValue::from("v1"))
|
||||
}
|
||||
),
|
||||
]),
|
||||
);
|
||||
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 4);
|
||||
|
||||
let ranges5 = cache
|
||||
.column_ranges(Arc::clone(&cached_table), p5.id, None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
ranges5.as_ref(),
|
||||
&HashMap::from([(
|
||||
Arc::from("tag2"),
|
||||
ColumnRange {
|
||||
min_value: Arc::new(ScalarValue::from("v1")),
|
||||
max_value: Arc::new(ScalarValue::from("v1"))
|
||||
}
|
||||
),]),
|
||||
);
|
||||
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 5);
|
||||
|
||||
let ranges1b = cache
|
||||
.column_ranges(Arc::clone(&cached_table), p1.id, None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(Arc::ptr_eq(&ranges1a, &ranges1b));
|
||||
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 5);
|
||||
|
||||
// non-existing partition
|
||||
for _ in 0..2 {
|
||||
let res = cache
|
||||
.column_ranges(Arc::clone(&cached_table), PartitionId::new(i64::MAX), None)
|
||||
.await;
|
||||
assert_eq!(res, None);
|
||||
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 6);
|
||||
}
|
||||
}
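The partition keys used above encode one part per template entry, separated by `|`; as the inline comments note, `!` stands for a NULL part, `^` for an empty string, and a trailing `#` marks a value that was truncated. A minimal, illustrative decoder for just these test keys (not the real `build_column_values`, which lives in `data_types::partition_template` and also handles escaping and the template itself):

/// Simplified stand-in for the column values produced by partition-key decoding.
#[derive(Debug, PartialEq)]
enum PartValue<'a> {
    Null,
    Identity(&'a str),
    Prefix(&'a str),
}

/// Decode a partition key of the form used in `test_column_ranges`.
fn decode_parts(key: &str) -> Vec<PartValue<'_>> {
    key.split('|')
        .map(|part| match part {
            "!" => PartValue::Null,
            "^" => PartValue::Identity(""),
            p if p.ends_with('#') => PartValue::Prefix(&p[..p.len() - 1]),
            p => PartValue::Identity(p),
        })
        .collect()
}

fn main() {
    // "v1|v2#": the second part was truncated, so only a prefix range can be derived from it.
    assert_eq!(
        decode_parts("v1|v2#"),
        vec![PartValue::Identity("v1"), PartValue::Prefix("v2")]
    );
    // "v1|!": the second part is NULL and therefore yields no column range at all.
    assert_eq!(
        decode_parts("v1|!"),
        vec![PartValue::Identity("v1"), PartValue::Null]
    );
}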

    #[tokio::test]
    async fn test_cache_sharing() {
        let catalog = TestCatalog::new();
@ -336,13 +642,22 @@ mod tests
        cache
            .sort_key(Arc::clone(&cached_table), p3.id, &Vec::new(), None)
            .await;
        cache
            .column_ranges(Arc::clone(&cached_table), p3.id, None)
            .await;
        assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 1);

        cache
            .sort_key(Arc::clone(&cached_table), p2.id, &Vec::new(), None)
            .await;
        cache
            .column_ranges(Arc::clone(&cached_table), p2.id, None)
            .await;
        assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 2);

        cache
            .column_ranges(Arc::clone(&cached_table), p1.id, None)
            .await;
        cache
            .sort_key(Arc::clone(&cached_table), p1.id, &Vec::new(), None)
            .await;
@ -9,7 +9,7 @@ use crate::{
use async_trait::async_trait;
use data_types::NamespaceId;
use datafusion::{
    catalog::{catalog::CatalogProvider, schema::SchemaProvider},
    catalog::{schema::SchemaProvider, CatalogProvider},
    datasource::TableProvider,
    error::DataFusionError,
};
@ -24,8 +24,8 @@ use arrow::datatypes::{DataType, TimeUnit};
use datafusion::{
    error::DataFusionError,
    logical_expr::{
        function, BuiltinScalarFunction, ReturnTypeFunction, ScalarFunctionImplementation,
        ScalarUDF, Signature, TypeSignature, Volatility,
        BuiltinScalarFunction, ReturnTypeFunction, ScalarFunctionImplementation, ScalarUDF,
        Signature, TypeSignature, Volatility,
    },
};
use once_cell::sync::Lazy;
@ -41,7 +41,7 @@ pub const DATE_BIN_GAPFILL_UDF_NAME: &str = "date_bin_gapfill";
pub(crate) static DATE_BIN_GAPFILL: Lazy<Arc<ScalarUDF>> = Lazy::new(|| {
    // DATE_BIN_GAPFILL should have the same signature as DATE_BIN,
    // so that just adding _GAPFILL can turn a query into a gap-filling query.
    let mut signatures = function::signature(&BuiltinScalarFunction::DateBin);
    let mut signatures = BuiltinScalarFunction::DateBin.signature();
    // We don't want this to be optimized away before we can give a helpful error message
    signatures.volatility = Volatility::Volatile;
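Forcing `Volatility::Volatile` here keeps the planner from constant-folding a `date_bin_gapfill` call before the gap-fill rewrite (or its error reporting) gets to see it. A tiny, self-contained sketch of that idea with made-up types (not the DataFusion API):

/// Hypothetical stand-ins to illustrate why volatility blocks constant folding.
#[derive(Clone, Copy, PartialEq)]
enum Volatility {
    Immutable,
    Volatile,
}

struct Call {
    volatility: Volatility,
    args_are_literals: bool,
}

/// A simplification pass may only evaluate deterministic calls over literal arguments.
fn can_constant_fold(call: &Call) -> bool {
    call.volatility == Volatility::Immutable && call.args_are_literals
}

fn main() {
    let date_bin = Call { volatility: Volatility::Immutable, args_are_literals: true };
    let gapfill = Call { volatility: Volatility::Volatile, args_are_literals: true };
    assert!(can_constant_fold(&date_bin));
    // stays in the plan until the gap-fill rewrite can handle it (or report an error)
    assert!(!can_constant_fold(&gapfill));
}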
@ -30,9 +30,9 @@ bytes = { version = "1" }
chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "serde"] }
crossbeam-utils = { version = "0.8" }
crypto-common = { version = "0.1", default-features = false, features = ["std"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "91e75d7e6303c1a7331e8e90eaad9b095ace929b" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "91e75d7e6303c1a7331e8e90eaad9b095ace929b", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "91e75d7e6303c1a7331e8e90eaad9b095ace929b", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "84e49771b7403b3d313d8493b61d2d58dcdd7514" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "84e49771b7403b3d313d8493b61d2d58dcdd7514", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "84e49771b7403b3d313d8493b61d2d58dcdd7514", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
digest = { version = "0.10", features = ["mac", "std"] }
either = { version = "1" }
fixedbitset = { version = "0.4" }
@ -46,7 +46,7 @@ futures-sink = { version = "0.3" }
futures-task = { version = "0.3", default-features = false, features = ["std"] }
futures-util = { version = "0.3", features = ["channel", "io", "sink"] }
getrandom = { version = "0.2", default-features = false, features = ["std"] }
hashbrown = { version = "0.13", features = ["raw"] }
hashbrown = { version = "0.14", features = ["raw"] }
indexmap = { version = "1", default-features = false, features = ["std"] }
itertools = { version = "0.10" }
libc = { version = "0.2", features = ["extra_traits"] }
@ -120,7 +120,7 @@ futures-sink = { version = "0.3" }
futures-task = { version = "0.3", default-features = false, features = ["std"] }
futures-util = { version = "0.3", features = ["channel", "io", "sink"] }
getrandom = { version = "0.2", default-features = false, features = ["std"] }
hashbrown = { version = "0.13", features = ["raw"] }
hashbrown = { version = "0.14", features = ["raw"] }
heck = { version = "0.4", features = ["unicode"] }
indexmap = { version = "1", default-features = false, features = ["std"] }
itertools = { version = "0.10" }
@ -205,15 +205,13 @@ rustls = { version = "0.21", features = ["dangerous_configuration"] }
scopeguard = { version = "1" }
webpki = { version = "0.22", default-features = false, features = ["std"] }
winapi = { version = "0.3", default-features = false, features = ["basetsd", "consoleapi", "errhandlingapi", "fileapi", "handleapi", "impl-debug", "impl-default", "knownfolders", "minwinbase", "minwindef", "ntsecapi", "ntstatus", "objbase", "processenv", "shellapi", "shlobj", "std", "stringapiset", "synchapi", "sysinfoapi", "timezoneapi", "winbase", "wincon", "winerror", "winnt", "winreg", "winuser", "ws2ipdef", "ws2tcpip", "wtypesbase"] }
windows-sys-53888c27b7ba5cf4 = { package = "windows-sys", version = "0.45", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_LibraryLoader", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_WindowsProgramming", "Win32_UI_Input_KeyboardAndMouse"] }
windows-sys-c8eced492e86ede7 = { package = "windows-sys", version = "0.48", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_Threading", "Win32_System_WindowsProgramming", "Win32_UI_Shell"] }
windows-sys = { version = "0.48", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_Threading", "Win32_System_WindowsProgramming", "Win32_UI_Shell"] }

[target.x86_64-pc-windows-msvc.build-dependencies]
once_cell = { version = "1", default-features = false, features = ["unstable"] }
scopeguard = { version = "1" }
webpki = { version = "0.22", default-features = false, features = ["std"] }
winapi = { version = "0.3", default-features = false, features = ["basetsd", "consoleapi", "errhandlingapi", "fileapi", "handleapi", "impl-debug", "impl-default", "knownfolders", "minwinbase", "minwindef", "ntsecapi", "ntstatus", "objbase", "processenv", "shellapi", "shlobj", "std", "stringapiset", "synchapi", "sysinfoapi", "timezoneapi", "winbase", "wincon", "winerror", "winnt", "winreg", "winuser", "ws2ipdef", "ws2tcpip", "wtypesbase"] }
windows-sys-53888c27b7ba5cf4 = { package = "windows-sys", version = "0.45", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_LibraryLoader", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_WindowsProgramming", "Win32_UI_Input_KeyboardAndMouse"] }
windows-sys-c8eced492e86ede7 = { package = "windows-sys", version = "0.48", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_Threading", "Win32_System_WindowsProgramming", "Win32_UI_Shell"] }
windows-sys = { version = "0.48", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_Threading", "Win32_System_WindowsProgramming", "Win32_UI_Shell"] }

### END HAKARI SECTION