Merge branch 'main' into dom/perf-sparse-reup

pull/24376/head
Dom 2023-06-16 15:55:21 +01:00 committed by GitHub
commit 27977299ad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
37 changed files with 1398 additions and 333 deletions

Cargo.lock (generated)
View File

@ -43,9 +43,9 @@ dependencies = [
[[package]]
name = "aho-corasick"
version = "1.0.1"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67fc08ce920c31afb70f013dcce1bfc3a3195de6a228474e45e1f145b36f8d04"
checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41"
dependencies = [
"memchr",
]
@ -67,9 +67,9 @@ dependencies = [
[[package]]
name = "allocator-api2"
version = "0.2.14"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4f263788a35611fba42eb41ff811c5d0360c58b97402570312a350736e2542e"
checksum = "56fc6cf8dc8c4158eed8649f9b8b0ea1518eb62b544fe9490d66fa0b349eafe9"
[[package]]
name = "android-tzdata"
@ -155,9 +155,9 @@ checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545"
[[package]]
name = "arrayvec"
version = "0.7.2"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
checksum = "8868f09ff8cea88b079da74ae569d9b8c62a23c68c746240b704ee6f7525c89c"
[[package]]
name = "arrow"
@ -494,7 +494,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]
[[package]]
@ -505,7 +505,7 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]
[[package]]
@ -644,9 +644,9 @@ dependencies = [
[[package]]
name = "blake3"
version = "1.3.3"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42ae2468a89544a466886840aa467a25b766499f4f04bf7d9fcd10ecee9fccef"
checksum = "729b71f35bd3fa1a4c86b85d32c8b9069ea7fe14f7a53cfabb65f62d4265b888"
dependencies = [
"arrayref",
"arrayvec",
@ -688,9 +688,9 @@ dependencies = [
[[package]]
name = "bstr"
version = "1.4.0"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3d4260bcc2e8fc9df1eac4919a720effeb63a3f0952f5bf4944adfa18897f09"
checksum = "a246e68bb43f6cd9db24bea052a53e40405417c5fb372e3d1a8a7f770a564ef5"
dependencies = [
"memchr",
"once_cell",
@ -700,9 +700,9 @@ dependencies = [
[[package]]
name = "bumpalo"
version = "3.12.2"
version = "3.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c6ed94e98ecff0c12dd1b04c15ec0d7d9458ca8fe806cea6f12954efe74c63b"
checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
[[package]]
name = "bytemuck"
@ -902,7 +902,7 @@ dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]
[[package]]
@ -1036,9 +1036,9 @@ dependencies = [
[[package]]
name = "console"
version = "0.15.6"
version = "0.15.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0525278dce688103060006713371cedbad27186c7d913f33d866b498da0f595"
checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8"
dependencies = [
"encode_unicode",
"lazy_static",
@ -1107,9 +1107,9 @@ dependencies = [
[[package]]
name = "constant_time_eq"
version = "0.2.5"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13418e745008f7349ec7e449155f419a61b92b58a99cc3616942b926825ec76b"
checksum = "21a53c0a4d288377e7415b53dcfc3c04da5cdc2cc95c8d5ac178b58f0b861ad6"
[[package]]
name = "core-foundation-sys"
@ -1128,9 +1128,9 @@ dependencies = [
[[package]]
name = "cpufeatures"
version = "0.2.7"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e4c1eaa2012c47becbbad2ab175484c2a84d1185b566fb2cc5b8707343dfe58"
checksum = "03e69e28e9f7f77debdedbaafa2866e1de9ba56df55a8bd7cfc724c25a09987c"
dependencies = [
"libc",
]
@ -1238,14 +1238,14 @@ dependencies = [
[[package]]
name = "crossbeam-epoch"
version = "0.9.14"
version = "0.9.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695"
checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7"
dependencies = [
"autocfg",
"cfg-if",
"crossbeam-utils",
"memoffset 0.8.0",
"memoffset 0.9.0",
"scopeguard",
]
@ -1286,9 +1286,9 @@ dependencies = [
[[package]]
name = "csv"
version = "1.2.1"
version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b015497079b9a9d69c02ad25de6c0a6edef051ea6360a327d0bd05802ef64ad"
checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086"
dependencies = [
"csv-core",
"itoa",
@ -1325,7 +1325,7 @@ dependencies = [
"hashbrown 0.12.3",
"lock_api",
"once_cell",
"parking_lot_core 0.9.7",
"parking_lot_core 0.9.8",
]
[[package]]
@ -1355,7 +1355,7 @@ dependencies = [
[[package]]
name = "datafusion"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"ahash 0.8.3",
"arrow",
@ -1377,7 +1377,7 @@ dependencies = [
"flate2",
"futures",
"glob",
"hashbrown 0.13.2",
"hashbrown 0.14.0",
"indexmap",
"itertools",
"lazy_static",
@ -1393,7 +1393,6 @@ dependencies = [
"sqlparser",
"tempfile",
"tokio",
"tokio-stream",
"tokio-util",
"url",
"uuid",
@ -1404,7 +1403,7 @@ dependencies = [
[[package]]
name = "datafusion-common"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"arrow",
"arrow-array",
@ -1418,12 +1417,12 @@ dependencies = [
[[package]]
name = "datafusion-execution"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"dashmap",
"datafusion-common",
"datafusion-expr",
"hashbrown 0.13.2",
"hashbrown 0.14.0",
"log",
"object_store",
"parking_lot 0.12.1",
@ -1435,7 +1434,7 @@ dependencies = [
[[package]]
name = "datafusion-expr"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"ahash 0.8.3",
"arrow",
@ -1449,7 +1448,7 @@ dependencies = [
[[package]]
name = "datafusion-optimizer"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"arrow",
"async-trait",
@ -1457,7 +1456,7 @@ dependencies = [
"datafusion-common",
"datafusion-expr",
"datafusion-physical-expr",
"hashbrown 0.13.2",
"hashbrown 0.14.0",
"itertools",
"log",
"regex-syntax 0.7.2",
@ -1466,7 +1465,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"ahash 0.8.3",
"arrow",
@ -1480,7 +1479,7 @@ dependencies = [
"datafusion-expr",
"datafusion-row",
"half 2.2.1",
"hashbrown 0.13.2",
"hashbrown 0.14.0",
"indexmap",
"itertools",
"lazy_static",
@ -1498,7 +1497,7 @@ dependencies = [
[[package]]
name = "datafusion-proto"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"arrow",
"chrono",
@ -1512,7 +1511,7 @@ dependencies = [
[[package]]
name = "datafusion-row"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"arrow",
"datafusion-common",
@ -1523,7 +1522,7 @@ dependencies = [
[[package]]
name = "datafusion-sql"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"arrow",
"arrow-schema",
@ -1891,7 +1890,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]
[[package]]
@ -1994,9 +1993,9 @@ dependencies = [
[[package]]
name = "gimli"
version = "0.27.2"
version = "0.27.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4"
checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e"
[[package]]
name = "glob"
@ -2112,9 +2111,6 @@ name = "hashbrown"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e"
dependencies = [
"ahash 0.8.3",
]
[[package]]
name = "hashbrown"
@ -2128,11 +2124,11 @@ dependencies = [
[[package]]
name = "hashlink"
version = "0.8.2"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0761a1b9491c4f2e3d66aa0f62d0fba0af9a0e2852e4d48ea506632a4b56e6aa"
checksum = "312f66718a2d7789ffef4f4b7b213138ed9f1eb3aa1d0d82fc99f88fb3ffd26f"
dependencies = [
"hashbrown 0.13.2",
"hashbrown 0.14.0",
]
[[package]]
@ -2300,7 +2296,7 @@ dependencies = [
"hyper",
"rustls 0.21.2",
"tokio",
"tokio-rustls 0.24.0",
"tokio-rustls 0.24.1",
]
[[package]]
@ -2317,9 +2313,9 @@ dependencies = [
[[package]]
name = "iana-time-zone"
version = "0.1.56"
version = "0.1.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0722cd7114b7de04316e7ea5456a0bbb20e4adb46fd27a3697adb812cff0f37c"
checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613"
dependencies = [
"android_system_properties",
"core-foundation-sys",
@ -3134,9 +3130,9 @@ dependencies = [
[[package]]
name = "js-sys"
version = "0.3.63"
version = "0.3.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f37a4a5928311ac501dee68b3c7613a1037d0edb30c8e5427bd832d55d1b790"
checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a"
dependencies = [
"wasm-bindgen",
]
@ -3242,9 +3238,9 @@ checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
[[package]]
name = "linux-raw-sys"
version = "0.3.7"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ece97ea872ece730aed82664c424eb4c8291e1ff2480247ccf7409044bc6479f"
checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519"
[[package]]
name = "lock_api"
@ -3355,9 +3351,9 @@ dependencies = [
[[package]]
name = "memoffset"
version = "0.8.0"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1"
checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c"
dependencies = [
"autocfg",
]
@ -3413,14 +3409,13 @@ dependencies = [
[[package]]
name = "mio"
version = "0.8.6"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b9d9a46eff5b4ff64b45a9e316a6d1e0bc719ef429cbec4dc630684212bfdf9"
checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2"
dependencies = [
"libc",
"log",
"wasi",
"windows-sys 0.45.0",
"windows-sys 0.48.0",
]
[[package]]
@ -3667,9 +3662,9 @@ dependencies = [
[[package]]
name = "object"
version = "0.30.3"
version = "0.30.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea86265d3d3dcb6a27fc51bd29a4bf387fae9d2986b823079d4986af253eb439"
checksum = "03b4680b86d9cfafba8fc491dc9b6df26b68cf40e9e6cd73909194759a63c385"
dependencies = [
"memchr",
]
@ -3734,7 +3729,7 @@ version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
dependencies = [
"parking_lot_core 0.9.7",
"parking_lot_core 0.9.8",
]
[[package]]
@ -3804,7 +3799,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
dependencies = [
"lock_api",
"parking_lot_core 0.9.7",
"parking_lot_core 0.9.8",
]
[[package]]
@ -3823,15 +3818,15 @@ dependencies = [
[[package]]
name = "parking_lot_core"
version = "0.9.7"
version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521"
checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447"
dependencies = [
"cfg-if",
"libc",
"redox_syscall 0.2.16",
"redox_syscall 0.3.5",
"smallvec",
"windows-sys 0.45.0",
"windows-targets 0.48.0",
]
[[package]]
@ -4013,7 +4008,7 @@ dependencies = [
"pest_meta",
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]
[[package]]
@ -4092,7 +4087,7 @@ checksum = "39407670928234ebc5e6e580247dd567ad73a3578460c5990f9503df207e8f07"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]
[[package]]
@ -4225,9 +4220,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068"
[[package]]
name = "proc-macro2"
version = "1.0.58"
version = "1.0.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa1fb82fc0c281dd9671101b66b771ebbe1eaf967b96ac8740dcba4b70005ca8"
checksum = "dec2b086b7a862cf4de201096214fa870344cf922b2b30c167badb3af3195406"
dependencies = [
"unicode-ident",
]
@ -4417,9 +4412,9 @@ dependencies = [
[[package]]
name = "quote"
version = "1.0.27"
version = "1.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f4f29d145265ec1c483c7c654450edde0bfe043d3938d6972630663356d9500"
checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488"
dependencies = [
"proc-macro2",
]
@ -4575,7 +4570,7 @@ dependencies = [
"serde_json",
"serde_urlencoded",
"tokio",
"tokio-rustls 0.24.0",
"tokio-rustls 0.24.1",
"tokio-util",
"tower-service",
"url",
@ -4836,7 +4831,7 @@ checksum = "d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]
[[package]]
@ -5060,9 +5055,9 @@ dependencies = [
[[package]]
name = "sha2"
version = "0.10.6"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0"
checksum = "479fb9d862239e610720565ca91403019f2f00410f1864c5aa7479b950a76ed8"
dependencies = [
"cfg-if",
"cpufeatures",
@ -5389,9 +5384,9 @@ dependencies = [
[[package]]
name = "subtle"
version = "2.4.1"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601"
checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
[[package]]
name = "symbolic-common"
@ -5429,9 +5424,9 @@ dependencies = [
[[package]]
name = "syn"
version = "2.0.16"
version = "2.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6f671d4b5ffdb8eadec19c0ae67fe2639df8684bd7bc4b83d986b8db549cf01"
checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e"
dependencies = [
"proc-macro2",
"quote",
@ -5540,7 +5535,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]
[[package]]
@ -5668,7 +5663,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]
[[package]]
@ -5684,9 +5679,9 @@ dependencies = [
[[package]]
name = "tokio-rustls"
version = "0.24.0"
version = "0.24.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0d409377ff5b1e3ca6437aa86c1eb7d40c134bfec254e44c830defa92669db5"
checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081"
dependencies = [
"rustls 0.21.2",
"tokio",
@ -5741,9 +5736,9 @@ dependencies = [
[[package]]
name = "toml_edit"
version = "0.19.9"
version = "0.19.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92d964908cec0d030b812013af25a0e57fddfadb1e066ecc6681d86253129d4f"
checksum = "2380d56e8670370eee6566b0bfd4265f65b3f432e8c6d85623f728d4fa31f739"
dependencies = [
"indexmap",
"serde",
@ -5775,13 +5770,13 @@ dependencies = [
"prost",
"rustls-pemfile",
"tokio",
"tokio-rustls 0.24.0",
"tokio-rustls 0.24.1",
"tokio-stream",
"tower",
"tower-layer",
"tower-service",
"tracing",
"webpki-roots 0.23.0",
"webpki-roots 0.23.1",
]
[[package]]
@ -5942,7 +5937,7 @@ checksum = "0f57e3ca2a01450b1a921183a9c9cbfda207fd822cef4ccb00a65402cbba7a74"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]
[[package]]
@ -6073,9 +6068,9 @@ checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460"
[[package]]
name = "unicode-ident"
version = "1.0.8"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"
checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0"
[[package]]
name = "unicode-normalization"
@ -6219,11 +6214,10 @@ dependencies = [
[[package]]
name = "want"
version = "0.3.0"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ce8a968cb1cd110d136ff8b819a556d6fb6d919363c61534f6860c7eb172ba0"
checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e"
dependencies = [
"log",
"try-lock",
]
@ -6235,9 +6229,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wasm-bindgen"
version = "0.2.86"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bba0e8cb82ba49ff4e229459ff22a191bbe9a1cb3a341610c9c33efc27ddf73"
checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
@ -6245,24 +6239,24 @@ dependencies = [
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.86"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19b04bc93f9d6bdee709f6bd2118f57dd6679cf1176a1af464fca3ab0d66d8fb"
checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd"
dependencies = [
"bumpalo",
"log",
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.36"
version = "0.4.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d1985d03709c53167ce907ff394f5316aa22cb4e12761295c5dc57dacb6297e"
checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03"
dependencies = [
"cfg-if",
"js-sys",
@ -6272,9 +6266,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.86"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14d6b024f1a526bb0234f52840389927257beb670610081360e5a03c5df9c258"
checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
@ -6282,22 +6276,22 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.86"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e128beba882dd1eb6200e1dc92ae6c5dbaa4311aa7bb211ca035779e5efc39f8"
checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.86"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed9d5b4305409d1fc9482fee2d7f9bcbf24b3972bf59817ef757e23982242a93"
checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1"
[[package]]
name = "wasm-streams"
@ -6314,9 +6308,9 @@ dependencies = [
[[package]]
name = "web-sys"
version = "0.3.63"
version = "0.3.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3bdd9ef4e984da1187bf8110c5cf5b845fbc87a23602cdf912386a76fcd3a7c2"
checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b"
dependencies = [
"js-sys",
"wasm-bindgen",
@ -6343,9 +6337,9 @@ dependencies = [
[[package]]
name = "webpki-roots"
version = "0.23.0"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa54963694b65584e170cf5dc46aeb4dcaa5584e652ff5f3952e56d66aff0125"
checksum = "b03058f88386e5ff5310d9111d53f48b17d732b401aeb83a8d5190f2ac459338"
dependencies = [
"rustls-webpki",
]
@ -6545,9 +6539,9 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"
[[package]]
name = "winnow"
version = "0.4.6"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61de7bac303dc551fe038e2b3cef0f571087a47571ea6e79a87692ac99b99699"
checksum = "ca0ace3845f0d96209f0375e6d367e3eb87eb65d27d445bdc9f1843a26f39448"
dependencies = [
"memchr",
]
@ -6596,7 +6590,7 @@ dependencies = [
"futures-task",
"futures-util",
"getrandom",
"hashbrown 0.13.2",
"hashbrown 0.14.0",
"heck",
"indexmap",
"io-lifetimes",
@ -6639,7 +6633,7 @@ dependencies = [
"sqlx-macros",
"strum",
"syn 1.0.109",
"syn 2.0.16",
"syn 2.0.18",
"thrift",
"tokio",
"tokio-stream",
@ -6656,7 +6650,6 @@ dependencies = [
"uuid",
"webpki",
"winapi",
"windows-sys 0.45.0",
"windows-sys 0.48.0",
"zstd",
"zstd-safe",

View File

@ -118,8 +118,8 @@ license = "MIT OR Apache-2.0"
[workspace.dependencies]
arrow = { version = "41.0.0" }
arrow-flight = { version = "41.0.0" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "91e75d7e6303c1a7331e8e90eaad9b095ace929b", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "91e75d7e6303c1a7331e8e90eaad9b095ace929b" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "84e49771b7403b3d313d8493b61d2d58dcdd7514", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "84e49771b7403b3d313d8493b61d2d58dcdd7514" }
hashbrown = { version = "0.14.0" }
object_store = { version = "0.6.0" }
parquet = { version = "41.0.0" }

View File

@ -206,6 +206,19 @@ pub struct CompactorConfig {
)]
pub shadow_mode: bool,
/// Enable scratchpad.
///
/// This allows disabling the scratchpad in production.
///
/// Disabling this is useful for testing the performance and memory consequences of the scratchpad.
#[clap(
long = "compaction-enable-scratchpad",
env = "INFLUXDB_IOX_COMPACTION_ENABLE_SCRATCHPAD",
default_value = "true",
action
)]
pub enable_scratchpad: bool,
/// Ignores "partition marked w/ error and shall be skipped" entries in the catalog.
///
/// This is mostly useful for debugging.

View File

@ -402,7 +402,7 @@ fn make_parquet_files_sink(config: &Config) -> Arc<dyn ParquetFilesSink> {
}
fn make_scratchpad_gen(config: &Config) -> Arc<dyn ScratchpadGen> {
if config.simulate_without_object_store {
if config.simulate_without_object_store || !config.enable_scratchpad {
Arc::new(NoopScratchpadGen::new())
} else {
let scratchpad_store_output = if config.shadow_mode {

View File

@ -28,6 +28,7 @@ pub fn log_config(config: &Config) {
partition_timeout,
partitions_source,
shadow_mode,
enable_scratchpad,
ignore_partition_skip_marker,
shard_config,
min_num_l1_files_to_compact,
@ -73,6 +74,7 @@ pub fn log_config(config: &Config) {
partition_timeout_secs=partition_timeout.as_secs_f32(),
%partitions_source,
shadow_mode,
enable_scratchpad,
ignore_partition_skip_marker,
?shard_cfg_n_shards,
?shard_cfg_shard_id,

View File

@ -91,6 +91,13 @@ pub struct Config {
/// This is mostly useful for debugging.
pub shadow_mode: bool,
/// Enable scratchpad.
///
/// Enabled by default. If this is set to `false`, the compactor will not use the scratchpad.
///
/// This is useful for disabling the scratchpad in production to evaluate its performance and memory impact.
pub enable_scratchpad: bool,
/// Ignores "partition marked w/ error and shall be skipped" entries in the catalog.
///
/// This is mostly useful for debugging.
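For reference, the new `enable_scratchpad` field is consumed by `make_scratchpad_gen` (see the hunk earlier in this diff), which falls back to `NoopScratchpadGen` when the predicate below holds. A condensed, self-contained sketch; the helper name is ours, not in the codebase:

```rust
// Condensed sketch: the compactor uses the no-op scratchpad generator when the
// scratchpad is disabled or object-store access is being simulated.
fn use_noop_scratchpad(simulate_without_object_store: bool, enable_scratchpad: bool) -> bool {
    simulate_without_object_store || !enable_scratchpad
}
```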

View File

@ -0,0 +1,351 @@
//! layout tests related to the size L1/L2 files achieve when the L0 size is small.
//!
//! The intent of these tests is to ensure that when L0s are arriving in a normal/leading edge pattern,
//! even if they're quite small (10KB), the L1 & L2 files should still be accumulated to a reasonable size.
//!
//! Accumulating large L1/L2 is generally easier when cleaning up a backlogged partition with many L0s,
//! so these tests try to mimic the more challenging scenario of a steady stream of small L0s.
//! The steady stream of L0s can be partially simulated by setting the max files per plan to a small number,
//! and putting just a few files in the test case.
use data_types::CompactionLevel;
use iox_time::Time;
use crate::layouts::{layout_setup_builder, parquet_builder, run_layout_scenario, ONE_MB};
const MAX_DESIRED_FILE_SIZE: u64 = 100 * ONE_MB;
// Mimic small L0 files trickling when they overlap in time (by a minor amount, as is common)
// In this case, all L1 and L0 files can fit in a single compaction run.
#[tokio::test]
async fn small_l1_plus_overlapping_l0s_single_run() {
test_helpers::maybe_start_logging();
let setup = layout_setup_builder()
.await
.with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
.with_max_num_files_per_plan(4) // artificially limit to 4 per plan to simulate a steady stream of small files compacted as they come in.
.build()
.await;
let size = 10 * 1024;
// Create 1 L1 file that mimics the output from a previous L0 compaction
setup
.partition
.create_parquet_file(
parquet_builder()
.with_min_time(0)
.with_max_time(10)
.with_compaction_level(CompactionLevel::FileNonOverlapped)
.with_max_l0_created_at(Time::from_timestamp_nanos(11))
.with_file_size_bytes(size * 4_u64),
)
.await;
// Create 3 L0 files, slightly overlapping in time.
// Note the first L0 slightly overlaps the L1, as would happen if this slightly overlapping pattern occurred
// in the files that (we're pretending) were compacted into that L1.
for i in 1..=3 {
setup
.partition
.create_parquet_file(
parquet_builder()
.with_min_time(i * 10)
.with_max_time((i + 1) * 10)
.with_compaction_level(CompactionLevel::Initial)
.with_max_l0_created_at(Time::from_timestamp_nanos((i + 1) * 10 + 1))
.with_file_size_bytes(size),
)
.await;
}
// Required behavior:
// 1. (achieved) all files compacted to a single L1 file
// Desired behavior:
// 1. (achieved) only one compaction is performed to compact them
insta::assert_yaml_snapshot!(
run_layout_scenario(&setup).await,
@r###"
---
- "**** Input Files "
- "L0 "
- "L0.2[10,20] 21ns 10kb |--------L0.2--------| "
- "L0.3[20,30] 31ns 10kb |--------L0.3--------| "
- "L0.4[30,40] 41ns 10kb |--------L0.4--------| "
- "L1 "
- "L1.1[0,10] 11ns 40kb |--------L1.1--------| "
- "**** Simulation run 0, type=compact(TotalSizeLessThanMaxCompactSize). 4 Input Files, 70kb total:"
- "L0 "
- "L0.4[30,40] 41ns 10kb |--------L0.4--------| "
- "L0.3[20,30] 31ns 10kb |--------L0.3--------| "
- "L0.2[10,20] 21ns 10kb |--------L0.2--------| "
- "L1 "
- "L1.1[0,10] 11ns 40kb |--------L1.1--------| "
- "**** 1 Output Files (parquet_file_id not yet assigned), 70kb total:"
- "L1, all files 70kb "
- "L1.?[0,40] 41ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 4 files: L1.1, L0.2, L0.3, L0.4"
- " Creating 1 files"
- "**** Final Output Files (70kb written)"
- "L1, all files 70kb "
- "L1.5[0,40] 41ns |------------------------------------------L1.5------------------------------------------|"
"###
);
}
// Mimic small L0 files trickling when they overlap in time (by a minor amount, as is common)
// In this case, all L1 and L0 files do not fit in a single compaction run.
#[tokio::test]
async fn small_l1_plus_overlapping_l0s_two_runs() {
test_helpers::maybe_start_logging();
let setup = layout_setup_builder()
.await
.with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
.with_max_num_files_per_plan(4) // artificially limit to 4 per plan to simulate a steady stream of small files compacted as they come in.
.build()
.await;
let size = 10 * 1024;
// Create 1 L1 file that mimics the output from a previous L0 compaction
setup
.partition
.create_parquet_file(
parquet_builder()
.with_min_time(0)
.with_max_time(10)
.with_compaction_level(CompactionLevel::FileNonOverlapped)
.with_max_l0_created_at(Time::from_timestamp_nanos(11))
.with_file_size_bytes(size * 4_u64),
)
.await;
// Create 4 L0 files, slightly overlapping in time
// Note the first L0 slightly overlaps the L1, as would happen if this slightly overlapping pattern occurred
// in the files that (we're pretending) were compacted into that L1.
for i in 1..=4 {
setup
.partition
.create_parquet_file(
parquet_builder()
.with_min_time(i * 10)
.with_max_time((i + 1) * 10)
.with_compaction_level(CompactionLevel::Initial)
.with_max_l0_created_at(Time::from_timestamp_nanos((i + 1) * 10 + 1))
.with_file_size_bytes(size),
)
.await;
}
// Required behavior:
// 1. (achieved) all files compacted to a single L1 file
// Desired behavior:
// 1. (not achieved) It may be preferable that the first compaction include the last L1 and as many L0s as are allowed (3).
// This does not happen. Instead, the first compaction is the four L0s that are later combined with the L1.
// This is not necessarily bad; in fact it is better for write amplification. But it might hint at the possibility of
// compaction sequences that never get around to coming back and picking up the L1.
// So the current behavior is noteworthy, and it is unclear whether it is 'good' or 'bad'.
insta::assert_yaml_snapshot!(
run_layout_scenario(&setup).await,
@r###"
---
- "**** Input Files "
- "L0 "
- "L0.2[10,20] 21ns 10kb |------L0.2------| "
- "L0.3[20,30] 31ns 10kb |------L0.3------| "
- "L0.4[30,40] 41ns 10kb |------L0.4------| "
- "L0.5[40,50] 51ns 10kb |------L0.5------|"
- "L1 "
- "L1.1[0,10] 11ns 40kb |------L1.1------| "
- "**** Simulation run 0, type=compact(ManySmallFiles). 4 Input Files, 40kb total:"
- "L0, all files 10kb "
- "L0.2[10,20] 21ns |--------L0.2--------| "
- "L0.3[20,30] 31ns |--------L0.3--------| "
- "L0.4[30,40] 41ns |--------L0.4--------| "
- "L0.5[40,50] 51ns |--------L0.5--------| "
- "**** 1 Output Files (parquet_file_id not yet assigned), 40kb total:"
- "L0, all files 40kb "
- "L0.?[10,50] 51ns |------------------------------------------L0.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 4 files: L0.2, L0.3, L0.4, L0.5"
- " Creating 1 files"
- "**** Simulation run 1, type=compact(TotalSizeLessThanMaxCompactSize). 2 Input Files, 80kb total:"
- "L0, all files 40kb "
- "L0.6[10,50] 51ns |---------------------------------L0.6---------------------------------|"
- "L1, all files 40kb "
- "L1.1[0,10] 11ns |------L1.1------| "
- "**** 1 Output Files (parquet_file_id not yet assigned), 80kb total:"
- "L1, all files 80kb "
- "L1.?[0,50] 51ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 2 files: L1.1, L0.6"
- " Creating 1 files"
- "**** Final Output Files (120kb written)"
- "L1, all files 80kb "
- "L1.7[0,50] 51ns |------------------------------------------L1.7------------------------------------------|"
"###
);
}
// Mimic small L0 files trickling when they do NOT overlap in time (i.e. they have gaps between them)
// In this case, all L1 and L0 files can fit in a single compaction run.
#[tokio::test]
async fn small_l1_plus_nonoverlapping_l0s_single_run() {
test_helpers::maybe_start_logging();
let setup = layout_setup_builder()
.await
.with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
.with_max_num_files_per_plan(4) // artificially limit to 4 per plan to simulate a steady stream of small files compacted as they come in.
.build()
.await;
let size = 10 * 1024;
// Create 1 L1 file that mimics the output from a previous L0 compaction
setup
.partition
.create_parquet_file(
parquet_builder()
.with_min_time(0)
.with_max_time(9)
.with_compaction_level(CompactionLevel::FileNonOverlapped)
.with_max_l0_created_at(Time::from_timestamp_nanos(11))
.with_file_size_bytes(size * 4_u64),
)
.await;
// Create 3 L0 files, not overlapping in time, and not overlapping the L1.
for i in 1..=3 {
setup
.partition
.create_parquet_file(
parquet_builder()
.with_min_time(i * 10 + 1)
.with_max_time((i + 1) * 10 - 1)
.with_compaction_level(CompactionLevel::Initial)
.with_max_l0_created_at(Time::from_timestamp_nanos((i + 1) * 10 + 1))
.with_file_size_bytes(size),
)
.await;
}
// Required behavior:
// 1. (not achieved) all files compacted to a single L1 file
// The assumption is: since it didn't combine the old L1 with the new one, it will never grow L1.1.
// It will eventually compact L1.1 with other L1s to make an L2, but if the write pattern continues
// with tiny L0s, the resulting L2 will be N * the L1 size (where N is the number of L1s compacted
// into the L2).
insta::assert_yaml_snapshot!(
run_layout_scenario(&setup).await,
@r###"
---
- "**** Input Files "
- "L0 "
- "L0.2[11,19] 21ns 10kb |------L0.2------| "
- "L0.3[21,29] 31ns 10kb |------L0.3------| "
- "L0.4[31,39] 41ns 10kb |------L0.4------| "
- "L1 "
- "L1.1[0,9] 11ns 40kb |-------L1.1-------| "
- "**** Simulation run 0, type=compact(TotalSizeLessThanMaxCompactSize). 3 Input Files, 30kb total:"
- "L0, all files 10kb "
- "L0.4[31,39] 41ns |---------L0.4----------| "
- "L0.3[21,29] 31ns |---------L0.3----------| "
- "L0.2[11,19] 21ns |---------L0.2----------| "
- "**** 1 Output Files (parquet_file_id not yet assigned), 30kb total:"
- "L1, all files 30kb "
- "L1.?[11,39] 41ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 3 files: L0.2, L0.3, L0.4"
- " Creating 1 files"
- "**** Final Output Files (30kb written)"
- "L1 "
- "L1.1[0,9] 11ns 40kb |-------L1.1-------| "
- "L1.5[11,39] 41ns 30kb |-----------------------------L1.5-----------------------------| "
"###
);
}
// Mimic small L0 files trickling when they do NOT overlap in time (i.e. they have gaps between them)
// In this case, all L1 and L0 files do not fit in a single compaction run.
#[tokio::test]
async fn small_l1_plus_nonoverlapping_l0s_two_runs() {
test_helpers::maybe_start_logging();
let setup = layout_setup_builder()
.await
.with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
.with_max_num_files_per_plan(4) // artificially limit to 4 per plan to simulate a steady stream of small files compacted as they come in.
.build()
.await;
let size = 10 * 1024;
// Create 1 L1 file that mimics the output from a previous L0 compaction
setup
.partition
.create_parquet_file(
parquet_builder()
.with_min_time(0)
.with_max_time(10)
.with_compaction_level(CompactionLevel::FileNonOverlapped)
.with_max_l0_created_at(Time::from_timestamp_nanos(11))
.with_file_size_bytes(size * 4_u64),
)
.await;
// Create 4 L0 files, not overlapping in time, and not overlapping the L1.
for i in 1..=4 {
setup
.partition
.create_parquet_file(
parquet_builder()
.with_min_time(i * 10 + 1)
.with_max_time((i + 1) * 10 - 1)
.with_compaction_level(CompactionLevel::Initial)
.with_max_l0_created_at(Time::from_timestamp_nanos((i + 1) * 10 + 1))
.with_file_size_bytes(size),
)
.await;
}
// Required behavior:
// 1. (not achieved) all files compacted to a single L1 file
// The assumption is: since it didn't combine the old L1 with the new one, it will never grow L1.1.
// It will eventually compact L1.1 with other L1s to make an L2, but if the write pattern continues
// with tiny L0s, the resulting L2 will be N * the L1 size (where N is the number of L1s compacted
// into the L2).
insta::assert_yaml_snapshot!(
run_layout_scenario(&setup).await,
@r###"
---
- "**** Input Files "
- "L0 "
- "L0.2[11,19] 21ns 10kb |----L0.2----| "
- "L0.3[21,29] 31ns 10kb |----L0.3----| "
- "L0.4[31,39] 41ns 10kb |----L0.4----| "
- "L0.5[41,49] 51ns 10kb |----L0.5----| "
- "L1 "
- "L1.1[0,10] 11ns 40kb |------L1.1------| "
- "**** Simulation run 0, type=compact(TotalSizeLessThanMaxCompactSize). 4 Input Files, 40kb total:"
- "L0, all files 10kb "
- "L0.5[41,49] 51ns |------L0.5------| "
- "L0.4[31,39] 41ns |------L0.4------| "
- "L0.3[21,29] 31ns |------L0.3------| "
- "L0.2[11,19] 21ns |------L0.2------| "
- "**** 1 Output Files (parquet_file_id not yet assigned), 40kb total:"
- "L1, all files 40kb "
- "L1.?[11,49] 51ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 4 files: L0.2, L0.3, L0.4, L0.5"
- " Creating 1 files"
- "**** Final Output Files (40kb written)"
- "L1, all files 40kb "
- "L1.1[0,10] 11ns |------L1.1------| "
- "L1.6[11,49] 51ns |-------------------------------L1.6--------------------------------| "
"###
);
}

View File

@ -48,6 +48,7 @@
//! ```text
//! - L0.?[300,350] 5kb |-L0.3-|
//! ```
mod accumulated_size;
mod backfill;
mod common_use_cases;
mod core;

View File

@ -142,6 +142,7 @@ impl TestSetupBuilder<false> {
threshold: PARTITION_THRESHOLD,
},
shadow_mode: false,
enable_scratchpad: true,
ignore_partition_skip_marker: false,
shard_config: None,
min_num_l1_files_to_compact: MIN_NUM_L1_FILES_TO_COMPACT,

View File

@ -500,6 +500,11 @@ impl PartitionKey {
pub fn ptr_eq(&self, other: &Self) -> bool {
Arc::ptr_eq(&self.0, &other.0)
}
/// Returns the underlying string.
pub fn inner(&self) -> &str {
&self.0
}
}
impl Display for PartitionKey {

View File

@ -327,6 +327,13 @@ pub enum OrderByClause {
Descending,
}
impl OrderByClause {
/// Return `true` if the order by clause is ascending.
pub fn is_ascending(self) -> bool {
matches!(self, Self::Ascending)
}
}
impl Display for OrderByClause {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(

View File

@ -260,10 +260,7 @@ pub fn parse_conditional_expression(input: &str) -> Result<ConditionalExpression
let mut i: &str = input;
// Consume whitespace from the input
i = match ws0(i) {
Ok((i1, _)) => i1,
_ => unreachable!("ws0 is infallible"),
};
(i, _) = ws0(i).expect("ws0 is infallible");
if i.is_empty() {
return Err(ParseError {
@ -293,10 +290,7 @@ pub fn parse_conditional_expression(input: &str) -> Result<ConditionalExpression
};
// Consume remaining whitespace from the input
i = match ws0(i) {
Ok((i1, _)) => i1,
_ => unreachable!("ws0 is infallible"),
};
(i, _) = ws0(i).expect("ws0 is infallible");
if !i.is_empty() {
return Err(ParseError {

View File

@ -69,10 +69,7 @@ pub fn parse_statements(input: &str) -> ParseResult {
loop {
// Consume whitespace from the input
i = match ws0(i) {
Ok((i1, _)) => i1,
_ => unreachable!("ws0 is infallible"),
};
(i, _) = ws0(i).expect("ws0 is infallible");
if eof::<_, nom::error::Error<_>>(i).is_ok() {
return Ok(res);

View File

@ -390,12 +390,9 @@ impl TimeRange {
}
/// Simplifies an InfluxQL duration `expr` to a nanosecond interval represented as an `i64`.
pub fn duration_expr_to_nanoseconds(expr: &Expr) -> Result<i64, ExprError> {
let ctx = ReduceContext::default();
match reduce_expr(&ctx, expr)? {
Expr::Literal(Literal::Duration(v)) => Ok(*v),
Expr::Literal(Literal::Float(v)) => Ok(v as i64),
Expr::Literal(Literal::Integer(v)) => Ok(v),
pub fn duration_expr_to_nanoseconds(ctx: &ReduceContext, expr: &Expr) -> Result<i64, ExprError> {
match reduce_time_expr(ctx, expr)? {
Expr::Literal(Literal::Timestamp(v)) => Ok(v.timestamp_nanos()),
_ => error::expr("invalid duration expression"),
}
}
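A hedged sketch of how callers adapt to the new signature; it mirrors the updated test further down in this diff. Note that `ReduceContext::default()` leaves `now` unset, so reducing `now()` still needs a context that provides it, as the test's `reduce_context()` helper does:

```rust
// Callers now construct the ReduceContext themselves and pass it in, so that
// `now()` and literal timestamps can also be reduced to a nanosecond value.
fn to_nanos(ctx: &ReduceContext, s: &str) -> Result<i64, ExprError> {
    let expr = s
        .parse::<ConditionalExpression>()
        .unwrap()
        .expr()
        .unwrap()
        .clone();
    duration_expr_to_nanoseconds(ctx, &expr)
}
```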
@ -444,7 +441,7 @@ pub struct ReduceContext {
pub tz: Option<chrono_tz::Tz>,
}
/// Simplify the time range expression.
/// Simplify the time range expression and return a literal [timestamp](Timestamp).
fn reduce_time_expr(ctx: &ReduceContext, expr: &Expr) -> ExprResult {
match reduce_expr(ctx, expr)? {
expr @ Expr::Literal(Literal::Timestamp(_)) => Ok(expr),
@ -732,21 +729,26 @@ mod test {
use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Offset, Utc};
use test_helpers::assert_error;
/// Return a `ReduceContext` with `now` set to
/// `2023-01-01T00:00:00Z` / `1672531200000000000`
/// and no timezone.
fn reduce_context() -> ReduceContext {
ReduceContext {
now: Some(Timestamp::from_utc(
NaiveDateTime::new(
NaiveDate::from_ymd_opt(2023, 1, 1).unwrap(),
NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
),
Utc.fix(),
)),
tz: None,
}
}
#[test]
fn test_split_cond() {
fn split_exprs(s: &str) -> Result<(Option<ConditionalExpression>, TimeRange), ExprError> {
// 2023-01-01T00:00:00Z == 1672531200000000000
let ctx = ReduceContext {
now: Some(Timestamp::from_utc(
NaiveDateTime::new(
NaiveDate::from_ymd_opt(2023, 1, 1).unwrap(),
NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
),
Utc.fix(),
)),
tz: None,
};
let ctx = reduce_context();
let cond: ConditionalExpression = s.parse().unwrap();
split_cond(&ctx, &cond)
}
@ -1014,13 +1016,14 @@ mod test {
#[test]
fn test_expr_to_duration() {
fn parse(s: &str) -> Result<i64, ExprError> {
let ctx = reduce_context();
let expr = s
.parse::<ConditionalExpression>()
.unwrap()
.expr()
.unwrap()
.clone();
duration_expr_to_nanoseconds(&expr)
duration_expr_to_nanoseconds(&ctx, &expr)
}
let cases = vec![
@ -1029,6 +1032,8 @@ mod test {
("5d10ms", 432_000_010_000_000),
("-2d10ms", -172800010000000),
("-2d10ns", -172800000000010),
("now()", 1672531200000000000),
("'2023-01-01T00:00:00Z'", 1672531200000000000),
];
for (interval_str, exp) in cases {

View File

@ -495,6 +495,7 @@ impl Config {
partition_timeout_secs: 0,
partition_filter: None,
shadow_mode: false,
enable_scratchpad: true,
ignore_partition_skip_marker: false,
shard_count: None,
shard_id: None,

View File

@ -20,6 +20,6 @@
| | DeduplicateExec: [tag1@2 ASC,tag2@3 ASC,tag3@4 ASC,tag4@5 ASC,time@6 ASC] |
| | SortPreservingMergeExec: [tag1@2 ASC,tag2@3 ASC,tag3@4 ASC,tag4@5 ASC,time@6 ASC,__chunk_order@0 ASC] |
| | SortExec: expr=[tag1@2 ASC,tag2@3 ASC,tag3@4 ASC,tag4@5 ASC,time@6 ASC,__chunk_order@0 ASC] |
| | ParquetExec: file_groups={4 groups: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet, 1/1/1/00000000-0000-0000-0000-000000000001.parquet, 1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/00000000-0000-0000-0000-000000000003.parquet, 1/1/1/00000000-0000-0000-0000-000000000004.parquet, 1/1/1/00000000-0000-0000-0000-000000000005.parquet, 1/1/1/00000000-0000-0000-0000-000000000006.parquet, 1/1/1/00000000-0000-0000-0000-000000000007.parquet, 1/1/1/00000000-0000-0000-0000-000000000008.parquet, 1/1/1/00000000-0000-0000-0000-000000000009.parquet, 1/1/1/00000000-0000-0000-0000-00000000000a.parquet, 1/1/1/00000000-0000-0000-0000-00000000000b.parquet, 1/1/1/00000000-0000-0000-0000-00000000000c.parquet], [1/1/1/00000000-0000-0000-0000-00000000000d.parquet, 1/1/1/00000000-0000-0000-0000-00000000000e.parquet, 1/1/1/00000000-0000-0000-0000-00000000000f.parquet, 1/1/1/00000000-0000-0000-0000-000000000010.parquet, 1/1/1/00000000-0000-0000-0000-000000000011.parquet, 1/1/1/00000000-0000-0000-0000-000000000012.parquet, 1/1/1/00000000-0000-0000-0000-000000000013.parquet, 1/1/1/00000000-0000-0000-0000-000000000014.parquet, 1/1/1/00000000-0000-0000-0000-000000000015.parquet, 1/1/1/00000000-0000-0000-0000-000000000016.parquet, 1/1/1/00000000-0000-0000-0000-000000000017.parquet, 1/1/1/00000000-0000-0000-0000-000000000018.parquet, 1/1/1/00000000-0000-0000-0000-000000000019.parquet], [1/1/1/00000000-0000-0000-0000-00000000001a.parquet, 1/1/1/00000000-0000-0000-0000-00000000001b.parquet, 1/1/1/00000000-0000-0000-0000-00000000001c.parquet, 1/1/1/00000000-0000-0000-0000-00000000001d.parquet, 1/1/1/00000000-0000-0000-0000-00000000001e.parquet, 1/1/1/00000000-0000-0000-0000-00000000001f.parquet, 1/1/1/00000000-0000-0000-0000-000000000020.parquet, 1/1/1/00000000-0000-0000-0000-000000000021.parquet, 1/1/1/00000000-0000-0000-0000-000000000022.parquet, 1/1/1/00000000-0000-0000-0000-000000000023.parquet, 1/1/1/00000000-0000-0000-0000-000000000024.parquet, 1/1/1/00000000-0000-0000-0000-000000000025.parquet], [1/1/1/00000000-0000-0000-0000-000000000026.parquet, 1/1/1/00000000-0000-0000-0000-000000000027.parquet, 1/1/1/00000000-0000-0000-0000-000000000028.parquet, 1/1/1/00000000-0000-0000-0000-000000000029.parquet, 1/1/1/00000000-0000-0000-0000-00000000002a.parquet, 1/1/1/00000000-0000-0000-0000-00000000002b.parquet, 1/1/1/00000000-0000-0000-0000-00000000002c.parquet, 1/1/1/00000000-0000-0000-0000-00000000002d.parquet, 1/1/1/00000000-0000-0000-0000-00000000002e.parquet, 1/1/1/00000000-0000-0000-0000-00000000002f.parquet, 1/1/1/00000000-0000-0000-0000-000000000030.parquet, 1/1/1/00000000-0000-0000-0000-000000000031.parquet]]}, projection=[__chunk_order, f1, tag1, tag2, tag3, tag4, time] |
| | ParquetExec: file_groups={4 groups: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet, 1/1/1/00000000-0000-0000-0000-000000000001.parquet, 1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/00000000-0000-0000-0000-000000000003.parquet, 1/1/1/00000000-0000-0000-0000-000000000004.parquet, ...], [1/1/1/00000000-0000-0000-0000-000000000005.parquet, 1/1/1/00000000-0000-0000-0000-000000000006.parquet, 1/1/1/00000000-0000-0000-0000-000000000007.parquet, 1/1/1/00000000-0000-0000-0000-000000000008.parquet, 1/1/1/00000000-0000-0000-0000-000000000009.parquet, ...], [1/1/1/00000000-0000-0000-0000-00000000000a.parquet, 1/1/1/00000000-0000-0000-0000-00000000000b.parquet, 1/1/1/00000000-0000-0000-0000-00000000000c.parquet, 1/1/1/00000000-0000-0000-0000-00000000000d.parquet, 1/1/1/00000000-0000-0000-0000-00000000000e.parquet, ...], [1/1/1/00000000-0000-0000-0000-00000000000f.parquet, 1/1/1/00000000-0000-0000-0000-000000000010.parquet, 1/1/1/00000000-0000-0000-0000-000000000011.parquet, 1/1/1/00000000-0000-0000-0000-000000000012.parquet, 1/1/1/00000000-0000-0000-0000-000000000013.parquet, ...]]}, projection=[__chunk_order, f1, tag1, tag2, tag3, tag4, time] |
| | |
----------

View File

@ -17,7 +17,7 @@ extensions_options! {
/// [^iox_part]: "IOx partition" refers to a partition within the IOx catalog, i.e. a partition within the
/// primary key space. This is NOT the same as a DataFusion partition which refers to a stream
/// within the physical plan data flow.
pub max_dedup_partition_split: usize, default = 100
pub max_dedup_partition_split: usize, default = 10_000
/// When splitting de-duplicate operations based on time-based overlaps, this is the maximum number of groups
/// that should be considered. If there are more groups, the split will NOT be performed.

View File

@ -33,7 +33,7 @@ use crate::{
use arrow::record_batch::RecordBatch;
use async_trait::async_trait;
use datafusion::{
catalog::catalog::CatalogProvider,
catalog::CatalogProvider,
execution::{
context::{QueryPlanner, SessionState, TaskContext},
memory_pool::MemoryPool,

View File

@ -1,8 +1,11 @@
use datafusion::{
common::{tree_node::TreeNodeRewriter, DFSchema},
error::DataFusionError,
logical_expr::{expr::ScalarUDF, utils::from_plan, LogicalPlan, Operator},
optimizer::{utils::rewrite_preserving_name, OptimizerConfig, OptimizerRule},
logical_expr::{
expr::ScalarUDF, expr_rewriter::rewrite_preserving_name, utils::from_plan, LogicalPlan,
Operator,
},
optimizer::{OptimizerConfig, OptimizerRule},
prelude::{binary_expr, lit, Expr},
scalar::ScalarValue,
};

View File

@ -2,10 +2,11 @@ use std::sync::Arc;
use arrow::datatypes::SchemaRef;
use datafusion::{
datasource::physical_plan::ParquetExec,
error::DataFusionError,
physical_plan::{
empty::EmptyExec, file_format::ParquetExec, union::UnionExec, visit_execution_plan,
ExecutionPlan, ExecutionPlanVisitor,
empty::EmptyExec, union::UnionExec, visit_execution_plan, ExecutionPlan,
ExecutionPlanVisitor,
},
};
use observability_deps::tracing::debug;
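This and several of the following hunks track the same mechanical move that comes with the DataFusion revision bump: `ParquetExec` and `FileScanConfig` are now imported from `datasource::physical_plan` instead of `physical_plan::file_format`. A minimal sketch of the new import path, as used in the hunks below:

```rust
// New home of the Parquet scan types after the DataFusion rev bump in this PR.
use datafusion::datasource::physical_plan::{FileScanConfig, ParquetExec};
```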

View File

@ -3,6 +3,7 @@ use std::{collections::HashSet, sync::Arc};
use datafusion::{
common::tree_node::{RewriteRecursion, Transformed, TreeNode, TreeNodeRewriter},
config::ConfigOptions,
datasource::physical_plan::ParquetExec,
error::{DataFusionError, Result},
logical_expr::Operator,
physical_expr::{split_conjunction, utils::collect_columns},
@ -10,7 +11,6 @@ use datafusion::{
physical_plan::{
empty::EmptyExec,
expressions::{BinaryExpr, Column},
file_format::ParquetExec,
filter::FilterExec,
union::UnionExec,
ExecutionPlan, PhysicalExpr,
@ -165,11 +165,11 @@ mod tests {
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
use datafusion::{
datasource::object_store::ObjectStoreUrl,
datasource::physical_plan::FileScanConfig,
logical_expr::Operator,
physical_expr::PhysicalSortExpr,
physical_plan::{
expressions::{BinaryExpr, Column, Literal},
file_format::FileScanConfig,
PhysicalExpr, Statistics,
},
scalar::ScalarValue,

View File

@ -7,6 +7,7 @@ use arrow::datatypes::SchemaRef;
use datafusion::{
common::tree_node::{Transformed, TreeNode},
config::ConfigOptions,
datasource::physical_plan::{FileScanConfig, ParquetExec},
error::{DataFusionError, Result},
physical_expr::{
utils::{collect_columns, reassign_predicate_columns},
@ -16,7 +17,6 @@ use datafusion::{
physical_plan::{
empty::EmptyExec,
expressions::Column,
file_format::{FileScanConfig, ParquetExec},
filter::FilterExec,
projection::ProjectionExec,
sorts::{sort::SortExec, sort_preserving_merge::SortPreservingMergeExec},

View File

@ -3,14 +3,11 @@ use std::sync::Arc;
use datafusion::{
common::tree_node::{RewriteRecursion, Transformed, TreeNode, TreeNodeRewriter},
config::ConfigOptions,
datasource::physical_plan::{FileScanConfig, ParquetExec},
error::Result,
physical_expr::{PhysicalSortExpr, PhysicalSortRequirement},
physical_optimizer::PhysicalOptimizerRule,
physical_plan::{
file_format::{FileScanConfig, ParquetExec},
sorts::sort::SortExec,
ExecutionPlan,
},
physical_plan::{sorts::sort::SortExec, ExecutionPlan},
};
use observability_deps::tracing::warn;

View File

@ -6,14 +6,15 @@ use crate::{
};
use arrow::datatypes::{DataType, Fields, Schema as ArrowSchema, SchemaRef};
use datafusion::{
datasource::{listing::PartitionedFile, object_store::ObjectStoreUrl},
datasource::{
listing::PartitionedFile,
object_store::ObjectStoreUrl,
physical_plan::{FileScanConfig, ParquetExec},
},
physical_expr::PhysicalSortExpr,
physical_plan::{
empty::EmptyExec,
expressions::Column,
file_format::{FileScanConfig, ParquetExec},
union::UnionExec,
ColumnStatistics, ExecutionPlan, Statistics,
empty::EmptyExec, expressions::Column, union::UnionExec, ColumnStatistics, ExecutionPlan,
Statistics,
},
scalar::ScalarValue,
};

View File

@ -26,8 +26,8 @@ use datafusion::error::DataFusionError;
use datafusion::execution::context::SessionState;
use datafusion::logical_expr::Expr;
use datafusion::physical_plan::ExecutionPlan;
use datafusion::{catalog::catalog::CatalogProvider, physical_plan::displayable};
use datafusion::{catalog::schema::SchemaProvider, logical_expr::LogicalPlan};
use datafusion::{catalog::CatalogProvider, physical_plan::displayable};
use datafusion::{
datasource::{object_store::ObjectStoreUrl, TableProvider, TableType},
physical_plan::{ColumnStatistics, Statistics as DataFusionStatistics},

View File

@ -33,6 +33,18 @@ pub(super) struct Select {
/// The projection type of the selection.
pub(super) projection_type: ProjectionType,
/// The interval derived from the arguments to the `TIME` function
/// when a `GROUP BY` clause is declared with `TIME`.
pub(super) interval: Option<Interval>,
/// The number of additional intervals that must be read
/// for queries that group by time and use window functions such as
/// `DIFFERENCE` or `DERIVATIVE`. This ensures data for the first
/// window is available.
///
/// See: <https://github.com/influxdata/influxdb/blob/f365bb7e3a9c5e227dbf66d84adf674d3d127176/query/compile.go#L50>
pub(super) extra_intervals: usize,
/// Projection clause of the selection.
pub(super) fields: Vec<Field>,
@ -194,3 +206,15 @@ impl Display for Field {
write!(f, " AS {}", self.name)
}
}
/// Represents the interval duration and offset
/// derived from the `TIME` function when specified
/// in a `GROUP BY` clause.
#[derive(Debug, Clone, Copy)]
pub(super) struct Interval {
/// The nanosecond duration of the interval
pub duration: i64,
/// The nanosecond offset of the interval.
pub offset: Option<i64>,
}
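To make the two new `Select` fields concrete, here is a hedged illustration (not taken from the diff) of the values the rewriter would be expected to populate for a typical windowed query; the nanosecond scaling follows the field documentation above, and the `3` matches the `moving_average(..., 3)` example discussed later in this diff:

```rust
// For: SELECT moving_average(mean(writes), 3) FROM diskio GROUP BY time(10s, 5s)
// the GROUP BY TIME arguments and the window function argument map to:
let interval = Interval {
    duration: 10_000_000_000,    // 10s, expressed in nanoseconds
    offset: Some(5_000_000_000), // 5s offset, also in nanoseconds
};
let extra_intervals = 3; // second argument to moving_average
```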

View File

@ -1,10 +1,10 @@
mod select;
use crate::plan::ir::{DataSource, Field, Select, SelectQuery};
use crate::plan::ir::{DataSource, Field, Interval, Select, SelectQuery};
use crate::plan::planner::select::{
fields_to_exprs_no_nulls, make_tag_key_column_meta, plan_with_sort, ProjectionInfo,
};
use crate::plan::planner_time_range_expression::{expr_to_df_interval_dt, time_range_to_df_expr};
use crate::plan::planner_time_range_expression::time_range_to_df_expr;
use crate::plan::rewriter::{find_table_names, rewrite_statement, ProjectionType};
use crate::plan::util::{binary_operator_to_df_operator, rebase_expr, Schemas};
use crate::plan::var_ref::var_ref_data_type_to_data_type;
@ -50,9 +50,7 @@ use influxdb_influxql_parser::show_measurements::{
use influxdb_influxql_parser::show_tag_keys::ShowTagKeysStatement;
use influxdb_influxql_parser::show_tag_values::{ShowTagValuesStatement, WithKeyClause};
use influxdb_influxql_parser::simple_from_clause::ShowFromClause;
use influxdb_influxql_parser::time_range::{
duration_expr_to_nanoseconds, split_cond, ReduceContext, TimeRange,
};
use influxdb_influxql_parser::time_range::{split_cond, ReduceContext, TimeRange};
use influxdb_influxql_parser::timestamp::Timestamp;
use influxdb_influxql_parser::{
common::{MeasurementName, WhereClause},
@ -130,22 +128,43 @@ enum ExprScope {
Projection,
}
/// State used to inform the planner.
/// State used to inform the planner, which is derived for the
/// root `SELECT` and subqueries.
#[allow(dead_code)]
#[derive(Debug, Default, Clone)]
struct Context<'a> {
/// The name of the table used as the data source for the current query.
table_name: &'a str,
projection_type: ProjectionType,
tz: Option<Tz>,
// WHERE
order_by: OrderByClause,
/// The column alias for the `time` column.
///
/// # NOTE
///
/// The time column can only be aliased for the root query.
time_alias: &'a str,
/// The filter predicate for the query, without `time`.
condition: Option<&'a ConditionalExpression>,
/// The time range of the query
time_range: TimeRange,
// GROUP BY information
group_by: Option<&'a GroupByClause>,
fill: Option<FillClause>,
/// Interval of the `TIME` function found in the `GROUP BY` clause.
interval: Option<Interval>,
/// How many additional window intervals must be retrieved, when grouping
/// by time, to ensure window functions like `difference` have sufficient
/// data for the first window of the `time_range`.
extra_intervals: usize,
/// The set of tags specified in the top-level `SELECT` statement
/// which represent the tag set used for grouping output.
root_group_by_tags: &'a [&'a str],
@ -161,10 +180,14 @@ impl<'a> Context<'a> {
table_name,
projection_type: select.projection_type,
tz: select.timezone,
order_by: select.order_by.unwrap_or_default(),
time_alias: &select.fields[0].name,
condition: select.condition.as_ref(),
time_range: select.time_range,
group_by: select.group_by.as_ref(),
fill: select.fill,
interval: select.interval,
extra_intervals: select.extra_intervals,
root_group_by_tags,
}
}
@ -176,6 +199,9 @@ impl<'a> Context<'a> {
table_name: self.table_name,
projection_type: select.projection_type,
tz: select.timezone,
order_by: self.order_by,
// time is never aliased in subqueries
time_alias: "time",
condition: select.condition.as_ref(),
// Subqueries should be restricted by the time range of the parent
//
@ -183,10 +209,165 @@ impl<'a> Context<'a> {
time_range: select.time_range.intersected(self.time_range),
group_by: select.group_by.as_ref(),
fill: select.fill,
interval: select.interval,
extra_intervals: select.extra_intervals,
root_group_by_tags: self.root_group_by_tags,
}
}
/// Return an [`Expr::Sort`] expression for the `time` column.
#[allow(dead_code)]
fn time_sort_expr(&self) -> Expr {
self.time_alias.as_expr().sort(
match self.order_by {
OrderByClause::Ascending => true,
OrderByClause::Descending => false,
},
false,
)
}
/// Returns true if the current context has an extended
/// time range to provide leading data for window functions
/// to produce the result for the first window.
#[allow(dead_code)]
fn has_extended_time_range(&self) -> bool {
self.extra_intervals > 0 && self.interval.is_some()
}
/// Return the time range of the context, including any
/// additional intervals required for window functions like
/// `difference` or `moving_average`, when the query contains a
/// `GROUP BY TIME` clause.
///
/// # NOTE
///
/// This function accounts for a bug in InfluxQL OG that only reads
/// a single interval, rather than the number required based on the
/// window function.
///
/// # EXPECTED
///
/// For InfluxQL OG, the likely intended behaviour of the extra intervals
/// was to calculate a minimum number of additional windows, so that there
/// was sufficient data at the lower time bound specified in the `WHERE`
/// clause, or at the upper time bound when ordering by `time` in
/// descending order.
///
/// For example, the following InfluxQL query calculates the `moving_average`
/// of the `mean` of the `writes` field over 3 intervals. The interval
/// is 10 seconds, as specified by the `GROUP BY time(10s)` clause.
///
/// ```sql
/// SELECT moving_average(mean(writes), 3)
/// FROM diskio
/// WHERE time >= '2020-06-11T16:53:00Z' AND time < '2020-06-11T16:55:00Z'
/// GROUP BY time(10s)
/// ```
///
/// The intended output would therefore include the first window of the time
/// bounds, `'2020-06-11T16:53:00Z'`:
///
/// ```text
/// name: diskio
/// time moving_average
/// ---- --------------
/// 2020-06-11T16:53:00Z 5592529.333333333
/// 2020-06-11T16:53:10Z 5592677.333333333
/// ...
/// 2020-06-11T16:54:10Z 5593513.333333333
/// 2020-06-11T16:54:20Z 5593612.333333333
/// ```
/// However, the actual output starts at `2020-06-11T16:53:10Z`.
///
/// # BUG
///
/// During compilation of the query, InfluxQL OG determines the `ExtraIntervals`
/// required for the `moving_average` function, which in the example is `3` ([source][1]):
///
/// ```go
/// if c.global.ExtraIntervals < int(arg1.Val) {
/// c.global.ExtraIntervals = int(arg1.Val)
/// }
/// ```
///
/// `arg1.Val` is the second argument from the example InfluxQL query, or `3`.
///
/// When preparing the query for execution, the time range is adjusted by the
/// `ExtraIntervals` determined during compilation ([source][2]):
///
/// ```go
/// // Modify the time range if there are extra intervals and an interval.
/// if !c.Interval.IsZero() && c.ExtraIntervals > 0 {
/// if c.Ascending {
/// newTime := timeRange.Min.Add(time.Duration(-c.ExtraIntervals) * c.Interval.Duration)
/// if !newTime.Before(time.Unix(0, influxql.MinTime).UTC()) {
/// timeRange.Min = newTime
/// ```
///
/// In this case `timeRange.Min` will be adjusted from `2020-06-11T16:53:00Z` to
/// `2020-06-11T16:52:30Z`, as `ExtraIntervals` is `3` and `Interval.Duration` is `10s`.
///
/// The first issue is that the adjusted `timeRange` is only used to determine which
/// shards to read per the following ([source][3]):
///
/// ```go
/// // Create an iterator creator based on the shards in the cluster.
/// shards, err := shardMapper.MapShards(c.stmt.Sources, timeRange, sopt)
/// ```
///
/// The options used to configure query execution, constructed later in the function,
/// use the time range from the compiled statement ([source][4]):
///
/// ```go
/// opt.StartTime, opt.EndTime = c.TimeRange.MinTimeNano(), c.TimeRange.MaxTimeNano()
/// ```
///
/// Specifically, `opt.StartTime` would be `2020-06-11T16:53:00Z` (`1591894380000000000`).
///
/// Finally, when constructing the physical operator to compute the `moving_average`,
/// the `StartTime`, or `EndTime` for descending queries, is adjusted by a single
/// interval of `10s` ([source][5]):
///
/// ```go
/// if !opt.Interval.IsZero() {
/// if opt.Ascending {
/// opt.StartTime -= int64(opt.Interval.Duration)
/// ```
///
/// [1]: https://github.com/influxdata/influxdb/blob/f365bb7e3a9c5e227dbf66d84adf674d3d127176/query/compile.go#L592-L594
/// [2]: https://github.com/influxdata/influxdb/blob/f365bb7e3a9c5e227dbf66d84adf674d3d127176/query/compile.go#L1153-L1158
/// [3]: https://github.com/influxdata/influxdb/blob/f365bb7e3a9c5e227dbf66d84adf674d3d127176/query/compile.go#L1172-L1173
/// [4]: https://github.com/influxdata/influxdb/blob/f365bb7e3a9c5e227dbf66d84adf674d3d127176/query/compile.go#L1198
/// [5]: https://github.com/influxdata/influxdb/blob/f365bb7e3a9c5e227dbf66d84adf674d3d127176/query/select.go#L259-L261
#[allow(dead_code)]
fn extended_time_range(&self) -> TimeRange {
// As described in the function docs, extra_intervals is clamped
// to at most 1 to match InfluxQL OG behaviour.
match (self.extra_intervals.min(1), self.interval) {
(count @ 1.., Some(interval)) => {
if self.order_by.is_ascending() {
TimeRange {
lower: self
.time_range
.lower
.map(|v| v - (count as i64 * interval.duration)),
upper: self.time_range.upper,
}
} else {
TimeRange {
lower: self.time_range.lower,
upper: self
.time_range
.upper
.map(|v| v + (count as i64 * interval.duration)),
}
}
}
_ => self.time_range,
}
}
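A hedged worked example of the arithmetic performed by `extended_time_range` for the `moving_average` query in the docs above, assuming an ascending query, a 10s interval, and the documented lower bound `2020-06-11T16:53:00Z` (`1591894380000000000` ns):

```rust
// Hypothetical worked example; the constants mirror the documented query.
const LOWER_NS: i64 = 1_591_894_380_000_000_000; // 2020-06-11T16:53:00Z
const INTERVAL_NS: i64 = 10_000_000_000; // GROUP BY time(10s)

fn main() {
    // extra_intervals is clamped to 1, so a single interval is subtracted,
    // mirroring the InfluxQL OG behaviour described in the BUG section.
    let count: i64 = 1;
    let extended_lower = LOWER_NS - count * INTERVAL_NS;
    // 2020-06-11T16:52:50Z
    assert_eq!(extended_lower, 1_591_894_370_000_000_000);
}
```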
/// Returns the combined `GROUP BY` tags clause from the root
/// and current statement. The list is sorted and guaranteed to be unique.
fn group_by_tags(&self) -> Vec<&str> {
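The body of `group_by_tags` is truncated by this hunk; purely as an illustration of the behaviour the doc comment describes (a sorted, de-duplicated union of the root and current `GROUP BY` tags), a hypothetical sketch follows:

```rust
// Hypothetical sketch; names are illustrative and not taken from the diff.
fn combined_group_by_tags<'a>(root: &[&'a str], current: &[&'a str]) -> Vec<&'a str> {
    let mut tags: Vec<&'a str> = root.iter().chain(current.iter()).copied().collect();
    tags.sort_unstable();
    tags.dedup();
    tags
}
```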
@ -210,7 +391,9 @@ impl<'a> Context<'a> {
fn is_aggregate(&self) -> bool {
matches!(
self.projection_type,
ProjectionType::Aggregate | ProjectionType::Selector { .. }
ProjectionType::Aggregate
| ProjectionType::WindowAggregate
| ProjectionType::Selector { .. }
)
}
@ -328,6 +511,9 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
is_projected,
} = ProjectionInfo::new(&select.fields, &group_by_tags);
let order_by = select.order_by.unwrap_or_default();
let time_alias = fields[0].name.as_str();
let table_names = find_table_names(select);
let sort_by_measurement = table_names.len() > 1;
let mut plans = Vec::new();
@ -412,14 +598,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
},
)?;
// the sort planner node must refer to the time column using
// the alias that was specified
let time_alias = fields[0].name.as_str();
let time_sort_expr = time_alias.as_expr().sort(
match select.order_by {
// Default behaviour is to sort by time in ascending order if there is no ORDER BY
None | Some(OrderByClause::Ascending) => true,
Some(OrderByClause::Descending) => false,
match order_by {
OrderByClause::Ascending => true,
OrderByClause::Descending => false,
},
false,
);
@ -465,10 +647,9 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
let time_alias = fields[0].name.as_str();
let time_sort_expr = time_alias.as_expr().sort(
match select.order_by {
// Default behaviour is to sort by time in ascending order if there is no ORDER BY
None | Some(OrderByClause::Ascending) => true,
Some(OrderByClause::Descending) => false,
match ctx.order_by {
OrderByClause::Ascending => true,
OrderByClause::Descending => false,
},
false,
);
@ -638,13 +819,9 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
// 1. is binning by time, project the column using the `DATE_BIN` function,
// 2. is a single-selector query, project the `time` field of the selector aggregate,
// 3. otherwise, project the Unix epoch (0)
select_exprs[time_column_index] = if let Some(dim) = ctx.group_by.and_then(|gb| gb.time_dimension()) {
let stride = expr_to_df_interval_dt(&dim.interval)?;
let offset = if let Some(offset) = &dim.offset {
duration_expr_to_nanoseconds(offset).map_err(error::map::expr_error)?
} else {
0
};
select_exprs[time_column_index] = if let Some(i) = ctx.interval {
let stride = lit(ScalarValue::new_interval_mdn(0, 0, i.duration));
let offset = i.offset.unwrap_or(0);
date_bin(
stride,
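The hunk above is truncated mid-call; as a hedged illustration of what binning the `time` column by the `GROUP BY time(...)` interval amounts to (the planner delegates this to DataFusion's `DATE_BIN`), a self-contained sketch of the window calculation:

```rust
// Hypothetical sketch of the window calculation performed by `DATE_BIN`:
// each timestamp is snapped to the start of its window. The function name,
// signature and offset handling here are illustrative only.
fn date_bin_ns(stride: i64, offset: i64, t: i64) -> i64 {
    // Shift by the offset, truncate to the stride, then shift back.
    (t - offset).div_euclid(stride) * stride + offset
}

fn main() {
    let stride = 10_000_000_000; // 10s in nanoseconds
    let offset = 5_000_000_000; // 5s offset
    // A point at 17s after the epoch falls in the window starting at 15s.
    assert_eq!(date_bin_ns(stride, offset, 17_000_000_000), 15_000_000_000);
}
```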
@ -2535,7 +2712,7 @@ mod test {
}
#[test]
fn test_show_tag_keys() {
fn test_show_tag_keys_1() {
assert_snapshot!(plan("SHOW TAG KEYS"), @"TableScan: tag_keys [iox::measurement:Dictionary(Int32, Utf8), tagKey:Dictionary(Int32, Utf8)]");
assert_snapshot!(plan("SHOW TAG KEYS LIMIT 1 OFFSET 2"), @r###"
Sort: tag_keys.iox::measurement ASC NULLS LAST, tag_keys.tagKey ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), tagKey:Dictionary(Int32, Utf8)]
@ -2544,150 +2721,158 @@ mod test {
WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [tag_keys.iox::measurement] ORDER BY [tag_keys.tagKey ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS iox::row]] [iox::measurement:Dictionary(Int32, Utf8), tagKey:Dictionary(Int32, Utf8), iox::row:UInt64;N]
TableScan: tag_keys [iox::measurement:Dictionary(Int32, Utf8), tagKey:Dictionary(Int32, Utf8)]
"###);
}
#[test]
fn test_show_tag_keys_2() {
assert_snapshot!(plan("SHOW TAG KEYS WHERE foo = 'some_foo'"), @r###"
Sort: iox::measurement ASC NULLS LAST, tagKey ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Union [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN tag0 > Int32(0) THEN Utf8("tag0") END, CASE WHEN tag1 > Int32(0) THEN Utf8("tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN tag0 > Int32(0) THEN Utf8("tag0") END, CASE WHEN tag1 > Int32(0) THEN Utf8("tag1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(all_types.tag0 IS NOT NULL AS UInt64)) AS tag0, SUM(CAST(all_types.tag1 IS NOT NULL AS UInt64)) AS tag1]] [tag0:UInt64;N, tag1:UInt64;N]
Filter: all_types.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N]
TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN cpu > Int32(0) THEN Utf8("cpu") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
Projection: make_array(CASE WHEN cpu > Int32(0) THEN Utf8("cpu") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(cpu.cpu IS NOT NULL AS UInt64)) AS cpu, SUM(CAST(cpu.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(cpu.region IS NOT NULL AS UInt64)) AS region]] [cpu:UInt64;N, host:UInt64;N, region:UInt64;N]
Filter: cpu.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN bar > Int32(0) THEN Utf8("bar") END, CASE WHEN foo > Int32(0) THEN Utf8("foo") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN bar > Int32(0) THEN Utf8("bar") END, CASE WHEN foo > Int32(0) THEN Utf8("foo") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(data.bar IS NOT NULL AS UInt64)) AS bar, SUM(CAST(data.foo IS NOT NULL AS UInt64)) AS foo]] [bar:UInt64;N, foo:UInt64;N]
Filter: data.time >= TimestampNanosecond(1672444800000000000, None) AND data.foo = Dictionary(Int32, Utf8("some_foo")) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Projection: Dictionary(Int32, Utf8("disk")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN device > Int32(0) THEN Utf8("device") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
Projection: make_array(CASE WHEN device > Int32(0) THEN Utf8("device") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(disk.device IS NOT NULL AS UInt64)) AS device, SUM(CAST(disk.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(disk.region IS NOT NULL AS UInt64)) AS region]] [device:UInt64;N, host:UInt64;N, region:UInt64;N]
Filter: disk.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
TableScan: disk [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("diskio")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END, CASE WHEN status > Int32(0) THEN Utf8("status") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
Projection: make_array(CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END, CASE WHEN status > Int32(0) THEN Utf8("status") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(diskio.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(diskio.region IS NOT NULL AS UInt64)) AS region, SUM(CAST(diskio.status IS NOT NULL AS UInt64)) AS status]] [host:UInt64;N, region:UInt64;N, status:UInt64;N]
Filter: diskio.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [bytes_read:Int64;N, bytes_written:Int64;N, host:Dictionary(Int32, Utf8);N, is_local:Boolean;N, read_utilization:Float64;N, region:Dictionary(Int32, Utf8);N, status:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), write_utilization:Float64;N]
TableScan: diskio [bytes_read:Int64;N, bytes_written:Int64;N, host:Dictionary(Int32, Utf8);N, is_local:Boolean;N, read_utilization:Float64;N, region:Dictionary(Int32, Utf8);N, status:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), write_utilization:Float64;N]
Projection: Dictionary(Int32, Utf8("merge_00")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN col0 > Int32(0) THEN Utf8("col0") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N]
Projection: make_array(CASE WHEN col0 > Int32(0) THEN Utf8("col0") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(merge_00.col0 IS NOT NULL AS UInt64)) AS col0]] [col0:UInt64;N]
Filter: merge_00.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [col0:Dictionary(Int32, Utf8);N, col1:Float64;N, col2:Boolean;N, col3:Utf8;N, time:Timestamp(Nanosecond, None)]
TableScan: merge_00 [col0:Dictionary(Int32, Utf8);N, col1:Float64;N, col2:Boolean;N, col3:Utf8;N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("merge_01")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN col1 > Int32(0) THEN Utf8("col1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N]
Projection: make_array(CASE WHEN col1 > Int32(0) THEN Utf8("col1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(merge_01.col1 IS NOT NULL AS UInt64)) AS col1]] [col1:UInt64;N]
Filter: merge_01.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [col0:Float64;N, col1:Dictionary(Int32, Utf8);N, col2:Utf8;N, col3:Boolean;N, time:Timestamp(Nanosecond, None)]
TableScan: merge_01 [col0:Float64;N, col1:Dictionary(Int32, Utf8);N, col2:Utf8;N, col3:Boolean;N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("temp_01")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_01.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_01.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N]
Filter: temp_01.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [field_f64:Float64;N, field_i64:Int64;N, field_str:Utf8;N, field_u64:UInt64;N, shared_field0:Float64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
TableScan: temp_01 [field_f64:Float64;N, field_i64:Int64;N, field_str:Utf8;N, field_u64:UInt64;N, shared_field0:Float64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("temp_02")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_02.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_02.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N]
Filter: temp_02.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [shared_field0:Int64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
TableScan: temp_02 [shared_field0:Int64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("temp_03")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_03.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_03.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N]
Filter: temp_03.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [shared_field0:Utf8;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
TableScan: temp_03 [shared_field0:Utf8;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
"###);
}
#[test]
fn test_show_tag_keys_3() {
assert_snapshot!(plan("SHOW TAG KEYS WHERE time > 1337"), @r###"
Sort: iox::measurement ASC NULLS LAST, tagKey ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Union [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN tag0 > Int32(0) THEN Utf8("tag0") END, CASE WHEN tag1 > Int32(0) THEN Utf8("tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN tag0 > Int32(0) THEN Utf8("tag0") END, CASE WHEN tag1 > Int32(0) THEN Utf8("tag1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(all_types.tag0 IS NOT NULL AS UInt64)) AS tag0, SUM(CAST(all_types.tag1 IS NOT NULL AS UInt64)) AS tag1]] [tag0:UInt64;N, tag1:UInt64;N]
Filter: all_types.time >= TimestampNanosecond(1338, None) [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N]
TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN cpu > Int32(0) THEN Utf8("cpu") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
Projection: make_array(CASE WHEN cpu > Int32(0) THEN Utf8("cpu") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(cpu.cpu IS NOT NULL AS UInt64)) AS cpu, SUM(CAST(cpu.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(cpu.region IS NOT NULL AS UInt64)) AS region]] [cpu:UInt64;N, host:UInt64;N, region:UInt64;N]
Filter: cpu.time >= TimestampNanosecond(1338, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN bar > Int32(0) THEN Utf8("bar") END, CASE WHEN foo > Int32(0) THEN Utf8("foo") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN bar > Int32(0) THEN Utf8("bar") END, CASE WHEN foo > Int32(0) THEN Utf8("foo") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(data.bar IS NOT NULL AS UInt64)) AS bar, SUM(CAST(data.foo IS NOT NULL AS UInt64)) AS foo]] [bar:UInt64;N, foo:UInt64;N]
Filter: data.time >= TimestampNanosecond(1338, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Projection: Dictionary(Int32, Utf8("disk")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN device > Int32(0) THEN Utf8("device") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
Projection: make_array(CASE WHEN device > Int32(0) THEN Utf8("device") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(disk.device IS NOT NULL AS UInt64)) AS device, SUM(CAST(disk.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(disk.region IS NOT NULL AS UInt64)) AS region]] [device:UInt64;N, host:UInt64;N, region:UInt64;N]
Filter: disk.time >= TimestampNanosecond(1338, None) [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
TableScan: disk [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("diskio")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END, CASE WHEN status > Int32(0) THEN Utf8("status") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
Projection: make_array(CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END, CASE WHEN status > Int32(0) THEN Utf8("status") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(diskio.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(diskio.region IS NOT NULL AS UInt64)) AS region, SUM(CAST(diskio.status IS NOT NULL AS UInt64)) AS status]] [host:UInt64;N, region:UInt64;N, status:UInt64;N]
Filter: diskio.time >= TimestampNanosecond(1338, None) [bytes_read:Int64;N, bytes_written:Int64;N, host:Dictionary(Int32, Utf8);N, is_local:Boolean;N, read_utilization:Float64;N, region:Dictionary(Int32, Utf8);N, status:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), write_utilization:Float64;N]
TableScan: diskio [bytes_read:Int64;N, bytes_written:Int64;N, host:Dictionary(Int32, Utf8);N, is_local:Boolean;N, read_utilization:Float64;N, region:Dictionary(Int32, Utf8);N, status:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), write_utilization:Float64;N]
Projection: Dictionary(Int32, Utf8("merge_00")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN col0 > Int32(0) THEN Utf8("col0") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N]
Projection: make_array(CASE WHEN col0 > Int32(0) THEN Utf8("col0") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(merge_00.col0 IS NOT NULL AS UInt64)) AS col0]] [col0:UInt64;N]
Filter: merge_00.time >= TimestampNanosecond(1338, None) [col0:Dictionary(Int32, Utf8);N, col1:Float64;N, col2:Boolean;N, col3:Utf8;N, time:Timestamp(Nanosecond, None)]
TableScan: merge_00 [col0:Dictionary(Int32, Utf8);N, col1:Float64;N, col2:Boolean;N, col3:Utf8;N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("merge_01")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN col1 > Int32(0) THEN Utf8("col1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N]
Projection: make_array(CASE WHEN col1 > Int32(0) THEN Utf8("col1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(merge_01.col1 IS NOT NULL AS UInt64)) AS col1]] [col1:UInt64;N]
Filter: merge_01.time >= TimestampNanosecond(1338, None) [col0:Float64;N, col1:Dictionary(Int32, Utf8);N, col2:Utf8;N, col3:Boolean;N, time:Timestamp(Nanosecond, None)]
TableScan: merge_01 [col0:Float64;N, col1:Dictionary(Int32, Utf8);N, col2:Utf8;N, col3:Boolean;N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("temp_01")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_01.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_01.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N]
Filter: temp_01.time >= TimestampNanosecond(1338, None) [field_f64:Float64;N, field_i64:Int64;N, field_str:Utf8;N, field_u64:UInt64;N, shared_field0:Float64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
TableScan: temp_01 [field_f64:Float64;N, field_i64:Int64;N, field_str:Utf8;N, field_u64:UInt64;N, shared_field0:Float64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("temp_02")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_02.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_02.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N]
Filter: temp_02.time >= TimestampNanosecond(1338, None) [shared_field0:Int64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
TableScan: temp_02 [shared_field0:Int64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("temp_03")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_03.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_03.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N]
Filter: temp_03.time >= TimestampNanosecond(1338, None) [shared_field0:Utf8;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
TableScan: temp_03 [shared_field0:Utf8;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
@ -2695,7 +2880,7 @@ mod test {
}
#[test]
fn test_show_tag_values() {
fn test_show_tag_values_1() {
assert_snapshot!(plan("SHOW TAG VALUES WITH KEY = bar"), @r###"
Sort: iox::measurement ASC NULLS LAST, key ASC NULLS LAST, value ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), key:Dictionary(Int32, Utf8), value:Dictionary(Int32, Utf8);N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, Dictionary(Int32, Utf8("bar")) AS key, data.bar AS value [iox::measurement:Dictionary(Int32, Utf8), key:Dictionary(Int32, Utf8), value:Dictionary(Int32, Utf8);N]
@ -2704,6 +2889,10 @@ mod test {
Filter: data.time >= TimestampNanosecond(1672444800000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
#[test]
fn test_show_tag_values_2() {
assert_snapshot!(plan("SHOW TAG VALUES WITH KEY = bar LIMIT 1 OFFSET 2"), @r###"
Sort: iox::measurement ASC NULLS LAST, key ASC NULLS LAST, value ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), key:Dictionary(Int32, Utf8), value:Dictionary(Int32, Utf8);N]
Projection: iox::measurement, key, value [iox::measurement:Dictionary(Int32, Utf8), key:Dictionary(Int32, Utf8), value:Dictionary(Int32, Utf8);N]
@ -2716,6 +2905,10 @@ mod test {
Filter: data.time >= TimestampNanosecond(1672444800000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
#[test]
fn test_show_tag_values_3() {
assert_snapshot!(plan("SHOW TAG VALUES WITH KEY = bar WHERE foo = 'some_foo'"), @r###"
Sort: iox::measurement ASC NULLS LAST, key ASC NULLS LAST, value ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), key:Dictionary(Int32, Utf8), value:Dictionary(Int32, Utf8);N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, Dictionary(Int32, Utf8("bar")) AS key, data.bar AS value [iox::measurement:Dictionary(Int32, Utf8), key:Dictionary(Int32, Utf8), value:Dictionary(Int32, Utf8);N]
@ -2724,6 +2917,10 @@ mod test {
Filter: data.time >= TimestampNanosecond(1672444800000000000, None) AND data.foo = Dictionary(Int32, Utf8("some_foo")) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
#[test]
fn test_show_tag_values_4() {
assert_snapshot!(plan("SHOW TAG VALUES WITH KEY = bar WHERE time > 1337"), @r###"
Sort: iox::measurement ASC NULLS LAST, key ASC NULLS LAST, value ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), key:Dictionary(Int32, Utf8), value:Dictionary(Int32, Utf8);N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, Dictionary(Int32, Utf8("bar")) AS key, data.bar AS value [iox::measurement:Dictionary(Int32, Utf8), key:Dictionary(Int32, Utf8), value:Dictionary(Int32, Utf8);N]
@ -2784,7 +2981,8 @@ mod test {
Sort: time ASC NULLS LAST [time:Timestamp(Nanosecond, None);N, value:Float64;N]
Projection: time, AVG(cpu.usage_idle) AS value [time:Timestamp(Nanosecond, None);N, value:Float64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###);
}
@ -2962,18 +3160,20 @@ mod test {
assert_snapshot!(plan("SELECT LAST(usage_idle) FROM cpu GROUP BY TIME(5s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, last:Float64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, (selector_last(cpu.usage_idle,cpu.time))[value] AS last [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, last:Float64;N]
GapFill: groupBy=[[time]], aggr=[[selector_last(cpu.usage_idle,cpu.time)]], time_column=time, stride=IntervalMonthDayNano("5000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
GapFill: groupBy=[[time]], aggr=[[selector_last(cpu.usage_idle,cpu.time)]], time_column=time, stride=IntervalMonthDayNano("5000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("5000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[selector_last(cpu.usage_idle, cpu.time)]] [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###);
// aggregate query, grouping by time with gap filling
assert_snapshot!(plan("SELECT FIRST(usage_idle) FROM cpu GROUP BY TIME(5s) FILL(0)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, first:Float64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, (coalesce_struct(selector_first(cpu.usage_idle,cpu.time), Struct({value:Float64(0),time:TimestampNanosecond(0, None)})))[value] AS first [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, first:Float64;N]
GapFill: groupBy=[[time]], aggr=[[selector_first(cpu.usage_idle,cpu.time)]], time_column=time, stride=IntervalMonthDayNano("5000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, selector_first(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
GapFill: groupBy=[[time]], aggr=[[selector_first(cpu.usage_idle,cpu.time)]], time_column=time, stride=IntervalMonthDayNano("5000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, selector_first(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("5000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[selector_first(cpu.usage_idle, cpu.time)]] [time:Timestamp(Nanosecond, None);N, selector_first(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###);
// aggregate query, as we're specifying multiple selectors or aggregates
@ -3549,7 +3749,8 @@ mod test {
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
// supports offset parameter
@ -3557,7 +3758,8 @@ mod test {
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(5000000000, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
@ -3567,9 +3769,10 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
@ -3592,9 +3795,9 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time >= '2022-10-31T02:00:00Z' GROUP BY TIME(10s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Included(TimestampNanosecond(1667181600000000000, None))..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Included(TimestampNanosecond(1667181600000000000, None))..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time >= TimestampNanosecond(1667181600000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time >= TimestampNanosecond(1667181600000000000, None) AND data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
@ -3617,9 +3820,10 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
@ -3628,9 +3832,10 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(null)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
@ -3639,9 +3844,10 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(previous)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[LOCF(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
GapFill: groupBy=[[time]], aggr=[[LOCF(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
@ -3650,9 +3856,10 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(0)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
@ -3661,9 +3868,10 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(linear)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[INTERPOLATE(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
GapFill: groupBy=[[time]], aggr=[[INTERPOLATE(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
@ -3673,9 +3881,10 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(3.2)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(3)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
@ -3685,9 +3894,10 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) + MEAN(f64_field) FROM data GROUP BY TIME(10s) FILL(3.2)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count_mean:Float64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(3)) + coalesce_struct(AVG(data.f64_field), Float64(3.2)) AS count_mean [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count_mean:Float64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field), AVG(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N, AVG(data.f64_field):Float64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field), AVG(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N, AVG(data.f64_field):Float64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field), AVG(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N, AVG(data.f64_field):Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
@ -3755,7 +3965,8 @@ mod test {
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
}

View File

@ -1,20 +1,8 @@
//! APIs for transforming InfluxQL [expressions][influxdb_influxql_parser::expression::Expr].
use crate::plan::error;
use datafusion::common::{Result, ScalarValue};
use datafusion::common::ScalarValue;
use datafusion::logical_expr::{lit, Expr as DFExpr};
use datafusion_util::AsExpr;
use influxdb_influxql_parser::expression::Expr;
use influxdb_influxql_parser::time_range::{duration_expr_to_nanoseconds, TimeRange};
type ExprResult = Result<DFExpr>;
/// Simplifies `expr` to an InfluxQL duration and returns a DataFusion interval.
///
/// Returns an error if `expr` is not a duration expression.
pub(super) fn expr_to_df_interval_dt(expr: &Expr) -> ExprResult {
let ns = duration_expr_to_nanoseconds(expr).map_err(error::map::expr_error)?;
Ok(lit(ScalarValue::new_interval_mdn(0, 0, ns)))
}
use influxdb_influxql_parser::time_range::TimeRange;
fn lower_bound_to_df_expr(v: Option<i64>) -> Option<DFExpr> {
v.map(|ts| {

View File

@ -1,7 +1,7 @@
use crate::plan::expr_type_evaluator::TypeEvaluator;
use crate::plan::field::{field_by_name, field_name};
use crate::plan::field_mapper::{field_and_dimensions, FieldTypeMap};
use crate::plan::ir::{DataSource, Field, Select, SelectQuery, TagSet};
use crate::plan::ir::{DataSource, Field, Interval, Select, SelectQuery, TagSet};
use crate::plan::var_ref::{influx_type_to_var_ref_data_type, var_ref_data_type_to_influx_type};
use crate::plan::{error, util, SchemaProvider};
use datafusion::common::{DataFusionError, Result};
@ -19,7 +19,9 @@ use influxdb_influxql_parser::select::{
Dimension, FillClause, FromMeasurementClause, GroupByClause, MeasurementSelection,
SelectStatement,
};
use influxdb_influxql_parser::time_range::{split_cond, ReduceContext, TimeRange};
use influxdb_influxql_parser::time_range::{
duration_expr_to_nanoseconds, split_cond, ReduceContext, TimeRange,
};
use influxdb_influxql_parser::timestamp::Timestamp;
use itertools::Itertools;
use schema::InfluxColumnType;
@ -100,21 +102,35 @@ impl RewriteSelect {
let (fields, group_by) = self.expand_projection(s, stmt, &from, &tag_set)?;
let condition = self.condition_resolve_types(s, stmt, &from)?;
let now = Timestamp::from(s.execution_props().query_execution_start_time);
let rc = ReduceContext {
now: Some(now),
tz: stmt.timezone.map(|tz| *tz),
};
let interval = self.find_interval_offset(&rc, group_by.as_ref())?;
let (condition, time_range) = match condition {
Some(where_clause) => {
let rc = ReduceContext {
now: Some(Timestamp::from(
s.execution_props().query_execution_start_time,
)),
tz: stmt.timezone.map(|tz| *tz),
};
split_cond(&rc, &where_clause).map_err(error::map::expr_error)?
}
Some(where_clause) => split_cond(&rc, &where_clause).map_err(error::map::expr_error)?,
None => (None, TimeRange::default()),
};
let SelectStatementInfo { projection_type } =
select_statement_info(&fields, &group_by, stmt.fill)?;
// If the interval is non-zero and there is no upper bound, default to `now`
// for compatibility with InfluxQL OG.
//
// See: https://github.com/influxdata/influxdb/blob/f365bb7e3a9c5e227dbf66d84adf674d3d127176/query/compile.go#L172-L179
let time_range = match (interval, time_range.upper) {
(Some(interval), None) if interval.duration > 0 => TimeRange {
lower: time_range.lower,
upper: Some(now.timestamp_nanos()),
},
_ => time_range,
};
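// Editorial note (not part of this diff): the effect of the defaulting above is
// visible in the rewrite test later in this change, where a query with only a
// lower bound, `WHERE time >= '2022-04-09T12:13:14Z' GROUP BY TIME(30s)`, gains
// the upper bound `time <= 1672531200000000000`, i.e. the mocked `now()` of
// 2023-01-01T00:00:00Z.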
let SelectStatementInfo {
projection_type,
extra_intervals,
} = select_statement_info(&fields, &group_by, stmt.fill)?;
// Following InfluxQL OG behaviour, if this is a subquery, and the fill strategy equates
// to `FILL(null)`, switch to `FILL(none)`.
@ -131,6 +147,8 @@ impl RewriteSelect {
Ok(Select {
projection_type,
interval,
extra_intervals,
fields,
from,
condition,
@ -388,6 +406,29 @@ impl RewriteSelect {
Ok(Some(where_clause))
}
}
/// Return the interval value of the `GROUP BY` clause if it specifies a `TIME`.
fn find_interval_offset(
&self,
ctx: &ReduceContext,
group_by: Option<&GroupByClause>,
) -> Result<Option<Interval>> {
Ok(
if let Some(td) = group_by.and_then(|v| v.time_dimension()) {
let duration = duration_expr_to_nanoseconds(ctx, &td.interval)
.map_err(error::map::expr_error)?;
let offset = td
.offset
.as_ref()
.map(|o| duration_expr_to_nanoseconds(ctx, o))
.transpose()
.map_err(error::map::expr_error)?;
Some(Interval { duration, offset })
} else {
None
},
)
}
}
/// Ensures the `time` column is presented consistently across all `SELECT` queries.
@ -865,12 +906,30 @@ macro_rules! lit_string {
};
}
/// Set the `extra_intervals` field of [`FieldChecker`] if it is
/// less than the proposed new value.
macro_rules! set_extra_intervals {
($SELF:expr, $NEW:expr) => {
if $SELF.extra_intervals < $NEW as usize {
$SELF.extra_intervals = $NEW as usize
}
};
}
/// Checks a number of expectations for the fields of a [`SelectStatement`].
#[derive(Default)]
struct FieldChecker {
/// `true` if the statement contains a `GROUP BY TIME` clause.
has_group_by_time: bool,
/// The number of additional intervals that must be read
/// for queries that group by time and use window functions such as
/// `DIFFERENCE` or `DERIVATIVE`. This ensures data for the first
/// window is available.
///
/// See: <https://github.com/influxdata/influxdb/blob/f365bb7e3a9c5e227dbf66d84adf674d3d127176/query/compile.go#L50>
extra_intervals: usize,
/// `true` if the interval was inherited from a parent.
/// If this is set, then an interval that was inherited will not cause
/// a query that shouldn't have an interval to fail.
@ -888,6 +947,9 @@ struct FieldChecker {
/// Accumulator for the number of aggregate or window expressions for the statement.
aggregate_count: usize,
/// Accumulator for the number of window expressions for the statement.
window_count: usize,
/// Accumulator for the number of selector expressions for the statement.
selector_count: usize,
}
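// Editorial sketch (not part of this diff): one way `extra_intervals` could be
// consumed downstream is to widen the scan's lower time bound so the first
// window has data. The helper name and the exact widening rule below are
// assumptions for illustration only, not code from this change.
fn widen_lower_bound_sketch(lower_ns: Option<i64>, interval_ns: i64, extra_intervals: usize) -> Option<i64> {
// e.g. GROUP BY TIME(10s) with moving_average(f, 5) => read 5 * 10s earlier.
lower_ns.map(|ns| ns - interval_ns * extra_intervals as i64)
}
// e.g. widen_lower_bound_sketch(Some(1_000_000_000_000), 10_000_000_000, 5) == Some(950_000_000_000)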
@ -942,7 +1004,7 @@ impl FieldChecker {
// Validate we are using a selector or raw query if non-aggregate fields are projected.
if self.has_non_aggregate_fields {
if self.aggregate_count > 0 {
if self.window_aggregate_count() > 0 {
return error::query("mixing aggregate and non-aggregate columns is not supported");
} else if self.selector_count > 1 {
return error::query(
@ -954,26 +1016,37 @@ impl FieldChecker {
// By this point the statement is valid, so let's
// determine the projection type
if self.has_top_bottom {
Ok(ProjectionType::TopBottomSelector)
Ok(if self.has_top_bottom {
ProjectionType::TopBottomSelector
} else if self.has_group_by_time {
Ok(ProjectionType::Aggregate)
if self.window_count > 0 {
ProjectionType::WindowAggregate
} else {
ProjectionType::Aggregate
}
} else if self.has_distinct {
Ok(ProjectionType::RawDistinct)
ProjectionType::RawDistinct
} else if self.selector_count == 1 && self.aggregate_count == 0 {
Ok(ProjectionType::Selector {
ProjectionType::Selector {
has_fields: self.has_non_aggregate_fields,
})
}
} else if self.selector_count > 1 || self.aggregate_count > 0 {
Ok(ProjectionType::Aggregate)
ProjectionType::Aggregate
} else if self.window_count > 0 {
ProjectionType::Window
} else {
Ok(ProjectionType::Raw)
}
ProjectionType::Raw
})
}
/// The total number of functions observed.
fn function_count(&self) -> usize {
self.aggregate_count + self.selector_count
self.window_aggregate_count() + self.selector_count
}
/// The total number of window and aggregate functions observed.
fn window_aggregate_count(&self) -> usize {
self.aggregate_count + self.window_count
}
}
@ -1195,9 +1268,12 @@ impl FieldChecker {
}
fn check_derivative(&mut self, name: &str, args: &[Expr]) -> Result<()> {
self.inc_aggregate_count();
self.inc_window_count();
check_exp_args!(name, 1, 2, args);
set_extra_intervals!(self, 1);
match args.get(1) {
Some(Expr::Literal(Literal::Duration(d))) if **d <= 0 => {
return error::query(format!("duration argument must be positive, got {d}"))
@ -1214,9 +1290,11 @@ impl FieldChecker {
}
fn check_elapsed(&mut self, name: &str, args: &[Expr]) -> Result<()> {
self.inc_aggregate_count();
self.inc_window_count();
check_exp_args!(name, 1, 2, args);
set_extra_intervals!(self, 1);
match args.get(1) {
Some(Expr::Literal(Literal::Duration(d))) if **d <= 0 => {
return error::query(format!("duration argument must be positive, got {d}"))
@ -1233,9 +1311,11 @@ impl FieldChecker {
}
fn check_difference(&mut self, name: &str, args: &[Expr]) -> Result<()> {
self.inc_aggregate_count();
self.inc_window_count();
check_exp_args!(name, 1, args);
set_extra_intervals!(self, 1);
self.check_nested_symbol(name, &args[0])
}
@ -1243,11 +1323,13 @@ impl FieldChecker {
self.inc_aggregate_count();
check_exp_args!("cumulative_sum", 1, args);
set_extra_intervals!(self, 1);
self.check_nested_symbol("cumulative_sum", &args[0])
}
fn check_moving_average(&mut self, args: &[Expr]) -> Result<()> {
self.inc_aggregate_count();
self.inc_window_count();
check_exp_args!("moving_average", 2, args);
let v = lit_integer!("moving_average", args, 1);
@ -1257,11 +1339,13 @@ impl FieldChecker {
));
}
set_extra_intervals!(self, v);
self.check_nested_symbol("moving_average", &args[0])
}
fn check_exponential_moving_average(&mut self, name: &str, args: &[Expr]) -> Result<()> {
self.inc_aggregate_count();
self.inc_window_count();
check_exp_args!(name, 2, 4, args);
let v = lit_integer!(name, args, 1);
@ -1269,6 +1353,8 @@ impl FieldChecker {
return error::query(format!("{name} period must be greater than 1, got {v}"));
}
set_extra_intervals!(self, v);
if let Some(v) = lit_integer!(name, args, 2?) {
match (v, name) {
(v, "triple_exponential_derivative") if v < 1 && v != -1 => {
@ -1299,7 +1385,7 @@ impl FieldChecker {
}
fn check_kaufmans(&mut self, name: &str, args: &[Expr]) -> Result<()> {
self.inc_aggregate_count();
self.inc_window_count();
check_exp_args!(name, 2, 3, args);
let v = lit_integer!(name, args, 1);
@ -1307,6 +1393,8 @@ impl FieldChecker {
return error::query(format!("{name} period must be greater than 1, got {v}"));
}
set_extra_intervals!(self, v);
if let Some(v) = lit_integer!(name, args, 2?) {
if v < 0 && v != -1 {
return error::query(format!(
@ -1319,7 +1407,7 @@ impl FieldChecker {
}
fn check_chande_momentum_oscillator(&mut self, name: &str, args: &[Expr]) -> Result<()> {
self.inc_aggregate_count();
self.inc_window_count();
check_exp_args!(name, 2, 4, args);
let v = lit_integer!(name, args, 1);
@ -1327,6 +1415,8 @@ impl FieldChecker {
return error::query(format!("{name} period must be greater than 1, got {v}"));
}
set_extra_intervals!(self, v);
if let Some(v) = lit_integer!(name, args, 2?) {
if v < 0 && v != -1 {
return error::query(format!(
@ -1401,11 +1491,16 @@ impl FieldChecker {
}
}
/// Increments the function call count
/// Increments the aggregate function call count
fn inc_aggregate_count(&mut self) {
self.aggregate_count += 1
}
/// Increments the window function call count
fn inc_window_count(&mut self) {
self.window_count += 1
}
fn inc_selector_count(&mut self) {
self.selector_count += 1
}
@ -1453,6 +1548,10 @@ pub(crate) enum ProjectionType {
/// A query that projects one or more aggregate functions or
/// two or more selector functions.
Aggregate,
/// A query that projects one or more window functions.
Window,
/// A query that projects a combination of window and nested aggregate functions.
WindowAggregate,
/// A query that projects a single selector function,
/// such as `last` or `first`.
Selector {
@ -1468,7 +1567,11 @@ pub(crate) enum ProjectionType {
#[derive(Default, Debug, Copy, Clone)]
struct SelectStatementInfo {
/// Identifies the projection type for the `SELECT` query.
pub projection_type: ProjectionType,
projection_type: ProjectionType,
/// Copied from [extra_intervals](FieldChecker::extra_intervals)
///
/// [See also](Select::extra_intervals).
extra_intervals: usize,
}
/// Gather information about the semantics of a [`SelectStatement`] and verify
@ -1518,8 +1621,14 @@ fn select_statement_info(
};
let projection_type = fc.check_fields(fields, fill)?;
let FieldChecker {
extra_intervals, ..
} = fc;
Ok(SelectStatementInfo { projection_type })
Ok(SelectStatementInfo {
projection_type,
extra_intervals,
})
}
#[cfg(test)]
@ -1591,6 +1700,22 @@ mod test {
ProjectionType::Selector { has_fields: false }
);
// updates extra_intervals
let info = select_statement_info(&parse_select("SELECT difference(foo) FROM cpu")).unwrap();
assert_matches!(info.projection_type, ProjectionType::Window);
assert_matches!(info.extra_intervals, 1);
// derives extra intervals from the window function
let info =
select_statement_info(&parse_select("SELECT moving_average(foo, 5) FROM cpu")).unwrap();
assert_matches!(info.projection_type, ProjectionType::Window);
assert_matches!(info.extra_intervals, 5);
// uses the maximum extra intervals
let info = select_statement_info(&parse_select(
"SELECT difference(foo), moving_average(foo, 4) FROM cpu",
))
.unwrap();
assert_matches!(info.extra_intervals, 4);
let info = select_statement_info(&parse_select("SELECT last(foo), bar FROM cpu")).unwrap();
assert_matches!(
info.projection_type,
@ -1610,6 +1735,12 @@ mod test {
let info = select_statement_info(&parse_select("SELECT count(foo) FROM cpu")).unwrap();
assert_matches!(info.projection_type, ProjectionType::Aggregate);
let info = select_statement_info(&parse_select(
"SELECT difference(count(foo)) FROM cpu GROUP BY TIME(10s)",
))
.unwrap();
assert_matches!(info.projection_type, ProjectionType::WindowAggregate);
let info = select_statement_info(&parse_select("SELECT top(foo, 3) FROM cpu")).unwrap();
assert_matches!(info.projection_type, ProjectionType::TopBottomSelector);
}
@ -2216,6 +2347,26 @@ mod test {
stmt.to_string(),
"SELECT time::timestamp AS time, host::tag AS host, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu GROUP BY cpu::tag, host::tag, region::tag"
);
//
// TIME
//
// Explicitly adds an upper bound for the time-range for aggregate queries
let stmt = parse_select("SELECT mean(usage_idle) FROM cpu WHERE time >= '2022-04-09T12:13:14Z' GROUP BY TIME(30s)");
let stmt = rewrite_select_statement(&namespace, &stmt).unwrap();
assert_eq!(
stmt.to_string(),
"SELECT time::timestamp AS time, mean(usage_idle::float) AS mean FROM cpu WHERE time >= 1649506394000000000 AND time <= 1672531200000000000 GROUP BY TIME(30s)"
);
// Does not add an upper bound time range if already specified
let stmt = parse_select("SELECT mean(usage_idle) FROM cpu WHERE time >= '2022-04-09T12:13:14Z' AND time < '2022-04-10T12:00:00Z' GROUP BY TIME(30s)");
let stmt = rewrite_select_statement(&namespace, &stmt).unwrap();
assert_eq!(
stmt.to_string(),
"SELECT time::timestamp AS time, mean(usage_idle::float) AS mean FROM cpu WHERE time >= 1649506394000000000 AND time <= 1649591999999999999 GROUP BY TIME(30s)"
);
}
/// Uncategorized fallible cases

View File

@ -225,6 +225,7 @@ pub async fn create_compactor_server_type(
partition_timeout: Duration::from_secs(compactor_config.partition_timeout_secs),
partitions_source,
shadow_mode: compactor_config.shadow_mode,
enable_scratchpad: compactor_config.enable_scratchpad,
ignore_partition_skip_marker: compactor_config.ignore_partition_skip_marker,
shard_config,
min_num_l1_files_to_compact: compactor_config.min_num_l1_files_to_compact,

View File

@ -83,7 +83,7 @@ impl ParquetChunk {
///
/// See [`ParquetExecInput`] for more information.
///
/// [`ParquetExec`]: datafusion::physical_plan::file_format::ParquetExec
/// [`ParquetExec`]: datafusion::datasource::physical_plan::ParquetExec
pub fn parquet_exec_input(&self) -> ParquetExecInput {
let path: ParquetFilePath = self.parquet_file.as_ref().into();
self.store.parquet_exec_input(&path, self.file_size_bytes())

View File

@ -13,13 +13,14 @@ use arrow::{
use bytes::Bytes;
use data_types::PartitionId;
use datafusion::{
datasource::{listing::PartitionedFile, object_store::ObjectStoreUrl},
datasource::{
listing::PartitionedFile,
object_store::ObjectStoreUrl,
physical_plan::{FileScanConfig, ParquetExec},
},
error::DataFusionError,
execution::memory_pool::MemoryPool,
physical_plan::{
file_format::{FileScanConfig, ParquetExec},
ExecutionPlan, SendableRecordBatchStream, Statistics,
},
physical_plan::{ExecutionPlan, SendableRecordBatchStream, Statistics},
prelude::SessionContext,
};
use datafusion_util::config::{iox_session_config, register_iox_object_store};
@ -93,7 +94,7 @@ impl std::fmt::Display for StorageId {
/// The files shall be grouped by [`object_store_url`](Self::object_store_url). For each object store, you shall
/// create one [`ParquetExec`] and put each file into its own "file group".
///
/// [`ParquetExec`]: datafusion::physical_plan::file_format::ParquetExec
/// [`ParquetExec`]: datafusion::datasource::physical_plan::ParquetExec
#[derive(Debug, Clone)]
pub struct ParquetExecInput {
/// Store where the file is located.
@ -286,7 +287,7 @@ impl ParquetStorage {
///
/// See [`ParquetExecInput`] for more information.
///
/// [`ParquetExec`]: datafusion::physical_plan::file_format::ParquetExec
/// [`ParquetExec`]: datafusion::datasource::physical_plan::ParquetExec
pub fn parquet_exec_input(&self, path: &ParquetFilePath, file_size: usize) -> ParquetExecInput {
ParquetExecInput {
object_store_url: ObjectStoreUrl::parse(format!("iox://{}/", self.id))

View File

@ -23,16 +23,13 @@ use datafusion::{
file_format::{parquet::ParquetFormat, FileFormat},
listing::PartitionedFile,
object_store::ObjectStoreUrl,
physical_plan::{FileScanConfig, ParquetExec},
},
execution::{
context::{SessionState, TaskContext},
runtime_env::RuntimeEnv,
},
physical_plan::{
execute_stream,
file_format::{FileScanConfig, ParquetExec},
SendableRecordBatchStream, Statistics,
},
physical_plan::{execute_stream, SendableRecordBatchStream, Statistics},
prelude::SessionContext,
};
use datafusion_util::config::{iox_session_config, register_iox_object_store};

View File

@ -11,7 +11,11 @@ use cache_system::{
loader::{metrics::MetricsLoader, FunctionLoader},
resource_consumption::FunctionEstimator,
};
use data_types::{ColumnId, PartitionId};
use data_types::{
partition_template::{build_column_values, ColumnValue},
ColumnId, Partition, PartitionId,
};
use datafusion::scalar::ScalarValue;
use iox_catalog::interface::Catalog;
use iox_time::TimeProvider;
use schema::sort::SortKey;
@ -70,11 +74,7 @@ impl PartitionCache {
.await
.expect("retry forever")?;
let sort_key = partition.sort_key().map(|sort_key| {
Arc::new(PartitionSortKey::new(sort_key, &extra.column_id_map_rev))
});
Some(CachedPartition { sort_key })
Some(CachedPartition::new(partition, &extra))
}
});
let loader = Arc::new(MetricsLoader::new(
@ -144,14 +144,118 @@ impl PartitionCache {
.await
.and_then(|p| p.sort_key)
}
/// Get known column ranges.
#[allow(dead_code)]
pub async fn column_ranges(
&self,
cached_table: Arc<CachedTable>,
partition_id: PartitionId,
span: Option<Span>,
) -> Option<ColumnRanges> {
self.cache
.get(partition_id, (cached_table, span))
.await
.map(|p| p.column_ranges)
}
}
/// Represent known min/max values for a specific column.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ColumnRange {
pub min_value: Arc<ScalarValue>,
pub max_value: Arc<ScalarValue>,
}
/// Represents the known min/max values for a subset (not all) of the columns in a partition.
///
/// The values may not actually occur in any row.
///
/// These ranges apply to ALL rows (esp. in ALL files and ingester chunks) within a given partition.
pub type ColumnRanges = Arc<HashMap<Arc<str>, ColumnRange>>;
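// Editorial sketch (not part of this diff): how a reader of `ColumnRanges` might
// use the min/max bounds to prune a partition for an equality predicate. The
// bounds are simplified here to `&str` (instead of `ScalarValue`) and the
// function name `could_contain_sketch` is hypothetical.
fn could_contain_sketch(
ranges: &std::collections::HashMap<&str, (&str, &str)>, // column -> (min, max)
col: &str,
value: &str,
) -> bool {
match ranges.get(col) {
// Value outside the known [min, max] range => the partition can be pruned.
Some((min, max)) => *min <= value && value <= *max,
// No range recorded for this column => cannot prune, keep the partition.
None => true,
}
}
// e.g. with {"tag1": ("v2", "v2")}: "v2" may be present, "v9" cannot be, and an
// unknown column such as "tag3" never allows pruning.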
#[derive(Debug, Clone)]
struct CachedPartition {
sort_key: Option<Arc<PartitionSortKey>>,
column_ranges: ColumnRanges,
}
impl CachedPartition {
fn new(partition: Partition, table: &CachedTable) -> Self {
let sort_key = partition
.sort_key()
.map(|sort_key| Arc::new(PartitionSortKey::new(sort_key, &table.column_id_map_rev)));
let mut column_ranges =
build_column_values(&table.partition_template, partition.partition_key.inner())
.filter_map(|(col, val)| {
// resolve the column name to an already-existing Arc for cheaper storage
let col = Arc::clone(table.column_id_map_rev.get_key_value(col)?.0);
let range = match val {
ColumnValue::Identity(s) => {
let s = Arc::new(ScalarValue::from(s.as_ref()));
ColumnRange {
min_value: Arc::clone(&s),
max_value: s,
}
}
ColumnValue::Prefix(p) => {
if p.is_empty() {
// full range => value is useless
return None;
}
// If the partition only has a prefix of the tag value (it was truncated) then form a conservative
// range:
//
//
// # Minimum
// Use the prefix itself.
//
// Note that the minimum is inclusive.
//
// All values in the partition are either:
// - identical to the prefix, in which case they are included by the inclusive minimum
// - of the form `"<prefix><s>"`, where it holds that `"<prefix><s>" > "<prefix>"` for all
// strings `"<s>"`.
//
//
// # Maximum
// Use `"<prefix_excluding_last_char><char::max>"`.
//
// Note that the maximum is inclusive.
//
// All strings in this partition must be smaller than this constructed maximum, because
// string comparison is front-to-back and `"<prefix_excluding_last_char><char::max>" > "<prefix>"`.
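// For example (mirroring `test_column_ranges` below), a value truncated to the
// prefix "v2" yields the conservative range min = "v2", max = "v\u{10FFFF}".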
let min_value = Arc::new(ScalarValue::from(p.as_ref()));
let mut chars = p.as_ref().chars().collect::<Vec<_>>();
*chars.last_mut().expect("checked that prefix is not empty") =
std::char::MAX;
let max_value = Arc::new(ScalarValue::from(
chars.into_iter().collect::<String>().as_str(),
));
ColumnRange {
min_value,
max_value,
}
}
};
Some((col, range))
})
.collect::<HashMap<_, _>>();
column_ranges.shrink_to_fit();
Self {
sort_key,
column_ranges: Arc::new(column_ranges),
}
}
/// RAM-bytes EXCLUDING `self`.
fn size(&self) -> usize {
// Arc content
@ -159,6 +263,13 @@ impl CachedPartition {
.as_ref()
.map(|sk| sk.size())
.unwrap_or_default()
+ std::mem::size_of::<HashMap<Arc<str>, ColumnRange>>()
+ (self.column_ranges.capacity() * std::mem::size_of::<(Arc<str>, ColumnRange)>())
+ self
.column_ranges
.iter()
.map(|(col, range)| col.len() + range.min_value.size() + range.max_value.size())
.sum::<usize>()
}
}
@ -206,6 +317,9 @@ mod tests {
use super::*;
use crate::cache::{ram::test_util::test_ram_pool, test_util::assert_histogram_metric_count};
use data_types::{partition_template::TablePartitionTemplateOverride, ColumnType};
use generated_types::influxdata::iox::partition_template::v1::{
template_part::Part, PartitionTemplate, TemplatePart,
};
use iox_tests::TestCatalog;
use schema::{Schema, SchemaBuilder};
@ -294,6 +408,198 @@ mod tests {
}
}
#[tokio::test]
async fn test_column_ranges() {
let catalog = TestCatalog::new();
let ns = catalog.create_namespace_1hr_retention("ns").await;
let t = ns
.create_table_with_partition_template(
"table",
Some(PartitionTemplate {
parts: vec![
TemplatePart {
part: Some(Part::TagValue(String::from("tag2"))),
},
TemplatePart {
part: Some(Part::TagValue(String::from("tag1"))),
},
],
}),
)
.await;
let c1 = t.create_column("tag1", ColumnType::Tag).await;
let c2 = t.create_column("tag2", ColumnType::Tag).await;
let c3 = t.create_column("tag3", ColumnType::Tag).await;
let c4 = t.create_column("time", ColumnType::Time).await;
// See `data_types::partition_template` for the template language.
// Two normal values.
let p1 = t.create_partition("v1|v2").await.partition.clone();
// 2nd part is NULL
let p2 = t.create_partition("v1|!").await.partition.clone();
// 2nd part is empty
let p3 = t.create_partition("v1|^").await.partition.clone();
// 2nd part is truncated (i.e. the original value was longer)
let p4 = t.create_partition("v1|v2#").await.partition.clone();
// 2nd part is truncated to empty string
let p5 = t.create_partition("v1|#").await.partition.clone();
let cached_table = Arc::new(CachedTable {
id: t.table.id,
schema: schema(),
column_id_map: HashMap::from([
(c1.column.id, Arc::from(c1.column.name.clone())),
(c2.column.id, Arc::from(c2.column.name.clone())),
(c3.column.id, Arc::from(c3.column.name.clone())),
(c4.column.id, Arc::from(c4.column.name.clone())),
]),
column_id_map_rev: HashMap::from([
(Arc::from(c1.column.name.clone()), c1.column.id),
(Arc::from(c2.column.name.clone()), c2.column.id),
(Arc::from(c3.column.name.clone()), c3.column.id),
(Arc::from(c4.column.name.clone()), c4.column.id),
]),
primary_key_column_ids: [c1.column.id, c2.column.id, c3.column.id, c4.column.id].into(),
partition_template: t.table.partition_template.clone(),
});
let cache = PartitionCache::new(
catalog.catalog(),
BackoffConfig::default(),
catalog.time_provider(),
&catalog.metric_registry(),
test_ram_pool(),
true,
);
let ranges1a = cache
.column_ranges(Arc::clone(&cached_table), p1.id, None)
.await
.unwrap();
assert_eq!(
ranges1a.as_ref(),
&HashMap::from([
(
Arc::from("tag1"),
ColumnRange {
min_value: Arc::new(ScalarValue::from("v2")),
max_value: Arc::new(ScalarValue::from("v2"))
}
),
(
Arc::from("tag2"),
ColumnRange {
min_value: Arc::new(ScalarValue::from("v1")),
max_value: Arc::new(ScalarValue::from("v1"))
}
),
]),
);
assert!(Arc::ptr_eq(
&ranges1a.get("tag1").unwrap().min_value,
&ranges1a.get("tag1").unwrap().max_value,
));
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 1);
let ranges2 = cache
.column_ranges(Arc::clone(&cached_table), p2.id, None)
.await
.unwrap();
assert_eq!(
ranges2.as_ref(),
&HashMap::from([(
Arc::from("tag2"),
ColumnRange {
min_value: Arc::new(ScalarValue::from("v1")),
max_value: Arc::new(ScalarValue::from("v1"))
}
),]),
);
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 2);
let ranges3 = cache
.column_ranges(Arc::clone(&cached_table), p3.id, None)
.await
.unwrap();
assert_eq!(
ranges3.as_ref(),
&HashMap::from([
(
Arc::from("tag1"),
ColumnRange {
min_value: Arc::new(ScalarValue::from("")),
max_value: Arc::new(ScalarValue::from(""))
}
),
(
Arc::from("tag2"),
ColumnRange {
min_value: Arc::new(ScalarValue::from("v1")),
max_value: Arc::new(ScalarValue::from("v1"))
}
),
]),
);
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 3);
let ranges4 = cache
.column_ranges(Arc::clone(&cached_table), p4.id, None)
.await
.unwrap();
assert_eq!(
ranges4.as_ref(),
&HashMap::from([
(
Arc::from("tag1"),
ColumnRange {
min_value: Arc::new(ScalarValue::from("v2")),
max_value: Arc::new(ScalarValue::from("v\u{10FFFF}"))
}
),
(
Arc::from("tag2"),
ColumnRange {
min_value: Arc::new(ScalarValue::from("v1")),
max_value: Arc::new(ScalarValue::from("v1"))
}
),
]),
);
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 4);
let ranges5 = cache
.column_ranges(Arc::clone(&cached_table), p5.id, None)
.await
.unwrap();
assert_eq!(
ranges5.as_ref(),
&HashMap::from([(
Arc::from("tag2"),
ColumnRange {
min_value: Arc::new(ScalarValue::from("v1")),
max_value: Arc::new(ScalarValue::from("v1"))
}
),]),
);
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 5);
let ranges1b = cache
.column_ranges(Arc::clone(&cached_table), p1.id, None)
.await
.unwrap();
assert!(Arc::ptr_eq(&ranges1a, &ranges1b));
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 5);
// non-existing partition
for _ in 0..2 {
let res = cache
.column_ranges(Arc::clone(&cached_table), PartitionId::new(i64::MAX), None)
.await;
assert_eq!(res, None);
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 6);
}
}
#[tokio::test]
async fn test_cache_sharing() {
let catalog = TestCatalog::new();
@ -336,13 +642,22 @@ mod tests {
cache
.sort_key(Arc::clone(&cached_table), p3.id, &Vec::new(), None)
.await;
cache
.column_ranges(Arc::clone(&cached_table), p3.id, None)
.await;
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 1);
cache
.sort_key(Arc::clone(&cached_table), p2.id, &Vec::new(), None)
.await;
cache
.column_ranges(Arc::clone(&cached_table), p2.id, None)
.await;
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 2);
cache
.column_ranges(Arc::clone(&cached_table), p1.id, None)
.await;
cache
.sort_key(Arc::clone(&cached_table), p1.id, &Vec::new(), None)
.await;

View File

@ -9,7 +9,7 @@ use crate::{
use async_trait::async_trait;
use data_types::NamespaceId;
use datafusion::{
catalog::{catalog::CatalogProvider, schema::SchemaProvider},
catalog::{schema::SchemaProvider, CatalogProvider},
datasource::TableProvider,
error::DataFusionError,
};

View File

@ -24,8 +24,8 @@ use arrow::datatypes::{DataType, TimeUnit};
use datafusion::{
error::DataFusionError,
logical_expr::{
function, BuiltinScalarFunction, ReturnTypeFunction, ScalarFunctionImplementation,
ScalarUDF, Signature, TypeSignature, Volatility,
BuiltinScalarFunction, ReturnTypeFunction, ScalarFunctionImplementation, ScalarUDF,
Signature, TypeSignature, Volatility,
},
};
use once_cell::sync::Lazy;
@ -41,7 +41,7 @@ pub const DATE_BIN_GAPFILL_UDF_NAME: &str = "date_bin_gapfill";
pub(crate) static DATE_BIN_GAPFILL: Lazy<Arc<ScalarUDF>> = Lazy::new(|| {
// DATE_BIN_GAPFILL should have the same signature as DATE_BIN,
// so that just adding _GAPFILL can turn a query into a gap-filling query.
let mut signatures = function::signature(&BuiltinScalarFunction::DateBin);
let mut signatures = BuiltinScalarFunction::DateBin.signature();
// We don't want this to be optimized away before we can give a helpful error message
signatures.volatility = Volatility::Volatile;

View File

@ -30,9 +30,9 @@ bytes = { version = "1" }
chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "serde"] }
crossbeam-utils = { version = "0.8" }
crypto-common = { version = "0.1", default-features = false, features = ["std"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "91e75d7e6303c1a7331e8e90eaad9b095ace929b" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "91e75d7e6303c1a7331e8e90eaad9b095ace929b", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "91e75d7e6303c1a7331e8e90eaad9b095ace929b", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "84e49771b7403b3d313d8493b61d2d58dcdd7514" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "84e49771b7403b3d313d8493b61d2d58dcdd7514", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "84e49771b7403b3d313d8493b61d2d58dcdd7514", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
digest = { version = "0.10", features = ["mac", "std"] }
either = { version = "1" }
fixedbitset = { version = "0.4" }
@ -46,7 +46,7 @@ futures-sink = { version = "0.3" }
futures-task = { version = "0.3", default-features = false, features = ["std"] }
futures-util = { version = "0.3", features = ["channel", "io", "sink"] }
getrandom = { version = "0.2", default-features = false, features = ["std"] }
hashbrown = { version = "0.13", features = ["raw"] }
hashbrown = { version = "0.14", features = ["raw"] }
indexmap = { version = "1", default-features = false, features = ["std"] }
itertools = { version = "0.10" }
libc = { version = "0.2", features = ["extra_traits"] }
@ -120,7 +120,7 @@ futures-sink = { version = "0.3" }
futures-task = { version = "0.3", default-features = false, features = ["std"] }
futures-util = { version = "0.3", features = ["channel", "io", "sink"] }
getrandom = { version = "0.2", default-features = false, features = ["std"] }
hashbrown = { version = "0.13", features = ["raw"] }
hashbrown = { version = "0.14", features = ["raw"] }
heck = { version = "0.4", features = ["unicode"] }
indexmap = { version = "1", default-features = false, features = ["std"] }
itertools = { version = "0.10" }
@ -205,15 +205,13 @@ rustls = { version = "0.21", features = ["dangerous_configuration"] }
scopeguard = { version = "1" }
webpki = { version = "0.22", default-features = false, features = ["std"] }
winapi = { version = "0.3", default-features = false, features = ["basetsd", "consoleapi", "errhandlingapi", "fileapi", "handleapi", "impl-debug", "impl-default", "knownfolders", "minwinbase", "minwindef", "ntsecapi", "ntstatus", "objbase", "processenv", "shellapi", "shlobj", "std", "stringapiset", "synchapi", "sysinfoapi", "timezoneapi", "winbase", "wincon", "winerror", "winnt", "winreg", "winuser", "ws2ipdef", "ws2tcpip", "wtypesbase"] }
windows-sys-53888c27b7ba5cf4 = { package = "windows-sys", version = "0.45", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_LibraryLoader", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_WindowsProgramming", "Win32_UI_Input_KeyboardAndMouse"] }
windows-sys-c8eced492e86ede7 = { package = "windows-sys", version = "0.48", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_Threading", "Win32_System_WindowsProgramming", "Win32_UI_Shell"] }
windows-sys = { version = "0.48", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_Threading", "Win32_System_WindowsProgramming", "Win32_UI_Shell"] }
[target.x86_64-pc-windows-msvc.build-dependencies]
once_cell = { version = "1", default-features = false, features = ["unstable"] }
scopeguard = { version = "1" }
webpki = { version = "0.22", default-features = false, features = ["std"] }
winapi = { version = "0.3", default-features = false, features = ["basetsd", "consoleapi", "errhandlingapi", "fileapi", "handleapi", "impl-debug", "impl-default", "knownfolders", "minwinbase", "minwindef", "ntsecapi", "ntstatus", "objbase", "processenv", "shellapi", "shlobj", "std", "stringapiset", "synchapi", "sysinfoapi", "timezoneapi", "winbase", "wincon", "winerror", "winnt", "winreg", "winuser", "ws2ipdef", "ws2tcpip", "wtypesbase"] }
windows-sys-53888c27b7ba5cf4 = { package = "windows-sys", version = "0.45", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_LibraryLoader", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_WindowsProgramming", "Win32_UI_Input_KeyboardAndMouse"] }
windows-sys-c8eced492e86ede7 = { package = "windows-sys", version = "0.48", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_Threading", "Win32_System_WindowsProgramming", "Win32_UI_Shell"] }
windows-sys = { version = "0.48", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_Threading", "Win32_System_WindowsProgramming", "Win32_UI_Shell"] }
### END HAKARI SECTION