Merge branch 'main' into dom/perf-sparse-reup

pull/24376/head
Dom 2023-06-16 15:55:21 +01:00 committed by GitHub
commit 27977299ad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
37 changed files with 1398 additions and 333 deletions

Cargo.lock (generated)
View File

@ -43,9 +43,9 @@ dependencies = [
[[package]]
name = "aho-corasick"
version = "1.0.1"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67fc08ce920c31afb70f013dcce1bfc3a3195de6a228474e45e1f145b36f8d04"
checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41"
dependencies = [
"memchr",
]
@ -67,9 +67,9 @@ dependencies = [
[[package]]
name = "allocator-api2"
version = "0.2.14"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4f263788a35611fba42eb41ff811c5d0360c58b97402570312a350736e2542e"
checksum = "56fc6cf8dc8c4158eed8649f9b8b0ea1518eb62b544fe9490d66fa0b349eafe9"
[[package]]
name = "android-tzdata"
@ -155,9 +155,9 @@ checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545"
[[package]]
name = "arrayvec"
version = "0.7.2"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
checksum = "8868f09ff8cea88b079da74ae569d9b8c62a23c68c746240b704ee6f7525c89c"
[[package]]
name = "arrow"
@ -494,7 +494,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]
[[package]]
@ -505,7 +505,7 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]
[[package]]
@ -644,9 +644,9 @@ dependencies = [
[[package]]
name = "blake3"
version = "1.3.3"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42ae2468a89544a466886840aa467a25b766499f4f04bf7d9fcd10ecee9fccef"
checksum = "729b71f35bd3fa1a4c86b85d32c8b9069ea7fe14f7a53cfabb65f62d4265b888"
dependencies = [
"arrayref",
"arrayvec",
@ -688,9 +688,9 @@ dependencies = [
[[package]]
name = "bstr"
version = "1.4.0"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3d4260bcc2e8fc9df1eac4919a720effeb63a3f0952f5bf4944adfa18897f09"
checksum = "a246e68bb43f6cd9db24bea052a53e40405417c5fb372e3d1a8a7f770a564ef5"
dependencies = [
"memchr",
"once_cell",
@ -700,9 +700,9 @@ dependencies = [
[[package]]
name = "bumpalo"
version = "3.12.2"
version = "3.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c6ed94e98ecff0c12dd1b04c15ec0d7d9458ca8fe806cea6f12954efe74c63b"
checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
[[package]]
name = "bytemuck"
@ -902,7 +902,7 @@ dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]
[[package]]
@ -1036,9 +1036,9 @@ dependencies = [
[[package]]
name = "console"
version = "0.15.6"
version = "0.15.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0525278dce688103060006713371cedbad27186c7d913f33d866b498da0f595"
checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8"
dependencies = [
"encode_unicode",
"lazy_static",
@ -1107,9 +1107,9 @@ dependencies = [
[[package]]
name = "constant_time_eq"
version = "0.2.5"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13418e745008f7349ec7e449155f419a61b92b58a99cc3616942b926825ec76b"
checksum = "21a53c0a4d288377e7415b53dcfc3c04da5cdc2cc95c8d5ac178b58f0b861ad6"
[[package]]
name = "core-foundation-sys"
@ -1128,9 +1128,9 @@ dependencies = [
[[package]]
name = "cpufeatures"
version = "0.2.7"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e4c1eaa2012c47becbbad2ab175484c2a84d1185b566fb2cc5b8707343dfe58"
checksum = "03e69e28e9f7f77debdedbaafa2866e1de9ba56df55a8bd7cfc724c25a09987c"
dependencies = [
"libc",
]
@ -1238,14 +1238,14 @@ dependencies = [
[[package]]
name = "crossbeam-epoch"
version = "0.9.14"
version = "0.9.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695"
checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7"
dependencies = [
"autocfg",
"cfg-if",
"crossbeam-utils",
"memoffset 0.8.0",
"memoffset 0.9.0",
"scopeguard",
]
@ -1286,9 +1286,9 @@ dependencies = [
[[package]]
name = "csv"
version = "1.2.1"
version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b015497079b9a9d69c02ad25de6c0a6edef051ea6360a327d0bd05802ef64ad"
checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086"
dependencies = [
"csv-core",
"itoa",
@ -1325,7 +1325,7 @@ dependencies = [
"hashbrown 0.12.3",
"lock_api",
"once_cell",
"parking_lot_core 0.9.7",
"parking_lot_core 0.9.8",
]
[[package]]
@ -1355,7 +1355,7 @@ dependencies = [
[[package]]
name = "datafusion"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"ahash 0.8.3",
"arrow",
@ -1377,7 +1377,7 @@ dependencies = [
"flate2",
"futures",
"glob",
"hashbrown 0.13.2",
"hashbrown 0.14.0",
"indexmap",
"itertools",
"lazy_static",
@ -1393,7 +1393,6 @@ dependencies = [
"sqlparser",
"tempfile",
"tokio",
"tokio-stream",
"tokio-util",
"url",
"uuid",
@ -1404,7 +1403,7 @@ dependencies = [
[[package]]
name = "datafusion-common"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"arrow",
"arrow-array",
@ -1418,12 +1417,12 @@ dependencies = [
[[package]]
name = "datafusion-execution"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"dashmap",
"datafusion-common",
"datafusion-expr",
"hashbrown 0.13.2",
"hashbrown 0.14.0",
"log",
"object_store",
"parking_lot 0.12.1",
@ -1435,7 +1434,7 @@ dependencies = [
[[package]]
name = "datafusion-expr"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"ahash 0.8.3",
"arrow",
@ -1449,7 +1448,7 @@ dependencies = [
[[package]]
name = "datafusion-optimizer"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"arrow",
"async-trait",
@ -1457,7 +1456,7 @@ dependencies = [
"datafusion-common",
"datafusion-expr",
"datafusion-physical-expr",
"hashbrown 0.13.2",
"hashbrown 0.14.0",
"itertools",
"log",
"regex-syntax 0.7.2",
@ -1466,7 +1465,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"ahash 0.8.3",
"arrow",
@ -1480,7 +1479,7 @@ dependencies = [
"datafusion-expr",
"datafusion-row",
"half 2.2.1",
"hashbrown 0.13.2",
"hashbrown 0.14.0",
"indexmap",
"itertools",
"lazy_static",
@ -1498,7 +1497,7 @@ dependencies = [
[[package]]
name = "datafusion-proto"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"arrow",
"chrono",
@ -1512,7 +1511,7 @@ dependencies = [
[[package]]
name = "datafusion-row"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"arrow",
"datafusion-common",
@ -1523,7 +1522,7 @@ dependencies = [
[[package]]
name = "datafusion-sql"
version = "26.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=91e75d7e6303c1a7331e8e90eaad9b095ace929b#91e75d7e6303c1a7331e8e90eaad9b095ace929b"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=84e49771b7403b3d313d8493b61d2d58dcdd7514#84e49771b7403b3d313d8493b61d2d58dcdd7514"
dependencies = [
"arrow",
"arrow-schema",
@ -1891,7 +1890,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]
[[package]]
@ -1994,9 +1993,9 @@ dependencies = [
[[package]]
name = "gimli"
version = "0.27.2"
version = "0.27.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4"
checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e"
[[package]]
name = "glob"
@ -2112,9 +2111,6 @@ name = "hashbrown"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e"
dependencies = [
"ahash 0.8.3",
]
[[package]]
name = "hashbrown"
@ -2128,11 +2124,11 @@ dependencies = [
[[package]]
name = "hashlink"
version = "0.8.2"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0761a1b9491c4f2e3d66aa0f62d0fba0af9a0e2852e4d48ea506632a4b56e6aa"
checksum = "312f66718a2d7789ffef4f4b7b213138ed9f1eb3aa1d0d82fc99f88fb3ffd26f"
dependencies = [
"hashbrown 0.13.2",
"hashbrown 0.14.0",
]
[[package]]
@ -2300,7 +2296,7 @@ dependencies = [
"hyper",
"rustls 0.21.2",
"tokio",
"tokio-rustls 0.24.0",
"tokio-rustls 0.24.1",
]
[[package]]
@ -2317,9 +2313,9 @@ dependencies = [
[[package]]
name = "iana-time-zone"
version = "0.1.56"
version = "0.1.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0722cd7114b7de04316e7ea5456a0bbb20e4adb46fd27a3697adb812cff0f37c"
checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613"
dependencies = [
"android_system_properties",
"core-foundation-sys",
@ -3134,9 +3130,9 @@ dependencies = [
[[package]]
name = "js-sys"
version = "0.3.63"
version = "0.3.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f37a4a5928311ac501dee68b3c7613a1037d0edb30c8e5427bd832d55d1b790"
checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a"
dependencies = [
"wasm-bindgen",
]
@ -3242,9 +3238,9 @@ checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
[[package]]
name = "linux-raw-sys"
version = "0.3.7"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ece97ea872ece730aed82664c424eb4c8291e1ff2480247ccf7409044bc6479f"
checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519"
[[package]]
name = "lock_api"
@ -3355,9 +3351,9 @@ dependencies = [
[[package]]
name = "memoffset"
version = "0.8.0"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1"
checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c"
dependencies = [
"autocfg",
]
@ -3413,14 +3409,13 @@ dependencies = [
[[package]]
name = "mio"
version = "0.8.6"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b9d9a46eff5b4ff64b45a9e316a6d1e0bc719ef429cbec4dc630684212bfdf9"
checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2"
dependencies = [
"libc",
"log",
"wasi",
"windows-sys 0.45.0",
"windows-sys 0.48.0",
]
[[package]]
@ -3667,9 +3662,9 @@ dependencies = [
[[package]]
name = "object"
version = "0.30.3"
version = "0.30.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea86265d3d3dcb6a27fc51bd29a4bf387fae9d2986b823079d4986af253eb439"
checksum = "03b4680b86d9cfafba8fc491dc9b6df26b68cf40e9e6cd73909194759a63c385"
dependencies = [
"memchr",
]
@ -3734,7 +3729,7 @@ version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
dependencies = [
"parking_lot_core 0.9.7",
"parking_lot_core 0.9.8",
]
[[package]]
@ -3804,7 +3799,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
dependencies = [
"lock_api",
"parking_lot_core 0.9.7",
"parking_lot_core 0.9.8",
]
[[package]]
@ -3823,15 +3818,15 @@ dependencies = [
[[package]]
name = "parking_lot_core"
version = "0.9.7"
version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521"
checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447"
dependencies = [
"cfg-if",
"libc",
"redox_syscall 0.2.16",
"redox_syscall 0.3.5",
"smallvec",
"windows-sys 0.45.0",
"windows-targets 0.48.0",
]
[[package]]
@ -4013,7 +4008,7 @@ dependencies = [
"pest_meta",
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]
[[package]]
@ -4092,7 +4087,7 @@ checksum = "39407670928234ebc5e6e580247dd567ad73a3578460c5990f9503df207e8f07"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]
[[package]]
@ -4225,9 +4220,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068"
[[package]]
name = "proc-macro2"
version = "1.0.58"
version = "1.0.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa1fb82fc0c281dd9671101b66b771ebbe1eaf967b96ac8740dcba4b70005ca8"
checksum = "dec2b086b7a862cf4de201096214fa870344cf922b2b30c167badb3af3195406"
dependencies = [
"unicode-ident",
]
@ -4417,9 +4412,9 @@ dependencies = [
[[package]]
name = "quote"
version = "1.0.27"
version = "1.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f4f29d145265ec1c483c7c654450edde0bfe043d3938d6972630663356d9500"
checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488"
dependencies = [
"proc-macro2",
]
@ -4575,7 +4570,7 @@ dependencies = [
"serde_json",
"serde_urlencoded",
"tokio",
"tokio-rustls 0.24.0",
"tokio-rustls 0.24.1",
"tokio-util",
"tower-service",
"url",
@ -4836,7 +4831,7 @@ checksum = "d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]
[[package]]
@ -5060,9 +5055,9 @@ dependencies = [
[[package]]
name = "sha2"
version = "0.10.6"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0"
checksum = "479fb9d862239e610720565ca91403019f2f00410f1864c5aa7479b950a76ed8"
dependencies = [
"cfg-if",
"cpufeatures",
@ -5389,9 +5384,9 @@ dependencies = [
[[package]]
name = "subtle"
version = "2.4.1"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601"
checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
[[package]]
name = "symbolic-common"
@ -5429,9 +5424,9 @@ dependencies = [
[[package]]
name = "syn"
version = "2.0.16"
version = "2.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6f671d4b5ffdb8eadec19c0ae67fe2639df8684bd7bc4b83d986b8db549cf01"
checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e"
dependencies = [
"proc-macro2",
"quote",
@ -5540,7 +5535,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]
[[package]]
@ -5668,7 +5663,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]
[[package]]
@ -5684,9 +5679,9 @@ dependencies = [
[[package]]
name = "tokio-rustls"
version = "0.24.0"
version = "0.24.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0d409377ff5b1e3ca6437aa86c1eb7d40c134bfec254e44c830defa92669db5"
checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081"
dependencies = [
"rustls 0.21.2",
"tokio",
@ -5741,9 +5736,9 @@ dependencies = [
[[package]]
name = "toml_edit"
version = "0.19.9"
version = "0.19.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92d964908cec0d030b812013af25a0e57fddfadb1e066ecc6681d86253129d4f"
checksum = "2380d56e8670370eee6566b0bfd4265f65b3f432e8c6d85623f728d4fa31f739"
dependencies = [
"indexmap",
"serde",
@ -5775,13 +5770,13 @@ dependencies = [
"prost",
"rustls-pemfile",
"tokio",
"tokio-rustls 0.24.0",
"tokio-rustls 0.24.1",
"tokio-stream",
"tower",
"tower-layer",
"tower-service",
"tracing",
"webpki-roots 0.23.0",
"webpki-roots 0.23.1",
]
[[package]]
@ -5942,7 +5937,7 @@ checksum = "0f57e3ca2a01450b1a921183a9c9cbfda207fd822cef4ccb00a65402cbba7a74"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
]
[[package]]
@ -6073,9 +6068,9 @@ checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460"
[[package]]
name = "unicode-ident"
version = "1.0.8"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"
checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0"
[[package]]
name = "unicode-normalization"
@ -6219,11 +6214,10 @@ dependencies = [
[[package]]
name = "want"
version = "0.3.0"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ce8a968cb1cd110d136ff8b819a556d6fb6d919363c61534f6860c7eb172ba0"
checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e"
dependencies = [
"log",
"try-lock",
]
@ -6235,9 +6229,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wasm-bindgen"
version = "0.2.86"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bba0e8cb82ba49ff4e229459ff22a191bbe9a1cb3a341610c9c33efc27ddf73"
checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
@ -6245,24 +6239,24 @@ dependencies = [
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.86"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19b04bc93f9d6bdee709f6bd2118f57dd6679cf1176a1af464fca3ab0d66d8fb"
checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd"
dependencies = [
"bumpalo",
"log",
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.36"
version = "0.4.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d1985d03709c53167ce907ff394f5316aa22cb4e12761295c5dc57dacb6297e"
checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03"
dependencies = [
"cfg-if",
"js-sys",
@ -6272,9 +6266,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.86"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14d6b024f1a526bb0234f52840389927257beb670610081360e5a03c5df9c258"
checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
@ -6282,22 +6276,22 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.86"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e128beba882dd1eb6200e1dc92ae6c5dbaa4311aa7bb211ca035779e5efc39f8"
checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.16",
"syn 2.0.18",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.86"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed9d5b4305409d1fc9482fee2d7f9bcbf24b3972bf59817ef757e23982242a93"
checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1"
[[package]]
name = "wasm-streams"
@ -6314,9 +6308,9 @@ dependencies = [
[[package]]
name = "web-sys"
version = "0.3.63"
version = "0.3.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3bdd9ef4e984da1187bf8110c5cf5b845fbc87a23602cdf912386a76fcd3a7c2"
checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b"
dependencies = [
"js-sys",
"wasm-bindgen",
@ -6343,9 +6337,9 @@ dependencies = [
[[package]]
name = "webpki-roots"
version = "0.23.0"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa54963694b65584e170cf5dc46aeb4dcaa5584e652ff5f3952e56d66aff0125"
checksum = "b03058f88386e5ff5310d9111d53f48b17d732b401aeb83a8d5190f2ac459338"
dependencies = [
"rustls-webpki",
]
@ -6545,9 +6539,9 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"
[[package]]
name = "winnow"
version = "0.4.6"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61de7bac303dc551fe038e2b3cef0f571087a47571ea6e79a87692ac99b99699"
checksum = "ca0ace3845f0d96209f0375e6d367e3eb87eb65d27d445bdc9f1843a26f39448"
dependencies = [
"memchr",
]
@ -6596,7 +6590,7 @@ dependencies = [
"futures-task",
"futures-util",
"getrandom",
"hashbrown 0.13.2",
"hashbrown 0.14.0",
"heck",
"indexmap",
"io-lifetimes",
@ -6639,7 +6633,7 @@ dependencies = [
"sqlx-macros",
"strum",
"syn 1.0.109",
"syn 2.0.16",
"syn 2.0.18",
"thrift",
"tokio",
"tokio-stream",
@ -6656,7 +6650,6 @@ dependencies = [
"uuid",
"webpki",
"winapi",
"windows-sys 0.45.0",
"windows-sys 0.48.0",
"zstd",
"zstd-safe",

View File

@ -118,8 +118,8 @@ license = "MIT OR Apache-2.0"
[workspace.dependencies]
arrow = { version = "41.0.0" }
arrow-flight = { version = "41.0.0" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "91e75d7e6303c1a7331e8e90eaad9b095ace929b", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "91e75d7e6303c1a7331e8e90eaad9b095ace929b" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "84e49771b7403b3d313d8493b61d2d58dcdd7514", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "84e49771b7403b3d313d8493b61d2d58dcdd7514" }
hashbrown = { version = "0.14.0" }
object_store = { version = "0.6.0" }
parquet = { version = "41.0.0" }

View File

@ -206,6 +206,19 @@ pub struct CompactorConfig {
)]
pub shadow_mode: bool,
/// Enable scratchpad.
///
/// This allows disabling the scratchpad in production.
///
/// Disabling this is useful for testing the performance and memory consequences of the scratchpad.
#[clap(
long = "compaction-enable-scratchpad",
env = "INFLUXDB_IOX_COMPACTION_ENABLE_SCRATCHPAD",
default_value = "true",
action
)]
pub enable_scratchpad: bool,
/// Ignores "partition marked w/ error and shall be skipped" entries in the catalog.
///
/// This is mostly useful for debugging.

View File

@ -402,7 +402,7 @@ fn make_parquet_files_sink(config: &Config) -> Arc<dyn ParquetFilesSink> {
}
fn make_scratchpad_gen(config: &Config) -> Arc<dyn ScratchpadGen> {
if config.simulate_without_object_store {
if config.simulate_without_object_store || !config.enable_scratchpad {
Arc::new(NoopScratchpadGen::new())
} else {
let scratchpad_store_output = if config.shadow_mode {

View File

@ -28,6 +28,7 @@ pub fn log_config(config: &Config) {
partition_timeout,
partitions_source,
shadow_mode,
enable_scratchpad,
ignore_partition_skip_marker,
shard_config,
min_num_l1_files_to_compact,
@ -73,6 +74,7 @@ pub fn log_config(config: &Config) {
partition_timeout_secs=partition_timeout.as_secs_f32(),
%partitions_source,
shadow_mode,
enable_scratchpad,
ignore_partition_skip_marker,
?shard_cfg_n_shards,
?shard_cfg_shard_id,

View File

@ -91,6 +91,13 @@ pub struct Config {
/// This is mostly useful for debugging.
pub shadow_mode: bool,
/// Enable scratchpad.
///
/// Enabled by default. If this is set to `false`, the compactor will not use the scratchpad.
///
/// This is useful for disabling the scratchpad in production to evaluate its performance and memory impact.
pub enable_scratchpad: bool,
/// Ignores "partition marked w/ error and shall be skipped" entries in the catalog.
///
/// This is mostly useful for debugging.
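For reference, the new `enable_scratchpad` field is consumed by `make_scratchpad_gen` (see the hunk earlier in this diff), which falls back to `NoopScratchpadGen` when the predicate below holds. A condensed, self-contained sketch; the helper name is ours, not in the codebase:

```rust
// Condensed sketch: the compactor uses the no-op scratchpad generator when the
// scratchpad is disabled or object-store access is being simulated.
fn use_noop_scratchpad(simulate_without_object_store: bool, enable_scratchpad: bool) -> bool {
    simulate_without_object_store || !enable_scratchpad
}
```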

View File

@ -0,0 +1,351 @@
//! layout tests related to the size L1/L2 files achieve when the L0 size is small.
//!
//! The intent of these tests is to ensure that when L0s are arriving in a normal/leading edge pattern,
//! even if they're quite small (10KB), the L1 & L2 files should still be accumulated to a reasonable size.
//!
//! Accumulating large L1/L2 is generally easier when cleaning up a backlogged partition with many L0s,
//! so these tests try to mimic the more challenging scenario of a steady stream of small L0s.
//! The steady stream of L0s can be partially simulated by setting the max files per plan to a small number,
//! and putting just a few files in the test case.
use data_types::CompactionLevel;
use iox_time::Time;
use crate::layouts::{layout_setup_builder, parquet_builder, run_layout_scenario, ONE_MB};
const MAX_DESIRED_FILE_SIZE: u64 = 100 * ONE_MB;
// Mimic small L0 files trickling when they overlap in time (by a minor amount, as is common)
// In this case, all L1 and L0 files can fit in a single compaction run.
#[tokio::test]
async fn small_l1_plus_overlapping_l0s_single_run() {
test_helpers::maybe_start_logging();
let setup = layout_setup_builder()
.await
.with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
.with_max_num_files_per_plan(4) // artificially limit to 4 per plan to simulate a steady stream of small files compacted as they come in.
.build()
.await;
let size = 10 * 1024;
// Create 1 L1 file that mimics the output from a previous L0 compaction
setup
.partition
.create_parquet_file(
parquet_builder()
.with_min_time(0)
.with_max_time(10)
.with_compaction_level(CompactionLevel::FileNonOverlapped)
.with_max_l0_created_at(Time::from_timestamp_nanos(11))
.with_file_size_bytes(size * 4_u64),
)
.await;
// Create 3 L0 files, slightly overlapping in time.
// Note the first L0 slightly overlaps the L1, as would happen if this slightly overlapping pattern occurred
// in the files that (we're pretending) were compacted into that L1.
for i in 1..=3 {
setup
.partition
.create_parquet_file(
parquet_builder()
.with_min_time(i * 10)
.with_max_time((i + 1) * 10)
.with_compaction_level(CompactionLevel::Initial)
.with_max_l0_created_at(Time::from_timestamp_nanos((i + 1) * 10 + 1))
.with_file_size_bytes(size),
)
.await;
}
// Required behavior:
// 1. (achieved) all files compacted to a single L1 file
// Desired behavior:
// 1. (achieved) only one compaction is performed to compact them
insta::assert_yaml_snapshot!(
run_layout_scenario(&setup).await,
@r###"
---
- "**** Input Files "
- "L0 "
- "L0.2[10,20] 21ns 10kb |--------L0.2--------| "
- "L0.3[20,30] 31ns 10kb |--------L0.3--------| "
- "L0.4[30,40] 41ns 10kb |--------L0.4--------| "
- "L1 "
- "L1.1[0,10] 11ns 40kb |--------L1.1--------| "
- "**** Simulation run 0, type=compact(TotalSizeLessThanMaxCompactSize). 4 Input Files, 70kb total:"
- "L0 "
- "L0.4[30,40] 41ns 10kb |--------L0.4--------| "
- "L0.3[20,30] 31ns 10kb |--------L0.3--------| "
- "L0.2[10,20] 21ns 10kb |--------L0.2--------| "
- "L1 "
- "L1.1[0,10] 11ns 40kb |--------L1.1--------| "
- "**** 1 Output Files (parquet_file_id not yet assigned), 70kb total:"
- "L1, all files 70kb "
- "L1.?[0,40] 41ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 4 files: L1.1, L0.2, L0.3, L0.4"
- " Creating 1 files"
- "**** Final Output Files (70kb written)"
- "L1, all files 70kb "
- "L1.5[0,40] 41ns |------------------------------------------L1.5------------------------------------------|"
"###
);
}
// Mimic small L0 files trickling when they overlap in time (by a minor amount, as is common)
// In this case, all L1 and L0 files do not fit in a single compaction run.
#[tokio::test]
async fn small_l1_plus_overlapping_l0s_two_runs() {
test_helpers::maybe_start_logging();
let setup = layout_setup_builder()
.await
.with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
.with_max_num_files_per_plan(4) // artificially limit to 4 per plan to simulate a steady stream of small files compacted as they come in.
.build()
.await;
let size = 10 * 1024;
// Create 1 L1 file that mimics the output from a previous L0 compaction
setup
.partition
.create_parquet_file(
parquet_builder()
.with_min_time(0)
.with_max_time(10)
.with_compaction_level(CompactionLevel::FileNonOverlapped)
.with_max_l0_created_at(Time::from_timestamp_nanos(11))
.with_file_size_bytes(size * 4_u64),
)
.await;
// Create 4 L0 files, slightly overlapping in time
// Note the first L0 slightly overlaps the L1, as would happen if this slightly overlapping pattern occurred
// in the files that (we're pretending) were compacted into that L1.
for i in 1..=4 {
setup
.partition
.create_parquet_file(
parquet_builder()
.with_min_time(i * 10)
.with_max_time((i + 1) * 10)
.with_compaction_level(CompactionLevel::Initial)
.with_max_l0_created_at(Time::from_timestamp_nanos((i + 1) * 10 + 1))
.with_file_size_bytes(size),
)
.await;
}
// Required behavior:
// 1. (achieved) all files compacted to a single L1 file
// Desired behavior:
// 1. (not achieved) It may be preferable that the first compaction include the last L1 and as many L0s as are allowed (3).
// This does not happen. Instead, the first compaction is the four L0s that are later combined with the L1.
// This is not necessarily bad; in fact it is better for write amplification. But it might hint at the possibility of
// compaction sequences that never get around to coming back and picking up the L1.
// So the current behavior is noteworthy, and it is unclear whether it is 'good' or 'bad'.
insta::assert_yaml_snapshot!(
run_layout_scenario(&setup).await,
@r###"
---
- "**** Input Files "
- "L0 "
- "L0.2[10,20] 21ns 10kb |------L0.2------| "
- "L0.3[20,30] 31ns 10kb |------L0.3------| "
- "L0.4[30,40] 41ns 10kb |------L0.4------| "
- "L0.5[40,50] 51ns 10kb |------L0.5------|"
- "L1 "
- "L1.1[0,10] 11ns 40kb |------L1.1------| "
- "**** Simulation run 0, type=compact(ManySmallFiles). 4 Input Files, 40kb total:"
- "L0, all files 10kb "
- "L0.2[10,20] 21ns |--------L0.2--------| "
- "L0.3[20,30] 31ns |--------L0.3--------| "
- "L0.4[30,40] 41ns |--------L0.4--------| "
- "L0.5[40,50] 51ns |--------L0.5--------| "
- "**** 1 Output Files (parquet_file_id not yet assigned), 40kb total:"
- "L0, all files 40kb "
- "L0.?[10,50] 51ns |------------------------------------------L0.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 4 files: L0.2, L0.3, L0.4, L0.5"
- " Creating 1 files"
- "**** Simulation run 1, type=compact(TotalSizeLessThanMaxCompactSize). 2 Input Files, 80kb total:"
- "L0, all files 40kb "
- "L0.6[10,50] 51ns |---------------------------------L0.6---------------------------------|"
- "L1, all files 40kb "
- "L1.1[0,10] 11ns |------L1.1------| "
- "**** 1 Output Files (parquet_file_id not yet assigned), 80kb total:"
- "L1, all files 80kb "
- "L1.?[0,50] 51ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 2 files: L1.1, L0.6"
- " Creating 1 files"
- "**** Final Output Files (120kb written)"
- "L1, all files 80kb "
- "L1.7[0,50] 51ns |------------------------------------------L1.7------------------------------------------|"
"###
);
}
// Mimic small L0 files trickling when they do NOT overlap in time (i.e. they have gaps between them)
// In this case, all L1 and L0 files can fit in a single compaction run.
#[tokio::test]
async fn small_l1_plus_nonoverlapping_l0s_single_run() {
test_helpers::maybe_start_logging();
let setup = layout_setup_builder()
.await
.with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
.with_max_num_files_per_plan(4) // artificially limit to 4 per plan to simulate a steady stream of small files compacted as they come in.
.build()
.await;
let size = 10 * 1024;
// Create 1 L1 file that mimics the output from a previous L0 compaction
setup
.partition
.create_parquet_file(
parquet_builder()
.with_min_time(0)
.with_max_time(9)
.with_compaction_level(CompactionLevel::FileNonOverlapped)
.with_max_l0_created_at(Time::from_timestamp_nanos(11))
.with_file_size_bytes(size * 4_u64),
)
.await;
// Create 3 L0 files, not overlapping in time, and not overlapping the L1.
for i in 1..=3 {
setup
.partition
.create_parquet_file(
parquet_builder()
.with_min_time(i * 10 + 1)
.with_max_time((i + 1) * 10 - 1)
.with_compaction_level(CompactionLevel::Initial)
.with_max_l0_created_at(Time::from_timestamp_nanos((i + 1) * 10 + 1))
.with_file_size_bytes(size),
)
.await;
}
// Required behavior:
// 1. (not achieved) all files compacted to a single L1 file
// The assumption is: since it didn't combine the old L1 with the new one, it will never grow L1.1.
// It will eventually compact L1.1 with other L1s to make an L2, but if the write pattern continues
// with tiny L0s, the resulting L2 will be N * the L1 size (where N is the number of L1s compacted
// into the L2).
insta::assert_yaml_snapshot!(
run_layout_scenario(&setup).await,
@r###"
---
- "**** Input Files "
- "L0 "
- "L0.2[11,19] 21ns 10kb |------L0.2------| "
- "L0.3[21,29] 31ns 10kb |------L0.3------| "
- "L0.4[31,39] 41ns 10kb |------L0.4------| "
- "L1 "
- "L1.1[0,9] 11ns 40kb |-------L1.1-------| "
- "**** Simulation run 0, type=compact(TotalSizeLessThanMaxCompactSize). 3 Input Files, 30kb total:"
- "L0, all files 10kb "
- "L0.4[31,39] 41ns |---------L0.4----------| "
- "L0.3[21,29] 31ns |---------L0.3----------| "
- "L0.2[11,19] 21ns |---------L0.2----------| "
- "**** 1 Output Files (parquet_file_id not yet assigned), 30kb total:"
- "L1, all files 30kb "
- "L1.?[11,39] 41ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 3 files: L0.2, L0.3, L0.4"
- " Creating 1 files"
- "**** Final Output Files (30kb written)"
- "L1 "
- "L1.1[0,9] 11ns 40kb |-------L1.1-------| "
- "L1.5[11,39] 41ns 30kb |-----------------------------L1.5-----------------------------| "
"###
);
}
// Mimic small L0 files trickling when they do NOT overlap in time (i.e. they have gaps between them)
// In this case, all L1 and L0 files do not fit in a single compaction run.
#[tokio::test]
async fn small_l1_plus_nonoverlapping_l0s_two_runs() {
test_helpers::maybe_start_logging();
let setup = layout_setup_builder()
.await
.with_max_desired_file_size_bytes(MAX_DESIRED_FILE_SIZE)
.with_max_num_files_per_plan(4) // artificially limit to 4 per plan to simulate a steady stream of small files compacted as they come in.
.build()
.await;
let size = 10 * 1024;
// Create 1 L1 file that mimics the output from a previous L0 compaction
setup
.partition
.create_parquet_file(
parquet_builder()
.with_min_time(0)
.with_max_time(10)
.with_compaction_level(CompactionLevel::FileNonOverlapped)
.with_max_l0_created_at(Time::from_timestamp_nanos(11))
.with_file_size_bytes(size * 4_u64),
)
.await;
// Create 4 L0 files, not overlapping in time, and not overlapping the L1.
for i in 1..=4 {
setup
.partition
.create_parquet_file(
parquet_builder()
.with_min_time(i * 10 + 1)
.with_max_time((i + 1) * 10 - 1)
.with_compaction_level(CompactionLevel::Initial)
.with_max_l0_created_at(Time::from_timestamp_nanos((i + 1) * 10 + 1))
.with_file_size_bytes(size),
)
.await;
}
// Required behavior:
// 1. (not achieved) all files compacted to a single L1 file
// The assumption is: since it didn't combine the old L1 with the new one, it will never grow L1.1.
// It will eventually compact L1.1 with other L1s to make an L2, but if the write pattern continues
// with tiny L0s, the resulting L2 will be N * the L1 size (where N is the number of L1s compacted
// into the L2).
insta::assert_yaml_snapshot!(
run_layout_scenario(&setup).await,
@r###"
---
- "**** Input Files "
- "L0 "
- "L0.2[11,19] 21ns 10kb |----L0.2----| "
- "L0.3[21,29] 31ns 10kb |----L0.3----| "
- "L0.4[31,39] 41ns 10kb |----L0.4----| "
- "L0.5[41,49] 51ns 10kb |----L0.5----| "
- "L1 "
- "L1.1[0,10] 11ns 40kb |------L1.1------| "
- "**** Simulation run 0, type=compact(TotalSizeLessThanMaxCompactSize). 4 Input Files, 40kb total:"
- "L0, all files 10kb "
- "L0.5[41,49] 51ns |------L0.5------| "
- "L0.4[31,39] 41ns |------L0.4------| "
- "L0.3[21,29] 31ns |------L0.3------| "
- "L0.2[11,19] 21ns |------L0.2------| "
- "**** 1 Output Files (parquet_file_id not yet assigned), 40kb total:"
- "L1, all files 40kb "
- "L1.?[11,49] 51ns |------------------------------------------L1.?------------------------------------------|"
- "Committing partition 1:"
- " Soft Deleting 4 files: L0.2, L0.3, L0.4, L0.5"
- " Creating 1 files"
- "**** Final Output Files (40kb written)"
- "L1, all files 40kb "
- "L1.1[0,10] 11ns |------L1.1------| "
- "L1.6[11,49] 51ns |-------------------------------L1.6--------------------------------| "
"###
);
}

View File

@ -48,6 +48,7 @@
//! ```text
//! - L0.?[300,350] 5kb |-L0.3-|
//! ```
mod accumulated_size;
mod backfill;
mod common_use_cases;
mod core;

View File

@ -142,6 +142,7 @@ impl TestSetupBuilder<false> {
threshold: PARTITION_THRESHOLD,
},
shadow_mode: false,
enable_scratchpad: true,
ignore_partition_skip_marker: false,
shard_config: None,
min_num_l1_files_to_compact: MIN_NUM_L1_FILES_TO_COMPACT,

View File

@ -500,6 +500,11 @@ impl PartitionKey {
pub fn ptr_eq(&self, other: &Self) -> bool {
Arc::ptr_eq(&self.0, &other.0)
}
/// Returns the underlying string.
pub fn inner(&self) -> &str {
&self.0
}
}
impl Display for PartitionKey {

View File

@ -327,6 +327,13 @@ pub enum OrderByClause {
Descending,
}
impl OrderByClause {
/// Return `true` if the order by clause is ascending.
pub fn is_ascending(self) -> bool {
matches!(self, Self::Ascending)
}
}
impl Display for OrderByClause {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(

View File

@ -260,10 +260,7 @@ pub fn parse_conditional_expression(input: &str) -> Result<ConditionalExpression
let mut i: &str = input;
// Consume whitespace from the input
i = match ws0(i) {
Ok((i1, _)) => i1,
_ => unreachable!("ws0 is infallible"),
};
(i, _) = ws0(i).expect("ws0 is infallible");
if i.is_empty() {
return Err(ParseError {
@ -293,10 +290,7 @@ pub fn parse_conditional_expression(input: &str) -> Result<ConditionalExpression
};
// Consume remaining whitespace from the input
i = match ws0(i) {
Ok((i1, _)) => i1,
_ => unreachable!("ws0 is infallible"),
};
(i, _) = ws0(i).expect("ws0 is infallible");
if !i.is_empty() {
return Err(ParseError {

View File

@ -69,10 +69,7 @@ pub fn parse_statements(input: &str) -> ParseResult {
loop {
// Consume whitespace from the input
i = match ws0(i) {
Ok((i1, _)) => i1,
_ => unreachable!("ws0 is infallible"),
};
(i, _) = ws0(i).expect("ws0 is infallible");
if eof::<_, nom::error::Error<_>>(i).is_ok() {
return Ok(res);

View File

@ -390,12 +390,9 @@ impl TimeRange {
}
/// Simplifies an InfluxQL duration `expr` to a nanosecond interval represented as an `i64`.
pub fn duration_expr_to_nanoseconds(expr: &Expr) -> Result<i64, ExprError> {
let ctx = ReduceContext::default();
match reduce_expr(&ctx, expr)? {
Expr::Literal(Literal::Duration(v)) => Ok(*v),
Expr::Literal(Literal::Float(v)) => Ok(v as i64),
Expr::Literal(Literal::Integer(v)) => Ok(v),
pub fn duration_expr_to_nanoseconds(ctx: &ReduceContext, expr: &Expr) -> Result<i64, ExprError> {
match reduce_time_expr(ctx, expr)? {
Expr::Literal(Literal::Timestamp(v)) => Ok(v.timestamp_nanos()),
_ => error::expr("invalid duration expression"),
}
}
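A hedged sketch of how callers adapt to the new signature; it mirrors the updated test further down in this diff. Note that `ReduceContext::default()` leaves `now` unset, so reducing `now()` still needs a context that provides it, as the test's `reduce_context()` helper does:

```rust
// Callers now construct the ReduceContext themselves and pass it in, so that
// `now()` and literal timestamps can also be reduced to a nanosecond value.
fn to_nanos(ctx: &ReduceContext, s: &str) -> Result<i64, ExprError> {
    let expr = s
        .parse::<ConditionalExpression>()
        .unwrap()
        .expr()
        .unwrap()
        .clone();
    duration_expr_to_nanoseconds(ctx, &expr)
}
```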
@ -444,7 +441,7 @@ pub struct ReduceContext {
pub tz: Option<chrono_tz::Tz>,
}
/// Simplify the time range expression.
/// Simplify the time range expression and return a literal [timestamp](Timestamp).
fn reduce_time_expr(ctx: &ReduceContext, expr: &Expr) -> ExprResult {
match reduce_expr(ctx, expr)? {
expr @ Expr::Literal(Literal::Timestamp(_)) => Ok(expr),
@ -732,21 +729,26 @@ mod test {
use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Offset, Utc};
use test_helpers::assert_error;
/// Return a `ReduceContext` with `now` set to
/// `2023-01-01T00:00:00Z` / `1672531200000000000`
/// and no timezone.
fn reduce_context() -> ReduceContext {
ReduceContext {
now: Some(Timestamp::from_utc(
NaiveDateTime::new(
NaiveDate::from_ymd_opt(2023, 1, 1).unwrap(),
NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
),
Utc.fix(),
)),
tz: None,
}
}
#[test]
fn test_split_cond() {
fn split_exprs(s: &str) -> Result<(Option<ConditionalExpression>, TimeRange), ExprError> {
// 2023-01-01T00:00:00Z == 1672531200000000000
let ctx = ReduceContext {
now: Some(Timestamp::from_utc(
NaiveDateTime::new(
NaiveDate::from_ymd_opt(2023, 1, 1).unwrap(),
NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
),
Utc.fix(),
)),
tz: None,
};
let ctx = reduce_context();
let cond: ConditionalExpression = s.parse().unwrap();
split_cond(&ctx, &cond)
}
@ -1014,13 +1016,14 @@ mod test {
#[test]
fn test_expr_to_duration() {
fn parse(s: &str) -> Result<i64, ExprError> {
let ctx = reduce_context();
let expr = s
.parse::<ConditionalExpression>()
.unwrap()
.expr()
.unwrap()
.clone();
duration_expr_to_nanoseconds(&expr)
duration_expr_to_nanoseconds(&ctx, &expr)
}
let cases = vec![
@ -1029,6 +1032,8 @@ mod test {
("5d10ms", 432_000_010_000_000),
("-2d10ms", -172800010000000),
("-2d10ns", -172800000000010),
("now()", 1672531200000000000),
("'2023-01-01T00:00:00Z'", 1672531200000000000),
];
for (interval_str, exp) in cases {

View File

@ -495,6 +495,7 @@ impl Config {
partition_timeout_secs: 0,
partition_filter: None,
shadow_mode: false,
enable_scratchpad: true,
ignore_partition_skip_marker: false,
shard_count: None,
shard_id: None,

View File

@ -20,6 +20,6 @@
| | DeduplicateExec: [tag1@2 ASC,tag2@3 ASC,tag3@4 ASC,tag4@5 ASC,time@6 ASC] |
| | SortPreservingMergeExec: [tag1@2 ASC,tag2@3 ASC,tag3@4 ASC,tag4@5 ASC,time@6 ASC,__chunk_order@0 ASC] |
| | SortExec: expr=[tag1@2 ASC,tag2@3 ASC,tag3@4 ASC,tag4@5 ASC,time@6 ASC,__chunk_order@0 ASC] |
| | ParquetExec: file_groups={4 groups: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet, 1/1/1/00000000-0000-0000-0000-000000000001.parquet, 1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/00000000-0000-0000-0000-000000000003.parquet, 1/1/1/00000000-0000-0000-0000-000000000004.parquet, 1/1/1/00000000-0000-0000-0000-000000000005.parquet, 1/1/1/00000000-0000-0000-0000-000000000006.parquet, 1/1/1/00000000-0000-0000-0000-000000000007.parquet, 1/1/1/00000000-0000-0000-0000-000000000008.parquet, 1/1/1/00000000-0000-0000-0000-000000000009.parquet, 1/1/1/00000000-0000-0000-0000-00000000000a.parquet, 1/1/1/00000000-0000-0000-0000-00000000000b.parquet, 1/1/1/00000000-0000-0000-0000-00000000000c.parquet], [1/1/1/00000000-0000-0000-0000-00000000000d.parquet, 1/1/1/00000000-0000-0000-0000-00000000000e.parquet, 1/1/1/00000000-0000-0000-0000-00000000000f.parquet, 1/1/1/00000000-0000-0000-0000-000000000010.parquet, 1/1/1/00000000-0000-0000-0000-000000000011.parquet, 1/1/1/00000000-0000-0000-0000-000000000012.parquet, 1/1/1/00000000-0000-0000-0000-000000000013.parquet, 1/1/1/00000000-0000-0000-0000-000000000014.parquet, 1/1/1/00000000-0000-0000-0000-000000000015.parquet, 1/1/1/00000000-0000-0000-0000-000000000016.parquet, 1/1/1/00000000-0000-0000-0000-000000000017.parquet, 1/1/1/00000000-0000-0000-0000-000000000018.parquet, 1/1/1/00000000-0000-0000-0000-000000000019.parquet], [1/1/1/00000000-0000-0000-0000-00000000001a.parquet, 1/1/1/00000000-0000-0000-0000-00000000001b.parquet, 1/1/1/00000000-0000-0000-0000-00000000001c.parquet, 1/1/1/00000000-0000-0000-0000-00000000001d.parquet, 1/1/1/00000000-0000-0000-0000-00000000001e.parquet, 1/1/1/00000000-0000-0000-0000-00000000001f.parquet, 1/1/1/00000000-0000-0000-0000-000000000020.parquet, 1/1/1/00000000-0000-0000-0000-000000000021.parquet, 1/1/1/00000000-0000-0000-0000-000000000022.parquet, 1/1/1/00000000-0000-0000-0000-000000000023.parquet, 1/1/1/00000000-0000-0000-0000-000000000024.parquet, 1/1/1/00000000-0000-0000-0000-000000000025.parquet], [1/1/1/00000000-0000-0000-0000-000000000026.parquet, 1/1/1/00000000-0000-0000-0000-000000000027.parquet, 1/1/1/00000000-0000-0000-0000-000000000028.parquet, 1/1/1/00000000-0000-0000-0000-000000000029.parquet, 1/1/1/00000000-0000-0000-0000-00000000002a.parquet, 1/1/1/00000000-0000-0000-0000-00000000002b.parquet, 1/1/1/00000000-0000-0000-0000-00000000002c.parquet, 1/1/1/00000000-0000-0000-0000-00000000002d.parquet, 1/1/1/00000000-0000-0000-0000-00000000002e.parquet, 1/1/1/00000000-0000-0000-0000-00000000002f.parquet, 1/1/1/00000000-0000-0000-0000-000000000030.parquet, 1/1/1/00000000-0000-0000-0000-000000000031.parquet]]}, projection=[__chunk_order, f1, tag1, tag2, tag3, tag4, time] |
| | ParquetExec: file_groups={4 groups: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet, 1/1/1/00000000-0000-0000-0000-000000000001.parquet, 1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/00000000-0000-0000-0000-000000000003.parquet, 1/1/1/00000000-0000-0000-0000-000000000004.parquet, ...], [1/1/1/00000000-0000-0000-0000-000000000005.parquet, 1/1/1/00000000-0000-0000-0000-000000000006.parquet, 1/1/1/00000000-0000-0000-0000-000000000007.parquet, 1/1/1/00000000-0000-0000-0000-000000000008.parquet, 1/1/1/00000000-0000-0000-0000-000000000009.parquet, ...], [1/1/1/00000000-0000-0000-0000-00000000000a.parquet, 1/1/1/00000000-0000-0000-0000-00000000000b.parquet, 1/1/1/00000000-0000-0000-0000-00000000000c.parquet, 1/1/1/00000000-0000-0000-0000-00000000000d.parquet, 1/1/1/00000000-0000-0000-0000-00000000000e.parquet, ...], [1/1/1/00000000-0000-0000-0000-00000000000f.parquet, 1/1/1/00000000-0000-0000-0000-000000000010.parquet, 1/1/1/00000000-0000-0000-0000-000000000011.parquet, 1/1/1/00000000-0000-0000-0000-000000000012.parquet, 1/1/1/00000000-0000-0000-0000-000000000013.parquet, ...]]}, projection=[__chunk_order, f1, tag1, tag2, tag3, tag4, time] |
| | |
----------

View File

@ -17,7 +17,7 @@ extensions_options! {
/// [^iox_part]: "IOx partition" refers to a partition within the IOx catalog, i.e. a partition within the
/// primary key space. This is NOT the same as a DataFusion partition which refers to a stream
/// within the physical plan data flow.
pub max_dedup_partition_split: usize, default = 100
pub max_dedup_partition_split: usize, default = 10_000
/// When splitting de-duplicate operations based on time-based overlaps, this is the maximum number of groups
/// that should be considered. If there are more groups, the split will NOT be performed.

View File

@ -33,7 +33,7 @@ use crate::{
use arrow::record_batch::RecordBatch;
use async_trait::async_trait;
use datafusion::{
catalog::catalog::CatalogProvider,
catalog::CatalogProvider,
execution::{
context::{QueryPlanner, SessionState, TaskContext},
memory_pool::MemoryPool,

View File

@ -1,8 +1,11 @@
use datafusion::{
common::{tree_node::TreeNodeRewriter, DFSchema},
error::DataFusionError,
logical_expr::{expr::ScalarUDF, utils::from_plan, LogicalPlan, Operator},
optimizer::{utils::rewrite_preserving_name, OptimizerConfig, OptimizerRule},
logical_expr::{
expr::ScalarUDF, expr_rewriter::rewrite_preserving_name, utils::from_plan, LogicalPlan,
Operator,
},
optimizer::{OptimizerConfig, OptimizerRule},
prelude::{binary_expr, lit, Expr},
scalar::ScalarValue,
};

View File

@ -2,10 +2,11 @@ use std::sync::Arc;
use arrow::datatypes::SchemaRef;
use datafusion::{
datasource::physical_plan::ParquetExec,
error::DataFusionError,
physical_plan::{
empty::EmptyExec, file_format::ParquetExec, union::UnionExec, visit_execution_plan,
ExecutionPlan, ExecutionPlanVisitor,
empty::EmptyExec, union::UnionExec, visit_execution_plan, ExecutionPlan,
ExecutionPlanVisitor,
},
};
use observability_deps::tracing::debug;
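This and several of the following hunks track the same mechanical move that comes with the DataFusion revision bump: `ParquetExec` and `FileScanConfig` are now imported from `datasource::physical_plan` instead of `physical_plan::file_format`. A minimal sketch of the new import path, as used in the hunks below:

```rust
// New home of the Parquet scan types after the DataFusion rev bump in this PR.
use datafusion::datasource::physical_plan::{FileScanConfig, ParquetExec};
```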

View File

@ -3,6 +3,7 @@ use std::{collections::HashSet, sync::Arc};
use datafusion::{
common::tree_node::{RewriteRecursion, Transformed, TreeNode, TreeNodeRewriter},
config::ConfigOptions,
datasource::physical_plan::ParquetExec,
error::{DataFusionError, Result},
logical_expr::Operator,
physical_expr::{split_conjunction, utils::collect_columns},
@ -10,7 +11,6 @@ use datafusion::{
physical_plan::{
empty::EmptyExec,
expressions::{BinaryExpr, Column},
file_format::ParquetExec,
filter::FilterExec,
union::UnionExec,
ExecutionPlan, PhysicalExpr,
@ -165,11 +165,11 @@ mod tests {
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
use datafusion::{
datasource::object_store::ObjectStoreUrl,
datasource::physical_plan::FileScanConfig,
logical_expr::Operator,
physical_expr::PhysicalSortExpr,
physical_plan::{
expressions::{BinaryExpr, Column, Literal},
file_format::FileScanConfig,
PhysicalExpr, Statistics,
},
scalar::ScalarValue,

View File

@ -7,6 +7,7 @@ use arrow::datatypes::SchemaRef;
use datafusion::{
common::tree_node::{Transformed, TreeNode},
config::ConfigOptions,
datasource::physical_plan::{FileScanConfig, ParquetExec},
error::{DataFusionError, Result},
physical_expr::{
utils::{collect_columns, reassign_predicate_columns},
@ -16,7 +17,6 @@ use datafusion::{
physical_plan::{
empty::EmptyExec,
expressions::Column,
file_format::{FileScanConfig, ParquetExec},
filter::FilterExec,
projection::ProjectionExec,
sorts::{sort::SortExec, sort_preserving_merge::SortPreservingMergeExec},

View File

@ -3,14 +3,11 @@ use std::sync::Arc;
use datafusion::{
common::tree_node::{RewriteRecursion, Transformed, TreeNode, TreeNodeRewriter},
config::ConfigOptions,
datasource::physical_plan::{FileScanConfig, ParquetExec},
error::Result,
physical_expr::{PhysicalSortExpr, PhysicalSortRequirement},
physical_optimizer::PhysicalOptimizerRule,
physical_plan::{
file_format::{FileScanConfig, ParquetExec},
sorts::sort::SortExec,
ExecutionPlan,
},
physical_plan::{sorts::sort::SortExec, ExecutionPlan},
};
use observability_deps::tracing::warn;

View File

@ -6,14 +6,15 @@ use crate::{
};
use arrow::datatypes::{DataType, Fields, Schema as ArrowSchema, SchemaRef};
use datafusion::{
datasource::{listing::PartitionedFile, object_store::ObjectStoreUrl},
datasource::{
listing::PartitionedFile,
object_store::ObjectStoreUrl,
physical_plan::{FileScanConfig, ParquetExec},
},
physical_expr::PhysicalSortExpr,
physical_plan::{
empty::EmptyExec,
expressions::Column,
file_format::{FileScanConfig, ParquetExec},
union::UnionExec,
ColumnStatistics, ExecutionPlan, Statistics,
empty::EmptyExec, expressions::Column, union::UnionExec, ColumnStatistics, ExecutionPlan,
Statistics,
},
scalar::ScalarValue,
};

View File

@ -26,8 +26,8 @@ use datafusion::error::DataFusionError;
use datafusion::execution::context::SessionState;
use datafusion::logical_expr::Expr;
use datafusion::physical_plan::ExecutionPlan;
use datafusion::{catalog::catalog::CatalogProvider, physical_plan::displayable};
use datafusion::{catalog::schema::SchemaProvider, logical_expr::LogicalPlan};
use datafusion::{catalog::CatalogProvider, physical_plan::displayable};
use datafusion::{
datasource::{object_store::ObjectStoreUrl, TableProvider, TableType},
physical_plan::{ColumnStatistics, Statistics as DataFusionStatistics},

View File

@ -33,6 +33,18 @@ pub(super) struct Select {
/// The projection type of the selection.
pub(super) projection_type: ProjectionType,
/// The interval derived from the arguments to the `TIME` function
/// when a `GROUP BY` clause is declared with `TIME`.
pub(super) interval: Option<Interval>,
/// The number of additional intervals that must be read
/// for queries that group by time and use window functions such as
/// `DIFFERENCE` or `DERIVATIVE`. This ensures data for the first
/// window is available.
///
/// See: <https://github.com/influxdata/influxdb/blob/f365bb7e3a9c5e227dbf66d84adf674d3d127176/query/compile.go#L50>
pub(super) extra_intervals: usize,
/// Projection clause of the selection.
pub(super) fields: Vec<Field>,
@ -194,3 +206,15 @@ impl Display for Field {
write!(f, " AS {}", self.name)
}
}
/// Represents the interval duration and offset
/// derived from the `TIME` function when specified
/// in a `GROUP BY` clause.
#[derive(Debug, Clone, Copy)]
pub(super) struct Interval {
/// The nanosecond duration of the interval
pub duration: i64,
/// The nanosecond offset of the interval.
pub offset: Option<i64>,
}
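To make the two new `Select` fields concrete, here is a hedged illustration (not taken from the diff) of the values the rewriter would be expected to populate for a typical windowed query; the nanosecond scaling follows the field documentation above, and the `3` matches the `moving_average(..., 3)` example discussed later in this diff:

```rust
// For: SELECT moving_average(mean(writes), 3) FROM diskio GROUP BY time(10s, 5s)
// the GROUP BY TIME arguments and the window function argument map to:
let interval = Interval {
    duration: 10_000_000_000,    // 10s, expressed in nanoseconds
    offset: Some(5_000_000_000), // 5s offset, also in nanoseconds
};
let extra_intervals = 3; // second argument to moving_average
```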

View File

@ -1,10 +1,10 @@
mod select;
use crate::plan::ir::{DataSource, Field, Select, SelectQuery};
use crate::plan::ir::{DataSource, Field, Interval, Select, SelectQuery};
use crate::plan::planner::select::{
fields_to_exprs_no_nulls, make_tag_key_column_meta, plan_with_sort, ProjectionInfo,
};
use crate::plan::planner_time_range_expression::{expr_to_df_interval_dt, time_range_to_df_expr};
use crate::plan::planner_time_range_expression::time_range_to_df_expr;
use crate::plan::rewriter::{find_table_names, rewrite_statement, ProjectionType};
use crate::plan::util::{binary_operator_to_df_operator, rebase_expr, Schemas};
use crate::plan::var_ref::var_ref_data_type_to_data_type;
@ -50,9 +50,7 @@ use influxdb_influxql_parser::show_measurements::{
use influxdb_influxql_parser::show_tag_keys::ShowTagKeysStatement;
use influxdb_influxql_parser::show_tag_values::{ShowTagValuesStatement, WithKeyClause};
use influxdb_influxql_parser::simple_from_clause::ShowFromClause;
use influxdb_influxql_parser::time_range::{
duration_expr_to_nanoseconds, split_cond, ReduceContext, TimeRange,
};
use influxdb_influxql_parser::time_range::{split_cond, ReduceContext, TimeRange};
use influxdb_influxql_parser::timestamp::Timestamp;
use influxdb_influxql_parser::{
common::{MeasurementName, WhereClause},
@ -130,22 +128,43 @@ enum ExprScope {
Projection,
}
/// State used to inform the planner.
/// State used to inform the planner, which is derived for the
/// root `SELECT` and subqueries.
#[allow(dead_code)]
#[derive(Debug, Default, Clone)]
struct Context<'a> {
/// The name of the table used as the data source for the current query.
table_name: &'a str,
projection_type: ProjectionType,
tz: Option<Tz>,
// WHERE
order_by: OrderByClause,
/// The column alias for the `time` column.
///
/// # NOTE
///
/// The time column can only be aliased for the root query.
time_alias: &'a str,
/// The filter predicate for the query, without `time`.
condition: Option<&'a ConditionalExpression>,
/// The time range of the query
time_range: TimeRange,
// GROUP BY information
group_by: Option<&'a GroupByClause>,
fill: Option<FillClause>,
/// Interval of the `TIME` function found in the `GROUP BY` clause.
interval: Option<Interval>,
/// How many additional window intervals must be retrieved, when grouping
/// by time, to ensure window functions like `difference` have sufficient
/// data for the first window of the `time_range`.
extra_intervals: usize,
/// The set of tags specified in the top-level `SELECT` statement
/// which represent the tag set used for grouping output.
root_group_by_tags: &'a [&'a str],
@ -161,10 +180,14 @@ impl<'a> Context<'a> {
table_name,
projection_type: select.projection_type,
tz: select.timezone,
order_by: select.order_by.unwrap_or_default(),
time_alias: &select.fields[0].name,
condition: select.condition.as_ref(),
time_range: select.time_range,
group_by: select.group_by.as_ref(),
fill: select.fill,
interval: select.interval,
extra_intervals: select.extra_intervals,
root_group_by_tags,
}
}
@ -176,6 +199,9 @@ impl<'a> Context<'a> {
table_name: self.table_name,
projection_type: select.projection_type,
tz: select.timezone,
order_by: self.order_by,
// time is never aliased in subqueries
time_alias: "time",
condition: select.condition.as_ref(),
// Subqueries should be restricted by the time range of the parent
//
@ -183,10 +209,165 @@ impl<'a> Context<'a> {
time_range: select.time_range.intersected(self.time_range),
group_by: select.group_by.as_ref(),
fill: select.fill,
interval: select.interval,
extra_intervals: select.extra_intervals,
root_group_by_tags: self.root_group_by_tags,
}
}
/// Return an [`Expr::Sort`] expression for the `time` column.
#[allow(dead_code)]
fn time_sort_expr(&self) -> Expr {
self.time_alias.as_expr().sort(
match self.order_by {
OrderByClause::Ascending => true,
OrderByClause::Descending => false,
},
false,
)
}
/// Returns true if the current context has an extended
/// time range to provide leading data for window functions
/// to produce the result for the first window.
#[allow(dead_code)]
fn has_extended_time_range(&self) -> bool {
self.extra_intervals > 0 && self.interval.is_some()
}
/// Return the time range of the context, including any
/// additional intervals required for window functions like
/// `difference` or `moving_average`, when the query contains a
/// `GROUP BY TIME` clause.
///
/// # NOTE
///
/// This function accounts for a bug in InfluxQL OG that only reads
/// a single interval, rather than the number required based on the
/// window function.
///
/// # EXPECTED
///
/// For InfluxQL OG, the likely intended behaviour of the extra intervals
/// was to calculate a minimum number of additional windows, so that there
/// was sufficient data at the lower time bound specified in the `WHERE`
/// clause, or at the upper time bound when ordering by `time` in
/// descending order.
///
/// For example, the following InfluxQL query calculates the `moving_average`
/// of the `mean` of the `writes` field over 3 intervals. The interval
/// is 10 seconds, as specified by the `GROUP BY time(10s)` clause.
///
/// ```sql
/// SELECT moving_average(mean(writes), 3)
/// FROM diskio
/// WHERE time >= '2020-06-11T16:53:00Z' AND time < '2020-06-11T16:55:00Z'
/// GROUP BY time(10s)
/// ```
///
/// The intended output would therefore include the first window of the time
/// bounds, `'2020-06-11T16:53:00Z'`:
///
/// ```text
/// name: diskio
/// time moving_average
/// ---- --------------
/// 2020-06-11T16:53:00Z 5592529.333333333
/// 2020-06-11T16:53:10Z 5592677.333333333
/// ...
/// 2020-06-11T16:54:10Z 5593513.333333333
/// 2020-06-11T16:54:20Z 5593612.333333333
/// ```
/// However, the actual output starts at `2020-06-11T16:53:10Z`.
///
/// # BUG
///
/// During compilation of the query, InfluxQL OG determines the `ExtraIntervals`
/// required for the `moving_average` function, which in the example is `3` ([source][1]):
///
/// ```go
/// if c.global.ExtraIntervals < int(arg1.Val) {
/// c.global.ExtraIntervals = int(arg1.Val)
/// }
/// ```
///
/// `arg1.Val` is the second argument from the example InfluxQL query, or `3`.
///
/// When preparing the query for execution, the time range is adjusted by the
/// `ExtraIntervals` determined during compilation ([source][2]):
///
/// ```go
/// // Modify the time range if there are extra intervals and an interval.
/// if !c.Interval.IsZero() && c.ExtraIntervals > 0 {
/// if c.Ascending {
/// newTime := timeRange.Min.Add(time.Duration(-c.ExtraIntervals) * c.Interval.Duration)
/// if !newTime.Before(time.Unix(0, influxql.MinTime).UTC()) {
/// timeRange.Min = newTime
/// ```
///
/// In this case `timeRange.Min` will be adjusted from `2020-06-11T16:53:00Z` to
/// `2020-06-11T16:52:30Z`, as `ExtraIntervals` is `3` and `Interval.Duration` is `10s`.
///
/// The first issue is that the adjusted `timeRange` is only used to determine which
/// shards to read per the following ([source][3]):
///
/// ```go
/// // Create an iterator creator based on the shards in the cluster.
/// shards, err := shardMapper.MapShards(c.stmt.Sources, timeRange, sopt)
/// ```
///
/// The options used to configure query execution, constructed later in the function,
/// use the time range from the compiled statement ([source][4]):
///
/// ```go
/// opt.StartTime, opt.EndTime = c.TimeRange.MinTimeNano(), c.TimeRange.MaxTimeNano()
/// ```
///
/// Specifically, `opt.StartTime` would be `2020-06-11T16:53:00Z` (`1591894380000000000`).
///
/// Finally, when constructing the physical operator to compute the `moving_average`,
/// the `StartTime`, or `EndTime` for descending queries, is adjusted by a single
/// interval of `10s` ([source][5]):
///
/// ```go
/// if !opt.Interval.IsZero() {
/// if opt.Ascending {
/// opt.StartTime -= int64(opt.Interval.Duration)
/// ```
///
/// [1]: https://github.com/influxdata/influxdb/blob/f365bb7e3a9c5e227dbf66d84adf674d3d127176/query/compile.go#L592-L594
/// [2]: https://github.com/influxdata/influxdb/blob/f365bb7e3a9c5e227dbf66d84adf674d3d127176/query/compile.go#L1153-L1158
/// [3]: https://github.com/influxdata/influxdb/blob/f365bb7e3a9c5e227dbf66d84adf674d3d127176/query/compile.go#L1172-L1173
/// [4]: https://github.com/influxdata/influxdb/blob/f365bb7e3a9c5e227dbf66d84adf674d3d127176/query/compile.go#L1198
/// [5]: https://github.com/influxdata/influxdb/blob/f365bb7e3a9c5e227dbf66d84adf674d3d127176/query/select.go#L259-L261
#[allow(dead_code)]
fn extended_time_range(&self) -> TimeRange {
// As described in the function docs, extra_intervals is clamped
// to at most 1 to match InfluxQL OG behaviour.
match (self.extra_intervals.min(1), self.interval) {
(count @ 1.., Some(interval)) => {
if self.order_by.is_ascending() {
TimeRange {
lower: self
.time_range
.lower
.map(|v| v - (count as i64 * interval.duration)),
upper: self.time_range.upper,
}
} else {
TimeRange {
lower: self.time_range.lower,
upper: self
.time_range
.upper
.map(|v| v + (count as i64 * interval.duration)),
}
}
}
_ => self.time_range,
}
}
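A hedged worked example of the arithmetic performed by `extended_time_range` for the `moving_average` query in the docs above, assuming an ascending query, a 10s interval, and the documented lower bound `2020-06-11T16:53:00Z` (`1591894380000000000` ns):

```rust
// Hypothetical worked example; the constants mirror the documented query.
const LOWER_NS: i64 = 1_591_894_380_000_000_000; // 2020-06-11T16:53:00Z
const INTERVAL_NS: i64 = 10_000_000_000; // GROUP BY time(10s)

fn main() {
    // extra_intervals is clamped to 1, so a single interval is subtracted,
    // mirroring the InfluxQL OG behaviour described in the BUG section.
    let count: i64 = 1;
    let extended_lower = LOWER_NS - count * INTERVAL_NS;
    // 2020-06-11T16:52:50Z
    assert_eq!(extended_lower, 1_591_894_370_000_000_000);
}
```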
/// Returns the combined `GROUP BY` tags clause from the root
/// and current statement. The list is sorted and guaranteed to be unique.
fn group_by_tags(&self) -> Vec<&str> {
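The body of `group_by_tags` is truncated by this hunk; purely as an illustration of the behaviour the doc comment describes (a sorted, de-duplicated union of the root and current `GROUP BY` tags), a hypothetical sketch follows:

```rust
// Hypothetical sketch; names are illustrative and not taken from the diff.
fn combined_group_by_tags<'a>(root: &[&'a str], current: &[&'a str]) -> Vec<&'a str> {
    let mut tags: Vec<&'a str> = root.iter().chain(current.iter()).copied().collect();
    tags.sort_unstable();
    tags.dedup();
    tags
}
```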
@ -210,7 +391,9 @@ impl<'a> Context<'a> {
fn is_aggregate(&self) -> bool {
matches!(
self.projection_type,
ProjectionType::Aggregate | ProjectionType::Selector { .. }
ProjectionType::Aggregate
| ProjectionType::WindowAggregate
| ProjectionType::Selector { .. }
)
}
@ -328,6 +511,9 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
is_projected,
} = ProjectionInfo::new(&select.fields, &group_by_tags);
let order_by = select.order_by.unwrap_or_default();
let time_alias = fields[0].name.as_str();
let table_names = find_table_names(select);
let sort_by_measurement = table_names.len() > 1;
let mut plans = Vec::new();
@ -412,14 +598,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
},
)?;
// the sort planner node must refer to the time column using
// the alias that was specified
let time_alias = fields[0].name.as_str();
let time_sort_expr = time_alias.as_expr().sort(
match select.order_by {
// Default behaviour is to sort by time in ascending order if there is no ORDER BY
None | Some(OrderByClause::Ascending) => true,
Some(OrderByClause::Descending) => false,
match order_by {
OrderByClause::Ascending => true,
OrderByClause::Descending => false,
},
false,
);
@ -465,10 +647,9 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
let time_alias = fields[0].name.as_str();
let time_sort_expr = time_alias.as_expr().sort(
match select.order_by {
// Default behaviour is to sort by time in ascending order if there is no ORDER BY
None | Some(OrderByClause::Ascending) => true,
Some(OrderByClause::Descending) => false,
match ctx.order_by {
OrderByClause::Ascending => true,
OrderByClause::Descending => false,
},
false,
);
@ -638,13 +819,9 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
// 1. is binning by time, project the column using the `DATE_BIN` function,
// 2. is a single-selector query, project the `time` field of the selector aggregate,
// 3. otherwise, project the Unix epoch (0)
select_exprs[time_column_index] = if let Some(dim) = ctx.group_by.and_then(|gb| gb.time_dimension()) {
let stride = expr_to_df_interval_dt(&dim.interval)?;
let offset = if let Some(offset) = &dim.offset {
duration_expr_to_nanoseconds(offset).map_err(error::map::expr_error)?
} else {
0
};
select_exprs[time_column_index] = if let Some(i) = ctx.interval {
let stride = lit(ScalarValue::new_interval_mdn(0, 0, i.duration));
let offset = i.offset.unwrap_or(0);
date_bin(
stride,
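The hunk above is truncated mid-call; as a hedged illustration of what binning the `time` column by the `GROUP BY time(...)` interval amounts to (the planner delegates this to DataFusion's `DATE_BIN`), a self-contained sketch of the window calculation:

```rust
// Hypothetical sketch of the window calculation performed by `DATE_BIN`:
// each timestamp is snapped to the start of its window. The function name,
// signature and offset handling here are illustrative only.
fn date_bin_ns(stride: i64, offset: i64, t: i64) -> i64 {
    // Shift by the offset, truncate to the stride, then shift back.
    (t - offset).div_euclid(stride) * stride + offset
}

fn main() {
    let stride = 10_000_000_000; // 10s in nanoseconds
    let offset = 5_000_000_000; // 5s offset
    // A point at 17s after the epoch falls in the window starting at 15s.
    assert_eq!(date_bin_ns(stride, offset, 17_000_000_000), 15_000_000_000);
}
```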
@ -2535,7 +2712,7 @@ mod test {
}
#[test]
fn test_show_tag_keys() {
fn test_show_tag_keys_1() {
assert_snapshot!(plan("SHOW TAG KEYS"), @"TableScan: tag_keys [iox::measurement:Dictionary(Int32, Utf8), tagKey:Dictionary(Int32, Utf8)]");
assert_snapshot!(plan("SHOW TAG KEYS LIMIT 1 OFFSET 2"), @r###"
Sort: tag_keys.iox::measurement ASC NULLS LAST, tag_keys.tagKey ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), tagKey:Dictionary(Int32, Utf8)]
@ -2544,150 +2721,158 @@ mod test {
WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [tag_keys.iox::measurement] ORDER BY [tag_keys.tagKey ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS iox::row]] [iox::measurement:Dictionary(Int32, Utf8), tagKey:Dictionary(Int32, Utf8), iox::row:UInt64;N]
TableScan: tag_keys [iox::measurement:Dictionary(Int32, Utf8), tagKey:Dictionary(Int32, Utf8)]
"###);
}
#[test]
fn test_show_tag_keys_2() {
assert_snapshot!(plan("SHOW TAG KEYS WHERE foo = 'some_foo'"), @r###"
Sort: iox::measurement ASC NULLS LAST, tagKey ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Union [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN tag0 > Int32(0) THEN Utf8("tag0") END, CASE WHEN tag1 > Int32(0) THEN Utf8("tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN tag0 > Int32(0) THEN Utf8("tag0") END, CASE WHEN tag1 > Int32(0) THEN Utf8("tag1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(all_types.tag0 IS NOT NULL AS UInt64)) AS tag0, SUM(CAST(all_types.tag1 IS NOT NULL AS UInt64)) AS tag1]] [tag0:UInt64;N, tag1:UInt64;N]
Filter: all_types.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N]
TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN cpu > Int32(0) THEN Utf8("cpu") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
Projection: make_array(CASE WHEN cpu > Int32(0) THEN Utf8("cpu") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(cpu.cpu IS NOT NULL AS UInt64)) AS cpu, SUM(CAST(cpu.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(cpu.region IS NOT NULL AS UInt64)) AS region]] [cpu:UInt64;N, host:UInt64;N, region:UInt64;N]
Filter: cpu.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN bar > Int32(0) THEN Utf8("bar") END, CASE WHEN foo > Int32(0) THEN Utf8("foo") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN bar > Int32(0) THEN Utf8("bar") END, CASE WHEN foo > Int32(0) THEN Utf8("foo") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(data.bar IS NOT NULL AS UInt64)) AS bar, SUM(CAST(data.foo IS NOT NULL AS UInt64)) AS foo]] [bar:UInt64;N, foo:UInt64;N]
Filter: data.time >= TimestampNanosecond(1672444800000000000, None) AND data.foo = Dictionary(Int32, Utf8("some_foo")) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Projection: Dictionary(Int32, Utf8("disk")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN device > Int32(0) THEN Utf8("device") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
Projection: make_array(CASE WHEN device > Int32(0) THEN Utf8("device") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(disk.device IS NOT NULL AS UInt64)) AS device, SUM(CAST(disk.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(disk.region IS NOT NULL AS UInt64)) AS region]] [device:UInt64;N, host:UInt64;N, region:UInt64;N]
Filter: disk.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
TableScan: disk [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("diskio")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END, CASE WHEN status > Int32(0) THEN Utf8("status") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
Projection: make_array(CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END, CASE WHEN status > Int32(0) THEN Utf8("status") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(diskio.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(diskio.region IS NOT NULL AS UInt64)) AS region, SUM(CAST(diskio.status IS NOT NULL AS UInt64)) AS status]] [host:UInt64;N, region:UInt64;N, status:UInt64;N]
Filter: diskio.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [bytes_read:Int64;N, bytes_written:Int64;N, host:Dictionary(Int32, Utf8);N, is_local:Boolean;N, read_utilization:Float64;N, region:Dictionary(Int32, Utf8);N, status:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), write_utilization:Float64;N]
TableScan: diskio [bytes_read:Int64;N, bytes_written:Int64;N, host:Dictionary(Int32, Utf8);N, is_local:Boolean;N, read_utilization:Float64;N, region:Dictionary(Int32, Utf8);N, status:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), write_utilization:Float64;N]
Projection: Dictionary(Int32, Utf8("merge_00")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN col0 > Int32(0) THEN Utf8("col0") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N]
Projection: make_array(CASE WHEN col0 > Int32(0) THEN Utf8("col0") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(merge_00.col0 IS NOT NULL AS UInt64)) AS col0]] [col0:UInt64;N]
Filter: merge_00.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [col0:Dictionary(Int32, Utf8);N, col1:Float64;N, col2:Boolean;N, col3:Utf8;N, time:Timestamp(Nanosecond, None)]
TableScan: merge_00 [col0:Dictionary(Int32, Utf8);N, col1:Float64;N, col2:Boolean;N, col3:Utf8;N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("merge_01")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN col1 > Int32(0) THEN Utf8("col1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N]
Projection: make_array(CASE WHEN col1 > Int32(0) THEN Utf8("col1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(merge_01.col1 IS NOT NULL AS UInt64)) AS col1]] [col1:UInt64;N]
Filter: merge_01.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [col0:Float64;N, col1:Dictionary(Int32, Utf8);N, col2:Utf8;N, col3:Boolean;N, time:Timestamp(Nanosecond, None)]
TableScan: merge_01 [col0:Float64;N, col1:Dictionary(Int32, Utf8);N, col2:Utf8;N, col3:Boolean;N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("temp_01")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_01.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_01.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N]
Filter: temp_01.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [field_f64:Float64;N, field_i64:Int64;N, field_str:Utf8;N, field_u64:UInt64;N, shared_field0:Float64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
TableScan: temp_01 [field_f64:Float64;N, field_i64:Int64;N, field_str:Utf8;N, field_u64:UInt64;N, shared_field0:Float64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("temp_02")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_02.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_02.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N]
Filter: temp_02.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [shared_field0:Int64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
TableScan: temp_02 [shared_field0:Int64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("temp_03")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_03.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_03.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N]
Filter: temp_03.time >= TimestampNanosecond(1672444800000000000, None) AND Boolean(false) [shared_field0:Utf8;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
TableScan: temp_03 [shared_field0:Utf8;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
"###);
}
#[test]
fn test_show_tag_keys_3() {
assert_snapshot!(plan("SHOW TAG KEYS WHERE time > 1337"), @r###"
Sort: iox::measurement ASC NULLS LAST, tagKey ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Union [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN tag0 > Int32(0) THEN Utf8("tag0") END, CASE WHEN tag1 > Int32(0) THEN Utf8("tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN tag0 > Int32(0) THEN Utf8("tag0") END, CASE WHEN tag1 > Int32(0) THEN Utf8("tag1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(all_types.tag0 IS NOT NULL AS UInt64)) AS tag0, SUM(CAST(all_types.tag1 IS NOT NULL AS UInt64)) AS tag1]] [tag0:UInt64;N, tag1:UInt64;N]
Filter: all_types.time >= TimestampNanosecond(1338, None) [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N]
TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN cpu > Int32(0) THEN Utf8("cpu") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
Projection: make_array(CASE WHEN cpu > Int32(0) THEN Utf8("cpu") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(cpu.cpu IS NOT NULL AS UInt64)) AS cpu, SUM(CAST(cpu.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(cpu.region IS NOT NULL AS UInt64)) AS region]] [cpu:UInt64;N, host:UInt64;N, region:UInt64;N]
Filter: cpu.time >= TimestampNanosecond(1338, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN bar > Int32(0) THEN Utf8("bar") END, CASE WHEN foo > Int32(0) THEN Utf8("foo") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN bar > Int32(0) THEN Utf8("bar") END, CASE WHEN foo > Int32(0) THEN Utf8("foo") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(data.bar IS NOT NULL AS UInt64)) AS bar, SUM(CAST(data.foo IS NOT NULL AS UInt64)) AS foo]] [bar:UInt64;N, foo:UInt64;N]
Filter: data.time >= TimestampNanosecond(1338, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Projection: Dictionary(Int32, Utf8("disk")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN device > Int32(0) THEN Utf8("device") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
Projection: make_array(CASE WHEN device > Int32(0) THEN Utf8("device") END, CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(disk.device IS NOT NULL AS UInt64)) AS device, SUM(CAST(disk.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(disk.region IS NOT NULL AS UInt64)) AS region]] [device:UInt64;N, host:UInt64;N, region:UInt64;N]
Filter: disk.time >= TimestampNanosecond(1338, None) [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
TableScan: disk [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("diskio")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END, CASE WHEN status > Int32(0) THEN Utf8("status") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3);N]
Projection: make_array(CASE WHEN host > Int32(0) THEN Utf8("host") END, CASE WHEN region > Int32(0) THEN Utf8("region") END, CASE WHEN status > Int32(0) THEN Utf8("status") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(diskio.host IS NOT NULL AS UInt64)) AS host, SUM(CAST(diskio.region IS NOT NULL AS UInt64)) AS region, SUM(CAST(diskio.status IS NOT NULL AS UInt64)) AS status]] [host:UInt64;N, region:UInt64;N, status:UInt64;N]
Filter: diskio.time >= TimestampNanosecond(1338, None) [bytes_read:Int64;N, bytes_written:Int64;N, host:Dictionary(Int32, Utf8);N, is_local:Boolean;N, read_utilization:Float64;N, region:Dictionary(Int32, Utf8);N, status:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), write_utilization:Float64;N]
TableScan: diskio [bytes_read:Int64;N, bytes_written:Int64;N, host:Dictionary(Int32, Utf8);N, is_local:Boolean;N, read_utilization:Float64;N, region:Dictionary(Int32, Utf8);N, status:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), write_utilization:Float64;N]
Projection: Dictionary(Int32, Utf8("merge_00")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN col0 > Int32(0) THEN Utf8("col0") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N]
Projection: make_array(CASE WHEN col0 > Int32(0) THEN Utf8("col0") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(merge_00.col0 IS NOT NULL AS UInt64)) AS col0]] [col0:UInt64;N]
Filter: merge_00.time >= TimestampNanosecond(1338, None) [col0:Dictionary(Int32, Utf8);N, col1:Float64;N, col2:Boolean;N, col3:Utf8;N, time:Timestamp(Nanosecond, None)]
TableScan: merge_00 [col0:Dictionary(Int32, Utf8);N, col1:Float64;N, col2:Boolean;N, col3:Utf8;N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("merge_01")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN col1 > Int32(0) THEN Utf8("col1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 1);N]
Projection: make_array(CASE WHEN col1 > Int32(0) THEN Utf8("col1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(merge_01.col1 IS NOT NULL AS UInt64)) AS col1]] [col1:UInt64;N]
Filter: merge_01.time >= TimestampNanosecond(1338, None) [col0:Float64;N, col1:Dictionary(Int32, Utf8);N, col2:Utf8;N, col3:Boolean;N, time:Timestamp(Nanosecond, None)]
TableScan: merge_01 [col0:Float64;N, col1:Dictionary(Int32, Utf8);N, col2:Utf8;N, col3:Boolean;N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("temp_01")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_01.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_01.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N]
Filter: temp_01.time >= TimestampNanosecond(1338, None) [field_f64:Float64;N, field_i64:Int64;N, field_str:Utf8;N, field_u64:UInt64;N, shared_field0:Float64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
TableScan: temp_01 [field_f64:Float64;N, field_i64:Int64;N, field_str:Utf8;N, field_u64:UInt64;N, shared_field0:Float64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("temp_02")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_02.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_02.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N]
Filter: temp_02.time >= TimestampNanosecond(1338, None) [shared_field0:Int64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
TableScan: temp_02 [shared_field0:Int64;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
Projection: Dictionary(Int32, Utf8("temp_03")) AS iox::measurement, tagKey [iox::measurement:Dictionary(Int32, Utf8), tagKey:Utf8;N]
Filter: tagKey IS NOT NULL [tagKey:Utf8;N]
Unnest: tagKey [tagKey:Utf8;N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:FixedSizeList(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2);N]
Projection: make_array(CASE WHEN shared_tag0 > Int32(0) THEN Utf8("shared_tag0") END, CASE WHEN shared_tag1 > Int32(0) THEN Utf8("shared_tag1") END) AS tagKey [tagKey:List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]
Aggregate: groupBy=[[]], aggr=[[SUM(CAST(temp_03.shared_tag0 IS NOT NULL AS UInt64)) AS shared_tag0, SUM(CAST(temp_03.shared_tag1 IS NOT NULL AS UInt64)) AS shared_tag1]] [shared_tag0:UInt64;N, shared_tag1:UInt64;N]
Filter: temp_03.time >= TimestampNanosecond(1338, None) [shared_field0:Utf8;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
TableScan: temp_03 [shared_field0:Utf8;N, shared_tag0:Dictionary(Int32, Utf8);N, shared_tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)]
@ -2695,7 +2880,7 @@ mod test {
}
#[test]
fn test_show_tag_values() {
fn test_show_tag_values_1() {
assert_snapshot!(plan("SHOW TAG VALUES WITH KEY = bar"), @r###"
Sort: iox::measurement ASC NULLS LAST, key ASC NULLS LAST, value ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), key:Dictionary(Int32, Utf8), value:Dictionary(Int32, Utf8);N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, Dictionary(Int32, Utf8("bar")) AS key, data.bar AS value [iox::measurement:Dictionary(Int32, Utf8), key:Dictionary(Int32, Utf8), value:Dictionary(Int32, Utf8);N]
@ -2704,6 +2889,10 @@ mod test {
Filter: data.time >= TimestampNanosecond(1672444800000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
#[test]
fn test_show_tag_values_2() {
assert_snapshot!(plan("SHOW TAG VALUES WITH KEY = bar LIMIT 1 OFFSET 2"), @r###"
Sort: iox::measurement ASC NULLS LAST, key ASC NULLS LAST, value ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), key:Dictionary(Int32, Utf8), value:Dictionary(Int32, Utf8);N]
Projection: iox::measurement, key, value [iox::measurement:Dictionary(Int32, Utf8), key:Dictionary(Int32, Utf8), value:Dictionary(Int32, Utf8);N]
@ -2716,6 +2905,10 @@ mod test {
Filter: data.time >= TimestampNanosecond(1672444800000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
#[test]
fn test_show_tag_values_3() {
assert_snapshot!(plan("SHOW TAG VALUES WITH KEY = bar WHERE foo = 'some_foo'"), @r###"
Sort: iox::measurement ASC NULLS LAST, key ASC NULLS LAST, value ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), key:Dictionary(Int32, Utf8), value:Dictionary(Int32, Utf8);N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, Dictionary(Int32, Utf8("bar")) AS key, data.bar AS value [iox::measurement:Dictionary(Int32, Utf8), key:Dictionary(Int32, Utf8), value:Dictionary(Int32, Utf8);N]
@ -2724,6 +2917,10 @@ mod test {
Filter: data.time >= TimestampNanosecond(1672444800000000000, None) AND data.foo = Dictionary(Int32, Utf8("some_foo")) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
#[test]
fn test_show_tag_values_4() {
assert_snapshot!(plan("SHOW TAG VALUES WITH KEY = bar WHERE time > 1337"), @r###"
Sort: iox::measurement ASC NULLS LAST, key ASC NULLS LAST, value ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), key:Dictionary(Int32, Utf8), value:Dictionary(Int32, Utf8);N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, Dictionary(Int32, Utf8("bar")) AS key, data.bar AS value [iox::measurement:Dictionary(Int32, Utf8), key:Dictionary(Int32, Utf8), value:Dictionary(Int32, Utf8);N]
@ -2784,7 +2981,8 @@ mod test {
Sort: time ASC NULLS LAST [time:Timestamp(Nanosecond, None);N, value:Float64;N]
Projection: time, AVG(cpu.usage_idle) AS value [time:Timestamp(Nanosecond, None);N, value:Float64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###);
}
@ -2962,18 +3160,20 @@ mod test {
assert_snapshot!(plan("SELECT LAST(usage_idle) FROM cpu GROUP BY TIME(5s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, last:Float64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, (selector_last(cpu.usage_idle,cpu.time))[value] AS last [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, last:Float64;N]
GapFill: groupBy=[[time]], aggr=[[selector_last(cpu.usage_idle,cpu.time)]], time_column=time, stride=IntervalMonthDayNano("5000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
GapFill: groupBy=[[time]], aggr=[[selector_last(cpu.usage_idle,cpu.time)]], time_column=time, stride=IntervalMonthDayNano("5000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("5000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[selector_last(cpu.usage_idle, cpu.time)]] [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###);
// aggregate query, grouping by time with gap filling
assert_snapshot!(plan("SELECT FIRST(usage_idle) FROM cpu GROUP BY TIME(5s) FILL(0)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, first:Float64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, (coalesce_struct(selector_first(cpu.usage_idle,cpu.time), Struct({value:Float64(0),time:TimestampNanosecond(0, None)})))[value] AS first [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, first:Float64;N]
GapFill: groupBy=[[time]], aggr=[[selector_first(cpu.usage_idle,cpu.time)]], time_column=time, stride=IntervalMonthDayNano("5000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, selector_first(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
GapFill: groupBy=[[time]], aggr=[[selector_first(cpu.usage_idle,cpu.time)]], time_column=time, stride=IntervalMonthDayNano("5000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, selector_first(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("5000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[selector_first(cpu.usage_idle, cpu.time)]] [time:Timestamp(Nanosecond, None);N, selector_first(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###);
// aggregate query, as we're specifying multiple selectors or aggregates
@ -3549,7 +3749,8 @@ mod test {
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
// supports offset parameter
@ -3557,7 +3758,8 @@ mod test {
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(5000000000, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
@ -3567,9 +3769,10 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
@ -3592,9 +3795,9 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time >= '2022-10-31T02:00:00Z' GROUP BY TIME(10s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Included(TimestampNanosecond(1667181600000000000, None))..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Included(TimestampNanosecond(1667181600000000000, None))..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time >= TimestampNanosecond(1667181600000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time >= TimestampNanosecond(1667181600000000000, None) AND data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
@ -3617,9 +3820,10 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
@ -3628,9 +3832,10 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(null)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
@ -3639,9 +3844,10 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(previous)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[LOCF(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
GapFill: groupBy=[[time]], aggr=[[LOCF(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
@ -3650,9 +3856,10 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(0)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
@ -3661,9 +3868,10 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(linear)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[INTERPOLATE(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
GapFill: groupBy=[[time]], aggr=[[INTERPOLATE(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
@ -3673,9 +3881,10 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(3.2)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(3)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
@ -3685,9 +3894,10 @@ mod test {
assert_snapshot!(plan("SELECT COUNT(f64_field) + MEAN(f64_field) FROM data GROUP BY TIME(10s) FILL(3.2)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count_mean:Float64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(3)) + coalesce_struct(AVG(data.f64_field), Float64(3.2)) AS count_mean [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count_mean:Float64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field), AVG(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Excluded(now()) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N, AVG(data.f64_field):Float64;N]
GapFill: groupBy=[[time]], aggr=[[COUNT(data.f64_field), AVG(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(TimestampNanosecond(1672531200000000000, None)) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N, AVG(data.f64_field):Float64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field), AVG(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N, AVG(data.f64_field):Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
@ -3755,7 +3965,8 @@ mod test {
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###);
}
}

View File

@ -1,20 +1,8 @@
//! APIs for transforming InfluxQL [expressions][influxdb_influxql_parser::expression::Expr].
use crate::plan::error;
use datafusion::common::{Result, ScalarValue};
use datafusion::common::ScalarValue;
use datafusion::logical_expr::{lit, Expr as DFExpr};
use datafusion_util::AsExpr;
use influxdb_influxql_parser::expression::Expr;
use influxdb_influxql_parser::time_range::{duration_expr_to_nanoseconds, TimeRange};
type ExprResult = Result<DFExpr>;
/// Simplifies `expr` to an InfluxQL duration and returns a DataFusion interval.
///
/// Returns an error if `expr` is not a duration expression.
pub(super) fn expr_to_df_interval_dt(expr: &Expr) -> ExprResult {
let ns = duration_expr_to_nanoseconds(expr).map_err(error::map::expr_error)?;
Ok(lit(ScalarValue::new_interval_mdn(0, 0, ns)))
}
use influxdb_influxql_parser::time_range::TimeRange;
fn lower_bound_to_df_expr(v: Option<i64>) -> Option<DFExpr> {
v.map(|ts| {

View File

@ -1,7 +1,7 @@
use crate::plan::expr_type_evaluator::TypeEvaluator;
use crate::plan::field::{field_by_name, field_name};
use crate::plan::field_mapper::{field_and_dimensions, FieldTypeMap};
use crate::plan::ir::{DataSource, Field, Select, SelectQuery, TagSet};
use crate::plan::ir::{DataSource, Field, Interval, Select, SelectQuery, TagSet};
use crate::plan::var_ref::{influx_type_to_var_ref_data_type, var_ref_data_type_to_influx_type};
use crate::plan::{error, util, SchemaProvider};
use datafusion::common::{DataFusionError, Result};
@ -19,7 +19,9 @@ use influxdb_influxql_parser::select::{
Dimension, FillClause, FromMeasurementClause, GroupByClause, MeasurementSelection,
SelectStatement,
};
use influxdb_influxql_parser::time_range::{split_cond, ReduceContext, TimeRange};
use influxdb_influxql_parser::time_range::{
duration_expr_to_nanoseconds, split_cond, ReduceContext, TimeRange,
};
use influxdb_influxql_parser::timestamp::Timestamp;
use itertools::Itertools;
use schema::InfluxColumnType;
@ -100,21 +102,35 @@ impl RewriteSelect {
let (fields, group_by) = self.expand_projection(s, stmt, &from, &tag_set)?;
let condition = self.condition_resolve_types(s, stmt, &from)?;
let now = Timestamp::from(s.execution_props().query_execution_start_time);
let rc = ReduceContext {
now: Some(now),
tz: stmt.timezone.map(|tz| *tz),
};
let interval = self.find_interval_offset(&rc, group_by.as_ref())?;
let (condition, time_range) = match condition {
Some(where_clause) => {
let rc = ReduceContext {
now: Some(Timestamp::from(
s.execution_props().query_execution_start_time,
)),
tz: stmt.timezone.map(|tz| *tz),
};
split_cond(&rc, &where_clause).map_err(error::map::expr_error)?
}
Some(where_clause) => split_cond(&rc, &where_clause).map_err(error::map::expr_error)?,
None => (None, TimeRange::default()),
};
let SelectStatementInfo { projection_type } =
select_statement_info(&fields, &group_by, stmt.fill)?;
// If the interval is non-zero and there is no upper bound, default to `now`
// for compatibility with InfluxQL OG.
//
// See: https://github.com/influxdata/influxdb/blob/f365bb7e3a9c5e227dbf66d84adf674d3d127176/query/compile.go#L172-L179
let time_range = match (interval, time_range.upper) {
(Some(interval), None) if interval.duration > 0 => TimeRange {
lower: time_range.lower,
upper: Some(now.timestamp_nanos()),
},
_ => time_range,
};
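// Editorial note (not part of this diff): the effect of the defaulting above is
// visible in the rewrite test later in this change, where a query with only a
// lower bound, `WHERE time >= '2022-04-09T12:13:14Z' GROUP BY TIME(30s)`, gains
// the upper bound `time <= 1672531200000000000`, i.e. the mocked `now()` of
// 2023-01-01T00:00:00Z.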
let SelectStatementInfo {
projection_type,
extra_intervals,
} = select_statement_info(&fields, &group_by, stmt.fill)?;
// Following InfluxQL OG behaviour, if this is a subquery, and the fill strategy equates
// to `FILL(null)`, switch to `FILL(none)`.
@ -131,6 +147,8 @@ impl RewriteSelect {
Ok(Select {
projection_type,
interval,
extra_intervals,
fields,
from,
condition,
@ -388,6 +406,29 @@ impl RewriteSelect {
Ok(Some(where_clause))
}
}
/// Return the interval value of the `GROUP BY` clause if it specifies a `TIME`.
fn find_interval_offset(
&self,
ctx: &ReduceContext,
group_by: Option<&GroupByClause>,
) -> Result<Option<Interval>> {
Ok(
if let Some(td) = group_by.and_then(|v| v.time_dimension()) {
let duration = duration_expr_to_nanoseconds(ctx, &td.interval)
.map_err(error::map::expr_error)?;
let offset = td
.offset
.as_ref()
.map(|o| duration_expr_to_nanoseconds(ctx, o))
.transpose()
.map_err(error::map::expr_error)?;
Some(Interval { duration, offset })
} else {
None
},
)
}
}
/// Ensures the `time` column is presented consistently across all `SELECT` queries.
@ -865,12 +906,30 @@ macro_rules! lit_string {
};
}
/// Set the `extra_intervals` field of [`FieldChecker`] if it is
/// less than the proposed new value.
macro_rules! set_extra_intervals {
($SELF:expr, $NEW:expr) => {
if $SELF.extra_intervals < $NEW as usize {
$SELF.extra_intervals = $NEW as usize
}
};
}
/// Checks a number of expectations for the fields of a [`SelectStatement`].
#[derive(Default)]
struct FieldChecker {
/// `true` if the statement contains a `GROUP BY TIME` clause.
has_group_by_time: bool,
/// The number of additional intervals that must be read
/// for queries that group by time and use window functions such as
/// `DIFFERENCE` or `DERIVATIVE`. This ensures data for the first
/// window is available.
///
/// See: <https://github.com/influxdata/influxdb/blob/f365bb7e3a9c5e227dbf66d84adf674d3d127176/query/compile.go#L50>
extra_intervals: usize,
/// `true` if the interval was inherited from a parent.
/// If this is set, then an interval that was inherited will not cause
/// a query that shouldn't have an interval to fail.
@ -888,6 +947,9 @@ struct FieldChecker {
/// Accumulator for the number of aggregate or window expressions for the statement.
aggregate_count: usize,
/// Accumulator for the number of window expressions for the statement.
window_count: usize,
/// Accumulator for the number of selector expressions for the statement.
selector_count: usize,
}
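// Editorial sketch (not part of this diff): one way `extra_intervals` could be
// consumed downstream is to widen the scan's lower time bound so the first
// window has data. The helper name and the exact widening rule below are
// assumptions for illustration only, not code from this change.
fn widen_lower_bound_sketch(lower_ns: Option<i64>, interval_ns: i64, extra_intervals: usize) -> Option<i64> {
// e.g. GROUP BY TIME(10s) with moving_average(f, 5) => read 5 * 10s earlier.
lower_ns.map(|ns| ns - interval_ns * extra_intervals as i64)
}
// e.g. widen_lower_bound_sketch(Some(1_000_000_000_000), 10_000_000_000, 5) == Some(950_000_000_000)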
@ -942,7 +1004,7 @@ impl FieldChecker {
// Validate we are using a selector or raw query if non-aggregate fields are projected.
if self.has_non_aggregate_fields {
if self.aggregate_count > 0 {
if self.window_aggregate_count() > 0 {
return error::query("mixing aggregate and non-aggregate columns is not supported");
} else if self.selector_count > 1 {
return error::query(
@ -954,26 +1016,37 @@ impl FieldChecker {
// By this point the statement is valid, so let's
// determine the projection type
if self.has_top_bottom {
Ok(ProjectionType::TopBottomSelector)
Ok(if self.has_top_bottom {
ProjectionType::TopBottomSelector
} else if self.has_group_by_time {
Ok(ProjectionType::Aggregate)
if self.window_count > 0 {
ProjectionType::WindowAggregate
} else {
ProjectionType::Aggregate
}
} else if self.has_distinct {
Ok(ProjectionType::RawDistinct)
ProjectionType::RawDistinct
} else if self.selector_count == 1 && self.aggregate_count == 0 {
Ok(ProjectionType::Selector {
ProjectionType::Selector {
has_fields: self.has_non_aggregate_fields,
})
}
} else if self.selector_count > 1 || self.aggregate_count > 0 {
Ok(ProjectionType::Aggregate)
ProjectionType::Aggregate
} else if self.window_count > 0 {
ProjectionType::Window
} else {
Ok(ProjectionType::Raw)
}
ProjectionType::Raw
})
}
/// The total number of functions observed.
fn function_count(&self) -> usize {
self.aggregate_count + self.selector_count
self.window_aggregate_count() + self.selector_count
}
/// The total number of window and aggregate functions observed.
fn window_aggregate_count(&self) -> usize {
self.aggregate_count + self.window_count
}
}
@ -1195,9 +1268,12 @@ impl FieldChecker {
}
fn check_derivative(&mut self, name: &str, args: &[Expr]) -> Result<()> {
self.inc_aggregate_count();
self.inc_window_count();
check_exp_args!(name, 1, 2, args);
set_extra_intervals!(self, 1);
match args.get(1) {
Some(Expr::Literal(Literal::Duration(d))) if **d <= 0 => {
return error::query(format!("duration argument must be positive, got {d}"))
@ -1214,9 +1290,11 @@ impl FieldChecker {
}
fn check_elapsed(&mut self, name: &str, args: &[Expr]) -> Result<()> {
self.inc_aggregate_count();
self.inc_window_count();
check_exp_args!(name, 1, 2, args);
set_extra_intervals!(self, 1);
match args.get(1) {
Some(Expr::Literal(Literal::Duration(d))) if **d <= 0 => {
return error::query(format!("duration argument must be positive, got {d}"))
@ -1233,9 +1311,11 @@ impl FieldChecker {
}
fn check_difference(&mut self, name: &str, args: &[Expr]) -> Result<()> {
self.inc_aggregate_count();
self.inc_window_count();
check_exp_args!(name, 1, args);
set_extra_intervals!(self, 1);
self.check_nested_symbol(name, &args[0])
}
@ -1243,11 +1323,13 @@ impl FieldChecker {
self.inc_aggregate_count();
check_exp_args!("cumulative_sum", 1, args);
set_extra_intervals!(self, 1);
self.check_nested_symbol("cumulative_sum", &args[0])
}
fn check_moving_average(&mut self, args: &[Expr]) -> Result<()> {
self.inc_aggregate_count();
self.inc_window_count();
check_exp_args!("moving_average", 2, args);
let v = lit_integer!("moving_average", args, 1);
@ -1257,11 +1339,13 @@ impl FieldChecker {
));
}
set_extra_intervals!(self, v);
self.check_nested_symbol("moving_average", &args[0])
}
fn check_exponential_moving_average(&mut self, name: &str, args: &[Expr]) -> Result<()> {
self.inc_aggregate_count();
self.inc_window_count();
check_exp_args!(name, 2, 4, args);
let v = lit_integer!(name, args, 1);
@ -1269,6 +1353,8 @@ impl FieldChecker {
return error::query(format!("{name} period must be greater than 1, got {v}"));
}
set_extra_intervals!(self, v);
if let Some(v) = lit_integer!(name, args, 2?) {
match (v, name) {
(v, "triple_exponential_derivative") if v < 1 && v != -1 => {
@ -1299,7 +1385,7 @@ impl FieldChecker {
}
fn check_kaufmans(&mut self, name: &str, args: &[Expr]) -> Result<()> {
self.inc_aggregate_count();
self.inc_window_count();
check_exp_args!(name, 2, 3, args);
let v = lit_integer!(name, args, 1);
@ -1307,6 +1393,8 @@ impl FieldChecker {
return error::query(format!("{name} period must be greater than 1, got {v}"));
}
set_extra_intervals!(self, v);
if let Some(v) = lit_integer!(name, args, 2?) {
if v < 0 && v != -1 {
return error::query(format!(
@ -1319,7 +1407,7 @@ impl FieldChecker {
}
fn check_chande_momentum_oscillator(&mut self, name: &str, args: &[Expr]) -> Result<()> {
self.inc_aggregate_count();
self.inc_window_count();
check_exp_args!(name, 2, 4, args);
let v = lit_integer!(name, args, 1);
@ -1327,6 +1415,8 @@ impl FieldChecker {
return error::query(format!("{name} period must be greater than 1, got {v}"));
}
set_extra_intervals!(self, v);
if let Some(v) = lit_integer!(name, args, 2?) {
if v < 0 && v != -1 {
return error::query(format!(
@ -1401,11 +1491,16 @@ impl FieldChecker {
}
}
/// Increments the function call count
/// Increments the aggregate function call count
fn inc_aggregate_count(&mut self) {
self.aggregate_count += 1
}
/// Increments the window function call count
fn inc_window_count(&mut self) {
self.window_count += 1
}
fn inc_selector_count(&mut self) {
self.selector_count += 1
}
@ -1453,6 +1548,10 @@ pub(crate) enum ProjectionType {
/// A query that projects one or more aggregate functions or
/// two or more selector functions.
Aggregate,
/// A query that projects one or more window functions.
Window,
/// A query that projects a combination of window and nested aggregate functions.
WindowAggregate,
/// A query that projects a single selector function,
/// such as `last` or `first`.
Selector {
@ -1468,7 +1567,11 @@ pub(crate) enum ProjectionType {
#[derive(Default, Debug, Copy, Clone)]
struct SelectStatementInfo {
/// Identifies the projection type for the `SELECT` query.
pub projection_type: ProjectionType,
projection_type: ProjectionType,
/// Copied from [extra_intervals](FieldChecker::extra_intervals)
///
/// [See also](Select::extra_intervals).
extra_intervals: usize,
}
/// Gather information about the semantics of a [`SelectStatement`] and verify
@ -1518,8 +1621,14 @@ fn select_statement_info(
};
let projection_type = fc.check_fields(fields, fill)?;
let FieldChecker {
extra_intervals, ..
} = fc;
Ok(SelectStatementInfo { projection_type })
Ok(SelectStatementInfo {
projection_type,
extra_intervals,
})
}
#[cfg(test)]
@ -1591,6 +1700,22 @@ mod test {
ProjectionType::Selector { has_fields: false }
);
// updates extra_intervals
let info = select_statement_info(&parse_select("SELECT difference(foo) FROM cpu")).unwrap();
assert_matches!(info.projection_type, ProjectionType::Window);
assert_matches!(info.extra_intervals, 1);
// derives extra intervals from the window function
let info =
select_statement_info(&parse_select("SELECT moving_average(foo, 5) FROM cpu")).unwrap();
assert_matches!(info.projection_type, ProjectionType::Window);
assert_matches!(info.extra_intervals, 5);
// uses the maximum extra intervals
let info = select_statement_info(&parse_select(
"SELECT difference(foo), moving_average(foo, 4) FROM cpu",
))
.unwrap();
assert_matches!(info.extra_intervals, 4);
let info = select_statement_info(&parse_select("SELECT last(foo), bar FROM cpu")).unwrap();
assert_matches!(
info.projection_type,
@ -1610,6 +1735,12 @@ mod test {
let info = select_statement_info(&parse_select("SELECT count(foo) FROM cpu")).unwrap();
assert_matches!(info.projection_type, ProjectionType::Aggregate);
let info = select_statement_info(&parse_select(
"SELECT difference(count(foo)) FROM cpu GROUP BY TIME(10s)",
))
.unwrap();
assert_matches!(info.projection_type, ProjectionType::WindowAggregate);
let info = select_statement_info(&parse_select("SELECT top(foo, 3) FROM cpu")).unwrap();
assert_matches!(info.projection_type, ProjectionType::TopBottomSelector);
}
@ -2216,6 +2347,26 @@ mod test {
stmt.to_string(),
"SELECT time::timestamp AS time, host::tag AS host, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu GROUP BY cpu::tag, host::tag, region::tag"
);
//
// TIME
//
// Explicitly adds an upper bound for the time-range for aggregate queries
let stmt = parse_select("SELECT mean(usage_idle) FROM cpu WHERE time >= '2022-04-09T12:13:14Z' GROUP BY TIME(30s)");
let stmt = rewrite_select_statement(&namespace, &stmt).unwrap();
assert_eq!(
stmt.to_string(),
"SELECT time::timestamp AS time, mean(usage_idle::float) AS mean FROM cpu WHERE time >= 1649506394000000000 AND time <= 1672531200000000000 GROUP BY TIME(30s)"
);
// Does not add an upper bound time range if already specified
let stmt = parse_select("SELECT mean(usage_idle) FROM cpu WHERE time >= '2022-04-09T12:13:14Z' AND time < '2022-04-10T12:00:00Z' GROUP BY TIME(30s)");
let stmt = rewrite_select_statement(&namespace, &stmt).unwrap();
assert_eq!(
stmt.to_string(),
"SELECT time::timestamp AS time, mean(usage_idle::float) AS mean FROM cpu WHERE time >= 1649506394000000000 AND time <= 1649591999999999999 GROUP BY TIME(30s)"
);
}
/// Uncategorized fallible cases

View File

@ -225,6 +225,7 @@ pub async fn create_compactor_server_type(
partition_timeout: Duration::from_secs(compactor_config.partition_timeout_secs),
partitions_source,
shadow_mode: compactor_config.shadow_mode,
enable_scratchpad: compactor_config.enable_scratchpad,
ignore_partition_skip_marker: compactor_config.ignore_partition_skip_marker,
shard_config,
min_num_l1_files_to_compact: compactor_config.min_num_l1_files_to_compact,

View File

@ -83,7 +83,7 @@ impl ParquetChunk {
///
/// See [`ParquetExecInput`] for more information.
///
/// [`ParquetExec`]: datafusion::physical_plan::file_format::ParquetExec
/// [`ParquetExec`]: datafusion::datasource::physical_plan::ParquetExec
pub fn parquet_exec_input(&self) -> ParquetExecInput {
let path: ParquetFilePath = self.parquet_file.as_ref().into();
self.store.parquet_exec_input(&path, self.file_size_bytes())

View File

@ -13,13 +13,14 @@ use arrow::{
use bytes::Bytes;
use data_types::PartitionId;
use datafusion::{
datasource::{listing::PartitionedFile, object_store::ObjectStoreUrl},
datasource::{
listing::PartitionedFile,
object_store::ObjectStoreUrl,
physical_plan::{FileScanConfig, ParquetExec},
},
error::DataFusionError,
execution::memory_pool::MemoryPool,
physical_plan::{
file_format::{FileScanConfig, ParquetExec},
ExecutionPlan, SendableRecordBatchStream, Statistics,
},
physical_plan::{ExecutionPlan, SendableRecordBatchStream, Statistics},
prelude::SessionContext,
};
use datafusion_util::config::{iox_session_config, register_iox_object_store};
@ -93,7 +94,7 @@ impl std::fmt::Display for StorageId {
/// The files shall be grouped by [`object_store_url`](Self::object_store_url). For each object store, you shall
/// create one [`ParquetExec`] and put each file into its own "file group".
///
/// [`ParquetExec`]: datafusion::physical_plan::file_format::ParquetExec
/// [`ParquetExec`]: datafusion::datasource::physical_plan::ParquetExec
#[derive(Debug, Clone)]
pub struct ParquetExecInput {
/// Store where the file is located.
@ -286,7 +287,7 @@ impl ParquetStorage {
///
/// See [`ParquetExecInput`] for more information.
///
/// [`ParquetExec`]: datafusion::physical_plan::file_format::ParquetExec
/// [`ParquetExec`]: datafusion::datasource::physical_plan::ParquetExec
pub fn parquet_exec_input(&self, path: &ParquetFilePath, file_size: usize) -> ParquetExecInput {
ParquetExecInput {
object_store_url: ObjectStoreUrl::parse(format!("iox://{}/", self.id))

View File

@ -23,16 +23,13 @@ use datafusion::{
file_format::{parquet::ParquetFormat, FileFormat},
listing::PartitionedFile,
object_store::ObjectStoreUrl,
physical_plan::{FileScanConfig, ParquetExec},
},
execution::{
context::{SessionState, TaskContext},
runtime_env::RuntimeEnv,
},
physical_plan::{
execute_stream,
file_format::{FileScanConfig, ParquetExec},
SendableRecordBatchStream, Statistics,
},
physical_plan::{execute_stream, SendableRecordBatchStream, Statistics},
prelude::SessionContext,
};
use datafusion_util::config::{iox_session_config, register_iox_object_store};

View File

@ -11,7 +11,11 @@ use cache_system::{
loader::{metrics::MetricsLoader, FunctionLoader},
resource_consumption::FunctionEstimator,
};
use data_types::{ColumnId, PartitionId};
use data_types::{
partition_template::{build_column_values, ColumnValue},
ColumnId, Partition, PartitionId,
};
use datafusion::scalar::ScalarValue;
use iox_catalog::interface::Catalog;
use iox_time::TimeProvider;
use schema::sort::SortKey;
@ -70,11 +74,7 @@ impl PartitionCache {
.await
.expect("retry forever")?;
let sort_key = partition.sort_key().map(|sort_key| {
Arc::new(PartitionSortKey::new(sort_key, &extra.column_id_map_rev))
});
Some(CachedPartition { sort_key })
Some(CachedPartition::new(partition, &extra))
}
});
let loader = Arc::new(MetricsLoader::new(
@ -144,14 +144,118 @@ impl PartitionCache {
.await
.and_then(|p| p.sort_key)
}
/// Get known column ranges.
#[allow(dead_code)]
pub async fn column_ranges(
&self,
cached_table: Arc<CachedTable>,
partition_id: PartitionId,
span: Option<Span>,
) -> Option<ColumnRanges> {
self.cache
.get(partition_id, (cached_table, span))
.await
.map(|p| p.column_ranges)
}
}
/// Represent known min/max values for a specific column.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ColumnRange {
pub min_value: Arc<ScalarValue>,
pub max_value: Arc<ScalarValue>,
}
/// Represents the known min/max values for a subset (not all) of the columns in a partition.
///
/// The values may not actually occur in any row.
///
/// These ranges apply to ALL rows (esp. in ALL files and ingester chunks) within a given partition.
pub type ColumnRanges = Arc<HashMap<Arc<str>, ColumnRange>>;
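// Editorial sketch (not part of this diff): how a reader of `ColumnRanges` might
// use the min/max bounds to prune a partition for an equality predicate. The
// bounds are simplified here to `&str` (instead of `ScalarValue`) and the
// function name `could_contain_sketch` is hypothetical.
fn could_contain_sketch(
ranges: &std::collections::HashMap<&str, (&str, &str)>, // column -> (min, max)
col: &str,
value: &str,
) -> bool {
match ranges.get(col) {
// Value outside the known [min, max] range => the partition can be pruned.
Some((min, max)) => *min <= value && value <= *max,
// No range recorded for this column => cannot prune, keep the partition.
None => true,
}
}
// e.g. with {"tag1": ("v2", "v2")}: "v2" may be present, "v9" cannot be, and an
// unknown column such as "tag3" never allows pruning.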
#[derive(Debug, Clone)]
struct CachedPartition {
sort_key: Option<Arc<PartitionSortKey>>,
column_ranges: ColumnRanges,
}
impl CachedPartition {
fn new(partition: Partition, table: &CachedTable) -> Self {
let sort_key = partition
.sort_key()
.map(|sort_key| Arc::new(PartitionSortKey::new(sort_key, &table.column_id_map_rev)));
let mut column_ranges =
build_column_values(&table.partition_template, partition.partition_key.inner())
.filter_map(|(col, val)| {
// resolve the column name to an already-existing Arc for cheaper storage
let col = Arc::clone(table.column_id_map_rev.get_key_value(col)?.0);
let range = match val {
ColumnValue::Identity(s) => {
let s = Arc::new(ScalarValue::from(s.as_ref()));
ColumnRange {
min_value: Arc::clone(&s),
max_value: s,
}
}
ColumnValue::Prefix(p) => {
if p.is_empty() {
// full range => value is useless
return None;
}
// If the partition only has a prefix of the tag value (it was truncated) then form a conservative
// range:
//
//
// # Minimum
// Use the prefix itself.
//
// Note that the minimum is inclusive.
//
// All values in the partition are either:
// - identical to the prefix, in which case they are included by the inclusive minimum
// - of the form `"<prefix><s>"`, where it holds that `"<prefix><s>" > "<prefix>"` for all
// strings `"<s>"`.
//
//
// # Maximum
// Use `"<prefix_excluding_last_char><char::max>"`.
//
// Note that the maximum is inclusive.
//
// All strings in this partition must be smaller than this constructed maximum, because
// string comparison is front-to-back and `"<prefix_excluding_last_char><char::max>" > "<prefix>"`.
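// For example (mirroring `test_column_ranges` below), a value truncated to the
// prefix "v2" yields the conservative range min = "v2", max = "v\u{10FFFF}".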
let min_value = Arc::new(ScalarValue::from(p.as_ref()));
let mut chars = p.as_ref().chars().collect::<Vec<_>>();
*chars.last_mut().expect("checked that prefix is not empty") =
std::char::MAX;
let max_value = Arc::new(ScalarValue::from(
chars.into_iter().collect::<String>().as_str(),
));
ColumnRange {
min_value,
max_value,
}
}
};
Some((col, range))
})
.collect::<HashMap<_, _>>();
column_ranges.shrink_to_fit();
Self {
sort_key,
column_ranges: Arc::new(column_ranges),
}
}
/// RAM-bytes EXCLUDING `self`.
fn size(&self) -> usize {
// Arc content
@ -159,6 +263,13 @@ impl CachedPartition {
.as_ref()
.map(|sk| sk.size())
.unwrap_or_default()
+ std::mem::size_of::<HashMap<Arc<str>, ColumnRange>>()
+ (self.column_ranges.capacity() * std::mem::size_of::<(Arc<str>, ColumnRange)>())
+ self
.column_ranges
.iter()
.map(|(col, range)| col.len() + range.min_value.size() + range.max_value.size())
.sum::<usize>()
}
}
@ -206,6 +317,9 @@ mod tests {
use super::*;
use crate::cache::{ram::test_util::test_ram_pool, test_util::assert_histogram_metric_count};
use data_types::{partition_template::TablePartitionTemplateOverride, ColumnType};
use generated_types::influxdata::iox::partition_template::v1::{
template_part::Part, PartitionTemplate, TemplatePart,
};
use iox_tests::TestCatalog;
use schema::{Schema, SchemaBuilder};
@ -294,6 +408,198 @@ mod tests {
}
}
#[tokio::test]
async fn test_column_ranges() {
let catalog = TestCatalog::new();
let ns = catalog.create_namespace_1hr_retention("ns").await;
let t = ns
.create_table_with_partition_template(
"table",
Some(PartitionTemplate {
parts: vec![
TemplatePart {
part: Some(Part::TagValue(String::from("tag2"))),
},
TemplatePart {
part: Some(Part::TagValue(String::from("tag1"))),
},
],
}),
)
.await;
let c1 = t.create_column("tag1", ColumnType::Tag).await;
let c2 = t.create_column("tag2", ColumnType::Tag).await;
let c3 = t.create_column("tag3", ColumnType::Tag).await;
let c4 = t.create_column("time", ColumnType::Time).await;
// See `data_types::partition_template` for the template language.
// Two normal values.
let p1 = t.create_partition("v1|v2").await.partition.clone();
// 2nd part is NULL
let p2 = t.create_partition("v1|!").await.partition.clone();
// 2nd part is empty
let p3 = t.create_partition("v1|^").await.partition.clone();
// 2nd part is truncated (i.e. the original value was longer)
let p4 = t.create_partition("v1|v2#").await.partition.clone();
// 2nd part is truncated to empty string
let p5 = t.create_partition("v1|#").await.partition.clone();
let cached_table = Arc::new(CachedTable {
id: t.table.id,
schema: schema(),
column_id_map: HashMap::from([
(c1.column.id, Arc::from(c1.column.name.clone())),
(c2.column.id, Arc::from(c2.column.name.clone())),
(c3.column.id, Arc::from(c3.column.name.clone())),
(c4.column.id, Arc::from(c4.column.name.clone())),
]),
column_id_map_rev: HashMap::from([
(Arc::from(c1.column.name.clone()), c1.column.id),
(Arc::from(c2.column.name.clone()), c2.column.id),
(Arc::from(c3.column.name.clone()), c3.column.id),
(Arc::from(c4.column.name.clone()), c4.column.id),
]),
primary_key_column_ids: [c1.column.id, c2.column.id, c3.column.id, c4.column.id].into(),
partition_template: t.table.partition_template.clone(),
});
let cache = PartitionCache::new(
catalog.catalog(),
BackoffConfig::default(),
catalog.time_provider(),
&catalog.metric_registry(),
test_ram_pool(),
true,
);
let ranges1a = cache
.column_ranges(Arc::clone(&cached_table), p1.id, None)
.await
.unwrap();
assert_eq!(
ranges1a.as_ref(),
&HashMap::from([
(
Arc::from("tag1"),
ColumnRange {
min_value: Arc::new(ScalarValue::from("v2")),
max_value: Arc::new(ScalarValue::from("v2"))
}
),
(
Arc::from("tag2"),
ColumnRange {
min_value: Arc::new(ScalarValue::from("v1")),
max_value: Arc::new(ScalarValue::from("v1"))
}
),
]),
);
assert!(Arc::ptr_eq(
&ranges1a.get("tag1").unwrap().min_value,
&ranges1a.get("tag1").unwrap().max_value,
));
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 1);
let ranges2 = cache
.column_ranges(Arc::clone(&cached_table), p2.id, None)
.await
.unwrap();
assert_eq!(
ranges2.as_ref(),
&HashMap::from([(
Arc::from("tag2"),
ColumnRange {
min_value: Arc::new(ScalarValue::from("v1")),
max_value: Arc::new(ScalarValue::from("v1"))
}
),]),
);
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 2);
let ranges3 = cache
.column_ranges(Arc::clone(&cached_table), p3.id, None)
.await
.unwrap();
assert_eq!(
ranges3.as_ref(),
&HashMap::from([
(
Arc::from("tag1"),
ColumnRange {
min_value: Arc::new(ScalarValue::from("")),
max_value: Arc::new(ScalarValue::from(""))
}
),
(
Arc::from("tag2"),
ColumnRange {
min_value: Arc::new(ScalarValue::from("v1")),
max_value: Arc::new(ScalarValue::from("v1"))
}
),
]),
);
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 3);
let ranges4 = cache
.column_ranges(Arc::clone(&cached_table), p4.id, None)
.await
.unwrap();
assert_eq!(
ranges4.as_ref(),
&HashMap::from([
(
Arc::from("tag1"),
ColumnRange {
min_value: Arc::new(ScalarValue::from("v2")),
max_value: Arc::new(ScalarValue::from("v\u{10FFFF}"))
}
),
(
Arc::from("tag2"),
ColumnRange {
min_value: Arc::new(ScalarValue::from("v1")),
max_value: Arc::new(ScalarValue::from("v1"))
}
),
]),
);
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 4);
let ranges5 = cache
.column_ranges(Arc::clone(&cached_table), p5.id, None)
.await
.unwrap();
assert_eq!(
ranges5.as_ref(),
&HashMap::from([(
Arc::from("tag2"),
ColumnRange {
min_value: Arc::new(ScalarValue::from("v1")),
max_value: Arc::new(ScalarValue::from("v1"))
}
),]),
);
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 5);
let ranges1b = cache
.column_ranges(Arc::clone(&cached_table), p1.id, None)
.await
.unwrap();
assert!(Arc::ptr_eq(&ranges1a, &ranges1b));
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 5);
// non-existing partition
for _ in 0..2 {
let res = cache
.column_ranges(Arc::clone(&cached_table), PartitionId::new(i64::MAX), None)
.await;
assert_eq!(res, None);
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 6);
}
}
#[tokio::test]
async fn test_cache_sharing() {
let catalog = TestCatalog::new();
@ -336,13 +642,22 @@ mod tests {
cache
.sort_key(Arc::clone(&cached_table), p3.id, &Vec::new(), None)
.await;
cache
.column_ranges(Arc::clone(&cached_table), p3.id, None)
.await;
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 1);
cache
.sort_key(Arc::clone(&cached_table), p2.id, &Vec::new(), None)
.await;
cache
.column_ranges(Arc::clone(&cached_table), p2.id, None)
.await;
assert_histogram_metric_count(&catalog.metric_registry, "partition_get_by_id", 2);
cache
.column_ranges(Arc::clone(&cached_table), p1.id, None)
.await;
cache
.sort_key(Arc::clone(&cached_table), p1.id, &Vec::new(), None)
.await;

View File

@ -9,7 +9,7 @@ use crate::{
use async_trait::async_trait;
use data_types::NamespaceId;
use datafusion::{
catalog::{catalog::CatalogProvider, schema::SchemaProvider},
catalog::{schema::SchemaProvider, CatalogProvider},
datasource::TableProvider,
error::DataFusionError,
};

View File

@ -24,8 +24,8 @@ use arrow::datatypes::{DataType, TimeUnit};
use datafusion::{
error::DataFusionError,
logical_expr::{
function, BuiltinScalarFunction, ReturnTypeFunction, ScalarFunctionImplementation,
ScalarUDF, Signature, TypeSignature, Volatility,
BuiltinScalarFunction, ReturnTypeFunction, ScalarFunctionImplementation, ScalarUDF,
Signature, TypeSignature, Volatility,
},
};
use once_cell::sync::Lazy;
@ -41,7 +41,7 @@ pub const DATE_BIN_GAPFILL_UDF_NAME: &str = "date_bin_gapfill";
pub(crate) static DATE_BIN_GAPFILL: Lazy<Arc<ScalarUDF>> = Lazy::new(|| {
// DATE_BIN_GAPFILL should have the same signature as DATE_BIN,
// so that just adding _GAPFILL can turn a query into a gap-filling query.
let mut signatures = function::signature(&BuiltinScalarFunction::DateBin);
let mut signatures = BuiltinScalarFunction::DateBin.signature();
// We don't want this to be optimized away before we can give a helpful error message
signatures.volatility = Volatility::Volatile;

View File

@ -30,9 +30,9 @@ bytes = { version = "1" }
chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "serde"] }
crossbeam-utils = { version = "0.8" }
crypto-common = { version = "0.1", default-features = false, features = ["std"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "91e75d7e6303c1a7331e8e90eaad9b095ace929b" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "91e75d7e6303c1a7331e8e90eaad9b095ace929b", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "91e75d7e6303c1a7331e8e90eaad9b095ace929b", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "84e49771b7403b3d313d8493b61d2d58dcdd7514" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "84e49771b7403b3d313d8493b61d2d58dcdd7514", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "84e49771b7403b3d313d8493b61d2d58dcdd7514", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
digest = { version = "0.10", features = ["mac", "std"] }
either = { version = "1" }
fixedbitset = { version = "0.4" }
@ -46,7 +46,7 @@ futures-sink = { version = "0.3" }
futures-task = { version = "0.3", default-features = false, features = ["std"] }
futures-util = { version = "0.3", features = ["channel", "io", "sink"] }
getrandom = { version = "0.2", default-features = false, features = ["std"] }
hashbrown = { version = "0.13", features = ["raw"] }
hashbrown = { version = "0.14", features = ["raw"] }
indexmap = { version = "1", default-features = false, features = ["std"] }
itertools = { version = "0.10" }
libc = { version = "0.2", features = ["extra_traits"] }
@ -120,7 +120,7 @@ futures-sink = { version = "0.3" }
futures-task = { version = "0.3", default-features = false, features = ["std"] }
futures-util = { version = "0.3", features = ["channel", "io", "sink"] }
getrandom = { version = "0.2", default-features = false, features = ["std"] }
hashbrown = { version = "0.13", features = ["raw"] }
hashbrown = { version = "0.14", features = ["raw"] }
heck = { version = "0.4", features = ["unicode"] }
indexmap = { version = "1", default-features = false, features = ["std"] }
itertools = { version = "0.10" }
@ -205,15 +205,13 @@ rustls = { version = "0.21", features = ["dangerous_configuration"] }
scopeguard = { version = "1" }
webpki = { version = "0.22", default-features = false, features = ["std"] }
winapi = { version = "0.3", default-features = false, features = ["basetsd", "consoleapi", "errhandlingapi", "fileapi", "handleapi", "impl-debug", "impl-default", "knownfolders", "minwinbase", "minwindef", "ntsecapi", "ntstatus", "objbase", "processenv", "shellapi", "shlobj", "std", "stringapiset", "synchapi", "sysinfoapi", "timezoneapi", "winbase", "wincon", "winerror", "winnt", "winreg", "winuser", "ws2ipdef", "ws2tcpip", "wtypesbase"] }
windows-sys-53888c27b7ba5cf4 = { package = "windows-sys", version = "0.45", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_LibraryLoader", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_WindowsProgramming", "Win32_UI_Input_KeyboardAndMouse"] }
windows-sys-c8eced492e86ede7 = { package = "windows-sys", version = "0.48", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_Threading", "Win32_System_WindowsProgramming", "Win32_UI_Shell"] }
windows-sys = { version = "0.48", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_Threading", "Win32_System_WindowsProgramming", "Win32_UI_Shell"] }
[target.x86_64-pc-windows-msvc.build-dependencies]
once_cell = { version = "1", default-features = false, features = ["unstable"] }
scopeguard = { version = "1" }
webpki = { version = "0.22", default-features = false, features = ["std"] }
winapi = { version = "0.3", default-features = false, features = ["basetsd", "consoleapi", "errhandlingapi", "fileapi", "handleapi", "impl-debug", "impl-default", "knownfolders", "minwinbase", "minwindef", "ntsecapi", "ntstatus", "objbase", "processenv", "shellapi", "shlobj", "std", "stringapiset", "synchapi", "sysinfoapi", "timezoneapi", "winbase", "wincon", "winerror", "winnt", "winreg", "winuser", "ws2ipdef", "ws2tcpip", "wtypesbase"] }
windows-sys-53888c27b7ba5cf4 = { package = "windows-sys", version = "0.45", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_LibraryLoader", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_WindowsProgramming", "Win32_UI_Input_KeyboardAndMouse"] }
windows-sys-c8eced492e86ede7 = { package = "windows-sys", version = "0.48", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_Threading", "Win32_System_WindowsProgramming", "Win32_UI_Shell"] }
windows-sys = { version = "0.48", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_Threading", "Win32_System_WindowsProgramming", "Win32_UI_Shell"] }
### END HAKARI SECTION