Merge branch 'main' into 7899/wal-disk-metrics
commit
36e7f53f9b
|
@ -3,9 +3,6 @@
|
|||
rustflags = [
|
||||
"--cfg", "tokio_unstable",
|
||||
]
|
||||
rustdocflags = [
|
||||
"--cfg", "tokio_unstable",
|
||||
]
|
||||
|
||||
# sparse protocol opt-in
|
||||
# See https://blog.rust-lang.org/2023/03/09/Rust-1.68.0.html#cargos-sparse-protocol
|
||||
|
|
|
@ -190,7 +190,7 @@ dependencies = [
|
|||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"chrono",
|
||||
"half 2.2.1",
|
||||
"half 2.3.1",
|
||||
"num",
|
||||
]
|
||||
|
||||
|
@ -205,7 +205,7 @@ dependencies = [
|
|||
"arrow-schema",
|
||||
"chrono",
|
||||
"chrono-tz",
|
||||
"half 2.2.1",
|
||||
"half 2.3.1",
|
||||
"hashbrown 0.14.0",
|
||||
"num",
|
||||
]
|
||||
|
@ -215,7 +215,7 @@ name = "arrow-buffer"
|
|||
version = "42.0.0"
|
||||
source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/42.0.0_patched#20f6bd7ed730d937abe76ab859088094dee8a5d3"
|
||||
dependencies = [
|
||||
"half 2.2.1",
|
||||
"half 2.3.1",
|
||||
"num",
|
||||
]
|
||||
|
||||
|
@ -231,7 +231,7 @@ dependencies = [
|
|||
"arrow-select",
|
||||
"chrono",
|
||||
"comfy-table",
|
||||
"half 2.2.1",
|
||||
"half 2.3.1",
|
||||
"lexical-core",
|
||||
"num",
|
||||
]
|
||||
|
@ -261,7 +261,7 @@ source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/42.0.0_patched#
|
|||
dependencies = [
|
||||
"arrow-buffer",
|
||||
"arrow-schema",
|
||||
"half 2.2.1",
|
||||
"half 2.3.1",
|
||||
"num",
|
||||
]
|
||||
|
||||
|
@ -315,7 +315,7 @@ dependencies = [
|
|||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"chrono",
|
||||
"half 2.2.1",
|
||||
"half 2.3.1",
|
||||
"indexmap 1.9.3",
|
||||
"lexical-core",
|
||||
"num",
|
||||
|
@ -333,7 +333,7 @@ dependencies = [
|
|||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
"half 2.2.1",
|
||||
"half 2.3.1",
|
||||
"num",
|
||||
]
|
||||
|
||||
|
@ -347,7 +347,7 @@ dependencies = [
|
|||
"arrow-buffer",
|
||||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"half 2.2.1",
|
||||
"half 2.3.1",
|
||||
"hashbrown 0.14.0",
|
||||
]
|
||||
|
||||
|
@ -480,18 +480,18 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.23",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-trait"
|
||||
version = "0.1.68"
|
||||
version = "0.1.70"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842"
|
||||
checksum = "79fa67157abdfd688a259b6648808757db9347af834624f27ec646da976aee5d"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.23",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -621,9 +621,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
|||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "2.3.2"
|
||||
version = "2.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6dbe3c979c178231552ecba20214a8272df4e09f232a87aef4320cf06539aded"
|
||||
checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42"
|
||||
|
||||
[[package]]
|
||||
name = "blake2"
|
||||
|
@ -841,9 +841,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "4.3.9"
|
||||
version = "4.3.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bba77a07e4489fb41bd90e8d4201c3eb246b3c2c9ea2ba0bddd6c1d1df87db7d"
|
||||
checksum = "384e169cc618c613d5e3ca6404dda77a8685a63e08660dcc64abaf7da7cb0c7a"
|
||||
dependencies = [
|
||||
"clap_builder",
|
||||
"clap_derive",
|
||||
|
@ -873,13 +873,12 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "clap_builder"
|
||||
version = "4.3.9"
|
||||
version = "4.3.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2c9b4a88bb4bc35d3d6f65a21b0f0bafe9c894fa00978de242c555ec28bea1c0"
|
||||
checksum = "ef137bbe35aab78bdb468ccfba75a5f4d8321ae011d34063770780545176af2d"
|
||||
dependencies = [
|
||||
"anstream",
|
||||
"anstyle",
|
||||
"bitflags 1.3.2",
|
||||
"clap_lex",
|
||||
"once_cell",
|
||||
"strsim",
|
||||
|
@ -894,7 +893,7 @@ dependencies = [
|
|||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.23",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -969,7 +968,6 @@ dependencies = [
|
|||
"object_store",
|
||||
"observability_deps",
|
||||
"parquet_file",
|
||||
"predicate",
|
||||
"rand",
|
||||
"schema",
|
||||
"test_helpers",
|
||||
|
@ -1059,9 +1057,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "console-subscriber"
|
||||
version = "0.1.9"
|
||||
version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "57ab2224a0311582eb03adba4caaf18644f7b1f10a760803a803b9b605187fc7"
|
||||
checksum = "d4cf42660ac07fcebed809cfe561dd8730bcd35b075215e6479c516bcd0d11cb"
|
||||
dependencies = [
|
||||
"console-api",
|
||||
"crossbeam-channel",
|
||||
|
@ -1118,9 +1116,9 @@ checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa"
|
|||
|
||||
[[package]]
|
||||
name = "cpp_demangle"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2c76f98bdfc7f66172e6c7065f981ebb576ffc903fe4c0561d9f0c2509226dc6"
|
||||
checksum = "ee34052ee3d93d6d8f3e6f81d85c47921f6653a19a7b70e939e3e602d893a674"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
@ -1479,7 +1477,7 @@ dependencies = [
|
|||
"datafusion-common",
|
||||
"datafusion-expr",
|
||||
"datafusion-row",
|
||||
"half 2.2.1",
|
||||
"half 2.3.1",
|
||||
"hashbrown 0.14.0",
|
||||
"indexmap 1.9.3",
|
||||
"itertools 0.10.5",
|
||||
|
@ -1720,12 +1718,12 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "fd-lock"
|
||||
version = "3.0.12"
|
||||
version = "3.0.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "39ae6b3d9530211fb3b12a95374b8b0823be812f53d09e18c5675c0146b09642"
|
||||
checksum = "ef033ed5e9bad94e55838ca0ca906db0e043f517adda0c8b79c7a8c66c93c1b5"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"rustix",
|
||||
"rustix 0.38.2",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
|
@ -1899,7 +1897,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.23",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -2062,9 +2060,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "h2"
|
||||
version = "0.3.19"
|
||||
version = "0.3.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d357c7ae988e7d2182f7d7871d0b963962420b0678b0997ce7de72001aeab782"
|
||||
checksum = "97ec8491ebaf99c8eaa73058b045fe58073cd6be7f596ac993ced0b0a0c01049"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"fnv",
|
||||
|
@ -2087,10 +2085,11 @@ checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
|
|||
|
||||
[[package]]
|
||||
name = "half"
|
||||
version = "2.2.1"
|
||||
version = "2.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "02b4af3693f1b705df946e9fe5631932443781d0aabb423b62fcd4d73f6d2fd0"
|
||||
checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"crunchy",
|
||||
"num-traits",
|
||||
]
|
||||
|
@ -2150,7 +2149,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "heappy"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/mkmik/heappy?rev=1d6ac77a4026fffce8680a7b31a9f6e9859b5e73#1d6ac77a4026fffce8680a7b31a9f6e9859b5e73"
|
||||
source = "git+https://github.com/mkmik/heappy?rev=1de977a241cdd768acc5b6c82c0728b30c7db7b4#1de977a241cdd768acc5b6c82c0728b30c7db7b4"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
"bytes",
|
||||
|
@ -2173,9 +2172,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.3.1"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286"
|
||||
checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b"
|
||||
|
||||
[[package]]
|
||||
name = "hex"
|
||||
|
@ -2571,7 +2570,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"flate2",
|
||||
"hex",
|
||||
"integer-encoding",
|
||||
"integer-encoding 4.0.0",
|
||||
"observability_deps",
|
||||
"rand",
|
||||
"snafu",
|
||||
|
@ -2730,6 +2729,12 @@ version = "3.0.4"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02"
|
||||
|
||||
[[package]]
|
||||
name = "integer-encoding"
|
||||
version = "4.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "924df4f0e24e2e7f9cdd90babb0b96f93b20f3ecfa949ea9e6613756b8c8e1bf"
|
||||
|
||||
[[package]]
|
||||
name = "io-lifetimes"
|
||||
version = "1.0.11"
|
||||
|
@ -3099,19 +3104,18 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ipnet"
|
||||
version = "2.7.2"
|
||||
version = "2.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "12b6ee2129af8d4fb011108c73d99a1b83a85977f23b82460c0ae2e25bb4b57f"
|
||||
checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6"
|
||||
|
||||
[[package]]
|
||||
name = "is-terminal"
|
||||
version = "0.4.7"
|
||||
version = "0.4.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f"
|
||||
checksum = "24fddda5af7e54bf7da53067d6e802dbcc381d0a8eef629df528e3ebf68755cb"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"io-lifetimes",
|
||||
"rustix",
|
||||
"rustix 0.38.2",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
|
@ -3135,9 +3139,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.6"
|
||||
version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"
|
||||
checksum = "c0aa48fab2893d8a49caa94082ae8488f4e1050d73b367881dcd2198f4199fd8"
|
||||
|
||||
[[package]]
|
||||
name = "jobserver"
|
||||
|
@ -3262,6 +3266,12 @@ version = "0.3.8"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519"
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09fc20d2ca12cb9f044c93e3bd6d32d523e6e2ec3db4f7b2939cd99026ecd3f0"
|
||||
|
||||
[[package]]
|
||||
name = "lock_api"
|
||||
version = "0.4.10"
|
||||
|
@ -3846,7 +3856,7 @@ dependencies = [
|
|||
"libc",
|
||||
"redox_syscall 0.3.5",
|
||||
"smallvec",
|
||||
"windows-targets 0.48.0",
|
||||
"windows-targets 0.48.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -3941,9 +3951,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "paste"
|
||||
version = "1.0.12"
|
||||
version = "1.0.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9f746c4065a8fa3fe23974dd82f15431cc8d40779821001404d10d2e79ca7d79"
|
||||
checksum = "b4b27ab7be369122c218afc2079489cdcb4b517c0a3fc386ff11e1fedfcc2b35"
|
||||
|
||||
[[package]]
|
||||
name = "pbjson"
|
||||
|
@ -4027,7 +4037,7 @@ dependencies = [
|
|||
"pest_meta",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.23",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -4053,18 +4063,18 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "phf"
|
||||
version = "0.11.1"
|
||||
version = "0.11.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "928c6535de93548188ef63bb7c4036bd415cd8f36ad25af44b9789b2ee72a48c"
|
||||
checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc"
|
||||
dependencies = [
|
||||
"phf_shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_codegen"
|
||||
version = "0.11.1"
|
||||
version = "0.11.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a56ac890c5e3ca598bbdeaa99964edb5b0258a583a9eb6ef4e89fc85d9224770"
|
||||
checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a"
|
||||
dependencies = [
|
||||
"phf_generator",
|
||||
"phf_shared",
|
||||
|
@ -4072,9 +4082,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "phf_generator"
|
||||
version = "0.11.1"
|
||||
version = "0.11.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b1181c94580fa345f50f19d738aaa39c0ed30a600d95cb2d3e23f94266f14fbf"
|
||||
checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0"
|
||||
dependencies = [
|
||||
"phf_shared",
|
||||
"rand",
|
||||
|
@ -4091,29 +4101,29 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "pin-project"
|
||||
version = "1.1.1"
|
||||
version = "1.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6e138fdd8263907a2b0e1b4e80b7e58c721126479b6e6eedfb1b402acea7b9bd"
|
||||
checksum = "030ad2bc4db10a8944cb0d837f158bdfec4d4a4873ab701a95046770d11f8842"
|
||||
dependencies = [
|
||||
"pin-project-internal",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project-internal"
|
||||
version = "1.1.1"
|
||||
version = "1.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d1fef411b303e3e12d534fb6e7852de82da56edd937d895125821fb7c09436c7"
|
||||
checksum = "ec2e072ecce94ec471b13398d5402c188e76ac03cf74dd1a975161b23a3f6d9c"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.23",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project-lite"
|
||||
version = "0.2.9"
|
||||
version = "0.2.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116"
|
||||
checksum = "4c40d25201921e5ff0c862a505c6557ea88568a4e3ace775ab55e93f2f4f9d57"
|
||||
|
||||
[[package]]
|
||||
name = "pin-utils"
|
||||
|
@ -4129,9 +4139,9 @@ checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964"
|
|||
|
||||
[[package]]
|
||||
name = "pprof"
|
||||
version = "0.11.1"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "196ded5d4be535690899a4631cc9f18cdc41b7ebf24a79400f46f48e49a11059"
|
||||
checksum = "6b90f8560ad8bd57b207b8293bc5226e48e89039a6e590c12a297d91b84c7e60"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
"cfg-if",
|
||||
|
@ -4239,9 +4249,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068"
|
|||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.60"
|
||||
version = "1.0.63"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dec2b086b7a862cf4de201096214fa870344cf922b2b30c167badb3af3195406"
|
||||
checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
@ -4428,9 +4438,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.28"
|
||||
version = "1.0.29"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488"
|
||||
checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
@ -4693,15 +4703,28 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.37.21"
|
||||
version = "0.37.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62f25693a73057a1b4cb56179dd3c7ea21a7c6c5ee7d85781f5749b46f34b79c"
|
||||
checksum = "8818fa822adcc98b18fedbb3632a6a33213c070556b5aa7c4c8cc21cff565c4c"
|
||||
dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
"errno",
|
||||
"io-lifetimes",
|
||||
"libc",
|
||||
"linux-raw-sys",
|
||||
"linux-raw-sys 0.3.8",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.38.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aabcb0461ebd01d6b79945797c27f8529082226cb630a9865a71870ff63532a4"
|
||||
dependencies = [
|
||||
"bitflags 2.3.3",
|
||||
"errno",
|
||||
"libc",
|
||||
"linux-raw-sys 0.4.3",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
|
@ -4731,9 +4754,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "rustls-pemfile"
|
||||
version = "1.0.2"
|
||||
version = "1.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d194b56d58803a43635bdc398cd17e383d6f71f9182b9a192c127ca42494a59b"
|
||||
checksum = "2d3987094b1d07b653b7dfdc3f70ce9a1da9c51ac18c1b06b662e4f9a0e9f4b2"
|
||||
dependencies = [
|
||||
"base64 0.21.2",
|
||||
]
|
||||
|
@ -4760,7 +4783,7 @@ version = "12.0.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "994eca4bca05c87e86e15d90fc7a91d1be64b4482b38cb2d27474568fe7c9db9"
|
||||
dependencies = [
|
||||
"bitflags 2.3.2",
|
||||
"bitflags 2.3.3",
|
||||
"cfg-if",
|
||||
"clipboard-win",
|
||||
"fd-lock",
|
||||
|
@ -4832,29 +4855,29 @@ checksum = "e6b44e8fc93a14e66336d230954dda83d18b4605ccace8fe09bc7514a71ad0bc"
|
|||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.164"
|
||||
version = "1.0.166"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d"
|
||||
checksum = "d01b7404f9d441d3ad40e6a636a7782c377d2abdbe4fa2440e2edcc2f4f10db8"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.164"
|
||||
version = "1.0.166"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68"
|
||||
checksum = "5dd83d6dde2b6b2d466e14d9d1acce8816dedee94f735eac6395808b3483c6d6"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.23",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.99"
|
||||
version = "1.0.100"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "46266871c240a00b8f503b877622fe33430b3c7d963bdc0f2adc511e54a1eae3"
|
||||
checksum = "0f1e14e89be7aa4c4b78bdbdc9eb5bf8517829a600ae8eaa39a6e1d960b5185c"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"ryu",
|
||||
|
@ -5423,7 +5446,7 @@ dependencies = [
|
|||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.23",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -5434,9 +5457,9 @@ checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
|
|||
|
||||
[[package]]
|
||||
name = "symbolic-common"
|
||||
version = "10.2.1"
|
||||
version = "12.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1b55cdc318ede251d0957f07afe5fed912119b8c1bc5a7804151826db999e737"
|
||||
checksum = "38f7afd8bcd36190409e6b71d89928f7f09d918a7aa3460d847bc49a538d672e"
|
||||
dependencies = [
|
||||
"debugid",
|
||||
"memmap2",
|
||||
|
@ -5446,9 +5469,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "symbolic-demangle"
|
||||
version = "10.2.1"
|
||||
version = "12.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "79be897be8a483a81fff6a3a4e195b4ac838ef73ca42d348b3f722da9902e489"
|
||||
checksum = "ec64922563a36e3fe686b6d99f06f25dacad2a202ac7502ed642930a188fb20a"
|
||||
dependencies = [
|
||||
"cpp_demangle",
|
||||
"rustc-demangle",
|
||||
|
@ -5468,9 +5491,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.18"
|
||||
version = "2.0.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e"
|
||||
checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
@ -5514,7 +5537,7 @@ dependencies = [
|
|||
"cfg-if",
|
||||
"fastrand",
|
||||
"redox_syscall 0.3.5",
|
||||
"rustix",
|
||||
"rustix 0.37.22",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
|
@ -5579,22 +5602,22 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.40"
|
||||
version = "1.0.41"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac"
|
||||
checksum = "c16a64ba9387ef3fdae4f9c1a7f07a0997fce91985c0336f1ddc1822b3b37802"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.40"
|
||||
version = "1.0.41"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f"
|
||||
checksum = "d14928354b01c4d6a4f0e549069adef399a284e7995c7ccca94e8a07a5346c59"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.23",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -5623,7 +5646,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"integer-encoding",
|
||||
"integer-encoding 3.0.4",
|
||||
"log",
|
||||
"ordered-float 2.10.0",
|
||||
"threadpool",
|
||||
|
@ -5723,7 +5746,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.23",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -5914,7 +5937,7 @@ version = "0.4.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a8bd22a874a2d0b70452d5597b12c537331d49060824a95f49f108994f94aa4c"
|
||||
dependencies = [
|
||||
"bitflags 2.3.2",
|
||||
"bitflags 2.3.3",
|
||||
"bytes",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
|
@ -6007,7 +6030,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.23",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -6319,7 +6342,7 @@ dependencies = [
|
|||
"once_cell",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.23",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
|
@ -6353,7 +6376,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.23",
|
||||
"wasm-bindgen-backend",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
@ -6428,9 +6451,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "whoami"
|
||||
version = "1.4.0"
|
||||
version = "1.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2c70234412ca409cc04e864e89523cb0fc37f5e1344ebed5a3ebf4192b6b9f68"
|
||||
checksum = "22fc3756b8a9133049b26c7f61ab35416c130e8c09b660f5b3958b446f52cc50"
|
||||
dependencies = [
|
||||
"wasm-bindgen",
|
||||
"web-sys",
|
||||
|
@ -6473,7 +6496,7 @@ version = "0.48.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f"
|
||||
dependencies = [
|
||||
"windows-targets 0.48.0",
|
||||
"windows-targets 0.48.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -6491,7 +6514,7 @@ version = "0.48.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
|
||||
dependencies = [
|
||||
"windows-targets 0.48.0",
|
||||
"windows-targets 0.48.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -6511,9 +6534,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.48.0"
|
||||
version = "0.48.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5"
|
||||
checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f"
|
||||
dependencies = [
|
||||
"windows_aarch64_gnullvm 0.48.0",
|
||||
"windows_aarch64_msvc 0.48.0",
|
||||
|
@ -6664,7 +6687,6 @@ dependencies = [
|
|||
"hashbrown 0.14.0",
|
||||
"heck",
|
||||
"indexmap 1.9.3",
|
||||
"io-lifetimes",
|
||||
"itertools 0.10.5",
|
||||
"libc",
|
||||
"lock_api",
|
||||
|
@ -6690,7 +6712,7 @@ dependencies = [
|
|||
"regex-syntax 0.7.2",
|
||||
"reqwest",
|
||||
"ring",
|
||||
"rustix",
|
||||
"rustix 0.38.2",
|
||||
"rustls 0.21.2",
|
||||
"scopeguard",
|
||||
"serde",
|
||||
|
@ -6702,7 +6724,7 @@ dependencies = [
|
|||
"sqlx-core",
|
||||
"sqlx-macros",
|
||||
"syn 1.0.109",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.23",
|
||||
"thrift",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
|
|
|
@ -25,7 +25,7 @@ tonic = { workspace = true }
|
|||
[dev-dependencies]
|
||||
assert_matches = "1.5.0"
|
||||
parking_lot = "0.12.1"
|
||||
paste = "1.0.12"
|
||||
paste = "1.0.13"
|
||||
test_helpers_end_to_end = { path = "../test_helpers_end_to_end" }
|
||||
tokio = "1.29.1"
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@ edition.workspace = true
|
|||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
async-trait = "0.1.68"
|
||||
async-trait = "0.1.70"
|
||||
backoff = { path = "../backoff" }
|
||||
futures = "0.3"
|
||||
iox_time = { path = "../iox_time" }
|
||||
|
|
|
@ -9,7 +9,7 @@ license.workspace = true
|
|||
[dependencies]
|
||||
http = "0.2.9"
|
||||
reqwest = { version = "0.11", default-features = false, features = ["stream", "rustls-tls"] }
|
||||
thiserror = "1.0.40"
|
||||
thiserror = "1.0.41"
|
||||
tonic = { workspace = true }
|
||||
tower = "0.4"
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
|
|
|
@ -6,7 +6,7 @@ edition.workspace = true
|
|||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
async-trait = "0.1.68"
|
||||
async-trait = "0.1.70"
|
||||
backoff = { path = "../backoff" }
|
||||
bytes = "1.4"
|
||||
compactor_scheduler = { path = "../compactor_scheduler" }
|
||||
|
@ -21,7 +21,6 @@ metric = { path = "../metric" }
|
|||
object_store = { workspace = true }
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
parquet_file = { path = "../parquet_file" }
|
||||
predicate = { path = "../predicate" }
|
||||
rand = "0.8.3"
|
||||
schema = { path = "../schema" }
|
||||
tokio = { version = "1", features = ["macros", "rt", "sync"] }
|
||||
|
|
|
@ -2,15 +2,10 @@
|
|||
use std::{any::Any, sync::Arc};
|
||||
|
||||
use data_types::{ChunkId, ChunkOrder, PartitionId};
|
||||
use datafusion::{error::DataFusionError, physical_plan::Statistics};
|
||||
use iox_query::{
|
||||
exec::{stringset::StringSet, IOxSessionContext},
|
||||
util::create_basic_summary,
|
||||
QueryChunk, QueryChunkData,
|
||||
};
|
||||
use datafusion::physical_plan::Statistics;
|
||||
use iox_query::{util::create_basic_summary, QueryChunk, QueryChunkData};
|
||||
use observability_deps::tracing::debug;
|
||||
use parquet_file::{chunk::ParquetChunk, storage::ParquetStorage};
|
||||
use predicate::Predicate;
|
||||
use schema::{merge::SchemaMerger, sort::SortKey, Schema};
|
||||
use uuid::Uuid;
|
||||
|
||||
|
@ -96,20 +91,6 @@ impl QueryChunk for QueryableParquetChunk {
|
|||
false
|
||||
}
|
||||
|
||||
/// Return a set of Strings containing the distinct values in the
|
||||
/// specified columns. If the predicate can be evaluated entirely
|
||||
/// on the metadata of this Chunk. Returns `None` otherwise
|
||||
///
|
||||
/// The requested columns must all have String type.
|
||||
fn column_values(
|
||||
&self,
|
||||
_ctx: IOxSessionContext,
|
||||
_column_name: &str,
|
||||
_predicate: &Predicate,
|
||||
) -> Result<Option<StringSet>, DataFusionError> {
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn data(&self) -> QueryChunkData {
|
||||
QueryChunkData::Parquet(self.data.parquet_exec_input())
|
||||
}
|
||||
|
|
|
@ -6,7 +6,7 @@ edition.workspace = true
|
|||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
async-trait = "0.1.68"
|
||||
async-trait = "0.1.70"
|
||||
backoff = { path = "../backoff" }
|
||||
data_types = { path = "../data_types" }
|
||||
iox_catalog = { path = "../iox_catalog" }
|
||||
|
|
|
@ -74,18 +74,21 @@ impl PartitionsSource for CatalogToCompactPartitionsSource {
|
|||
// we're going to check the time range we'd like to query for against the end time of the last query.
|
||||
let mut last = self.last_maximum_time.lock().unwrap();
|
||||
|
||||
// if the last query ended further back in time than this query starts, we're about to skip something.
|
||||
if *last < minimum_time {
|
||||
if minimum_time.sub(*last) < self.min_threshold * 3 {
|
||||
// the end of the last query says we're skipping less than 3x our configured lookback, so
|
||||
// back up and query everything since the last query.
|
||||
minimum_time = *last;
|
||||
} else {
|
||||
// end of the last query says we're skipping a lot. We should limit how far we look back to avoid
|
||||
// returning all partitions, so we'll just back up 3x the configured lookback.
|
||||
// this might skip something (until cold compaction), but we need a limit in how far we look back.
|
||||
minimum_time = self.time_provider.now() - self.min_threshold * 3;
|
||||
}
|
||||
// query for partitions with activity since the last query. We shouldn't query for a time range
|
||||
// we've already covered. So if the prior query was 2m ago, and the query covered 10m, ending at
|
||||
// the time of that query, we just need to query for activity in the last 2m. Asking for more than
|
||||
// that creates busy-work that will spam the catalog with more queries to determine no compaction
|
||||
// needed. But we also don't want to query so far back in time that we get all partitions, so the
|
||||
// lookback is limited to 3x the configured threshold.
|
||||
if minimum_time < *last || minimum_time.sub(*last) < self.min_threshold * 3 {
|
||||
// the end of the last query is less than 3x our configured lookback, so we can query everything
|
||||
// since the last query.
|
||||
minimum_time = *last;
|
||||
} else {
|
||||
// end of the last query says we're skipping a lot. We should limit how far we look back to avoid
|
||||
// returning all partitions, so we'll just back up 3x the configured lookback.
|
||||
// this might skip something (until cold compaction), but we need a limit in how far we look back.
|
||||
minimum_time = self.time_provider.now() - self.min_threshold * 3;
|
||||
}
|
||||
maximum_time = self.max_threshold.map(|max| self.time_provider.now() - max);
|
||||
|
||||
|
@ -113,6 +116,7 @@ mod tests {
|
|||
use data_types::Timestamp;
|
||||
use iox_catalog::mem::MemCatalog;
|
||||
use iox_tests::PartitionBuilder;
|
||||
use iox_time::MockProvider;
|
||||
|
||||
fn partition_ids(ids: &[i64]) -> Vec<PartitionId> {
|
||||
ids.iter().cloned().map(PartitionId::new).collect()
|
||||
|
@ -122,17 +126,18 @@ mod tests {
|
|||
catalog: Arc<MemCatalog>,
|
||||
min_threshold: Duration,
|
||||
max_threshold: Option<Duration>,
|
||||
second_query_delta: Duration, // time between first and second query
|
||||
first_expected_ids: &[i64], // expected values on first fetch, which does a 3x on min_threshold
|
||||
second_expected_ids: &[i64], // expected values on second fetch, which uses min_threshold unmodified
|
||||
) {
|
||||
let time_provider = catalog.time_provider();
|
||||
let time_provider = Arc::new(MockProvider::new(catalog.time_provider().now()));
|
||||
|
||||
let partitions_source = CatalogToCompactPartitionsSource::new(
|
||||
Default::default(),
|
||||
catalog,
|
||||
min_threshold,
|
||||
max_threshold,
|
||||
time_provider,
|
||||
Arc::<iox_time::MockProvider>::clone(&time_provider),
|
||||
);
|
||||
|
||||
let mut actual_partition_ids = partitions_source.fetch().await;
|
||||
|
@ -145,6 +150,7 @@ mod tests {
|
|||
max_threshold {max_threshold:?} failed (first fetch, 3x lookback)",
|
||||
);
|
||||
|
||||
time_provider.inc(second_query_delta);
|
||||
let mut actual_partition_ids = partitions_source.fetch().await;
|
||||
actual_partition_ids.sort();
|
||||
|
||||
|
@ -163,10 +169,15 @@ mod tests {
|
|||
|
||||
let time_three_hour_ago = Timestamp::from(time_provider.hours_ago(3));
|
||||
let time_six_hour_ago = Timestamp::from(time_provider.hours_ago(6));
|
||||
let time_one_min_future = Timestamp::from(time_provider.minutes_into_future(1));
|
||||
|
||||
for (id, time) in [(1, time_three_hour_ago), (2, time_six_hour_ago)]
|
||||
.iter()
|
||||
.cloned()
|
||||
for (id, time) in [
|
||||
(1, time_three_hour_ago),
|
||||
(2, time_six_hour_ago),
|
||||
(3, time_one_min_future),
|
||||
]
|
||||
.iter()
|
||||
.cloned()
|
||||
{
|
||||
let partition = PartitionBuilder::new(id as i64)
|
||||
.with_new_file_at(time)
|
||||
|
@ -175,13 +186,44 @@ mod tests {
|
|||
}
|
||||
|
||||
let one_minute = Duration::from_secs(60);
|
||||
fetch_test(Arc::clone(&catalog), one_minute, None, &[], &[]).await;
|
||||
let ten_minute = Duration::from_secs(60) * 10;
|
||||
|
||||
// the lack of end time means it gets the future file (3) in the first query, this is an
|
||||
// oddity of a test case that has files with a future timestamp (not a real world concern).
|
||||
// the second query 10m later with a cap of 3m lookback doesn't get it.
|
||||
fetch_test(
|
||||
Arc::clone(&catalog),
|
||||
one_minute,
|
||||
None,
|
||||
ten_minute,
|
||||
&[3],
|
||||
&[],
|
||||
)
|
||||
.await;
|
||||
|
||||
let four_hours = Duration::from_secs(60 * 60 * 4);
|
||||
fetch_test(Arc::clone(&catalog), four_hours, None, &[1, 2], &[1]).await;
|
||||
// again the future file is included in the first query, just an oddity of the test case.
|
||||
fetch_test(
|
||||
Arc::clone(&catalog),
|
||||
four_hours,
|
||||
None,
|
||||
ten_minute,
|
||||
&[1, 2, 3],
|
||||
&[3],
|
||||
)
|
||||
.await;
|
||||
|
||||
let seven_hours = Duration::from_secs(60 * 60 * 7);
|
||||
fetch_test(Arc::clone(&catalog), seven_hours, None, &[1, 2], &[1, 2]).await;
|
||||
// again the future file is included in the first query, just an oddity of the test case.
|
||||
fetch_test(
|
||||
Arc::clone(&catalog),
|
||||
seven_hours,
|
||||
None,
|
||||
ten_minute,
|
||||
&[1, 2, 3],
|
||||
&[3],
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
@ -192,11 +234,13 @@ mod tests {
|
|||
let time_now = Timestamp::from(time_provider.now());
|
||||
let time_three_hour_ago = Timestamp::from(time_provider.hours_ago(3));
|
||||
let time_six_hour_ago = Timestamp::from(time_provider.hours_ago(6));
|
||||
let time_one_min_future = Timestamp::from(time_provider.minutes_into_future(1));
|
||||
|
||||
for (id, time) in [
|
||||
(1, time_now),
|
||||
(2, time_three_hour_ago),
|
||||
(3, time_six_hour_ago),
|
||||
(4, time_one_min_future),
|
||||
]
|
||||
.iter()
|
||||
.cloned()
|
||||
|
@ -209,54 +253,80 @@ mod tests {
|
|||
|
||||
let one_minute = Duration::from_secs(60);
|
||||
let one_hour = Duration::from_secs(60 * 60);
|
||||
let two_hour = Duration::from_secs(60 * 60 * 2);
|
||||
let four_hours = Duration::from_secs(60 * 60 * 4);
|
||||
let seven_hours = Duration::from_secs(60 * 60 * 7);
|
||||
|
||||
// File 3 is all that falls within the 7-4h lookback window. With 1m to the next query,
|
||||
// nothing is found with windows advanced by 1m.
|
||||
fetch_test(
|
||||
Arc::clone(&catalog),
|
||||
seven_hours,
|
||||
Some(four_hours),
|
||||
one_minute,
|
||||
&[3],
|
||||
&[3],
|
||||
&[],
|
||||
)
|
||||
.await;
|
||||
|
||||
// With a 7-1h lookback window, files 2 and 3 are found. With 2h to the next query, the
|
||||
// window advances to find the two newer files.
|
||||
fetch_test(
|
||||
Arc::clone(&catalog),
|
||||
seven_hours,
|
||||
Some(one_hour),
|
||||
two_hour,
|
||||
&[2, 3],
|
||||
&[2, 3],
|
||||
&[1, 4],
|
||||
)
|
||||
.await;
|
||||
|
||||
// With a 7h-1m lookback window, files 2 and 3 are found. With 1m to the next query, the
|
||||
// window advances to find the one newer file.
|
||||
fetch_test(
|
||||
Arc::clone(&catalog),
|
||||
seven_hours,
|
||||
Some(one_minute),
|
||||
one_minute,
|
||||
&[2, 3],
|
||||
&[2, 3],
|
||||
&[1],
|
||||
)
|
||||
.await;
|
||||
|
||||
// With a 4h-1h lookback window, files 2 and 3 are found. With 1m to the next query, there's
|
||||
// nothing new in the next window.
|
||||
fetch_test(
|
||||
Arc::clone(&catalog),
|
||||
four_hours,
|
||||
Some(one_hour),
|
||||
one_minute,
|
||||
&[2, 3],
|
||||
&[2],
|
||||
&[],
|
||||
)
|
||||
.await;
|
||||
|
||||
// With a 4h-1m lookback window, files 2 and 3 are found. With 4h to the next query, the
|
||||
// remaining files are found.
|
||||
fetch_test(
|
||||
Arc::clone(&catalog),
|
||||
four_hours,
|
||||
Some(one_minute),
|
||||
four_hours,
|
||||
&[2, 3],
|
||||
&[2],
|
||||
&[1, 4],
|
||||
)
|
||||
.await;
|
||||
|
||||
fetch_test(Arc::clone(&catalog), one_hour, Some(one_minute), &[], &[]).await;
|
||||
// With a 1h-1m lookback window, nothing is found. In the second query 1m later, it finds
|
||||
// the file created 'now'.
|
||||
fetch_test(
|
||||
Arc::clone(&catalog),
|
||||
one_hour,
|
||||
Some(one_minute),
|
||||
one_minute,
|
||||
&[],
|
||||
&[1],
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,7 +7,7 @@ edition.workspace = true
|
|||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
async-trait = "0.1.68"
|
||||
async-trait = "0.1.70"
|
||||
backoff = { path = "../backoff" }
|
||||
compactor = { path = "../compactor" }
|
||||
compactor_scheduler = { path = "../compactor_scheduler" }
|
||||
|
|
|
@ -18,14 +18,14 @@ ordered-float = "3"
|
|||
schema = { path = "../schema" }
|
||||
sha2 = "0.10"
|
||||
sqlx = { version = "0.6", features = ["runtime-tokio-rustls", "postgres", "uuid"] }
|
||||
thiserror = "1.0.40"
|
||||
thiserror = "1.0.41"
|
||||
uuid = { version = "1", features = ["v4"] }
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
percent-encoding = "2.2.0"
|
||||
|
||||
[dev-dependencies] # In alphabetical order
|
||||
assert_matches = "1"
|
||||
paste = "1.0.12"
|
||||
paste = "1.0.13"
|
||||
proptest = { version = "1.2.0", default-features = false }
|
||||
test_helpers = { path = "../test_helpers" }
|
||||
hex = "0.4.2"
|
||||
|
|
|
@ -160,33 +160,32 @@ impl std::fmt::Display for TableId {
|
|||
}
|
||||
}
|
||||
|
||||
/// A sequence number from a `router::Shard` (kafka partition)
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)]
|
||||
#[sqlx(transparent)]
|
||||
pub struct SequenceNumber(i64);
|
||||
/// A sequence number from an ingester
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct SequenceNumber(u64);
|
||||
|
||||
#[allow(missing_docs)]
|
||||
impl SequenceNumber {
|
||||
pub fn new(v: i64) -> Self {
|
||||
pub fn new(v: u64) -> Self {
|
||||
Self(v)
|
||||
}
|
||||
pub fn get(&self) -> i64 {
|
||||
pub fn get(&self) -> u64 {
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl Add<i64> for SequenceNumber {
|
||||
impl Add<u64> for SequenceNumber {
|
||||
type Output = Self;
|
||||
|
||||
fn add(self, other: i64) -> Self {
|
||||
fn add(self, other: u64) -> Self {
|
||||
Self(self.0 + other)
|
||||
}
|
||||
}
|
||||
|
||||
impl Sub<i64> for SequenceNumber {
|
||||
impl Sub<u64> for SequenceNumber {
|
||||
type Output = Self;
|
||||
|
||||
fn sub(self, other: i64) -> Self {
|
||||
fn sub(self, other: u64) -> Self {
|
||||
Self(self.0 - other)
|
||||
}
|
||||
}
|
||||
|
@ -614,7 +613,13 @@ impl ParquetFile {
|
|||
|
||||
/// Estimate the memory consumption of this object and its contents
|
||||
pub fn size(&self) -> usize {
|
||||
std::mem::size_of_val(self) + self.column_set.size()
|
||||
std::mem::size_of_val(self)
|
||||
+ self
|
||||
.partition_hash_id
|
||||
.as_ref()
|
||||
.map(|id| id.size() - std::mem::size_of_val(id))
|
||||
.unwrap_or_default()
|
||||
+ self.column_set.size()
|
||||
- std::mem::size_of_val(&self.column_set)
|
||||
}
|
||||
|
||||
|
|
|
@ -19,6 +19,18 @@ pub enum TransitionPartitionId {
|
|||
Deterministic(PartitionHashId),
|
||||
}
|
||||
|
||||
impl TransitionPartitionId {
|
||||
/// Size in bytes including `self`.
|
||||
pub fn size(&self) -> usize {
|
||||
match self {
|
||||
Self::Deprecated(_) => std::mem::size_of::<Self>(),
|
||||
Self::Deterministic(id) => {
|
||||
std::mem::size_of::<Self>() + id.size() - std::mem::size_of_val(id)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for TransitionPartitionId {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
match self {
|
||||
|
@ -216,6 +228,11 @@ impl PartitionHashId {
|
|||
pub fn as_bytes(&self) -> &[u8] {
|
||||
self.0.as_ref()
|
||||
}
|
||||
|
||||
/// Size in bytes including `Self`.
|
||||
pub fn size(&self) -> usize {
|
||||
std::mem::size_of::<Self>() + self.0.len()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'q> sqlx::encode::Encode<'q, sqlx::Postgres> for &'q PartitionHashId {
|
||||
|
|
|
@ -207,18 +207,18 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_intersect() {
|
||||
let a = [0, i64::MAX, 40, 41, 42, 43, 44, 45]
|
||||
let a = [0, u64::MAX, 40, 41, 42, 43, 44, 45]
|
||||
.into_iter()
|
||||
.map(SequenceNumber::new)
|
||||
.collect::<SequenceNumberSet>();
|
||||
|
||||
let b = [1, 5, i64::MAX, 42]
|
||||
let b = [1, 5, u64::MAX, 42]
|
||||
.into_iter()
|
||||
.map(SequenceNumber::new)
|
||||
.collect::<SequenceNumberSet>();
|
||||
|
||||
let intersection = intersect(&a, &b);
|
||||
let want = [i64::MAX, 42]
|
||||
let want = [u64::MAX, 42]
|
||||
.into_iter()
|
||||
.map(SequenceNumber::new)
|
||||
.collect::<SequenceNumberSet>();
|
||||
|
@ -226,21 +226,17 @@ mod tests {
|
|||
assert_eq!(intersection, want);
|
||||
}
|
||||
|
||||
/// Yield vec's of [`SequenceNumber`] derived from u64 values and cast to
|
||||
/// i64.
|
||||
/// Yield vec's of [`SequenceNumber`] derived from u64 values.
|
||||
///
|
||||
/// This matches how the ingester allocates [`SequenceNumber`] - from a u64
|
||||
/// source.
|
||||
fn sequence_number_vec() -> impl Strategy<Value = Vec<SequenceNumber>> {
|
||||
prop::collection::vec(0..u64::MAX, 0..1024).prop_map(|vec| {
|
||||
vec.into_iter()
|
||||
.map(|v| SequenceNumber::new(v as i64))
|
||||
.collect()
|
||||
})
|
||||
prop::collection::vec(0..u64::MAX, 0..1024)
|
||||
.prop_map(|vec| vec.into_iter().map(SequenceNumber::new).collect())
|
||||
}
|
||||
|
||||
// The following tests compare to an order-independent HashSet, as the
|
||||
// SequenceNumber uses the PartialOrd impl of the inner i64 for ordering,
|
||||
// SequenceNumber uses the PartialOrd impl of the inner u64 for ordering,
|
||||
// resulting in incorrect output when compared to an ordered set of cast as
|
||||
// u64.
|
||||
//
|
||||
|
|
|
@ -16,8 +16,11 @@
|
|||
)]
|
||||
|
||||
use metric::Registry;
|
||||
#[cfg(tokio_unstable)]
|
||||
use tokio_metrics_bridge::setup_tokio_metrics;
|
||||
// Workaround for "unused crate" lint false positives.
|
||||
#[cfg(not(tokio_unstable))]
|
||||
use tokio_metrics_bridge as _;
|
||||
use workspace_hack as _;
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
|
@ -242,7 +245,10 @@ impl DedicatedExecutor {
|
|||
.build()
|
||||
.expect("Creating tokio runtime");
|
||||
|
||||
#[cfg(tokio_unstable)]
|
||||
setup_tokio_metrics(runtime.metrics(), thread_name, metric_registry);
|
||||
#[cfg(not(tokio_unstable))]
|
||||
let _ = metric_registry;
|
||||
|
||||
runtime.block_on(async move {
|
||||
// Dropping the tokio runtime only waits for tasks to yield not to complete
|
||||
|
|
|
@ -9,8 +9,8 @@ license.workspace = true
|
|||
futures-util = { version = "0.3" }
|
||||
influxdb_iox_client = { path = "../influxdb_iox_client", features = ["flight", "format"] }
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
serde_json = "1.0.99"
|
||||
thiserror = "1.0.40"
|
||||
serde_json = "1.0.100"
|
||||
thiserror = "1.0.41"
|
||||
tokio = { version = "1.29" }
|
||||
tokio-util = { version = "0.7.8" }
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
|
|
|
@ -10,7 +10,7 @@ bytes = "1.4"
|
|||
futures = { version = "0.3", default-features = false }
|
||||
reqwest = { version = "0.11", default-features = false, features = ["stream", "json", "rustls-tls"] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0.99"
|
||||
serde_json = "1.0.100"
|
||||
snafu = "0.7"
|
||||
url = "2.4.0"
|
||||
uuid = { version = "1", features = ["v4"] }
|
||||
|
|
|
@ -18,4 +18,4 @@ workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
|||
test_helpers = { path = "../test_helpers" }
|
||||
assert_matches = "1"
|
||||
insta = { version = "1.30.0", features = ["yaml"] }
|
||||
paste = "1.0.12"
|
||||
paste = "1.0.13"
|
|
@ -52,7 +52,7 @@ backtrace = "0.3"
|
|||
bytes = "1.4"
|
||||
clap = { version = "4", features = ["derive", "env"] }
|
||||
comfy-table = { version = "7.0", default-features = false }
|
||||
console-subscriber = { version = "0.1.9", optional = true, features = ["parking_lot"] }
|
||||
console-subscriber = { version = "0.1.10", optional = true, features = ["parking_lot"] }
|
||||
dotenvy = "0.15.7"
|
||||
futures = "0.3"
|
||||
futures-util = { version = "0.3" }
|
||||
|
@ -67,10 +67,10 @@ libc = { version = "0.2" }
|
|||
num_cpus = "1.16.0"
|
||||
once_cell = { version = "1.18", features = ["parking_lot"] }
|
||||
rustyline = { version = "12.0", default-features = false, features = ["with-file-history"]}
|
||||
serde_json = "1.0.99"
|
||||
serde_json = "1.0.100"
|
||||
snafu = "0.7"
|
||||
tempfile = "3.6.0"
|
||||
thiserror = "1.0.40"
|
||||
thiserror = "1.0.41"
|
||||
tikv-jemalloc-ctl = { version = "0.5.0", optional = true }
|
||||
tokio = { version = "1.29", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time", "io-std"] }
|
||||
tokio-stream = { version = "0.1", features = ["net"] }
|
||||
|
@ -93,7 +93,7 @@ predicate = { path = "../predicate" }
|
|||
predicates = "3.0.3"
|
||||
pretty_assertions = "1.3.0"
|
||||
proptest = { version = "1.2.0", default-features = false }
|
||||
serde = "1.0.164"
|
||||
serde = "1.0.166"
|
||||
test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
|
||||
test_helpers_end_to_end = { path = "../test_helpers_end_to_end" }
|
||||
insta = { version = "1", features = ["yaml"] }
|
||||
|
|
|
@ -3,7 +3,8 @@ use std::sync::Arc;
|
|||
use iox_time::{SystemProvider, Time, TimeProvider};
|
||||
use metric::U64Gauge;
|
||||
use once_cell::sync::Lazy;
|
||||
use tokio::runtime::Handle;
|
||||
|
||||
#[cfg(tokio_unstable)]
|
||||
use tokio_metrics_bridge::setup_tokio_metrics;
|
||||
|
||||
/// Package version.
|
||||
|
@ -54,7 +55,12 @@ pub fn setup_metric_registry() -> Arc<metric::Registry> {
|
|||
registry.register_instrument("jemalloc_metrics", crate::jemalloc::JemallocMetrics::new);
|
||||
|
||||
// Register tokio metric for main runtime
|
||||
setup_tokio_metrics(Handle::current().metrics(), "main", Arc::clone(®istry));
|
||||
#[cfg(tokio_unstable)]
|
||||
setup_tokio_metrics(
|
||||
tokio::runtime::Handle::current().metrics(),
|
||||
"main",
|
||||
Arc::clone(®istry),
|
||||
);
|
||||
|
||||
registry
|
||||
}
|
||||
|
|
|
@ -1323,10 +1323,15 @@ async fn assert_ingester_contains_results(
|
|||
.await
|
||||
.unwrap();
|
||||
|
||||
let ingester_uuid = ingester_response.app_metadata.ingester_uuid;
|
||||
let ingester_partition = ingester_response
|
||||
.partitions
|
||||
.into_iter()
|
||||
.next()
|
||||
.expect("at least one ingester partition");
|
||||
let ingester_uuid = ingester_partition.app_metadata.ingester_uuid;
|
||||
assert!(!ingester_uuid.is_empty());
|
||||
|
||||
assert_batches_sorted_eq!(expected, &ingester_response.record_batches);
|
||||
assert_batches_sorted_eq!(expected, &ingester_partition.record_batches);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
|
@ -1,8 +1,14 @@
|
|||
use arrow::datatypes::DataType;
|
||||
use arrow_flight::{error::FlightError, Ticket};
|
||||
use arrow_util::assert_batches_sorted_eq;
|
||||
use data_types::{NamespaceId, TableId};
|
||||
use datafusion::{
|
||||
prelude::{col, lit},
|
||||
scalar::ScalarValue,
|
||||
};
|
||||
use futures::FutureExt;
|
||||
use http::StatusCode;
|
||||
use influxdb_iox_client::table::generated_types::{Part, PartitionTemplate, TemplatePart};
|
||||
use ingester_query_grpc::{influxdata::iox::ingester::v1 as proto, IngesterQueryRequest};
|
||||
use prost::Message;
|
||||
use test_helpers_end_to_end::{maybe_skip_integration, MiniCluster, Step, StepTest, StepTestState};
|
||||
|
@ -39,7 +45,14 @@ async fn persist_on_demand() {
|
|||
.await
|
||||
.unwrap();
|
||||
|
||||
let ingester_uuid = ingester_response.app_metadata.ingester_uuid;
|
||||
assert_eq!(ingester_response.partitions.len(), 1);
|
||||
let ingester_partition = ingester_response
|
||||
.partitions
|
||||
.into_iter()
|
||||
.next()
|
||||
.expect("just checked len");
|
||||
|
||||
let ingester_uuid = ingester_partition.app_metadata.ingester_uuid;
|
||||
assert!(!ingester_uuid.is_empty());
|
||||
|
||||
let expected = [
|
||||
|
@ -49,7 +62,7 @@ async fn persist_on_demand() {
|
|||
"| A | B | 1970-01-01T00:00:00.000123456Z | 42 |",
|
||||
"+------+------+--------------------------------+-----+",
|
||||
];
|
||||
assert_batches_sorted_eq!(&expected, &ingester_response.record_batches);
|
||||
assert_batches_sorted_eq!(&expected, &ingester_partition.record_batches);
|
||||
}
|
||||
.boxed()
|
||||
})),
|
||||
|
@ -77,8 +90,15 @@ async fn persist_on_demand() {
|
|||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(ingester_response.partitions.len(), 1);
|
||||
let ingester_partition = ingester_response
|
||||
.partitions
|
||||
.into_iter()
|
||||
.next()
|
||||
.expect("just checked len");
|
||||
|
||||
let num_files_persisted =
|
||||
ingester_response.app_metadata.completed_persistence_count;
|
||||
ingester_partition.app_metadata.completed_persistence_count;
|
||||
assert_eq!(num_files_persisted, 1);
|
||||
}
|
||||
.boxed()
|
||||
|
@ -121,11 +141,17 @@ async fn ingester_flight_api() {
|
|||
.query_ingester(query.clone(), cluster.ingester().ingester_grpc_connection())
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(ingester_response.partitions.len(), 1);
|
||||
let ingester_partition = ingester_response
|
||||
.partitions
|
||||
.into_iter()
|
||||
.next()
|
||||
.expect("just checked len");
|
||||
|
||||
let ingester_uuid = ingester_response.app_metadata.ingester_uuid.clone();
|
||||
let ingester_uuid = ingester_partition.app_metadata.ingester_uuid.clone();
|
||||
assert!(!ingester_uuid.is_empty());
|
||||
|
||||
let schema = ingester_response.schema.unwrap();
|
||||
let schema = ingester_partition.schema.unwrap();
|
||||
|
||||
let expected = [
|
||||
"+------+------+--------------------------------+-----+",
|
||||
|
@ -135,11 +161,11 @@ async fn ingester_flight_api() {
|
|||
"| B | A | 1970-01-01T00:00:00.001234567Z | 84 |",
|
||||
"+------+------+--------------------------------+-----+",
|
||||
];
|
||||
assert_batches_sorted_eq!(&expected, &ingester_response.record_batches);
|
||||
assert_batches_sorted_eq!(&expected, &ingester_partition.record_batches);
|
||||
|
||||
// Also ensure that the schema of the batches matches what is
|
||||
// reported by the performed_query.
|
||||
ingester_response
|
||||
ingester_partition
|
||||
.record_batches
|
||||
.iter()
|
||||
.enumerate()
|
||||
|
@ -152,7 +178,13 @@ async fn ingester_flight_api() {
|
|||
.query_ingester(query.clone(), cluster.ingester().ingester_grpc_connection())
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(ingester_response.app_metadata.ingester_uuid, ingester_uuid);
|
||||
assert_eq!(ingester_response.partitions.len(), 1);
|
||||
let ingester_partition = ingester_response
|
||||
.partitions
|
||||
.into_iter()
|
||||
.next()
|
||||
.expect("just checked len");
|
||||
assert_eq!(ingester_partition.app_metadata.ingester_uuid, ingester_uuid);
|
||||
|
||||
// Restart the ingesters
|
||||
cluster.restart_ingesters().await;
|
||||
|
@ -167,7 +199,146 @@ async fn ingester_flight_api() {
|
|||
.query_ingester(query, cluster.ingester().ingester_grpc_connection())
|
||||
.await
|
||||
.unwrap();
|
||||
assert_ne!(ingester_response.app_metadata.ingester_uuid, ingester_uuid);
|
||||
assert_eq!(ingester_response.partitions.len(), 1);
|
||||
let ingester_partition = ingester_response
|
||||
.partitions
|
||||
.into_iter()
|
||||
.next()
|
||||
.expect("just checked len");
|
||||
assert_ne!(ingester_partition.app_metadata.ingester_uuid, ingester_uuid);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn ingester_partition_pruning() {
|
||||
test_helpers::maybe_start_logging();
|
||||
let database_url = maybe_skip_integration!();
|
||||
|
||||
// Set up cluster
|
||||
let mut cluster = MiniCluster::create_shared_never_persist(database_url).await;
|
||||
|
||||
let mut steps: Vec<_> = vec![Step::Custom(Box::new(move |state: &mut StepTestState| {
|
||||
async move {
|
||||
let namespace_name = state.cluster().namespace();
|
||||
|
||||
let mut namespace_client = influxdb_iox_client::namespace::Client::new(
|
||||
state.cluster().router().router_grpc_connection(),
|
||||
);
|
||||
namespace_client
|
||||
.create_namespace(
|
||||
namespace_name,
|
||||
None,
|
||||
None,
|
||||
Some(PartitionTemplate {
|
||||
parts: vec![
|
||||
TemplatePart {
|
||||
part: Some(Part::TagValue("tag1".into())),
|
||||
},
|
||||
TemplatePart {
|
||||
part: Some(Part::TagValue("tag3".into())),
|
||||
},
|
||||
],
|
||||
}),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let mut table_client = influxdb_iox_client::table::Client::new(
|
||||
state.cluster().router().router_grpc_connection(),
|
||||
);
|
||||
|
||||
// table1: create implicitly by writing to it
|
||||
|
||||
// table2: do not override partition template => use namespace template
|
||||
table_client
|
||||
.create_table(namespace_name, "table2", None)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// table3: override namespace template
|
||||
table_client
|
||||
.create_table(
|
||||
namespace_name,
|
||||
"table3",
|
||||
Some(PartitionTemplate {
|
||||
parts: vec![TemplatePart {
|
||||
part: Some(Part::TagValue("tag2".into())),
|
||||
}],
|
||||
}),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
.boxed()
|
||||
}))]
|
||||
.into_iter()
|
||||
.chain((1..=3).flat_map(|tid| {
|
||||
[Step::WriteLineProtocol(
|
||||
[
|
||||
format!("table{tid},tag1=v1a,tag2=v2a,tag3=v3a f=1 11"),
|
||||
format!("table{tid},tag1=v1b,tag2=v2a,tag3=v3a f=1 11"),
|
||||
format!("table{tid},tag1=v1a,tag2=v2b,tag3=v3a f=1 11"),
|
||||
format!("table{tid},tag1=v1b,tag2=v2b,tag3=v3a f=1 11"),
|
||||
format!("table{tid},tag1=v1a,tag2=v2a,tag3=v3b f=1 11"),
|
||||
format!("table{tid},tag1=v1b,tag2=v2a,tag3=v3b f=1 11"),
|
||||
format!("table{tid},tag1=v1a,tag2=v2b,tag3=v3b f=1 11"),
|
||||
format!("table{tid},tag1=v1b,tag2=v2b,tag3=v3b f=1 11"),
|
||||
]
|
||||
.join("\n"),
|
||||
)]
|
||||
.into_iter()
|
||||
}))
|
||||
.collect();
|
||||
|
||||
steps.push(Step::Custom(Box::new(move |state: &mut StepTestState| {
|
||||
async move {
|
||||
// Note: The querier will perform correct type coercion. We must simulate this here, otherwise the ingester
|
||||
// will NOT be able to prune the data because the predicate evaluation will fail with a type error
|
||||
// and the predicate will be ignored.
|
||||
let predicate = ::predicate::Predicate::new().with_expr(col("tag1").eq(lit(
|
||||
ScalarValue::Dictionary(
|
||||
Box::new(DataType::Int32),
|
||||
Box::new(ScalarValue::from("v1a")),
|
||||
),
|
||||
)));
|
||||
|
||||
let query = IngesterQueryRequest::new(
|
||||
state.cluster().namespace_id().await,
|
||||
state.cluster().table_id("table1").await,
|
||||
vec![],
|
||||
Some(predicate),
|
||||
);
|
||||
|
||||
let query: proto::IngesterQueryRequest = query.try_into().unwrap();
|
||||
let ingester_response = state
|
||||
.cluster()
|
||||
.query_ingester(
|
||||
query.clone(),
|
||||
state.cluster().ingester().ingester_grpc_connection(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let expected = [
|
||||
"+-----+------+------+------+--------------------------------+",
|
||||
"| f | tag1 | tag2 | tag3 | time |",
|
||||
"+-----+------+------+------+--------------------------------+",
|
||||
"| 1.0 | v1a | v2a | v3a | 1970-01-01T00:00:00.000000011Z |",
|
||||
"| 1.0 | v1a | v2a | v3b | 1970-01-01T00:00:00.000000011Z |",
|
||||
"| 1.0 | v1a | v2b | v3a | 1970-01-01T00:00:00.000000011Z |",
|
||||
"| 1.0 | v1a | v2b | v3b | 1970-01-01T00:00:00.000000011Z |",
|
||||
"+-----+------+------+------+--------------------------------+",
|
||||
];
|
||||
let record_batches = ingester_response
|
||||
.partitions
|
||||
.into_iter()
|
||||
.flat_map(|p| p.record_batches)
|
||||
.collect::<Vec<_>>();
|
||||
assert_batches_sorted_eq!(&expected, &record_batches);
|
||||
}
|
||||
.boxed()
|
||||
})));
|
||||
|
||||
StepTest::new(&mut cluster, steps).run().await
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
|
@ -299,6 +299,48 @@ async fn query_after_persist_sees_new_files() {
|
|||
StepTest::new(&mut cluster, steps).run().await
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn query_after_shutdown_sees_new_files() {
|
||||
test_helpers::maybe_start_logging();
|
||||
let database_url = maybe_skip_integration!();
|
||||
|
||||
// Configure a cluster such that the ingester never persists (until
|
||||
// shutdown)
|
||||
let ingester_config = TestConfig::new_ingester_never_persist(&database_url);
|
||||
let router_config = TestConfig::new_router(&ingester_config);
|
||||
// Querier configured to quickly consider ingesters dead to speed up the
|
||||
// test.
|
||||
let querier_config =
|
||||
TestConfig::new_querier(&ingester_config).with_querier_circuit_breaker_threshold(1);
|
||||
|
||||
let mut cluster = MiniCluster::new()
|
||||
.with_ingester(ingester_config)
|
||||
.await
|
||||
.with_router(router_config)
|
||||
.await
|
||||
.with_querier(querier_config)
|
||||
.await;
|
||||
|
||||
let steps = vec![
|
||||
Step::WriteLineProtocol("bananas,tag1=A,tag2=B val=42i 123456".to_string()),
|
||||
Step::AssertNumParquetFiles { expected: 0 }, // test invariant
|
||||
Step::GracefulStopIngesters,
|
||||
Step::AssertNumParquetFiles { expected: 1 },
|
||||
Step::Query {
|
||||
sql: "select * from bananas".to_string(),
|
||||
expected: vec![
|
||||
"+------+------+--------------------------------+-----+",
|
||||
"| tag1 | tag2 | time | val |",
|
||||
"+------+------+--------------------------------+-----+",
|
||||
"| A | B | 1970-01-01T00:00:00.000123456Z | 42 |",
|
||||
"+------+------+--------------------------------+-----+",
|
||||
],
|
||||
},
|
||||
];
|
||||
|
||||
StepTest::new(&mut cluster, steps).run().await
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn table_not_found_on_ingester() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
|
|
@ -193,7 +193,14 @@ async fn write_replication() {
|
|||
.await
|
||||
.unwrap();
|
||||
|
||||
let ingester_uuid = ingester_response.app_metadata.ingester_uuid;
|
||||
assert_eq!(ingester_response.partitions.len(), 1);
|
||||
let ingester_partition = ingester_response
|
||||
.partitions
|
||||
.into_iter()
|
||||
.next()
|
||||
.expect("just checked len");
|
||||
|
||||
let ingester_uuid = ingester_partition.app_metadata.ingester_uuid;
|
||||
assert!(!ingester_uuid.is_empty());
|
||||
|
||||
let expected = [
|
||||
|
@ -212,7 +219,7 @@ async fn write_replication() {
|
|||
"| A | B | 1970-01-01T00:00:00.000000020Z | 20 |",
|
||||
"+------+------+--------------------------------+-----+",
|
||||
];
|
||||
assert_batches_sorted_eq!(&expected, &ingester_response.record_batches);
|
||||
assert_batches_sorted_eq!(&expected, &ingester_partition.record_batches);
|
||||
}
|
||||
.boxed()
|
||||
})));
|
||||
|
|
|
@ -24,10 +24,10 @@ prost = "0.11"
|
|||
rand = "0.8.3"
|
||||
reqwest = { version = "0.11", default-features = false, features = ["stream", "rustls-tls"] }
|
||||
schema = { path = "../schema" }
|
||||
serde_json = "1.0.99"
|
||||
serde_json = "1.0.100"
|
||||
tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-thread"] }
|
||||
tokio-stream = "0.1.13"
|
||||
thiserror = "1.0.40"
|
||||
thiserror = "1.0.41"
|
||||
tonic = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
|
|
|
@ -6,7 +6,7 @@ edition.workspace = true
|
|||
license.workspace = true
|
||||
|
||||
[dependencies] # In alphabetical order
|
||||
integer-encoding = "3.0.4"
|
||||
integer-encoding = "4.0.0"
|
||||
snafu = "0.7"
|
||||
snap = "1.1.0"
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
|
|
|
@ -10,7 +10,7 @@ arrow = { workspace = true, features = ["prettyprint"] }
|
|||
arrow_util = { version = "0.1.0", path = "../arrow_util" }
|
||||
arrow-flight = { workspace = true }
|
||||
async-channel = "1.8.0"
|
||||
async-trait = "0.1.68"
|
||||
async-trait = "0.1.70"
|
||||
backoff = { version = "0.1.0", path = "../backoff" }
|
||||
bytes = "1.4.0"
|
||||
crossbeam-utils = "0.8.16"
|
||||
|
@ -31,7 +31,7 @@ observability_deps = { version = "0.1.0", path = "../observability_deps" }
|
|||
once_cell = "1.18"
|
||||
parking_lot = "0.12.1"
|
||||
parquet_file = { version = "0.1.0", path = "../parquet_file" }
|
||||
pin-project = "1.1.1"
|
||||
pin-project = "1.1.2"
|
||||
predicate = { version = "0.1.0", path = "../predicate" }
|
||||
prost = { version = "0.11.9", default-features = false, features = ["std"] }
|
||||
rand = "0.8.5"
|
||||
|
@ -39,7 +39,7 @@ schema = { version = "0.1.0", path = "../schema" }
|
|||
service_grpc_catalog = { version = "0.1.0", path = "../service_grpc_catalog" }
|
||||
sharder = { version = "0.1.0", path = "../sharder" }
|
||||
test_helpers = { path = "../test_helpers", features = ["future_timeout"], optional = true }
|
||||
thiserror = "1.0.40"
|
||||
thiserror = "1.0.41"
|
||||
tracker = { path = "../tracker" }
|
||||
tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] }
|
||||
tokio-util = "0.7.8"
|
||||
|
@ -58,7 +58,7 @@ ingester_test_ctx = { path = "../ingester_test_ctx" }
|
|||
lazy_static = "1.4.0"
|
||||
mutable_batch_lp = { path = "../mutable_batch_lp" }
|
||||
object_store = { workspace = true }
|
||||
paste = "1.0.12"
|
||||
paste = "1.0.13"
|
||||
tempfile = "3.6.0"
|
||||
test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
|
||||
tokio = { version = "1.29", features = ["macros", "time", "test-util"] }
|
||||
|
@ -81,3 +81,7 @@ name = "write"
|
|||
harness = false
|
||||
# Require some internal types be made visible for benchmark code.
|
||||
required-features = ["benches"]
|
||||
|
||||
[[bench]]
|
||||
name = "query"
|
||||
harness = false
|
||||
|
|
|
@ -0,0 +1,102 @@
|
|||
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
|
||||
use data_types::{NamespaceId, PartitionKey, TableId};
|
||||
use ingester::IngesterRpcInterface;
|
||||
use ingester_query_grpc::influxdata::iox::ingester::v1::IngesterQueryRequest;
|
||||
use ingester_test_ctx::{TestContext, TestContextBuilder};
|
||||
use std::fmt::Write;
|
||||
|
||||
/// Name of the namespace the benchmark creates in the catalog and writes its
/// test data to.
const TEST_NAMESPACE: &str = "bananas";
|
||||
/// Partition key under which all of the benchmark's test data is written.
const PARTITION_KEY: &str = "platanos";
|
||||
|
||||
/// Build a line protocol payload of `rows` lines for the `bananas`
/// measurement, each line carrying `cols` fields.
///
/// The first `cols - 1` fields are named `v0`, `v1`, ... and the final field
/// is named `v{cols}`. Every field value is the row index immediately
/// followed by the numeric suffix of the field name, and each line ends with
/// the timestamp `42{row}`.
///
/// # Panics
///
/// Panics if `cols` is zero while `rows` is non-zero, as a line cannot be
/// generated without at least one field.
fn generate_table_data(rows: usize, cols: usize) -> String {
    // Guard explicitly: `cols - 1` below would otherwise underflow, which
    // only panics when overflow checks are enabled and wraps around to an
    // effectively unbounded loop when they are not (as in optimised builds).
    assert!(
        rows == 0 || cols > 0,
        "cannot generate rows with zero columns"
    );

    let mut buf = String::new();
    for i in 0..rows {
        buf.push_str("bananas ");
        for j in 0..(cols - 1) {
            write!(buf, "v{j}={i}{j},").expect("writing to a String cannot fail");
        }
        writeln!(buf, "v{cols}={i}{cols} 42{i}").expect("writing to a String cannot fail");
    }

    buf
}
|
||||
|
||||
/// Return an initialised and pre-warmed ingester instance backed by a catalog
|
||||
/// correctly populated to accept writes of `lp`.
|
||||
async fn init(
|
||||
lp: impl AsRef<str>,
|
||||
) -> (TestContext<impl IngesterRpcInterface>, NamespaceId, TableId) {
|
||||
let lp = lp.as_ref();
|
||||
|
||||
let mut ctx = TestContextBuilder::default()
|
||||
// Don't stop ingest during benchmarks
|
||||
.with_max_persist_queue_depth(10_000_000)
|
||||
.with_persist_hot_partition_cost(10_000_000_000)
|
||||
.build()
|
||||
.await;
|
||||
|
||||
// Ensure the namespace exists in the catalog.
|
||||
let ns = ctx.ensure_namespace(TEST_NAMESPACE, None).await;
|
||||
|
||||
// Write the test data
|
||||
ctx.write_lp(TEST_NAMESPACE, lp, PartitionKey::from(PARTITION_KEY), 42)
|
||||
.await;
|
||||
|
||||
let table_id = ctx.table_id(TEST_NAMESPACE, "bananas").await;
|
||||
|
||||
(ctx, ns.id, table_id)
|
||||
}
|
||||
|
||||
fn bench_query(c: &mut Criterion) {
|
||||
let runtime = tokio::runtime::Builder::new_multi_thread()
|
||||
.enable_all()
|
||||
.build()
|
||||
.expect("failed to initialise tokio runtime for benchmark");
|
||||
|
||||
for (rows, cols) in [(100_000, 10), (100_000, 100), (100_000, 200)] {
|
||||
run_bench("no projection", rows, cols, vec![], &runtime, c);
|
||||
run_bench(
|
||||
"project 1 column",
|
||||
rows,
|
||||
cols,
|
||||
vec!["time".to_string()],
|
||||
&runtime,
|
||||
c,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn run_bench(
|
||||
name: &str,
|
||||
rows: usize,
|
||||
cols: usize,
|
||||
projection: Vec<String>,
|
||||
runtime: &tokio::runtime::Runtime,
|
||||
c: &mut Criterion,
|
||||
) {
|
||||
let lp = generate_table_data(rows, cols);
|
||||
let (ctx, namespace_id, table_id) = runtime.block_on(init(lp));
|
||||
|
||||
let mut group = c.benchmark_group("query");
|
||||
group.throughput(Throughput::Elements(1)); // Queries per second
|
||||
group.bench_function(
|
||||
BenchmarkId::new(name, format!("rows_{rows}_cols{cols}")),
|
||||
|b| {
|
||||
let ctx = &ctx;
|
||||
let projection = &projection;
|
||||
b.to_async(runtime).iter(|| async move {
|
||||
ctx.query(IngesterQueryRequest {
|
||||
namespace_id: namespace_id.get(),
|
||||
table_id: table_id.get(),
|
||||
columns: projection.clone(),
|
||||
predicate: None,
|
||||
})
|
||||
.await
|
||||
.expect("query request failed");
|
||||
});
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
// Define the `benches` benchmark group, made up of `bench_query`.
criterion_group!(benches, bench_query);
|
||||
// Generate the benchmark binary's `main` function, which runs `benches`.
criterion_main!(benches);
|
|
@ -7,19 +7,23 @@ use std::sync::Arc;
|
|||
use async_trait::async_trait;
|
||||
use data_types::{NamespaceId, TableId};
|
||||
use metric::U64Counter;
|
||||
use predicate::Predicate;
|
||||
use trace::span::Span;
|
||||
|
||||
use super::{
|
||||
partition::resolver::PartitionProvider,
|
||||
post_write::PostWriteObserver,
|
||||
table::{name_resolver::TableNameProvider, TableData},
|
||||
table::{metadata_resolver::TableProvider, TableData},
|
||||
};
|
||||
use crate::{
|
||||
arcmap::ArcMap,
|
||||
deferred_load::DeferredLoad,
|
||||
dml_payload::IngestOp,
|
||||
dml_sink::DmlSink,
|
||||
query::{response::QueryResponse, tracing::QueryExecTracing, QueryError, QueryExec},
|
||||
query::{
|
||||
projection::OwnedProjection, response::QueryResponse, tracing::QueryExecTracing,
|
||||
QueryError, QueryExec,
|
||||
},
|
||||
};
|
||||
|
||||
/// The string name / identifier of a Namespace.
|
||||
|
@ -60,12 +64,13 @@ pub(crate) struct NamespaceData<O> {
|
|||
/// A set of tables this [`NamespaceData`] instance has processed
|
||||
/// [`IngestOp`]'s for.
|
||||
///
|
||||
/// The [`TableNameProvider`] acts as a [`DeferredLoad`] constructor to
|
||||
/// resolve the [`TableName`] for new [`TableData`] out of the hot path.
|
||||
/// The [`TableProvider`] acts as a [`DeferredLoad`] constructor to
|
||||
/// resolve the catalog [`Table`] for new [`TableData`] out of the hot path.
|
||||
///
|
||||
/// [`TableName`]: crate::buffer_tree::table::TableName
|
||||
///
|
||||
/// [`Table`]: data_types::Table
|
||||
tables: ArcMap<TableId, TableData<O>>,
|
||||
table_name_resolver: Arc<dyn TableNameProvider>,
|
||||
catalog_table_resolver: Arc<dyn TableProvider>,
|
||||
/// The count of tables initialised in this Ingester so far, across all
|
||||
/// namespaces.
|
||||
table_count: U64Counter,
|
||||
|
@ -83,7 +88,7 @@ impl<O> NamespaceData<O> {
|
|||
pub(super) fn new(
|
||||
namespace_id: NamespaceId,
|
||||
namespace_name: Arc<DeferredLoad<NamespaceName>>,
|
||||
table_name_resolver: Arc<dyn TableNameProvider>,
|
||||
catalog_table_resolver: Arc<dyn TableProvider>,
|
||||
partition_provider: Arc<dyn PartitionProvider>,
|
||||
post_write_observer: Arc<O>,
|
||||
metrics: &metric::Registry,
|
||||
|
@ -99,7 +104,7 @@ impl<O> NamespaceData<O> {
|
|||
namespace_id,
|
||||
namespace_name,
|
||||
tables: Default::default(),
|
||||
table_name_resolver,
|
||||
catalog_table_resolver,
|
||||
table_count,
|
||||
partition_provider,
|
||||
post_write_observer,
|
||||
|
@ -151,7 +156,7 @@ where
|
|||
self.table_count.inc(1);
|
||||
Arc::new(TableData::new(
|
||||
table_id,
|
||||
Arc::new(self.table_name_resolver.for_table(table_id)),
|
||||
Arc::new(self.catalog_table_resolver.for_table(table_id)),
|
||||
self.namespace_id,
|
||||
Arc::clone(&self.namespace_name),
|
||||
Arc::clone(&self.partition_provider),
|
||||
|
@ -187,8 +192,9 @@ where
|
|||
&self,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
columns: Vec<String>,
|
||||
projection: OwnedProjection,
|
||||
span: Option<Span>,
|
||||
predicate: Option<Predicate>,
|
||||
) -> Result<Self::Response, QueryError> {
|
||||
assert_eq!(
|
||||
self.namespace_id, namespace_id,
|
||||
|
@ -204,7 +210,7 @@ where
|
|||
// a tracing delegate to emit a child span.
|
||||
Ok(QueryResponse::new(
|
||||
QueryExecTracing::new(inner, "table")
|
||||
.query_exec(namespace_id, table_id, columns, span)
|
||||
.query_exec(namespace_id, table_id, projection, span, predicate)
|
||||
.await?,
|
||||
))
|
||||
}
|
||||
|
@ -226,7 +232,7 @@ mod tests {
|
|||
test_util::{
|
||||
defer_namespace_name_1_ms, make_write_op, PartitionDataBuilder, ARBITRARY_NAMESPACE_ID,
|
||||
ARBITRARY_NAMESPACE_NAME, ARBITRARY_PARTITION_KEY, ARBITRARY_TABLE_ID,
|
||||
ARBITRARY_TABLE_NAME, ARBITRARY_TABLE_NAME_PROVIDER,
|
||||
ARBITRARY_TABLE_NAME, ARBITRARY_TABLE_PROVIDER,
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -243,7 +249,7 @@ mod tests {
|
|||
let ns = NamespaceData::new(
|
||||
ARBITRARY_NAMESPACE_ID,
|
||||
defer_namespace_name_1_ms(),
|
||||
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
|
||||
Arc::clone(&*ARBITRARY_TABLE_PROVIDER),
|
||||
partition_provider,
|
||||
Arc::new(MockPostWriteObserver::default()),
|
||||
&metrics,
|
||||
|
|
|
@ -14,8 +14,10 @@ use self::{
|
|||
buffer::{traits::Queryable, BufferState, DataBuffer, Persisting},
|
||||
persisting::{BatchIdent, PersistingData},
|
||||
};
|
||||
use super::{namespace::NamespaceName, table::TableName};
|
||||
use crate::{deferred_load::DeferredLoad, query_adaptor::QueryAdaptor};
|
||||
use super::{namespace::NamespaceName, table::TableMetadata};
|
||||
use crate::{
|
||||
deferred_load::DeferredLoad, query::projection::OwnedProjection, query_adaptor::QueryAdaptor,
|
||||
};
|
||||
|
||||
mod buffer;
|
||||
pub(crate) mod persisting;
|
||||
|
@ -73,9 +75,9 @@ pub struct PartitionData {
|
|||
|
||||
/// The catalog ID for the table this partition is part of.
|
||||
table_id: TableId,
|
||||
/// The name of the table this partition is part of, potentially unresolved
|
||||
/// The catalog metadata for the table this partition is part of, potentially unresolved
|
||||
/// / deferred.
|
||||
table_name: Arc<DeferredLoad<TableName>>,
|
||||
table: Arc<DeferredLoad<TableMetadata>>,
|
||||
|
||||
/// A [`DataBuffer`] for incoming writes.
|
||||
buffer: DataBuffer,
|
||||
|
@ -108,7 +110,7 @@ impl PartitionData {
|
|||
namespace_id: NamespaceId,
|
||||
namespace_name: Arc<DeferredLoad<NamespaceName>>,
|
||||
table_id: TableId,
|
||||
table_name: Arc<DeferredLoad<TableName>>,
|
||||
table: Arc<DeferredLoad<TableMetadata>>,
|
||||
sort_key: SortKeyState,
|
||||
) -> Self {
|
||||
Self {
|
||||
|
@ -119,7 +121,7 @@ impl PartitionData {
|
|||
namespace_id,
|
||||
namespace_name,
|
||||
table_id,
|
||||
table_name,
|
||||
table,
|
||||
buffer: DataBuffer::default(),
|
||||
persisting: VecDeque::with_capacity(1),
|
||||
started_persistence_count: BatchIdent::default(),
|
||||
|
@ -139,7 +141,7 @@ impl PartitionData {
|
|||
trace!(
|
||||
namespace_id = %self.namespace_id,
|
||||
table_id = %self.table_id,
|
||||
table_name = %self.table_name,
|
||||
table = %self.table,
|
||||
partition_id = %self.partition_id,
|
||||
partition_key = %self.partition_key,
|
||||
"buffered write"
|
||||
|
@ -156,9 +158,9 @@ impl PartitionData {
|
|||
|
||||
/// Return all data for this partition, ordered by the calls to
|
||||
/// [`PartitionData::buffer_write()`].
|
||||
pub(crate) fn get_query_data(&mut self) -> Option<QueryAdaptor> {
|
||||
pub(crate) fn get_query_data(&mut self, projection: &OwnedProjection) -> Option<QueryAdaptor> {
|
||||
// Extract the buffered data, if any.
|
||||
let buffered_data = self.buffer.get_query_data();
|
||||
let buffered_data = self.buffer.get_query_data(projection);
|
||||
|
||||
// Prepend any currently persisting batches.
|
||||
//
|
||||
|
@ -168,14 +170,14 @@ impl PartitionData {
|
|||
let data = self
|
||||
.persisting
|
||||
.iter()
|
||||
.flat_map(|(_, b)| b.get_query_data())
|
||||
.flat_map(|(_, b)| b.get_query_data(projection))
|
||||
.chain(buffered_data)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
trace!(
|
||||
namespace_id = %self.namespace_id,
|
||||
table_id = %self.table_id,
|
||||
table_name = %self.table_name,
|
||||
table = %self.table,
|
||||
partition_id = %self.partition_id,
|
||||
partition_key = %self.partition_key,
|
||||
n_batches = data.len(),
|
||||
|
@ -221,7 +223,7 @@ impl PartitionData {
|
|||
debug!(
|
||||
namespace_id = %self.namespace_id,
|
||||
table_id = %self.table_id,
|
||||
table_name = %self.table_name,
|
||||
table = %self.table,
|
||||
partition_id = %self.partition_id,
|
||||
partition_key = %self.partition_key,
|
||||
%batch_ident,
|
||||
|
@ -230,7 +232,10 @@ impl PartitionData {
|
|||
|
||||
// Wrap the persisting data in the type wrapper
|
||||
let data = PersistingData::new(
|
||||
QueryAdaptor::new(self.partition_id, fsm.get_query_data()),
|
||||
QueryAdaptor::new(
|
||||
self.partition_id,
|
||||
fsm.get_query_data(&OwnedProjection::default()),
|
||||
),
|
||||
batch_ident,
|
||||
);
|
||||
|
||||
|
@ -271,7 +276,7 @@ impl PartitionData {
|
|||
persistence_count = %self.completed_persistence_count,
|
||||
namespace_id = %self.namespace_id,
|
||||
table_id = %self.table_id,
|
||||
table_name = %self.table_name,
|
||||
table = %self.table,
|
||||
partition_id = %self.partition_id,
|
||||
partition_key = %self.partition_key,
|
||||
batch_ident = %batch.batch_ident(),
|
||||
|
@ -302,10 +307,10 @@ impl PartitionData {
|
|||
self.completed_persistence_count
|
||||
}
|
||||
|
||||
/// Return the name of the table this [`PartitionData`] is buffering writes
|
||||
/// Return the metadata of the table this [`PartitionData`] is buffering writes
|
||||
/// for.
|
||||
pub(crate) fn table_name(&self) -> &Arc<DeferredLoad<TableName>> {
|
||||
&self.table_name
|
||||
pub(crate) fn table(&self) -> &Arc<DeferredLoad<TableMetadata>> {
|
||||
&self.table
|
||||
}
|
||||
|
||||
/// Return the table ID for this partition.
|
||||
|
@ -349,7 +354,7 @@ impl PartitionData {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::{ops::Deref, time::Duration};
|
||||
use std::time::Duration;
|
||||
|
||||
use arrow::compute::SortOptions;
|
||||
use arrow_util::assert_batches_eq;
|
||||
|
@ -378,7 +383,7 @@ mod tests {
|
|||
let mut p = PartitionDataBuilder::new().build();
|
||||
|
||||
// And no data should be returned when queried.
|
||||
assert!(p.get_query_data().is_none());
|
||||
assert!(p.get_query_data(&OwnedProjection::default()).is_none());
|
||||
|
||||
// Perform a single write.
|
||||
let mb = lp_to_mutable_batch(r#"bananas,city=London people=2,pigeons="millions" 10"#).1;
|
||||
|
@ -387,7 +392,9 @@ mod tests {
|
|||
|
||||
// The data should be readable.
|
||||
{
|
||||
let data = p.get_query_data().expect("should return data");
|
||||
let data = p
|
||||
.get_query_data(&OwnedProjection::default())
|
||||
.expect("should return data");
|
||||
assert_eq!(data.partition_id(), ARBITRARY_PARTITION_ID);
|
||||
|
||||
let expected = [
|
||||
|
@ -397,15 +404,7 @@ mod tests {
|
|||
"| London | 2.0 | millions | 1970-01-01T00:00:00.000000010Z |",
|
||||
"+--------+--------+----------+--------------------------------+",
|
||||
];
|
||||
assert_batches_eq!(
|
||||
expected,
|
||||
&*data
|
||||
.record_batches()
|
||||
.iter()
|
||||
.map(Deref::deref)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>()
|
||||
);
|
||||
assert_batches_eq!(expected, data.record_batches());
|
||||
}
|
||||
|
||||
// Perform another write, adding data to the existing queryable data
|
||||
|
@ -416,7 +415,9 @@ mod tests {
|
|||
|
||||
// And finally both writes should be readable.
|
||||
{
|
||||
let data = p.get_query_data().expect("should contain data");
|
||||
let data = p
|
||||
.get_query_data(&OwnedProjection::default())
|
||||
.expect("should contain data");
|
||||
assert_eq!(data.partition_id(), ARBITRARY_PARTITION_ID);
|
||||
|
||||
let expected = [
|
||||
|
@ -427,15 +428,7 @@ mod tests {
|
|||
"| Madrid | 4.0 | none | 1970-01-01T00:00:00.000000020Z |",
|
||||
"+--------+--------+----------+--------------------------------+",
|
||||
];
|
||||
assert_batches_eq!(
|
||||
expected,
|
||||
&*data
|
||||
.record_batches()
|
||||
.iter()
|
||||
.map(Deref::deref)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>()
|
||||
);
|
||||
assert_batches_eq!(expected, data.record_batches());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -445,7 +438,7 @@ mod tests {
|
|||
async fn test_persist() {
|
||||
let mut p = PartitionDataBuilder::new().build();
|
||||
|
||||
assert!(p.get_query_data().is_none());
|
||||
assert!(p.get_query_data(&OwnedProjection::default()).is_none());
|
||||
|
||||
// Perform a single write.
|
||||
let mb = lp_to_mutable_batch(r#"bananas,city=London people=2,pigeons="millions" 10"#).1;
|
||||
|
@ -468,15 +461,7 @@ mod tests {
|
|||
"| London | 2.0 | millions | 1970-01-01T00:00:00.000000010Z |",
|
||||
"+--------+--------+----------+--------------------------------+",
|
||||
];
|
||||
assert_batches_eq!(
|
||||
expected,
|
||||
&*persisting_data
|
||||
.record_batches()
|
||||
.iter()
|
||||
.map(Deref::deref)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>()
|
||||
);
|
||||
assert_batches_eq!(expected, persisting_data.record_batches());
|
||||
|
||||
// Ensure the started batch ident is increased after a persist call, but not the completed
|
||||
// batch ident.
|
||||
|
@ -492,7 +477,9 @@ mod tests {
|
|||
|
||||
// Which must be readable, alongside the ongoing persist data.
|
||||
{
|
||||
let data = p.get_query_data().expect("must have data");
|
||||
let data = p
|
||||
.get_query_data(&OwnedProjection::default())
|
||||
.expect("must have data");
|
||||
assert_eq!(data.partition_id(), ARBITRARY_PARTITION_ID);
|
||||
assert_eq!(data.record_batches().len(), 2);
|
||||
let expected = [
|
||||
|
@ -503,15 +490,7 @@ mod tests {
|
|||
"| Madrid | 4.0 | none | 1970-01-01T00:00:00.000000020Z |",
|
||||
"+--------+--------+----------+--------------------------------+",
|
||||
];
|
||||
assert_batches_eq!(
|
||||
expected,
|
||||
&*data
|
||||
.record_batches()
|
||||
.iter()
|
||||
.map(Deref::deref)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>()
|
||||
);
|
||||
assert_batches_eq!(expected, data.record_batches());
|
||||
}
|
||||
|
||||
// The persist now "completes".
|
||||
|
@ -526,7 +505,9 @@ mod tests {
|
|||
|
||||
// Querying the buffer should now return only the second write.
|
||||
{
|
||||
let data = p.get_query_data().expect("must have data");
|
||||
let data = p
|
||||
.get_query_data(&OwnedProjection::default())
|
||||
.expect("must have data");
|
||||
assert_eq!(data.partition_id(), ARBITRARY_PARTITION_ID);
|
||||
assert_eq!(data.record_batches().len(), 1);
|
||||
let expected = [
|
||||
|
@ -536,15 +517,7 @@ mod tests {
|
|||
"| Madrid | 4.0 | none | 1970-01-01T00:00:00.000000020Z |",
|
||||
"+--------+--------+---------+--------------------------------+",
|
||||
];
|
||||
assert_batches_eq!(
|
||||
expected,
|
||||
&*data
|
||||
.record_batches()
|
||||
.iter()
|
||||
.map(Deref::deref)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>()
|
||||
);
|
||||
assert_batches_eq!(expected, data.record_batches());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -557,12 +530,7 @@ mod tests {
|
|||
// A helper function to dedupe the record batches in [`QueryAdaptor`]
|
||||
// and assert the resulting batch contents.
|
||||
async fn assert_deduped(expect: &[&str], batch: QueryAdaptor) {
|
||||
let batch = batch
|
||||
.record_batches()
|
||||
.iter()
|
||||
.map(Deref::deref)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
let batch = batch.record_batches().to_vec();
|
||||
|
||||
let sort_keys = vec![PhysicalSortExpr {
|
||||
expr: col("time", &batch[0].schema()).unwrap(),
|
||||
|
@ -596,7 +564,13 @@ mod tests {
|
|||
p.buffer_write(mb, SequenceNumber::new(1))
|
||||
.expect("write should succeed");
|
||||
|
||||
assert_eq!(p.get_query_data().unwrap().record_batches().len(), 1);
|
||||
assert_eq!(
|
||||
p.get_query_data(&OwnedProjection::default())
|
||||
.unwrap()
|
||||
.record_batches()
|
||||
.len(),
|
||||
1
|
||||
);
|
||||
assert_deduped(
|
||||
&[
|
||||
"+--------------------------------+-----+",
|
||||
|
@ -605,7 +579,7 @@ mod tests {
|
|||
"| 1970-01-01T00:00:00.000000042Z | 1.0 |",
|
||||
"+--------------------------------+-----+",
|
||||
],
|
||||
p.get_query_data().unwrap(),
|
||||
p.get_query_data(&OwnedProjection::default()).unwrap(),
|
||||
)
|
||||
.await;
|
||||
|
||||
|
@ -614,7 +588,13 @@ mod tests {
|
|||
p.buffer_write(mb, SequenceNumber::new(2))
|
||||
.expect("write should succeed");
|
||||
|
||||
assert_eq!(p.get_query_data().unwrap().record_batches().len(), 1);
|
||||
assert_eq!(
|
||||
p.get_query_data(&OwnedProjection::default())
|
||||
.unwrap()
|
||||
.record_batches()
|
||||
.len(),
|
||||
1
|
||||
);
|
||||
assert_deduped(
|
||||
&[
|
||||
"+--------------------------------+-----+",
|
||||
|
@ -623,7 +603,7 @@ mod tests {
|
|||
"| 1970-01-01T00:00:00.000000042Z | 2.0 |",
|
||||
"+--------------------------------+-----+",
|
||||
],
|
||||
p.get_query_data().unwrap(),
|
||||
p.get_query_data(&OwnedProjection::default()).unwrap(),
|
||||
)
|
||||
.await;
|
||||
|
||||
|
@ -656,7 +636,13 @@ mod tests {
|
|||
p.buffer_write(mb, SequenceNumber::new(3))
|
||||
.expect("write should succeed");
|
||||
|
||||
assert_eq!(p.get_query_data().unwrap().record_batches().len(), 2);
|
||||
assert_eq!(
|
||||
p.get_query_data(&OwnedProjection::default())
|
||||
.unwrap()
|
||||
.record_batches()
|
||||
.len(),
|
||||
2
|
||||
);
|
||||
assert_deduped(
|
||||
&[
|
||||
"+--------------------------------+-----+",
|
||||
|
@ -665,7 +651,7 @@ mod tests {
|
|||
"| 1970-01-01T00:00:00.000000042Z | 3.0 |",
|
||||
"+--------------------------------+-----+",
|
||||
],
|
||||
p.get_query_data().unwrap(),
|
||||
p.get_query_data(&OwnedProjection::default()).unwrap(),
|
||||
)
|
||||
.await;
|
||||
|
||||
|
@ -697,7 +683,13 @@ mod tests {
|
|||
p.buffer_write(mb, SequenceNumber::new(3))
|
||||
.expect("write should succeed");
|
||||
|
||||
assert_eq!(p.get_query_data().unwrap().record_batches().len(), 3);
|
||||
assert_eq!(
|
||||
p.get_query_data(&OwnedProjection::default())
|
||||
.unwrap()
|
||||
.record_batches()
|
||||
.len(),
|
||||
3
|
||||
);
|
||||
assert_deduped(
|
||||
&[
|
||||
"+--------------------------------+-----+",
|
||||
|
@ -706,7 +698,7 @@ mod tests {
|
|||
"| 1970-01-01T00:00:00.000000042Z | 4.0 |",
|
||||
"+--------------------------------+-----+",
|
||||
],
|
||||
p.get_query_data().unwrap(),
|
||||
p.get_query_data(&OwnedProjection::default()).unwrap(),
|
||||
)
|
||||
.await;
|
||||
|
||||
|
@ -717,7 +709,13 @@ mod tests {
|
|||
assert!(set.contains(SequenceNumber::new(2)));
|
||||
|
||||
// And assert the correct value remains.
|
||||
assert_eq!(p.get_query_data().unwrap().record_batches().len(), 2);
|
||||
assert_eq!(
|
||||
p.get_query_data(&OwnedProjection::default())
|
||||
.unwrap()
|
||||
.record_batches()
|
||||
.len(),
|
||||
2
|
||||
);
|
||||
assert_deduped(
|
||||
&[
|
||||
"+--------------------------------+-----+",
|
||||
|
@ -726,7 +724,7 @@ mod tests {
|
|||
"| 1970-01-01T00:00:00.000000042Z | 4.0 |",
|
||||
"+--------------------------------+-----+",
|
||||
],
|
||||
p.get_query_data().unwrap(),
|
||||
p.get_query_data(&OwnedProjection::default()).unwrap(),
|
||||
)
|
||||
.await;
|
||||
|
||||
|
@ -736,7 +734,13 @@ mod tests {
|
|||
assert!(set.contains(SequenceNumber::new(3)));
|
||||
|
||||
// And assert the correct value remains.
|
||||
assert_eq!(p.get_query_data().unwrap().record_batches().len(), 1);
|
||||
assert_eq!(
|
||||
p.get_query_data(&OwnedProjection::default())
|
||||
.unwrap()
|
||||
.record_batches()
|
||||
.len(),
|
||||
1
|
||||
);
|
||||
assert_deduped(
|
||||
&[
|
||||
"+--------------------------------+-----+",
|
||||
|
@ -745,7 +749,7 @@ mod tests {
|
|||
"| 1970-01-01T00:00:00.000000042Z | 4.0 |",
|
||||
"+--------------------------------+-----+",
|
||||
],
|
||||
p.get_query_data().unwrap(),
|
||||
p.get_query_data(&OwnedProjection::default()).unwrap(),
|
||||
)
|
||||
.await;
|
||||
|
||||
|
@ -777,7 +781,7 @@ mod tests {
|
|||
p.buffer_write(mb, SequenceNumber::new(3))
|
||||
.expect("write should succeed");
|
||||
|
||||
let data = p.get_query_data().unwrap();
|
||||
let data = p.get_query_data(&OwnedProjection::default()).unwrap();
|
||||
assert_batches_eq!(
|
||||
[
|
||||
"+--------------------------------+-----+",
|
||||
|
@ -787,12 +791,7 @@ mod tests {
|
|||
"| 1970-01-01T00:00:00.000000042Z | 2.0 |",
|
||||
"+--------------------------------+-----+",
|
||||
],
|
||||
&*data
|
||||
.record_batches()
|
||||
.iter()
|
||||
.map(Deref::deref)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>()
|
||||
&*data.record_batches().to_vec()
|
||||
);
|
||||
|
||||
// Persist again, moving the last write to the persisting state and
|
||||
|
@ -805,7 +804,7 @@ mod tests {
|
|||
p.buffer_write(mb, SequenceNumber::new(4))
|
||||
.expect("write should succeed");
|
||||
|
||||
let data = p.get_query_data().unwrap();
|
||||
let data = p.get_query_data(&OwnedProjection::default()).unwrap();
|
||||
assert_batches_eq!(
|
||||
[
|
||||
"+--------------------------------+-----+",
|
||||
|
@ -816,12 +815,7 @@ mod tests {
|
|||
"| 1970-01-01T00:00:00.000000042Z | 3.0 |",
|
||||
"+--------------------------------+-----+",
|
||||
],
|
||||
&*data
|
||||
.record_batches()
|
||||
.iter()
|
||||
.map(Deref::deref)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>()
|
||||
&*data.record_batches().to_vec()
|
||||
);
|
||||
|
||||
// Persist again, moving the last write to the persisting state and
|
||||
|
@ -834,7 +828,7 @@ mod tests {
|
|||
p.buffer_write(mb, SequenceNumber::new(5))
|
||||
.expect("write should succeed");
|
||||
|
||||
let data = p.get_query_data().unwrap();
|
||||
let data = p.get_query_data(&OwnedProjection::default()).unwrap();
|
||||
assert_batches_eq!(
|
||||
[
|
||||
"+--------------------------------+-----+",
|
||||
|
@ -846,12 +840,7 @@ mod tests {
|
|||
"| 1970-01-01T00:00:00.000000042Z | 4.0 |",
|
||||
"+--------------------------------+-----+",
|
||||
],
|
||||
&*data
|
||||
.record_batches()
|
||||
.iter()
|
||||
.map(Deref::deref)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>()
|
||||
&*data.record_batches().to_vec()
|
||||
);
|
||||
|
||||
// Finish persisting the second batch out-of-order! The middle entry,
|
||||
|
@ -860,7 +849,7 @@ mod tests {
|
|||
assert_eq!(set.len(), 1);
|
||||
assert!(set.contains(SequenceNumber::new(3)));
|
||||
|
||||
let data = p.get_query_data().unwrap();
|
||||
let data = p.get_query_data(&OwnedProjection::default()).unwrap();
|
||||
assert_batches_eq!(
|
||||
[
|
||||
"+--------------------------------+-----+",
|
||||
|
@ -871,12 +860,7 @@ mod tests {
|
|||
"| 1970-01-01T00:00:00.000000042Z | 4.0 |",
|
||||
"+--------------------------------+-----+",
|
||||
],
|
||||
&*data
|
||||
.record_batches()
|
||||
.iter()
|
||||
.map(Deref::deref)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>()
|
||||
&*data.record_batches().to_vec()
|
||||
);
|
||||
|
||||
// Finish persisting the last batch.
|
||||
|
@ -884,7 +868,7 @@ mod tests {
|
|||
assert_eq!(set.len(), 1);
|
||||
assert!(set.contains(SequenceNumber::new(4)));
|
||||
|
||||
let data = p.get_query_data().unwrap();
|
||||
let data = p.get_query_data(&OwnedProjection::default()).unwrap();
|
||||
assert_batches_eq!(
|
||||
[
|
||||
"+--------------------------------+-----+",
|
||||
|
@ -894,12 +878,7 @@ mod tests {
|
|||
"| 1970-01-01T00:00:00.000000042Z | 4.0 |",
|
||||
"+--------------------------------+-----+",
|
||||
],
|
||||
&*data
|
||||
.record_batches()
|
||||
.iter()
|
||||
.map(Deref::deref)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>()
|
||||
&*data.record_batches().to_vec()
|
||||
);
|
||||
|
||||
// Finish persisting the first batch.
|
||||
|
@ -908,7 +887,7 @@ mod tests {
|
|||
assert!(set.contains(SequenceNumber::new(1)));
|
||||
|
||||
// Assert only the buffered data remains
|
||||
let data = p.get_query_data().unwrap();
|
||||
let data = p.get_query_data(&OwnedProjection::default()).unwrap();
|
||||
assert_batches_eq!(
|
||||
[
|
||||
"+--------------------------------+-----+",
|
||||
|
@ -917,12 +896,7 @@ mod tests {
|
|||
"| 1970-01-01T00:00:00.000000042Z | 4.0 |",
|
||||
"+--------------------------------+-----+",
|
||||
],
|
||||
&*data
|
||||
.record_batches()
|
||||
.iter()
|
||||
.map(Deref::deref)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>()
|
||||
&*data.record_batches().to_vec()
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -1009,7 +983,7 @@ mod tests {
|
|||
);
|
||||
|
||||
// Nothing should explode, data should be readable.
|
||||
let data = p.get_query_data().unwrap();
|
||||
let data = p.get_query_data(&OwnedProjection::default()).unwrap();
|
||||
assert_batches_eq!(
|
||||
[
|
||||
"+--------+--------+----------+--------------------------------+",
|
||||
|
@ -1019,12 +993,7 @@ mod tests {
|
|||
"| Madrid | 2.0 | none | 1970-01-01T00:00:00.000000011Z |",
|
||||
"+--------+--------+----------+--------------------------------+",
|
||||
],
|
||||
&*data
|
||||
.record_batches()
|
||||
.iter()
|
||||
.map(Deref::deref)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>()
|
||||
&*data.record_batches().to_vec()
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -1053,6 +1022,6 @@ mod tests {
|
|||
async fn test_empty_partition_no_queryadaptor_panic() {
|
||||
let mut p = PartitionDataBuilder::new().build();
|
||||
|
||||
assert!(p.get_query_data().is_none());
|
||||
assert!(p.get_query_data(&OwnedProjection::default()).is_none());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use data_types::SequenceNumber;
|
||||
use mutable_batch::MutableBatch;
|
||||
|
@ -11,6 +9,8 @@ pub(crate) mod traits;
|
|||
|
||||
pub(crate) use state_machine::*;
|
||||
|
||||
use crate::query::projection::OwnedProjection;
|
||||
|
||||
use self::{always_some::AlwaysSome, traits::Queryable};
|
||||
|
||||
/// The current state of the [`BufferState`] state machine.
|
||||
|
@ -63,12 +63,12 @@ impl DataBuffer {
|
|||
|
||||
/// Return all data for this buffer, ordered by the [`SequenceNumber`] from
|
||||
/// which it was buffered with.
|
||||
pub(crate) fn get_query_data(&mut self) -> Vec<Arc<RecordBatch>> {
|
||||
pub(crate) fn get_query_data(&mut self, projection: &OwnedProjection) -> Vec<RecordBatch> {
|
||||
// Take ownership of the FSM and return the data within it.
|
||||
self.0.mutate(|fsm| match fsm {
|
||||
// The buffering state can return data.
|
||||
FsmState::Buffering(b) => {
|
||||
let ret = b.get_query_data();
|
||||
let ret = b.get_query_data(projection);
|
||||
(FsmState::Buffering(b), ret)
|
||||
}
|
||||
})
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use mutable_batch::MutableBatch;
|
||||
use schema::Projection;
|
||||
|
@ -39,12 +37,12 @@ impl Buffer {
|
|||
/// # Panics
|
||||
///
|
||||
/// If generating the snapshot fails, this method panics.
|
||||
pub(super) fn snapshot(self) -> Option<Arc<RecordBatch>> {
|
||||
Some(Arc::new(
|
||||
pub(super) fn snapshot(self) -> Option<RecordBatch> {
|
||||
Some(
|
||||
self.buffer?
|
||||
.to_arrow(Projection::All)
|
||||
.expect("failed to snapshot buffer data"),
|
||||
))
|
||||
)
|
||||
}
|
||||
|
||||
pub(super) fn is_empty(&self) -> bool {
|
||||
|
|
|
@ -1,6 +1,4 @@
|
|||
#![allow(dead_code)]
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use data_types::{sequence_number_set::SequenceNumberSet, SequenceNumber};
|
||||
use mutable_batch::MutableBatch;
|
||||
|
@ -12,6 +10,8 @@ mod snapshot;
|
|||
pub(in crate::buffer_tree::partition::buffer) use buffering::*;
|
||||
pub(crate) use persisting::*;
|
||||
|
||||
use crate::query::projection::OwnedProjection;
|
||||
|
||||
use super::traits::{Queryable, Writeable};
|
||||
|
||||
/// A result type for fallible transitions.
|
||||
|
@ -122,14 +122,14 @@ where
|
|||
/// Returns the current buffer data.
|
||||
///
|
||||
/// This is always a cheap method call.
|
||||
fn get_query_data(&self) -> Vec<Arc<RecordBatch>> {
|
||||
self.state.get_query_data()
|
||||
fn get_query_data(&self, projection: &OwnedProjection) -> Vec<RecordBatch> {
|
||||
self.state.get_query_data(projection)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::ops::Deref;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_util::assert_batches_eq;
|
||||
use mutable_batch_lp::test_helpers::lp_to_mutable_batch;
|
||||
|
@ -139,6 +139,8 @@ mod tests {
|
|||
use super::*;
|
||||
|
||||
#[test]
|
||||
// comparing dyn Array always has same vtable, so is accurate to use Arc::ptr_eq
|
||||
#[allow(clippy::vtable_address_comparisons)]
|
||||
fn test_buffer_lifecycle() {
|
||||
// Initialise a buffer in the base state.
|
||||
let mut buffer: BufferState<Buffering> = BufferState::new();
|
||||
|
@ -166,7 +168,7 @@ mod tests {
|
|||
// Keep the data to validate they are ref-counted copies after further
|
||||
// writes below. Note this construct allows the caller to decide when/if
|
||||
// to allocate.
|
||||
let w1_data = buffer.get_query_data();
|
||||
let w1_data = buffer.get_query_data(&OwnedProjection::default());
|
||||
|
||||
let expected = vec![
|
||||
"+-------+----------+----------+--------------------------------+",
|
||||
|
@ -175,7 +177,7 @@ mod tests {
|
|||
"| true | 42.0 | platanos | 1991-03-10T00:00:42.000000042Z |",
|
||||
"+-------+----------+----------+--------------------------------+",
|
||||
];
|
||||
assert_batches_eq!(&expected, &[w1_data[0].deref().clone()]);
|
||||
assert_batches_eq!(&expected, &[w1_data[0].clone()]);
|
||||
|
||||
// Apply another write.
|
||||
buffer
|
||||
|
@ -195,7 +197,7 @@ mod tests {
|
|||
};
|
||||
|
||||
// Verify the writes are still queryable.
|
||||
let w2_data = buffer.get_query_data();
|
||||
let w2_data = buffer.get_query_data(&OwnedProjection::default());
|
||||
let expected = vec![
|
||||
"+-------+----------+----------+--------------------------------+",
|
||||
"| great | how_much | tag | time |",
|
||||
|
@ -205,18 +207,18 @@ mod tests {
|
|||
"+-------+----------+----------+--------------------------------+",
|
||||
];
|
||||
assert_eq!(w2_data.len(), 1);
|
||||
assert_batches_eq!(&expected, &[w2_data[0].deref().clone()]);
|
||||
assert_batches_eq!(&expected, &[w2_data[0].clone()]);
|
||||
|
||||
// Ensure the same data is returned for a second read.
|
||||
{
|
||||
let second_read = buffer.get_query_data();
|
||||
let second_read = buffer.get_query_data(&OwnedProjection::default());
|
||||
assert_eq!(w2_data, second_read);
|
||||
|
||||
// And that no data was actually copied.
|
||||
let same_arcs = w2_data
|
||||
.iter()
|
||||
.zip(second_read.iter())
|
||||
.all(|(a, b)| Arc::ptr_eq(a, b));
|
||||
.all(|(a, b)| Arc::ptr_eq(a.column(0), b.column(0)));
|
||||
assert!(same_arcs);
|
||||
}
|
||||
|
||||
|
@ -224,14 +226,120 @@ mod tests {
|
|||
let buffer: BufferState<Persisting> = buffer.into_persisting();
|
||||
|
||||
// Extract the final buffered result
|
||||
let final_data = buffer.get_query_data();
|
||||
let final_data = buffer.get_query_data(&OwnedProjection::default());
|
||||
|
||||
// And once again verify no data was changed, copied or re-ordered.
|
||||
assert_eq!(w2_data, final_data);
|
||||
let same_arcs = w2_data
|
||||
.into_iter()
|
||||
.zip(final_data.into_iter())
|
||||
.all(|(a, b)| Arc::ptr_eq(&a, &b));
|
||||
.all(|(a, b)| Arc::ptr_eq(a.column(0), b.column(0)));
|
||||
assert!(same_arcs);
|
||||
|
||||
// Assert the sequence numbers were recorded.
|
||||
let set = buffer.into_sequence_number_set();
|
||||
assert!(set.contains(SequenceNumber::new(0)));
|
||||
assert!(set.contains(SequenceNumber::new(1)));
|
||||
assert_eq!(set.len(), 2);
|
||||
}
|
||||
|
||||
/// Assert projection is correct across all the queryable FSM states.
|
||||
#[test]
|
||||
// comparing dyn Array always has same vtable, so is accurate to use Arc::ptr_eq
|
||||
#[allow(clippy::vtable_address_comparisons)]
|
||||
fn test_buffer_projection() {
|
||||
let projection = OwnedProjection::from(vec![
|
||||
"tag".to_string(),
|
||||
"great".to_string(),
|
||||
"missing".to_string(),
|
||||
"time".to_string(),
|
||||
]);
|
||||
|
||||
// Initialise a buffer in the base state.
|
||||
let mut buffer: BufferState<Buffering> = BufferState::new();
|
||||
|
||||
// Write some data to a buffer.
|
||||
buffer
|
||||
.write(
|
||||
lp_to_mutable_batch(
|
||||
r#"bananas,tag=platanos great=true,how_much=42 668563242000000042"#,
|
||||
)
|
||||
.1,
|
||||
SequenceNumber::new(0),
|
||||
)
|
||||
.expect("write to empty buffer should succeed");
|
||||
|
||||
// Extract the queryable data from the buffer and validate it.
|
||||
//
|
||||
// Keep the data to validate they are ref-counted copies after further
|
||||
// writes below. Note this construct allows the caller to decide when/if
|
||||
// to allocate.
|
||||
let w1_data = buffer.get_query_data(&projection);
|
||||
|
||||
let expected = vec![
|
||||
"+----------+-------+--------------------------------+",
|
||||
"| tag | great | time |",
|
||||
"+----------+-------+--------------------------------+",
|
||||
"| platanos | true | 1991-03-10T00:00:42.000000042Z |",
|
||||
"+----------+-------+--------------------------------+",
|
||||
];
|
||||
assert_batches_eq!(&expected, &[w1_data[0].clone()]);
|
||||
|
||||
// Apply another write.
|
||||
buffer
|
||||
.write(
|
||||
lp_to_mutable_batch(
|
||||
r#"bananas,tag=platanos great=true,how_much=1000 668563242000000043"#,
|
||||
)
|
||||
.1,
|
||||
SequenceNumber::new(1),
|
||||
)
|
||||
.expect("write to empty buffer should succeed");
|
||||
|
||||
// Snapshot the buffer into an immutable, queryable data format.
|
||||
let buffer: BufferState<Snapshot> = match buffer.snapshot() {
|
||||
Transition::Ok(v) => v,
|
||||
Transition::Unchanged(_) => panic!("did not transition to snapshot state"),
|
||||
};
|
||||
|
||||
// Verify the writes are still queryable.
|
||||
let w2_data = buffer.get_query_data(&projection);
|
||||
let expected = vec![
|
||||
"+----------+-------+--------------------------------+",
|
||||
"| tag | great | time |",
|
||||
"+----------+-------+--------------------------------+",
|
||||
"| platanos | true | 1991-03-10T00:00:42.000000042Z |",
|
||||
"| platanos | true | 1991-03-10T00:00:42.000000043Z |",
|
||||
"+----------+-------+--------------------------------+",
|
||||
];
|
||||
assert_eq!(w2_data.len(), 1);
|
||||
assert_batches_eq!(&expected, &[w2_data[0].clone()]);
|
||||
|
||||
// Ensure the same data is returned for a second read.
|
||||
{
|
||||
let second_read = buffer.get_query_data(&projection);
|
||||
assert_eq!(w2_data, second_read);
|
||||
|
||||
// And that no data was actually copied.
|
||||
let same_arcs = w2_data
|
||||
.iter()
|
||||
.zip(second_read.iter())
|
||||
.all(|(a, b)| Arc::ptr_eq(a.column(0), b.column(0)));
|
||||
assert!(same_arcs);
|
||||
}
|
||||
|
||||
// Finally transition into the terminal persisting state.
|
||||
let buffer: BufferState<Persisting> = buffer.into_persisting();
|
||||
|
||||
// Extract the final buffered result
|
||||
let final_data = buffer.get_query_data(&projection);
|
||||
|
||||
// And once again verify no data was changed, copied or re-ordered.
|
||||
assert_eq!(w2_data, final_data);
|
||||
let same_arcs = w2_data
|
||||
.into_iter()
|
||||
.zip(final_data.into_iter())
|
||||
.all(|(a, b)| Arc::ptr_eq(a.column(0), b.column(0)));
|
||||
assert!(same_arcs);
|
||||
|
||||
// Assert the sequence numbers were recorded.
|
||||
|
@ -258,16 +366,16 @@ mod tests {
|
|||
Transition::Unchanged(_) => panic!("failed to transition"),
|
||||
};
|
||||
|
||||
assert_eq!(buffer.get_query_data().len(), 1);
|
||||
assert_eq!(buffer.get_query_data(&OwnedProjection::default()).len(), 1);
|
||||
|
||||
let snapshot = &buffer.get_query_data()[0];
|
||||
let snapshot = buffer.get_query_data(&OwnedProjection::default())[0].clone();
|
||||
|
||||
// Generate the combined buffer from the original inputs to compare
|
||||
// against.
|
||||
mb1.extend_from(&mb2).unwrap();
|
||||
let want = mb1.to_arrow(Projection::All).unwrap();
|
||||
|
||||
assert_eq!(&**snapshot, &want);
|
||||
assert_eq!(snapshot, want);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -1,15 +1,15 @@
|
|||
//! A write buffer.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use mutable_batch::MutableBatch;
|
||||
use schema::Projection;
|
||||
|
||||
use super::{snapshot::Snapshot, BufferState, Transition};
|
||||
use crate::buffer_tree::partition::buffer::{
|
||||
mutable_buffer::Buffer,
|
||||
traits::{Queryable, Writeable},
|
||||
use crate::{
|
||||
buffer_tree::partition::buffer::{
|
||||
mutable_buffer::Buffer,
|
||||
traits::{Queryable, Writeable},
|
||||
},
|
||||
query::projection::OwnedProjection,
|
||||
};
|
||||
|
||||
/// The FSM starting ingest state - a mutable buffer collecting writes.
|
||||
|
@ -35,18 +35,11 @@ pub(crate) struct Buffering {
|
|||
/// This method panics if converting the buffered data (if any) into an Arrow
|
||||
/// [`RecordBatch`] fails (a non-transient error).
|
||||
impl Queryable for Buffering {
|
||||
fn get_query_data(&self) -> Vec<Arc<RecordBatch>> {
|
||||
let data = self.buffer.buffer().map(|v| {
|
||||
Arc::new(
|
||||
v.to_arrow(Projection::All)
|
||||
.expect("failed to snapshot buffer data"),
|
||||
)
|
||||
});
|
||||
|
||||
match data {
|
||||
Some(v) => vec![v],
|
||||
None => vec![],
|
||||
}
|
||||
fn get_query_data(&self, projection: &OwnedProjection) -> Vec<RecordBatch> {
|
||||
self.buffer
|
||||
.buffer()
|
||||
.map(|v| vec![projection.project_mutable_batches(v)])
|
||||
.unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
//! A writeable buffer, with one or more snapshots.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use data_types::sequence_number_set::SequenceNumberSet;
|
||||
|
||||
use super::BufferState;
|
||||
use crate::buffer_tree::partition::buffer::traits::Queryable;
|
||||
use crate::{
|
||||
buffer_tree::partition::buffer::traits::Queryable, query::projection::OwnedProjection,
|
||||
};
|
||||
|
||||
/// An immutable set of [`RecordBatch`] in the process of being persisted.
|
||||
#[derive(Debug)]
|
||||
|
@ -14,18 +14,18 @@ pub(crate) struct Persisting {
|
|||
/// Snapshots generated from previous buffer contents to be persisted.
|
||||
///
|
||||
/// INVARIANT: this array is always non-empty.
|
||||
snapshots: Vec<Arc<RecordBatch>>,
|
||||
snapshots: Vec<RecordBatch>,
|
||||
}
|
||||
|
||||
impl Persisting {
|
||||
pub(super) fn new(snapshots: Vec<Arc<RecordBatch>>) -> Self {
|
||||
pub(super) fn new(snapshots: Vec<RecordBatch>) -> Self {
|
||||
Self { snapshots }
|
||||
}
|
||||
}
|
||||
|
||||
impl Queryable for Persisting {
|
||||
fn get_query_data(&self) -> Vec<Arc<RecordBatch>> {
|
||||
self.snapshots.clone()
|
||||
fn get_query_data(&self, projection: &OwnedProjection) -> Vec<RecordBatch> {
|
||||
projection.project_record_batch(&self.snapshots)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,12 +1,11 @@
|
|||
//! A writeable buffer, with one or more snapshots.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::record_batch::RecordBatch;
|
||||
|
||||
use super::BufferState;
|
||||
use crate::buffer_tree::partition::buffer::{
|
||||
state_machine::persisting::Persisting, traits::Queryable,
|
||||
use crate::{
|
||||
buffer_tree::partition::buffer::{state_machine::persisting::Persisting, traits::Queryable},
|
||||
query::projection::OwnedProjection,
|
||||
};
|
||||
|
||||
/// An immutable, queryable FSM state containing at least one buffer snapshot.
|
||||
|
@ -15,19 +14,19 @@ pub(crate) struct Snapshot {
|
|||
/// Snapshots generated from previous buffer contents.
|
||||
///
|
||||
/// INVARIANT: this array is always non-empty.
|
||||
snapshots: Vec<Arc<RecordBatch>>,
|
||||
snapshots: Vec<RecordBatch>,
|
||||
}
|
||||
|
||||
impl Snapshot {
|
||||
pub(super) fn new(snapshots: Vec<Arc<RecordBatch>>) -> Self {
|
||||
pub(super) fn new(snapshots: Vec<RecordBatch>) -> Self {
|
||||
assert!(!snapshots.is_empty());
|
||||
Self { snapshots }
|
||||
}
|
||||
}
|
||||
|
||||
impl Queryable for Snapshot {
|
||||
fn get_query_data(&self) -> Vec<Arc<RecordBatch>> {
|
||||
self.snapshots.clone()
|
||||
fn get_query_data(&self, projection: &OwnedProjection) -> Vec<RecordBatch> {
|
||||
projection.project_record_batch(&self.snapshots)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,10 +1,12 @@
|
|||
//! Private traits for state machine states.
|
||||
|
||||
use std::{fmt::Debug, sync::Arc};
|
||||
use std::fmt::Debug;
|
||||
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use mutable_batch::MutableBatch;
|
||||
|
||||
use crate::query::projection::OwnedProjection;
|
||||
|
||||
/// A state that can accept writes.
|
||||
pub(crate) trait Writeable: Debug {
|
||||
fn write(&mut self, batch: MutableBatch) -> Result<(), mutable_batch::Error>;
|
||||
|
@ -13,5 +15,5 @@ pub(crate) trait Writeable: Debug {
|
|||
/// A state that can return the contents of the buffer as one or more
|
||||
/// [`RecordBatch`] instances.
|
||||
pub(crate) trait Queryable: Debug {
|
||||
fn get_query_data(&self) -> Vec<Arc<RecordBatch>>;
|
||||
fn get_query_data(&self, projection: &OwnedProjection) -> Vec<RecordBatch>;
|
||||
}
|
||||
|
|
|
@ -14,7 +14,7 @@ use crate::{
|
|||
buffer_tree::{
|
||||
namespace::NamespaceName,
|
||||
partition::{resolver::SortKeyResolver, PartitionData, SortKeyState},
|
||||
table::TableName,
|
||||
table::TableMetadata,
|
||||
},
|
||||
deferred_load::DeferredLoad,
|
||||
};
|
||||
|
@ -173,7 +173,7 @@ where
|
|||
namespace_id: NamespaceId,
|
||||
namespace_name: Arc<DeferredLoad<NamespaceName>>,
|
||||
table_id: TableId,
|
||||
table_name: Arc<DeferredLoad<TableName>>,
|
||||
table: Arc<DeferredLoad<TableMetadata>>,
|
||||
) -> Arc<Mutex<PartitionData>> {
|
||||
// Use the cached PartitionKey rather than the caller's partition_key,
|
||||
// so that the already-shared Arc<str> in the cache is reused.
|
||||
|
@ -203,7 +203,7 @@ where
|
|||
namespace_id,
|
||||
namespace_name,
|
||||
table_id,
|
||||
table_name,
|
||||
table,
|
||||
SortKeyState::Deferred(Arc::new(sort_key_resolver)),
|
||||
)));
|
||||
}
|
||||
|
@ -212,13 +212,7 @@ where
|
|||
|
||||
// Otherwise delegate to the catalog / inner impl.
|
||||
self.inner
|
||||
.get_partition(
|
||||
partition_key,
|
||||
namespace_id,
|
||||
namespace_name,
|
||||
table_id,
|
||||
table_name,
|
||||
)
|
||||
.get_partition(partition_key, namespace_id, namespace_name, table_id, table)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
@ -234,7 +228,7 @@ mod tests {
|
|||
use crate::{
|
||||
buffer_tree::partition::resolver::mock::MockPartitionProvider,
|
||||
test_util::{
|
||||
defer_namespace_name_1_sec, defer_table_name_1_sec, PartitionDataBuilder,
|
||||
defer_namespace_name_1_sec, defer_table_metadata_1_sec, PartitionDataBuilder,
|
||||
ARBITRARY_NAMESPACE_ID, ARBITRARY_NAMESPACE_NAME, ARBITRARY_PARTITION_ID,
|
||||
ARBITRARY_PARTITION_KEY, ARBITRARY_PARTITION_KEY_STR, ARBITRARY_TABLE_ID,
|
||||
ARBITRARY_TABLE_NAME,
|
||||
|
@ -270,15 +264,15 @@ mod tests {
|
|||
ARBITRARY_NAMESPACE_ID,
|
||||
defer_namespace_name_1_sec(),
|
||||
ARBITRARY_TABLE_ID,
|
||||
defer_table_name_1_sec(),
|
||||
defer_table_metadata_1_sec(),
|
||||
)
|
||||
.await;
|
||||
|
||||
assert_eq!(got.lock().partition_id(), ARBITRARY_PARTITION_ID);
|
||||
assert_eq!(got.lock().table_id(), ARBITRARY_TABLE_ID);
|
||||
assert_eq!(
|
||||
&**got.lock().table_name().get().await,
|
||||
&***ARBITRARY_TABLE_NAME
|
||||
&**got.lock().table().get().await.name(),
|
||||
&**ARBITRARY_TABLE_NAME
|
||||
);
|
||||
assert_eq!(
|
||||
&**got.lock().namespace_name().get().await,
|
||||
|
@ -309,15 +303,15 @@ mod tests {
|
|||
ARBITRARY_NAMESPACE_ID,
|
||||
defer_namespace_name_1_sec(),
|
||||
ARBITRARY_TABLE_ID,
|
||||
defer_table_name_1_sec(),
|
||||
defer_table_metadata_1_sec(),
|
||||
)
|
||||
.await;
|
||||
|
||||
assert_eq!(got.lock().partition_id(), ARBITRARY_PARTITION_ID);
|
||||
assert_eq!(got.lock().table_id(), ARBITRARY_TABLE_ID);
|
||||
assert_eq!(
|
||||
&**got.lock().table_name().get().await,
|
||||
&***ARBITRARY_TABLE_NAME
|
||||
&**got.lock().table().get().await.name(),
|
||||
&**ARBITRARY_TABLE_NAME
|
||||
);
|
||||
assert_eq!(
|
||||
&**got.lock().namespace_name().get().await,
|
||||
|
@ -366,15 +360,15 @@ mod tests {
|
|||
ARBITRARY_NAMESPACE_ID,
|
||||
defer_namespace_name_1_sec(),
|
||||
ARBITRARY_TABLE_ID,
|
||||
defer_table_name_1_sec(),
|
||||
defer_table_metadata_1_sec(),
|
||||
)
|
||||
.await;
|
||||
|
||||
assert_eq!(got.lock().partition_id(), other_key_id);
|
||||
assert_eq!(got.lock().table_id(), ARBITRARY_TABLE_ID);
|
||||
assert_eq!(
|
||||
&**got.lock().table_name().get().await,
|
||||
&***ARBITRARY_TABLE_NAME
|
||||
&**got.lock().table().get().await.name(),
|
||||
&**ARBITRARY_TABLE_NAME
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -402,15 +396,15 @@ mod tests {
|
|||
ARBITRARY_NAMESPACE_ID,
|
||||
defer_namespace_name_1_sec(),
|
||||
other_table,
|
||||
defer_table_name_1_sec(),
|
||||
defer_table_metadata_1_sec(),
|
||||
)
|
||||
.await;
|
||||
|
||||
assert_eq!(got.lock().partition_id(), ARBITRARY_PARTITION_ID);
|
||||
assert_eq!(got.lock().table_id(), other_table);
|
||||
assert_eq!(
|
||||
&**got.lock().table_name().get().await,
|
||||
&***ARBITRARY_TABLE_NAME
|
||||
&**got.lock().table().get().await.name(),
|
||||
&**ARBITRARY_TABLE_NAME
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,7 +15,7 @@ use crate::{
|
|||
buffer_tree::{
|
||||
namespace::NamespaceName,
|
||||
partition::{PartitionData, SortKeyState},
|
||||
table::TableName,
|
||||
table::TableMetadata,
|
||||
},
|
||||
deferred_load::DeferredLoad,
|
||||
};
|
||||
|
@ -61,12 +61,12 @@ impl PartitionProvider for CatalogPartitionResolver {
|
|||
namespace_id: NamespaceId,
|
||||
namespace_name: Arc<DeferredLoad<NamespaceName>>,
|
||||
table_id: TableId,
|
||||
table_name: Arc<DeferredLoad<TableName>>,
|
||||
table: Arc<DeferredLoad<TableMetadata>>,
|
||||
) -> Arc<Mutex<PartitionData>> {
|
||||
debug!(
|
||||
%partition_key,
|
||||
%table_id,
|
||||
%table_name,
|
||||
%table,
|
||||
"upserting partition in catalog"
|
||||
);
|
||||
let p = Backoff::new(&self.backoff_config)
|
||||
|
@ -86,7 +86,7 @@ impl PartitionProvider for CatalogPartitionResolver {
|
|||
namespace_id,
|
||||
namespace_name,
|
||||
table_id,
|
||||
table_name,
|
||||
table,
|
||||
SortKeyState::Provided(p.sort_key()),
|
||||
)))
|
||||
}
|
||||
|
@ -103,6 +103,7 @@ mod tests {
|
|||
use iox_catalog::test_helpers::{arbitrary_namespace, arbitrary_table};
|
||||
|
||||
use super::*;
|
||||
use crate::buffer_tree::table::TableName;
|
||||
|
||||
const TABLE_NAME: &str = "bananas";
|
||||
const NAMESPACE_NAME: &str = "ns-bananas";
|
||||
|
@ -138,17 +139,25 @@ mod tests {
|
|||
table_id,
|
||||
Arc::new(DeferredLoad::new(
|
||||
Duration::from_secs(1),
|
||||
async { TableName::from(TABLE_NAME) },
|
||||
async {
|
||||
TableMetadata::new_for_testing(
|
||||
TableName::from(TABLE_NAME),
|
||||
Default::default(),
|
||||
)
|
||||
},
|
||||
&metrics,
|
||||
)),
|
||||
)
|
||||
.await;
|
||||
|
||||
// Ensure the table name is available.
|
||||
let _ = got.lock().table_name().get().await;
|
||||
let _ = got.lock().table().get().await.name();
|
||||
|
||||
assert_eq!(got.lock().namespace_id(), namespace_id);
|
||||
assert_eq!(got.lock().table_name().to_string(), table_name.to_string());
|
||||
assert_eq!(
|
||||
got.lock().table().get().await.name().to_string(),
|
||||
table_name.to_string()
|
||||
);
|
||||
assert_matches!(got.lock().sort_key(), SortKeyState::Provided(None));
|
||||
assert!(got.lock().partition_key.ptr_eq(&callers_partition_key));
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@ use hashbrown::{hash_map::Entry, HashMap};
|
|||
use parking_lot::Mutex;
|
||||
|
||||
use crate::{
|
||||
buffer_tree::{namespace::NamespaceName, partition::PartitionData, table::TableName},
|
||||
buffer_tree::{namespace::NamespaceName, partition::PartitionData, table::TableMetadata},
|
||||
deferred_load::DeferredLoad,
|
||||
};
|
||||
|
||||
|
@ -146,7 +146,7 @@ where
|
|||
namespace_id: NamespaceId,
|
||||
namespace_name: Arc<DeferredLoad<NamespaceName>>,
|
||||
table_id: TableId,
|
||||
table_name: Arc<DeferredLoad<TableName>>,
|
||||
table: Arc<DeferredLoad<TableMetadata>>,
|
||||
) -> Arc<Mutex<PartitionData>> {
|
||||
let key = Key {
|
||||
namespace_id,
|
||||
|
@ -170,7 +170,7 @@ where
|
|||
namespace_id,
|
||||
namespace_name,
|
||||
table_id,
|
||||
table_name,
|
||||
table,
|
||||
));
|
||||
|
||||
// Make the future poll-able by many callers, all of which
|
||||
|
@ -233,7 +233,7 @@ async fn do_fetch<T>(
|
|||
namespace_id: NamespaceId,
|
||||
namespace_name: Arc<DeferredLoad<NamespaceName>>,
|
||||
table_id: TableId,
|
||||
table_name: Arc<DeferredLoad<TableName>>,
|
||||
table: Arc<DeferredLoad<TableMetadata>>,
|
||||
) -> Arc<Mutex<PartitionData>>
|
||||
where
|
||||
T: PartitionProvider + 'static,
|
||||
|
@ -248,13 +248,7 @@ where
|
|||
// (which would cause the connection to be returned).
|
||||
tokio::spawn(async move {
|
||||
inner
|
||||
.get_partition(
|
||||
partition_key,
|
||||
namespace_id,
|
||||
namespace_name,
|
||||
table_id,
|
||||
table_name,
|
||||
)
|
||||
.get_partition(partition_key, namespace_id, namespace_name, table_id, table)
|
||||
.await
|
||||
})
|
||||
.await
|
||||
|
@ -280,7 +274,7 @@ mod tests {
|
|||
use crate::{
|
||||
buffer_tree::partition::{resolver::mock::MockPartitionProvider, SortKeyState},
|
||||
test_util::{
|
||||
defer_namespace_name_1_sec, defer_table_name_1_sec, PartitionDataBuilder,
|
||||
defer_namespace_name_1_sec, defer_table_metadata_1_sec, PartitionDataBuilder,
|
||||
ARBITRARY_NAMESPACE_ID, ARBITRARY_PARTITION_KEY, ARBITRARY_TABLE_ID,
|
||||
},
|
||||
};
|
||||
|
@ -308,7 +302,7 @@ mod tests {
|
|||
ARBITRARY_NAMESPACE_ID,
|
||||
defer_namespace_name_1_sec(),
|
||||
ARBITRARY_TABLE_ID,
|
||||
defer_table_name_1_sec(),
|
||||
defer_table_metadata_1_sec(),
|
||||
)
|
||||
})
|
||||
.collect::<FuturesUnordered<_>>()
|
||||
|
@ -342,7 +336,7 @@ mod tests {
|
|||
_namespace_id: NamespaceId,
|
||||
_namespace_name: Arc<DeferredLoad<NamespaceName>>,
|
||||
_table_id: TableId,
|
||||
_table_name: Arc<DeferredLoad<TableName>>,
|
||||
_table: Arc<DeferredLoad<TableMetadata>>,
|
||||
) -> core::pin::Pin<
|
||||
Box<
|
||||
dyn core::future::Future<Output = Arc<Mutex<PartitionData>>>
|
||||
|
@ -368,7 +362,7 @@ mod tests {
|
|||
|
||||
let data = PartitionDataBuilder::new().build();
|
||||
let namespace_loader = defer_namespace_name_1_sec();
|
||||
let table_name_loader = defer_table_name_1_sec();
|
||||
let table_loader = defer_table_metadata_1_sec();
|
||||
|
||||
// Add a single instance of the partition - if more than one call is
|
||||
// made to the mock, it will panic.
|
||||
|
@ -384,14 +378,14 @@ mod tests {
|
|||
ARBITRARY_NAMESPACE_ID,
|
||||
Arc::clone(&namespace_loader),
|
||||
ARBITRARY_TABLE_ID,
|
||||
Arc::clone(&table_name_loader),
|
||||
Arc::clone(&table_loader),
|
||||
);
|
||||
let pa_2 = layer.get_partition(
|
||||
ARBITRARY_PARTITION_KEY.clone(),
|
||||
ARBITRARY_NAMESPACE_ID,
|
||||
Arc::clone(&namespace_loader),
|
||||
ARBITRARY_TABLE_ID,
|
||||
Arc::clone(&table_name_loader),
|
||||
Arc::clone(&table_loader),
|
||||
);
|
||||
|
||||
let waker = futures::task::noop_waker();
|
||||
|
@ -411,7 +405,7 @@ mod tests {
|
|||
ARBITRARY_NAMESPACE_ID,
|
||||
namespace_loader,
|
||||
ARBITRARY_TABLE_ID,
|
||||
table_name_loader,
|
||||
table_loader,
|
||||
)
|
||||
.with_timeout_panic(Duration::from_secs(5))
|
||||
.await;
|
||||
|
@ -441,7 +435,7 @@ mod tests {
|
|||
_namespace_id: NamespaceId,
|
||||
_namespace_name: Arc<DeferredLoad<NamespaceName>>,
|
||||
_table_id: TableId,
|
||||
_table_name: Arc<DeferredLoad<TableName>>,
|
||||
_table: Arc<DeferredLoad<TableMetadata>>,
|
||||
) -> Arc<Mutex<PartitionData>> {
|
||||
let waker = self.wait.notified();
|
||||
let permit = self.sem.acquire().await.unwrap();
|
||||
|
@ -481,7 +475,7 @@ mod tests {
|
|||
ARBITRARY_NAMESPACE_ID,
|
||||
defer_namespace_name_1_sec(),
|
||||
ARBITRARY_TABLE_ID,
|
||||
defer_table_name_1_sec(),
|
||||
defer_table_metadata_1_sec(),
|
||||
);
|
||||
|
||||
let waker = futures::task::noop_waker();
|
||||
|
|
|
@ -8,7 +8,7 @@ use parking_lot::Mutex;
|
|||
|
||||
use super::r#trait::PartitionProvider;
|
||||
use crate::{
|
||||
buffer_tree::{namespace::NamespaceName, partition::PartitionData, table::TableName},
|
||||
buffer_tree::{namespace::NamespaceName, partition::PartitionData, table::TableMetadata},
|
||||
deferred_load::{self, DeferredLoad},
|
||||
};
|
||||
|
||||
|
@ -53,7 +53,7 @@ impl PartitionProvider for MockPartitionProvider {
|
|||
namespace_id: NamespaceId,
|
||||
namespace_name: Arc<DeferredLoad<NamespaceName>>,
|
||||
table_id: TableId,
|
||||
table_name: Arc<DeferredLoad<TableName>>,
|
||||
table: Arc<DeferredLoad<TableMetadata>>,
|
||||
) -> Arc<Mutex<PartitionData>> {
|
||||
let p = self
|
||||
.partitions
|
||||
|
@ -75,8 +75,8 @@ impl PartitionProvider for MockPartitionProvider {
|
|||
deferred_load::UNRESOLVED_DISPLAY_STRING,
|
||||
);
|
||||
|
||||
let actual_table_name = p.table_name().to_string();
|
||||
let expected_table_name = table_name.get().await.to_string();
|
||||
let actual_table_name = p.table().to_string();
|
||||
let expected_table_name = table.get().await.name().to_string();
|
||||
assert!(
|
||||
(actual_table_name.as_str() == expected_table_name)
|
||||
|| (actual_table_name == deferred_load::UNRESOLVED_DISPLAY_STRING),
|
||||
|
|
|
@ -5,7 +5,7 @@ use data_types::{NamespaceId, PartitionKey, TableId};
|
|||
use parking_lot::Mutex;
|
||||
|
||||
use crate::{
|
||||
buffer_tree::{namespace::NamespaceName, partition::PartitionData, table::TableName},
|
||||
buffer_tree::{namespace::NamespaceName, partition::PartitionData, table::TableMetadata},
|
||||
deferred_load::DeferredLoad,
|
||||
};
|
||||
|
||||
|
@ -24,7 +24,7 @@ pub(crate) trait PartitionProvider: Send + Sync + Debug {
|
|||
namespace_id: NamespaceId,
|
||||
namespace_name: Arc<DeferredLoad<NamespaceName>>,
|
||||
table_id: TableId,
|
||||
table_name: Arc<DeferredLoad<TableName>>,
|
||||
table: Arc<DeferredLoad<TableMetadata>>,
|
||||
) -> Arc<Mutex<PartitionData>>;
|
||||
}
|
||||
|
||||
|
@ -39,16 +39,10 @@ where
|
|||
namespace_id: NamespaceId,
|
||||
namespace_name: Arc<DeferredLoad<NamespaceName>>,
|
||||
table_id: TableId,
|
||||
table_name: Arc<DeferredLoad<TableName>>,
|
||||
table: Arc<DeferredLoad<TableMetadata>>,
|
||||
) -> Arc<Mutex<PartitionData>> {
|
||||
(**self)
|
||||
.get_partition(
|
||||
partition_key,
|
||||
namespace_id,
|
||||
namespace_name,
|
||||
table_id,
|
||||
table_name,
|
||||
)
|
||||
.get_partition(partition_key, namespace_id, namespace_name, table_id, table)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
@ -61,7 +55,7 @@ mod tests {
|
|||
use crate::{
|
||||
buffer_tree::partition::{resolver::mock::MockPartitionProvider, SortKeyState},
|
||||
test_util::{
|
||||
defer_namespace_name_1_sec, defer_table_name_1_sec, PartitionDataBuilder,
|
||||
defer_namespace_name_1_sec, defer_table_metadata_1_sec, PartitionDataBuilder,
|
||||
ARBITRARY_NAMESPACE_ID, ARBITRARY_PARTITION_ID, ARBITRARY_PARTITION_KEY,
|
||||
ARBITRARY_TABLE_ID,
|
||||
},
|
||||
|
@ -70,10 +64,10 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn test_arc_impl() {
|
||||
let namespace_loader = defer_namespace_name_1_sec();
|
||||
let table_name_loader = defer_table_name_1_sec();
|
||||
let table_loader = defer_table_metadata_1_sec();
|
||||
|
||||
let data = PartitionDataBuilder::new()
|
||||
.with_table_name_loader(Arc::clone(&table_name_loader))
|
||||
.with_table_loader(Arc::clone(&table_loader))
|
||||
.with_namespace_loader(Arc::clone(&namespace_loader))
|
||||
.build();
|
||||
|
||||
|
@ -85,7 +79,7 @@ mod tests {
|
|||
ARBITRARY_NAMESPACE_ID,
|
||||
Arc::clone(&namespace_loader),
|
||||
ARBITRARY_TABLE_ID,
|
||||
Arc::clone(&table_name_loader),
|
||||
Arc::clone(&table_loader),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(got.lock().partition_id(), ARBITRARY_PARTITION_ID);
|
||||
|
@ -94,9 +88,6 @@ mod tests {
|
|||
got.lock().namespace_name().to_string(),
|
||||
namespace_loader.to_string()
|
||||
);
|
||||
assert_eq!(
|
||||
got.lock().table_name().to_string(),
|
||||
table_name_loader.to_string()
|
||||
);
|
||||
assert_eq!(got.lock().table().to_string(), table_loader.to_string());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,20 +4,24 @@ use async_trait::async_trait;
|
|||
use data_types::{NamespaceId, TableId};
|
||||
use metric::U64Counter;
|
||||
use parking_lot::Mutex;
|
||||
use predicate::Predicate;
|
||||
use trace::span::Span;
|
||||
|
||||
use super::{
|
||||
namespace::{name_resolver::NamespaceNameProvider, NamespaceData},
|
||||
partition::{resolver::PartitionProvider, PartitionData},
|
||||
post_write::PostWriteObserver,
|
||||
table::name_resolver::TableNameProvider,
|
||||
table::metadata_resolver::TableProvider,
|
||||
};
|
||||
use crate::{
|
||||
arcmap::ArcMap,
|
||||
dml_payload::IngestOp,
|
||||
dml_sink::DmlSink,
|
||||
partition_iter::PartitionIter,
|
||||
query::{response::QueryResponse, tracing::QueryExecTracing, QueryError, QueryExec},
|
||||
query::{
|
||||
projection::OwnedProjection, response::QueryResponse, tracing::QueryExecTracing,
|
||||
QueryError, QueryExec,
|
||||
},
|
||||
};
|
||||
|
||||
/// A [`BufferTree`] is the root of an in-memory tree of many [`NamespaceData`]
|
||||
|
@ -92,12 +96,12 @@ pub(crate) struct BufferTree<O> {
|
|||
/// [`NamespaceName`]: data_types::NamespaceName
|
||||
namespaces: ArcMap<NamespaceId, NamespaceData<O>>,
|
||||
namespace_name_resolver: Arc<dyn NamespaceNameProvider>,
|
||||
/// The [`TableName`] provider used by [`NamespaceData`] to initialise a
|
||||
/// The [`TableMetadata`] provider used by [`NamespaceData`] to initialise a
|
||||
/// [`TableData`].
|
||||
///
|
||||
/// [`TableName`]: crate::buffer_tree::table::TableName
|
||||
/// [`TableMetadata`]: crate::buffer_tree::table::TableMetadata
|
||||
/// [`TableData`]: crate::buffer_tree::table::TableData
|
||||
table_name_resolver: Arc<dyn TableNameProvider>,
|
||||
table_resolver: Arc<dyn TableProvider>,
|
||||
|
||||
metrics: Arc<metric::Registry>,
|
||||
namespace_count: U64Counter,
|
||||
|
@ -112,7 +116,7 @@ where
|
|||
/// Initialise a new [`BufferTree`] that emits metrics to `metrics`.
|
||||
pub(crate) fn new(
|
||||
namespace_name_resolver: Arc<dyn NamespaceNameProvider>,
|
||||
table_name_resolver: Arc<dyn TableNameProvider>,
|
||||
table_resolver: Arc<dyn TableProvider>,
|
||||
partition_provider: Arc<dyn PartitionProvider>,
|
||||
post_write_observer: Arc<O>,
|
||||
metrics: Arc<metric::Registry>,
|
||||
|
@ -127,7 +131,7 @@ where
|
|||
Self {
|
||||
namespaces: Default::default(),
|
||||
namespace_name_resolver,
|
||||
table_name_resolver,
|
||||
table_resolver,
|
||||
metrics,
|
||||
partition_provider,
|
||||
post_write_observer,
|
||||
|
@ -178,7 +182,7 @@ where
|
|||
Arc::new(NamespaceData::new(
|
||||
namespace_id,
|
||||
Arc::new(self.namespace_name_resolver.for_namespace(namespace_id)),
|
||||
Arc::clone(&self.table_name_resolver),
|
||||
Arc::clone(&self.table_resolver),
|
||||
Arc::clone(&self.partition_provider),
|
||||
Arc::clone(&self.post_write_observer),
|
||||
&self.metrics,
|
||||
|
@ -200,8 +204,9 @@ where
|
|||
&self,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
columns: Vec<String>,
|
||||
projection: OwnedProjection,
|
||||
span: Option<Span>,
|
||||
predicate: Option<Predicate>,
|
||||
) -> Result<Self::Response, QueryError> {
|
||||
// Extract the namespace if it exists.
|
||||
let inner = self
|
||||
|
@ -211,7 +216,7 @@ where
|
|||
// Delegate query execution to the namespace, wrapping the execution in
|
||||
// a tracing delegate to emit a child span.
|
||||
QueryExecTracing::new(inner, "namespace")
|
||||
.query_exec(namespace_id, table_id, columns, span)
|
||||
.query_exec(namespace_id, table_id, projection, span, predicate)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
@ -227,29 +232,41 @@ where
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
use arrow::datatypes::DataType;
|
||||
use assert_matches::assert_matches;
|
||||
use data_types::{
|
||||
partition_template::{test_table_partition_override, TemplatePart},
|
||||
PartitionId, PartitionKey,
|
||||
};
|
||||
use datafusion::{
|
||||
assert_batches_eq, assert_batches_sorted_eq,
|
||||
prelude::{col, lit},
|
||||
scalar::ScalarValue,
|
||||
};
|
||||
use futures::StreamExt;
|
||||
use lazy_static::lazy_static;
|
||||
use metric::{Attributes, Metric};
|
||||
use predicate::Predicate;
|
||||
use test_helpers::maybe_start_logging;
|
||||
|
||||
use super::*;
|
||||
use crate::{
|
||||
buffer_tree::{
|
||||
namespace::{name_resolver::mock::MockNamespaceNameProvider, NamespaceData},
|
||||
partition::resolver::mock::MockPartitionProvider,
|
||||
post_write::mock::MockPostWriteObserver,
|
||||
table::TableName,
|
||||
table::{metadata_resolver::mock::MockTableProvider, TableMetadata},
|
||||
},
|
||||
deferred_load::{self, DeferredLoad},
|
||||
query::partition_response::PartitionResponse,
|
||||
test_util::{
|
||||
defer_namespace_name_1_ms, make_write_op, PartitionDataBuilder, ARBITRARY_NAMESPACE_ID,
|
||||
ARBITRARY_NAMESPACE_NAME, ARBITRARY_PARTITION_ID, ARBITRARY_PARTITION_KEY,
|
||||
ARBITRARY_TABLE_ID, ARBITRARY_TABLE_NAME, ARBITRARY_TABLE_NAME_PROVIDER,
|
||||
ARBITRARY_TABLE_ID, ARBITRARY_TABLE_NAME, ARBITRARY_TABLE_PROVIDER,
|
||||
},
|
||||
};
|
||||
use assert_matches::assert_matches;
|
||||
use data_types::{PartitionId, PartitionKey};
|
||||
use datafusion::{assert_batches_eq, assert_batches_sorted_eq};
|
||||
use futures::StreamExt;
|
||||
use lazy_static::lazy_static;
|
||||
use metric::{Attributes, Metric};
|
||||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
const PARTITION2_ID: PartitionId = PartitionId::new(2);
|
||||
const PARTITION3_ID: PartitionId = PartitionId::new(3);
|
||||
|
@ -278,7 +295,7 @@ mod tests {
|
|||
let ns = NamespaceData::new(
|
||||
ARBITRARY_NAMESPACE_ID,
|
||||
defer_namespace_name_1_ms(),
|
||||
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
|
||||
Arc::clone(&*ARBITRARY_TABLE_PROVIDER),
|
||||
partition_provider,
|
||||
Arc::new(MockPostWriteObserver::default()),
|
||||
&metrics,
|
||||
|
@ -337,13 +354,19 @@ mod tests {
|
|||
macro_rules! test_write_query {
|
||||
(
|
||||
$name:ident,
|
||||
partitions = [$($partition:expr), +], // The set of PartitionData for the mock partition provider
|
||||
$(table_provider = $table_provider:expr,)? // An optional table provider
|
||||
partitions = [$($partition:expr), +], // The set of PartitionData for the mock
|
||||
// partition provider
|
||||
writes = [$($write:expr), *], // The set of WriteOperation to apply()
|
||||
want = $want:expr // The expected results of querying ARBITRARY_NAMESPACE_ID and ARBITRARY_TABLE_ID
|
||||
predicate = $predicate:expr, // An optional predicate to use for the query
|
||||
want = $want:expr // The expected results of querying
|
||||
// ARBITRARY_NAMESPACE_ID and ARBITRARY_TABLE_ID
|
||||
) => {
|
||||
paste::paste! {
|
||||
#[tokio::test]
|
||||
async fn [<test_write_query_ $name>]() {
|
||||
maybe_start_logging();
|
||||
|
||||
// Configure the mock partition provider with the provided
|
||||
// partitions.
|
||||
let partition_provider = Arc::new(MockPartitionProvider::default()
|
||||
|
@ -352,10 +375,16 @@ mod tests {
|
|||
)+
|
||||
);
|
||||
|
||||
#[allow(unused_variables)]
|
||||
let table_provider = Arc::clone(&*ARBITRARY_TABLE_PROVIDER);
|
||||
$(
|
||||
let table_provider: Arc<dyn TableProvider> = $table_provider;
|
||||
)?
|
||||
|
||||
// Init the buffer tree
|
||||
let buf = BufferTree::new(
|
||||
Arc::new(MockNamespaceNameProvider::new(&**ARBITRARY_NAMESPACE_NAME)),
|
||||
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
|
||||
table_provider,
|
||||
partition_provider,
|
||||
Arc::new(MockPostWriteObserver::default()),
|
||||
Arc::new(metric::Registry::default()),
|
||||
|
@ -370,7 +399,13 @@ mod tests {
|
|||
|
||||
// Execute the query against ARBITRARY_NAMESPACE_ID and ARBITRARY_TABLE_ID
|
||||
let batches = buf
|
||||
.query_exec(ARBITRARY_NAMESPACE_ID, ARBITRARY_TABLE_ID, vec![], None)
|
||||
.query_exec(
|
||||
ARBITRARY_NAMESPACE_ID,
|
||||
ARBITRARY_TABLE_ID,
|
||||
OwnedProjection::default(),
|
||||
None,
|
||||
$predicate
|
||||
)
|
||||
.await
|
||||
.expect("query should succeed")
|
||||
.into_partition_stream()
|
||||
|
@ -407,6 +442,7 @@ mod tests {
|
|||
),
|
||||
None,
|
||||
)],
|
||||
predicate = None,
|
||||
want = [
|
||||
"+----------+------+-------------------------------+",
|
||||
"| region | temp | time |",
|
||||
|
@ -456,6 +492,7 @@ mod tests {
|
|||
None,
|
||||
)
|
||||
],
|
||||
predicate = None,
|
||||
want = [
|
||||
"+----------+------+-------------------------------+",
|
||||
"| region | temp | time |",
|
||||
|
@ -508,6 +545,7 @@ mod tests {
|
|||
None,
|
||||
)
|
||||
],
|
||||
predicate = None,
|
||||
want = [
|
||||
"+--------+------+-------------------------------+",
|
||||
"| region | temp | time |",
|
||||
|
@ -520,7 +558,7 @@ mod tests {
|
|||
// A query that ensures the data across multiple tables (with the same table
|
||||
// name!) is correctly filtered to return only the queried table.
|
||||
test_write_query!(
|
||||
filter_multiple_tabls,
|
||||
filter_multiple_tables,
|
||||
partitions = [
|
||||
PartitionDataBuilder::new()
|
||||
.with_partition_id(ARBITRARY_PARTITION_ID)
|
||||
|
@ -558,6 +596,7 @@ mod tests {
|
|||
None,
|
||||
)
|
||||
],
|
||||
predicate = None,
|
||||
want = [
|
||||
"+--------+------+-------------------------------+",
|
||||
"| region | temp | time |",
|
||||
|
@ -603,6 +642,7 @@ mod tests {
|
|||
None,
|
||||
)
|
||||
],
|
||||
predicate = None,
|
||||
want = [
|
||||
"+----------+------+-------------------------------+",
|
||||
"| region | temp | time |",
|
||||
|
@ -613,6 +653,98 @@ mod tests {
|
|||
]
|
||||
);
|
||||
|
||||
// This test asserts that a query to the
|
||||
// [`BufferTree`] filters rows from the result as directed by the
|
||||
// query's [`Predicate`].
|
||||
//
|
||||
// It makes sure that for a [`BufferTree`] with a set of partitions split
|
||||
// by some key a query with a predicate `<partition key column> == <arbitrary literal>`
|
||||
// returns partition data that has been filtered to contain only rows which
|
||||
// contain the specified value in that partition key column.
|
||||
test_write_query!(
|
||||
filter_by_predicate_partition_key,
|
||||
table_provider = Arc::new(MockTableProvider::new(TableMetadata::new_for_testing(
|
||||
ARBITRARY_TABLE_NAME.clone(),
|
||||
test_table_partition_override(vec![TemplatePart::TagValue("region")])
|
||||
))),
|
||||
partitions = [
|
||||
PartitionDataBuilder::new()
|
||||
.with_partition_id(ARBITRARY_PARTITION_ID)
|
||||
.with_partition_key(ARBITRARY_PARTITION_KEY.clone()) // "platanos"
|
||||
.build(),
|
||||
PartitionDataBuilder::new()
|
||||
.with_partition_id(PARTITION2_ID)
|
||||
.with_partition_key(PARTITION2_KEY.clone()) // "p2"
|
||||
.build()
|
||||
],
|
||||
writes = [
|
||||
make_write_op(
|
||||
&ARBITRARY_PARTITION_KEY,
|
||||
ARBITRARY_NAMESPACE_ID,
|
||||
&ARBITRARY_TABLE_NAME,
|
||||
ARBITRARY_TABLE_ID,
|
||||
0,
|
||||
&format!(
|
||||
r#"{},region={} temp=35 4242424242"#,
|
||||
&*ARBITRARY_TABLE_NAME, &*ARBITRARY_PARTITION_KEY
|
||||
),
|
||||
None,
|
||||
),
|
||||
make_write_op(
|
||||
&ARBITRARY_PARTITION_KEY,
|
||||
ARBITRARY_NAMESPACE_ID,
|
||||
&ARBITRARY_TABLE_NAME,
|
||||
ARBITRARY_TABLE_ID,
|
||||
1,
|
||||
&format!(
|
||||
r#"{},region={} temp=12 4242424242"#,
|
||||
&*ARBITRARY_TABLE_NAME, &*ARBITRARY_PARTITION_KEY
|
||||
),
|
||||
None,
|
||||
),
|
||||
make_write_op(
|
||||
&PARTITION2_KEY,
|
||||
ARBITRARY_NAMESPACE_ID,
|
||||
&ARBITRARY_TABLE_NAME,
|
||||
ARBITRARY_TABLE_ID,
|
||||
2,
|
||||
&format!(
|
||||
r#"{},region={} temp=17 7676767676"#,
|
||||
&*ARBITRARY_TABLE_NAME, *PARTITION2_KEY
|
||||
),
|
||||
None,
|
||||
),
|
||||
make_write_op(
|
||||
&PARTITION2_KEY,
|
||||
ARBITRARY_NAMESPACE_ID,
|
||||
&ARBITRARY_TABLE_NAME,
|
||||
ARBITRARY_TABLE_ID,
|
||||
3,
|
||||
&format!(
|
||||
r#"{},region={} temp=13 7676767676"#,
|
||||
&*ARBITRARY_TABLE_NAME, *PARTITION2_KEY,
|
||||
),
|
||||
None,
|
||||
)
|
||||
],
|
||||
// NOTE: The querier will coerce the type of the predicates correctly, so the ingester does NOT need to perform
|
||||
// type coercion. This type should reflect that.
|
||||
predicate = Some(Predicate::new().with_expr(col("region").eq(lit(
|
||||
ScalarValue::Dictionary(
|
||||
Box::new(DataType::Int32),
|
||||
Box::new(ScalarValue::from(PARTITION2_KEY.inner()))
|
||||
)
|
||||
)))),
|
||||
want = [
|
||||
"+--------+------+-------------------------------+",
|
||||
"| region | temp | time |",
|
||||
"+--------+------+-------------------------------+",
|
||||
"| p2 | 13.0 | 1970-01-01T00:00:07.676767676 |",
|
||||
"| p2 | 17.0 | 1970-01-01T00:00:07.676767676 |",
|
||||
"+--------+------+-------------------------------+",
|
||||
]
|
||||
);
|
||||
|
||||
/// Assert that multiple writes to a single namespace/table results in a
|
||||
/// single namespace being created, and matching metrics.
|
||||
#[tokio::test]
|
||||
|
@ -627,7 +759,7 @@ mod tests {
|
|||
)
|
||||
.with_partition(
|
||||
PartitionDataBuilder::new()
|
||||
.with_partition_id(ARBITRARY_PARTITION_ID)
|
||||
.with_partition_id(PARTITION2_ID)
|
||||
.with_partition_key(PARTITION2_KEY.clone())
|
||||
.build(),
|
||||
),
|
||||
|
@ -638,7 +770,7 @@ mod tests {
|
|||
// Init the buffer tree
|
||||
let buf = BufferTree::new(
|
||||
Arc::new(MockNamespaceNameProvider::new(&**ARBITRARY_NAMESPACE_NAME)),
|
||||
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
|
||||
Arc::clone(&*ARBITRARY_TABLE_PROVIDER),
|
||||
partition_provider,
|
||||
Arc::new(MockPostWriteObserver::default()),
|
||||
Arc::clone(&metrics),
|
||||
|
@ -722,9 +854,14 @@ mod tests {
|
|||
.with_partition_id(PARTITION3_ID)
|
||||
.with_partition_key(PARTITION3_KEY.clone())
|
||||
.with_table_id(TABLE2_ID)
|
||||
.with_table_name_loader(Arc::new(DeferredLoad::new(
|
||||
.with_table_loader(Arc::new(DeferredLoad::new(
|
||||
Duration::from_secs(1),
|
||||
async move { TableName::from(TABLE2_NAME) },
|
||||
async move {
|
||||
TableMetadata::new_for_testing(
|
||||
TABLE2_NAME.into(),
|
||||
Default::default(),
|
||||
)
|
||||
},
|
||||
&metric::Registry::default(),
|
||||
)))
|
||||
.build(),
|
||||
|
@ -734,7 +871,7 @@ mod tests {
|
|||
// Init the buffer tree
|
||||
let buf = BufferTree::new(
|
||||
Arc::new(MockNamespaceNameProvider::new(&**ARBITRARY_NAMESPACE_NAME)),
|
||||
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
|
||||
Arc::clone(&*ARBITRARY_TABLE_PROVIDER),
|
||||
partition_provider,
|
||||
Arc::new(MockPostWriteObserver::default()),
|
||||
Arc::clone(&Arc::new(metric::Registry::default())),
|
||||
|
@ -821,7 +958,7 @@ mod tests {
|
|||
// Init the BufferTree
|
||||
let buf = BufferTree::new(
|
||||
Arc::new(MockNamespaceNameProvider::new(&**ARBITRARY_NAMESPACE_NAME)),
|
||||
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
|
||||
Arc::clone(&*ARBITRARY_TABLE_PROVIDER),
|
||||
partition_provider,
|
||||
Arc::new(MockPostWriteObserver::default()),
|
||||
Arc::new(metric::Registry::default()),
|
||||
|
@ -829,7 +966,13 @@ mod tests {
|
|||
|
||||
// Query the empty tree
|
||||
let err = buf
|
||||
.query_exec(ARBITRARY_NAMESPACE_ID, ARBITRARY_TABLE_ID, vec![], None)
|
||||
.query_exec(
|
||||
ARBITRARY_NAMESPACE_ID,
|
||||
ARBITRARY_TABLE_ID,
|
||||
OwnedProjection::default(),
|
||||
None,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.expect_err("query should fail");
|
||||
assert_matches!(err, QueryError::NamespaceNotFound(ns) => {
|
||||
|
@ -854,7 +997,13 @@ mod tests {
|
|||
|
||||
// Ensure an unknown table errors
|
||||
let err = buf
|
||||
.query_exec(ARBITRARY_NAMESPACE_ID, TABLE2_ID, vec![], None)
|
||||
.query_exec(
|
||||
ARBITRARY_NAMESPACE_ID,
|
||||
TABLE2_ID,
|
||||
OwnedProjection::default(),
|
||||
None,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.expect_err("query should fail");
|
||||
assert_matches!(err, QueryError::TableNotFound(ns, t) => {
|
||||
|
@ -863,9 +1012,15 @@ mod tests {
|
|||
});
|
||||
|
||||
// Ensure a valid namespace / table does not error
|
||||
buf.query_exec(ARBITRARY_NAMESPACE_ID, ARBITRARY_TABLE_ID, vec![], None)
|
||||
.await
|
||||
.expect("namespace / table should exist");
|
||||
buf.query_exec(
|
||||
ARBITRARY_NAMESPACE_ID,
|
||||
ARBITRARY_TABLE_ID,
|
||||
OwnedProjection::default(),
|
||||
None,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.expect("namespace / table should exist");
|
||||
}
|
||||
|
||||
/// This test asserts the read consistency properties defined in the
|
||||
|
@ -906,7 +1061,7 @@ mod tests {
|
|||
// Init the buffer tree
|
||||
let buf = BufferTree::new(
|
||||
Arc::new(MockNamespaceNameProvider::new(&**ARBITRARY_NAMESPACE_NAME)),
|
||||
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
|
||||
Arc::clone(&*ARBITRARY_TABLE_PROVIDER),
|
||||
partition_provider,
|
||||
Arc::new(MockPostWriteObserver::default()),
|
||||
Arc::new(metric::Registry::default()),
|
||||
|
@ -931,7 +1086,13 @@ mod tests {
|
|||
// Execute a query of the buffer tree, generating the result stream, but
|
||||
// DO NOT consume it.
|
||||
let stream = buf
|
||||
.query_exec(ARBITRARY_NAMESPACE_ID, ARBITRARY_TABLE_ID, vec![], None)
|
||||
.query_exec(
|
||||
ARBITRARY_NAMESPACE_ID,
|
||||
ARBITRARY_TABLE_ID,
|
||||
OwnedProjection::default(),
|
||||
None,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.expect("query should succeed")
|
||||
.into_partition_stream();
|
||||
|
|
|
@ -1,14 +1,23 @@
|
|||
//! Table level data buffer structures.
|
||||
|
||||
pub(crate) mod name_resolver;
|
||||
pub(crate) mod metadata_resolver;
|
||||
|
||||
use std::{fmt::Debug, sync::Arc};
|
||||
use std::{collections::HashMap, fmt::Debug, sync::Arc};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use data_types::{NamespaceId, PartitionKey, SequenceNumber, TableId};
|
||||
use data_types::{
|
||||
partition_template::{build_column_values, ColumnValue, TablePartitionTemplateOverride},
|
||||
NamespaceId, PartitionKey, SequenceNumber, Table, TableId,
|
||||
};
|
||||
use datafusion::scalar::ScalarValue;
|
||||
use iox_query::{
|
||||
chunk_statistics::{create_chunk_statistics, ColumnRange},
|
||||
pruning::prune_summaries,
|
||||
QueryChunk,
|
||||
};
|
||||
use mutable_batch::MutableBatch;
|
||||
use parking_lot::Mutex;
|
||||
use schema::Projection;
|
||||
use predicate::Predicate;
|
||||
use trace::span::{Span, SpanRecorder};
|
||||
|
||||
use super::{
|
||||
|
@ -20,10 +29,55 @@ use crate::{
|
|||
arcmap::ArcMap,
|
||||
deferred_load::DeferredLoad,
|
||||
query::{
|
||||
partition_response::PartitionResponse, response::PartitionStream, QueryError, QueryExec,
|
||||
partition_response::PartitionResponse, projection::OwnedProjection,
|
||||
response::PartitionStream, QueryError, QueryExec,
|
||||
},
|
||||
query_adaptor::QueryAdaptor,
|
||||
};
|
||||
|
||||
/// Metadata from the catalog for a table
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub(crate) struct TableMetadata {
|
||||
name: TableName,
|
||||
partition_template: TablePartitionTemplateOverride,
|
||||
}
|
||||
|
||||
impl TableMetadata {
|
||||
#[cfg(test)]
|
||||
pub fn new_for_testing(
|
||||
name: TableName,
|
||||
partition_template: TablePartitionTemplateOverride,
|
||||
) -> Self {
|
||||
Self {
|
||||
name,
|
||||
partition_template,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn name(&self) -> &TableName {
|
||||
&self.name
|
||||
}
|
||||
|
||||
pub(crate) fn partition_template(&self) -> &TablePartitionTemplateOverride {
|
||||
&self.partition_template
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Table> for TableMetadata {
|
||||
fn from(t: Table) -> Self {
|
||||
Self {
|
||||
name: t.name.into(),
|
||||
partition_template: t.partition_template,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for TableMetadata {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
std::fmt::Display::fmt(&self.name, f)
|
||||
}
|
||||
}
|
||||
|
||||
/// The string name / identifier of a Table.
|
||||
///
|
||||
/// A reference-counted, cheap clone-able string.
|
||||
|
@ -69,7 +123,7 @@ impl PartialEq<str> for TableName {
|
|||
#[derive(Debug)]
|
||||
pub(crate) struct TableData<O> {
|
||||
table_id: TableId,
|
||||
table_name: Arc<DeferredLoad<TableName>>,
|
||||
catalog_table: Arc<DeferredLoad<TableMetadata>>,
|
||||
|
||||
/// The catalog ID of the namespace this table is being populated from.
|
||||
namespace_id: NamespaceId,
|
||||
|
@ -93,7 +147,7 @@ impl<O> TableData<O> {
|
|||
/// for the first time.
|
||||
pub(super) fn new(
|
||||
table_id: TableId,
|
||||
table_name: Arc<DeferredLoad<TableName>>,
|
||||
catalog_table: Arc<DeferredLoad<TableMetadata>>,
|
||||
namespace_id: NamespaceId,
|
||||
namespace_name: Arc<DeferredLoad<NamespaceName>>,
|
||||
partition_provider: Arc<dyn PartitionProvider>,
|
||||
|
@ -101,7 +155,7 @@ impl<O> TableData<O> {
|
|||
) -> Self {
|
||||
Self {
|
||||
table_id,
|
||||
table_name,
|
||||
catalog_table,
|
||||
namespace_id,
|
||||
namespace_name,
|
||||
partition_data: Default::default(),
|
||||
|
@ -132,9 +186,9 @@ impl<O> TableData<O> {
|
|||
self.table_id
|
||||
}
|
||||
|
||||
/// Returns the name of this table.
|
||||
pub(crate) fn table_name(&self) -> &Arc<DeferredLoad<TableName>> {
|
||||
&self.table_name
|
||||
/// Returns the catalog data for this table.
|
||||
pub(crate) fn catalog_table(&self) -> &Arc<DeferredLoad<TableMetadata>> {
|
||||
&self.catalog_table
|
||||
}
|
||||
|
||||
/// Return the [`NamespaceId`] this table is a part of.
|
||||
|
@ -166,7 +220,7 @@ where
|
|||
self.namespace_id,
|
||||
Arc::clone(&self.namespace_name),
|
||||
self.table_id,
|
||||
Arc::clone(&self.table_name),
|
||||
Arc::clone(&self.catalog_table),
|
||||
)
|
||||
.await;
|
||||
// Add the partition to the map.
|
||||
|
@ -202,8 +256,9 @@ where
|
|||
&self,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
columns: Vec<String>,
|
||||
projection: OwnedProjection,
|
||||
span: Option<Span>,
|
||||
predicate: Option<Predicate>,
|
||||
) -> Result<Self::Response, QueryError> {
|
||||
assert_eq!(self.table_id, table_id, "buffer tree index inconsistency");
|
||||
assert_eq!(
|
||||
|
@ -211,18 +266,21 @@ where
|
|||
"buffer tree index inconsistency"
|
||||
);
|
||||
|
||||
let table_partition_template = self.catalog_table.get().await.partition_template;
|
||||
|
||||
// Gather the partition data from all of the partitions in this table.
|
||||
let span = SpanRecorder::new(span);
|
||||
let partitions = self.partitions().into_iter().map(move |p| {
|
||||
let mut span = span.child("partition read");
|
||||
|
||||
let (id, hash_id, completed_persistence_count, data) = {
|
||||
let (id, hash_id, completed_persistence_count, data, partition_key) = {
|
||||
let mut p = p.lock();
|
||||
(
|
||||
p.partition_id(),
|
||||
p.partition_hash_id().cloned(),
|
||||
p.completed_persistence_count(),
|
||||
p.get_query_data(),
|
||||
p.get_query_data(&projection),
|
||||
p.partition_key().clone(),
|
||||
)
|
||||
};
|
||||
|
||||
|
@ -230,16 +288,36 @@ where
|
|||
Some(data) => {
|
||||
assert_eq!(id, data.partition_id());
|
||||
|
||||
// Project the data if necessary
|
||||
let columns = columns.iter().map(String::as_str).collect::<Vec<_>>();
|
||||
let selection = if columns.is_empty() {
|
||||
Projection::All
|
||||
} else {
|
||||
Projection::Some(columns.as_ref())
|
||||
};
|
||||
// Potentially prune out this partition if the partition
|
||||
// template & derived partition key can be used to match
|
||||
// against the optional predicate.
|
||||
if predicate
|
||||
.as_ref()
|
||||
.map(|p| {
|
||||
!keep_after_pruning_partition_key(
|
||||
&table_partition_template,
|
||||
&partition_key,
|
||||
p,
|
||||
&data,
|
||||
)
|
||||
})
|
||||
.unwrap_or_default()
|
||||
{
|
||||
return PartitionResponse::new(
|
||||
vec![],
|
||||
id,
|
||||
hash_id,
|
||||
completed_persistence_count,
|
||||
);
|
||||
}
|
||||
|
||||
let data = data.project_selection(selection).into_iter().collect();
|
||||
PartitionResponse::new(data, id, hash_id, completed_persistence_count)
|
||||
// Project the data if necessary
|
||||
PartitionResponse::new(
|
||||
data.into_record_batches(),
|
||||
id,
|
||||
hash_id,
|
||||
completed_persistence_count,
|
||||
)
|
||||
}
|
||||
None => PartitionResponse::new(vec![], id, hash_id, completed_persistence_count),
|
||||
};
|
||||
|
@ -252,6 +330,106 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
/// Return true if `data` may contain one or more rows matching `predicate`,
|
||||
/// pruning based on the `partition_key` and `template`.
|
||||
///
|
||||
/// Returns false iff it can be proven that no row in `data` matches the
|
||||
/// predicate.
|
||||
fn keep_after_pruning_partition_key(
|
||||
table_partition_template: &TablePartitionTemplateOverride,
|
||||
partition_key: &PartitionKey,
|
||||
predicate: &Predicate,
|
||||
data: &QueryAdaptor,
|
||||
) -> bool {
|
||||
// Construct a set of per-column min/max statistics based on the partition
|
||||
// key values.
|
||||
let column_ranges = Arc::new(
|
||||
build_column_values(table_partition_template, partition_key.inner())
|
||||
.filter_map(|(col, val)| {
|
||||
let range = match val {
|
||||
ColumnValue::Identity(s) => {
|
||||
let s = Arc::new(ScalarValue::from(s.as_ref()));
|
||||
ColumnRange {
|
||||
min_value: Arc::clone(&s),
|
||||
max_value: s,
|
||||
}
|
||||
}
|
||||
ColumnValue::Prefix(p) if p.is_empty() => return None,
|
||||
ColumnValue::Prefix(p) => {
|
||||
// If the partition only has a prefix of the tag value
|
||||
// (it was truncated) then form a conservative range:
|
||||
//
|
||||
// # Minimum
|
||||
// Use the prefix itself.
|
||||
//
|
||||
// Note that the minimum is inclusive.
|
||||
//
|
||||
// All values in the partition are either:
|
||||
//
|
||||
// - identical to the prefix, in which case they are
|
||||
// included by the inclusive minimum
|
||||
//
|
||||
// - have the form `"<prefix><s>"`, and it holds that
|
||||
// `"<prefix><s>" > "<prefix>"` for all strings
|
||||
// `"<s>"`.
|
||||
//
|
||||
// # Maximum
|
||||
// Use `"<prefix_excluding_last_char><char::max>"`.
|
||||
//
|
||||
// Note that the maximum is inclusive.
|
||||
//
|
||||
// All strings in this partition must be smaller than
|
||||
// this constructed maximum, because string comparison
|
||||
// is front-to-back and
|
||||
// `"<prefix_excluding_last_char><char::max>" >
|
||||
// "<prefix>"`.
|
||||
|
||||
let min_value = Arc::new(ScalarValue::from(p.as_ref()));
|
||||
|
||||
let mut chars = p.as_ref().chars().collect::<Vec<_>>();
|
||||
*chars.last_mut().expect("checked that prefix is not empty") =
|
||||
std::char::MAX;
|
||||
let max_value = Arc::new(ScalarValue::from(
|
||||
chars.into_iter().collect::<String>().as_str(),
|
||||
));
|
||||
|
||||
ColumnRange {
|
||||
min_value,
|
||||
max_value,
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Some((Arc::from(col), range))
|
||||
})
|
||||
.collect::<HashMap<_, _>>(),
|
||||
);
|
||||
|
||||
let chunk_statistics = Arc::new(create_chunk_statistics(
|
||||
data.num_rows(),
|
||||
data.schema(),
|
||||
data.ts_min_max(),
|
||||
&column_ranges,
|
||||
));
|
||||
|
||||
prune_summaries(
|
||||
data.schema(),
|
||||
&[(chunk_statistics, data.schema().as_arrow())],
|
||||
predicate,
|
||||
)
|
||||
// Errors are logged by `iox_query` and sometimes fine, e.g. for not
|
||||
// implemented DataFusion features or upstream bugs. The querier uses the
|
||||
// same strategy. Pruning is a mere optimization and should not lead to
|
||||
// crashes or unreadable data.
|
||||
.ok()
|
||||
.map(|vals| {
|
||||
vals.into_iter()
|
||||
.next()
|
||||
.expect("one chunk in, one chunk out")
|
||||
})
|
||||
.unwrap_or(true)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
@ -265,7 +443,7 @@ mod tests {
|
|||
post_write::mock::MockPostWriteObserver,
|
||||
},
|
||||
test_util::{
|
||||
defer_namespace_name_1_sec, defer_table_name_1_sec, PartitionDataBuilder,
|
||||
defer_namespace_name_1_sec, defer_table_metadata_1_sec, PartitionDataBuilder,
|
||||
ARBITRARY_NAMESPACE_ID, ARBITRARY_PARTITION_KEY, ARBITRARY_TABLE_ID,
|
||||
ARBITRARY_TABLE_NAME,
|
||||
},
|
||||
|
@ -280,7 +458,7 @@ mod tests {
|
|||
|
||||
let table = TableData::new(
|
||||
ARBITRARY_TABLE_ID,
|
||||
defer_table_name_1_sec(),
|
||||
defer_table_metadata_1_sec(),
|
||||
ARBITRARY_NAMESPACE_ID,
|
||||
defer_namespace_name_1_sec(),
|
||||
partition_provider,
|
||||
|
|
|
@ -4,24 +4,24 @@ use backoff::{Backoff, BackoffConfig};
|
|||
use data_types::TableId;
|
||||
use iox_catalog::interface::Catalog;
|
||||
|
||||
use super::TableName;
|
||||
use super::TableMetadata;
|
||||
use crate::deferred_load::DeferredLoad;
|
||||
|
||||
/// An abstract provider of a [`DeferredLoad`] configured to fetch the
|
||||
/// [`TableName`] of the specified [`TableId`].
|
||||
pub(crate) trait TableNameProvider: Send + Sync + std::fmt::Debug {
|
||||
fn for_table(&self, id: TableId) -> DeferredLoad<TableName>;
|
||||
/// catalog [`TableMetadata`] of the specified [`TableId`].
|
||||
pub(crate) trait TableProvider: Send + Sync + std::fmt::Debug {
|
||||
fn for_table(&self, id: TableId) -> DeferredLoad<TableMetadata>;
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct TableNameResolver {
|
||||
pub(crate) struct TableResolver {
|
||||
max_smear: Duration,
|
||||
catalog: Arc<dyn Catalog>,
|
||||
backoff_config: BackoffConfig,
|
||||
metrics: Arc<metric::Registry>,
|
||||
}
|
||||
|
||||
impl TableNameResolver {
|
||||
impl TableResolver {
|
||||
pub(crate) fn new(
|
||||
max_smear: Duration,
|
||||
catalog: Arc<dyn Catalog>,
|
||||
|
@ -36,16 +36,16 @@ impl TableNameResolver {
|
|||
}
|
||||
}
|
||||
|
||||
/// Fetch the [`TableName`] from the [`Catalog`] for specified
|
||||
/// Fetch the [`TableMetadata`] from the [`Catalog`] for the specified
|
||||
/// `table_id`, retrying endlessly when errors occur.
|
||||
pub(crate) async fn fetch(
|
||||
table_id: TableId,
|
||||
catalog: Arc<dyn Catalog>,
|
||||
backoff_config: BackoffConfig,
|
||||
) -> TableName {
|
||||
) -> TableMetadata {
|
||||
Backoff::new(&backoff_config)
|
||||
.retry_all_errors("fetch table name", || async {
|
||||
let s = catalog
|
||||
.retry_all_errors("fetch table", || async {
|
||||
let table = catalog
|
||||
.repositories()
|
||||
.await
|
||||
.tables()
|
||||
|
@ -54,18 +54,17 @@ impl TableNameResolver {
|
|||
.unwrap_or_else(|| {
|
||||
panic!("resolving table name for non-existent table id {table_id}")
|
||||
})
|
||||
.name
|
||||
.into();
|
||||
|
||||
Result::<_, iox_catalog::interface::Error>::Ok(s)
|
||||
Result::<_, iox_catalog::interface::Error>::Ok(table)
|
||||
})
|
||||
.await
|
||||
.expect("retry forever")
|
||||
}
|
||||
}
|
||||
|
||||
impl TableNameProvider for TableNameResolver {
|
||||
fn for_table(&self, id: TableId) -> DeferredLoad<TableName> {
|
||||
impl TableProvider for TableResolver {
|
||||
fn for_table(&self, id: TableId) -> DeferredLoad<TableMetadata> {
|
||||
DeferredLoad::new(
|
||||
self.max_smear,
|
||||
Self::fetch(id, Arc::clone(&self.catalog), self.backoff_config.clone()),
|
||||
|
@ -79,28 +78,33 @@ pub(crate) mod mock {
|
|||
use super::*;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct MockTableNameProvider {
|
||||
name: TableName,
|
||||
pub(crate) struct MockTableProvider {
|
||||
table: TableMetadata,
|
||||
}
|
||||
|
||||
impl MockTableNameProvider {
|
||||
pub(crate) fn new(name: impl Into<TableName>) -> Self {
|
||||
Self { name: name.into() }
|
||||
impl MockTableProvider {
|
||||
pub(crate) fn new(table: impl Into<TableMetadata>) -> Self {
|
||||
Self {
|
||||
table: table.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for MockTableNameProvider {
|
||||
impl Default for MockTableProvider {
|
||||
fn default() -> Self {
|
||||
Self::new("bananas")
|
||||
Self::new(TableMetadata::new_for_testing(
|
||||
"bananas".into(),
|
||||
Default::default(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
impl TableNameProvider for MockTableNameProvider {
|
||||
fn for_table(&self, _id: TableId) -> DeferredLoad<TableName> {
|
||||
let name = self.name.clone();
|
||||
impl TableProvider for MockTableProvider {
|
||||
fn for_table(&self, _id: TableId) -> DeferredLoad<TableMetadata> {
|
||||
let table = self.table.clone();
|
||||
DeferredLoad::new(
|
||||
Duration::from_secs(1),
|
||||
async { name },
|
||||
async { table },
|
||||
&metric::Registry::default(),
|
||||
)
|
||||
}
|
||||
|
@ -129,7 +133,7 @@ mod tests {
|
|||
// Populate the catalog with the namespace / table
|
||||
let (_ns_id, table_id) = populate_catalog(&*catalog, NAMESPACE_NAME, TABLE_NAME).await;
|
||||
|
||||
let fetcher = Arc::new(TableNameResolver::new(
|
||||
let fetcher = Arc::new(TableResolver::new(
|
||||
Duration::from_secs(10),
|
||||
Arc::clone(&catalog),
|
||||
backoff_config.clone(),
|
||||
|
@ -141,6 +145,6 @@ mod tests {
|
|||
.get()
|
||||
.with_timeout_panic(Duration::from_secs(5))
|
||||
.await;
|
||||
assert_eq!(&**got, TABLE_NAME);
|
||||
assert_eq!(got.name(), TABLE_NAME);
|
||||
}
|
||||
}
|
|
@ -30,7 +30,7 @@ use crate::{
|
|||
partition::resolver::{
|
||||
CatalogPartitionResolver, CoalescePartitionResolver, PartitionCache, PartitionProvider,
|
||||
},
|
||||
table::name_resolver::{TableNameProvider, TableNameResolver},
|
||||
table::metadata_resolver::{TableProvider, TableResolver},
|
||||
BufferTree,
|
||||
},
|
||||
dml_sink::{instrumentation::DmlSinkInstrumentation, tracing::DmlSinkTracing},
|
||||
|
@ -253,8 +253,8 @@ where
|
|||
Arc::clone(&metrics),
|
||||
));
|
||||
|
||||
// Initialise the deferred table name resolver.
|
||||
let table_name_provider: Arc<dyn TableNameProvider> = Arc::new(TableNameResolver::new(
|
||||
// Initialise the deferred table metadata resolver.
|
||||
let table_provider: Arc<dyn TableProvider> = Arc::new(TableResolver::new(
|
||||
persist_background_fetch_time,
|
||||
Arc::clone(&catalog),
|
||||
BackoffConfig::default(),
|
||||
|
@ -326,7 +326,7 @@ where
|
|||
|
||||
let buffer = Arc::new(BufferTree::new(
|
||||
namespace_name_provider,
|
||||
table_name_provider,
|
||||
table_provider,
|
||||
partition_provider,
|
||||
Arc::new(hot_partition_persister),
|
||||
Arc::clone(&metrics),
|
||||
|
@ -389,9 +389,7 @@ where
|
|||
// ingester, but they are only used for internal ordering of operations at
|
||||
// runtime.
|
||||
let timestamp = Arc::new(TimestampOracle::new(
|
||||
max_sequence_number
|
||||
.map(|v| u64::try_from(v.get()).expect("sequence number overflow"))
|
||||
.unwrap_or(0),
|
||||
max_sequence_number.map(|v| v.get()).unwrap_or(0),
|
||||
));
|
||||
|
||||
let (shutdown_tx, shutdown_rx) = oneshot::channel();
|
||||
|
|
|
@ -9,6 +9,7 @@ use crate::{
|
|||
ingest_state::{IngestState, IngestStateError},
|
||||
partition_iter::PartitionIter,
|
||||
persist::{drain_buffer::persist_partitions, queue::PersistQueue},
|
||||
query::projection::OwnedProjection,
|
||||
};
|
||||
|
||||
/// Defines how often the shutdown task polls the partition buffers for
|
||||
|
@ -77,10 +78,11 @@ pub(super) async fn graceful_shutdown_handler<F, T, P>(
|
|||
// springs to life and buffers in the buffer tree after this check has
|
||||
// completed - I think this is extreme enough to accept as a theoretical
|
||||
// possibility that doesn't need covering off in practice.
|
||||
while buffer
|
||||
.partition_iter()
|
||||
.any(|p| p.lock().get_query_data().is_some())
|
||||
{
|
||||
while buffer.partition_iter().any(|p| {
|
||||
p.lock()
|
||||
.get_query_data(&OwnedProjection::default())
|
||||
.is_some()
|
||||
}) {
|
||||
if persist_partitions(buffer.partition_iter(), &persist).await != 0 {
|
||||
// Late arriving writes needed persisting.
|
||||
debug!("re-persisting late arriving data");
|
||||
|
|
|
@ -199,9 +199,7 @@ where
|
|||
op,
|
||||
} = op;
|
||||
|
||||
let sequence_number = SequenceNumber::new(
|
||||
i64::try_from(sequence_number).expect("sequence number overflow"),
|
||||
);
|
||||
let sequence_number = SequenceNumber::new(sequence_number);
|
||||
|
||||
max_sequence = max_sequence.max(Some(sequence_number));
|
||||
|
||||
|
|
|
@ -67,10 +67,7 @@ pub(super) async fn compact_persisting_batch(
|
|||
adjust_sort_key_columns(&sk, &batch.schema().primary_key())
|
||||
}
|
||||
None => {
|
||||
let sort_key = compute_sort_key(
|
||||
batch.schema(),
|
||||
batch.record_batches().iter().map(|sb| sb.as_ref()),
|
||||
);
|
||||
let sort_key = compute_sort_key(batch.schema(), batch.record_batches().iter());
|
||||
// Use the sort key computed from the cardinality as the sort key for this parquet
|
||||
// file's metadata, and also return the sort key to be stored in the catalog
|
||||
(sort_key.clone(), Some(sort_key))
|
||||
|
@ -127,7 +124,7 @@ mod tests {
|
|||
.to_arrow(Projection::All)
|
||||
.unwrap();
|
||||
|
||||
let batch = QueryAdaptor::new(ARBITRARY_PARTITION_ID, vec![Arc::new(batch)]);
|
||||
let batch = QueryAdaptor::new(ARBITRARY_PARTITION_ID, vec![batch]);
|
||||
|
||||
// verify PK
|
||||
let schema = batch.schema();
|
||||
|
@ -459,8 +456,7 @@ mod tests {
|
|||
let expected_pk = vec!["tag1", "time"];
|
||||
assert_eq!(expected_pk, pk);
|
||||
|
||||
let sort_key =
|
||||
compute_sort_key(schema, batch.record_batches().iter().map(|rb| rb.as_ref()));
|
||||
let sort_key = compute_sort_key(schema, batch.record_batches().iter());
|
||||
assert_eq!(sort_key, SortKey::from_columns(["tag1", "time"]));
|
||||
|
||||
// compact
|
||||
|
@ -500,8 +496,7 @@ mod tests {
|
|||
let expected_pk = vec!["tag1", "time"];
|
||||
assert_eq!(expected_pk, pk);
|
||||
|
||||
let sort_key =
|
||||
compute_sort_key(schema, batch.record_batches().iter().map(|rb| rb.as_ref()));
|
||||
let sort_key = compute_sort_key(schema, batch.record_batches().iter());
|
||||
assert_eq!(sort_key, SortKey::from_columns(["tag1", "time"]));
|
||||
|
||||
// compact
|
||||
|
@ -549,8 +544,7 @@ mod tests {
|
|||
let expected_pk = vec!["tag1", "time"];
|
||||
assert_eq!(expected_pk, pk);
|
||||
|
||||
let sort_key =
|
||||
compute_sort_key(schema, batch.record_batches().iter().map(|rb| rb.as_ref()));
|
||||
let sort_key = compute_sort_key(schema, batch.record_batches().iter());
|
||||
assert_eq!(sort_key, SortKey::from_columns(["tag1", "time"]));
|
||||
|
||||
// compact
|
||||
|
@ -596,8 +590,7 @@ mod tests {
|
|||
let expected_pk = vec!["tag1", "tag2", "time"];
|
||||
assert_eq!(expected_pk, pk);
|
||||
|
||||
let sort_key =
|
||||
compute_sort_key(schema, batch.record_batches().iter().map(|rb| rb.as_ref()));
|
||||
let sort_key = compute_sort_key(schema, batch.record_batches().iter());
|
||||
assert_eq!(sort_key, SortKey::from_columns(["tag1", "tag2", "time"]));
|
||||
|
||||
// compact
|
||||
|
@ -647,8 +640,7 @@ mod tests {
|
|||
let expected_pk = vec!["tag1", "tag2", "time"];
|
||||
assert_eq!(expected_pk, pk);
|
||||
|
||||
let sort_key =
|
||||
compute_sort_key(schema, batch.record_batches().iter().map(|rb| rb.as_ref()));
|
||||
let sort_key = compute_sort_key(schema, batch.record_batches().iter());
|
||||
assert_eq!(sort_key, SortKey::from_columns(["tag1", "tag2", "time"]));
|
||||
|
||||
// compact
|
||||
|
@ -699,7 +691,7 @@ mod tests {
|
|||
batch.schema();
|
||||
}
|
||||
|
||||
async fn create_one_row_record_batch_with_influxtype() -> Vec<Arc<RecordBatch>> {
|
||||
async fn create_one_row_record_batch_with_influxtype() -> Vec<RecordBatch> {
|
||||
let chunk1 = Arc::new(
|
||||
TestChunk::new("t")
|
||||
.with_id(1)
|
||||
|
@ -723,11 +715,10 @@ mod tests {
|
|||
];
|
||||
assert_batches_eq!(&expected, &batches);
|
||||
|
||||
let batches: Vec<_> = batches.iter().map(|r| Arc::new(r.clone())).collect();
|
||||
batches
|
||||
}
|
||||
|
||||
async fn create_one_record_batch_with_influxtype_no_duplicates() -> Vec<Arc<RecordBatch>> {
|
||||
async fn create_one_record_batch_with_influxtype_no_duplicates() -> Vec<RecordBatch> {
|
||||
let chunk1 = Arc::new(
|
||||
TestChunk::new("t")
|
||||
.with_id(1)
|
||||
|
@ -753,11 +744,10 @@ mod tests {
|
|||
];
|
||||
assert_batches_eq!(&expected, &batches);
|
||||
|
||||
let batches: Vec<_> = batches.iter().map(|r| Arc::new(r.clone())).collect();
|
||||
batches
|
||||
}
|
||||
|
||||
async fn create_one_record_batch_with_influxtype_duplicates() -> Vec<Arc<RecordBatch>> {
|
||||
async fn create_one_record_batch_with_influxtype_duplicates() -> Vec<RecordBatch> {
|
||||
let chunk1 = Arc::new(
|
||||
TestChunk::new("t")
|
||||
.with_id(1)
|
||||
|
@ -790,12 +780,11 @@ mod tests {
|
|||
];
|
||||
assert_batches_eq!(&expected, &batches);
|
||||
|
||||
let batches: Vec<_> = batches.iter().map(|r| Arc::new(r.clone())).collect();
|
||||
batches
|
||||
}
|
||||
|
||||
/// RecordBatches with knowledge of influx metadata
|
||||
async fn create_batches_with_influxtype() -> Vec<Arc<RecordBatch>> {
|
||||
async fn create_batches_with_influxtype() -> Vec<RecordBatch> {
|
||||
// Use the available TestChunk to create chunks and then convert them to raw RecordBatches
|
||||
let mut batches = vec![];
|
||||
|
||||
|
@ -826,7 +815,7 @@ mod tests {
|
|||
"+-----------+------+--------------------------------+",
|
||||
];
|
||||
assert_batches_eq!(&expected, &[batch1.clone()]);
|
||||
batches.push(Arc::new(batch1));
|
||||
batches.push(batch1);
|
||||
|
||||
// chunk2 having duplicate data with chunk 1
|
||||
let chunk2 = Arc::new(
|
||||
|
@ -850,7 +839,7 @@ mod tests {
|
|||
"+-----------+------+--------------------------------+",
|
||||
];
|
||||
assert_batches_eq!(&expected, &[batch2.clone()]);
|
||||
batches.push(Arc::new(batch2));
|
||||
batches.push(batch2);
|
||||
|
||||
// verify data from both batches
|
||||
let expected = vec![
|
||||
|
@ -874,14 +863,13 @@ mod tests {
|
|||
"| 5 | MT | 1970-01-01T00:00:00.000005Z |",
|
||||
"+-----------+------+--------------------------------+",
|
||||
];
|
||||
let b: Vec<_> = batches.iter().map(|b| (**b).clone()).collect();
|
||||
assert_batches_eq!(&expected, &b);
|
||||
assert_batches_eq!(&expected, &batches);
|
||||
|
||||
batches
|
||||
}
|
||||
|
||||
/// RecordBatches with knowledge of influx metadata
|
||||
async fn create_batches_with_influxtype_different_columns() -> Vec<Arc<RecordBatch>> {
|
||||
async fn create_batches_with_influxtype_different_columns() -> Vec<RecordBatch> {
|
||||
// Use the available TestChunk to create chunks and then convert them to raw RecordBatches
|
||||
let mut batches = vec![];
|
||||
|
||||
|
@ -912,7 +900,7 @@ mod tests {
|
|||
"+-----------+------+--------------------------------+",
|
||||
];
|
||||
assert_batches_eq!(&expected, &[batch1.clone()]);
|
||||
batches.push(Arc::new(batch1));
|
||||
batches.push(batch1);
|
||||
|
||||
// chunk2 having duplicate data with chunk 1
|
||||
// more columns
|
||||
|
@ -939,14 +927,14 @@ mod tests {
|
|||
"+-----------+------------+------+------+--------------------------------+",
|
||||
];
|
||||
assert_batches_eq!(&expected, &[batch2.clone()]);
|
||||
batches.push(Arc::new(batch2));
|
||||
batches.push(batch2);
|
||||
|
||||
batches
|
||||
}
|
||||
|
||||
/// RecordBatches with knowledge of influx metadata
|
||||
async fn create_batches_with_influxtype_different_columns_different_order(
|
||||
) -> Vec<Arc<RecordBatch>> {
|
||||
async fn create_batches_with_influxtype_different_columns_different_order() -> Vec<RecordBatch>
|
||||
{
|
||||
// Use the available TestChunk to create chunks and then convert them to raw RecordBatches
|
||||
let mut batches = vec![];
|
||||
|
||||
|
@ -978,7 +966,7 @@ mod tests {
|
|||
"+-----------+------+------+--------------------------------+",
|
||||
];
|
||||
assert_batches_eq!(&expected, &[batch1.clone()]);
|
||||
batches.push(Arc::new(batch1.clone()));
|
||||
batches.push(batch1.clone());
|
||||
|
||||
// chunk2 having duplicate data with chunk 1
|
||||
// more columns
|
||||
|
@ -1003,13 +991,13 @@ mod tests {
|
|||
"+-----------+------+--------------------------------+",
|
||||
];
|
||||
assert_batches_eq!(&expected, &[batch2.clone()]);
|
||||
batches.push(Arc::new(batch2));
|
||||
batches.push(batch2);
|
||||
|
||||
batches
|
||||
}
|
||||
|
||||
/// Has 2 tag columns; tag1 has a lower cardinality (3) than tag3 (4)
|
||||
async fn create_batches_with_influxtype_different_cardinality() -> Vec<Arc<RecordBatch>> {
|
||||
async fn create_batches_with_influxtype_different_cardinality() -> Vec<RecordBatch> {
|
||||
// Use the available TestChunk to create chunks and then convert them to raw RecordBatches
|
||||
let mut batches = vec![];
|
||||
|
||||
|
@ -1034,7 +1022,7 @@ mod tests {
|
|||
"+-----------+------+------+-----------------------------+",
|
||||
];
|
||||
assert_batches_eq!(&expected, &[batch1.clone()]);
|
||||
batches.push(Arc::new(batch1.clone()));
|
||||
batches.push(batch1.clone());
|
||||
|
||||
let chunk2 = Arc::new(
|
||||
TestChunk::new("t")
|
||||
|
@ -1057,13 +1045,13 @@ mod tests {
|
|||
"+-----------+------+------+-----------------------------+",
|
||||
];
|
||||
assert_batches_eq!(&expected, &[batch2.clone()]);
|
||||
batches.push(Arc::new(batch2));
|
||||
batches.push(batch2);
|
||||
|
||||
batches
|
||||
}
|
||||
|
||||
/// RecordBatches with knowledge of influx metadata
|
||||
async fn create_batches_with_influxtype_same_columns_different_type() -> Vec<Arc<RecordBatch>> {
|
||||
async fn create_batches_with_influxtype_same_columns_different_type() -> Vec<RecordBatch> {
|
||||
// Use the available TestChunk to create chunks and then convert them to raw RecordBatches
|
||||
let mut batches = vec![];
|
||||
|
||||
|
@ -1087,7 +1075,7 @@ mod tests {
|
|||
"+-----------+------+-----------------------------+",
|
||||
];
|
||||
assert_batches_eq!(&expected, &[batch1.clone()]);
|
||||
batches.push(Arc::new(batch1));
|
||||
batches.push(batch1);
|
||||
|
||||
// chunk2 having duplicate data with chunk 1
|
||||
// more columns
|
||||
|
@ -1110,7 +1098,7 @@ mod tests {
|
|||
"+-----------+------+-----------------------------+",
|
||||
];
|
||||
assert_batches_eq!(&expected, &[batch2.clone()]);
|
||||
batches.push(Arc::new(batch2));
|
||||
batches.push(batch2);
|
||||
|
||||
batches
|
||||
}
|
||||
|
|
|
@ -18,7 +18,7 @@ use crate::{
|
|||
buffer_tree::{
|
||||
namespace::NamespaceName,
|
||||
partition::{persisting::PersistingData, PartitionData, SortKeyState},
|
||||
table::TableName,
|
||||
table::TableMetadata,
|
||||
},
|
||||
deferred_load::DeferredLoad,
|
||||
persist::completion_observer::CompletedPersist,
|
||||
|
@ -94,14 +94,14 @@ pub(super) struct Context {
|
|||
// The partition key for this partition
|
||||
partition_key: PartitionKey,
|
||||
|
||||
/// Deferred strings needed for persistence.
|
||||
/// Deferred data needed for persistence.
|
||||
///
|
||||
/// These [`DeferredLoad`] are given a pre-fetch hint when this [`Context`]
|
||||
/// is constructed to load them in the background (if not already resolved)
|
||||
/// in order to avoid incurring the query latency when the values are
|
||||
/// needed.
|
||||
namespace_name: Arc<DeferredLoad<NamespaceName>>,
|
||||
table_name: Arc<DeferredLoad<TableName>>,
|
||||
table: Arc<DeferredLoad<TableMetadata>>,
|
||||
|
||||
/// The [`SortKey`] for the [`PartitionData`] at the time of [`Context`]
|
||||
/// construction.
|
||||
|
@ -164,7 +164,7 @@ impl Context {
|
|||
partition_hash_id: guard.partition_hash_id().cloned(),
|
||||
partition_key: guard.partition_key().clone(),
|
||||
namespace_name: Arc::clone(guard.namespace_name()),
|
||||
table_name: Arc::clone(guard.table_name()),
|
||||
table: Arc::clone(guard.table()),
|
||||
|
||||
// Technically the sort key isn't immutable, but MUST NOT be
|
||||
// changed by an external actor (by something other than code in
|
||||
|
@ -182,7 +182,7 @@ impl Context {
|
|||
// Pre-fetch the deferred values in a background thread (if not already
|
||||
// resolved)
|
||||
s.namespace_name.prefetch_now();
|
||||
s.table_name.prefetch_now();
|
||||
s.table.prefetch_now();
|
||||
if let SortKeyState::Deferred(ref d) = s.sort_key {
|
||||
d.prefetch_now();
|
||||
}
|
||||
|
@ -253,7 +253,7 @@ impl Context {
|
|||
namespace_id = %self.namespace_id,
|
||||
namespace_name = %self.namespace_name,
|
||||
table_id = %self.table_id,
|
||||
table_name = %self.table_name,
|
||||
table = %self.table,
|
||||
partition_id = %self.partition_id,
|
||||
partition_key = %self.partition_key,
|
||||
total_persist_duration = ?now.duration_since(self.enqueued_at),
|
||||
|
@ -315,7 +315,7 @@ impl Context {
|
|||
self.namespace_name.as_ref()
|
||||
}
|
||||
|
||||
pub(super) fn table_name(&self) -> &DeferredLoad<TableName> {
|
||||
self.table_name.as_ref()
|
||||
pub(super) fn table(&self) -> &DeferredLoad<TableMetadata> {
|
||||
self.table.as_ref()
|
||||
}
|
||||
}
|
||||
|
|
|
@ -501,7 +501,7 @@ mod tests {
|
|||
test_util::{
|
||||
make_write_op, PartitionDataBuilder, ARBITRARY_NAMESPACE_ID, ARBITRARY_NAMESPACE_NAME,
|
||||
ARBITRARY_PARTITION_ID, ARBITRARY_PARTITION_KEY, ARBITRARY_TABLE_ID,
|
||||
ARBITRARY_TABLE_NAME, ARBITRARY_TABLE_NAME_PROVIDER,
|
||||
ARBITRARY_TABLE_NAME, ARBITRARY_TABLE_PROVIDER,
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -510,7 +510,7 @@ mod tests {
|
|||
async fn new_partition(sort_key: SortKeyState) -> Arc<Mutex<PartitionData>> {
|
||||
let buffer_tree = BufferTree::new(
|
||||
Arc::new(MockNamespaceNameProvider::new(&**ARBITRARY_NAMESPACE_NAME)),
|
||||
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
|
||||
Arc::clone(&*ARBITRARY_TABLE_PROVIDER),
|
||||
Arc::new(
|
||||
MockPartitionProvider::default().with_partition(
|
||||
PartitionDataBuilder::new()
|
||||
|
|
|
@ -110,6 +110,7 @@ mod tests {
|
|||
|
||||
use crate::{
|
||||
persist::queue::mock::MockPersistQueue,
|
||||
query::projection::OwnedProjection,
|
||||
test_util::{PartitionDataBuilder, ARBITRARY_TABLE_NAME},
|
||||
};
|
||||
|
||||
|
@ -162,7 +163,9 @@ mod tests {
|
|||
guard
|
||||
.buffer_write(mb, SequenceNumber::new(2))
|
||||
.expect("write should succeed");
|
||||
guard.get_query_data().expect("should have query adaptor")
|
||||
guard
|
||||
.get_query_data(&OwnedProjection::default())
|
||||
.expect("should have query adaptor")
|
||||
};
|
||||
|
||||
hot_partition_persister.observe(Arc::clone(&p), p.lock());
|
||||
|
@ -170,7 +173,7 @@ mod tests {
|
|||
tokio::task::yield_now().await;
|
||||
// Assert the partition was queued for persistence with the correct data.
|
||||
assert_matches!(persist_handle.calls().as_slice(), [got] => {
|
||||
let got_query_data = got.lock().get_query_data().expect("should have query adaptor");
|
||||
let got_query_data = got.lock().get_query_data(&OwnedProjection::default(),).expect("should have query adaptor");
|
||||
assert_eq!(got_query_data.record_batches(), want_query_data.record_batches());
|
||||
});
|
||||
|
||||
|
|
|
@ -48,7 +48,7 @@ mod tests {
|
|||
test_util::{
|
||||
make_write_op, populate_catalog, ARBITRARY_NAMESPACE_NAME,
|
||||
ARBITRARY_NAMESPACE_NAME_PROVIDER, ARBITRARY_PARTITION_KEY, ARBITRARY_TABLE_NAME,
|
||||
ARBITRARY_TABLE_NAME_PROVIDER,
|
||||
ARBITRARY_TABLE_PROVIDER,
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -67,7 +67,7 @@ mod tests {
|
|||
// Init the buffer tree
|
||||
let buf = BufferTree::new(
|
||||
Arc::clone(&*ARBITRARY_NAMESPACE_NAME_PROVIDER),
|
||||
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
|
||||
Arc::clone(&*ARBITRARY_TABLE_PROVIDER),
|
||||
Arc::new(CatalogPartitionResolver::new(Arc::clone(&catalog))),
|
||||
Arc::new(MockPostWriteObserver::default()),
|
||||
Arc::new(metric::Registry::default()),
|
||||
|
|
|
@ -202,7 +202,7 @@ where
|
|||
namespace_id = %ctx.namespace_id(),
|
||||
namespace_name = %ctx.namespace_name(),
|
||||
table_id = %ctx.table_id(),
|
||||
table_name = %ctx.table_name(),
|
||||
table = %ctx.table(),
|
||||
partition_id = %ctx.partition_id(),
|
||||
partition_key = %ctx.partition_key(),
|
||||
?sort_key,
|
||||
|
@ -218,7 +218,7 @@ where
|
|||
compact_persisting_batch(
|
||||
&worker_state.exec,
|
||||
sort_key,
|
||||
ctx.table_name().get().await,
|
||||
ctx.table().get().await.name().clone(),
|
||||
ctx.data().query_adaptor(),
|
||||
)
|
||||
.await
|
||||
|
@ -249,7 +249,7 @@ where
|
|||
namespace_id = %ctx.namespace_id(),
|
||||
namespace_name = %ctx.namespace_name(),
|
||||
table_id = %ctx.table_id(),
|
||||
table_name = %ctx.table_name(),
|
||||
table = %ctx.table(),
|
||||
partition_id = %ctx.partition_id(),
|
||||
partition_key = %ctx.partition_key(),
|
||||
%object_store_id,
|
||||
|
@ -265,7 +265,7 @@ where
|
|||
namespace_id: ctx.namespace_id(),
|
||||
namespace_name: Arc::clone(&*ctx.namespace_name().get().await),
|
||||
table_id: ctx.table_id(),
|
||||
table_name: Arc::clone(&*ctx.table_name().get().await),
|
||||
table_name: Arc::clone(ctx.table().get().await.name()),
|
||||
partition_key: ctx.partition_key().clone(),
|
||||
compaction_level: CompactionLevel::Initial,
|
||||
sort_key: Some(data_sort_key),
|
||||
|
@ -291,7 +291,7 @@ where
|
|||
namespace_id = %ctx.namespace_id(),
|
||||
namespace_name = %ctx.namespace_name(),
|
||||
table_id = %ctx.table_id(),
|
||||
table_name = %ctx.table_name(),
|
||||
table = %ctx.table(),
|
||||
partition_id = %ctx.partition_id(),
|
||||
partition_key = %ctx.partition_key(),
|
||||
%object_store_id,
|
||||
|
@ -358,7 +358,7 @@ where
|
|||
namespace_id = %ctx.namespace_id(),
|
||||
namespace_name = %ctx.namespace_name(),
|
||||
table_id = %ctx.table_id(),
|
||||
table_name = %ctx.table_name(),
|
||||
table = %ctx.table(),
|
||||
partition_id = %ctx.partition_id(),
|
||||
partition_key = %ctx.partition_key(),
|
||||
?new_sort_key,
|
||||
|
@ -394,7 +394,7 @@ where
|
|||
namespace_id = %ctx.namespace_id(),
|
||||
namespace_name = %ctx.namespace_name(),
|
||||
table_id = %ctx.table_id(),
|
||||
table_name = %ctx.table_name(),
|
||||
table = %ctx.table(),
|
||||
partition_id = %ctx.partition_id(),
|
||||
partition_key = %ctx.partition_key(),
|
||||
expected=?old_sort_key,
|
||||
|
@ -420,7 +420,7 @@ where
|
|||
namespace_id = %ctx.namespace_id(),
|
||||
namespace_name = %ctx.namespace_name(),
|
||||
table_id = %ctx.table_id(),
|
||||
table_name = %ctx.table_name(),
|
||||
table = %ctx.table(),
|
||||
partition_id = %ctx.partition_id(),
|
||||
partition_key = %ctx.partition_key(),
|
||||
expected=?old_sort_key,
|
||||
|
@ -460,7 +460,7 @@ where
|
|||
namespace_id = %ctx.namespace_id(),
|
||||
namespace_name = %ctx.namespace_name(),
|
||||
table_id = %ctx.table_id(),
|
||||
table_name = %ctx.table_name(),
|
||||
table = %ctx.table(),
|
||||
partition_id = %ctx.partition_id(),
|
||||
partition_key = %ctx.partition_key(),
|
||||
?old_sort_key,
|
||||
|
@ -488,7 +488,7 @@ where
|
|||
namespace_id = %ctx.namespace_id(),
|
||||
namespace_name = %ctx.namespace_name(),
|
||||
table_id = %ctx.table_id(),
|
||||
table_name = %ctx.table_name(),
|
||||
table = %ctx.table(),
|
||||
partition_id = %ctx.partition_id(),
|
||||
partition_key = %ctx.partition_key(),
|
||||
%object_store_id,
|
||||
|
@ -512,7 +512,7 @@ where
|
|||
namespace_id = %ctx.namespace_id(),
|
||||
namespace_name = %ctx.namespace_name(),
|
||||
table_id = %ctx.table_id(),
|
||||
table_name = %ctx.table_name(),
|
||||
table = %ctx.table(),
|
||||
partition_id = %ctx.partition_id(),
|
||||
partition_key = %ctx.partition_key(),
|
||||
%object_store_id,
|
||||
|
|
|
@ -4,9 +4,10 @@ use async_trait::async_trait;
|
|||
use data_types::{NamespaceId, TableId};
|
||||
use iox_time::{SystemProvider, TimeProvider};
|
||||
use metric::{DurationHistogram, Metric};
|
||||
use predicate::Predicate;
|
||||
use trace::span::Span;
|
||||
|
||||
use super::QueryExec;
|
||||
use super::{projection::OwnedProjection, QueryExec};
|
||||
use crate::query::QueryError;
|
||||
|
||||
/// An instrumentation decorator over a [`QueryExec`] implementation.
|
||||
|
@ -62,14 +63,15 @@ where
|
|||
&self,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
columns: Vec<String>,
|
||||
projection: OwnedProjection,
|
||||
span: Option<Span>,
|
||||
predicate: Option<Predicate>,
|
||||
) -> Result<Self::Response, QueryError> {
|
||||
let t = self.time_provider.now();
|
||||
|
||||
let res = self
|
||||
.inner
|
||||
.query_exec(namespace_id, table_id, columns, span)
|
||||
.query_exec(namespace_id, table_id, projection, span, predicate)
|
||||
.await;
|
||||
|
||||
if let Some(delta) = self.time_provider.now().checked_duration_since(t) {
|
||||
|
@ -113,7 +115,7 @@ mod tests {
|
|||
|
||||
// Call the decorator and assert the return value
|
||||
let got = decorator
|
||||
.query_exec(NamespaceId::new(42), TableId::new(24), vec![], None)
|
||||
.query_exec(NamespaceId::new(42), TableId::new(24),OwnedProjection::default(), None, None)
|
||||
.await;
|
||||
assert_matches!(got, $($want_ret)+);
|
||||
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
use async_trait::async_trait;
|
||||
use data_types::{NamespaceId, TableId};
|
||||
use parking_lot::Mutex;
|
||||
use predicate::Predicate;
|
||||
use trace::span::Span;
|
||||
|
||||
use super::{response::QueryResponse, QueryError, QueryExec};
|
||||
use super::{projection::OwnedProjection, response::QueryResponse, QueryError, QueryExec};
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub(crate) struct MockQueryExec {
|
||||
|
@ -25,8 +26,9 @@ impl QueryExec for MockQueryExec {
|
|||
&self,
|
||||
_namespace_id: NamespaceId,
|
||||
_table_id: TableId,
|
||||
_columns: Vec<String>,
|
||||
_projection: OwnedProjection,
|
||||
_span: Option<Span>,
|
||||
_predicate: Option<Predicate>,
|
||||
) -> Result<Self::Response, QueryError> {
|
||||
self.response
|
||||
.lock()
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
mod r#trait;
|
||||
pub(crate) use r#trait::*;
|
||||
|
||||
pub(crate) mod projection;
|
||||
|
||||
// Response types
|
||||
pub(crate) mod partition_response;
|
||||
pub(crate) mod response;
|
||||
|
|
|
@ -0,0 +1,129 @@
|
|||
use arrow::record_batch::RecordBatch;
|
||||
use mutable_batch::MutableBatch;
|
||||
use schema::SchemaBuilder;
|
||||
|
||||
/// Private inner representation of an `OwnedProjection`; kept private so that callers cannot construct an empty `Project` subset.
|
||||
#[derive(Debug, Default)]
|
||||
enum Projection {
|
||||
/// Return all columns (the default).
|
||||
#[default]
|
||||
All,
|
||||
|
||||
/// Return the specified subset of columns.
|
||||
///
|
||||
/// The order of the returned columns is not guaranteed to match the order in which they were specified.
|
||||
//
|
||||
// Invariant: subset is never empty - this variant is only constructed when
|
||||
// there is at least one column to project.
|
||||
Project(Vec<String>),
|
||||
}
|
||||
|
||||
/// Specifies the set of columns to project during a query.
|
||||
///
|
||||
/// Defaults to "all columns"; converting an empty `Vec<String>` into this type also selects all columns.
|
||||
#[derive(Debug, Default)]
|
||||
pub(crate) struct OwnedProjection(Projection);
|
||||
|
||||
impl From<Vec<String>> for OwnedProjection {
|
||||
fn from(value: Vec<String>) -> Self {
|
||||
if value.is_empty() {
|
||||
return Self(Projection::All);
|
||||
}
|
||||
|
||||
Self(Projection::Project(value))
|
||||
}
|
||||
}
|
||||
|
||||
impl OwnedProjection {
|
||||
/// Copy the data within a [`MutableBatch`] into a [`RecordBatch`], applying
|
||||
/// the the specified projection.
|
||||
///
|
||||
/// This avoids copying column data for columns that are not part of the
|
||||
/// projection.
|
||||
///
|
||||
/// NOTE: this copies the underlying column data
|
||||
pub(crate) fn project_mutable_batches(&self, batch: &MutableBatch) -> RecordBatch {
|
||||
// Pre-allocate the outputs to their maximal possible size to avoid
|
||||
// reallocations.
|
||||
let max_capacity = match &self.0 {
|
||||
Projection::All => batch.columns().len(),
|
||||
Projection::Project(s) => s.len(),
|
||||
};
|
||||
|
||||
let mut schema_builder = SchemaBuilder::with_capacity(max_capacity);
|
||||
let mut column_data = Vec::with_capacity(max_capacity);
|
||||
|
||||
// Compute the schema overlap between the requested projection, and the
|
||||
// buffered data.
|
||||
//
|
||||
// Generate the RecordBatch contents in a single pass.
|
||||
match &self.0 {
|
||||
Projection::All => {
|
||||
// If there's no projection, the columns must be emitted ordered
|
||||
// by their name.
|
||||
let mut columns = batch.columns().collect::<Vec<_>>();
|
||||
columns.sort_unstable_by_key(|v| v.0);
|
||||
|
||||
for (name, column) in columns.into_iter() {
|
||||
schema_builder.influx_column(name, column.influx_type());
|
||||
column_data.push(column.to_arrow().expect("failed to snapshot buffer data"));
|
||||
}
|
||||
}
|
||||
|
||||
Projection::Project(cols) => {
|
||||
// Invariant: subset is never empty
|
||||
assert!(!cols.is_empty());
|
||||
|
||||
// Construct the schema & data arrays in a single pass, ordered
|
||||
// by the projection and ignoring any missing columns.
|
||||
for name in cols {
|
||||
if let Ok(column) = batch.column(name) {
|
||||
schema_builder.influx_column(name, column.influx_type());
|
||||
column_data
|
||||
.push(column.to_arrow().expect("failed to snapshot buffer data"));
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let schema = schema_builder
|
||||
.build()
|
||||
.expect("failed to create batch schema");
|
||||
|
||||
RecordBatch::try_new(schema.into(), column_data)
|
||||
.expect("failed to generate snapshot record batch")
|
||||
}
|
||||
|
||||
/// Apply the specified projection to `batches`.
|
||||
///
|
||||
/// This projection requires relatively cheap ref-counting clones and does
|
||||
/// not copy the underlying data.
|
||||
pub(crate) fn project_record_batch(&self, batches: &[RecordBatch]) -> Vec<RecordBatch> {
|
||||
match &self.0 {
|
||||
Projection::All => batches.to_vec(),
|
||||
Projection::Project(columns) => {
|
||||
// Invariant: subset is never empty
|
||||
assert!(!columns.is_empty());
|
||||
|
||||
batches
|
||||
.iter()
|
||||
.map(|batch| {
|
||||
let schema = batch.schema();
|
||||
|
||||
// Map the column names to column indexes, ignoring
|
||||
// columns specified in the columns that do not exist
|
||||
// in this batch.
|
||||
let projection = columns
|
||||
.iter()
|
||||
.flat_map(|column_name| schema.index_of(column_name).ok())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
batch
|
||||
.project(&projection)
|
||||
.expect("batch projection failure")
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -58,6 +58,7 @@ use iox_time::{SystemProvider, Time, TimeProvider};
|
|||
use metric::{DurationHistogram, Metric, U64Histogram, U64HistogramOptions};
|
||||
use observability_deps::tracing::debug;
|
||||
use pin_project::{pin_project, pinned_drop};
|
||||
use predicate::Predicate;
|
||||
use trace::span::Span;
|
||||
|
||||
use crate::query::{
|
||||
|
@ -66,6 +67,8 @@ use crate::query::{
|
|||
QueryError, QueryExec,
|
||||
};
|
||||
|
||||
use super::projection::OwnedProjection;
|
||||
|
||||
/// A [`QueryExec`] decorator adding instrumentation to the [`QueryResponse`]
|
||||
/// returned by the inner implementation.
|
||||
///
|
||||
|
@ -202,14 +205,17 @@ where
|
|||
&self,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
columns: Vec<String>,
|
||||
projection: OwnedProjection,
|
||||
span: Option<Span>,
|
||||
predicate: Option<Predicate>,
|
||||
) -> Result<Self::Response, QueryError> {
|
||||
let started_at = self.time_provider.now();
|
||||
|
||||
// TODO(savage): Would accepting a predicate here require additional
|
||||
// metrics to be added?
|
||||
let stream = self
|
||||
.inner
|
||||
.query_exec(namespace_id, table_id, columns, span)
|
||||
.query_exec(namespace_id, table_id, projection, span, predicate)
|
||||
.await?;
|
||||
|
||||
let stream = QueryMetricContext::new(
|
||||
|
@ -467,7 +473,13 @@ mod tests {
|
|||
.with_time_provider(Arc::clone(&mock_time));
|
||||
|
||||
let response = layer
|
||||
.query_exec(ARBITRARY_NAMESPACE_ID, ARBITRARY_TABLE_ID, vec![], None)
|
||||
.query_exec(
|
||||
ARBITRARY_NAMESPACE_ID,
|
||||
ARBITRARY_TABLE_ID,
|
||||
OwnedProjection::default(),
|
||||
None,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.expect("query should succeed");
|
||||
|
||||
|
@ -548,7 +560,13 @@ mod tests {
|
|||
.with_time_provider(Arc::clone(&mock_time));
|
||||
|
||||
let response = layer
|
||||
.query_exec(ARBITRARY_NAMESPACE_ID, ARBITRARY_TABLE_ID, vec![], None)
|
||||
.query_exec(
|
||||
ARBITRARY_NAMESPACE_ID,
|
||||
ARBITRARY_TABLE_ID,
|
||||
OwnedProjection::default(),
|
||||
None,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.expect("query should succeed");
|
||||
|
||||
|
@ -628,7 +646,13 @@ mod tests {
|
|||
.with_time_provider(Arc::clone(&mock_time));
|
||||
|
||||
let response = layer
|
||||
.query_exec(ARBITRARY_NAMESPACE_ID, ARBITRARY_TABLE_ID, vec![], None)
|
||||
.query_exec(
|
||||
ARBITRARY_NAMESPACE_ID,
|
||||
ARBITRARY_TABLE_ID,
|
||||
OwnedProjection::default(),
|
||||
None,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.expect("query should succeed");
|
||||
|
||||
|
@ -708,7 +732,13 @@ mod tests {
|
|||
.with_time_provider(Arc::clone(&mock_time));
|
||||
|
||||
let response = layer
|
||||
.query_exec(ARBITRARY_NAMESPACE_ID, ARBITRARY_TABLE_ID, vec![], None)
|
||||
.query_exec(
|
||||
ARBITRARY_NAMESPACE_ID,
|
||||
ARBITRARY_TABLE_ID,
|
||||
OwnedProjection::default(),
|
||||
None,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.expect("query should succeed");
|
||||
|
||||
|
|
|
@ -2,9 +2,10 @@ use std::borrow::Cow;
|
|||
|
||||
use async_trait::async_trait;
|
||||
use data_types::{NamespaceId, TableId};
|
||||
use predicate::Predicate;
|
||||
use trace::span::{Span, SpanRecorder};
|
||||
|
||||
use super::QueryExec;
|
||||
use super::{projection::OwnedProjection, QueryExec};
|
||||
use crate::query::QueryError;
|
||||
|
||||
/// A tracing decorator over a [`QueryExec`] implementation.
|
||||
|
@ -40,14 +41,21 @@ where
|
|||
&self,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
columns: Vec<String>,
|
||||
projection: OwnedProjection,
|
||||
span: Option<Span>,
|
||||
predicate: Option<Predicate>,
|
||||
) -> Result<Self::Response, QueryError> {
|
||||
let mut recorder = SpanRecorder::new(span).child(self.name.clone());
|
||||
|
||||
match self
|
||||
.inner
|
||||
.query_exec(namespace_id, table_id, columns, recorder.span().cloned())
|
||||
.query_exec(
|
||||
namespace_id,
|
||||
table_id,
|
||||
projection,
|
||||
recorder.span().cloned(),
|
||||
predicate,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(v) => {
|
||||
|
@ -109,8 +117,9 @@ mod tests {
|
|||
.query_exec(
|
||||
NamespaceId::new(42),
|
||||
TableId::new(24),
|
||||
vec![],
|
||||
OwnedProjection::default(),
|
||||
Some(span.child("root span")),
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.expect("wrapper should not modify result");
|
||||
|
@ -132,8 +141,9 @@ mod tests {
|
|||
.query_exec(
|
||||
NamespaceId::new(42),
|
||||
TableId::new(24),
|
||||
vec![],
|
||||
OwnedProjection::default(),
|
||||
Some(span.child("root span")),
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.expect_err("wrapper should not modify result");
|
||||
|
|
|
@ -2,9 +2,12 @@ use std::{fmt::Debug, ops::Deref, sync::Arc};
|
|||
|
||||
use async_trait::async_trait;
|
||||
use data_types::{NamespaceId, TableId};
|
||||
use predicate::Predicate;
|
||||
use thiserror::Error;
|
||||
use trace::span::Span;
|
||||
|
||||
use super::projection::OwnedProjection;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
#[allow(missing_copy_implementations)]
|
||||
pub(crate) enum QueryError {
|
||||
|
@ -23,8 +26,9 @@ pub(crate) trait QueryExec: Send + Sync + Debug {
|
|||
&self,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
columns: Vec<String>,
|
||||
projection: OwnedProjection,
|
||||
span: Option<Span>,
|
||||
predicate: Option<Predicate>,
|
||||
) -> Result<Self::Response, QueryError>;
|
||||
}
|
||||
|
||||
|
@ -39,11 +43,12 @@ where
|
|||
&self,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
columns: Vec<String>,
|
||||
projection: OwnedProjection,
|
||||
span: Option<Span>,
|
||||
predicate: Option<Predicate>,
|
||||
) -> Result<Self::Response, QueryError> {
|
||||
self.deref()
|
||||
.query_exec(namespace_id, table_id, columns, span)
|
||||
.query_exec(namespace_id, table_id, projection, span, predicate)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,15 +5,13 @@ use std::{any::Any, sync::Arc};
|
|||
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use arrow_util::util::ensure_schema;
|
||||
use data_types::{ChunkId, ChunkOrder, PartitionId};
|
||||
use datafusion::{error::DataFusionError, physical_plan::Statistics};
|
||||
use data_types::{ChunkId, ChunkOrder, PartitionId, TimestampMinMax};
|
||||
use datafusion::physical_plan::Statistics;
|
||||
use iox_query::{
|
||||
exec::{stringset::StringSet, IOxSessionContext},
|
||||
util::{compute_timenanosecond_min_max, create_basic_summary},
|
||||
QueryChunk, QueryChunkData,
|
||||
};
|
||||
use once_cell::sync::OnceCell;
|
||||
use predicate::Predicate;
|
||||
use schema::{merge::merge_record_batch_schemas, sort::SortKey, Projection, Schema};
|
||||
|
||||
/// A queryable wrapper over a set of ordered [`RecordBatch`] snapshot from a
|
||||
|
@ -30,7 +28,7 @@ pub struct QueryAdaptor {
|
|||
///
|
||||
/// This MUST be non-pub(crate) / closed for modification / immutable to support
|
||||
/// interning the merged schema in [`Self::schema()`].
|
||||
data: Vec<Arc<RecordBatch>>,
|
||||
data: Vec<RecordBatch>,
|
||||
|
||||
/// The catalog ID of the partition this data is part of.
|
||||
partition_id: PartitionId,
|
||||
|
@ -52,12 +50,12 @@ impl QueryAdaptor {
|
|||
///
|
||||
/// This constructor panics if `data` contains no [`RecordBatch`], or all
|
||||
/// [`RecordBatch`] are empty.
|
||||
pub(crate) fn new(partition_id: PartitionId, data: Vec<Arc<RecordBatch>>) -> Self {
|
||||
pub(crate) fn new(partition_id: PartitionId, data: Vec<RecordBatch>) -> Self {
|
||||
// There must always be at least one record batch and one row.
|
||||
//
|
||||
// This upholds an invariant that simplifies dealing with empty
|
||||
// partitions - if there is a QueryAdaptor, it contains data.
|
||||
assert!(data.iter().map(|b| b.num_rows()).sum::<usize>() > 0);
|
||||
assert!(data.iter().any(|b| b.num_rows() > 0));
|
||||
|
||||
let schema = merge_record_batch_schemas(&data);
|
||||
Self {
|
||||
|
@ -75,8 +73,7 @@ impl QueryAdaptor {
|
|||
// Project the column selection across all RecordBatch
|
||||
self.data
|
||||
.iter()
|
||||
.map(|data| {
|
||||
let batch = data.as_ref();
|
||||
.map(|batch| {
|
||||
let schema = batch.schema();
|
||||
|
||||
// Apply selection to in-memory batch
|
||||
|
@ -98,25 +95,40 @@ impl QueryAdaptor {
|
|||
}
|
||||
|
||||
/// Returns the [`RecordBatch`] instances in this [`QueryAdaptor`].
|
||||
pub(crate) fn record_batches(&self) -> &[Arc<RecordBatch>] {
|
||||
pub(crate) fn record_batches(&self) -> &[RecordBatch] {
|
||||
self.data.as_ref()
|
||||
}
|
||||
|
||||
/// Unwrap this [`QueryAdaptor`], yielding the inner [`RecordBatch`]
|
||||
/// instances.
|
||||
pub(crate) fn into_record_batches(self) -> Vec<RecordBatch> {
|
||||
self.data
|
||||
}
|
||||
|
||||
/// Returns the partition ID from which the data this [`QueryAdaptor`] was
|
||||
/// sourced from.
|
||||
pub(crate) fn partition_id(&self) -> PartitionId {
|
||||
self.partition_id
|
||||
}
|
||||
|
||||
/// Number of rows, useful for building stats
|
||||
pub(crate) fn num_rows(&self) -> u64 {
|
||||
self.data.iter().map(|b| b.num_rows()).sum::<usize>() as u64
|
||||
}
|
||||
|
||||
/// Time range, useful for building stats
|
||||
pub(crate) fn ts_min_max(&self) -> TimestampMinMax {
|
||||
compute_timenanosecond_min_max(self.data.iter()).expect("Should have time range")
|
||||
}
|
||||
}
|
||||
|
||||
impl QueryChunk for QueryAdaptor {
|
||||
fn stats(&self) -> Arc<Statistics> {
|
||||
Arc::clone(self.stats.get_or_init(|| {
|
||||
let ts_min_max = compute_timenanosecond_min_max(self.data.iter().map(|b| b.as_ref()))
|
||||
.expect("Should have time range");
|
||||
let ts_min_max = self.ts_min_max();
|
||||
|
||||
Arc::new(create_basic_summary(
|
||||
self.data.iter().map(|b| b.num_rows()).sum::<usize>() as u64,
|
||||
self.num_rows(),
|
||||
self.schema(),
|
||||
ts_min_max,
|
||||
))
|
||||
|
@ -147,20 +159,6 @@ impl QueryChunk for QueryAdaptor {
|
|||
true
|
||||
}
|
||||
|
||||
/// Return a set of Strings containing the distinct values in the
|
||||
/// specified columns. If the predicate can be evaluated entirely
|
||||
/// on the metadata of this Chunk. Returns `None` otherwise
|
||||
///
|
||||
/// The requested columns must all have String type.
|
||||
fn column_values(
|
||||
&self,
|
||||
_ctx: IOxSessionContext,
|
||||
_column_name: &str,
|
||||
_predicate: &Predicate,
|
||||
) -> Result<Option<StringSet>, DataFusionError> {
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn data(&self) -> QueryChunkData {
|
||||
let schema = self.schema().as_arrow();
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@ use futures::{Stream, StreamExt, TryStreamExt};
|
|||
use ingester_query_grpc::influxdata::iox::ingester::v1 as proto;
|
||||
use metric::{DurationHistogram, U64Counter};
|
||||
use observability_deps::tracing::*;
|
||||
use predicate::Predicate;
|
||||
use prost::Message;
|
||||
use thiserror::Error;
|
||||
use tokio::sync::{Semaphore, TryAcquireError};
|
||||
|
@ -26,7 +27,7 @@ use instrumentation::FlightFrameEncodeInstrumentation;
|
|||
|
||||
use crate::{
|
||||
ingester_id::IngesterId,
|
||||
query::{response::QueryResponse, QueryError, QueryExec},
|
||||
query::{projection::OwnedProjection, response::QueryResponse, QueryError, QueryExec},
|
||||
};
|
||||
|
||||
/// Error states for the query RPC handler.
|
||||
|
@ -48,6 +49,10 @@ enum Error {
|
|||
/// The number of simultaneous queries being executed has been reached.
|
||||
#[error("simultaneous query limit exceeded")]
|
||||
RequestLimit,
|
||||
|
||||
/// The payload within the request has an invalid field value.
|
||||
#[error("field violation: {0}")]
|
||||
FieldViolation(#[from] ingester_query_grpc::FieldViolation),
|
||||
}
|
||||
|
||||
/// Map a query-execution error into a [`tonic::Status`].
|
||||
|
@ -77,6 +82,10 @@ impl From<Error> for tonic::Status {
|
|||
warn!("simultaneous query limit exceeded");
|
||||
Code::ResourceExhausted
|
||||
}
|
||||
Error::FieldViolation(_) => {
|
||||
debug!(error=%e, "request contains field violation");
|
||||
Code::InvalidArgument
|
||||
}
|
||||
};
|
||||
|
||||
Self::new(code, e.to_string())
|
||||
|
@ -188,18 +197,21 @@ where
|
|||
let ticket = request.into_inner();
|
||||
let request = proto::IngesterQueryRequest::decode(&*ticket.ticket).map_err(Error::from)?;
|
||||
|
||||
// Extract the namespace/table identifiers
|
||||
// Extract the namespace/table identifiers and the query predicate
|
||||
let namespace_id = NamespaceId::new(request.namespace_id);
|
||||
let table_id = TableId::new(request.table_id);
|
||||
let predicate = if let Some(p) = request.predicate {
|
||||
debug!(predicate=?p, "received query predicate");
|
||||
Some(Predicate::try_from(p).map_err(Error::from)?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Predicate pushdown is part of the API, but not implemented.
|
||||
if let Some(p) = request.predicate {
|
||||
debug!(predicate=?p, "ignoring query predicate (unsupported)");
|
||||
}
|
||||
let projection = OwnedProjection::from(request.columns);
|
||||
|
||||
let response = match self
|
||||
.query_handler
|
||||
.query_exec(namespace_id, table_id, request.columns, span.clone())
|
||||
.query_exec(namespace_id, table_id, projection, span.clone(), predicate)
|
||||
.await
|
||||
{
|
||||
Ok(v) => v,
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
use std::{collections::BTreeMap, sync::Arc, time::Duration};
|
||||
|
||||
use data_types::{NamespaceId, PartitionId, PartitionKey, SequenceNumber, TableId};
|
||||
use data_types::{
|
||||
partition_template::TablePartitionTemplateOverride, NamespaceId, PartitionId, PartitionKey,
|
||||
SequenceNumber, TableId,
|
||||
};
|
||||
use iox_catalog::{interface::Catalog, test_helpers::arbitrary_namespace};
|
||||
use lazy_static::lazy_static;
|
||||
use mutable_batch_lp::lines_to_batches;
|
||||
|
@ -15,8 +18,8 @@ use crate::{
|
|||
},
|
||||
partition::{PartitionData, SortKeyState},
|
||||
table::{
|
||||
name_resolver::{mock::MockTableNameProvider, TableNameProvider},
|
||||
TableName,
|
||||
metadata_resolver::{mock::MockTableProvider, TableProvider},
|
||||
TableMetadata, TableName,
|
||||
},
|
||||
},
|
||||
deferred_load::DeferredLoad,
|
||||
|
@ -44,10 +47,15 @@ pub(crate) fn defer_namespace_name_1_ms() -> Arc<DeferredLoad<NamespaceName>> {
|
|||
))
|
||||
}
|
||||
|
||||
pub(crate) fn defer_table_name_1_sec() -> Arc<DeferredLoad<TableName>> {
|
||||
pub(crate) fn defer_table_metadata_1_sec() -> Arc<DeferredLoad<TableMetadata>> {
|
||||
Arc::new(DeferredLoad::new(
|
||||
Duration::from_secs(1),
|
||||
async { ARBITRARY_TABLE_NAME.clone() },
|
||||
async {
|
||||
TableMetadata::new_for_testing(
|
||||
ARBITRARY_TABLE_NAME.clone(),
|
||||
TablePartitionTemplateOverride::default(),
|
||||
)
|
||||
},
|
||||
&metric::Registry::default(),
|
||||
))
|
||||
}
|
||||
|
@ -60,8 +68,11 @@ lazy_static! {
|
|||
pub(crate) static ref ARBITRARY_NAMESPACE_NAME_PROVIDER: Arc<dyn NamespaceNameProvider> =
|
||||
Arc::new(MockNamespaceNameProvider::new(&**ARBITRARY_NAMESPACE_NAME));
|
||||
pub(crate) static ref ARBITRARY_TABLE_NAME: TableName = TableName::from("bananas");
|
||||
pub(crate) static ref ARBITRARY_TABLE_NAME_PROVIDER: Arc<dyn TableNameProvider> =
|
||||
Arc::new(MockTableNameProvider::new(&**ARBITRARY_TABLE_NAME));
|
||||
pub(crate) static ref ARBITRARY_TABLE_PROVIDER: Arc<dyn TableProvider> =
|
||||
Arc::new(MockTableProvider::new(TableMetadata::new_for_testing(
|
||||
ARBITRARY_TABLE_NAME.clone(),
|
||||
TablePartitionTemplateOverride::default()
|
||||
)));
|
||||
}
|
||||
|
||||
/// Build a [`PartitionData`] with mostly arbitrary-yet-valid values for tests.
|
||||
|
@ -71,7 +82,7 @@ pub(crate) struct PartitionDataBuilder {
|
|||
partition_key: Option<PartitionKey>,
|
||||
namespace_id: Option<NamespaceId>,
|
||||
table_id: Option<TableId>,
|
||||
table_name_loader: Option<Arc<DeferredLoad<TableName>>>,
|
||||
table_loader: Option<Arc<DeferredLoad<TableMetadata>>>,
|
||||
namespace_loader: Option<Arc<DeferredLoad<NamespaceName>>>,
|
||||
sort_key: Option<SortKeyState>,
|
||||
}
|
||||
|
@ -101,11 +112,11 @@ impl PartitionDataBuilder {
|
|||
self
|
||||
}
|
||||
|
||||
pub(crate) fn with_table_name_loader(
|
||||
pub(crate) fn with_table_loader(
|
||||
mut self,
|
||||
table_name_loader: Arc<DeferredLoad<TableName>>,
|
||||
table_loader: Arc<DeferredLoad<TableMetadata>>,
|
||||
) -> Self {
|
||||
self.table_name_loader = Some(table_name_loader);
|
||||
self.table_loader = Some(table_loader);
|
||||
self
|
||||
}
|
||||
|
||||
|
@ -134,8 +145,7 @@ impl PartitionDataBuilder {
|
|||
self.namespace_loader
|
||||
.unwrap_or_else(defer_namespace_name_1_sec),
|
||||
self.table_id.unwrap_or(ARBITRARY_TABLE_ID),
|
||||
self.table_name_loader
|
||||
.unwrap_or_else(defer_table_name_1_sec),
|
||||
self.table_loader.unwrap_or_else(defer_table_metadata_1_sec),
|
||||
self.sort_key.unwrap_or(SortKeyState::Provided(None)),
|
||||
)
|
||||
}
|
||||
|
@ -270,7 +280,7 @@ pub(crate) fn make_write_op(
|
|||
namespace_id: NamespaceId,
|
||||
table_name: &str,
|
||||
table_id: TableId,
|
||||
sequence_number: i64,
|
||||
sequence_number: u64,
|
||||
lines: &str,
|
||||
span_ctx: Option<SpanContext>,
|
||||
) -> WriteOperation {
|
||||
|
|
|
@ -32,7 +32,7 @@ impl TimestampOracle {
|
|||
// or diverge between threads.
|
||||
let v = self.0.fetch_add(1, Ordering::Relaxed);
|
||||
|
||||
SequenceNumber::new(v as i64)
|
||||
SequenceNumber::new(v)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -106,6 +106,6 @@ mod tests {
|
|||
timestamps
|
||||
.into_iter()
|
||||
.zip(expected)
|
||||
.for_each(|(got, want)| assert_eq!(got, want as i64));
|
||||
.for_each(|(got, want)| assert_eq!(got, want as u64));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -248,7 +248,7 @@ mod tests {
|
|||
/// Return a [`SequenceNumberSet`] containing `vals`.
|
||||
fn new_set<T>(vals: T) -> SequenceNumberSet
|
||||
where
|
||||
T: IntoIterator<Item = i64>,
|
||||
T: IntoIterator<Item = u64>,
|
||||
{
|
||||
vals.into_iter().map(SequenceNumber::new).collect()
|
||||
}
|
||||
|
@ -257,7 +257,7 @@ mod tests {
|
|||
/// [`SequenceNumberSet`] values.
|
||||
fn new_note<T>(vals: T) -> Arc<CompletedPersist>
|
||||
where
|
||||
T: IntoIterator<Item = i64>,
|
||||
T: IntoIterator<Item = u64>,
|
||||
{
|
||||
Arc::new(CompletedPersist::new(
|
||||
ParquetFileParams {
|
||||
|
|
|
@ -105,10 +105,7 @@ impl WalAppender for Arc<wal::Wal> {
|
|||
let partition_sequence_numbers = w
|
||||
.tables()
|
||||
.map(|(table_id, data)| {
|
||||
(
|
||||
*table_id,
|
||||
data.partitioned_data().sequence_number().get() as u64,
|
||||
)
|
||||
(*table_id, data.partitioned_data().sequence_number().get())
|
||||
})
|
||||
.collect::<HashMap<TableId, u64>>();
|
||||
(
|
||||
|
|
|
@ -0,0 +1,162 @@
|
|||
use arrow_util::assert_batches_sorted_eq;
|
||||
use data_types::PartitionKey;
|
||||
use ingester_query_grpc::influxdata::iox::ingester::v1::IngesterQueryRequest;
|
||||
use ingester_test_ctx::TestContextBuilder;
|
||||
use metric::{DurationHistogram, U64Histogram};
|
||||
|
||||
// Write data to an ingester through the RPC interface and query the data, validating the contents.
|
||||
#[tokio::test]
|
||||
async fn write_query() {
|
||||
let namespace_name = "write_query_test_namespace";
|
||||
let mut ctx = TestContextBuilder::default().build().await;
|
||||
let ns = ctx.ensure_namespace(namespace_name, None).await;
|
||||
|
||||
// Initial write
|
||||
let partition_key = PartitionKey::from("1970-01-01");
|
||||
ctx.write_lp(
|
||||
namespace_name,
|
||||
"bananas greatness=\"unbounded\" 10",
|
||||
partition_key.clone(),
|
||||
0,
|
||||
)
|
||||
.await;
|
||||
|
||||
// A subsequent write with a non-contiguous sequence number to a different table.
|
||||
ctx.write_lp(
|
||||
namespace_name,
|
||||
"cpu bar=2 20\ncpu bar=3 30",
|
||||
partition_key.clone(),
|
||||
7,
|
||||
)
|
||||
.await;
|
||||
|
||||
// And a third write that appends more data to the table in the initial
|
||||
// write.
|
||||
ctx.write_lp(
|
||||
namespace_name,
|
||||
"bananas count=42 200",
|
||||
partition_key.clone(),
|
||||
42,
|
||||
)
|
||||
.await;
|
||||
|
||||
// Perform a query to validate the actual data buffered.
|
||||
let data: Vec<_> = ctx
|
||||
.query(IngesterQueryRequest {
|
||||
namespace_id: ns.id.get(),
|
||||
table_id: ctx.table_id(namespace_name, "bananas").await.get(),
|
||||
columns: vec![],
|
||||
predicate: None,
|
||||
})
|
||||
.await
|
||||
.expect("query request failed");
|
||||
|
||||
let expected = vec![
|
||||
"+-------+-----------+--------------------------------+",
|
||||
"| count | greatness | time |",
|
||||
"+-------+-----------+--------------------------------+",
|
||||
"| | unbounded | 1970-01-01T00:00:00.000000010Z |",
|
||||
"| 42.0 | | 1970-01-01T00:00:00.000000200Z |",
|
||||
"+-------+-----------+--------------------------------+",
|
||||
];
|
||||
assert_batches_sorted_eq!(&expected, &data);
|
||||
|
||||
// Assert various ingest metrics.
|
||||
let hist = ctx
|
||||
.get_metric::<DurationHistogram, _>(
|
||||
"ingester_dml_sink_apply_duration",
|
||||
&[("handler", "write_apply"), ("result", "success")],
|
||||
)
|
||||
.fetch();
|
||||
assert_eq!(hist.sample_count(), 3);
|
||||
|
||||
// Read metrics
|
||||
let hist = ctx
|
||||
.get_metric::<DurationHistogram, _>(
|
||||
"ingester_query_stream_duration",
|
||||
&[("request", "complete")],
|
||||
)
|
||||
.fetch();
|
||||
assert_eq!(hist.sample_count(), 1);
|
||||
|
||||
let hist = ctx
|
||||
.get_metric::<U64Histogram, _>("ingester_query_result_row", &[])
|
||||
.fetch();
|
||||
assert_eq!(hist.sample_count(), 1);
|
||||
assert_eq!(hist.total, 2);
|
||||
}
|
||||
|
||||
// Write data to an ingester through the RPC interface and query the data, validating the contents.
|
||||
#[tokio::test]
|
||||
async fn write_query_projection() {
|
||||
let namespace_name = "write_query_test_namespace";
|
||||
let mut ctx = TestContextBuilder::default().build().await;
|
||||
let ns = ctx.ensure_namespace(namespace_name, None).await;
|
||||
|
||||
// Initial write
|
||||
let partition_key = PartitionKey::from("1970-01-01");
|
||||
ctx.write_lp(
|
||||
namespace_name,
|
||||
"bananas greatness=\"unbounded\",level=42 10",
|
||||
partition_key.clone(),
|
||||
0,
|
||||
)
|
||||
.await;
|
||||
|
||||
// Another write that appends more data to the table in the initial write.
|
||||
ctx.write_lp(
|
||||
namespace_name,
|
||||
"bananas count=42,level=4242 200",
|
||||
partition_key.clone(),
|
||||
42,
|
||||
)
|
||||
.await;
|
||||
|
||||
// Perform a query to validate the actual data buffered.
|
||||
let data: Vec<_> = ctx
|
||||
.query(IngesterQueryRequest {
|
||||
namespace_id: ns.id.get(),
|
||||
table_id: ctx.table_id(namespace_name, "bananas").await.get(),
|
||||
columns: vec![],
|
||||
predicate: None,
|
||||
})
|
||||
.await
|
||||
.expect("query request failed");
|
||||
|
||||
let expected = vec![
|
||||
"+-------+-----------+--------+--------------------------------+",
|
||||
"| count | greatness | level | time |",
|
||||
"+-------+-----------+--------+--------------------------------+",
|
||||
"| | unbounded | 42.0 | 1970-01-01T00:00:00.000000010Z |",
|
||||
"| 42.0 | | 4242.0 | 1970-01-01T00:00:00.000000200Z |",
|
||||
"+-------+-----------+--------+--------------------------------+",
|
||||
];
|
||||
assert_batches_sorted_eq!(&expected, &data);
|
||||
|
||||
// And perform a query with projection, selecting a column that is entirely
|
||||
// non-NULL, a column containing NULLs (in a different order to the above)
|
||||
// and a column that does not exist.
|
||||
let data: Vec<_> = ctx
|
||||
.query(IngesterQueryRequest {
|
||||
namespace_id: ns.id.get(),
|
||||
table_id: ctx.table_id(namespace_name, "bananas").await.get(),
|
||||
columns: vec![
|
||||
"level".to_string(),
|
||||
"greatness".to_string(),
|
||||
"platanos".to_string(),
|
||||
],
|
||||
predicate: None,
|
||||
})
|
||||
.await
|
||||
.expect("query request failed");
|
||||
|
||||
let expected = vec![
|
||||
"+--------+-----------+",
|
||||
"| level | greatness |",
|
||||
"+--------+-----------+",
|
||||
"| 42.0 | unbounded |",
|
||||
"| 4242.0 | |",
|
||||
"+--------+-----------+",
|
||||
];
|
||||
assert_batches_sorted_eq!(&expected, &data);
|
||||
}
|
|
@ -10,88 +10,6 @@ use metric::{
|
|||
use parquet_file::ParquetFilePath;
|
||||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
// Write data to an ingester through the RPC interface and query the data, validating the contents.
|
||||
#[tokio::test]
|
||||
async fn write_query() {
|
||||
let namespace_name = "write_query_test_namespace";
|
||||
let mut ctx = TestContextBuilder::default().build().await;
|
||||
let ns = ctx.ensure_namespace(namespace_name, None).await;
|
||||
|
||||
// Initial write
|
||||
let partition_key = PartitionKey::from("1970-01-01");
|
||||
ctx.write_lp(
|
||||
namespace_name,
|
||||
"bananas greatness=\"unbounded\" 10",
|
||||
partition_key.clone(),
|
||||
0,
|
||||
)
|
||||
.await;
|
||||
|
||||
// A subsequent write with a non-contiguous sequence number to a different table.
|
||||
ctx.write_lp(
|
||||
namespace_name,
|
||||
"cpu bar=2 20\ncpu bar=3 30",
|
||||
partition_key.clone(),
|
||||
7,
|
||||
)
|
||||
.await;
|
||||
|
||||
// And a third write that appends more data to the table in the initial
|
||||
// write.
|
||||
ctx.write_lp(
|
||||
namespace_name,
|
||||
"bananas count=42 200",
|
||||
partition_key.clone(),
|
||||
42,
|
||||
)
|
||||
.await;
|
||||
|
||||
// Perform a query to validate the actual data buffered.
|
||||
let data: Vec<_> = ctx
|
||||
.query(IngesterQueryRequest {
|
||||
namespace_id: ns.id.get(),
|
||||
table_id: ctx.table_id(namespace_name, "bananas").await.get(),
|
||||
columns: vec![],
|
||||
predicate: None,
|
||||
})
|
||||
.await
|
||||
.expect("query request failed");
|
||||
|
||||
let expected = vec![
|
||||
"+-------+-----------+--------------------------------+",
|
||||
"| count | greatness | time |",
|
||||
"+-------+-----------+--------------------------------+",
|
||||
"| | unbounded | 1970-01-01T00:00:00.000000010Z |",
|
||||
"| 42.0 | | 1970-01-01T00:00:00.000000200Z |",
|
||||
"+-------+-----------+--------------------------------+",
|
||||
];
|
||||
assert_batches_sorted_eq!(&expected, &data);
|
||||
|
||||
// Assert various ingest metrics.
|
||||
let hist = ctx
|
||||
.get_metric::<DurationHistogram, _>(
|
||||
"ingester_dml_sink_apply_duration",
|
||||
&[("handler", "write_apply"), ("result", "success")],
|
||||
)
|
||||
.fetch();
|
||||
assert_eq!(hist.sample_count(), 3);
|
||||
|
||||
// Read metrics
|
||||
let hist = ctx
|
||||
.get_metric::<DurationHistogram, _>(
|
||||
"ingester_query_stream_duration",
|
||||
&[("request", "complete")],
|
||||
)
|
||||
.fetch();
|
||||
assert_eq!(hist.sample_count(), 1);
|
||||
|
||||
let hist = ctx
|
||||
.get_metric::<U64Histogram, _>("ingester_query_result_row", &[])
|
||||
.fetch();
|
||||
assert_eq!(hist.sample_count(), 1);
|
||||
assert_eq!(hist.total, 2);
|
||||
}
|
||||
|
||||
// Write data to an ingester through the RPC interface and persist the data.
|
||||
#[tokio::test]
|
||||
async fn write_persist() {
|
||||
|
|
|
@ -242,7 +242,7 @@ where
|
|||
namespace: &str,
|
||||
lp: &str,
|
||||
partition_key: PartitionKey,
|
||||
sequence_number: i64,
|
||||
sequence_number: u64,
|
||||
) {
|
||||
// Resolve the namespace ID needed to construct the DML op
|
||||
let namespace_id = self.namespace_id(namespace).await;
|
||||
|
|
|
@ -6,7 +6,7 @@ edition.workspace = true
|
|||
license.workspace = true
|
||||
|
||||
[dependencies] # In alphabetical order
|
||||
async-trait = "0.1.68"
|
||||
async-trait = "0.1.70"
|
||||
data_types = { path = "../data_types" }
|
||||
futures = "0.3"
|
||||
iox_time = { version = "0.1.0", path = "../iox_time" }
|
||||
|
@ -20,7 +20,7 @@ siphasher = "0.3"
|
|||
snafu = "0.7"
|
||||
sqlx = { version = "0.6", features = [ "runtime-tokio-rustls" , "postgres", "uuid", "sqlite" ] }
|
||||
sqlx-hotswap-pool = { path = "../sqlx-hotswap-pool" }
|
||||
thiserror = "1.0.40"
|
||||
thiserror = "1.0.41"
|
||||
tokio = { version = "1.29", features = ["io-util", "macros", "parking_lot", "rt-multi-thread", "time"] }
|
||||
uuid = { version = "1", features = ["v4"] }
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
|
@ -30,7 +30,7 @@ assert_matches = "1.5.0"
|
|||
dotenvy = "0.15.7"
|
||||
generated_types = { path = "../generated_types" }
|
||||
mutable_batch_lp = { path = "../mutable_batch_lp" }
|
||||
paste = "1.0.12"
|
||||
paste = "1.0.13"
|
||||
pretty_assertions = "1.3.0"
|
||||
rand = "0.8"
|
||||
tempfile = "3"
|
||||
|
|
|
@ -179,8 +179,8 @@ decorate!(
|
|||
"partition_list_skipped_compactions" = list_skipped_compactions(&mut self) -> Result<Vec<SkippedCompaction>>;
|
||||
"partition_delete_skipped_compactions" = delete_skipped_compactions(&mut self, partition_id: PartitionId) -> Result<Option<SkippedCompaction>>;
|
||||
"partition_most_recent_n" = most_recent_n(&mut self, n: usize) -> Result<Vec<Partition>>;
|
||||
"partitions_new_file_between" = partitions_new_file_between(&mut self, minimum_time: Timestamp, maximum_time: Option<Timestamp>) -> Result<Vec<PartitionId>>;
|
||||
"get_in_skipped_compaction" = get_in_skipped_compaction(&mut self, partition_id: PartitionId) -> Result<Option<SkippedCompaction>>;
|
||||
"partition_partitions_new_file_between" = partitions_new_file_between(&mut self, minimum_time: Timestamp, maximum_time: Option<Timestamp>) -> Result<Vec<PartitionId>>;
|
||||
"partition_get_in_skipped_compaction" = get_in_skipped_compaction(&mut self, partition_id: PartitionId) -> Result<Option<SkippedCompaction>>;
|
||||
]
|
||||
);
|
||||
|
||||
|
@ -195,7 +195,7 @@ decorate!(
|
|||
"parquet_delete_old_ids_only" = delete_old_ids_only(&mut self, older_than: Timestamp) -> Result<Vec<ParquetFileId>>;
|
||||
"parquet_list_by_partition_not_to_delete" = list_by_partition_not_to_delete(&mut self, partition_id: PartitionId) -> Result<Vec<ParquetFile>>;
|
||||
"parquet_get_by_object_store_id" = get_by_object_store_id(&mut self, object_store_id: Uuid) -> Result<Option<ParquetFile>>;
|
||||
"exists_by_object_store_id_batch" = exists_by_object_store_id_batch(&mut self, object_store_ids: Vec<Uuid>) -> Result<Vec<Uuid>>;
|
||||
"parquet_exists_by_object_store_id_batch" = exists_by_object_store_id_batch(&mut self, object_store_ids: Vec<Uuid>) -> Result<Vec<Uuid>>;
|
||||
"parquet_create_upgrade_delete" = create_upgrade_delete(&mut self, delete: &[ParquetFileId], upgrade: &[ParquetFileId], create: &[ParquetFileParams], target_level: CompactionLevel) -> Result<Vec<ParquetFileId>>;
|
||||
]
|
||||
);
|
||||
|
|
|
@ -23,7 +23,7 @@ rand = { version = "0.8.3", features = ["small_rng"] }
|
|||
regex = "1.8"
|
||||
schema = { path = "../schema" }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0.99"
|
||||
serde_json = "1.0.100"
|
||||
snafu = "0.7"
|
||||
tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] }
|
||||
toml = "0.7.5"
|
||||
|
|
|
@ -22,7 +22,7 @@ use arrow::{
|
|||
use async_trait::async_trait;
|
||||
use data_types::{ChunkId, ChunkOrder, PartitionId};
|
||||
use datafusion::{error::DataFusionError, physical_plan::Statistics, prelude::SessionContext};
|
||||
use exec::{stringset::StringSet, IOxSessionContext};
|
||||
use exec::IOxSessionContext;
|
||||
use hashbrown::HashMap;
|
||||
use observability_deps::tracing::trace;
|
||||
use once_cell::sync::Lazy;
|
||||
|
@ -34,6 +34,7 @@ use schema::{
|
|||
};
|
||||
use std::{any::Any, fmt::Debug, sync::Arc};
|
||||
|
||||
pub mod chunk_statistics;
|
||||
pub mod config;
|
||||
pub mod exec;
|
||||
pub mod frontend;
|
||||
|
@ -81,18 +82,6 @@ pub trait QueryChunk: Debug + Send + Sync + 'static {
|
|||
/// key" within itself
|
||||
fn may_contain_pk_duplicates(&self) -> bool;
|
||||
|
||||
/// Return a set of Strings containing the distinct values in the
|
||||
/// specified columns. If the predicate can be evaluated entirely
|
||||
/// on the metadata of this Chunk. Returns `None` otherwise
|
||||
///
|
||||
/// The requested columns must all have String type.
|
||||
fn column_values(
|
||||
&self,
|
||||
ctx: IOxSessionContext,
|
||||
column_name: &str,
|
||||
predicate: &Predicate,
|
||||
) -> Result<Option<StringSet>, DataFusionError>;
|
||||
|
||||
/// Provides access to raw [`QueryChunk`] data.
|
||||
///
|
||||
/// The engine assume that minimal work shall be performed to gather the `QueryChunkData`.
|
||||
|
@ -271,15 +260,6 @@ where
|
|||
self.as_ref().may_contain_pk_duplicates()
|
||||
}
|
||||
|
||||
fn column_values(
|
||||
&self,
|
||||
ctx: IOxSessionContext,
|
||||
column_name: &str,
|
||||
predicate: &Predicate,
|
||||
) -> Result<Option<StringSet>, DataFusionError> {
|
||||
self.as_ref().column_values(ctx, column_name, predicate)
|
||||
}
|
||||
|
||||
fn data(&self) -> QueryChunkData {
|
||||
self.as_ref().data()
|
||||
}
|
||||
|
@ -323,15 +303,6 @@ impl QueryChunk for Arc<dyn QueryChunk> {
|
|||
self.as_ref().may_contain_pk_duplicates()
|
||||
}
|
||||
|
||||
fn column_values(
|
||||
&self,
|
||||
ctx: IOxSessionContext,
|
||||
column_name: &str,
|
||||
predicate: &Predicate,
|
||||
) -> Result<Option<StringSet>, DataFusionError> {
|
||||
self.as_ref().column_values(ctx, column_name, predicate)
|
||||
}
|
||||
|
||||
fn data(&self) -> QueryChunkData {
|
||||
self.as_ref().data()
|
||||
}
|
||||
|
|
|
@ -1120,18 +1120,6 @@ impl QueryChunk for TestChunk {
|
|||
"Test Chunk"
|
||||
}
|
||||
|
||||
fn column_values(
|
||||
&self,
|
||||
_ctx: IOxSessionContext,
|
||||
_column_name: &str,
|
||||
_predicate: &Predicate,
|
||||
) -> Result<Option<StringSet>, DataFusionError> {
|
||||
self.check_error()?;
|
||||
|
||||
// Model not being able to get column values from metadata
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn order(&self) -> ChunkOrder {
|
||||
self.order
|
||||
}
|
||||
|
|
|
@ -20,7 +20,7 @@ predicate = { path = "../predicate" }
|
|||
query_functions = { path = "../query_functions" }
|
||||
regex = "1"
|
||||
schema = { path = "../schema" }
|
||||
serde_json = "1.0.99"
|
||||
serde_json = "1.0.100"
|
||||
thiserror = "1.0"
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
|
||||
|
|
|
@ -2028,7 +2028,7 @@ mod test {
|
|||
use crate::plan::ir::TagSet;
|
||||
use datafusion::common::Result;
|
||||
use influxdb_influxql_parser::select::SelectStatement;
|
||||
use schema::{InfluxColumnType, InfluxFieldType};
|
||||
use schema::{InfluxColumnType, InfluxFieldType, SchemaBuilder};
|
||||
|
||||
/// Test implementation that converts `Select` to `SelectStatement` so that it can be
|
||||
/// converted back to a string.
|
||||
|
@ -2647,7 +2647,18 @@ mod test {
|
|||
/// Projections which contain function calls
|
||||
#[test]
|
||||
fn projection_call_expr() {
|
||||
let namespace = MockSchemaProvider::default();
|
||||
let mut namespace = MockSchemaProvider::default();
|
||||
// Add a schema with tags that could conflict with aliasing against an
|
||||
// existing call expression, in this case "last"
|
||||
namespace.add_schema(
|
||||
SchemaBuilder::new()
|
||||
.measurement("conflicts")
|
||||
.timestamp()
|
||||
.tag("last")
|
||||
.influx_field("field_f64", InfluxFieldType::Float)
|
||||
.build()
|
||||
.unwrap(),
|
||||
);
|
||||
|
||||
let stmt = parse_select("SELECT COUNT(field_i64) FROM temp_01");
|
||||
let stmt = rewrite_select_statement(&namespace, &stmt).unwrap();
|
||||
|
@ -2694,6 +2705,14 @@ mod test {
|
|||
stmt.to_string(),
|
||||
"SELECT time::timestamp AS time, sum(field_f64::float) AS sum_field_f64, sum(field_i64::integer) AS sum_field_i64, sum(field_u64::unsigned) AS sum_field_u64, sum(shared_field0::float) AS sum_shared_field0 FROM temp_01"
|
||||
);
|
||||
|
||||
// Handles conflicts when call expression is renamed to match an existing tag
|
||||
let stmt = parse_select("SELECT LAST(field_f64), last FROM conflicts");
|
||||
let stmt = rewrite_select_statement(&namespace, &stmt).unwrap();
|
||||
assert_eq!(
|
||||
stmt.to_string(),
|
||||
"SELECT time::timestamp AS time, last(field_f64::float) AS last, last::tag AS last_1 FROM conflicts"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -66,9 +66,6 @@ const CONCURRENT_TABLE_JOBS: usize = 10;
|
|||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
#[snafu(display("gRPC planner got error finding column values: {}", source))]
|
||||
FindingColumnValues { source: DataFusionError },
|
||||
|
||||
#[snafu(display(
|
||||
"gRPC planner got error fetching chunks for table '{}': {}",
|
||||
table_name,
|
||||
|
@ -180,7 +177,6 @@ impl Error {
|
|||
| Self::BuildingPlan { source, .. }
|
||||
| Self::ReadColumns { source, .. }
|
||||
| Self::CheckingChunkPredicate { source, .. }
|
||||
| Self::FindingColumnValues { source, .. }
|
||||
| Self::CastingAggregates { source, .. } => {
|
||||
DataFusionError::Context(format!("{method}: {msg}"), Box::new(source))
|
||||
}
|
||||
|
@ -480,7 +476,6 @@ impl InfluxRpcPlanner {
|
|||
)
|
||||
.and_then(|(table_name, table_schema, predicate, chunks)| async move {
|
||||
let mut chunks_full = vec![];
|
||||
let mut known_values = BTreeSet::new();
|
||||
|
||||
let chunks = prune_chunks(&table_schema, chunks, &predicate);
|
||||
for chunk in cheap_chunk_first(chunks) {
|
||||
|
@ -513,36 +508,15 @@ impl InfluxRpcPlanner {
|
|||
}
|
||||
);
|
||||
|
||||
// try and get the list of values directly from metadata
|
||||
let mut ctx = self.ctx.child_ctx("tag_values execution");
|
||||
ctx.set_metadata("table", table_name.to_string());
|
||||
|
||||
let maybe_values = chunk
|
||||
.column_values(ctx, tag_name, &predicate)
|
||||
.context(FindingColumnValuesSnafu)?;
|
||||
|
||||
match maybe_values {
|
||||
Some(mut names) => {
|
||||
debug!(
|
||||
%table_name,
|
||||
names=?names,
|
||||
chunk_id=%chunk.id().get(),
|
||||
"tag values found from metadata",
|
||||
);
|
||||
known_values.append(&mut names);
|
||||
}
|
||||
None => {
|
||||
debug!(
|
||||
%table_name,
|
||||
chunk_id=%chunk.id().get(),
|
||||
"need full plan to find tag values"
|
||||
);
|
||||
chunks_full.push(chunk);
|
||||
}
|
||||
}
|
||||
debug!(
|
||||
%table_name,
|
||||
chunk_id=%chunk.id().get(),
|
||||
"need full plan to find tag values"
|
||||
);
|
||||
chunks_full.push(chunk);
|
||||
}
|
||||
|
||||
Ok((table_name, predicate, chunks_full, known_values))
|
||||
Ok((table_name, predicate, chunks_full))
|
||||
})
|
||||
.try_collect()
|
||||
.await?;
|
||||
|
@ -554,9 +528,7 @@ impl InfluxRpcPlanner {
|
|||
// At this point, we have a set of tag_values we know at plan
|
||||
// time in `known_columns`, and some tables in chunks that we
|
||||
// need to run a plan to find what values pass the predicate.
|
||||
for (table_name, predicate, chunks_full, known_values) in tables {
|
||||
builder = builder.append_other(known_values.into());
|
||||
|
||||
for (table_name, predicate, chunks_full) in tables {
|
||||
if !chunks_full.is_empty() {
|
||||
let schema = namespace
|
||||
.table_schema(table_name)
|
||||
|
|
|
@ -12,14 +12,14 @@ license.workspace = true
|
|||
authz = { path = "../authz", features = ["http"] }
|
||||
clap_blocks = { path = "../clap_blocks" }
|
||||
generated_types = { path = "../generated_types" }
|
||||
heappy = { git = "https://github.com/mkmik/heappy", rev = "1d6ac77a4026fffce8680a7b31a9f6e9859b5e73", features = ["enable_heap_profiler", "jemalloc_shim", "measure_free"], optional = true }
|
||||
heappy = { git = "https://github.com/mkmik/heappy", rev = "1de977a241cdd768acc5b6c82c0728b30c7db7b4", features = ["enable_heap_profiler", "jemalloc_shim", "measure_free"], optional = true }
|
||||
metric = { path = "../metric" }
|
||||
metric_exporters = { path = "../metric_exporters" }
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
# NOTE: we may not notice that we need the "backtrace-rs" feature if we also build with the heappy feature, which depends on backtrace-rs.
|
||||
# (honestly I thought that cargo dependencies were isolated on a per crate basis so I'm a bit surprised that pprof accidentally builds
|
||||
# successfully just because another crate happens to depend on backtrace-rs)
|
||||
pprof = { version = "0.11", default-features = false, features = ["flamegraph", "prost-codec"], optional = true }
|
||||
pprof = { version = "0.12", default-features = false, features = ["flamegraph", "prost-codec"], optional = true }
|
||||
service_grpc_testing = { path = "../service_grpc_testing" }
|
||||
trace = { path = "../trace" }
|
||||
trace_exporters = { path = "../trace_exporters" }
|
||||
|
@ -38,7 +38,7 @@ log = "0.4"
|
|||
parking_lot = "0.12"
|
||||
reqwest = { version = "0.11", default-features = false, features = ["json", "rustls-tls"] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0.99"
|
||||
serde_json = "1.0.100"
|
||||
serde_urlencoded = "0.7.0"
|
||||
snafu = "0.7"
|
||||
tokio = { version = "1.29", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] }
|
||||
|
|
|
@ -18,7 +18,7 @@ iox_query = { version = "0.1.0", path = "../iox_query" }
|
|||
ioxd_common = { path = "../ioxd_common" }
|
||||
metric = { path = "../metric" }
|
||||
parquet_file = { version = "0.1.0", path = "../parquet_file" }
|
||||
thiserror = "1.0.40"
|
||||
thiserror = "1.0.41"
|
||||
tokio = { version = "1.29", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] }
|
||||
tokio-util = { version = "0.7.8" }
|
||||
trace = { path = "../trace" }
|
||||
|
|
|
@ -30,7 +30,7 @@ trace = { path = "../trace" }
|
|||
arrow-flight = { workspace = true }
|
||||
async-trait = "0.1"
|
||||
hyper = "0.14"
|
||||
thiserror = "1.0.40"
|
||||
thiserror = "1.0.41"
|
||||
tokio = { version = "1.29", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] }
|
||||
tonic = { workspace = true }
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
|
|
|
@ -18,7 +18,7 @@ metric = { path = "../metric" }
|
|||
mutable_batch = { path = "../mutable_batch" }
|
||||
object_store = { workspace = true }
|
||||
router = { path = "../router" }
|
||||
thiserror = "1.0.40"
|
||||
thiserror = "1.0.41"
|
||||
tokio-util = { version = "0.7.8" }
|
||||
trace = { path = "../trace" }
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
|
|
|
@ -18,12 +18,12 @@ hashbrown = { workspace = true }
|
|||
itertools = "0.11"
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
percent-encoding = "2.2.0"
|
||||
thiserror = "1.0.40"
|
||||
thiserror = "1.0.41"
|
||||
unicode-segmentation = "1.10.1"
|
||||
|
||||
[dev-dependencies]
|
||||
assert_matches = "1.5.0"
|
||||
mutable_batch_lp = { path = "../mutable_batch_lp" }
|
||||
paste = "1.0.12"
|
||||
paste = "1.0.13"
|
||||
proptest = { version = "1.2.0", default-features = false }
|
||||
rand = "0.8"
|
||||
|
|
|
@ -6,13 +6,13 @@ edition.workspace = true
|
|||
license.workspace = true
|
||||
|
||||
[dependencies] # In alphabetical order
|
||||
async-trait = "0.1.68"
|
||||
async-trait = "0.1.70"
|
||||
bytes = "1.4"
|
||||
futures = "0.3"
|
||||
iox_time = { version = "0.1.0", path = "../iox_time" }
|
||||
metric = { version = "0.1.0", path = "../metric" }
|
||||
object_store = { workspace = true }
|
||||
pin-project = "1.1.1"
|
||||
pin-project = "1.1.2"
|
||||
tokio = { version = "1.29", features = ["io-util"] }
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ pbjson-types = "0.5"
|
|||
prost = "0.11"
|
||||
schema = { path = "../schema" }
|
||||
snafu = "0.7"
|
||||
thiserror = "1.0.40"
|
||||
thiserror = "1.0.41"
|
||||
thrift = "0.17"
|
||||
tokio = { version = "1.29", features = ["macros", "parking_lot", "rt", "rt-multi-thread", "sync"] }
|
||||
uuid = { version = "1", features = ["v4"] }
|
||||
|
|
|
@ -8,7 +8,7 @@ license.workspace = true
|
|||
[dependencies]
|
||||
arrow = { workspace = true }
|
||||
arrow-flight = { workspace = true }
|
||||
async-trait = "0.1.68"
|
||||
async-trait = "0.1.70"
|
||||
backoff = { path = "../backoff" }
|
||||
bytes = "1.4"
|
||||
cache_system = { path = "../cache_system" }
|
||||
|
|
|
@ -361,8 +361,8 @@ mod tests {
|
|||
partition.create_parquet_file(builder).await;
|
||||
let table_id = table.table.id;
|
||||
|
||||
let single_file_size = 208;
|
||||
let two_file_size = 384;
|
||||
let single_file_size = 240;
|
||||
let two_file_size = 448;
|
||||
assert!(single_file_size < two_file_size);
|
||||
|
||||
let cache = make_cache(&catalog);
|
||||
|
|
|
@ -17,6 +17,7 @@ use data_types::{
|
|||
};
|
||||
use datafusion::scalar::ScalarValue;
|
||||
use iox_catalog::interface::Catalog;
|
||||
use iox_query::chunk_statistics::{ColumnRange, ColumnRanges};
|
||||
use iox_time::TimeProvider;
|
||||
use observability_deps::tracing::debug;
|
||||
use schema::sort::SortKey;
|
||||
|
@ -27,8 +28,6 @@ use std::{
|
|||
};
|
||||
use trace::span::Span;
|
||||
|
||||
use crate::df_stats::{ColumnRange, ColumnRanges};
|
||||
|
||||
use super::{namespace::CachedTable, ram::RamSize};
|
||||
|
||||
const CACHE_ID: &str = "partition";
|
||||
|
|
|
@ -6,24 +6,21 @@ use self::{
|
|||
invalidate_on_error::InvalidateOnErrorFlightClient,
|
||||
test_util::MockIngesterConnection,
|
||||
};
|
||||
use crate::{
|
||||
cache::{namespace::CachedTable, CatalogCache},
|
||||
df_stats::{create_chunk_statistics, ColumnRanges},
|
||||
};
|
||||
use crate::cache::{namespace::CachedTable, CatalogCache};
|
||||
use arrow::{datatypes::DataType, error::ArrowError, record_batch::RecordBatch};
|
||||
use arrow_flight::decode::DecodedPayload;
|
||||
use async_trait::async_trait;
|
||||
use backoff::{Backoff, BackoffConfig, BackoffError};
|
||||
use client_util::connection;
|
||||
use data_types::{ChunkId, ChunkOrder, NamespaceId, PartitionHashId, PartitionId};
|
||||
use datafusion::{error::DataFusionError, physical_plan::Statistics};
|
||||
use datafusion::physical_plan::Statistics;
|
||||
use futures::{stream::FuturesUnordered, TryStreamExt};
|
||||
use ingester_query_grpc::{
|
||||
encode_proto_predicate_as_base64, influxdata::iox::ingester::v1::IngesterQueryResponseMetadata,
|
||||
IngesterQueryRequest,
|
||||
};
|
||||
use iox_query::{
|
||||
exec::{stringset::StringSet, IOxSessionContext},
|
||||
chunk_statistics::{create_chunk_statistics, ColumnRanges},
|
||||
util::compute_timenanosecond_min_max,
|
||||
QueryChunk, QueryChunkData,
|
||||
};
|
||||
|
@ -941,16 +938,6 @@ impl QueryChunk for IngesterChunk {
|
|||
true
|
||||
}
|
||||
|
||||
fn column_values(
|
||||
&self,
|
||||
_ctx: IOxSessionContext,
|
||||
_column_name: &str,
|
||||
_predicate: &Predicate,
|
||||
) -> Result<Option<StringSet>, DataFusionError> {
|
||||
// TODO maybe some special handling?
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn data(&self) -> QueryChunkData {
|
||||
QueryChunkData::RecordBatches(self.batches.clone())
|
||||
}
|
||||
|
|
|
@ -18,7 +18,6 @@ use workspace_hack as _;
|
|||
|
||||
mod cache;
|
||||
mod database;
|
||||
mod df_stats;
|
||||
mod ingester;
|
||||
mod namespace;
|
||||
mod parquet;
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
use data_types::{ChunkId, ChunkOrder, PartitionId};
|
||||
use datafusion::physical_plan::Statistics;
|
||||
use iox_query::chunk_statistics::{create_chunk_statistics, ColumnRanges};
|
||||
use parquet_file::chunk::ParquetChunk;
|
||||
use schema::sort::SortKey;
|
||||
use std::sync::Arc;
|
||||
|
@ -11,8 +12,6 @@ mod query_access;
|
|||
|
||||
pub use creation::ChunkAdapter;
|
||||
|
||||
use crate::df_stats::{create_chunk_statistics, ColumnRanges};
|
||||
|
||||
/// Immutable metadata attached to a [`QuerierParquetChunk`].
|
||||
#[derive(Debug)]
|
||||
pub struct QuerierParquetChunkMeta {
|
||||
|
|
|
@ -1,11 +1,7 @@
|
|||
use crate::parquet::QuerierParquetChunk;
|
||||
use data_types::{ChunkId, ChunkOrder, PartitionId};
|
||||
use datafusion::{error::DataFusionError, physical_plan::Statistics};
|
||||
use iox_query::{
|
||||
exec::{stringset::StringSet, IOxSessionContext},
|
||||
QueryChunk, QueryChunkData,
|
||||
};
|
||||
use predicate::Predicate;
|
||||
use datafusion::physical_plan::Statistics;
|
||||
use iox_query::{QueryChunk, QueryChunkData};
|
||||
use schema::{sort::SortKey, Schema};
|
||||
use std::{any::Any, sync::Arc};
|
||||
|
||||
|
@ -34,21 +30,6 @@ impl QueryChunk for QuerierParquetChunk {
|
|||
false
|
||||
}
|
||||
|
||||
fn column_values(
|
||||
&self,
|
||||
mut ctx: IOxSessionContext,
|
||||
column_name: &str,
|
||||
predicate: &Predicate,
|
||||
) -> Result<Option<StringSet>, DataFusionError> {
|
||||
ctx.set_metadata("column_name", column_name.to_string());
|
||||
ctx.set_metadata("predicate", format!("{}", &predicate));
|
||||
ctx.set_metadata("storage", "parquet");
|
||||
|
||||
// Since DataFusion can read Parquet, there is no advantage to
|
||||
// manually implementing this vs just letting DataFusion do its thing
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn data(&self) -> QueryChunkData {
|
||||
QueryChunkData::Parquet(self.parquet_chunk.parquet_exec_input())
|
||||
}
|
||||
|
|
|
@ -492,7 +492,6 @@ mod tests {
|
|||
use super::*;
|
||||
use crate::{
|
||||
cache::test_util::{assert_cache_access_metric_count, assert_catalog_access_metric_count},
|
||||
df_stats::ColumnRange,
|
||||
ingester::{test_util::MockIngesterConnection, IngesterPartition},
|
||||
table::test_util::{querier_table, IngesterPartitionBuilder},
|
||||
};
|
||||
|
@ -506,7 +505,7 @@ mod tests {
|
|||
use generated_types::influxdata::iox::partition_template::v1::{
|
||||
template_part::Part, PartitionTemplate, TemplatePart,
|
||||
};
|
||||
use iox_query::exec::IOxSessionContext;
|
||||
use iox_query::{chunk_statistics::ColumnRange, exec::IOxSessionContext};
|
||||
use iox_tests::{TestCatalog, TestParquetFileBuilder, TestTable};
|
||||
use predicate::Predicate;
|
||||
use schema::{builder::SchemaBuilder, InfluxFieldType, TIME_COLUMN_NAME};
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
use super::{PruneMetrics, QuerierTable, QuerierTableArgs};
|
||||
use crate::{
|
||||
cache::CatalogCache, create_ingester_connection_for_testing, df_stats::ColumnRanges,
|
||||
parquet::ChunkAdapter, IngesterPartition,
|
||||
cache::CatalogCache, create_ingester_connection_for_testing, parquet::ChunkAdapter,
|
||||
IngesterPartition,
|
||||
};
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use data_types::ChunkId;
|
||||
use iox_catalog::interface::{get_schema_by_name, SoftDeletedRows};
|
||||
use iox_query::chunk_statistics::ColumnRanges;
|
||||
use iox_tests::{TestCatalog, TestPartition, TestTable};
|
||||
use mutable_batch_lp::test_helpers::lp_to_mutable_batch;
|
||||
use schema::{Projection, Schema};
|
||||
|
|
|
@ -49,7 +49,7 @@ criterion = { version = "0.5", default-features = false, features = ["async_toki
|
|||
influxdb-line-protocol = { path = "../influxdb_line_protocol" }
|
||||
iox_tests = { path = "../iox_tests" }
|
||||
once_cell = "1"
|
||||
paste = "1.0.12"
|
||||
paste = "1.0.13"
|
||||
pretty_assertions = "1.3.0"
|
||||
proptest = { version = "1.2.0", default-features = false }
|
||||
rand = "0.8.3"
|
||||
|
|
|
@ -32,6 +32,14 @@ impl SchemaBuilder {
|
|||
Self::default()
|
||||
}
|
||||
|
||||
pub fn with_capacity(n: usize) -> Self {
|
||||
Self {
|
||||
measurement: Default::default(),
|
||||
fields: Vec::with_capacity(n),
|
||||
finished: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a new tag column to this schema. By default tags are
|
||||
/// potentially nullable as they are not guaranteed to be present
|
||||
/// for all rows
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use arrow::{datatypes::Field, record_batch::RecordBatch};
|
||||
use hashbrown::hash_map::RawEntryMut;
|
||||
use hashbrown::HashMap;
|
||||
|
@ -44,7 +42,7 @@ pub type Result<T, E = Error> = std::result::Result<T, E>;
|
|||
/// This is infallable because the schemas of chunks within a
|
||||
/// partition are assumed to be compatible because that schema was
|
||||
/// enforced as part of writing into the partition
|
||||
pub fn merge_record_batch_schemas(batches: &[Arc<RecordBatch>]) -> Schema {
|
||||
pub fn merge_record_batch_schemas(batches: &[RecordBatch]) -> Schema {
|
||||
let mut merger = SchemaMerger::new();
|
||||
for batch in batches {
|
||||
let schema = Schema::try_from(batch.schema()).expect("Schema conversion error");
|
||||
|
@ -172,6 +170,8 @@ impl<'a> SchemaMerger<'a> {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::builder::SchemaBuilder;
|
||||
use crate::InfluxFieldType::Integer;
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@ edition.workspace = true
|
|||
license.workspace = true
|
||||
|
||||
[dependencies] # In alphabetical order
|
||||
async-trait = "0.1.68"
|
||||
async-trait = "0.1.70"
|
||||
bytes = "1.4"
|
||||
datafusion = { workspace = true }
|
||||
iox_query = { path = "../iox_query" }
|
||||
|
|
|
@ -26,7 +26,7 @@ bytes = "1.4"
|
|||
futures = "0.3"
|
||||
prost = "0.11"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0.99"
|
||||
serde_json = "1.0.100"
|
||||
snafu = "0.7"
|
||||
tonic = { workspace = true }
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue