Merge branch 'main' into 7899/wal-disk-metrics

pull/24376/head
wiedld 2023-07-05 13:52:43 -07:00 committed by GitHub
commit 36e7f53f9b
113 changed files with 2559 additions and 1458 deletions


@ -3,9 +3,6 @@
rustflags = [
"--cfg", "tokio_unstable",
]
rustdocflags = [
"--cfg", "tokio_unstable",
]
# sparse protocol opt-in
# See https://blog.rust-lang.org/2023/03/09/Rust-1.68.0.html#cargos-sparse-protocol
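For reference, the opt-in that these comment lines introduce is a single registry setting in this same config file (shape taken from the linked release notes, not from this diff's visible context):

    [registries.crates-io]
    protocol = "sparse"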

Cargo.lock (generated)

@ -190,7 +190,7 @@ dependencies = [
"arrow-data",
"arrow-schema",
"chrono",
"half 2.2.1",
"half 2.3.1",
"num",
]
@ -205,7 +205,7 @@ dependencies = [
"arrow-schema",
"chrono",
"chrono-tz",
"half 2.2.1",
"half 2.3.1",
"hashbrown 0.14.0",
"num",
]
@ -215,7 +215,7 @@ name = "arrow-buffer"
version = "42.0.0"
source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/42.0.0_patched#20f6bd7ed730d937abe76ab859088094dee8a5d3"
dependencies = [
"half 2.2.1",
"half 2.3.1",
"num",
]
@ -231,7 +231,7 @@ dependencies = [
"arrow-select",
"chrono",
"comfy-table",
"half 2.2.1",
"half 2.3.1",
"lexical-core",
"num",
]
@ -261,7 +261,7 @@ source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/42.0.0_patched#
dependencies = [
"arrow-buffer",
"arrow-schema",
"half 2.2.1",
"half 2.3.1",
"num",
]
@ -315,7 +315,7 @@ dependencies = [
"arrow-data",
"arrow-schema",
"chrono",
"half 2.2.1",
"half 2.3.1",
"indexmap 1.9.3",
"lexical-core",
"num",
@ -333,7 +333,7 @@ dependencies = [
"arrow-data",
"arrow-schema",
"arrow-select",
"half 2.2.1",
"half 2.3.1",
"num",
]
@ -347,7 +347,7 @@ dependencies = [
"arrow-buffer",
"arrow-data",
"arrow-schema",
"half 2.2.1",
"half 2.3.1",
"hashbrown 0.14.0",
]
@ -480,18 +480,18 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.18",
"syn 2.0.23",
]
[[package]]
name = "async-trait"
version = "0.1.68"
version = "0.1.70"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842"
checksum = "79fa67157abdfd688a259b6648808757db9347af834624f27ec646da976aee5d"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.18",
"syn 2.0.23",
]
[[package]]
@ -621,9 +621,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitflags"
version = "2.3.2"
version = "2.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6dbe3c979c178231552ecba20214a8272df4e09f232a87aef4320cf06539aded"
checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42"
[[package]]
name = "blake2"
@ -841,9 +841,9 @@ dependencies = [
[[package]]
name = "clap"
version = "4.3.9"
version = "4.3.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bba77a07e4489fb41bd90e8d4201c3eb246b3c2c9ea2ba0bddd6c1d1df87db7d"
checksum = "384e169cc618c613d5e3ca6404dda77a8685a63e08660dcc64abaf7da7cb0c7a"
dependencies = [
"clap_builder",
"clap_derive",
@ -873,13 +873,12 @@ dependencies = [
[[package]]
name = "clap_builder"
version = "4.3.9"
version = "4.3.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c9b4a88bb4bc35d3d6f65a21b0f0bafe9c894fa00978de242c555ec28bea1c0"
checksum = "ef137bbe35aab78bdb468ccfba75a5f4d8321ae011d34063770780545176af2d"
dependencies = [
"anstream",
"anstyle",
"bitflags 1.3.2",
"clap_lex",
"once_cell",
"strsim",
@ -894,7 +893,7 @@ dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 2.0.18",
"syn 2.0.23",
]
[[package]]
@ -969,7 +968,6 @@ dependencies = [
"object_store",
"observability_deps",
"parquet_file",
"predicate",
"rand",
"schema",
"test_helpers",
@ -1059,9 +1057,9 @@ dependencies = [
[[package]]
name = "console-subscriber"
version = "0.1.9"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57ab2224a0311582eb03adba4caaf18644f7b1f10a760803a803b9b605187fc7"
checksum = "d4cf42660ac07fcebed809cfe561dd8730bcd35b075215e6479c516bcd0d11cb"
dependencies = [
"console-api",
"crossbeam-channel",
@ -1118,9 +1116,9 @@ checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa"
[[package]]
name = "cpp_demangle"
version = "0.4.1"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c76f98bdfc7f66172e6c7065f981ebb576ffc903fe4c0561d9f0c2509226dc6"
checksum = "ee34052ee3d93d6d8f3e6f81d85c47921f6653a19a7b70e939e3e602d893a674"
dependencies = [
"cfg-if",
]
@ -1479,7 +1477,7 @@ dependencies = [
"datafusion-common",
"datafusion-expr",
"datafusion-row",
"half 2.2.1",
"half 2.3.1",
"hashbrown 0.14.0",
"indexmap 1.9.3",
"itertools 0.10.5",
@ -1720,12 +1718,12 @@ dependencies = [
[[package]]
name = "fd-lock"
version = "3.0.12"
version = "3.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39ae6b3d9530211fb3b12a95374b8b0823be812f53d09e18c5675c0146b09642"
checksum = "ef033ed5e9bad94e55838ca0ca906db0e043f517adda0c8b79c7a8c66c93c1b5"
dependencies = [
"cfg-if",
"rustix",
"rustix 0.38.2",
"windows-sys 0.48.0",
]
@ -1899,7 +1897,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.18",
"syn 2.0.23",
]
[[package]]
@ -2062,9 +2060,9 @@ dependencies = [
[[package]]
name = "h2"
version = "0.3.19"
version = "0.3.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d357c7ae988e7d2182f7d7871d0b963962420b0678b0997ce7de72001aeab782"
checksum = "97ec8491ebaf99c8eaa73058b045fe58073cd6be7f596ac993ced0b0a0c01049"
dependencies = [
"bytes",
"fnv",
@ -2087,10 +2085,11 @@ checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
[[package]]
name = "half"
version = "2.2.1"
version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02b4af3693f1b705df946e9fe5631932443781d0aabb423b62fcd4d73f6d2fd0"
checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872"
dependencies = [
"cfg-if",
"crunchy",
"num-traits",
]
@ -2150,7 +2149,7 @@ dependencies = [
[[package]]
name = "heappy"
version = "0.1.0"
source = "git+https://github.com/mkmik/heappy?rev=1d6ac77a4026fffce8680a7b31a9f6e9859b5e73#1d6ac77a4026fffce8680a7b31a9f6e9859b5e73"
source = "git+https://github.com/mkmik/heappy?rev=1de977a241cdd768acc5b6c82c0728b30c7db7b4#1de977a241cdd768acc5b6c82c0728b30c7db7b4"
dependencies = [
"backtrace",
"bytes",
@ -2173,9 +2172,9 @@ dependencies = [
[[package]]
name = "hermit-abi"
version = "0.3.1"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286"
checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b"
[[package]]
name = "hex"
@ -2571,7 +2570,7 @@ version = "0.1.0"
dependencies = [
"flate2",
"hex",
"integer-encoding",
"integer-encoding 4.0.0",
"observability_deps",
"rand",
"snafu",
@ -2730,6 +2729,12 @@ version = "3.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02"
[[package]]
name = "integer-encoding"
version = "4.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "924df4f0e24e2e7f9cdd90babb0b96f93b20f3ecfa949ea9e6613756b8c8e1bf"
[[package]]
name = "io-lifetimes"
version = "1.0.11"
@ -3099,19 +3104,18 @@ dependencies = [
[[package]]
name = "ipnet"
version = "2.7.2"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "12b6ee2129af8d4fb011108c73d99a1b83a85977f23b82460c0ae2e25bb4b57f"
checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6"
[[package]]
name = "is-terminal"
version = "0.4.7"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f"
checksum = "24fddda5af7e54bf7da53067d6e802dbcc381d0a8eef629df528e3ebf68755cb"
dependencies = [
"hermit-abi",
"io-lifetimes",
"rustix",
"rustix 0.38.2",
"windows-sys 0.48.0",
]
@ -3135,9 +3139,9 @@ dependencies = [
[[package]]
name = "itoa"
version = "1.0.6"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"
checksum = "c0aa48fab2893d8a49caa94082ae8488f4e1050d73b367881dcd2198f4199fd8"
[[package]]
name = "jobserver"
@ -3262,6 +3266,12 @@ version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519"
[[package]]
name = "linux-raw-sys"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09fc20d2ca12cb9f044c93e3bd6d32d523e6e2ec3db4f7b2939cd99026ecd3f0"
[[package]]
name = "lock_api"
version = "0.4.10"
@ -3846,7 +3856,7 @@ dependencies = [
"libc",
"redox_syscall 0.3.5",
"smallvec",
"windows-targets 0.48.0",
"windows-targets 0.48.1",
]
[[package]]
@ -3941,9 +3951,9 @@ dependencies = [
[[package]]
name = "paste"
version = "1.0.12"
version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f746c4065a8fa3fe23974dd82f15431cc8d40779821001404d10d2e79ca7d79"
checksum = "b4b27ab7be369122c218afc2079489cdcb4b517c0a3fc386ff11e1fedfcc2b35"
[[package]]
name = "pbjson"
@ -4027,7 +4037,7 @@ dependencies = [
"pest_meta",
"proc-macro2",
"quote",
"syn 2.0.18",
"syn 2.0.23",
]
[[package]]
@ -4053,18 +4063,18 @@ dependencies = [
[[package]]
name = "phf"
version = "0.11.1"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "928c6535de93548188ef63bb7c4036bd415cd8f36ad25af44b9789b2ee72a48c"
checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc"
dependencies = [
"phf_shared",
]
[[package]]
name = "phf_codegen"
version = "0.11.1"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a56ac890c5e3ca598bbdeaa99964edb5b0258a583a9eb6ef4e89fc85d9224770"
checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a"
dependencies = [
"phf_generator",
"phf_shared",
@ -4072,9 +4082,9 @@ dependencies = [
[[package]]
name = "phf_generator"
version = "0.11.1"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1181c94580fa345f50f19d738aaa39c0ed30a600d95cb2d3e23f94266f14fbf"
checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0"
dependencies = [
"phf_shared",
"rand",
@ -4091,29 +4101,29 @@ dependencies = [
[[package]]
name = "pin-project"
version = "1.1.1"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e138fdd8263907a2b0e1b4e80b7e58c721126479b6e6eedfb1b402acea7b9bd"
checksum = "030ad2bc4db10a8944cb0d837f158bdfec4d4a4873ab701a95046770d11f8842"
dependencies = [
"pin-project-internal",
]
[[package]]
name = "pin-project-internal"
version = "1.1.1"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1fef411b303e3e12d534fb6e7852de82da56edd937d895125821fb7c09436c7"
checksum = "ec2e072ecce94ec471b13398d5402c188e76ac03cf74dd1a975161b23a3f6d9c"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.18",
"syn 2.0.23",
]
[[package]]
name = "pin-project-lite"
version = "0.2.9"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116"
checksum = "4c40d25201921e5ff0c862a505c6557ea88568a4e3ace775ab55e93f2f4f9d57"
[[package]]
name = "pin-utils"
@ -4129,9 +4139,9 @@ checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964"
[[package]]
name = "pprof"
version = "0.11.1"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "196ded5d4be535690899a4631cc9f18cdc41b7ebf24a79400f46f48e49a11059"
checksum = "6b90f8560ad8bd57b207b8293bc5226e48e89039a6e590c12a297d91b84c7e60"
dependencies = [
"backtrace",
"cfg-if",
@ -4239,9 +4249,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068"
[[package]]
name = "proc-macro2"
version = "1.0.60"
version = "1.0.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dec2b086b7a862cf4de201096214fa870344cf922b2b30c167badb3af3195406"
checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb"
dependencies = [
"unicode-ident",
]
@ -4428,9 +4438,9 @@ dependencies = [
[[package]]
name = "quote"
version = "1.0.28"
version = "1.0.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488"
checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105"
dependencies = [
"proc-macro2",
]
@ -4693,15 +4703,28 @@ dependencies = [
[[package]]
name = "rustix"
version = "0.37.21"
version = "0.37.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62f25693a73057a1b4cb56179dd3c7ea21a7c6c5ee7d85781f5749b46f34b79c"
checksum = "8818fa822adcc98b18fedbb3632a6a33213c070556b5aa7c4c8cc21cff565c4c"
dependencies = [
"bitflags 1.3.2",
"errno",
"io-lifetimes",
"libc",
"linux-raw-sys",
"linux-raw-sys 0.3.8",
"windows-sys 0.48.0",
]
[[package]]
name = "rustix"
version = "0.38.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aabcb0461ebd01d6b79945797c27f8529082226cb630a9865a71870ff63532a4"
dependencies = [
"bitflags 2.3.3",
"errno",
"libc",
"linux-raw-sys 0.4.3",
"windows-sys 0.48.0",
]
@ -4731,9 +4754,9 @@ dependencies = [
[[package]]
name = "rustls-pemfile"
version = "1.0.2"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d194b56d58803a43635bdc398cd17e383d6f71f9182b9a192c127ca42494a59b"
checksum = "2d3987094b1d07b653b7dfdc3f70ce9a1da9c51ac18c1b06b662e4f9a0e9f4b2"
dependencies = [
"base64 0.21.2",
]
@ -4760,7 +4783,7 @@ version = "12.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "994eca4bca05c87e86e15d90fc7a91d1be64b4482b38cb2d27474568fe7c9db9"
dependencies = [
"bitflags 2.3.2",
"bitflags 2.3.3",
"cfg-if",
"clipboard-win",
"fd-lock",
@ -4832,29 +4855,29 @@ checksum = "e6b44e8fc93a14e66336d230954dda83d18b4605ccace8fe09bc7514a71ad0bc"
[[package]]
name = "serde"
version = "1.0.164"
version = "1.0.166"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d"
checksum = "d01b7404f9d441d3ad40e6a636a7782c377d2abdbe4fa2440e2edcc2f4f10db8"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.164"
version = "1.0.166"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68"
checksum = "5dd83d6dde2b6b2d466e14d9d1acce8816dedee94f735eac6395808b3483c6d6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.18",
"syn 2.0.23",
]
[[package]]
name = "serde_json"
version = "1.0.99"
version = "1.0.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46266871c240a00b8f503b877622fe33430b3c7d963bdc0f2adc511e54a1eae3"
checksum = "0f1e14e89be7aa4c4b78bdbdc9eb5bf8517829a600ae8eaa39a6e1d960b5185c"
dependencies = [
"itoa",
"ryu",
@ -5423,7 +5446,7 @@ dependencies = [
"proc-macro2",
"quote",
"rustversion",
"syn 2.0.18",
"syn 2.0.23",
]
[[package]]
@ -5434,9 +5457,9 @@ checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
[[package]]
name = "symbolic-common"
version = "10.2.1"
version = "12.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b55cdc318ede251d0957f07afe5fed912119b8c1bc5a7804151826db999e737"
checksum = "38f7afd8bcd36190409e6b71d89928f7f09d918a7aa3460d847bc49a538d672e"
dependencies = [
"debugid",
"memmap2",
@ -5446,9 +5469,9 @@ dependencies = [
[[package]]
name = "symbolic-demangle"
version = "10.2.1"
version = "12.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79be897be8a483a81fff6a3a4e195b4ac838ef73ca42d348b3f722da9902e489"
checksum = "ec64922563a36e3fe686b6d99f06f25dacad2a202ac7502ed642930a188fb20a"
dependencies = [
"cpp_demangle",
"rustc-demangle",
@ -5468,9 +5491,9 @@ dependencies = [
[[package]]
name = "syn"
version = "2.0.18"
version = "2.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e"
checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737"
dependencies = [
"proc-macro2",
"quote",
@ -5514,7 +5537,7 @@ dependencies = [
"cfg-if",
"fastrand",
"redox_syscall 0.3.5",
"rustix",
"rustix 0.37.22",
"windows-sys 0.48.0",
]
@ -5579,22 +5602,22 @@ dependencies = [
[[package]]
name = "thiserror"
version = "1.0.40"
version = "1.0.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac"
checksum = "c16a64ba9387ef3fdae4f9c1a7f07a0997fce91985c0336f1ddc1822b3b37802"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.40"
version = "1.0.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f"
checksum = "d14928354b01c4d6a4f0e549069adef399a284e7995c7ccca94e8a07a5346c59"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.18",
"syn 2.0.23",
]
[[package]]
@ -5623,7 +5646,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09"
dependencies = [
"byteorder",
"integer-encoding",
"integer-encoding 3.0.4",
"log",
"ordered-float 2.10.0",
"threadpool",
@ -5723,7 +5746,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.18",
"syn 2.0.23",
]
[[package]]
@ -5914,7 +5937,7 @@ version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8bd22a874a2d0b70452d5597b12c537331d49060824a95f49f108994f94aa4c"
dependencies = [
"bitflags 2.3.2",
"bitflags 2.3.3",
"bytes",
"futures-core",
"futures-util",
@ -6007,7 +6030,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.18",
"syn 2.0.23",
]
[[package]]
@ -6319,7 +6342,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.18",
"syn 2.0.23",
"wasm-bindgen-shared",
]
@ -6353,7 +6376,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.18",
"syn 2.0.23",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
@ -6428,9 +6451,9 @@ dependencies = [
[[package]]
name = "whoami"
version = "1.4.0"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c70234412ca409cc04e864e89523cb0fc37f5e1344ebed5a3ebf4192b6b9f68"
checksum = "22fc3756b8a9133049b26c7f61ab35416c130e8c09b660f5b3958b446f52cc50"
dependencies = [
"wasm-bindgen",
"web-sys",
@ -6473,7 +6496,7 @@ version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f"
dependencies = [
"windows-targets 0.48.0",
"windows-targets 0.48.1",
]
[[package]]
@ -6491,7 +6514,7 @@ version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
dependencies = [
"windows-targets 0.48.0",
"windows-targets 0.48.1",
]
[[package]]
@ -6511,9 +6534,9 @@ dependencies = [
[[package]]
name = "windows-targets"
version = "0.48.0"
version = "0.48.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5"
checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f"
dependencies = [
"windows_aarch64_gnullvm 0.48.0",
"windows_aarch64_msvc 0.48.0",
@ -6664,7 +6687,6 @@ dependencies = [
"hashbrown 0.14.0",
"heck",
"indexmap 1.9.3",
"io-lifetimes",
"itertools 0.10.5",
"libc",
"lock_api",
@ -6690,7 +6712,7 @@ dependencies = [
"regex-syntax 0.7.2",
"reqwest",
"ring",
"rustix",
"rustix 0.38.2",
"rustls 0.21.2",
"scopeguard",
"serde",
@ -6702,7 +6724,7 @@ dependencies = [
"sqlx-core",
"sqlx-macros",
"syn 1.0.109",
"syn 2.0.18",
"syn 2.0.23",
"thrift",
"tokio",
"tokio-stream",


@ -25,7 +25,7 @@ tonic = { workspace = true }
[dev-dependencies]
assert_matches = "1.5.0"
parking_lot = "0.12.1"
paste = "1.0.12"
paste = "1.0.13"
test_helpers_end_to_end = { path = "../test_helpers_end_to_end" }
tokio = "1.29.1"


@ -6,7 +6,7 @@ edition.workspace = true
license.workspace = true
[dependencies]
async-trait = "0.1.68"
async-trait = "0.1.70"
backoff = { path = "../backoff" }
futures = "0.3"
iox_time = { path = "../iox_time" }


@ -9,7 +9,7 @@ license.workspace = true
[dependencies]
http = "0.2.9"
reqwest = { version = "0.11", default-features = false, features = ["stream", "rustls-tls"] }
thiserror = "1.0.40"
thiserror = "1.0.41"
tonic = { workspace = true }
tower = "0.4"
workspace-hack = { version = "0.1", path = "../workspace-hack" }


@ -6,7 +6,7 @@ edition.workspace = true
license.workspace = true
[dependencies]
async-trait = "0.1.68"
async-trait = "0.1.70"
backoff = { path = "../backoff" }
bytes = "1.4"
compactor_scheduler = { path = "../compactor_scheduler" }
@ -21,7 +21,6 @@ metric = { path = "../metric" }
object_store = { workspace = true }
observability_deps = { path = "../observability_deps" }
parquet_file = { path = "../parquet_file" }
predicate = { path = "../predicate" }
rand = "0.8.3"
schema = { path = "../schema" }
tokio = { version = "1", features = ["macros", "rt", "sync"] }


@ -2,15 +2,10 @@
use std::{any::Any, sync::Arc};
use data_types::{ChunkId, ChunkOrder, PartitionId};
use datafusion::{error::DataFusionError, physical_plan::Statistics};
use iox_query::{
exec::{stringset::StringSet, IOxSessionContext},
util::create_basic_summary,
QueryChunk, QueryChunkData,
};
use datafusion::physical_plan::Statistics;
use iox_query::{util::create_basic_summary, QueryChunk, QueryChunkData};
use observability_deps::tracing::debug;
use parquet_file::{chunk::ParquetChunk, storage::ParquetStorage};
use predicate::Predicate;
use schema::{merge::SchemaMerger, sort::SortKey, Schema};
use uuid::Uuid;
@ -96,20 +91,6 @@ impl QueryChunk for QueryableParquetChunk {
false
}
/// Return a set of Strings containing the distinct values in the
/// specified columns. If the predicate can be evaluated entirely
/// on the metadata of this Chunk. Returns `None` otherwise
///
/// The requested columns must all have String type.
fn column_values(
&self,
_ctx: IOxSessionContext,
_column_name: &str,
_predicate: &Predicate,
) -> Result<Option<StringSet>, DataFusionError> {
Ok(None)
}
fn data(&self) -> QueryChunkData {
QueryChunkData::Parquet(self.data.parquet_exec_input())
}


@ -6,7 +6,7 @@ edition.workspace = true
license.workspace = true
[dependencies]
async-trait = "0.1.68"
async-trait = "0.1.70"
backoff = { path = "../backoff" }
data_types = { path = "../data_types" }
iox_catalog = { path = "../iox_catalog" }


@ -74,18 +74,21 @@ impl PartitionsSource for CatalogToCompactPartitionsSource {
// we're going to check the time range we'd like to query for against the end time of the last query.
let mut last = self.last_maximum_time.lock().unwrap();
// if the last query ended further back in time than this query starts, we're about to skip something.
if *last < minimum_time {
if minimum_time.sub(*last) < self.min_threshold * 3 {
// the end of the last query says we're skipping less than 3x our configured lookback, so
// back up and query everything since the last query.
minimum_time = *last;
} else {
// end of the last query says we're skipping a lot. We should limit how far we look back to avoid
// returning all partitions, so we'll just back up 3x the configured lookback.
// this might skip something (until cold compaction), but we need a limit on how far we look back.
minimum_time = self.time_provider.now() - self.min_threshold * 3;
}
// query for partitions with activity since the last query. We shouldn't query for a time range
// we've already covered. So if the prior query was 2m ago, and the query covered 10m, ending at
// the time of that query, we just need to query for activity in the last 2m. Asking for more than
// that creates busy-work that will spam the catalog with more queries only to determine that no
// compaction is needed. But we also don't want to query so far back in time that we get all partitions, so the
// lookback is limited to 3x the configured threshold.
if minimum_time < *last || minimum_time.sub(*last) < self.min_threshold * 3 {
// the gap since the end of the last query is less than 3x our configured lookback, so we can
// query everything since the last query.
minimum_time = *last;
} else {
// end of the last query says we're skipping a lot. We should limit how far we look back to avoid
// returning all partitions, so we'll just back up 3x the configured lookback.
// this might skip something (until cold compaction), but we need a limit on how far we look back.
minimum_time = self.time_provider.now() - self.min_threshold * 3;
}
maximum_time = self.max_threshold.map(|max| self.time_provider.now() - max);
@ -113,6 +116,7 @@ mod tests {
use data_types::Timestamp;
use iox_catalog::mem::MemCatalog;
use iox_tests::PartitionBuilder;
use iox_time::MockProvider;
fn partition_ids(ids: &[i64]) -> Vec<PartitionId> {
ids.iter().cloned().map(PartitionId::new).collect()
@ -122,17 +126,18 @@ mod tests {
catalog: Arc<MemCatalog>,
min_threshold: Duration,
max_threshold: Option<Duration>,
second_query_delta: Duration, // time between first and second query
first_expected_ids: &[i64], // expected values on first fetch, which does a 3x on min_threshold
second_expected_ids: &[i64], // expected values on second fetch, which uses min_threshold unmodified
) {
let time_provider = catalog.time_provider();
let time_provider = Arc::new(MockProvider::new(catalog.time_provider().now()));
let partitions_source = CatalogToCompactPartitionsSource::new(
Default::default(),
catalog,
min_threshold,
max_threshold,
time_provider,
Arc::<iox_time::MockProvider>::clone(&time_provider),
);
let mut actual_partition_ids = partitions_source.fetch().await;
@ -145,6 +150,7 @@ mod tests {
max_threshold {max_threshold:?} failed (first fetch, 3x lookback)",
);
time_provider.inc(second_query_delta);
let mut actual_partition_ids = partitions_source.fetch().await;
actual_partition_ids.sort();
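Aside (not part of the diff): the test harness now drives a deterministic mock clock instead of reusing the catalog's real one. The shape of the pattern, using only the calls visible in this hunk:

    // Pin the clock to the catalog's current notion of "now"...
    let time_provider = Arc::new(MockProvider::new(catalog.time_provider().now()));
    // ...run the first fetch, then advance the clock explicitly so the
    // second fetch exercises the time-since-last-query branch deterministically.
    time_provider.inc(second_query_delta);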
@ -163,10 +169,15 @@ mod tests {
let time_three_hour_ago = Timestamp::from(time_provider.hours_ago(3));
let time_six_hour_ago = Timestamp::from(time_provider.hours_ago(6));
let time_one_min_future = Timestamp::from(time_provider.minutes_into_future(1));
for (id, time) in [(1, time_three_hour_ago), (2, time_six_hour_ago)]
.iter()
.cloned()
for (id, time) in [
(1, time_three_hour_ago),
(2, time_six_hour_ago),
(3, time_one_min_future),
]
.iter()
.cloned()
{
let partition = PartitionBuilder::new(id as i64)
.with_new_file_at(time)
@ -175,13 +186,44 @@ mod tests {
}
let one_minute = Duration::from_secs(60);
fetch_test(Arc::clone(&catalog), one_minute, None, &[], &[]).await;
let ten_minute = Duration::from_secs(60) * 10;
// the lack of an end time means it gets the future file (3) in the first query; this is an
// oddity of a test case that has files with a future timestamp (not a real-world concern).
// the second query 10m later with a cap of 3m lookback doesn't get it.
fetch_test(
Arc::clone(&catalog),
one_minute,
None,
ten_minute,
&[3],
&[],
)
.await;
let four_hours = Duration::from_secs(60 * 60 * 4);
fetch_test(Arc::clone(&catalog), four_hours, None, &[1, 2], &[1]).await;
// again the future file is included in the first query, just an oddity of the test case.
fetch_test(
Arc::clone(&catalog),
four_hours,
None,
ten_minute,
&[1, 2, 3],
&[3],
)
.await;
let seven_hours = Duration::from_secs(60 * 60 * 7);
fetch_test(Arc::clone(&catalog), seven_hours, None, &[1, 2], &[1, 2]).await;
// again the future file is included in the first query, just an oddity of the test case.
fetch_test(
Arc::clone(&catalog),
seven_hours,
None,
ten_minute,
&[1, 2, 3],
&[3],
)
.await;
}
#[tokio::test]
@ -192,11 +234,13 @@ mod tests {
let time_now = Timestamp::from(time_provider.now());
let time_three_hour_ago = Timestamp::from(time_provider.hours_ago(3));
let time_six_hour_ago = Timestamp::from(time_provider.hours_ago(6));
let time_one_min_future = Timestamp::from(time_provider.minutes_into_future(1));
for (id, time) in [
(1, time_now),
(2, time_three_hour_ago),
(3, time_six_hour_ago),
(4, time_one_min_future),
]
.iter()
.cloned()
@ -209,54 +253,80 @@ mod tests {
let one_minute = Duration::from_secs(60);
let one_hour = Duration::from_secs(60 * 60);
let two_hour = Duration::from_secs(60 * 60 * 2);
let four_hours = Duration::from_secs(60 * 60 * 4);
let seven_hours = Duration::from_secs(60 * 60 * 7);
// File 3 is all that falls within the 7-4h lookback window. With 1m to the next query,
// nothing is found with windows advanced by 1m.
fetch_test(
Arc::clone(&catalog),
seven_hours,
Some(four_hours),
one_minute,
&[3],
&[3],
&[],
)
.await;
// With a 7-1h lookback window, files 2 and 3 are found. With 2h to the next query, the
// window advances to find the two newer files.
fetch_test(
Arc::clone(&catalog),
seven_hours,
Some(one_hour),
two_hour,
&[2, 3],
&[2, 3],
&[1, 4],
)
.await;
// With a 7h-1m lookback window, files 2 and 3 are found. With 1m to the next query, the
// window advances to find the one newer file.
fetch_test(
Arc::clone(&catalog),
seven_hours,
Some(one_minute),
one_minute,
&[2, 3],
&[2, 3],
&[1],
)
.await;
// With a 4h-1h lookback window, files 2 and 3 are found. With 1m to the next query, there's
// nothing new in the next window.
fetch_test(
Arc::clone(&catalog),
four_hours,
Some(one_hour),
one_minute,
&[2, 3],
&[2],
&[],
)
.await;
// With a 4h-1m lookback window, files 2 and 3 are found. With 4h to the next query, the
// remaining files are found.
fetch_test(
Arc::clone(&catalog),
four_hours,
Some(one_minute),
four_hours,
&[2, 3],
&[2],
&[1, 4],
)
.await;
fetch_test(Arc::clone(&catalog), one_hour, Some(one_minute), &[], &[]).await;
// With a 1h-1m lookback window, nothing is found. In the second query 1m later, it finds
// the file created 'now'.
fetch_test(
Arc::clone(&catalog),
one_hour,
Some(one_minute),
one_minute,
&[],
&[1],
)
.await;
}
}


@ -7,7 +7,7 @@ edition.workspace = true
license.workspace = true
[dependencies]
async-trait = "0.1.68"
async-trait = "0.1.70"
backoff = { path = "../backoff" }
compactor = { path = "../compactor" }
compactor_scheduler = { path = "../compactor_scheduler" }


@ -18,14 +18,14 @@ ordered-float = "3"
schema = { path = "../schema" }
sha2 = "0.10"
sqlx = { version = "0.6", features = ["runtime-tokio-rustls", "postgres", "uuid"] }
thiserror = "1.0.40"
thiserror = "1.0.41"
uuid = { version = "1", features = ["v4"] }
workspace-hack = { version = "0.1", path = "../workspace-hack" }
percent-encoding = "2.2.0"
[dev-dependencies] # In alphabetical order
assert_matches = "1"
paste = "1.0.12"
paste = "1.0.13"
proptest = { version = "1.2.0", default-features = false }
test_helpers = { path = "../test_helpers" }
hex = "0.4.2"


@ -160,33 +160,32 @@ impl std::fmt::Display for TableId {
}
}
/// A sequence number from a `router::Shard` (kafka partition)
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)]
#[sqlx(transparent)]
pub struct SequenceNumber(i64);
/// A sequence number from an ingester
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SequenceNumber(u64);
#[allow(missing_docs)]
impl SequenceNumber {
pub fn new(v: i64) -> Self {
pub fn new(v: u64) -> Self {
Self(v)
}
pub fn get(&self) -> i64 {
pub fn get(&self) -> u64 {
self.0
}
}
impl Add<i64> for SequenceNumber {
impl Add<u64> for SequenceNumber {
type Output = Self;
fn add(self, other: i64) -> Self {
fn add(self, other: u64) -> Self {
Self(self.0 + other)
}
}
impl Sub<i64> for SequenceNumber {
impl Sub<u64> for SequenceNumber {
type Output = Self;
fn sub(self, other: i64) -> Self {
fn sub(self, other: u64) -> Self {
Self(self.0 - other)
}
}
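A standalone sketch (mirroring the impls above; not part of the diff) of what the i64-to-u64 switch means at call sites: subtracting past zero now panics in debug builds instead of silently producing a negative sequence number.

    use std::ops::{Add, Sub};

    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct SequenceNumber(u64);

    impl SequenceNumber {
        pub fn new(v: u64) -> Self { Self(v) }
        pub fn get(&self) -> u64 { self.0 }
    }

    impl Add<u64> for SequenceNumber {
        type Output = Self;
        fn add(self, other: u64) -> Self { Self(self.0 + other) }
    }

    impl Sub<u64> for SequenceNumber {
        type Output = Self;
        fn sub(self, other: u64) -> Self { Self(self.0 - other) }
    }

    fn main() {
        assert_eq!((SequenceNumber::new(41) + 1).get(), 42);
        // SequenceNumber::new(0) - 1 panics on debug-build underflow here,
        // where the old i64-backed type would have quietly yielded -1.
    }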
@ -614,7 +613,13 @@ impl ParquetFile {
/// Estimate the memory consumption of this object and its contents
pub fn size(&self) -> usize {
std::mem::size_of_val(self) + self.column_set.size()
std::mem::size_of_val(self)
+ self
.partition_hash_id
.as_ref()
.map(|id| id.size() - std::mem::size_of_val(id))
.unwrap_or_default()
+ self.column_set.size()
- std::mem::size_of_val(&self.column_set)
}
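A comment-only breakdown (not part of the diff) of the accounting above; subtracting size_of_val for each nested field avoids double-counting inline bytes that size_of_val(self) already covers:

    //   size_of_val(self)                              inline bytes of the struct,
    //                                                  including the inline parts of
    //                                                  partition_hash_id and column_set
    // + partition_hash_id.size() - size_of_val(id)     heap bytes of the hash id only
    // + column_set.size() - size_of_val(&column_set)   heap bytes of the column set only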


@ -19,6 +19,18 @@ pub enum TransitionPartitionId {
Deterministic(PartitionHashId),
}
impl TransitionPartitionId {
/// Size in bytes including `self`.
pub fn size(&self) -> usize {
match self {
Self::Deprecated(_) => std::mem::size_of::<Self>(),
Self::Deterministic(id) => {
std::mem::size_of::<Self>() + id.size() - std::mem::size_of_val(id)
}
}
}
}
impl std::fmt::Display for TransitionPartitionId {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
@ -216,6 +228,11 @@ impl PartitionHashId {
pub fn as_bytes(&self) -> &[u8] {
self.0.as_ref()
}
/// Size in bytes including `Self`.
pub fn size(&self) -> usize {
std::mem::size_of::<Self>() + self.0.len()
}
}
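Worked through (heap size H is illustrative, not from the source): for a Deterministic variant whose inner buffer holds H heap bytes, PartitionHashId::size() = size_of::<PartitionHashId>() + H, so TransitionPartitionId::size() = size_of::<TransitionPartitionId>() + H; the id.size() - size_of_val(id) term strips the inner id's inline bytes so they are not counted on top of the enum's own footprint.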
impl<'q> sqlx::encode::Encode<'q, sqlx::Postgres> for &'q PartitionHashId {


@ -207,18 +207,18 @@ mod tests {
#[test]
fn test_intersect() {
let a = [0, i64::MAX, 40, 41, 42, 43, 44, 45]
let a = [0, u64::MAX, 40, 41, 42, 43, 44, 45]
.into_iter()
.map(SequenceNumber::new)
.collect::<SequenceNumberSet>();
let b = [1, 5, i64::MAX, 42]
let b = [1, 5, u64::MAX, 42]
.into_iter()
.map(SequenceNumber::new)
.collect::<SequenceNumberSet>();
let intersection = intersect(&a, &b);
let want = [i64::MAX, 42]
let want = [u64::MAX, 42]
.into_iter()
.map(SequenceNumber::new)
.collect::<SequenceNumberSet>();
@ -226,21 +226,17 @@ mod tests {
assert_eq!(intersection, want);
}
/// Yield vec's of [`SequenceNumber`] derived from u64 values and cast to
/// i64.
/// Yield vec's of [`SequenceNumber`] derived from u64 values.
///
/// This matches how the ingester allocates [`SequenceNumber`] - from a u64
/// source.
fn sequence_number_vec() -> impl Strategy<Value = Vec<SequenceNumber>> {
prop::collection::vec(0..u64::MAX, 0..1024).prop_map(|vec| {
vec.into_iter()
.map(|v| SequenceNumber::new(v as i64))
.collect()
})
prop::collection::vec(0..u64::MAX, 0..1024)
.prop_map(|vec| vec.into_iter().map(SequenceNumber::new).collect())
}
// The following tests compare to an order-independent HashSet, as the
// SequenceNumber uses the PartialOrd impl of the inner i64 for ordering,
// SequenceNumber uses the PartialOrd impl of the inner u64 for ordering,
// resulting in incorrect output when compared to an ordered set of cast as
// u64.
//


@ -16,8 +16,11 @@
)]
use metric::Registry;
#[cfg(tokio_unstable)]
use tokio_metrics_bridge::setup_tokio_metrics;
// Workaround for "unused crate" lint false positives.
#[cfg(not(tokio_unstable))]
use tokio_metrics_bridge as _;
use workspace_hack as _;
use once_cell::sync::Lazy;
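The cfg gates above (and in the hunk below) only compile in when the tokio_unstable cfg is set, which the .cargo/config.toml hunk at the top of this diff supplies via rustflags; building outside that config requires passing the flag through Cargo's standard mechanism, e.g.:

    RUSTFLAGS="--cfg tokio_unstable" cargo build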
@ -242,7 +245,10 @@ impl DedicatedExecutor {
.build()
.expect("Creating tokio runtime");
#[cfg(tokio_unstable)]
setup_tokio_metrics(runtime.metrics(), thread_name, metric_registry);
#[cfg(not(tokio_unstable))]
let _ = metric_registry;
runtime.block_on(async move {
// Dropping the tokio runtime only waits for tasks to yield, not to complete


@ -9,8 +9,8 @@ license.workspace = true
futures-util = { version = "0.3" }
influxdb_iox_client = { path = "../influxdb_iox_client", features = ["flight", "format"] }
observability_deps = { path = "../observability_deps" }
serde_json = "1.0.99"
thiserror = "1.0.40"
serde_json = "1.0.100"
thiserror = "1.0.41"
tokio = { version = "1.29" }
tokio-util = { version = "0.7.8" }
workspace-hack = { version = "0.1", path = "../workspace-hack" }


@ -10,7 +10,7 @@ bytes = "1.4"
futures = { version = "0.3", default-features = false }
reqwest = { version = "0.11", default-features = false, features = ["stream", "json", "rustls-tls"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.99"
serde_json = "1.0.100"
snafu = "0.7"
url = "2.4.0"
uuid = { version = "1", features = ["v4"] }


@ -18,4 +18,4 @@ workspace-hack = { version = "0.1", path = "../workspace-hack" }
test_helpers = { path = "../test_helpers" }
assert_matches = "1"
insta = { version = "1.30.0", features = ["yaml"] }
paste = "1.0.12"
paste = "1.0.13"


@ -52,7 +52,7 @@ backtrace = "0.3"
bytes = "1.4"
clap = { version = "4", features = ["derive", "env"] }
comfy-table = { version = "7.0", default-features = false }
console-subscriber = { version = "0.1.9", optional = true, features = ["parking_lot"] }
console-subscriber = { version = "0.1.10", optional = true, features = ["parking_lot"] }
dotenvy = "0.15.7"
futures = "0.3"
futures-util = { version = "0.3" }
@ -67,10 +67,10 @@ libc = { version = "0.2" }
num_cpus = "1.16.0"
once_cell = { version = "1.18", features = ["parking_lot"] }
rustyline = { version = "12.0", default-features = false, features = ["with-file-history"]}
serde_json = "1.0.99"
serde_json = "1.0.100"
snafu = "0.7"
tempfile = "3.6.0"
thiserror = "1.0.40"
thiserror = "1.0.41"
tikv-jemalloc-ctl = { version = "0.5.0", optional = true }
tokio = { version = "1.29", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time", "io-std"] }
tokio-stream = { version = "0.1", features = ["net"] }
@ -93,7 +93,7 @@ predicate = { path = "../predicate" }
predicates = "3.0.3"
pretty_assertions = "1.3.0"
proptest = { version = "1.2.0", default-features = false }
serde = "1.0.164"
serde = "1.0.166"
test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
test_helpers_end_to_end = { path = "../test_helpers_end_to_end" }
insta = { version = "1", features = ["yaml"] }


@ -3,7 +3,8 @@ use std::sync::Arc;
use iox_time::{SystemProvider, Time, TimeProvider};
use metric::U64Gauge;
use once_cell::sync::Lazy;
use tokio::runtime::Handle;
#[cfg(tokio_unstable)]
use tokio_metrics_bridge::setup_tokio_metrics;
/// Package version.
@ -54,7 +55,12 @@ pub fn setup_metric_registry() -> Arc<metric::Registry> {
registry.register_instrument("jemalloc_metrics", crate::jemalloc::JemallocMetrics::new);
// Register tokio metrics for main runtime
setup_tokio_metrics(Handle::current().metrics(), "main", Arc::clone(&registry));
#[cfg(tokio_unstable)]
setup_tokio_metrics(
tokio::runtime::Handle::current().metrics(),
"main",
Arc::clone(&registry),
);
registry
}


@ -1323,10 +1323,15 @@ async fn assert_ingester_contains_results(
.await
.unwrap();
let ingester_uuid = ingester_response.app_metadata.ingester_uuid;
let ingester_partition = ingester_response
.partitions
.into_iter()
.next()
.expect("at least one ingester partition");
let ingester_uuid = ingester_partition.app_metadata.ingester_uuid;
assert!(!ingester_uuid.is_empty());
assert_batches_sorted_eq!(expected, &ingester_response.record_batches);
assert_batches_sorted_eq!(expected, &ingester_partition.record_batches);
}
#[tokio::test]


@ -1,8 +1,14 @@
use arrow::datatypes::DataType;
use arrow_flight::{error::FlightError, Ticket};
use arrow_util::assert_batches_sorted_eq;
use data_types::{NamespaceId, TableId};
use datafusion::{
prelude::{col, lit},
scalar::ScalarValue,
};
use futures::FutureExt;
use http::StatusCode;
use influxdb_iox_client::table::generated_types::{Part, PartitionTemplate, TemplatePart};
use ingester_query_grpc::{influxdata::iox::ingester::v1 as proto, IngesterQueryRequest};
use prost::Message;
use test_helpers_end_to_end::{maybe_skip_integration, MiniCluster, Step, StepTest, StepTestState};
@ -39,7 +45,14 @@ async fn persist_on_demand() {
.await
.unwrap();
let ingester_uuid = ingester_response.app_metadata.ingester_uuid;
assert_eq!(ingester_response.partitions.len(), 1);
let ingester_partition = ingester_response
.partitions
.into_iter()
.next()
.expect("just checked len");
let ingester_uuid = ingester_partition.app_metadata.ingester_uuid;
assert!(!ingester_uuid.is_empty());
let expected = [
@ -49,7 +62,7 @@ async fn persist_on_demand() {
"| A | B | 1970-01-01T00:00:00.000123456Z | 42 |",
"+------+------+--------------------------------+-----+",
];
assert_batches_sorted_eq!(&expected, &ingester_response.record_batches);
assert_batches_sorted_eq!(&expected, &ingester_partition.record_batches);
}
.boxed()
})),
@ -77,8 +90,15 @@ async fn persist_on_demand() {
.await
.unwrap();
assert_eq!(ingester_response.partitions.len(), 1);
let ingester_partition = ingester_response
.partitions
.into_iter()
.next()
.expect("just checked len");
let num_files_persisted =
ingester_response.app_metadata.completed_persistence_count;
ingester_partition.app_metadata.completed_persistence_count;
assert_eq!(num_files_persisted, 1);
}
.boxed()
@ -121,11 +141,17 @@ async fn ingester_flight_api() {
.query_ingester(query.clone(), cluster.ingester().ingester_grpc_connection())
.await
.unwrap();
assert_eq!(ingester_response.partitions.len(), 1);
let ingester_partition = ingester_response
.partitions
.into_iter()
.next()
.expect("just checked len");
let ingester_uuid = ingester_response.app_metadata.ingester_uuid.clone();
let ingester_uuid = ingester_partition.app_metadata.ingester_uuid.clone();
assert!(!ingester_uuid.is_empty());
let schema = ingester_response.schema.unwrap();
let schema = ingester_partition.schema.unwrap();
let expected = [
"+------+------+--------------------------------+-----+",
@ -135,11 +161,11 @@ async fn ingester_flight_api() {
"| B | A | 1970-01-01T00:00:00.001234567Z | 84 |",
"+------+------+--------------------------------+-----+",
];
assert_batches_sorted_eq!(&expected, &ingester_response.record_batches);
assert_batches_sorted_eq!(&expected, &ingester_partition.record_batches);
// Also ensure that the schema of the batches matches what is
// reported by the performed_query.
ingester_response
ingester_partition
.record_batches
.iter()
.enumerate()
@ -152,7 +178,13 @@ async fn ingester_flight_api() {
.query_ingester(query.clone(), cluster.ingester().ingester_grpc_connection())
.await
.unwrap();
assert_eq!(ingester_response.app_metadata.ingester_uuid, ingester_uuid);
assert_eq!(ingester_response.partitions.len(), 1);
let ingester_partition = ingester_response
.partitions
.into_iter()
.next()
.expect("just checked len");
assert_eq!(ingester_partition.app_metadata.ingester_uuid, ingester_uuid);
// Restart the ingesters
cluster.restart_ingesters().await;
@ -167,7 +199,146 @@ async fn ingester_flight_api() {
.query_ingester(query, cluster.ingester().ingester_grpc_connection())
.await
.unwrap();
assert_ne!(ingester_response.app_metadata.ingester_uuid, ingester_uuid);
assert_eq!(ingester_response.partitions.len(), 1);
let ingester_partition = ingester_response
.partitions
.into_iter()
.next()
.expect("just checked len");
assert_ne!(ingester_partition.app_metadata.ingester_uuid, ingester_uuid);
}
#[tokio::test]
async fn ingester_partition_pruning() {
test_helpers::maybe_start_logging();
let database_url = maybe_skip_integration!();
// Set up cluster
let mut cluster = MiniCluster::create_shared_never_persist(database_url).await;
let mut steps: Vec<_> = vec![Step::Custom(Box::new(move |state: &mut StepTestState| {
async move {
let namespace_name = state.cluster().namespace();
let mut namespace_client = influxdb_iox_client::namespace::Client::new(
state.cluster().router().router_grpc_connection(),
);
namespace_client
.create_namespace(
namespace_name,
None,
None,
Some(PartitionTemplate {
parts: vec![
TemplatePart {
part: Some(Part::TagValue("tag1".into())),
},
TemplatePart {
part: Some(Part::TagValue("tag3".into())),
},
],
}),
)
.await
.unwrap();
let mut table_client = influxdb_iox_client::table::Client::new(
state.cluster().router().router_grpc_connection(),
);
// table1: created implicitly by writing to it
// table2: do not override partition template => use namespace template
table_client
.create_table(namespace_name, "table2", None)
.await
.unwrap();
// table3: override the namespace template
table_client
.create_table(
namespace_name,
"table3",
Some(PartitionTemplate {
parts: vec![TemplatePart {
part: Some(Part::TagValue("tag2".into())),
}],
}),
)
.await
.unwrap();
}
.boxed()
}))]
.into_iter()
.chain((1..=3).flat_map(|tid| {
[Step::WriteLineProtocol(
[
format!("table{tid},tag1=v1a,tag2=v2a,tag3=v3a f=1 11"),
format!("table{tid},tag1=v1b,tag2=v2a,tag3=v3a f=1 11"),
format!("table{tid},tag1=v1a,tag2=v2b,tag3=v3a f=1 11"),
format!("table{tid},tag1=v1b,tag2=v2b,tag3=v3a f=1 11"),
format!("table{tid},tag1=v1a,tag2=v2a,tag3=v3b f=1 11"),
format!("table{tid},tag1=v1b,tag2=v2a,tag3=v3b f=1 11"),
format!("table{tid},tag1=v1a,tag2=v2b,tag3=v3b f=1 11"),
format!("table{tid},tag1=v1b,tag2=v2b,tag3=v3b f=1 11"),
]
.join("\n"),
)]
.into_iter()
}))
.collect();
steps.push(Step::Custom(Box::new(move |state: &mut StepTestState| {
async move {
// Note: The querier will perform correct type coercion. We must simulate this here, otherwise the ingester
// will NOT be able to prune the data because the predicate evaluation will fail with a type error
// and the predicate will be ignored.
let predicate = ::predicate::Predicate::new().with_expr(col("tag1").eq(lit(
ScalarValue::Dictionary(
Box::new(DataType::Int32),
Box::new(ScalarValue::from("v1a")),
),
)));
let query = IngesterQueryRequest::new(
state.cluster().namespace_id().await,
state.cluster().table_id("table1").await,
vec![],
Some(predicate),
);
let query: proto::IngesterQueryRequest = query.try_into().unwrap();
let ingester_response = state
.cluster()
.query_ingester(
query.clone(),
state.cluster().ingester().ingester_grpc_connection(),
)
.await
.unwrap();
let expected = [
"+-----+------+------+------+--------------------------------+",
"| f | tag1 | tag2 | tag3 | time |",
"+-----+------+------+------+--------------------------------+",
"| 1.0 | v1a | v2a | v3a | 1970-01-01T00:00:00.000000011Z |",
"| 1.0 | v1a | v2a | v3b | 1970-01-01T00:00:00.000000011Z |",
"| 1.0 | v1a | v2b | v3a | 1970-01-01T00:00:00.000000011Z |",
"| 1.0 | v1a | v2b | v3b | 1970-01-01T00:00:00.000000011Z |",
"+-----+------+------+------+--------------------------------+",
];
let record_batches = ingester_response
.partitions
.into_iter()
.flat_map(|p| p.record_batches)
.collect::<Vec<_>>();
assert_batches_sorted_eq!(&expected, &record_batches);
}
.boxed()
})));
StepTest::new(&mut cluster, steps).run().await
}
#[tokio::test]


@ -299,6 +299,48 @@ async fn query_after_persist_sees_new_files() {
StepTest::new(&mut cluster, steps).run().await
}
#[tokio::test]
async fn query_after_shutdown_sees_new_files() {
test_helpers::maybe_start_logging();
let database_url = maybe_skip_integration!();
// Configure a cluster such that the ingester never persists (until
// shutdown)
let ingester_config = TestConfig::new_ingester_never_persist(&database_url);
let router_config = TestConfig::new_router(&ingester_config);
// Querier configured to quickly consider ingesters dead to speed up the
// test.
let querier_config =
TestConfig::new_querier(&ingester_config).with_querier_circuit_breaker_threshold(1);
let mut cluster = MiniCluster::new()
.with_ingester(ingester_config)
.await
.with_router(router_config)
.await
.with_querier(querier_config)
.await;
let steps = vec![
Step::WriteLineProtocol("bananas,tag1=A,tag2=B val=42i 123456".to_string()),
Step::AssertNumParquetFiles { expected: 0 }, // test invariant
Step::GracefulStopIngesters,
Step::AssertNumParquetFiles { expected: 1 },
Step::Query {
sql: "select * from bananas".to_string(),
expected: vec![
"+------+------+--------------------------------+-----+",
"| tag1 | tag2 | time | val |",
"+------+------+--------------------------------+-----+",
"| A | B | 1970-01-01T00:00:00.000123456Z | 42 |",
"+------+------+--------------------------------+-----+",
],
},
];
StepTest::new(&mut cluster, steps).run().await
}
#[tokio::test]
async fn table_not_found_on_ingester() {
test_helpers::maybe_start_logging();


@ -193,7 +193,14 @@ async fn write_replication() {
.await
.unwrap();
let ingester_uuid = ingester_response.app_metadata.ingester_uuid;
assert_eq!(ingester_response.partitions.len(), 1);
let ingester_partition = ingester_response
.partitions
.into_iter()
.next()
.expect("just checked len");
let ingester_uuid = ingester_partition.app_metadata.ingester_uuid;
assert!(!ingester_uuid.is_empty());
let expected = [
@ -212,7 +219,7 @@ async fn write_replication() {
"| A | B | 1970-01-01T00:00:00.000000020Z | 20 |",
"+------+------+--------------------------------+-----+",
];
assert_batches_sorted_eq!(&expected, &ingester_response.record_batches);
assert_batches_sorted_eq!(&expected, &ingester_partition.record_batches);
}
.boxed()
})));


@ -24,10 +24,10 @@ prost = "0.11"
rand = "0.8.3"
reqwest = { version = "0.11", default-features = false, features = ["stream", "rustls-tls"] }
schema = { path = "../schema" }
serde_json = "1.0.99"
serde_json = "1.0.100"
tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-thread"] }
tokio-stream = "0.1.13"
thiserror = "1.0.40"
thiserror = "1.0.41"
tonic = { workspace = true }
[dev-dependencies]


@ -6,7 +6,7 @@ edition.workspace = true
license.workspace = true
[dependencies] # In alphabetical order
integer-encoding = "3.0.4"
integer-encoding = "4.0.0"
snafu = "0.7"
snap = "1.1.0"
observability_deps = { path = "../observability_deps" }


@ -10,7 +10,7 @@ arrow = { workspace = true, features = ["prettyprint"] }
arrow_util = { version = "0.1.0", path = "../arrow_util" }
arrow-flight = { workspace = true }
async-channel = "1.8.0"
async-trait = "0.1.68"
async-trait = "0.1.70"
backoff = { version = "0.1.0", path = "../backoff" }
bytes = "1.4.0"
crossbeam-utils = "0.8.16"
@ -31,7 +31,7 @@ observability_deps = { version = "0.1.0", path = "../observability_deps" }
once_cell = "1.18"
parking_lot = "0.12.1"
parquet_file = { version = "0.1.0", path = "../parquet_file" }
pin-project = "1.1.1"
pin-project = "1.1.2"
predicate = { version = "0.1.0", path = "../predicate" }
prost = { version = "0.11.9", default-features = false, features = ["std"] }
rand = "0.8.5"
@ -39,7 +39,7 @@ schema = { version = "0.1.0", path = "../schema" }
service_grpc_catalog = { version = "0.1.0", path = "../service_grpc_catalog" }
sharder = { version = "0.1.0", path = "../sharder" }
test_helpers = { path = "../test_helpers", features = ["future_timeout"], optional = true }
thiserror = "1.0.40"
thiserror = "1.0.41"
tracker = { path = "../tracker" }
tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] }
tokio-util = "0.7.8"
@ -58,7 +58,7 @@ ingester_test_ctx = { path = "../ingester_test_ctx" }
lazy_static = "1.4.0"
mutable_batch_lp = { path = "../mutable_batch_lp" }
object_store = { workspace = true }
paste = "1.0.12"
paste = "1.0.13"
tempfile = "3.6.0"
test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
tokio = { version = "1.29", features = ["macros", "time", "test-util"] }
@ -81,3 +81,7 @@ name = "write"
harness = false
# Require some internal types be made visible for benchmark code.
required-features = ["benches"]
[[bench]]
name = "query"
harness = false

ingester/benches/query.rs (new file)

@ -0,0 +1,102 @@
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use data_types::{NamespaceId, PartitionKey, TableId};
use ingester::IngesterRpcInterface;
use ingester_query_grpc::influxdata::iox::ingester::v1::IngesterQueryRequest;
use ingester_test_ctx::{TestContext, TestContextBuilder};
use std::fmt::Write;
const TEST_NAMESPACE: &str = "bananas";
const PARTITION_KEY: &str = "platanos";
fn generate_table_data(rows: usize, cols: usize) -> String {
let mut buf = String::new();
for i in 0..rows {
write!(&mut buf, "bananas ").unwrap();
for j in 0..(cols - 1) {
write!(&mut buf, "v{j}={i}{j},").unwrap();
}
writeln!(&mut buf, "v{cols}={i}{cols} 42{i}").unwrap();
}
buf
}
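Worked example by inspection of the loops above (not in the source): generate_table_data(1, 3) produces the single line `bananas v0=00,v1=01,v3=03 420`. Fields v0 through v{cols-2} come from the inner loop, after which the final field is written as v{cols}, so the index v{cols-1} is skipped.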
/// Return an initialised and pre-warmed ingester instance backed by a catalog
/// correctly populated to accept writes of `lp`.
async fn init(
lp: impl AsRef<str>,
) -> (TestContext<impl IngesterRpcInterface>, NamespaceId, TableId) {
let lp = lp.as_ref();
let mut ctx = TestContextBuilder::default()
// Don't stop ingest during benchmarks
.with_max_persist_queue_depth(10_000_000)
.with_persist_hot_partition_cost(10_000_000_000)
.build()
.await;
// Ensure the namespace exists in the catalog.
let ns = ctx.ensure_namespace(TEST_NAMESPACE, None).await;
// Write the test data
ctx.write_lp(TEST_NAMESPACE, lp, PartitionKey::from(PARTITION_KEY), 42)
.await;
let table_id = ctx.table_id(TEST_NAMESPACE, "bananas").await;
(ctx, ns.id, table_id)
}
fn bench_query(c: &mut Criterion) {
let runtime = tokio::runtime::Builder::new_multi_thread()
.enable_all()
.build()
.expect("failed to initialise tokio runtime for benchmark");
for (rows, cols) in [(100_000, 10), (100_000, 100), (100_000, 200)] {
run_bench("no projection", rows, cols, vec![], &runtime, c);
run_bench(
"project 1 column",
rows,
cols,
vec!["time".to_string()],
&runtime,
c,
);
}
}
fn run_bench(
name: &str,
rows: usize,
cols: usize,
projection: Vec<String>,
runtime: &tokio::runtime::Runtime,
c: &mut Criterion,
) {
let lp = generate_table_data(rows, cols);
let (ctx, namespace_id, table_id) = runtime.block_on(init(lp));
let mut group = c.benchmark_group("query");
group.throughput(Throughput::Elements(1)); // Queries per second
group.bench_function(
BenchmarkId::new(name, format!("rows_{rows}_cols{cols}")),
|b| {
let ctx = &ctx;
let projection = &projection;
b.to_async(runtime).iter(|| async move {
ctx.query(IngesterQueryRequest {
namespace_id: namespace_id.get(),
table_id: table_id.get(),
columns: projection.clone(),
predicate: None,
})
.await
.expect("query request failed");
});
},
);
}
criterion_group!(benches, bench_query);
criterion_main!(benches);
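With the [[bench]] entry added to ingester/Cargo.toml above (name = "query", harness = false), this benchmark would typically be invoked through Cargo's standard selector, e.g. cargo bench --bench query.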


@ -7,19 +7,23 @@ use std::sync::Arc;
use async_trait::async_trait;
use data_types::{NamespaceId, TableId};
use metric::U64Counter;
use predicate::Predicate;
use trace::span::Span;
use super::{
partition::resolver::PartitionProvider,
post_write::PostWriteObserver,
table::{name_resolver::TableNameProvider, TableData},
table::{metadata_resolver::TableProvider, TableData},
};
use crate::{
arcmap::ArcMap,
deferred_load::DeferredLoad,
dml_payload::IngestOp,
dml_sink::DmlSink,
query::{response::QueryResponse, tracing::QueryExecTracing, QueryError, QueryExec},
query::{
projection::OwnedProjection, response::QueryResponse, tracing::QueryExecTracing,
QueryError, QueryExec,
},
};
/// The string name / identifier of a Namespace.
@ -60,12 +64,13 @@ pub(crate) struct NamespaceData<O> {
/// A set of tables this [`NamespaceData`] instance has processed
/// [`IngestOp`]'s for.
///
/// The [`TableNameProvider`] acts as a [`DeferredLoad`] constructor to
/// resolve the [`TableName`] for new [`TableData`] out of the hot path.
/// The [`TableProvider`] acts as a [`DeferredLoad`] constructor to
/// resolve the catalog [`Table`] for new [`TableData`] out of the hot path.
///
/// [`TableName`]: crate::buffer_tree::table::TableName
///
/// [`Table`]: data_types::Table
tables: ArcMap<TableId, TableData<O>>,
table_name_resolver: Arc<dyn TableNameProvider>,
catalog_table_resolver: Arc<dyn TableProvider>,
/// The count of tables initialised in this Ingester so far, across all
/// namespaces.
table_count: U64Counter,
@ -83,7 +88,7 @@ impl<O> NamespaceData<O> {
pub(super) fn new(
namespace_id: NamespaceId,
namespace_name: Arc<DeferredLoad<NamespaceName>>,
table_name_resolver: Arc<dyn TableNameProvider>,
catalog_table_resolver: Arc<dyn TableProvider>,
partition_provider: Arc<dyn PartitionProvider>,
post_write_observer: Arc<O>,
metrics: &metric::Registry,
@ -99,7 +104,7 @@ impl<O> NamespaceData<O> {
namespace_id,
namespace_name,
tables: Default::default(),
table_name_resolver,
catalog_table_resolver,
table_count,
partition_provider,
post_write_observer,
@ -151,7 +156,7 @@ where
self.table_count.inc(1);
Arc::new(TableData::new(
table_id,
Arc::new(self.table_name_resolver.for_table(table_id)),
Arc::new(self.catalog_table_resolver.for_table(table_id)),
self.namespace_id,
Arc::clone(&self.namespace_name),
Arc::clone(&self.partition_provider),
@ -187,8 +192,9 @@ where
&self,
namespace_id: NamespaceId,
table_id: TableId,
columns: Vec<String>,
projection: OwnedProjection,
span: Option<Span>,
predicate: Option<Predicate>,
) -> Result<Self::Response, QueryError> {
assert_eq!(
self.namespace_id, namespace_id,
@ -204,7 +210,7 @@ where
// a tracing delegate to emit a child span.
Ok(QueryResponse::new(
QueryExecTracing::new(inner, "table")
.query_exec(namespace_id, table_id, columns, span)
.query_exec(namespace_id, table_id, projection, span, predicate)
.await?,
))
}
@ -226,7 +232,7 @@ mod tests {
test_util::{
defer_namespace_name_1_ms, make_write_op, PartitionDataBuilder, ARBITRARY_NAMESPACE_ID,
ARBITRARY_NAMESPACE_NAME, ARBITRARY_PARTITION_KEY, ARBITRARY_TABLE_ID,
ARBITRARY_TABLE_NAME, ARBITRARY_TABLE_NAME_PROVIDER,
ARBITRARY_TABLE_NAME, ARBITRARY_TABLE_PROVIDER,
},
};
@ -243,7 +249,7 @@ mod tests {
let ns = NamespaceData::new(
ARBITRARY_NAMESPACE_ID,
defer_namespace_name_1_ms(),
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
Arc::clone(&*ARBITRARY_TABLE_PROVIDER),
partition_provider,
Arc::new(MockPostWriteObserver::default()),
&metrics,

View File

@ -14,8 +14,10 @@ use self::{
buffer::{traits::Queryable, BufferState, DataBuffer, Persisting},
persisting::{BatchIdent, PersistingData},
};
use super::{namespace::NamespaceName, table::TableName};
use crate::{deferred_load::DeferredLoad, query_adaptor::QueryAdaptor};
use super::{namespace::NamespaceName, table::TableMetadata};
use crate::{
deferred_load::DeferredLoad, query::projection::OwnedProjection, query_adaptor::QueryAdaptor,
};
mod buffer;
pub(crate) mod persisting;
@ -73,9 +75,9 @@ pub struct PartitionData {
/// The catalog ID for the table this partition is part of.
table_id: TableId,
/// The name of the table this partition is part of, potentially unresolved
/// The catalog metadata for the table this partition is part of, potentially unresolved
/// / deferred.
table_name: Arc<DeferredLoad<TableName>>,
table: Arc<DeferredLoad<TableMetadata>>,
/// A [`DataBuffer`] for incoming writes.
buffer: DataBuffer,
@ -108,7 +110,7 @@ impl PartitionData {
namespace_id: NamespaceId,
namespace_name: Arc<DeferredLoad<NamespaceName>>,
table_id: TableId,
table_name: Arc<DeferredLoad<TableName>>,
table: Arc<DeferredLoad<TableMetadata>>,
sort_key: SortKeyState,
) -> Self {
Self {
@ -119,7 +121,7 @@ impl PartitionData {
namespace_id,
namespace_name,
table_id,
table_name,
table,
buffer: DataBuffer::default(),
persisting: VecDeque::with_capacity(1),
started_persistence_count: BatchIdent::default(),
@ -139,7 +141,7 @@ impl PartitionData {
trace!(
namespace_id = %self.namespace_id,
table_id = %self.table_id,
table_name = %self.table_name,
table = %self.table,
partition_id = %self.partition_id,
partition_key = %self.partition_key,
"buffered write"
@ -156,9 +158,9 @@ impl PartitionData {
/// Return all data for this partition, ordered by the calls to
/// [`PartitionData::buffer_write()`].
pub(crate) fn get_query_data(&mut self) -> Option<QueryAdaptor> {
pub(crate) fn get_query_data(&mut self, projection: &OwnedProjection) -> Option<QueryAdaptor> {
// Extract the buffered data, if any.
let buffered_data = self.buffer.get_query_data();
let buffered_data = self.buffer.get_query_data(projection);
// Prepend any currently persisting batches.
//
@ -168,14 +170,14 @@ impl PartitionData {
let data = self
.persisting
.iter()
.flat_map(|(_, b)| b.get_query_data())
.flat_map(|(_, b)| b.get_query_data(projection))
.chain(buffered_data)
.collect::<Vec<_>>();
trace!(
namespace_id = %self.namespace_id,
table_id = %self.table_id,
table_name = %self.table_name,
table = %self.table,
partition_id = %self.partition_id,
partition_key = %self.partition_key,
n_batches = data.len(),
@ -221,7 +223,7 @@ impl PartitionData {
debug!(
namespace_id = %self.namespace_id,
table_id = %self.table_id,
table_name = %self.table_name,
table = %self.table,
partition_id = %self.partition_id,
partition_key = %self.partition_key,
%batch_ident,
@ -230,7 +232,10 @@ impl PartitionData {
// Wrap the persisting data in the type wrapper
let data = PersistingData::new(
QueryAdaptor::new(self.partition_id, fsm.get_query_data()),
QueryAdaptor::new(
self.partition_id,
fsm.get_query_data(&OwnedProjection::default()),
),
batch_ident,
);
@ -271,7 +276,7 @@ impl PartitionData {
persistence_count = %self.completed_persistence_count,
namespace_id = %self.namespace_id,
table_id = %self.table_id,
table_name = %self.table_name,
table = %self.table,
partition_id = %self.partition_id,
partition_key = %self.partition_key,
batch_ident = %batch.batch_ident(),
@ -302,10 +307,10 @@ impl PartitionData {
self.completed_persistence_count
}
/// Return the name of the table this [`PartitionData`] is buffering writes
/// Return the metadata of the table this [`PartitionData`] is buffering writes
/// for.
pub(crate) fn table_name(&self) -> &Arc<DeferredLoad<TableName>> {
&self.table_name
pub(crate) fn table(&self) -> &Arc<DeferredLoad<TableMetadata>> {
&self.table
}
/// Return the table ID for this partition.
@ -349,7 +354,7 @@ impl PartitionData {
#[cfg(test)]
mod tests {
use std::{ops::Deref, time::Duration};
use std::time::Duration;
use arrow::compute::SortOptions;
use arrow_util::assert_batches_eq;
@ -378,7 +383,7 @@ mod tests {
let mut p = PartitionDataBuilder::new().build();
// And no data should be returned when queried.
assert!(p.get_query_data().is_none());
assert!(p.get_query_data(&OwnedProjection::default()).is_none());
// Perform a single write.
let mb = lp_to_mutable_batch(r#"bananas,city=London people=2,pigeons="millions" 10"#).1;
@ -387,7 +392,9 @@ mod tests {
// The data should be readable.
{
let data = p.get_query_data().expect("should return data");
let data = p
.get_query_data(&OwnedProjection::default())
.expect("should return data");
assert_eq!(data.partition_id(), ARBITRARY_PARTITION_ID);
let expected = [
@ -397,15 +404,7 @@ mod tests {
"| London | 2.0 | millions | 1970-01-01T00:00:00.000000010Z |",
"+--------+--------+----------+--------------------------------+",
];
assert_batches_eq!(
expected,
&*data
.record_batches()
.iter()
.map(Deref::deref)
.cloned()
.collect::<Vec<_>>()
);
assert_batches_eq!(expected, data.record_batches());
}
// Perform another write, adding data to the existing queryable data
@ -416,7 +415,9 @@ mod tests {
// And finally both writes should be readable.
{
let data = p.get_query_data().expect("should contain data");
let data = p
.get_query_data(&OwnedProjection::default())
.expect("should contain data");
assert_eq!(data.partition_id(), ARBITRARY_PARTITION_ID);
let expected = [
@ -427,15 +428,7 @@ mod tests {
"| Madrid | 4.0 | none | 1970-01-01T00:00:00.000000020Z |",
"+--------+--------+----------+--------------------------------+",
];
assert_batches_eq!(
expected,
&*data
.record_batches()
.iter()
.map(Deref::deref)
.cloned()
.collect::<Vec<_>>()
);
assert_batches_eq!(expected, data.record_batches());
}
}
@ -445,7 +438,7 @@ mod tests {
async fn test_persist() {
let mut p = PartitionDataBuilder::new().build();
assert!(p.get_query_data().is_none());
assert!(p.get_query_data(&OwnedProjection::default()).is_none());
// Perform a single write.
let mb = lp_to_mutable_batch(r#"bananas,city=London people=2,pigeons="millions" 10"#).1;
@ -468,15 +461,7 @@ mod tests {
"| London | 2.0 | millions | 1970-01-01T00:00:00.000000010Z |",
"+--------+--------+----------+--------------------------------+",
];
assert_batches_eq!(
expected,
&*persisting_data
.record_batches()
.iter()
.map(Deref::deref)
.cloned()
.collect::<Vec<_>>()
);
assert_batches_eq!(expected, persisting_data.record_batches());
// Ensure the started batch ident is increased after a persist call, but not the completed
// batch ident.
@ -492,7 +477,9 @@ mod tests {
// Which must be readable, alongside the ongoing persist data.
{
let data = p.get_query_data().expect("must have data");
let data = p
.get_query_data(&OwnedProjection::default())
.expect("must have data");
assert_eq!(data.partition_id(), ARBITRARY_PARTITION_ID);
assert_eq!(data.record_batches().len(), 2);
let expected = [
@ -503,15 +490,7 @@ mod tests {
"| Madrid | 4.0 | none | 1970-01-01T00:00:00.000000020Z |",
"+--------+--------+----------+--------------------------------+",
];
assert_batches_eq!(
expected,
&*data
.record_batches()
.iter()
.map(Deref::deref)
.cloned()
.collect::<Vec<_>>()
);
assert_batches_eq!(expected, data.record_batches());
}
// The persist now "completes".
@ -526,7 +505,9 @@ mod tests {
// Querying the buffer should now return only the second write.
{
let data = p.get_query_data().expect("must have data");
let data = p
.get_query_data(&OwnedProjection::default())
.expect("must have data");
assert_eq!(data.partition_id(), ARBITRARY_PARTITION_ID);
assert_eq!(data.record_batches().len(), 1);
let expected = [
@ -536,15 +517,7 @@ mod tests {
"| Madrid | 4.0 | none | 1970-01-01T00:00:00.000000020Z |",
"+--------+--------+---------+--------------------------------+",
];
assert_batches_eq!(
expected,
&*data
.record_batches()
.iter()
.map(Deref::deref)
.cloned()
.collect::<Vec<_>>()
);
assert_batches_eq!(expected, data.record_batches());
}
}
@ -557,12 +530,7 @@ mod tests {
// A helper function to dedupe the record batches in [`QueryAdaptor`]
// and assert the resulting batch contents.
async fn assert_deduped(expect: &[&str], batch: QueryAdaptor) {
let batch = batch
.record_batches()
.iter()
.map(Deref::deref)
.cloned()
.collect::<Vec<_>>();
let batch = batch.record_batches().to_vec();
let sort_keys = vec![PhysicalSortExpr {
expr: col("time", &batch[0].schema()).unwrap(),
@ -596,7 +564,13 @@ mod tests {
p.buffer_write(mb, SequenceNumber::new(1))
.expect("write should succeed");
assert_eq!(p.get_query_data().unwrap().record_batches().len(), 1);
assert_eq!(
p.get_query_data(&OwnedProjection::default())
.unwrap()
.record_batches()
.len(),
1
);
assert_deduped(
&[
"+--------------------------------+-----+",
@ -605,7 +579,7 @@ mod tests {
"| 1970-01-01T00:00:00.000000042Z | 1.0 |",
"+--------------------------------+-----+",
],
p.get_query_data().unwrap(),
p.get_query_data(&OwnedProjection::default()).unwrap(),
)
.await;
@ -614,7 +588,13 @@ mod tests {
p.buffer_write(mb, SequenceNumber::new(2))
.expect("write should succeed");
assert_eq!(p.get_query_data().unwrap().record_batches().len(), 1);
assert_eq!(
p.get_query_data(&OwnedProjection::default())
.unwrap()
.record_batches()
.len(),
1
);
assert_deduped(
&[
"+--------------------------------+-----+",
@ -623,7 +603,7 @@ mod tests {
"| 1970-01-01T00:00:00.000000042Z | 2.0 |",
"+--------------------------------+-----+",
],
p.get_query_data().unwrap(),
p.get_query_data(&OwnedProjection::default()).unwrap(),
)
.await;
@ -656,7 +636,13 @@ mod tests {
p.buffer_write(mb, SequenceNumber::new(3))
.expect("write should succeed");
assert_eq!(p.get_query_data().unwrap().record_batches().len(), 2);
assert_eq!(
p.get_query_data(&OwnedProjection::default())
.unwrap()
.record_batches()
.len(),
2
);
assert_deduped(
&[
"+--------------------------------+-----+",
@ -665,7 +651,7 @@ mod tests {
"| 1970-01-01T00:00:00.000000042Z | 3.0 |",
"+--------------------------------+-----+",
],
p.get_query_data().unwrap(),
p.get_query_data(&OwnedProjection::default()).unwrap(),
)
.await;
@ -697,7 +683,13 @@ mod tests {
p.buffer_write(mb, SequenceNumber::new(3))
.expect("write should succeed");
assert_eq!(p.get_query_data().unwrap().record_batches().len(), 3);
assert_eq!(
p.get_query_data(&OwnedProjection::default())
.unwrap()
.record_batches()
.len(),
3
);
assert_deduped(
&[
"+--------------------------------+-----+",
@ -706,7 +698,7 @@ mod tests {
"| 1970-01-01T00:00:00.000000042Z | 4.0 |",
"+--------------------------------+-----+",
],
p.get_query_data().unwrap(),
p.get_query_data(&OwnedProjection::default()).unwrap(),
)
.await;
@ -717,7 +709,13 @@ mod tests {
assert!(set.contains(SequenceNumber::new(2)));
// And assert the correct value remains.
assert_eq!(p.get_query_data().unwrap().record_batches().len(), 2);
assert_eq!(
p.get_query_data(&OwnedProjection::default())
.unwrap()
.record_batches()
.len(),
2
);
assert_deduped(
&[
"+--------------------------------+-----+",
@ -726,7 +724,7 @@ mod tests {
"| 1970-01-01T00:00:00.000000042Z | 4.0 |",
"+--------------------------------+-----+",
],
p.get_query_data().unwrap(),
p.get_query_data(&OwnedProjection::default()).unwrap(),
)
.await;
@ -736,7 +734,13 @@ mod tests {
assert!(set.contains(SequenceNumber::new(3)));
// And assert the correct value remains.
assert_eq!(p.get_query_data().unwrap().record_batches().len(), 1);
assert_eq!(
p.get_query_data(&OwnedProjection::default())
.unwrap()
.record_batches()
.len(),
1
);
assert_deduped(
&[
"+--------------------------------+-----+",
@ -745,7 +749,7 @@ mod tests {
"| 1970-01-01T00:00:00.000000042Z | 4.0 |",
"+--------------------------------+-----+",
],
p.get_query_data().unwrap(),
p.get_query_data(&OwnedProjection::default()).unwrap(),
)
.await;
@ -777,7 +781,7 @@ mod tests {
p.buffer_write(mb, SequenceNumber::new(3))
.expect("write should succeed");
let data = p.get_query_data().unwrap();
let data = p.get_query_data(&OwnedProjection::default()).unwrap();
assert_batches_eq!(
[
"+--------------------------------+-----+",
@ -787,12 +791,7 @@ mod tests {
"| 1970-01-01T00:00:00.000000042Z | 2.0 |",
"+--------------------------------+-----+",
],
&*data
.record_batches()
.iter()
.map(Deref::deref)
.cloned()
.collect::<Vec<_>>()
&*data.record_batches().to_vec()
);
// Persist again, moving the last write to the persisting state and
@ -805,7 +804,7 @@ mod tests {
p.buffer_write(mb, SequenceNumber::new(4))
.expect("write should succeed");
let data = p.get_query_data().unwrap();
let data = p.get_query_data(&OwnedProjection::default()).unwrap();
assert_batches_eq!(
[
"+--------------------------------+-----+",
@ -816,12 +815,7 @@ mod tests {
"| 1970-01-01T00:00:00.000000042Z | 3.0 |",
"+--------------------------------+-----+",
],
&*data
.record_batches()
.iter()
.map(Deref::deref)
.cloned()
.collect::<Vec<_>>()
&*data.record_batches().to_vec()
);
// Persist again, moving the last write to the persisting state and
@ -834,7 +828,7 @@ mod tests {
p.buffer_write(mb, SequenceNumber::new(5))
.expect("write should succeed");
let data = p.get_query_data().unwrap();
let data = p.get_query_data(&OwnedProjection::default()).unwrap();
assert_batches_eq!(
[
"+--------------------------------+-----+",
@ -846,12 +840,7 @@ mod tests {
"| 1970-01-01T00:00:00.000000042Z | 4.0 |",
"+--------------------------------+-----+",
],
&*data
.record_batches()
.iter()
.map(Deref::deref)
.cloned()
.collect::<Vec<_>>()
&*data.record_batches().to_vec()
);
// Finish persisting the second batch out-of-order! The middle entry,
@ -860,7 +849,7 @@ mod tests {
assert_eq!(set.len(), 1);
assert!(set.contains(SequenceNumber::new(3)));
let data = p.get_query_data().unwrap();
let data = p.get_query_data(&OwnedProjection::default()).unwrap();
assert_batches_eq!(
[
"+--------------------------------+-----+",
@ -871,12 +860,7 @@ mod tests {
"| 1970-01-01T00:00:00.000000042Z | 4.0 |",
"+--------------------------------+-----+",
],
&*data
.record_batches()
.iter()
.map(Deref::deref)
.cloned()
.collect::<Vec<_>>()
&*data.record_batches().to_vec()
);
// Finish persisting the last batch.
@ -884,7 +868,7 @@ mod tests {
assert_eq!(set.len(), 1);
assert!(set.contains(SequenceNumber::new(4)));
let data = p.get_query_data().unwrap();
let data = p.get_query_data(&OwnedProjection::default()).unwrap();
assert_batches_eq!(
[
"+--------------------------------+-----+",
@ -894,12 +878,7 @@ mod tests {
"| 1970-01-01T00:00:00.000000042Z | 4.0 |",
"+--------------------------------+-----+",
],
&*data
.record_batches()
.iter()
.map(Deref::deref)
.cloned()
.collect::<Vec<_>>()
&*data.record_batches().to_vec()
);
// Finish persisting the first batch.
@ -908,7 +887,7 @@ mod tests {
assert!(set.contains(SequenceNumber::new(1)));
// Assert only the buffered data remains
let data = p.get_query_data().unwrap();
let data = p.get_query_data(&OwnedProjection::default()).unwrap();
assert_batches_eq!(
[
"+--------------------------------+-----+",
@ -917,12 +896,7 @@ mod tests {
"| 1970-01-01T00:00:00.000000042Z | 4.0 |",
"+--------------------------------+-----+",
],
&*data
.record_batches()
.iter()
.map(Deref::deref)
.cloned()
.collect::<Vec<_>>()
&*data.record_batches().to_vec()
);
}
@ -1009,7 +983,7 @@ mod tests {
);
// Nothing should explode, data should be readable.
let data = p.get_query_data().unwrap();
let data = p.get_query_data(&OwnedProjection::default()).unwrap();
assert_batches_eq!(
[
"+--------+--------+----------+--------------------------------+",
@ -1019,12 +993,7 @@ mod tests {
"| Madrid | 2.0 | none | 1970-01-01T00:00:00.000000011Z |",
"+--------+--------+----------+--------------------------------+",
],
&*data
.record_batches()
.iter()
.map(Deref::deref)
.cloned()
.collect::<Vec<_>>()
&*data.record_batches().to_vec()
);
}
@ -1053,6 +1022,6 @@ mod tests {
async fn test_empty_partition_no_queryadaptor_panic() {
let mut p = PartitionDataBuilder::new().build();
assert!(p.get_query_data().is_none());
assert!(p.get_query_data(&OwnedProjection::default()).is_none());
}
}

View File

@ -1,5 +1,3 @@
use std::sync::Arc;
use arrow::record_batch::RecordBatch;
use data_types::SequenceNumber;
use mutable_batch::MutableBatch;
@ -11,6 +9,8 @@ pub(crate) mod traits;
pub(crate) use state_machine::*;
use crate::query::projection::OwnedProjection;
use self::{always_some::AlwaysSome, traits::Queryable};
/// The current state of the [`BufferState`] state machine.
@ -63,12 +63,12 @@ impl DataBuffer {
/// Return all data for this buffer, ordered by the [`SequenceNumber`] from
/// which it was buffered with.
pub(crate) fn get_query_data(&mut self) -> Vec<Arc<RecordBatch>> {
pub(crate) fn get_query_data(&mut self, projection: &OwnedProjection) -> Vec<RecordBatch> {
// Take ownership of the FSM and return the data within it.
self.0.mutate(|fsm| match fsm {
// The buffering state can return data.
FsmState::Buffering(b) => {
let ret = b.get_query_data();
let ret = b.get_query_data(projection);
(FsmState::Buffering(b), ret)
}
})
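
The `mutate` call above must move the FSM state out by value (state transitions consume `self`) and then put the resulting state back. A minimal sketch of that take-and-replace idiom (hypothetical stand-in; the crate's `AlwaysSome` may differ in detail):

struct AlwaysSome<T>(Option<T>);

impl<T> AlwaysSome<T> {
    fn new(value: T) -> Self {
        Self(Some(value))
    }

    /// Hand the state to `f` by value; `f` returns the next state plus a
    /// result, allowing consuming transitions behind a `&mut self` API.
    fn mutate<R>(&mut self, f: impl FnOnce(T) -> (T, R)) -> R {
        let state = self.0.take().expect("invariant: value always present");
        let (next, ret) = f(state);
        self.0 = Some(next);
        ret
    }
}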

View File

@ -1,5 +1,3 @@
use std::sync::Arc;
use arrow::record_batch::RecordBatch;
use mutable_batch::MutableBatch;
use schema::Projection;
@ -39,12 +37,12 @@ impl Buffer {
/// # Panics
///
/// If generating the snapshot fails, this method panics.
pub(super) fn snapshot(self) -> Option<Arc<RecordBatch>> {
Some(Arc::new(
pub(super) fn snapshot(self) -> Option<RecordBatch> {
Some(
self.buffer?
.to_arrow(Projection::All)
.expect("failed to snapshot buffer data"),
))
)
}
pub(super) fn is_empty(&self) -> bool {

View File

@ -1,6 +1,4 @@
#![allow(dead_code)]
use std::sync::Arc;
use arrow::record_batch::RecordBatch;
use data_types::{sequence_number_set::SequenceNumberSet, SequenceNumber};
use mutable_batch::MutableBatch;
@ -12,6 +10,8 @@ mod snapshot;
pub(in crate::buffer_tree::partition::buffer) use buffering::*;
pub(crate) use persisting::*;
use crate::query::projection::OwnedProjection;
use super::traits::{Queryable, Writeable};
/// A result type for fallible transitions.
@ -122,14 +122,14 @@ where
/// Returns the current buffer data.
///
/// This is always a cheap method call.
fn get_query_data(&self) -> Vec<Arc<RecordBatch>> {
self.state.get_query_data()
fn get_query_data(&self, projection: &OwnedProjection) -> Vec<RecordBatch> {
self.state.get_query_data(projection)
}
}
#[cfg(test)]
mod tests {
use std::ops::Deref;
use std::sync::Arc;
use arrow_util::assert_batches_eq;
use mutable_batch_lp::test_helpers::lp_to_mutable_batch;
@ -139,6 +139,8 @@ mod tests {
use super::*;
#[test]
// comparing dyn Array always has the same vtable, so it is accurate to use Arc::ptr_eq
#[allow(clippy::vtable_address_comparisons)]
fn test_buffer_lifecycle() {
// Initialise a buffer in the base state.
let mut buffer: BufferState<Buffering> = BufferState::new();
@ -166,7 +168,7 @@ mod tests {
// Keep the data to validate they are ref-counted copies after further
// writes below. Note this construct allows the caller to decide when/if
// to allocate.
let w1_data = buffer.get_query_data();
let w1_data = buffer.get_query_data(&OwnedProjection::default());
let expected = vec![
"+-------+----------+----------+--------------------------------+",
@ -175,7 +177,7 @@ mod tests {
"| true | 42.0 | platanos | 1991-03-10T00:00:42.000000042Z |",
"+-------+----------+----------+--------------------------------+",
];
assert_batches_eq!(&expected, &[w1_data[0].deref().clone()]);
assert_batches_eq!(&expected, &[w1_data[0].clone()]);
// Apply another write.
buffer
@ -195,7 +197,7 @@ mod tests {
};
// Verify the writes are still queryable.
let w2_data = buffer.get_query_data();
let w2_data = buffer.get_query_data(&OwnedProjection::default());
let expected = vec![
"+-------+----------+----------+--------------------------------+",
"| great | how_much | tag | time |",
@ -205,18 +207,18 @@ mod tests {
"+-------+----------+----------+--------------------------------+",
];
assert_eq!(w2_data.len(), 1);
assert_batches_eq!(&expected, &[w2_data[0].deref().clone()]);
assert_batches_eq!(&expected, &[w2_data[0].clone()]);
// Ensure the same data is returned for a second read.
{
let second_read = buffer.get_query_data();
let second_read = buffer.get_query_data(&OwnedProjection::default());
assert_eq!(w2_data, second_read);
// And that no data was actually copied.
let same_arcs = w2_data
.iter()
.zip(second_read.iter())
.all(|(a, b)| Arc::ptr_eq(a, b));
.all(|(a, b)| Arc::ptr_eq(a.column(0), b.column(0)));
assert!(same_arcs);
}
@ -224,14 +226,120 @@ mod tests {
let buffer: BufferState<Persisting> = buffer.into_persisting();
// Extract the final buffered result
let final_data = buffer.get_query_data();
let final_data = buffer.get_query_data(&OwnedProjection::default());
// And once again verify no data was changed, copied or re-ordered.
assert_eq!(w2_data, final_data);
let same_arcs = w2_data
.into_iter()
.zip(final_data.into_iter())
.all(|(a, b)| Arc::ptr_eq(&a, &b));
.all(|(a, b)| Arc::ptr_eq(a.column(0), b.column(0)));
assert!(same_arcs);
// Assert the sequence numbers were recorded.
let set = buffer.into_sequence_number_set();
assert!(set.contains(SequenceNumber::new(0)));
assert!(set.contains(SequenceNumber::new(1)));
assert_eq!(set.len(), 2);
}
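
The `same_arcs` checks above work because cloning a `RecordBatch` is shallow: columns are `Arc<dyn Array>`, so pointer equality on a column proves the buffers were shared rather than copied. A small standalone illustration (a sketch, not test code from this crate):

use std::sync::Arc;

use arrow::array::{ArrayRef, Int64Array};
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;

fn main() {
    let schema = Arc::new(Schema::new(vec![Field::new("v", DataType::Int64, false)]));
    let column: ArrayRef = Arc::new(Int64Array::from(vec![1, 2, 3]));
    let a = RecordBatch::try_new(schema, vec![column]).expect("column matches schema");
    let b = a.clone();

    // Cloning a RecordBatch clones Arcs, not data: both batches share the
    // same column allocation.
    assert!(Arc::ptr_eq(a.column(0), b.column(0)));
}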
/// Assert projection is correct across all the queryable FSM states.
#[test]
// comparing dyn Array always has the same vtable, so it is accurate to use Arc::ptr_eq
#[allow(clippy::vtable_address_comparisons)]
fn test_buffer_projection() {
let projection = OwnedProjection::from(vec![
"tag".to_string(),
"great".to_string(),
"missing".to_string(),
"time".to_string(),
]);
// Initialise a buffer in the base state.
let mut buffer: BufferState<Buffering> = BufferState::new();
// Write some data to a buffer.
buffer
.write(
lp_to_mutable_batch(
r#"bananas,tag=platanos great=true,how_much=42 668563242000000042"#,
)
.1,
SequenceNumber::new(0),
)
.expect("write to empty buffer should succeed");
// Extract the queryable data from the buffer and validate it.
//
// Keep the data to validate they are ref-counted copies after further
// writes below. Note this construct allows the caller to decide when/if
// to allocate.
let w1_data = buffer.get_query_data(&projection);
let expected = vec![
"+----------+-------+--------------------------------+",
"| tag | great | time |",
"+----------+-------+--------------------------------+",
"| platanos | true | 1991-03-10T00:00:42.000000042Z |",
"+----------+-------+--------------------------------+",
];
assert_batches_eq!(&expected, &[w1_data[0].clone()]);
// Apply another write.
buffer
.write(
lp_to_mutable_batch(
r#"bananas,tag=platanos great=true,how_much=1000 668563242000000043"#,
)
.1,
SequenceNumber::new(1),
)
.expect("write to empty buffer should succeed");
// Snapshot the buffer into an immutable, queryable data format.
let buffer: BufferState<Snapshot> = match buffer.snapshot() {
Transition::Ok(v) => v,
Transition::Unchanged(_) => panic!("did not transition to snapshot state"),
};
// Verify the writes are still queryable.
let w2_data = buffer.get_query_data(&projection);
let expected = vec![
"+----------+-------+--------------------------------+",
"| tag | great | time |",
"+----------+-------+--------------------------------+",
"| platanos | true | 1991-03-10T00:00:42.000000042Z |",
"| platanos | true | 1991-03-10T00:00:42.000000043Z |",
"+----------+-------+--------------------------------+",
];
assert_eq!(w2_data.len(), 1);
assert_batches_eq!(&expected, &[w2_data[0].clone()]);
// Ensure the same data is returned for a second read.
{
let second_read = buffer.get_query_data(&projection);
assert_eq!(w2_data, second_read);
// And that no data was actually copied.
let same_arcs = w2_data
.iter()
.zip(second_read.iter())
.all(|(a, b)| Arc::ptr_eq(a.column(0), b.column(0)));
assert!(same_arcs);
}
// Finally transition into the terminal persisting state.
let buffer: BufferState<Persisting> = buffer.into_persisting();
// Extract the final buffered result
let final_data = buffer.get_query_data(&projection);
// And once again verify no data was changed, copied or re-ordered.
assert_eq!(w2_data, final_data);
let same_arcs = w2_data
.into_iter()
.zip(final_data.into_iter())
.all(|(a, b)| Arc::ptr_eq(a.column(0), b.column(0)));
assert!(same_arcs);
// Assert the sequence numbers were recorded.
@ -258,16 +366,16 @@ mod tests {
Transition::Unchanged(_) => panic!("failed to transition"),
};
assert_eq!(buffer.get_query_data().len(), 1);
assert_eq!(buffer.get_query_data(&OwnedProjection::default()).len(), 1);
let snapshot = &buffer.get_query_data()[0];
let snapshot = buffer.get_query_data(&OwnedProjection::default())[0].clone();
// Generate the combined buffer from the original inputs to compare
// against.
mb1.extend_from(&mb2).unwrap();
let want = mb1.to_arrow(Projection::All).unwrap();
assert_eq!(&**snapshot, &want);
assert_eq!(snapshot, want);
}
#[test]

View File

@ -1,15 +1,15 @@
//! A write buffer.
use std::sync::Arc;
use arrow::record_batch::RecordBatch;
use mutable_batch::MutableBatch;
use schema::Projection;
use super::{snapshot::Snapshot, BufferState, Transition};
use crate::buffer_tree::partition::buffer::{
mutable_buffer::Buffer,
traits::{Queryable, Writeable},
use crate::{
buffer_tree::partition::buffer::{
mutable_buffer::Buffer,
traits::{Queryable, Writeable},
},
query::projection::OwnedProjection,
};
/// The FSM starting ingest state - a mutable buffer collecting writes.
@ -35,18 +35,11 @@ pub(crate) struct Buffering {
/// This method panics if converting the buffered data (if any) into an Arrow
/// [`RecordBatch`] fails (a non-transient error).
impl Queryable for Buffering {
fn get_query_data(&self) -> Vec<Arc<RecordBatch>> {
let data = self.buffer.buffer().map(|v| {
Arc::new(
v.to_arrow(Projection::All)
.expect("failed to snapshot buffer data"),
)
});
match data {
Some(v) => vec![v],
None => vec![],
}
fn get_query_data(&self, projection: &OwnedProjection) -> Vec<RecordBatch> {
self.buffer
.buffer()
.map(|v| vec![projection.project_mutable_batches(v)])
.unwrap_or_default()
}
}

View File

@ -1,12 +1,12 @@
//! A write buffer, with one or more snapshots.
use std::sync::Arc;
use arrow::record_batch::RecordBatch;
use data_types::sequence_number_set::SequenceNumberSet;
use super::BufferState;
use crate::buffer_tree::partition::buffer::traits::Queryable;
use crate::{
buffer_tree::partition::buffer::traits::Queryable, query::projection::OwnedProjection,
};
/// An immutable set of [`RecordBatch`] in the process of being persisted.
#[derive(Debug)]
@ -14,18 +14,18 @@ pub(crate) struct Persisting {
/// Snapshots generated from previous buffer contents to be persisted.
///
/// INVARIANT: this array is always non-empty.
snapshots: Vec<Arc<RecordBatch>>,
snapshots: Vec<RecordBatch>,
}
impl Persisting {
pub(super) fn new(snapshots: Vec<Arc<RecordBatch>>) -> Self {
pub(super) fn new(snapshots: Vec<RecordBatch>) -> Self {
Self { snapshots }
}
}
impl Queryable for Persisting {
fn get_query_data(&self) -> Vec<Arc<RecordBatch>> {
self.snapshots.clone()
fn get_query_data(&self, projection: &OwnedProjection) -> Vec<RecordBatch> {
projection.project_record_batch(&self.snapshots)
}
}
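
`project_record_batch` is this PR's helper; conceptually it reduces to arrow's own column selection. A hedged sketch of the underlying idea using stock arrow APIs (not the actual implementation; note that names absent from the schema are skipped silently, matching the "missing" column case the projection tests exercise):

use arrow::record_batch::RecordBatch;

/// Select the named columns from `batch`, skipping names not in the schema.
fn project_by_names(batch: &RecordBatch, names: &[&str]) -> RecordBatch {
    let indices: Vec<usize> = names
        .iter()
        .filter_map(|name| batch.schema().index_of(name).ok())
        .collect();
    batch
        .project(&indices)
        .expect("indices are valid by construction")
}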

View File

@ -1,12 +1,11 @@
//! A write buffer, with one or more snapshots.
use std::sync::Arc;
use arrow::record_batch::RecordBatch;
use super::BufferState;
use crate::buffer_tree::partition::buffer::{
state_machine::persisting::Persisting, traits::Queryable,
use crate::{
buffer_tree::partition::buffer::{state_machine::persisting::Persisting, traits::Queryable},
query::projection::OwnedProjection,
};
/// An immutable, queryable FSM state containing at least one buffer snapshot.
@ -15,19 +14,19 @@ pub(crate) struct Snapshot {
/// Snapshots generated from previous buffer contents.
///
/// INVARIANT: this array is always non-empty.
snapshots: Vec<Arc<RecordBatch>>,
snapshots: Vec<RecordBatch>,
}
impl Snapshot {
pub(super) fn new(snapshots: Vec<Arc<RecordBatch>>) -> Self {
pub(super) fn new(snapshots: Vec<RecordBatch>) -> Self {
assert!(!snapshots.is_empty());
Self { snapshots }
}
}
impl Queryable for Snapshot {
fn get_query_data(&self) -> Vec<Arc<RecordBatch>> {
self.snapshots.clone()
fn get_query_data(&self, projection: &OwnedProjection) -> Vec<RecordBatch> {
projection.project_record_batch(&self.snapshots)
}
}

View File

@ -1,10 +1,12 @@
//! Private traits for state machine states.
use std::{fmt::Debug, sync::Arc};
use std::fmt::Debug;
use arrow::record_batch::RecordBatch;
use mutable_batch::MutableBatch;
use crate::query::projection::OwnedProjection;
/// A state that can accept writes.
pub(crate) trait Writeable: Debug {
fn write(&mut self, batch: MutableBatch) -> Result<(), mutable_batch::Error>;
@ -13,5 +15,5 @@ pub(crate) trait Writeable: Debug {
/// A state that can return the contents of the buffer as one or more
/// [`RecordBatch`] instances.
pub(crate) trait Queryable: Debug {
fn get_query_data(&self) -> Vec<Arc<RecordBatch>>;
fn get_query_data(&self, projection: &OwnedProjection) -> Vec<RecordBatch>;
}
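
As a toy illustration of how a read-only state plugs into the trait above, a hypothetical `Frozen` state could implement `Queryable` and nothing else, mirroring how `Snapshot` and `Persisting` expose data without accepting writes (`project_record_batch` is the projection helper introduced by this PR):

/// A hypothetical read-only FSM state: queryable, but not writeable.
#[derive(Debug)]
struct Frozen {
    batches: Vec<RecordBatch>,
}

impl Queryable for Frozen {
    fn get_query_data(&self, projection: &OwnedProjection) -> Vec<RecordBatch> {
        // Apply the caller's projection to the immutable batches.
        projection.project_record_batch(&self.batches)
    }
}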

View File

@ -14,7 +14,7 @@ use crate::{
buffer_tree::{
namespace::NamespaceName,
partition::{resolver::SortKeyResolver, PartitionData, SortKeyState},
table::TableName,
table::TableMetadata,
},
deferred_load::DeferredLoad,
};
@ -173,7 +173,7 @@ where
namespace_id: NamespaceId,
namespace_name: Arc<DeferredLoad<NamespaceName>>,
table_id: TableId,
table_name: Arc<DeferredLoad<TableName>>,
table: Arc<DeferredLoad<TableMetadata>>,
) -> Arc<Mutex<PartitionData>> {
// Use the cached PartitionKey instead of the caller's partition_key,
// preferring to reuse the already-shared Arc<str> in the cache.
@ -203,7 +203,7 @@ where
namespace_id,
namespace_name,
table_id,
table_name,
table,
SortKeyState::Deferred(Arc::new(sort_key_resolver)),
)));
}
@ -212,13 +212,7 @@ where
// Otherwise delegate to the catalog / inner impl.
self.inner
.get_partition(
partition_key,
namespace_id,
namespace_name,
table_id,
table_name,
)
.get_partition(partition_key, namespace_id, namespace_name, table_id, table)
.await
}
}
@ -234,7 +228,7 @@ mod tests {
use crate::{
buffer_tree::partition::resolver::mock::MockPartitionProvider,
test_util::{
defer_namespace_name_1_sec, defer_table_name_1_sec, PartitionDataBuilder,
defer_namespace_name_1_sec, defer_table_metadata_1_sec, PartitionDataBuilder,
ARBITRARY_NAMESPACE_ID, ARBITRARY_NAMESPACE_NAME, ARBITRARY_PARTITION_ID,
ARBITRARY_PARTITION_KEY, ARBITRARY_PARTITION_KEY_STR, ARBITRARY_TABLE_ID,
ARBITRARY_TABLE_NAME,
@ -270,15 +264,15 @@ mod tests {
ARBITRARY_NAMESPACE_ID,
defer_namespace_name_1_sec(),
ARBITRARY_TABLE_ID,
defer_table_name_1_sec(),
defer_table_metadata_1_sec(),
)
.await;
assert_eq!(got.lock().partition_id(), ARBITRARY_PARTITION_ID);
assert_eq!(got.lock().table_id(), ARBITRARY_TABLE_ID);
assert_eq!(
&**got.lock().table_name().get().await,
&***ARBITRARY_TABLE_NAME
&**got.lock().table().get().await.name(),
&**ARBITRARY_TABLE_NAME
);
assert_eq!(
&**got.lock().namespace_name().get().await,
@ -309,15 +303,15 @@ mod tests {
ARBITRARY_NAMESPACE_ID,
defer_namespace_name_1_sec(),
ARBITRARY_TABLE_ID,
defer_table_name_1_sec(),
defer_table_metadata_1_sec(),
)
.await;
assert_eq!(got.lock().partition_id(), ARBITRARY_PARTITION_ID);
assert_eq!(got.lock().table_id(), ARBITRARY_TABLE_ID);
assert_eq!(
&**got.lock().table_name().get().await,
&***ARBITRARY_TABLE_NAME
&**got.lock().table().get().await.name(),
&**ARBITRARY_TABLE_NAME
);
assert_eq!(
&**got.lock().namespace_name().get().await,
@ -366,15 +360,15 @@ mod tests {
ARBITRARY_NAMESPACE_ID,
defer_namespace_name_1_sec(),
ARBITRARY_TABLE_ID,
defer_table_name_1_sec(),
defer_table_metadata_1_sec(),
)
.await;
assert_eq!(got.lock().partition_id(), other_key_id);
assert_eq!(got.lock().table_id(), ARBITRARY_TABLE_ID);
assert_eq!(
&**got.lock().table_name().get().await,
&***ARBITRARY_TABLE_NAME
&**got.lock().table().get().await.name(),
&**ARBITRARY_TABLE_NAME
);
}
@ -402,15 +396,15 @@ mod tests {
ARBITRARY_NAMESPACE_ID,
defer_namespace_name_1_sec(),
other_table,
defer_table_name_1_sec(),
defer_table_metadata_1_sec(),
)
.await;
assert_eq!(got.lock().partition_id(), ARBITRARY_PARTITION_ID);
assert_eq!(got.lock().table_id(), other_table);
assert_eq!(
&**got.lock().table_name().get().await,
&***ARBITRARY_TABLE_NAME
&**got.lock().table().get().await.name(),
&**ARBITRARY_TABLE_NAME
);
}
}

View File

@ -15,7 +15,7 @@ use crate::{
buffer_tree::{
namespace::NamespaceName,
partition::{PartitionData, SortKeyState},
table::TableName,
table::TableMetadata,
},
deferred_load::DeferredLoad,
};
@ -61,12 +61,12 @@ impl PartitionProvider for CatalogPartitionResolver {
namespace_id: NamespaceId,
namespace_name: Arc<DeferredLoad<NamespaceName>>,
table_id: TableId,
table_name: Arc<DeferredLoad<TableName>>,
table: Arc<DeferredLoad<TableMetadata>>,
) -> Arc<Mutex<PartitionData>> {
debug!(
%partition_key,
%table_id,
%table_name,
%table,
"upserting partition in catalog"
);
let p = Backoff::new(&self.backoff_config)
@ -86,7 +86,7 @@ impl PartitionProvider for CatalogPartitionResolver {
namespace_id,
namespace_name,
table_id,
table_name,
table,
SortKeyState::Provided(p.sort_key()),
)))
}
@ -103,6 +103,7 @@ mod tests {
use iox_catalog::test_helpers::{arbitrary_namespace, arbitrary_table};
use super::*;
use crate::buffer_tree::table::TableName;
const TABLE_NAME: &str = "bananas";
const NAMESPACE_NAME: &str = "ns-bananas";
@ -138,17 +139,25 @@ mod tests {
table_id,
Arc::new(DeferredLoad::new(
Duration::from_secs(1),
async { TableName::from(TABLE_NAME) },
async {
TableMetadata::new_for_testing(
TableName::from(TABLE_NAME),
Default::default(),
)
},
&metrics,
)),
)
.await;
// Ensure the table name is available.
let _ = got.lock().table_name().get().await;
let _ = got.lock().table().get().await.name();
assert_eq!(got.lock().namespace_id(), namespace_id);
assert_eq!(got.lock().table_name().to_string(), table_name.to_string());
assert_eq!(
got.lock().table().get().await.name().to_string(),
table_name.to_string()
);
assert_matches!(got.lock().sort_key(), SortKeyState::Provided(None));
assert!(got.lock().partition_key.ptr_eq(&callers_partition_key));

View File

@ -14,7 +14,7 @@ use hashbrown::{hash_map::Entry, HashMap};
use parking_lot::Mutex;
use crate::{
buffer_tree::{namespace::NamespaceName, partition::PartitionData, table::TableName},
buffer_tree::{namespace::NamespaceName, partition::PartitionData, table::TableMetadata},
deferred_load::DeferredLoad,
};
@ -146,7 +146,7 @@ where
namespace_id: NamespaceId,
namespace_name: Arc<DeferredLoad<NamespaceName>>,
table_id: TableId,
table_name: Arc<DeferredLoad<TableName>>,
table: Arc<DeferredLoad<TableMetadata>>,
) -> Arc<Mutex<PartitionData>> {
let key = Key {
namespace_id,
@ -170,7 +170,7 @@ where
namespace_id,
namespace_name,
table_id,
table_name,
table,
));
// Make the future poll-able by many callers, all of which
@ -233,7 +233,7 @@ async fn do_fetch<T>(
namespace_id: NamespaceId,
namespace_name: Arc<DeferredLoad<NamespaceName>>,
table_id: TableId,
table_name: Arc<DeferredLoad<TableName>>,
table: Arc<DeferredLoad<TableMetadata>>,
) -> Arc<Mutex<PartitionData>>
where
T: PartitionProvider + 'static,
@ -248,13 +248,7 @@ where
// (which would cause the connection to be returned).
tokio::spawn(async move {
inner
.get_partition(
partition_key,
namespace_id,
namespace_name,
table_id,
table_name,
)
.get_partition(partition_key, namespace_id, namespace_name, table_id, table)
.await
})
.await
@ -280,7 +274,7 @@ mod tests {
use crate::{
buffer_tree::partition::{resolver::mock::MockPartitionProvider, SortKeyState},
test_util::{
defer_namespace_name_1_sec, defer_table_name_1_sec, PartitionDataBuilder,
defer_namespace_name_1_sec, defer_table_metadata_1_sec, PartitionDataBuilder,
ARBITRARY_NAMESPACE_ID, ARBITRARY_PARTITION_KEY, ARBITRARY_TABLE_ID,
},
};
@ -308,7 +302,7 @@ mod tests {
ARBITRARY_NAMESPACE_ID,
defer_namespace_name_1_sec(),
ARBITRARY_TABLE_ID,
defer_table_name_1_sec(),
defer_table_metadata_1_sec(),
)
})
.collect::<FuturesUnordered<_>>()
@ -342,7 +336,7 @@ mod tests {
_namespace_id: NamespaceId,
_namespace_name: Arc<DeferredLoad<NamespaceName>>,
_table_id: TableId,
_table_name: Arc<DeferredLoad<TableName>>,
_table: Arc<DeferredLoad<TableMetadata>>,
) -> core::pin::Pin<
Box<
dyn core::future::Future<Output = Arc<Mutex<PartitionData>>>
@ -368,7 +362,7 @@ mod tests {
let data = PartitionDataBuilder::new().build();
let namespace_loader = defer_namespace_name_1_sec();
let table_name_loader = defer_table_name_1_sec();
let table_loader = defer_table_metadata_1_sec();
// Add a single instance of the partition - if more than one call is
// made to the mock, it will panic.
@ -384,14 +378,14 @@ mod tests {
ARBITRARY_NAMESPACE_ID,
Arc::clone(&namespace_loader),
ARBITRARY_TABLE_ID,
Arc::clone(&table_name_loader),
Arc::clone(&table_loader),
);
let pa_2 = layer.get_partition(
ARBITRARY_PARTITION_KEY.clone(),
ARBITRARY_NAMESPACE_ID,
Arc::clone(&namespace_loader),
ARBITRARY_TABLE_ID,
Arc::clone(&table_name_loader),
Arc::clone(&table_loader),
);
let waker = futures::task::noop_waker();
@ -411,7 +405,7 @@ mod tests {
ARBITRARY_NAMESPACE_ID,
namespace_loader,
ARBITRARY_TABLE_ID,
table_name_loader,
table_loader,
)
.with_timeout_panic(Duration::from_secs(5))
.await;
@ -441,7 +435,7 @@ mod tests {
_namespace_id: NamespaceId,
_namespace_name: Arc<DeferredLoad<NamespaceName>>,
_table_id: TableId,
_table_name: Arc<DeferredLoad<TableName>>,
_table: Arc<DeferredLoad<TableMetadata>>,
) -> Arc<Mutex<PartitionData>> {
let waker = self.wait.notified();
let permit = self.sem.acquire().await.unwrap();
@ -481,7 +475,7 @@ mod tests {
ARBITRARY_NAMESPACE_ID,
defer_namespace_name_1_sec(),
ARBITRARY_TABLE_ID,
defer_table_name_1_sec(),
defer_table_metadata_1_sec(),
);
let waker = futures::task::noop_waker();

View File

@ -8,7 +8,7 @@ use parking_lot::Mutex;
use super::r#trait::PartitionProvider;
use crate::{
buffer_tree::{namespace::NamespaceName, partition::PartitionData, table::TableName},
buffer_tree::{namespace::NamespaceName, partition::PartitionData, table::TableMetadata},
deferred_load::{self, DeferredLoad},
};
@ -53,7 +53,7 @@ impl PartitionProvider for MockPartitionProvider {
namespace_id: NamespaceId,
namespace_name: Arc<DeferredLoad<NamespaceName>>,
table_id: TableId,
table_name: Arc<DeferredLoad<TableName>>,
table: Arc<DeferredLoad<TableMetadata>>,
) -> Arc<Mutex<PartitionData>> {
let p = self
.partitions
@ -75,8 +75,8 @@ impl PartitionProvider for MockPartitionProvider {
deferred_load::UNRESOLVED_DISPLAY_STRING,
);
let actual_table_name = p.table_name().to_string();
let expected_table_name = table_name.get().await.to_string();
let actual_table_name = p.table().to_string();
let expected_table_name = table.get().await.name().to_string();
assert!(
(actual_table_name.as_str() == expected_table_name)
|| (actual_table_name == deferred_load::UNRESOLVED_DISPLAY_STRING),

View File

@ -5,7 +5,7 @@ use data_types::{NamespaceId, PartitionKey, TableId};
use parking_lot::Mutex;
use crate::{
buffer_tree::{namespace::NamespaceName, partition::PartitionData, table::TableName},
buffer_tree::{namespace::NamespaceName, partition::PartitionData, table::TableMetadata},
deferred_load::DeferredLoad,
};
@ -24,7 +24,7 @@ pub(crate) trait PartitionProvider: Send + Sync + Debug {
namespace_id: NamespaceId,
namespace_name: Arc<DeferredLoad<NamespaceName>>,
table_id: TableId,
table_name: Arc<DeferredLoad<TableName>>,
table: Arc<DeferredLoad<TableMetadata>>,
) -> Arc<Mutex<PartitionData>>;
}
@ -39,16 +39,10 @@ where
namespace_id: NamespaceId,
namespace_name: Arc<DeferredLoad<NamespaceName>>,
table_id: TableId,
table_name: Arc<DeferredLoad<TableName>>,
table: Arc<DeferredLoad<TableMetadata>>,
) -> Arc<Mutex<PartitionData>> {
(**self)
.get_partition(
partition_key,
namespace_id,
namespace_name,
table_id,
table_name,
)
.get_partition(partition_key, namespace_id, namespace_name, table_id, table)
.await
}
}
@ -61,7 +55,7 @@ mod tests {
use crate::{
buffer_tree::partition::{resolver::mock::MockPartitionProvider, SortKeyState},
test_util::{
defer_namespace_name_1_sec, defer_table_name_1_sec, PartitionDataBuilder,
defer_namespace_name_1_sec, defer_table_metadata_1_sec, PartitionDataBuilder,
ARBITRARY_NAMESPACE_ID, ARBITRARY_PARTITION_ID, ARBITRARY_PARTITION_KEY,
ARBITRARY_TABLE_ID,
},
@ -70,10 +64,10 @@ mod tests {
#[tokio::test]
async fn test_arc_impl() {
let namespace_loader = defer_namespace_name_1_sec();
let table_name_loader = defer_table_name_1_sec();
let table_loader = defer_table_metadata_1_sec();
let data = PartitionDataBuilder::new()
.with_table_name_loader(Arc::clone(&table_name_loader))
.with_table_loader(Arc::clone(&table_loader))
.with_namespace_loader(Arc::clone(&namespace_loader))
.build();
@ -85,7 +79,7 @@ mod tests {
ARBITRARY_NAMESPACE_ID,
Arc::clone(&namespace_loader),
ARBITRARY_TABLE_ID,
Arc::clone(&table_name_loader),
Arc::clone(&table_loader),
)
.await;
assert_eq!(got.lock().partition_id(), ARBITRARY_PARTITION_ID);
@ -94,9 +88,6 @@ mod tests {
got.lock().namespace_name().to_string(),
namespace_loader.to_string()
);
assert_eq!(
got.lock().table_name().to_string(),
table_name_loader.to_string()
);
assert_eq!(got.lock().table().to_string(), table_loader.to_string());
}
}

View File

@ -4,20 +4,24 @@ use async_trait::async_trait;
use data_types::{NamespaceId, TableId};
use metric::U64Counter;
use parking_lot::Mutex;
use predicate::Predicate;
use trace::span::Span;
use super::{
namespace::{name_resolver::NamespaceNameProvider, NamespaceData},
partition::{resolver::PartitionProvider, PartitionData},
post_write::PostWriteObserver,
table::name_resolver::TableNameProvider,
table::metadata_resolver::TableProvider,
};
use crate::{
arcmap::ArcMap,
dml_payload::IngestOp,
dml_sink::DmlSink,
partition_iter::PartitionIter,
query::{response::QueryResponse, tracing::QueryExecTracing, QueryError, QueryExec},
query::{
projection::OwnedProjection, response::QueryResponse, tracing::QueryExecTracing,
QueryError, QueryExec,
},
};
/// A [`BufferTree`] is the root of an in-memory tree of many [`NamespaceData`]
@ -92,12 +96,12 @@ pub(crate) struct BufferTree<O> {
/// [`NamespaceName`]: data_types::NamespaceName
namespaces: ArcMap<NamespaceId, NamespaceData<O>>,
namespace_name_resolver: Arc<dyn NamespaceNameProvider>,
/// The [`TableName`] provider used by [`NamespaceData`] to initialise a
/// The [`TableMetadata`] provider used by [`NamespaceData`] to initialise a
/// [`TableData`].
///
/// [`TableName`]: crate::buffer_tree::table::TableName
/// [`TableMetadata`]: crate::buffer_tree::table::TableMetadata
/// [`TableData`]: crate::buffer_tree::table::TableData
table_name_resolver: Arc<dyn TableNameProvider>,
table_resolver: Arc<dyn TableProvider>,
metrics: Arc<metric::Registry>,
namespace_count: U64Counter,
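
Each level of the tree lazily initialises its children through `ArcMap`. A minimal mutex-based stand-in for that get-or-create behaviour (a hypothetical simplification; the real `ArcMap` is considerably more careful about locking):

use std::collections::HashMap;
use std::hash::Hash;
use std::sync::{Arc, Mutex};

#[derive(Debug, Default)]
struct SimpleArcMap<K, V> {
    inner: Mutex<HashMap<K, Arc<V>>>,
}

impl<K: Hash + Eq + Clone, V> SimpleArcMap<K, V> {
    /// Return the value for `key`, initialising it via `init` on first use.
    fn get_or_insert_with(&self, key: &K, init: impl FnOnce() -> V) -> Arc<V> {
        let mut guard = self.inner.lock().expect("mutex poisoned");
        let value = guard
            .entry(key.clone())
            .or_insert_with(|| Arc::new(init()));
        Arc::clone(value)
    }
}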
@ -112,7 +116,7 @@ where
/// Initialise a new [`BufferTree`] that emits metrics to `metrics`.
pub(crate) fn new(
namespace_name_resolver: Arc<dyn NamespaceNameProvider>,
table_name_resolver: Arc<dyn TableNameProvider>,
table_resolver: Arc<dyn TableProvider>,
partition_provider: Arc<dyn PartitionProvider>,
post_write_observer: Arc<O>,
metrics: Arc<metric::Registry>,
@ -127,7 +131,7 @@ where
Self {
namespaces: Default::default(),
namespace_name_resolver,
table_name_resolver,
table_resolver,
metrics,
partition_provider,
post_write_observer,
@ -178,7 +182,7 @@ where
Arc::new(NamespaceData::new(
namespace_id,
Arc::new(self.namespace_name_resolver.for_namespace(namespace_id)),
Arc::clone(&self.table_name_resolver),
Arc::clone(&self.table_resolver),
Arc::clone(&self.partition_provider),
Arc::clone(&self.post_write_observer),
&self.metrics,
@ -200,8 +204,9 @@ where
&self,
namespace_id: NamespaceId,
table_id: TableId,
columns: Vec<String>,
projection: OwnedProjection,
span: Option<Span>,
predicate: Option<Predicate>,
) -> Result<Self::Response, QueryError> {
// Extract the namespace if it exists.
let inner = self
@ -211,7 +216,7 @@ where
// Delegate query execution to the namespace, wrapping the execution in
// a tracing delegate to emit a child span.
QueryExecTracing::new(inner, "namespace")
.query_exec(namespace_id, table_id, columns, span)
.query_exec(namespace_id, table_id, projection, span, predicate)
.await
}
}
@ -227,29 +232,41 @@ where
#[cfg(test)]
mod tests {
use std::{sync::Arc, time::Duration};
use arrow::datatypes::DataType;
use assert_matches::assert_matches;
use data_types::{
partition_template::{test_table_partition_override, TemplatePart},
PartitionId, PartitionKey,
};
use datafusion::{
assert_batches_eq, assert_batches_sorted_eq,
prelude::{col, lit},
scalar::ScalarValue,
};
use futures::StreamExt;
use lazy_static::lazy_static;
use metric::{Attributes, Metric};
use predicate::Predicate;
use test_helpers::maybe_start_logging;
use super::*;
use crate::{
buffer_tree::{
namespace::{name_resolver::mock::MockNamespaceNameProvider, NamespaceData},
partition::resolver::mock::MockPartitionProvider,
post_write::mock::MockPostWriteObserver,
table::TableName,
table::{metadata_resolver::mock::MockTableProvider, TableMetadata},
},
deferred_load::{self, DeferredLoad},
query::partition_response::PartitionResponse,
test_util::{
defer_namespace_name_1_ms, make_write_op, PartitionDataBuilder, ARBITRARY_NAMESPACE_ID,
ARBITRARY_NAMESPACE_NAME, ARBITRARY_PARTITION_ID, ARBITRARY_PARTITION_KEY,
ARBITRARY_TABLE_ID, ARBITRARY_TABLE_NAME, ARBITRARY_TABLE_NAME_PROVIDER,
ARBITRARY_TABLE_ID, ARBITRARY_TABLE_NAME, ARBITRARY_TABLE_PROVIDER,
},
};
use assert_matches::assert_matches;
use data_types::{PartitionId, PartitionKey};
use datafusion::{assert_batches_eq, assert_batches_sorted_eq};
use futures::StreamExt;
use lazy_static::lazy_static;
use metric::{Attributes, Metric};
use std::{sync::Arc, time::Duration};
const PARTITION2_ID: PartitionId = PartitionId::new(2);
const PARTITION3_ID: PartitionId = PartitionId::new(3);
@ -278,7 +295,7 @@ mod tests {
let ns = NamespaceData::new(
ARBITRARY_NAMESPACE_ID,
defer_namespace_name_1_ms(),
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
Arc::clone(&*ARBITRARY_TABLE_PROVIDER),
partition_provider,
Arc::new(MockPostWriteObserver::default()),
&metrics,
@ -337,13 +354,19 @@ mod tests {
macro_rules! test_write_query {
(
$name:ident,
partitions = [$($partition:expr), +], // The set of PartitionData for the mock partition provider
$(table_provider = $table_provider:expr,)? // An optional table provider
partitions = [$($partition:expr), +], // The set of PartitionData for the mock
// partition provider
writes = [$($write:expr), *], // The set of WriteOperation to apply()
want = $want:expr // The expected results of querying ARBITRARY_NAMESPACE_ID and ARBITRARY_TABLE_ID
predicate = $predicate:expr, // An optional predicate to use for the query
want = $want:expr // The expected results of querying
// ARBITRARY_NAMESPACE_ID and ARBITRARY_TABLE_ID
) => {
paste::paste! {
#[tokio::test]
async fn [<test_write_query_ $name>]() {
maybe_start_logging();
// Configure the mock partition provider with the provided
// partitions.
let partition_provider = Arc::new(MockPartitionProvider::default()
@ -352,10 +375,16 @@ mod tests {
)+
);
#[allow(unused_variables)]
let table_provider = Arc::clone(&*ARBITRARY_TABLE_PROVIDER);
$(
let table_provider: Arc<dyn TableProvider> = $table_provider;
)?
// Init the buffer tree
let buf = BufferTree::new(
Arc::new(MockNamespaceNameProvider::new(&**ARBITRARY_NAMESPACE_NAME)),
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
table_provider,
partition_provider,
Arc::new(MockPostWriteObserver::default()),
Arc::new(metric::Registry::default()),
@ -370,7 +399,13 @@ mod tests {
// Execute the query against ARBITRARY_NAMESPACE_ID and ARBITRARY_TABLE_ID
let batches = buf
.query_exec(ARBITRARY_NAMESPACE_ID, ARBITRARY_TABLE_ID, vec![], None)
.query_exec(
ARBITRARY_NAMESPACE_ID,
ARBITRARY_TABLE_ID,
OwnedProjection::default(),
None,
$predicate
)
.await
.expect("query should succeed")
.into_partition_stream()
@ -407,6 +442,7 @@ mod tests {
),
None,
)],
predicate = None,
want = [
"+----------+------+-------------------------------+",
"| region | temp | time |",
@ -456,6 +492,7 @@ mod tests {
None,
)
],
predicate = None,
want = [
"+----------+------+-------------------------------+",
"| region | temp | time |",
@ -508,6 +545,7 @@ mod tests {
None,
)
],
predicate = None,
want = [
"+--------+------+-------------------------------+",
"| region | temp | time |",
@ -520,7 +558,7 @@ mod tests {
// A query that ensures the data across multiple tables (with the same table
// name!) is correctly filtered to return only the queried table.
test_write_query!(
filter_multiple_tabls,
filter_multiple_tables,
partitions = [
PartitionDataBuilder::new()
.with_partition_id(ARBITRARY_PARTITION_ID)
@ -558,6 +596,7 @@ mod tests {
None,
)
],
predicate = None,
want = [
"+--------+------+-------------------------------+",
"| region | temp | time |",
@ -603,6 +642,7 @@ mod tests {
None,
)
],
predicate = None,
want = [
"+----------+------+-------------------------------+",
"| region | temp | time |",
@ -613,6 +653,98 @@ mod tests {
]
);
// This test asserts that the results returned from a query to the
// [`BufferTree`] filters rows from the result as directed by the
// query's [`Predicate`].
//
// It makes sure that, for a [`BufferTree`] with a set of partitions split
// by some key, a query with a predicate `<partition key column> == <arbitrary literal>`
// returns partition data filtered to contain only those rows which hold the
// specified value in that partition key column.
test_write_query!(
filter_by_predicate_partition_key,
table_provider = Arc::new(MockTableProvider::new(TableMetadata::new_for_testing(
ARBITRARY_TABLE_NAME.clone(),
test_table_partition_override(vec![TemplatePart::TagValue("region")])
))),
partitions = [
PartitionDataBuilder::new()
.with_partition_id(ARBITRARY_PARTITION_ID)
.with_partition_key(ARBITRARY_PARTITION_KEY.clone()) // "platanos"
.build(),
PartitionDataBuilder::new()
.with_partition_id(PARTITION2_ID)
.with_partition_key(PARTITION2_KEY.clone()) // "p2"
.build()
],
writes = [
make_write_op(
&ARBITRARY_PARTITION_KEY,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_ID,
0,
&format!(
r#"{},region={} temp=35 4242424242"#,
&*ARBITRARY_TABLE_NAME, &*ARBITRARY_PARTITION_KEY
),
None,
),
make_write_op(
&ARBITRARY_PARTITION_KEY,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_ID,
1,
&format!(
r#"{},region={} temp=12 4242424242"#,
&*ARBITRARY_TABLE_NAME, &*ARBITRARY_PARTITION_KEY
),
None,
),
make_write_op(
&PARTITION2_KEY,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_ID,
2,
&format!(
r#"{},region={} temp=17 7676767676"#,
&*ARBITRARY_TABLE_NAME, *PARTITION2_KEY
),
None,
),
make_write_op(
&PARTITION2_KEY,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_ID,
3,
&format!(
r#"{},region={} temp=13 7676767676"#,
&*ARBITRARY_TABLE_NAME, *PARTITION2_KEY,
),
None,
)
],
// NOTE: The querier will coerce the type of the predicates correctly, so the ingester does NOT need to perform
// type coercion. The literal type used here should reflect that.
predicate = Some(Predicate::new().with_expr(col("region").eq(lit(
ScalarValue::Dictionary(
Box::new(DataType::Int32),
Box::new(ScalarValue::from(PARTITION2_KEY.inner()))
)
)))),
want = [
"+--------+------+-------------------------------+",
"| region | temp | time |",
"+--------+------+-------------------------------+",
"| p2 | 13.0 | 1970-01-01T00:00:07.676767676 |",
"| p2 | 17.0 | 1970-01-01T00:00:07.676767676 |",
"+--------+------+-------------------------------+",
]
);
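
The predicate in the test above is worth spelling out: tag columns are dictionary encoded, so the literal is built as a `ScalarValue::Dictionary` rather than a plain `Utf8` value, letting the comparison proceed with no coercion in the ingester. A hedged helper sketch of the same construction (assumes DataFusion's `col`/`lit` and the `ScalarValue::Dictionary` variant exactly as used in this commit):

use arrow::datatypes::DataType;
use datafusion::prelude::{col, lit, Expr};
use datafusion::scalar::ScalarValue;

/// Build `region == <value>` with a dictionary-typed literal, matching the
/// physical type of a dictionary-encoded tag column.
fn region_eq(value: &str) -> Expr {
    col("region").eq(lit(ScalarValue::Dictionary(
        Box::new(DataType::Int32),
        Box::new(ScalarValue::from(value)),
    )))
}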
/// Assert that multiple writes to a single namespace/table results in a
/// single namespace being created, and matching metrics.
#[tokio::test]
@ -627,7 +759,7 @@ mod tests {
)
.with_partition(
PartitionDataBuilder::new()
.with_partition_id(ARBITRARY_PARTITION_ID)
.with_partition_id(PARTITION2_ID)
.with_partition_key(PARTITION2_KEY.clone())
.build(),
),
@ -638,7 +770,7 @@ mod tests {
// Init the buffer tree
let buf = BufferTree::new(
Arc::new(MockNamespaceNameProvider::new(&**ARBITRARY_NAMESPACE_NAME)),
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
Arc::clone(&*ARBITRARY_TABLE_PROVIDER),
partition_provider,
Arc::new(MockPostWriteObserver::default()),
Arc::clone(&metrics),
@ -722,9 +854,14 @@ mod tests {
.with_partition_id(PARTITION3_ID)
.with_partition_key(PARTITION3_KEY.clone())
.with_table_id(TABLE2_ID)
.with_table_name_loader(Arc::new(DeferredLoad::new(
.with_table_loader(Arc::new(DeferredLoad::new(
Duration::from_secs(1),
async move { TableName::from(TABLE2_NAME) },
async move {
TableMetadata::new_for_testing(
TABLE2_NAME.into(),
Default::default(),
)
},
&metric::Registry::default(),
)))
.build(),
@ -734,7 +871,7 @@ mod tests {
// Init the buffer tree
let buf = BufferTree::new(
Arc::new(MockNamespaceNameProvider::new(&**ARBITRARY_NAMESPACE_NAME)),
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
Arc::clone(&*ARBITRARY_TABLE_PROVIDER),
partition_provider,
Arc::new(MockPostWriteObserver::default()),
Arc::clone(&Arc::new(metric::Registry::default())),
@ -821,7 +958,7 @@ mod tests {
// Init the BufferTree
let buf = BufferTree::new(
Arc::new(MockNamespaceNameProvider::new(&**ARBITRARY_NAMESPACE_NAME)),
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
Arc::clone(&*ARBITRARY_TABLE_PROVIDER),
partition_provider,
Arc::new(MockPostWriteObserver::default()),
Arc::new(metric::Registry::default()),
@ -829,7 +966,13 @@ mod tests {
// Query the empty tree
let err = buf
.query_exec(ARBITRARY_NAMESPACE_ID, ARBITRARY_TABLE_ID, vec![], None)
.query_exec(
ARBITRARY_NAMESPACE_ID,
ARBITRARY_TABLE_ID,
OwnedProjection::default(),
None,
None,
)
.await
.expect_err("query should fail");
assert_matches!(err, QueryError::NamespaceNotFound(ns) => {
@ -854,7 +997,13 @@ mod tests {
// Ensure an unknown table errors
let err = buf
.query_exec(ARBITRARY_NAMESPACE_ID, TABLE2_ID, vec![], None)
.query_exec(
ARBITRARY_NAMESPACE_ID,
TABLE2_ID,
OwnedProjection::default(),
None,
None,
)
.await
.expect_err("query should fail");
assert_matches!(err, QueryError::TableNotFound(ns, t) => {
@ -863,9 +1012,15 @@ mod tests {
});
// Ensure a valid namespace / table does not error
buf.query_exec(ARBITRARY_NAMESPACE_ID, ARBITRARY_TABLE_ID, vec![], None)
.await
.expect("namespace / table should exist");
buf.query_exec(
ARBITRARY_NAMESPACE_ID,
ARBITRARY_TABLE_ID,
OwnedProjection::default(),
None,
None,
)
.await
.expect("namespace / table should exist");
}
/// This test asserts the read consistency properties defined in the
@ -906,7 +1061,7 @@ mod tests {
// Init the buffer tree
let buf = BufferTree::new(
Arc::new(MockNamespaceNameProvider::new(&**ARBITRARY_NAMESPACE_NAME)),
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
Arc::clone(&*ARBITRARY_TABLE_PROVIDER),
partition_provider,
Arc::new(MockPostWriteObserver::default()),
Arc::new(metric::Registry::default()),
@ -931,7 +1086,13 @@ mod tests {
// Execute a query of the buffer tree, generating the result stream, but
// DO NOT consume it.
let stream = buf
.query_exec(ARBITRARY_NAMESPACE_ID, ARBITRARY_TABLE_ID, vec![], None)
.query_exec(
ARBITRARY_NAMESPACE_ID,
ARBITRARY_TABLE_ID,
OwnedProjection::default(),
None,
None,
)
.await
.expect("query should succeed")
.into_partition_stream();

View File

@ -1,14 +1,23 @@
//! Table level data buffer structures.
pub(crate) mod name_resolver;
pub(crate) mod metadata_resolver;
use std::{fmt::Debug, sync::Arc};
use std::{collections::HashMap, fmt::Debug, sync::Arc};
use async_trait::async_trait;
use data_types::{NamespaceId, PartitionKey, SequenceNumber, TableId};
use data_types::{
partition_template::{build_column_values, ColumnValue, TablePartitionTemplateOverride},
NamespaceId, PartitionKey, SequenceNumber, Table, TableId,
};
use datafusion::scalar::ScalarValue;
use iox_query::{
chunk_statistics::{create_chunk_statistics, ColumnRange},
pruning::prune_summaries,
QueryChunk,
};
use mutable_batch::MutableBatch;
use parking_lot::Mutex;
use schema::Projection;
use predicate::Predicate;
use trace::span::{Span, SpanRecorder};
use super::{
@ -20,10 +29,55 @@ use crate::{
arcmap::ArcMap,
deferred_load::DeferredLoad,
query::{
partition_response::PartitionResponse, response::PartitionStream, QueryError, QueryExec,
partition_response::PartitionResponse, projection::OwnedProjection,
response::PartitionStream, QueryError, QueryExec,
},
query_adaptor::QueryAdaptor,
};
/// Metadata from the catalog for a table
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct TableMetadata {
name: TableName,
partition_template: TablePartitionTemplateOverride,
}
impl TableMetadata {
#[cfg(test)]
pub fn new_for_testing(
name: TableName,
partition_template: TablePartitionTemplateOverride,
) -> Self {
Self {
name,
partition_template,
}
}
pub(crate) fn name(&self) -> &TableName {
&self.name
}
pub(crate) fn partition_template(&self) -> &TablePartitionTemplateOverride {
&self.partition_template
}
}
impl From<Table> for TableMetadata {
fn from(t: Table) -> Self {
Self {
name: t.name.into(),
partition_template: t.partition_template,
}
}
}
impl std::fmt::Display for TableMetadata {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(&self.name, f)
}
}
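A quick, hedged illustration of the new type (crate-internal, so this would live in a test inside the crate; `TableName::from(&str)` is used the same way elsewhere in this diff):

// Build metadata via the test-only constructor and read back the
// accessors added above.
let meta = TableMetadata::new_for_testing(
    TableName::from("bananas"),
    TablePartitionTemplateOverride::default(),
);
let _template = meta.partition_template(); // later used for partition pruning
assert_eq!(meta.to_string(), "bananas");   // Display delegates to the name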
/// The string name / identifier of a Table.
///
/// A reference-counted, cheap clone-able string.
@ -69,7 +123,7 @@ impl PartialEq<str> for TableName {
#[derive(Debug)]
pub(crate) struct TableData<O> {
table_id: TableId,
table_name: Arc<DeferredLoad<TableName>>,
catalog_table: Arc<DeferredLoad<TableMetadata>>,
/// The catalog ID of the namespace this table is being populated from.
namespace_id: NamespaceId,
@ -93,7 +147,7 @@ impl<O> TableData<O> {
/// for the first time.
pub(super) fn new(
table_id: TableId,
table_name: Arc<DeferredLoad<TableName>>,
catalog_table: Arc<DeferredLoad<TableMetadata>>,
namespace_id: NamespaceId,
namespace_name: Arc<DeferredLoad<NamespaceName>>,
partition_provider: Arc<dyn PartitionProvider>,
@ -101,7 +155,7 @@ impl<O> TableData<O> {
) -> Self {
Self {
table_id,
table_name,
catalog_table,
namespace_id,
namespace_name,
partition_data: Default::default(),
@ -132,9 +186,9 @@ impl<O> TableData<O> {
self.table_id
}
/// Returns the name of this table.
pub(crate) fn table_name(&self) -> &Arc<DeferredLoad<TableName>> {
&self.table_name
/// Returns the catalog data for this table.
pub(crate) fn catalog_table(&self) -> &Arc<DeferredLoad<TableMetadata>> {
&self.catalog_table
}
/// Return the [`NamespaceId`] this table is a part of.
@ -166,7 +220,7 @@ where
self.namespace_id,
Arc::clone(&self.namespace_name),
self.table_id,
Arc::clone(&self.table_name),
Arc::clone(&self.catalog_table),
)
.await;
// Add the partition to the map.
@ -202,8 +256,9 @@ where
&self,
namespace_id: NamespaceId,
table_id: TableId,
columns: Vec<String>,
projection: OwnedProjection,
span: Option<Span>,
predicate: Option<Predicate>,
) -> Result<Self::Response, QueryError> {
assert_eq!(self.table_id, table_id, "buffer tree index inconsistency");
assert_eq!(
@ -211,18 +266,21 @@ where
"buffer tree index inconsistency"
);
let table_partition_template = self.catalog_table.get().await.partition_template;
// Gather the partition data from all of the partitions in this table.
let span = SpanRecorder::new(span);
let partitions = self.partitions().into_iter().map(move |p| {
let mut span = span.child("partition read");
let (id, hash_id, completed_persistence_count, data) = {
let (id, hash_id, completed_persistence_count, data, partition_key) = {
let mut p = p.lock();
(
p.partition_id(),
p.partition_hash_id().cloned(),
p.completed_persistence_count(),
p.get_query_data(),
p.get_query_data(&projection),
p.partition_key().clone(),
)
};
@ -230,16 +288,36 @@ where
Some(data) => {
assert_eq!(id, data.partition_id());
// Project the data if necessary
let columns = columns.iter().map(String::as_str).collect::<Vec<_>>();
let selection = if columns.is_empty() {
Projection::All
} else {
Projection::Some(columns.as_ref())
};
// Potentially prune out this partition if the partition
// template & derived partition key can be used to match
// against the optional predicate.
if predicate
.as_ref()
.map(|p| {
!keep_after_pruning_partition_key(
&table_partition_template,
&partition_key,
p,
&data,
)
})
.unwrap_or_default()
{
return PartitionResponse::new(
vec![],
id,
hash_id,
completed_persistence_count,
);
}
let data = data.project_selection(selection).into_iter().collect();
PartitionResponse::new(data, id, hash_id, completed_persistence_count)
// Projection has already been applied in get_query_data() above.
PartitionResponse::new(
data.into_record_batches(),
id,
hash_id,
completed_persistence_count,
)
}
None => PartitionResponse::new(vec![], id, hash_id, completed_persistence_count),
};
@ -252,6 +330,106 @@ where
}
}
/// Return true if `data` contains one or more rows matching `predicate`,
/// pruning based on the `partition_key` and `template`.
///
/// Returns false iff it can be proven that no row in `data` matches the
/// predicate.
fn keep_after_pruning_partition_key(
table_partition_template: &TablePartitionTemplateOverride,
partition_key: &PartitionKey,
predicate: &Predicate,
data: &QueryAdaptor,
) -> bool {
// Construct a set of per-column min/max statistics based on the partition
// key values.
let column_ranges = Arc::new(
build_column_values(table_partition_template, partition_key.inner())
.filter_map(|(col, val)| {
let range = match val {
ColumnValue::Identity(s) => {
let s = Arc::new(ScalarValue::from(s.as_ref()));
ColumnRange {
min_value: Arc::clone(&s),
max_value: s,
}
}
ColumnValue::Prefix(p) if p.is_empty() => return None,
ColumnValue::Prefix(p) => {
// If the partition only has a prefix of the tag value
// (it was truncated) then form a conservative range:
//
// # Minimum
// Use the prefix itself.
//
// Note that the minimum is inclusive.
//
// All values in the partition are either:
//
// - identical to the prefix, in which case they are
// included by the inclusive minimum
//
// - have the form `"<prefix><s>"`, and it holds that
// `"<prefix><s>" > "<prefix>"` for all strings
// `"<s>"`.
//
// # Maximum
// Use `"<prefix_excluding_last_char><char::max>"`.
//
// Note that the maximum is inclusive.
//
// All strings in this partition must be smaller than
// this constructed maximum, because string comparison
// is front-to-back and
// `"<prefix_excluding_last_char><char::max>" >
// "<prefix>"`.
let min_value = Arc::new(ScalarValue::from(p.as_ref()));
let mut chars = p.as_ref().chars().collect::<Vec<_>>();
*chars.last_mut().expect("checked that prefix is not empty") =
std::char::MAX;
let max_value = Arc::new(ScalarValue::from(
chars.into_iter().collect::<String>().as_str(),
));
ColumnRange {
min_value,
max_value,
}
}
};
Some((Arc::from(col), range))
})
.collect::<HashMap<_, _>>(),
);
let chunk_statistics = Arc::new(create_chunk_statistics(
data.num_rows(),
data.schema(),
data.ts_min_max(),
&column_ranges,
));
prune_summaries(
data.schema(),
&[(chunk_statistics, data.schema().as_arrow())],
predicate,
)
// Errors are logged by `iox_query` and are sometimes acceptable, e.g. for
// unimplemented DataFusion features or upstream bugs. The querier uses the
// same strategy. Pruning is a mere optimization and should not lead to
// crashes or unreadable data.
.ok()
.map(|vals| {
vals.into_iter()
.next()
.expect("one chunk in, one chunk out")
})
.unwrap_or(true)
}
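The truncated-prefix handling above is the subtle part of this function. A minimal standalone sketch of the conservative inclusive upper bound it constructs (pure std, independent of the crate):

// For a truncated prefix, replace the last char with char::MAX to get an
// inclusive upper bound covering every string that starts with the prefix.
fn prefix_max_bound(prefix: &str) -> String {
    let mut chars: Vec<char> = prefix.chars().collect();
    *chars.last_mut().expect("prefix must be non-empty") = char::MAX;
    chars.into_iter().collect()
}

fn main() {
    let bound = prefix_max_bound("ab");
    // Every value starting with the prefix sorts below the bound...
    assert!("ab" < bound.as_str());
    assert!("abzzzz" < bound.as_str());
    // ...so [prefix, bound] is a safe (if loose) pruning range.
    println!("{bound:?}");
}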
#[cfg(test)]
mod tests {
use std::sync::Arc;
@ -265,7 +443,7 @@ mod tests {
post_write::mock::MockPostWriteObserver,
},
test_util::{
defer_namespace_name_1_sec, defer_table_name_1_sec, PartitionDataBuilder,
defer_namespace_name_1_sec, defer_table_metadata_1_sec, PartitionDataBuilder,
ARBITRARY_NAMESPACE_ID, ARBITRARY_PARTITION_KEY, ARBITRARY_TABLE_ID,
ARBITRARY_TABLE_NAME,
},
@ -280,7 +458,7 @@ mod tests {
let table = TableData::new(
ARBITRARY_TABLE_ID,
defer_table_name_1_sec(),
defer_table_metadata_1_sec(),
ARBITRARY_NAMESPACE_ID,
defer_namespace_name_1_sec(),
partition_provider,

View File

@ -4,24 +4,24 @@ use backoff::{Backoff, BackoffConfig};
use data_types::TableId;
use iox_catalog::interface::Catalog;
use super::TableName;
use super::TableMetadata;
use crate::deferred_load::DeferredLoad;
/// An abstract provider of a [`DeferredLoad`] configured to fetch the
/// [`TableName`] of the specified [`TableId`].
pub(crate) trait TableNameProvider: Send + Sync + std::fmt::Debug {
fn for_table(&self, id: TableId) -> DeferredLoad<TableName>;
/// catalog [`TableMetadata`] of the specified [`TableId`].
pub(crate) trait TableProvider: Send + Sync + std::fmt::Debug {
fn for_table(&self, id: TableId) -> DeferredLoad<TableMetadata>;
}
#[derive(Debug)]
pub(crate) struct TableNameResolver {
pub(crate) struct TableResolver {
max_smear: Duration,
catalog: Arc<dyn Catalog>,
backoff_config: BackoffConfig,
metrics: Arc<metric::Registry>,
}
impl TableNameResolver {
impl TableResolver {
pub(crate) fn new(
max_smear: Duration,
catalog: Arc<dyn Catalog>,
@ -36,16 +36,16 @@ impl TableNameResolver {
}
}
/// Fetch the [`TableName`] from the [`Catalog`] for specified
/// Fetch the [`TableMetadata`] from the [`Catalog`] for specified
/// `table_id`, retrying endlessly when errors occur.
pub(crate) async fn fetch(
table_id: TableId,
catalog: Arc<dyn Catalog>,
backoff_config: BackoffConfig,
) -> TableName {
) -> TableMetadata {
Backoff::new(&backoff_config)
.retry_all_errors("fetch table name", || async {
let s = catalog
.retry_all_errors("fetch table", || async {
let table = catalog
.repositories()
.await
.tables()
@ -54,18 +54,17 @@ impl TableNameResolver {
.unwrap_or_else(|| {
panic!("resolving table name for non-existent table id {table_id}")
})
.name
.into();
Result::<_, iox_catalog::interface::Error>::Ok(s)
Result::<_, iox_catalog::interface::Error>::Ok(table)
})
.await
.expect("retry forever")
}
}
impl TableNameProvider for TableNameResolver {
fn for_table(&self, id: TableId) -> DeferredLoad<TableName> {
impl TableProvider for TableResolver {
fn for_table(&self, id: TableId) -> DeferredLoad<TableMetadata> {
DeferredLoad::new(
self.max_smear,
Self::fetch(id, Arc::clone(&self.catalog), self.backoff_config.clone()),
@ -79,28 +78,33 @@ pub(crate) mod mock {
use super::*;
#[derive(Debug)]
pub(crate) struct MockTableNameProvider {
name: TableName,
pub(crate) struct MockTableProvider {
table: TableMetadata,
}
impl MockTableNameProvider {
pub(crate) fn new(name: impl Into<TableName>) -> Self {
Self { name: name.into() }
impl MockTableProvider {
pub(crate) fn new(table: impl Into<TableMetadata>) -> Self {
Self {
table: table.into(),
}
}
}
impl Default for MockTableNameProvider {
impl Default for MockTableProvider {
fn default() -> Self {
Self::new("bananas")
Self::new(TableMetadata::new_for_testing(
"bananas".into(),
Default::default(),
))
}
}
impl TableNameProvider for MockTableNameProvider {
fn for_table(&self, _id: TableId) -> DeferredLoad<TableName> {
let name = self.name.clone();
impl TableProvider for MockTableProvider {
fn for_table(&self, _id: TableId) -> DeferredLoad<TableMetadata> {
let table = self.table.clone();
DeferredLoad::new(
Duration::from_secs(1),
async { name },
async { table },
&metric::Registry::default(),
)
}
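A hedged usage sketch of the renamed mock (async, crate-internal types):

// The mock resolves immediately to metadata named "bananas".
let provider = MockTableProvider::default();
let load = provider.for_table(TableId::new(1));
let meta = load.get().await;
assert_eq!(meta.to_string(), "bananas");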
@ -129,7 +133,7 @@ mod tests {
// Populate the catalog with the namespace / table
let (_ns_id, table_id) = populate_catalog(&*catalog, NAMESPACE_NAME, TABLE_NAME).await;
let fetcher = Arc::new(TableNameResolver::new(
let fetcher = Arc::new(TableResolver::new(
Duration::from_secs(10),
Arc::clone(&catalog),
backoff_config.clone(),
@ -141,6 +145,6 @@ mod tests {
.get()
.with_timeout_panic(Duration::from_secs(5))
.await;
assert_eq!(&**got, TABLE_NAME);
assert_eq!(got.name(), TABLE_NAME);
}
}

View File

@ -30,7 +30,7 @@ use crate::{
partition::resolver::{
CatalogPartitionResolver, CoalescePartitionResolver, PartitionCache, PartitionProvider,
},
table::name_resolver::{TableNameProvider, TableNameResolver},
table::metadata_resolver::{TableProvider, TableResolver},
BufferTree,
},
dml_sink::{instrumentation::DmlSinkInstrumentation, tracing::DmlSinkTracing},
@ -253,8 +253,8 @@ where
Arc::clone(&metrics),
));
// Initialise the deferred table name resolver.
let table_name_provider: Arc<dyn TableNameProvider> = Arc::new(TableNameResolver::new(
// Initialise the deferred table metadata resolver.
let table_provider: Arc<dyn TableProvider> = Arc::new(TableResolver::new(
persist_background_fetch_time,
Arc::clone(&catalog),
BackoffConfig::default(),
@ -326,7 +326,7 @@ where
let buffer = Arc::new(BufferTree::new(
namespace_name_provider,
table_name_provider,
table_provider,
partition_provider,
Arc::new(hot_partition_persister),
Arc::clone(&metrics),
@ -389,9 +389,7 @@ where
// ingester, but they are only used for internal ordering of operations at
// runtime.
let timestamp = Arc::new(TimestampOracle::new(
max_sequence_number
.map(|v| u64::try_from(v.get()).expect("sequence number overflow"))
.unwrap_or(0),
max_sequence_number.map(|v| v.get()).unwrap_or(0),
));
let (shutdown_tx, shutdown_rx) = oneshot::channel();

View File

@ -9,6 +9,7 @@ use crate::{
ingest_state::{IngestState, IngestStateError},
partition_iter::PartitionIter,
persist::{drain_buffer::persist_partitions, queue::PersistQueue},
query::projection::OwnedProjection,
};
/// Defines how often the shutdown task polls the partition buffers for
@ -77,10 +78,11 @@ pub(super) async fn graceful_shutdown_handler<F, T, P>(
// springs to life and buffers in the buffer tree after this check has
// completed - I think this is extreme enough to accept as a theoretical
// possibility that doesn't need covering off in practice.
while buffer
.partition_iter()
.any(|p| p.lock().get_query_data().is_some())
{
while buffer.partition_iter().any(|p| {
p.lock()
.get_query_data(&OwnedProjection::default())
.is_some()
}) {
if persist_partitions(buffer.partition_iter(), &persist).await != 0 {
// Late arriving writes needed persisting.
debug!("re-persisting late arriving data");

View File

@ -199,9 +199,7 @@ where
op,
} = op;
let sequence_number = SequenceNumber::new(
i64::try_from(sequence_number).expect("sequence number overflow"),
);
let sequence_number = SequenceNumber::new(sequence_number);
max_sequence = max_sequence.max(Some(sequence_number));

View File

@ -67,10 +67,7 @@ pub(super) async fn compact_persisting_batch(
adjust_sort_key_columns(&sk, &batch.schema().primary_key())
}
None => {
let sort_key = compute_sort_key(
batch.schema(),
batch.record_batches().iter().map(|sb| sb.as_ref()),
);
let sort_key = compute_sort_key(batch.schema(), batch.record_batches().iter());
// Use the sort key computed from the cardinality as the sort key for this parquet
// file's metadata, also return the sort key to be stored in the catalog
(sort_key.clone(), Some(sort_key))
@ -127,7 +124,7 @@ mod tests {
.to_arrow(Projection::All)
.unwrap();
let batch = QueryAdaptor::new(ARBITRARY_PARTITION_ID, vec![Arc::new(batch)]);
let batch = QueryAdaptor::new(ARBITRARY_PARTITION_ID, vec![batch]);
// verify PK
let schema = batch.schema();
@ -459,8 +456,7 @@ mod tests {
let expected_pk = vec!["tag1", "time"];
assert_eq!(expected_pk, pk);
let sort_key =
compute_sort_key(schema, batch.record_batches().iter().map(|rb| rb.as_ref()));
let sort_key = compute_sort_key(schema, batch.record_batches().iter());
assert_eq!(sort_key, SortKey::from_columns(["tag1", "time"]));
// compact
@ -500,8 +496,7 @@ mod tests {
let expected_pk = vec!["tag1", "time"];
assert_eq!(expected_pk, pk);
let sort_key =
compute_sort_key(schema, batch.record_batches().iter().map(|rb| rb.as_ref()));
let sort_key = compute_sort_key(schema, batch.record_batches().iter());
assert_eq!(sort_key, SortKey::from_columns(["tag1", "time"]));
// compact
@ -549,8 +544,7 @@ mod tests {
let expected_pk = vec!["tag1", "time"];
assert_eq!(expected_pk, pk);
let sort_key =
compute_sort_key(schema, batch.record_batches().iter().map(|rb| rb.as_ref()));
let sort_key = compute_sort_key(schema, batch.record_batches().iter());
assert_eq!(sort_key, SortKey::from_columns(["tag1", "time"]));
// compact
@ -596,8 +590,7 @@ mod tests {
let expected_pk = vec!["tag1", "tag2", "time"];
assert_eq!(expected_pk, pk);
let sort_key =
compute_sort_key(schema, batch.record_batches().iter().map(|rb| rb.as_ref()));
let sort_key = compute_sort_key(schema, batch.record_batches().iter());
assert_eq!(sort_key, SortKey::from_columns(["tag1", "tag2", "time"]));
// compact
@ -647,8 +640,7 @@ mod tests {
let expected_pk = vec!["tag1", "tag2", "time"];
assert_eq!(expected_pk, pk);
let sort_key =
compute_sort_key(schema, batch.record_batches().iter().map(|rb| rb.as_ref()));
let sort_key = compute_sort_key(schema, batch.record_batches().iter());
assert_eq!(sort_key, SortKey::from_columns(["tag1", "tag2", "time"]));
// compact
@ -699,7 +691,7 @@ mod tests {
batch.schema();
}
async fn create_one_row_record_batch_with_influxtype() -> Vec<Arc<RecordBatch>> {
async fn create_one_row_record_batch_with_influxtype() -> Vec<RecordBatch> {
let chunk1 = Arc::new(
TestChunk::new("t")
.with_id(1)
@ -723,11 +715,10 @@ mod tests {
];
assert_batches_eq!(&expected, &batches);
let batches: Vec<_> = batches.iter().map(|r| Arc::new(r.clone())).collect();
batches
}
async fn create_one_record_batch_with_influxtype_no_duplicates() -> Vec<Arc<RecordBatch>> {
async fn create_one_record_batch_with_influxtype_no_duplicates() -> Vec<RecordBatch> {
let chunk1 = Arc::new(
TestChunk::new("t")
.with_id(1)
@ -753,11 +744,10 @@ mod tests {
];
assert_batches_eq!(&expected, &batches);
let batches: Vec<_> = batches.iter().map(|r| Arc::new(r.clone())).collect();
batches
}
async fn create_one_record_batch_with_influxtype_duplicates() -> Vec<Arc<RecordBatch>> {
async fn create_one_record_batch_with_influxtype_duplicates() -> Vec<RecordBatch> {
let chunk1 = Arc::new(
TestChunk::new("t")
.with_id(1)
@ -790,12 +780,11 @@ mod tests {
];
assert_batches_eq!(&expected, &batches);
let batches: Vec<_> = batches.iter().map(|r| Arc::new(r.clone())).collect();
batches
}
/// RecordBatches with knowledge of influx metadata
async fn create_batches_with_influxtype() -> Vec<Arc<RecordBatch>> {
async fn create_batches_with_influxtype() -> Vec<RecordBatch> {
// Use the available TestChunk to create chunks and then convert them to raw RecordBatches
let mut batches = vec![];
@ -826,7 +815,7 @@ mod tests {
"+-----------+------+--------------------------------+",
];
assert_batches_eq!(&expected, &[batch1.clone()]);
batches.push(Arc::new(batch1));
batches.push(batch1);
// chunk2 having duplicate data with chunk 1
let chunk2 = Arc::new(
@ -850,7 +839,7 @@ mod tests {
"+-----------+------+--------------------------------+",
];
assert_batches_eq!(&expected, &[batch2.clone()]);
batches.push(Arc::new(batch2));
batches.push(batch2);
// verify data from both batches
let expected = vec![
@ -874,14 +863,13 @@ mod tests {
"| 5 | MT | 1970-01-01T00:00:00.000005Z |",
"+-----------+------+--------------------------------+",
];
let b: Vec<_> = batches.iter().map(|b| (**b).clone()).collect();
assert_batches_eq!(&expected, &b);
assert_batches_eq!(&expected, &batches);
batches
}
/// RecordBatches with knowledge of influx metadata
async fn create_batches_with_influxtype_different_columns() -> Vec<Arc<RecordBatch>> {
async fn create_batches_with_influxtype_different_columns() -> Vec<RecordBatch> {
// Use the available TestChunk to create chunks and then convert them to raw RecordBatches
let mut batches = vec![];
@ -912,7 +900,7 @@ mod tests {
"+-----------+------+--------------------------------+",
];
assert_batches_eq!(&expected, &[batch1.clone()]);
batches.push(Arc::new(batch1));
batches.push(batch1);
// chunk2 having duplicate data with chunk 1
// more columns
@ -939,14 +927,14 @@ mod tests {
"+-----------+------------+------+------+--------------------------------+",
];
assert_batches_eq!(&expected, &[batch2.clone()]);
batches.push(Arc::new(batch2));
batches.push(batch2);
batches
}
/// RecordBatches with knowledge of influx metadata
async fn create_batches_with_influxtype_different_columns_different_order(
) -> Vec<Arc<RecordBatch>> {
async fn create_batches_with_influxtype_different_columns_different_order() -> Vec<RecordBatch>
{
// Use the available TestChunk to create chunks and then convert them to raw RecordBatches
let mut batches = vec![];
@ -978,7 +966,7 @@ mod tests {
"+-----------+------+------+--------------------------------+",
];
assert_batches_eq!(&expected, &[batch1.clone()]);
batches.push(Arc::new(batch1.clone()));
batches.push(batch1.clone());
// chunk2 having duplicate data with chunk 1
// more columns
@ -1003,13 +991,13 @@ mod tests {
"+-----------+------+--------------------------------+",
];
assert_batches_eq!(&expected, &[batch2.clone()]);
batches.push(Arc::new(batch2));
batches.push(batch2);
batches
}
/// Has 2 tag columns; tag1 has a lower cardinality (3) than tag3 (4)
async fn create_batches_with_influxtype_different_cardinality() -> Vec<Arc<RecordBatch>> {
async fn create_batches_with_influxtype_different_cardinality() -> Vec<RecordBatch> {
// Use the available TestChunk to create chunks and then convert them to raw RecordBatches
let mut batches = vec![];
@ -1034,7 +1022,7 @@ mod tests {
"+-----------+------+------+-----------------------------+",
];
assert_batches_eq!(&expected, &[batch1.clone()]);
batches.push(Arc::new(batch1.clone()));
batches.push(batch1.clone());
let chunk2 = Arc::new(
TestChunk::new("t")
@ -1057,13 +1045,13 @@ mod tests {
"+-----------+------+------+-----------------------------+",
];
assert_batches_eq!(&expected, &[batch2.clone()]);
batches.push(Arc::new(batch2));
batches.push(batch2);
batches
}
/// RecordBatches with knowledge of influx metadata
async fn create_batches_with_influxtype_same_columns_different_type() -> Vec<Arc<RecordBatch>> {
async fn create_batches_with_influxtype_same_columns_different_type() -> Vec<RecordBatch> {
// Use the available TestChunk to create chunks and then convert them to raw RecordBatches
let mut batches = vec![];
@ -1087,7 +1075,7 @@ mod tests {
"+-----------+------+-----------------------------+",
];
assert_batches_eq!(&expected, &[batch1.clone()]);
batches.push(Arc::new(batch1));
batches.push(batch1);
// chunk2 having duplicate data with chunk 1
// more columns
@ -1110,7 +1098,7 @@ mod tests {
"+-----------+------+-----------------------------+",
];
assert_batches_eq!(&expected, &[batch2.clone()]);
batches.push(Arc::new(batch2));
batches.push(batch2);
batches
}

View File

@ -18,7 +18,7 @@ use crate::{
buffer_tree::{
namespace::NamespaceName,
partition::{persisting::PersistingData, PartitionData, SortKeyState},
table::TableName,
table::TableMetadata,
},
deferred_load::DeferredLoad,
persist::completion_observer::CompletedPersist,
@ -94,14 +94,14 @@ pub(super) struct Context {
// The partition key for this partition
partition_key: PartitionKey,
/// Deferred strings needed for persistence.
/// Deferred data needed for persistence.
///
/// These [`DeferredLoad`] are given a pre-fetch hint when this [`Context`]
/// is constructed to load them in the background (if not already resolved)
/// in order to avoid incurring the query latency when the values are
/// needed.
namespace_name: Arc<DeferredLoad<NamespaceName>>,
table_name: Arc<DeferredLoad<TableName>>,
table: Arc<DeferredLoad<TableMetadata>>,
/// The [`SortKey`] for the [`PartitionData`] at the time of [`Context`]
/// construction.
@ -164,7 +164,7 @@ impl Context {
partition_hash_id: guard.partition_hash_id().cloned(),
partition_key: guard.partition_key().clone(),
namespace_name: Arc::clone(guard.namespace_name()),
table_name: Arc::clone(guard.table_name()),
table: Arc::clone(guard.table()),
// Technically the sort key isn't immutable, but MUST NOT be
// changed by an external actor (by something other than code in
@ -182,7 +182,7 @@ impl Context {
// Pre-fetch the deferred values in a background thread (if not already
// resolved)
s.namespace_name.prefetch_now();
s.table_name.prefetch_now();
s.table.prefetch_now();
if let SortKeyState::Deferred(ref d) = s.sort_key {
d.prefetch_now();
}
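The prefetch pattern above is worth spelling out. A hedged sketch, assuming only the `DeferredLoad` API visible in this diff (`new`, `prefetch_now`, `get`); `fetch_table_metadata` is a hypothetical resolver:

let table: Arc<DeferredLoad<TableMetadata>> = Arc::new(DeferredLoad::new(
    Duration::from_secs(1),
    async { fetch_table_metadata().await }, // hypothetical resolver future
    &metric::Registry::default(),
));
table.prefetch_now();         // start resolving in the background now...
let meta = table.get().await; // ...so this await likely returns immediately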
@ -253,7 +253,7 @@ impl Context {
namespace_id = %self.namespace_id,
namespace_name = %self.namespace_name,
table_id = %self.table_id,
table_name = %self.table_name,
table = %self.table,
partition_id = %self.partition_id,
partition_key = %self.partition_key,
total_persist_duration = ?now.duration_since(self.enqueued_at),
@ -315,7 +315,7 @@ impl Context {
self.namespace_name.as_ref()
}
pub(super) fn table_name(&self) -> &DeferredLoad<TableName> {
self.table_name.as_ref()
pub(super) fn table(&self) -> &DeferredLoad<TableMetadata> {
self.table.as_ref()
}
}

View File

@ -501,7 +501,7 @@ mod tests {
test_util::{
make_write_op, PartitionDataBuilder, ARBITRARY_NAMESPACE_ID, ARBITRARY_NAMESPACE_NAME,
ARBITRARY_PARTITION_ID, ARBITRARY_PARTITION_KEY, ARBITRARY_TABLE_ID,
ARBITRARY_TABLE_NAME, ARBITRARY_TABLE_NAME_PROVIDER,
ARBITRARY_TABLE_NAME, ARBITRARY_TABLE_PROVIDER,
},
};
@ -510,7 +510,7 @@ mod tests {
async fn new_partition(sort_key: SortKeyState) -> Arc<Mutex<PartitionData>> {
let buffer_tree = BufferTree::new(
Arc::new(MockNamespaceNameProvider::new(&**ARBITRARY_NAMESPACE_NAME)),
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
Arc::clone(&*ARBITRARY_TABLE_PROVIDER),
Arc::new(
MockPartitionProvider::default().with_partition(
PartitionDataBuilder::new()

View File

@ -110,6 +110,7 @@ mod tests {
use crate::{
persist::queue::mock::MockPersistQueue,
query::projection::OwnedProjection,
test_util::{PartitionDataBuilder, ARBITRARY_TABLE_NAME},
};
@ -162,7 +163,9 @@ mod tests {
guard
.buffer_write(mb, SequenceNumber::new(2))
.expect("write should succeed");
guard.get_query_data().expect("should have query adaptor")
guard
.get_query_data(&OwnedProjection::default())
.expect("should have query adaptor")
};
hot_partition_persister.observe(Arc::clone(&p), p.lock());
@ -170,7 +173,7 @@ mod tests {
tokio::task::yield_now().await;
// Assert the partition was queued for persistence with the correct data.
assert_matches!(persist_handle.calls().as_slice(), [got] => {
let got_query_data = got.lock().get_query_data().expect("should have query adaptor");
let got_query_data = got.lock().get_query_data(&OwnedProjection::default()).expect("should have query adaptor");
assert_eq!(got_query_data.record_batches(), want_query_data.record_batches());
});

View File

@ -48,7 +48,7 @@ mod tests {
test_util::{
make_write_op, populate_catalog, ARBITRARY_NAMESPACE_NAME,
ARBITRARY_NAMESPACE_NAME_PROVIDER, ARBITRARY_PARTITION_KEY, ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_NAME_PROVIDER,
ARBITRARY_TABLE_PROVIDER,
},
};
@ -67,7 +67,7 @@ mod tests {
// Init the buffer tree
let buf = BufferTree::new(
Arc::clone(&*ARBITRARY_NAMESPACE_NAME_PROVIDER),
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
Arc::clone(&*ARBITRARY_TABLE_PROVIDER),
Arc::new(CatalogPartitionResolver::new(Arc::clone(&catalog))),
Arc::new(MockPostWriteObserver::default()),
Arc::new(metric::Registry::default()),

View File

@ -202,7 +202,7 @@ where
namespace_id = %ctx.namespace_id(),
namespace_name = %ctx.namespace_name(),
table_id = %ctx.table_id(),
table_name = %ctx.table_name(),
table = %ctx.table(),
partition_id = %ctx.partition_id(),
partition_key = %ctx.partition_key(),
?sort_key,
@ -218,7 +218,7 @@ where
compact_persisting_batch(
&worker_state.exec,
sort_key,
ctx.table_name().get().await,
ctx.table().get().await.name().clone(),
ctx.data().query_adaptor(),
)
.await
@ -249,7 +249,7 @@ where
namespace_id = %ctx.namespace_id(),
namespace_name = %ctx.namespace_name(),
table_id = %ctx.table_id(),
table_name = %ctx.table_name(),
table = %ctx.table(),
partition_id = %ctx.partition_id(),
partition_key = %ctx.partition_key(),
%object_store_id,
@ -265,7 +265,7 @@ where
namespace_id: ctx.namespace_id(),
namespace_name: Arc::clone(&*ctx.namespace_name().get().await),
table_id: ctx.table_id(),
table_name: Arc::clone(&*ctx.table_name().get().await),
table_name: Arc::clone(ctx.table().get().await.name()),
partition_key: ctx.partition_key().clone(),
compaction_level: CompactionLevel::Initial,
sort_key: Some(data_sort_key),
@ -291,7 +291,7 @@ where
namespace_id = %ctx.namespace_id(),
namespace_name = %ctx.namespace_name(),
table_id = %ctx.table_id(),
table_name = %ctx.table_name(),
table = %ctx.table(),
partition_id = %ctx.partition_id(),
partition_key = %ctx.partition_key(),
%object_store_id,
@ -358,7 +358,7 @@ where
namespace_id = %ctx.namespace_id(),
namespace_name = %ctx.namespace_name(),
table_id = %ctx.table_id(),
table_name = %ctx.table_name(),
table = %ctx.table(),
partition_id = %ctx.partition_id(),
partition_key = %ctx.partition_key(),
?new_sort_key,
@ -394,7 +394,7 @@ where
namespace_id = %ctx.namespace_id(),
namespace_name = %ctx.namespace_name(),
table_id = %ctx.table_id(),
table_name = %ctx.table_name(),
table = %ctx.table(),
partition_id = %ctx.partition_id(),
partition_key = %ctx.partition_key(),
expected=?old_sort_key,
@ -420,7 +420,7 @@ where
namespace_id = %ctx.namespace_id(),
namespace_name = %ctx.namespace_name(),
table_id = %ctx.table_id(),
table_name = %ctx.table_name(),
table = %ctx.table(),
partition_id = %ctx.partition_id(),
partition_key = %ctx.partition_key(),
expected=?old_sort_key,
@ -460,7 +460,7 @@ where
namespace_id = %ctx.namespace_id(),
namespace_name = %ctx.namespace_name(),
table_id = %ctx.table_id(),
table_name = %ctx.table_name(),
table = %ctx.table(),
partition_id = %ctx.partition_id(),
partition_key = %ctx.partition_key(),
?old_sort_key,
@ -488,7 +488,7 @@ where
namespace_id = %ctx.namespace_id(),
namespace_name = %ctx.namespace_name(),
table_id = %ctx.table_id(),
table_name = %ctx.table_name(),
table = %ctx.table(),
partition_id = %ctx.partition_id(),
partition_key = %ctx.partition_key(),
%object_store_id,
@ -512,7 +512,7 @@ where
namespace_id = %ctx.namespace_id(),
namespace_name = %ctx.namespace_name(),
table_id = %ctx.table_id(),
table_name = %ctx.table_name(),
table = %ctx.table(),
partition_id = %ctx.partition_id(),
partition_key = %ctx.partition_key(),
%object_store_id,

View File

@ -4,9 +4,10 @@ use async_trait::async_trait;
use data_types::{NamespaceId, TableId};
use iox_time::{SystemProvider, TimeProvider};
use metric::{DurationHistogram, Metric};
use predicate::Predicate;
use trace::span::Span;
use super::QueryExec;
use super::{projection::OwnedProjection, QueryExec};
use crate::query::QueryError;
/// An instrumentation decorator over a [`QueryExec`] implementation.
@ -62,14 +63,15 @@ where
&self,
namespace_id: NamespaceId,
table_id: TableId,
columns: Vec<String>,
projection: OwnedProjection,
span: Option<Span>,
predicate: Option<Predicate>,
) -> Result<Self::Response, QueryError> {
let t = self.time_provider.now();
let res = self
.inner
.query_exec(namespace_id, table_id, columns, span)
.query_exec(namespace_id, table_id, projection, span, predicate)
.await;
if let Some(delta) = self.time_provider.now().checked_duration_since(t) {
@ -113,7 +115,7 @@ mod tests {
// Call the decorator and assert the return value
let got = decorator
.query_exec(NamespaceId::new(42), TableId::new(24), vec![], None)
.query_exec(NamespaceId::new(42), TableId::new(24), OwnedProjection::default(), None, None)
.await;
assert_matches!(got, $($want_ret)+);

View File

@ -1,9 +1,10 @@
use async_trait::async_trait;
use data_types::{NamespaceId, TableId};
use parking_lot::Mutex;
use predicate::Predicate;
use trace::span::Span;
use super::{response::QueryResponse, QueryError, QueryExec};
use super::{projection::OwnedProjection, response::QueryResponse, QueryError, QueryExec};
#[derive(Debug, Default)]
pub(crate) struct MockQueryExec {
@ -25,8 +26,9 @@ impl QueryExec for MockQueryExec {
&self,
_namespace_id: NamespaceId,
_table_id: TableId,
_columns: Vec<String>,
_projection: OwnedProjection,
_span: Option<Span>,
_predicate: Option<Predicate>,
) -> Result<Self::Response, QueryError> {
self.response
.lock()

View File

@ -3,6 +3,8 @@
mod r#trait;
pub(crate) use r#trait::*;
pub(crate) mod projection;
// Response types
pub(crate) mod partition_response;
pub(crate) mod response;

View File

@ -0,0 +1,129 @@
use arrow::record_batch::RecordBatch;
use mutable_batch::MutableBatch;
use schema::SchemaBuilder;
/// The private inner type, preventing callers from constructing an empty `Project` subset.
#[derive(Debug, Default)]
enum Projection {
/// Return all columns.
#[default]
All,
/// Return the specified subset of columns.
///
/// The returned columns MAY NOT match the specified column order.
//
// Invariant: subset is never empty - this variant is only constructed when
// there is at least one column to project.
Project(Vec<String>),
}
/// Specify the set of columns to project during a query.
///
/// Defaults to "all columns".
#[derive(Debug, Default)]
pub(crate) struct OwnedProjection(Projection);
impl From<Vec<String>> for OwnedProjection {
fn from(value: Vec<String>) -> Self {
if value.is_empty() {
return Self(Projection::All);
}
Self(Projection::Project(value))
}
}
impl OwnedProjection {
/// Copy the data within a [`MutableBatch`] into a [`RecordBatch`], applying
/// the specified projection.
///
/// This avoids copying column data for columns that are not part of the
/// projection.
///
/// NOTE: this copies the underlying column data
pub(crate) fn project_mutable_batches(&self, batch: &MutableBatch) -> RecordBatch {
// Pre-allocate the outputs to their maximal possible size to avoid
// reallocations.
let max_capacity = match &self.0 {
Projection::All => batch.columns().len(),
Projection::Project(s) => s.len(),
};
let mut schema_builder = SchemaBuilder::with_capacity(max_capacity);
let mut column_data = Vec::with_capacity(max_capacity);
// Compute the schema overlap between the requested projection, and the
// buffered data.
//
// Generate the RecordBatch contents in a single pass.
match &self.0 {
Projection::All => {
// If there's no projection, the columns must be emitted ordered
// by their name.
let mut columns = batch.columns().collect::<Vec<_>>();
columns.sort_unstable_by_key(|v| v.0);
for (name, column) in columns.into_iter() {
schema_builder.influx_column(name, column.influx_type());
column_data.push(column.to_arrow().expect("failed to snapshot buffer data"));
}
}
Projection::Project(cols) => {
// Invariant: subset is never empty
assert!(!cols.is_empty());
// Construct the schema & data arrays in a single pass, ordered
// by the projection and ignoring any missing columns.
for name in cols {
if let Ok(column) = batch.column(name) {
schema_builder.influx_column(name, column.influx_type());
column_data
.push(column.to_arrow().expect("failed to snapshot buffer data"));
}
}
}
};
let schema = schema_builder
.build()
.expect("failed to create batch schema");
RecordBatch::try_new(schema.into(), column_data)
.expect("failed to generate snapshot record batch")
}
/// Apply the specified projection to `batches`.
///
/// This projection requires relatively cheap ref-counting clones and does
/// not copy the underlying data.
pub(crate) fn project_record_batch(&self, batches: &[RecordBatch]) -> Vec<RecordBatch> {
match &self.0 {
Projection::All => batches.to_vec(),
Projection::Project(columns) => {
// Invariant: subset is never empty
assert!(!columns.is_empty());
batches
.iter()
.map(|batch| {
let schema = batch.schema();
// Map the column names to column indexes,
// ignoring specified columns that do not
// exist in this batch.
let projection = columns
.iter()
.flat_map(|column_name| schema.index_of(column_name).ok())
.collect::<Vec<_>>();
batch
.project(&projection)
.expect("batch projection failure")
})
.collect()
}
}
}
}
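A hedged, crate-internal usage sketch of the new type; `batches: Vec<RecordBatch>` is assumed to exist:

// An empty column list collapses to Projection::All, preserving the
// "subset is never empty" invariant documented above.
let all = OwnedProjection::from(Vec::<String>::new());
let some = OwnedProjection::from(vec![
    "level".to_string(),
    "platanos".to_string(), // nonexistent column: silently ignored
]);

// Cheap, ref-counted projection over already-snapshotted batches.
let projected: Vec<RecordBatch> = some.project_record_batch(&batches);
let everything: Vec<RecordBatch> = all.project_record_batch(&batches);

The empty-vec collapse means the wire convention used by the query RPC ("no columns requested" means "all columns") maps directly onto `Projection::All`, so the non-empty invariant of `Project` stays trivially enforceable.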

View File

@ -58,6 +58,7 @@ use iox_time::{SystemProvider, Time, TimeProvider};
use metric::{DurationHistogram, Metric, U64Histogram, U64HistogramOptions};
use observability_deps::tracing::debug;
use pin_project::{pin_project, pinned_drop};
use predicate::Predicate;
use trace::span::Span;
use crate::query::{
@ -66,6 +67,8 @@ use crate::query::{
QueryError, QueryExec,
};
use super::projection::OwnedProjection;
/// A [`QueryExec`] decorator adding instrumentation to the [`QueryResponse`]
/// returned by the inner implementation.
///
@ -202,14 +205,17 @@ where
&self,
namespace_id: NamespaceId,
table_id: TableId,
columns: Vec<String>,
projection: OwnedProjection,
span: Option<Span>,
predicate: Option<Predicate>,
) -> Result<Self::Response, QueryError> {
let started_at = self.time_provider.now();
// TODO(savage): Would accepting a predicate here require additional
// metrics to be added?
let stream = self
.inner
.query_exec(namespace_id, table_id, columns, span)
.query_exec(namespace_id, table_id, projection, span, predicate)
.await?;
let stream = QueryMetricContext::new(
@ -467,7 +473,13 @@ mod tests {
.with_time_provider(Arc::clone(&mock_time));
let response = layer
.query_exec(ARBITRARY_NAMESPACE_ID, ARBITRARY_TABLE_ID, vec![], None)
.query_exec(
ARBITRARY_NAMESPACE_ID,
ARBITRARY_TABLE_ID,
OwnedProjection::default(),
None,
None,
)
.await
.expect("query should succeed");
@ -548,7 +560,13 @@ mod tests {
.with_time_provider(Arc::clone(&mock_time));
let response = layer
.query_exec(ARBITRARY_NAMESPACE_ID, ARBITRARY_TABLE_ID, vec![], None)
.query_exec(
ARBITRARY_NAMESPACE_ID,
ARBITRARY_TABLE_ID,
OwnedProjection::default(),
None,
None,
)
.await
.expect("query should succeed");
@ -628,7 +646,13 @@ mod tests {
.with_time_provider(Arc::clone(&mock_time));
let response = layer
.query_exec(ARBITRARY_NAMESPACE_ID, ARBITRARY_TABLE_ID, vec![], None)
.query_exec(
ARBITRARY_NAMESPACE_ID,
ARBITRARY_TABLE_ID,
OwnedProjection::default(),
None,
None,
)
.await
.expect("query should succeed");
@ -708,7 +732,13 @@ mod tests {
.with_time_provider(Arc::clone(&mock_time));
let response = layer
.query_exec(ARBITRARY_NAMESPACE_ID, ARBITRARY_TABLE_ID, vec![], None)
.query_exec(
ARBITRARY_NAMESPACE_ID,
ARBITRARY_TABLE_ID,
OwnedProjection::default(),
None,
None,
)
.await
.expect("query should succeed");

View File

@ -2,9 +2,10 @@ use std::borrow::Cow;
use async_trait::async_trait;
use data_types::{NamespaceId, TableId};
use predicate::Predicate;
use trace::span::{Span, SpanRecorder};
use super::QueryExec;
use super::{projection::OwnedProjection, QueryExec};
use crate::query::QueryError;
/// A tracing decorator over a [`QueryExec`] implementation.
@ -40,14 +41,21 @@ where
&self,
namespace_id: NamespaceId,
table_id: TableId,
columns: Vec<String>,
projection: OwnedProjection,
span: Option<Span>,
predicate: Option<Predicate>,
) -> Result<Self::Response, QueryError> {
let mut recorder = SpanRecorder::new(span).child(self.name.clone());
match self
.inner
.query_exec(namespace_id, table_id, columns, recorder.span().cloned())
.query_exec(
namespace_id,
table_id,
projection,
recorder.span().cloned(),
predicate,
)
.await
{
Ok(v) => {
@ -109,8 +117,9 @@ mod tests {
.query_exec(
NamespaceId::new(42),
TableId::new(24),
vec![],
OwnedProjection::default(),
Some(span.child("root span")),
None,
)
.await
.expect("wrapper should not modify result");
@ -132,8 +141,9 @@ mod tests {
.query_exec(
NamespaceId::new(42),
TableId::new(24),
vec![],
OwnedProjection::default(),
Some(span.child("root span")),
None,
)
.await
.expect_err("wrapper should not modify result");

View File

@ -2,9 +2,12 @@ use std::{fmt::Debug, ops::Deref, sync::Arc};
use async_trait::async_trait;
use data_types::{NamespaceId, TableId};
use predicate::Predicate;
use thiserror::Error;
use trace::span::Span;
use super::projection::OwnedProjection;
#[derive(Debug, Error)]
#[allow(missing_copy_implementations)]
pub(crate) enum QueryError {
@ -23,8 +26,9 @@ pub(crate) trait QueryExec: Send + Sync + Debug {
&self,
namespace_id: NamespaceId,
table_id: TableId,
columns: Vec<String>,
projection: OwnedProjection,
span: Option<Span>,
predicate: Option<Predicate>,
) -> Result<Self::Response, QueryError>;
}
@ -39,11 +43,12 @@ where
&self,
namespace_id: NamespaceId,
table_id: TableId,
columns: Vec<String>,
projection: OwnedProjection,
span: Option<Span>,
predicate: Option<Predicate>,
) -> Result<Self::Response, QueryError> {
self.deref()
.query_exec(namespace_id, table_id, columns, span)
.query_exec(namespace_id, table_id, projection, span, predicate)
.await
}
}
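The blanket impl above lets callers invoke `query_exec` straight through smart pointers. A hedged call-site sketch (assuming the blanket impl covers `Arc<dyn QueryExec>`, as its `Deref` bound suggests):

async fn run(
    exec: Arc<dyn QueryExec<Response = QueryResponse>>,
) -> Result<QueryResponse, QueryError> {
    exec.query_exec(
        NamespaceId::new(42),
        TableId::new(24),
        OwnedProjection::default(), // all columns
        None,                       // no tracing span
        None,                       // no predicate
    )
    .await
}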

View File

@ -5,15 +5,13 @@ use std::{any::Any, sync::Arc};
use arrow::record_batch::RecordBatch;
use arrow_util::util::ensure_schema;
use data_types::{ChunkId, ChunkOrder, PartitionId};
use datafusion::{error::DataFusionError, physical_plan::Statistics};
use data_types::{ChunkId, ChunkOrder, PartitionId, TimestampMinMax};
use datafusion::physical_plan::Statistics;
use iox_query::{
exec::{stringset::StringSet, IOxSessionContext},
util::{compute_timenanosecond_min_max, create_basic_summary},
QueryChunk, QueryChunkData,
};
use once_cell::sync::OnceCell;
use predicate::Predicate;
use schema::{merge::merge_record_batch_schemas, sort::SortKey, Projection, Schema};
/// A queryable wrapper over a set of ordered [`RecordBatch`] snapshot from a
@ -30,7 +28,7 @@ pub struct QueryAdaptor {
///
/// This MUST be non-pub(crate) / closed for modification / immutable to support
/// interning the merged schema in [`Self::schema()`].
data: Vec<Arc<RecordBatch>>,
data: Vec<RecordBatch>,
/// The catalog ID of the partition this data is part of.
partition_id: PartitionId,
@ -52,12 +50,12 @@ impl QueryAdaptor {
///
/// This constructor panics if `data` contains no [`RecordBatch`], or if
/// all [`RecordBatch`] are empty.
pub(crate) fn new(partition_id: PartitionId, data: Vec<Arc<RecordBatch>>) -> Self {
pub(crate) fn new(partition_id: PartitionId, data: Vec<RecordBatch>) -> Self {
// There must always be at least one record batch and one row.
//
// This upholds an invariant that simplifies dealing with empty
// partitions - if there is a QueryAdaptor, it contains data.
assert!(data.iter().map(|b| b.num_rows()).sum::<usize>() > 0);
assert!(data.iter().any(|b| b.num_rows() > 0));
let schema = merge_record_batch_schemas(&data);
Self {
@ -75,8 +73,7 @@ impl QueryAdaptor {
// Project the column selection across all RecordBatch
self.data
.iter()
.map(|data| {
let batch = data.as_ref();
.map(|batch| {
let schema = batch.schema();
// Apply selection to in-memory batch
@ -98,25 +95,40 @@ impl QueryAdaptor {
}
/// Returns the [`RecordBatch`] instances in this [`QueryAdaptor`].
pub(crate) fn record_batches(&self) -> &[Arc<RecordBatch>] {
pub(crate) fn record_batches(&self) -> &[RecordBatch] {
self.data.as_ref()
}
/// Unwrap this [`QueryAdaptor`], yielding the inner [`RecordBatch`]
/// instances.
pub(crate) fn into_record_batches(self) -> Vec<RecordBatch> {
self.data
}
/// Returns the partition ID from which the data in this [`QueryAdaptor`]
/// was sourced.
pub(crate) fn partition_id(&self) -> PartitionId {
self.partition_id
}
/// Number of rows, useful for building stats
pub(crate) fn num_rows(&self) -> u64 {
self.data.iter().map(|b| b.num_rows()).sum::<usize>() as u64
}
/// Time range, useful for building stats
pub(crate) fn ts_min_max(&self) -> TimestampMinMax {
compute_timenanosecond_min_max(self.data.iter()).expect("Should have time range")
}
}
impl QueryChunk for QueryAdaptor {
fn stats(&self) -> Arc<Statistics> {
Arc::clone(self.stats.get_or_init(|| {
let ts_min_max = compute_timenanosecond_min_max(self.data.iter().map(|b| b.as_ref()))
.expect("Should have time range");
let ts_min_max = self.ts_min_max();
Arc::new(create_basic_summary(
self.data.iter().map(|b| b.num_rows()).sum::<usize>() as u64,
self.num_rows(),
self.schema(),
ts_min_max,
))
@ -147,20 +159,6 @@ impl QueryChunk for QueryAdaptor {
true
}
/// Return a set of Strings containing the distinct values in the
/// specified columns. If the predicate can be evaluated entirely
/// on the metadata of this Chunk. Returns `None` otherwise
///
/// The requested columns must all have String type.
fn column_values(
&self,
_ctx: IOxSessionContext,
_column_name: &str,
_predicate: &Predicate,
) -> Result<Option<StringSet>, DataFusionError> {
Ok(None)
}
fn data(&self) -> QueryChunkData {
let schema = self.schema().as_arrow();

View File

@ -12,6 +12,7 @@ use futures::{Stream, StreamExt, TryStreamExt};
use ingester_query_grpc::influxdata::iox::ingester::v1 as proto;
use metric::{DurationHistogram, U64Counter};
use observability_deps::tracing::*;
use predicate::Predicate;
use prost::Message;
use thiserror::Error;
use tokio::sync::{Semaphore, TryAcquireError};
@ -26,7 +27,7 @@ use instrumentation::FlightFrameEncodeInstrumentation;
use crate::{
ingester_id::IngesterId,
query::{response::QueryResponse, QueryError, QueryExec},
query::{projection::OwnedProjection, response::QueryResponse, QueryError, QueryExec},
};
/// Error states for the query RPC handler.
@ -48,6 +49,10 @@ enum Error {
/// The number of simultaneous queries being executed has been reached.
#[error("simultaneous query limit exceeded")]
RequestLimit,
/// The payload within the request has an invalid field value.
#[error("field violation: {0}")]
FieldViolation(#[from] ingester_query_grpc::FieldViolation),
}
/// Map a query-execution error into a [`tonic::Status`].
@ -77,6 +82,10 @@ impl From<Error> for tonic::Status {
warn!("simultaneous query limit exceeded");
Code::ResourceExhausted
}
Error::FieldViolation(_) => {
debug!(error=%e, "request contains field violation");
Code::InvalidArgument
}
};
Self::new(code, e.to_string())
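A standalone sketch of the resulting gRPC status for the new variant, using tonic's public API:

use tonic::{Code, Status};

fn main() {
    // FieldViolation maps to InvalidArgument, per the match arm above.
    let status = Status::new(Code::InvalidArgument, "field violation: predicate");
    assert_eq!(status.code(), Code::InvalidArgument);
    println!("{}", status.message());
}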
@ -188,18 +197,21 @@ where
let ticket = request.into_inner();
let request = proto::IngesterQueryRequest::decode(&*ticket.ticket).map_err(Error::from)?;
// Extract the namespace/table identifiers
// Extract the namespace/table identifiers and the query predicate
let namespace_id = NamespaceId::new(request.namespace_id);
let table_id = TableId::new(request.table_id);
let predicate = if let Some(p) = request.predicate {
debug!(predicate=?p, "received query predicate");
Some(Predicate::try_from(p).map_err(Error::from)?)
} else {
None
};
// Predicate pushdown is part of the API, but not implemented.
if let Some(p) = request.predicate {
debug!(predicate=?p, "ignoring query predicate (unsupported)");
}
let projection = OwnedProjection::from(request.columns);
let response = match self
.query_handler
.query_exec(namespace_id, table_id, request.columns, span.clone())
.query_exec(namespace_id, table_id, projection, span.clone(), predicate)
.await
{
Ok(v) => v,
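For reference, a hedged sketch of the request shape this handler now fully consumes (field names as used by the integration tests later in this diff; the encoded predicate payload is elided):

let request = proto::IngesterQueryRequest {
    namespace_id: 42,
    table_id: 24,
    columns: vec!["time".to_string(), "level".to_string()], // empty => all columns
    predicate: None, // Some(p) is now decoded into a Predicate, not ignored
};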

View File

@ -1,6 +1,9 @@
use std::{collections::BTreeMap, sync::Arc, time::Duration};
use data_types::{NamespaceId, PartitionId, PartitionKey, SequenceNumber, TableId};
use data_types::{
partition_template::TablePartitionTemplateOverride, NamespaceId, PartitionId, PartitionKey,
SequenceNumber, TableId,
};
use iox_catalog::{interface::Catalog, test_helpers::arbitrary_namespace};
use lazy_static::lazy_static;
use mutable_batch_lp::lines_to_batches;
@ -15,8 +18,8 @@ use crate::{
},
partition::{PartitionData, SortKeyState},
table::{
name_resolver::{mock::MockTableNameProvider, TableNameProvider},
TableName,
metadata_resolver::{mock::MockTableProvider, TableProvider},
TableMetadata, TableName,
},
},
deferred_load::DeferredLoad,
@ -44,10 +47,15 @@ pub(crate) fn defer_namespace_name_1_ms() -> Arc<DeferredLoad<NamespaceName>> {
))
}
pub(crate) fn defer_table_name_1_sec() -> Arc<DeferredLoad<TableName>> {
pub(crate) fn defer_table_metadata_1_sec() -> Arc<DeferredLoad<TableMetadata>> {
Arc::new(DeferredLoad::new(
Duration::from_secs(1),
async { ARBITRARY_TABLE_NAME.clone() },
async {
TableMetadata::new_for_testing(
ARBITRARY_TABLE_NAME.clone(),
TablePartitionTemplateOverride::default(),
)
},
&metric::Registry::default(),
))
}
@ -60,8 +68,11 @@ lazy_static! {
pub(crate) static ref ARBITRARY_NAMESPACE_NAME_PROVIDER: Arc<dyn NamespaceNameProvider> =
Arc::new(MockNamespaceNameProvider::new(&**ARBITRARY_NAMESPACE_NAME));
pub(crate) static ref ARBITRARY_TABLE_NAME: TableName = TableName::from("bananas");
pub(crate) static ref ARBITRARY_TABLE_NAME_PROVIDER: Arc<dyn TableNameProvider> =
Arc::new(MockTableNameProvider::new(&**ARBITRARY_TABLE_NAME));
pub(crate) static ref ARBITRARY_TABLE_PROVIDER: Arc<dyn TableProvider> =
Arc::new(MockTableProvider::new(TableMetadata::new_for_testing(
ARBITRARY_TABLE_NAME.clone(),
TablePartitionTemplateOverride::default()
)));
}
/// Build a [`PartitionData`] with mostly arbitrary-yet-valid values for tests.
@ -71,7 +82,7 @@ pub(crate) struct PartitionDataBuilder {
partition_key: Option<PartitionKey>,
namespace_id: Option<NamespaceId>,
table_id: Option<TableId>,
table_name_loader: Option<Arc<DeferredLoad<TableName>>>,
table_loader: Option<Arc<DeferredLoad<TableMetadata>>>,
namespace_loader: Option<Arc<DeferredLoad<NamespaceName>>>,
sort_key: Option<SortKeyState>,
}
@ -101,11 +112,11 @@ impl PartitionDataBuilder {
self
}
pub(crate) fn with_table_name_loader(
pub(crate) fn with_table_loader(
mut self,
table_name_loader: Arc<DeferredLoad<TableName>>,
table_loader: Arc<DeferredLoad<TableMetadata>>,
) -> Self {
self.table_name_loader = Some(table_name_loader);
self.table_loader = Some(table_loader);
self
}
@ -134,8 +145,7 @@ impl PartitionDataBuilder {
self.namespace_loader
.unwrap_or_else(defer_namespace_name_1_sec),
self.table_id.unwrap_or(ARBITRARY_TABLE_ID),
self.table_name_loader
.unwrap_or_else(defer_table_name_1_sec),
self.table_loader.unwrap_or_else(defer_table_metadata_1_sec),
self.sort_key.unwrap_or(SortKeyState::Provided(None)),
)
}
@ -270,7 +280,7 @@ pub(crate) fn make_write_op(
namespace_id: NamespaceId,
table_name: &str,
table_id: TableId,
sequence_number: i64,
sequence_number: u64,
lines: &str,
span_ctx: Option<SpanContext>,
) -> WriteOperation {

View File

@ -32,7 +32,7 @@ impl TimestampOracle {
// or diverge between threads.
let v = self.0.fetch_add(1, Ordering::Relaxed);
SequenceNumber::new(v as i64)
SequenceNumber::new(v)
}
}
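A standalone sketch of the oracle pattern above: a relaxed `fetch_add` is enough for per-process monotonicity, and storing `u64` removes the `i64` conversion the old code needed:

use std::sync::atomic::{AtomicU64, Ordering};

struct Oracle(AtomicU64);

impl Oracle {
    fn next(&self) -> u64 {
        self.0.fetch_add(1, Ordering::Relaxed)
    }
}

fn main() {
    let oracle = Oracle(AtomicU64::new(42));
    assert_eq!(oracle.next(), 42);
    assert_eq!(oracle.next(), 43);
}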
@ -106,6 +106,6 @@ mod tests {
timestamps
.into_iter()
.zip(expected)
.for_each(|(got, want)| assert_eq!(got, want as i64));
.for_each(|(got, want)| assert_eq!(got, want as u64));
}
}

View File

@ -248,7 +248,7 @@ mod tests {
/// Return a [`SequenceNumberSet`] containing `vals`.
fn new_set<T>(vals: T) -> SequenceNumberSet
where
T: IntoIterator<Item = i64>,
T: IntoIterator<Item = u64>,
{
vals.into_iter().map(SequenceNumber::new).collect()
}
@ -257,7 +257,7 @@ mod tests {
/// [`SequenceNumberSet`] values.
fn new_note<T>(vals: T) -> Arc<CompletedPersist>
where
T: IntoIterator<Item = i64>,
T: IntoIterator<Item = u64>,
{
Arc::new(CompletedPersist::new(
ParquetFileParams {

View File

@ -105,10 +105,7 @@ impl WalAppender for Arc<wal::Wal> {
let partition_sequence_numbers = w
.tables()
.map(|(table_id, data)| {
(
*table_id,
data.partitioned_data().sequence_number().get() as u64,
)
(*table_id, data.partitioned_data().sequence_number().get())
})
.collect::<HashMap<TableId, u64>>();
(

ingester/tests/query.rs (new file)
View File

@ -0,0 +1,162 @@
use arrow_util::assert_batches_sorted_eq;
use data_types::PartitionKey;
use ingester_query_grpc::influxdata::iox::ingester::v1::IngesterQueryRequest;
use ingester_test_ctx::TestContextBuilder;
use metric::{DurationHistogram, U64Histogram};
// Write data to an ingester through the RPC interface and query the data, validating the contents.
#[tokio::test]
async fn write_query() {
let namespace_name = "write_query_test_namespace";
let mut ctx = TestContextBuilder::default().build().await;
let ns = ctx.ensure_namespace(namespace_name, None).await;
// Initial write
let partition_key = PartitionKey::from("1970-01-01");
ctx.write_lp(
namespace_name,
"bananas greatness=\"unbounded\" 10",
partition_key.clone(),
0,
)
.await;
// A subsequent write with a non-contiguous sequence number to a different table.
ctx.write_lp(
namespace_name,
"cpu bar=2 20\ncpu bar=3 30",
partition_key.clone(),
7,
)
.await;
// And a third write that appends more data to the table in the initial
// write.
ctx.write_lp(
namespace_name,
"bananas count=42 200",
partition_key.clone(),
42,
)
.await;
// Perform a query to validate the actual data buffered.
let data: Vec<_> = ctx
.query(IngesterQueryRequest {
namespace_id: ns.id.get(),
table_id: ctx.table_id(namespace_name, "bananas").await.get(),
columns: vec![],
predicate: None,
})
.await
.expect("query request failed");
let expected = vec![
"+-------+-----------+--------------------------------+",
"| count | greatness | time |",
"+-------+-----------+--------------------------------+",
"| | unbounded | 1970-01-01T00:00:00.000000010Z |",
"| 42.0 | | 1970-01-01T00:00:00.000000200Z |",
"+-------+-----------+--------------------------------+",
];
assert_batches_sorted_eq!(&expected, &data);
// Assert various ingest metrics.
let hist = ctx
.get_metric::<DurationHistogram, _>(
"ingester_dml_sink_apply_duration",
&[("handler", "write_apply"), ("result", "success")],
)
.fetch();
assert_eq!(hist.sample_count(), 3);
// Read metrics
let hist = ctx
.get_metric::<DurationHistogram, _>(
"ingester_query_stream_duration",
&[("request", "complete")],
)
.fetch();
assert_eq!(hist.sample_count(), 1);
let hist = ctx
.get_metric::<U64Histogram, _>("ingester_query_result_row", &[])
.fetch();
assert_eq!(hist.sample_count(), 1);
assert_eq!(hist.total, 2);
}
// Write data to an ingester through the RPC interface and query the data, validating the contents.
#[tokio::test]
async fn write_query_projection() {
let namespace_name = "write_query_test_namespace";
let mut ctx = TestContextBuilder::default().build().await;
let ns = ctx.ensure_namespace(namespace_name, None).await;
// Initial write
let partition_key = PartitionKey::from("1970-01-01");
ctx.write_lp(
namespace_name,
"bananas greatness=\"unbounded\",level=42 10",
partition_key.clone(),
0,
)
.await;
// Another write that appends more data to the table in the initial write.
ctx.write_lp(
namespace_name,
"bananas count=42,level=4242 200",
partition_key.clone(),
42,
)
.await;
// Perform a query to validate the actual data buffered.
let data: Vec<_> = ctx
.query(IngesterQueryRequest {
namespace_id: ns.id.get(),
table_id: ctx.table_id(namespace_name, "bananas").await.get(),
columns: vec![],
predicate: None,
})
.await
.expect("query request failed");
let expected = vec![
"+-------+-----------+--------+--------------------------------+",
"| count | greatness | level | time |",
"+-------+-----------+--------+--------------------------------+",
"| | unbounded | 42.0 | 1970-01-01T00:00:00.000000010Z |",
"| 42.0 | | 4242.0 | 1970-01-01T00:00:00.000000200Z |",
"+-------+-----------+--------+--------------------------------+",
];
assert_batches_sorted_eq!(&expected, &data);
// And perform a query with projection, selecting a column that is entirely
// non-NULL, a column containing NULLs (in a different order to the above)
// and a column that does not exist.
let data: Vec<_> = ctx
.query(IngesterQueryRequest {
namespace_id: ns.id.get(),
table_id: ctx.table_id(namespace_name, "bananas").await.get(),
columns: vec![
"level".to_string(),
"greatness".to_string(),
"platanos".to_string(),
],
predicate: None,
})
.await
.expect("query request failed");
let expected = vec![
"+--------+-----------+",
"| level | greatness |",
"+--------+-----------+",
"| 42.0 | unbounded |",
"| 4242.0 | |",
"+--------+-----------+",
];
assert_batches_sorted_eq!(&expected, &data);
}

View File

@ -10,88 +10,6 @@ use metric::{
use parquet_file::ParquetFilePath;
use std::{sync::Arc, time::Duration};
// Write data to an ingester through the RPC interface and query the data, validating the contents.
#[tokio::test]
async fn write_query() {
let namespace_name = "write_query_test_namespace";
let mut ctx = TestContextBuilder::default().build().await;
let ns = ctx.ensure_namespace(namespace_name, None).await;
// Initial write
let partition_key = PartitionKey::from("1970-01-01");
ctx.write_lp(
namespace_name,
"bananas greatness=\"unbounded\" 10",
partition_key.clone(),
0,
)
.await;
// A subsequent write with a non-contiguous sequence number to a different table.
ctx.write_lp(
namespace_name,
"cpu bar=2 20\ncpu bar=3 30",
partition_key.clone(),
7,
)
.await;
// And a third write that appends more data to the table in the initial
// write.
ctx.write_lp(
namespace_name,
"bananas count=42 200",
partition_key.clone(),
42,
)
.await;
// Perform a query to validate the actual data buffered.
let data: Vec<_> = ctx
.query(IngesterQueryRequest {
namespace_id: ns.id.get(),
table_id: ctx.table_id(namespace_name, "bananas").await.get(),
columns: vec![],
predicate: None,
})
.await
.expect("query request failed");
let expected = vec![
"+-------+-----------+--------------------------------+",
"| count | greatness | time |",
"+-------+-----------+--------------------------------+",
"| | unbounded | 1970-01-01T00:00:00.000000010Z |",
"| 42.0 | | 1970-01-01T00:00:00.000000200Z |",
"+-------+-----------+--------------------------------+",
];
assert_batches_sorted_eq!(&expected, &data);
// Assert various ingest metrics.
let hist = ctx
.get_metric::<DurationHistogram, _>(
"ingester_dml_sink_apply_duration",
&[("handler", "write_apply"), ("result", "success")],
)
.fetch();
assert_eq!(hist.sample_count(), 3);
// Read metrics
let hist = ctx
.get_metric::<DurationHistogram, _>(
"ingester_query_stream_duration",
&[("request", "complete")],
)
.fetch();
assert_eq!(hist.sample_count(), 1);
let hist = ctx
.get_metric::<U64Histogram, _>("ingester_query_result_row", &[])
.fetch();
assert_eq!(hist.sample_count(), 1);
assert_eq!(hist.total, 2);
}
// Write data to an ingester through the RPC interface and persist the data.
#[tokio::test]
async fn write_persist() {

View File

@ -242,7 +242,7 @@ where
namespace: &str,
lp: &str,
partition_key: PartitionKey,
sequence_number: i64,
sequence_number: u64,
) {
// Resolve the namespace ID needed to construct the DML op
let namespace_id = self.namespace_id(namespace).await;

View File

@ -6,7 +6,7 @@ edition.workspace = true
license.workspace = true
[dependencies] # In alphabetical order
async-trait = "0.1.68"
async-trait = "0.1.70"
data_types = { path = "../data_types" }
futures = "0.3"
iox_time = { version = "0.1.0", path = "../iox_time" }
@ -20,7 +20,7 @@ siphasher = "0.3"
snafu = "0.7"
sqlx = { version = "0.6", features = [ "runtime-tokio-rustls" , "postgres", "uuid", "sqlite" ] }
sqlx-hotswap-pool = { path = "../sqlx-hotswap-pool" }
thiserror = "1.0.40"
thiserror = "1.0.41"
tokio = { version = "1.29", features = ["io-util", "macros", "parking_lot", "rt-multi-thread", "time"] }
uuid = { version = "1", features = ["v4"] }
workspace-hack = { version = "0.1", path = "../workspace-hack" }
@ -30,7 +30,7 @@ assert_matches = "1.5.0"
dotenvy = "0.15.7"
generated_types = { path = "../generated_types" }
mutable_batch_lp = { path = "../mutable_batch_lp" }
paste = "1.0.12"
paste = "1.0.13"
pretty_assertions = "1.3.0"
rand = "0.8"
tempfile = "3"

View File

@ -179,8 +179,8 @@ decorate!(
"partition_list_skipped_compactions" = list_skipped_compactions(&mut self) -> Result<Vec<SkippedCompaction>>;
"partition_delete_skipped_compactions" = delete_skipped_compactions(&mut self, partition_id: PartitionId) -> Result<Option<SkippedCompaction>>;
"partition_most_recent_n" = most_recent_n(&mut self, n: usize) -> Result<Vec<Partition>>;
"partitions_new_file_between" = partitions_new_file_between(&mut self, minimum_time: Timestamp, maximum_time: Option<Timestamp>) -> Result<Vec<PartitionId>>;
"get_in_skipped_compaction" = get_in_skipped_compaction(&mut self, partition_id: PartitionId) -> Result<Option<SkippedCompaction>>;
"partition_partitions_new_file_between" = partitions_new_file_between(&mut self, minimum_time: Timestamp, maximum_time: Option<Timestamp>) -> Result<Vec<PartitionId>>;
"partition_get_in_skipped_compaction" = get_in_skipped_compaction(&mut self, partition_id: PartitionId) -> Result<Option<SkippedCompaction>>;
]
);
@ -195,7 +195,7 @@ decorate!(
"parquet_delete_old_ids_only" = delete_old_ids_only(&mut self, older_than: Timestamp) -> Result<Vec<ParquetFileId>>;
"parquet_list_by_partition_not_to_delete" = list_by_partition_not_to_delete(&mut self, partition_id: PartitionId) -> Result<Vec<ParquetFile>>;
"parquet_get_by_object_store_id" = get_by_object_store_id(&mut self, object_store_id: Uuid) -> Result<Option<ParquetFile>>;
"exists_by_object_store_id_batch" = exists_by_object_store_id_batch(&mut self, object_store_ids: Vec<Uuid>) -> Result<Vec<Uuid>>;
"parquet_exists_by_object_store_id_batch" = exists_by_object_store_id_batch(&mut self, object_store_ids: Vec<Uuid>) -> Result<Vec<Uuid>>;
"parquet_create_upgrade_delete" = create_upgrade_delete(&mut self, delete: &[ParquetFileId], upgrade: &[ParquetFileId], create: &[ParquetFileParams], target_level: CompactionLevel) -> Result<Vec<ParquetFileId>>;
]
);
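
The renames above make every decorated catalog method report under a metric name prefixed with its object type (`partition_…`, `parquet_…`). A simplified sketch of what the `decorate!` macro's generated wrappers do; this is illustrative only, not the macro's actual expansion:

```rust
// Illustrative only: time a catalog call and record the duration under the
// (now consistently prefixed) metric name. The real code records into a
// DurationHistogram rather than printing.
use std::time::Instant;

fn record<T>(metric_name: &str, f: impl FnOnce() -> T) -> T {
    let start = Instant::now();
    let result = f();
    println!("{metric_name}: {:?}", start.elapsed());
    result
}

fn main() {
    let _rows = record("partition_get_in_skipped_compaction", || {
        // ... the decorated catalog query would run here ...
        Vec::<u64>::new()
    });
}
```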

View File

@ -23,7 +23,7 @@ rand = { version = "0.8.3", features = ["small_rng"] }
regex = "1.8"
schema = { path = "../schema" }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.99"
serde_json = "1.0.100"
snafu = "0.7"
tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] }
toml = "0.7.5"

View File

@ -22,7 +22,7 @@ use arrow::{
use async_trait::async_trait;
use data_types::{ChunkId, ChunkOrder, PartitionId};
use datafusion::{error::DataFusionError, physical_plan::Statistics, prelude::SessionContext};
use exec::{stringset::StringSet, IOxSessionContext};
use exec::IOxSessionContext;
use hashbrown::HashMap;
use observability_deps::tracing::trace;
use once_cell::sync::Lazy;
@ -34,6 +34,7 @@ use schema::{
};
use std::{any::Any, fmt::Debug, sync::Arc};
pub mod chunk_statistics;
pub mod config;
pub mod exec;
pub mod frontend;
@ -81,18 +82,6 @@ pub trait QueryChunk: Debug + Send + Sync + 'static {
/// key" within itself
fn may_contain_pk_duplicates(&self) -> bool;
/// Return a set of Strings containing the distinct values in the
/// specified columns, if the predicate can be evaluated entirely
/// on the metadata of this Chunk. Returns `None` otherwise.
///
/// The requested columns must all have String type.
fn column_values(
&self,
ctx: IOxSessionContext,
column_name: &str,
predicate: &Predicate,
) -> Result<Option<StringSet>, DataFusionError>;
/// Provides access to raw [`QueryChunk`] data.
///
/// The engine assumes that minimal work will be performed to gather the `QueryChunkData`.
@ -271,15 +260,6 @@ where
self.as_ref().may_contain_pk_duplicates()
}
fn column_values(
&self,
ctx: IOxSessionContext,
column_name: &str,
predicate: &Predicate,
) -> Result<Option<StringSet>, DataFusionError> {
self.as_ref().column_values(ctx, column_name, predicate)
}
fn data(&self) -> QueryChunkData {
self.as_ref().data()
}
@ -323,15 +303,6 @@ impl QueryChunk for Arc<dyn QueryChunk> {
self.as_ref().may_contain_pk_duplicates()
}
fn column_values(
&self,
ctx: IOxSessionContext,
column_name: &str,
predicate: &Predicate,
) -> Result<Option<StringSet>, DataFusionError> {
self.as_ref().column_values(ctx, column_name, predicate)
}
fn data(&self) -> QueryChunkData {
self.as_ref().data()
}
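
The surviving impls above forward every `QueryChunk` method through `as_ref()`, so wrappers keep working after `column_values` is removed. A minimal, self-contained sketch of that delegation pattern (simplified trait, not the real `QueryChunk`):

```rust
// Sketch: implementing a trait for `Arc<dyn Trait>` by forwarding through
// `as_ref()`, so trait objects behind an Arc behave like the inner chunk.
use std::sync::Arc;

trait Chunk {
    fn may_contain_pk_duplicates(&self) -> bool;
}

impl Chunk for Arc<dyn Chunk> {
    fn may_contain_pk_duplicates(&self) -> bool {
        // Dynamic dispatch lands on the concrete chunk's implementation.
        self.as_ref().may_contain_pk_duplicates()
    }
}

struct TestChunk;

impl Chunk for TestChunk {
    fn may_contain_pk_duplicates(&self) -> bool {
        false
    }
}

fn main() {
    let chunk: Arc<dyn Chunk> = Arc::new(TestChunk);
    assert!(!chunk.may_contain_pk_duplicates());
}
```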

View File

@ -1120,18 +1120,6 @@ impl QueryChunk for TestChunk {
"Test Chunk"
}
fn column_values(
&self,
_ctx: IOxSessionContext,
_column_name: &str,
_predicate: &Predicate,
) -> Result<Option<StringSet>, DataFusionError> {
self.check_error()?;
// Model not being able to get column values from metadata
Ok(None)
}
fn order(&self) -> ChunkOrder {
self.order
}

View File

@ -20,7 +20,7 @@ predicate = { path = "../predicate" }
query_functions = { path = "../query_functions" }
regex = "1"
schema = { path = "../schema" }
serde_json = "1.0.99"
serde_json = "1.0.100"
thiserror = "1.0"
workspace-hack = { version = "0.1", path = "../workspace-hack" }

View File

@ -2028,7 +2028,7 @@ mod test {
use crate::plan::ir::TagSet;
use datafusion::common::Result;
use influxdb_influxql_parser::select::SelectStatement;
use schema::{InfluxColumnType, InfluxFieldType};
use schema::{InfluxColumnType, InfluxFieldType, SchemaBuilder};
/// Test implementation that converts `Select` to `SelectStatement` so that it can be
/// converted back to a string.
@ -2647,7 +2647,18 @@ mod test {
/// Projections which contain function calls
#[test]
fn projection_call_expr() {
let namespace = MockSchemaProvider::default();
let mut namespace = MockSchemaProvider::default();
// Add a schema with tags that could conflict with aliasing against an
// existing call expression, in this case "last"
namespace.add_schema(
SchemaBuilder::new()
.measurement("conflicts")
.timestamp()
.tag("last")
.influx_field("field_f64", InfluxFieldType::Float)
.build()
.unwrap(),
);
let stmt = parse_select("SELECT COUNT(field_i64) FROM temp_01");
let stmt = rewrite_select_statement(&namespace, &stmt).unwrap();
@ -2694,6 +2705,14 @@ mod test {
stmt.to_string(),
"SELECT time::timestamp AS time, sum(field_f64::float) AS sum_field_f64, sum(field_i64::integer) AS sum_field_i64, sum(field_u64::unsigned) AS sum_field_u64, sum(shared_field0::float) AS sum_shared_field0 FROM temp_01"
);
// Handles conflicts when call expression is renamed to match an existing tag
let stmt = parse_select("SELECT LAST(field_f64), last FROM conflicts");
let stmt = rewrite_select_statement(&namespace, &stmt).unwrap();
assert_eq!(
stmt.to_string(),
"SELECT time::timestamp AS time, last(field_f64::float) AS last, last::tag AS last_1 FROM conflicts"
);
}
}
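
The new test asserts the disambiguation rule for alias collisions: when a projected column's name is already taken (here the tag `last` colliding with the alias of the `last(...)` call), the rewriter appends a numeric suffix, yielding `last_1`. A hedged sketch of that rule in isolation, not the planner's actual code:

```rust
// Sketch: first claimant keeps the name; later collisions get `_1`, `_2`, ...
use std::collections::HashSet;

fn unique_alias(wanted: &str, taken: &mut HashSet<String>) -> String {
    if taken.insert(wanted.to_string()) {
        return wanted.to_string();
    }
    let mut n = 1;
    loop {
        let candidate = format!("{wanted}_{n}");
        if taken.insert(candidate.clone()) {
            return candidate;
        }
        n += 1;
    }
}

fn main() {
    let mut taken = HashSet::new();
    assert_eq!(unique_alias("last", &mut taken), "last");
    assert_eq!(unique_alias("last", &mut taken), "last_1");
}
```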

View File

@ -66,9 +66,6 @@ const CONCURRENT_TABLE_JOBS: usize = 10;
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("gRPC planner got error finding column values: {}", source))]
FindingColumnValues { source: DataFusionError },
#[snafu(display(
"gRPC planner got error fetching chunks for table '{}': {}",
table_name,
@ -180,7 +177,6 @@ impl Error {
| Self::BuildingPlan { source, .. }
| Self::ReadColumns { source, .. }
| Self::CheckingChunkPredicate { source, .. }
| Self::FindingColumnValues { source, .. }
| Self::CastingAggregates { source, .. } => {
DataFusionError::Context(format!("{method}: {msg}"), Box::new(source))
}
@ -480,7 +476,6 @@ impl InfluxRpcPlanner {
)
.and_then(|(table_name, table_schema, predicate, chunks)| async move {
let mut chunks_full = vec![];
let mut known_values = BTreeSet::new();
let chunks = prune_chunks(&table_schema, chunks, &predicate);
for chunk in cheap_chunk_first(chunks) {
@ -513,36 +508,15 @@ impl InfluxRpcPlanner {
}
);
// try and get the list of values directly from metadata
let mut ctx = self.ctx.child_ctx("tag_values execution");
ctx.set_metadata("table", table_name.to_string());
let maybe_values = chunk
.column_values(ctx, tag_name, &predicate)
.context(FindingColumnValuesSnafu)?;
match maybe_values {
Some(mut names) => {
debug!(
%table_name,
names=?names,
chunk_id=%chunk.id().get(),
"tag values found from metadata",
);
known_values.append(&mut names);
}
None => {
debug!(
%table_name,
chunk_id=%chunk.id().get(),
"need full plan to find tag values"
);
chunks_full.push(chunk);
}
}
debug!(
%table_name,
chunk_id=%chunk.id().get(),
"need full plan to find tag values"
);
chunks_full.push(chunk);
}
Ok((table_name, predicate, chunks_full, known_values))
Ok((table_name, predicate, chunks_full))
})
.try_collect()
.await?;
@ -554,9 +528,7 @@ impl InfluxRpcPlanner {
// At this point, we have a set of tag_values we know at plan
// time in `known_columns`, and some tables in chunks that we
// need to run a plan to find what values pass the predicate.
for (table_name, predicate, chunks_full, known_values) in tables {
builder = builder.append_other(known_values.into());
for (table_name, predicate, chunks_full) in tables {
if !chunks_full.is_empty() {
let schema = namespace
.table_schema(table_name)
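
After this change the planner no longer tries to answer tag-value queries from chunk metadata (the removed `column_values` path); every chunk that survives pruning goes straight onto the list needing a full scan plan. A simplified sketch of the resulting control flow, under those assumptions:

```rust
// Sketch: the metadata shortcut is gone, so all chunks take the full-plan path.
struct Chunk {
    id: u64,
}

fn plan_tag_values(chunks: Vec<Chunk>) -> Vec<Chunk> {
    let mut chunks_full = Vec::with_capacity(chunks.len());
    for chunk in chunks {
        // Previously: try `column_values` on chunk metadata first, falling
        // back to a full plan only when it returned `None`.
        chunks_full.push(chunk);
    }
    chunks_full
}

fn main() {
    let full = plan_tag_values(vec![Chunk { id: 1 }, Chunk { id: 2 }]);
    assert_eq!(full.iter().map(|c| c.id).collect::<Vec<_>>(), vec![1, 2]);
}
```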

View File

@ -12,14 +12,14 @@ license.workspace = true
authz = { path = "../authz", features = ["http"] }
clap_blocks = { path = "../clap_blocks" }
generated_types = { path = "../generated_types" }
heappy = { git = "https://github.com/mkmik/heappy", rev = "1d6ac77a4026fffce8680a7b31a9f6e9859b5e73", features = ["enable_heap_profiler", "jemalloc_shim", "measure_free"], optional = true }
heappy = { git = "https://github.com/mkmik/heappy", rev = "1de977a241cdd768acc5b6c82c0728b30c7db7b4", features = ["enable_heap_profiler", "jemalloc_shim", "measure_free"], optional = true }
metric = { path = "../metric" }
metric_exporters = { path = "../metric_exporters" }
observability_deps = { path = "../observability_deps" }
# NOTE: we may not notice that pprof needs the "backtrace-rs" feature if we also build with the heappy feature, which depends on
# backtrace-rs. (Cargo unifies a dependency's features across the whole build graph, so pprof builds successfully just because
# another crate happens to enable backtrace-rs.)
pprof = { version = "0.11", default-features = false, features = ["flamegraph", "prost-codec"], optional = true }
pprof = { version = "0.12", default-features = false, features = ["flamegraph", "prost-codec"], optional = true }
service_grpc_testing = { path = "../service_grpc_testing" }
trace = { path = "../trace" }
trace_exporters = { path = "../trace_exporters" }
@ -38,7 +38,7 @@ log = "0.4"
parking_lot = "0.12"
reqwest = { version = "0.11", default-features = false, features = ["json", "rustls-tls"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.99"
serde_json = "1.0.100"
serde_urlencoded = "0.7.0"
snafu = "0.7"
tokio = { version = "1.29", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] }

View File

@ -18,7 +18,7 @@ iox_query = { version = "0.1.0", path = "../iox_query" }
ioxd_common = { path = "../ioxd_common" }
metric = { path = "../metric" }
parquet_file = { version = "0.1.0", path = "../parquet_file" }
thiserror = "1.0.40"
thiserror = "1.0.41"
tokio = { version = "1.29", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] }
tokio-util = { version = "0.7.8" }
trace = { path = "../trace" }

View File

@ -30,7 +30,7 @@ trace = { path = "../trace" }
arrow-flight = { workspace = true }
async-trait = "0.1"
hyper = "0.14"
thiserror = "1.0.40"
thiserror = "1.0.41"
tokio = { version = "1.29", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] }
tonic = { workspace = true }
workspace-hack = { version = "0.1", path = "../workspace-hack" }

View File

@ -18,7 +18,7 @@ metric = { path = "../metric" }
mutable_batch = { path = "../mutable_batch" }
object_store = { workspace = true }
router = { path = "../router" }
thiserror = "1.0.40"
thiserror = "1.0.41"
tokio-util = { version = "0.7.8" }
trace = { path = "../trace" }
workspace-hack = { version = "0.1", path = "../workspace-hack" }

View File

@ -18,12 +18,12 @@ hashbrown = { workspace = true }
itertools = "0.11"
workspace-hack = { version = "0.1", path = "../workspace-hack" }
percent-encoding = "2.2.0"
thiserror = "1.0.40"
thiserror = "1.0.41"
unicode-segmentation = "1.10.1"
[dev-dependencies]
assert_matches = "1.5.0"
mutable_batch_lp = { path = "../mutable_batch_lp" }
paste = "1.0.12"
paste = "1.0.13"
proptest = { version = "1.2.0", default-features = false }
rand = "0.8"

View File

@ -6,13 +6,13 @@ edition.workspace = true
license.workspace = true
[dependencies] # In alphabetical order
async-trait = "0.1.68"
async-trait = "0.1.70"
bytes = "1.4"
futures = "0.3"
iox_time = { version = "0.1.0", path = "../iox_time" }
metric = { version = "0.1.0", path = "../metric" }
object_store = { workspace = true }
pin-project = "1.1.1"
pin-project = "1.1.2"
tokio = { version = "1.29", features = ["io-util"] }
workspace-hack = { version = "0.1", path = "../workspace-hack" }

View File

@ -22,7 +22,7 @@ pbjson-types = "0.5"
prost = "0.11"
schema = { path = "../schema" }
snafu = "0.7"
thiserror = "1.0.40"
thiserror = "1.0.41"
thrift = "0.17"
tokio = { version = "1.29", features = ["macros", "parking_lot", "rt", "rt-multi-thread", "sync"] }
uuid = { version = "1", features = ["v4"] }

View File

@ -8,7 +8,7 @@ license.workspace = true
[dependencies]
arrow = { workspace = true }
arrow-flight = { workspace = true }
async-trait = "0.1.68"
async-trait = "0.1.70"
backoff = { path = "../backoff" }
bytes = "1.4"
cache_system = { path = "../cache_system" }

View File

@ -361,8 +361,8 @@ mod tests {
partition.create_parquet_file(builder).await;
let table_id = table.table.id;
let single_file_size = 208;
let two_file_size = 384;
let single_file_size = 240;
let two_file_size = 448;
assert!(single_file_size < two_file_size);
let cache = make_cache(&catalog);

View File

@ -17,6 +17,7 @@ use data_types::{
};
use datafusion::scalar::ScalarValue;
use iox_catalog::interface::Catalog;
use iox_query::chunk_statistics::{ColumnRange, ColumnRanges};
use iox_time::TimeProvider;
use observability_deps::tracing::debug;
use schema::sort::SortKey;
@ -27,8 +28,6 @@ use std::{
};
use trace::span::Span;
use crate::df_stats::{ColumnRange, ColumnRanges};
use super::{namespace::CachedTable, ram::RamSize};
const CACHE_ID: &str = "partition";

View File

@ -6,24 +6,21 @@ use self::{
invalidate_on_error::InvalidateOnErrorFlightClient,
test_util::MockIngesterConnection,
};
use crate::{
cache::{namespace::CachedTable, CatalogCache},
df_stats::{create_chunk_statistics, ColumnRanges},
};
use crate::cache::{namespace::CachedTable, CatalogCache};
use arrow::{datatypes::DataType, error::ArrowError, record_batch::RecordBatch};
use arrow_flight::decode::DecodedPayload;
use async_trait::async_trait;
use backoff::{Backoff, BackoffConfig, BackoffError};
use client_util::connection;
use data_types::{ChunkId, ChunkOrder, NamespaceId, PartitionHashId, PartitionId};
use datafusion::{error::DataFusionError, physical_plan::Statistics};
use datafusion::physical_plan::Statistics;
use futures::{stream::FuturesUnordered, TryStreamExt};
use ingester_query_grpc::{
encode_proto_predicate_as_base64, influxdata::iox::ingester::v1::IngesterQueryResponseMetadata,
IngesterQueryRequest,
};
use iox_query::{
exec::{stringset::StringSet, IOxSessionContext},
chunk_statistics::{create_chunk_statistics, ColumnRanges},
util::compute_timenanosecond_min_max,
QueryChunk, QueryChunkData,
};
@ -941,16 +938,6 @@ impl QueryChunk for IngesterChunk {
true
}
fn column_values(
&self,
_ctx: IOxSessionContext,
_column_name: &str,
_predicate: &Predicate,
) -> Result<Option<StringSet>, DataFusionError> {
// TODO maybe some special handling?
Ok(None)
}
fn data(&self) -> QueryChunkData {
QueryChunkData::RecordBatches(self.batches.clone())
}

View File

@ -18,7 +18,6 @@ use workspace_hack as _;
mod cache;
mod database;
mod df_stats;
mod ingester;
mod namespace;
mod parquet;

View File

@ -2,6 +2,7 @@
use data_types::{ChunkId, ChunkOrder, PartitionId};
use datafusion::physical_plan::Statistics;
use iox_query::chunk_statistics::{create_chunk_statistics, ColumnRanges};
use parquet_file::chunk::ParquetChunk;
use schema::sort::SortKey;
use std::sync::Arc;
@ -11,8 +12,6 @@ mod query_access;
pub use creation::ChunkAdapter;
use crate::df_stats::{create_chunk_statistics, ColumnRanges};
/// Immutable metadata attached to a [`QuerierParquetChunk`].
#[derive(Debug)]
pub struct QuerierParquetChunkMeta {

View File

@ -1,11 +1,7 @@
use crate::parquet::QuerierParquetChunk;
use data_types::{ChunkId, ChunkOrder, PartitionId};
use datafusion::{error::DataFusionError, physical_plan::Statistics};
use iox_query::{
exec::{stringset::StringSet, IOxSessionContext},
QueryChunk, QueryChunkData,
};
use predicate::Predicate;
use datafusion::physical_plan::Statistics;
use iox_query::{QueryChunk, QueryChunkData};
use schema::{sort::SortKey, Schema};
use std::{any::Any, sync::Arc};
@ -34,21 +30,6 @@ impl QueryChunk for QuerierParquetChunk {
false
}
fn column_values(
&self,
mut ctx: IOxSessionContext,
column_name: &str,
predicate: &Predicate,
) -> Result<Option<StringSet>, DataFusionError> {
ctx.set_metadata("column_name", column_name.to_string());
ctx.set_metadata("predicate", format!("{}", &predicate));
ctx.set_metadata("storage", "parquet");
// Since DataFusion can read Parquet, there is no advantage to
// manually implementing this vs just letting DataFusion do its thing
Ok(None)
}
fn data(&self) -> QueryChunkData {
QueryChunkData::Parquet(self.parquet_chunk.parquet_exec_input())
}

View File

@ -492,7 +492,6 @@ mod tests {
use super::*;
use crate::{
cache::test_util::{assert_cache_access_metric_count, assert_catalog_access_metric_count},
df_stats::ColumnRange,
ingester::{test_util::MockIngesterConnection, IngesterPartition},
table::test_util::{querier_table, IngesterPartitionBuilder},
};
@ -506,7 +505,7 @@ mod tests {
use generated_types::influxdata::iox::partition_template::v1::{
template_part::Part, PartitionTemplate, TemplatePart,
};
use iox_query::exec::IOxSessionContext;
use iox_query::{chunk_statistics::ColumnRange, exec::IOxSessionContext};
use iox_tests::{TestCatalog, TestParquetFileBuilder, TestTable};
use predicate::Predicate;
use schema::{builder::SchemaBuilder, InfluxFieldType, TIME_COLUMN_NAME};

View File

@ -1,11 +1,12 @@
use super::{PruneMetrics, QuerierTable, QuerierTableArgs};
use crate::{
cache::CatalogCache, create_ingester_connection_for_testing, df_stats::ColumnRanges,
parquet::ChunkAdapter, IngesterPartition,
cache::CatalogCache, create_ingester_connection_for_testing, parquet::ChunkAdapter,
IngesterPartition,
};
use arrow::record_batch::RecordBatch;
use data_types::ChunkId;
use iox_catalog::interface::{get_schema_by_name, SoftDeletedRows};
use iox_query::chunk_statistics::ColumnRanges;
use iox_tests::{TestCatalog, TestPartition, TestTable};
use mutable_batch_lp::test_helpers::lp_to_mutable_batch;
use schema::{Projection, Schema};

View File

@ -49,7 +49,7 @@ criterion = { version = "0.5", default-features = false, features = ["async_toki
influxdb-line-protocol = { path = "../influxdb_line_protocol" }
iox_tests = { path = "../iox_tests" }
once_cell = "1"
paste = "1.0.12"
paste = "1.0.13"
pretty_assertions = "1.3.0"
proptest = { version = "1.2.0", default-features = false }
rand = "0.8.3"

View File

@ -32,6 +32,14 @@ impl SchemaBuilder {
Self::default()
}
pub fn with_capacity(n: usize) -> Self {
Self {
measurement: Default::default(),
fields: Vec::with_capacity(n),
finished: Default::default(),
}
}
/// Add a new tag column to this schema. By default tags are
/// potentially nullable as they are not guaranteed to be present
/// for all rows
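
A hedged usage sketch for the new constructor; the builder methods are the ones exercised elsewhere in this diff (`tag`, `influx_field`, `timestamp`, `build`). `with_capacity` merely pre-sizes the internal `fields` vector for a known column count:

```rust
use schema::{builder::SchemaBuilder, InfluxFieldType};

fn main() {
    // Three columns known up front: host (tag), usage (field), time.
    let _schema = SchemaBuilder::with_capacity(3)
        .tag("host")
        .influx_field("usage", InfluxFieldType::Float)
        .timestamp()
        .build()
        .expect("valid schema");
}
```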

View File

@ -1,5 +1,3 @@
use std::sync::Arc;
use arrow::{datatypes::Field, record_batch::RecordBatch};
use hashbrown::hash_map::RawEntryMut;
use hashbrown::HashMap;
@ -44,7 +42,7 @@ pub type Result<T, E = Error> = std::result::Result<T, E>;
/// This is infallible because the schemas of chunks within a
/// partition are assumed to be compatible, since that schema was
/// enforced as part of writing into the partition
pub fn merge_record_batch_schemas(batches: &[Arc<RecordBatch>]) -> Schema {
pub fn merge_record_batch_schemas(batches: &[RecordBatch]) -> Schema {
let mut merger = SchemaMerger::new();
for batch in batches {
let schema = Schema::try_from(batch.schema()).expect("Schema conversion error");
@ -172,6 +170,8 @@ impl<'a> SchemaMerger<'a> {
#[cfg(test)]
mod tests {
use std::sync::Arc;
use crate::builder::SchemaBuilder;
use crate::InfluxFieldType::Integer;
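
The signature now takes `&[RecordBatch]` instead of `&[Arc<RecordBatch>]`, which costs callers nothing meaningful: `RecordBatch::clone` only bumps reference counts on the underlying column buffers. A self-contained illustration using the arrow crate already in the workspace:

```rust
use std::sync::Arc;

use arrow::array::Int64Array;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;

fn main() {
    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int64, false)]));
    let batch =
        RecordBatch::try_new(schema, vec![Arc::new(Int64Array::from(vec![1, 2]))]).unwrap();
    // Cloning shares the column buffers; no row data is copied, so building a
    // `&[RecordBatch]` slice is as cheap as building `&[Arc<RecordBatch>]`.
    let batches = vec![batch.clone(), batch];
    assert_eq!(batches.len(), 2);
}
```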

View File

@ -6,7 +6,7 @@ edition.workspace = true
license.workspace = true
[dependencies] # In alphabetical order
async-trait = "0.1.68"
async-trait = "0.1.70"
bytes = "1.4"
datafusion = { workspace = true }
iox_query = { path = "../iox_query" }

View File

@ -26,7 +26,7 @@ bytes = "1.4"
futures = "0.3"
prost = "0.11"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.99"
serde_json = "1.0.100"
snafu = "0.7"
tonic = { workspace = true }
workspace-hack = { version = "0.1", path = "../workspace-hack" }

Some files were not shown because too many files have changed in this diff.