Merge branch 'main' into dom/sharded-cache
commit ace76cef14

@@ -104,7 +104,7 @@ jobs:
          command: find scripts -type f ! \( -iname '*.py' -or -iname '*.supp' \) -exec shellcheck {} +
      - run:
          name: Yamllint
          command: yamllint --strict .
          command: yamllint --config-file .circleci/yamllint.yml --strict .
      - cache_save
  cargo_audit:
    docker:
@@ -0,0 +1,3 @@
rules:
  truthy:
    check-keys: false
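These three yamllint rules appear to be the new config that the updated CI step points at via --config-file; to reproduce the check locally (a sketch, assuming yamllint is installed on your PATH):

  # run the same strict lint that CI runs
  yamllint --config-file .circleci/yamllint.yml --strict .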
@@ -0,0 +1,67 @@
#!/usr/bin/env bash

shopt -s nocasematch
semantic_pattern='(feat|fix|docs|style|refactor|perf|test|build|ci|chore|revert)(\([^)]+\))?: +[^ ]'

if [[ $1 == "test" ]]; then
  exit_code=0

  echo checking strings that should be OK
  expect_ok="chore: foo
chore(hello): foo
CHORE: foo"
  while read -r s; do
    if [[ ! $s =~ $semantic_pattern ]]; then
      echo got FAIL, expected OK: "$s"
      exit_code=1
    fi
  done <<< "$expect_ok"

  echo checking strings that should FAIL
  expect_fail="more: foo
chore(: foo
chore : foo
chore:
chore:
chore:foo
"
  while read -r s; do
    if [[ $s =~ $semantic_pattern ]]; then
      echo got OK, expected FAIL: "$s"
      exit_code=1
    fi
  done <<< "$expect_fail"

  exit $exit_code
fi

# nb: quotes are often not required around env var names between [[ and ]]
if [[ -z $PR_TITLE || -z $COMMITS_URL ]]; then
  echo ::error::required env vars: PR_TITLE, COMMITS_URL
  exit 1
fi

exit_code=0

if [[ ! $PR_TITLE =~ $semantic_pattern ]]; then
  echo ::error::PR title not semantic: "$PR_TITLE"
  exit_code=1
else
  echo PR title OK: "$PR_TITLE"
fi

json=$(curl --silent "$COMMITS_URL")
commits=$(echo "$json" | jq --raw-output '.[] | [.sha, .commit.message] | join(" ") | split("\n") | first')

while read -r commit; do
  commit_title=$(echo "$commit" | cut -c 42-999)

  if [[ ! $commit_title =~ $semantic_pattern ]]; then
    echo ::error::Commit title not semantic: "$commit"
    exit_code=1
  else
    echo Commit title OK: "$commit"
  fi
done <<< "$commits"

exit $exit_code
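The script above ships its own self-test mode (the `test` branch at the top), so the regex can be sanity-checked locally before relying on the workflow; a minimal invocation, assuming the file lands at the path the workflow below calls:

  # exercises the built-in expect_ok / expect_fail cases; exits non-zero on a mismatch
  bash .github/workflows/semantic_check.sh test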
@@ -0,0 +1,20 @@
---
name: "Semantic PR and Commit Messages"

on:
  pull_request:
    types: [opened, reopened, synchronize, edited]

env:
  PR_TITLE: ${{ github.event.pull_request.title }}
  COMMITS_URL: ${{ github.event.pull_request.commits_url }}

jobs:
  main:
    name: Semantic PR and commit messages
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
        with:
          ref: ${{ github.event.pull_request.head.sha }}
      - run: bash .github/workflows/semantic_check.sh
@ -678,9 +678,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "crc32fast"
|
||||
version = "1.3.1"
|
||||
version = "1.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a2209c310e29876f7f0b2721e7e26b84aff178aa3da5d091f9bfbf47669e60e3"
|
||||
checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
@ -841,6 +841,51 @@ dependencies = [
|
|||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "darling"
|
||||
version = "0.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d0d720b8683f8dd83c65155f0530560cba68cd2bf395f6513a483caee57ff7f4"
|
||||
dependencies = [
|
||||
"darling_core",
|
||||
"darling_macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "darling_core"
|
||||
version = "0.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7a340f241d2ceed1deb47ae36c4144b2707ec7dd0b649f894cb39bb595986324"
|
||||
dependencies = [
|
||||
"fnv",
|
||||
"ident_case",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"strsim 0.10.0",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "darling_macro"
|
||||
version = "0.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72c41b3b7352feb3211a0d743dc5700a4e3b60f51bd2b368892d1e0f9a95f44b"
|
||||
dependencies = [
|
||||
"darling_core",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dashmap"
|
||||
version = "4.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e77a43b28d0668df09411cb0bc9a8c2adc40f9a048afe863e05fd43251e8e39c"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"num_cpus",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "data_types"
|
||||
version = "0.1.0"
|
||||
|
@ -962,6 +1007,17 @@ dependencies = [
|
|||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derivative"
|
||||
version = "2.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "diff"
|
||||
version = "0.1.12"
|
||||
|
@ -1269,9 +1325,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "futures-channel"
|
||||
version = "0.3.19"
|
||||
version = "0.3.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ba3dda0b6588335f360afc675d0564c17a77a2bda81ca178a4b6081bd86c7f0b"
|
||||
checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-sink",
|
||||
|
@ -1279,15 +1335,15 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "futures-core"
|
||||
version = "0.3.19"
|
||||
version = "0.3.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d0c8ff0461b82559810cdccfde3215c3f373807f5e5232b71479bff7bb2583d7"
|
||||
checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3"
|
||||
|
||||
[[package]]
|
||||
name = "futures-executor"
|
||||
version = "0.3.19"
|
||||
version = "0.3.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "29d6d2ff5bb10fb95c85b8ce46538a2e5f5e7fdc755623a7d4529ab8a4ed9d2a"
|
||||
checksum = "9420b90cfa29e327d0429f19be13e7ddb68fa1cccb09d65e5706b8c7a749b8a6"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-task",
|
||||
|
@ -1307,15 +1363,15 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "futures-io"
|
||||
version = "0.3.19"
|
||||
version = "0.3.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b1f9d34af5a1aac6fb380f735fe510746c38067c5bf16c7fd250280503c971b2"
|
||||
checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b"
|
||||
|
||||
[[package]]
|
||||
name = "futures-macro"
|
||||
version = "0.3.19"
|
||||
version = "0.3.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6dbd947adfffb0efc70599b3ddcf7b5597bb5fa9e245eb99f62b3a5f7bb8bd3c"
|
||||
checksum = "33c1e13800337f4d4d7a316bf45a567dbcb6ffe087f16424852d97e97a91f512"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
@ -1324,21 +1380,21 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "futures-sink"
|
||||
version = "0.3.19"
|
||||
version = "0.3.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e3055baccb68d74ff6480350f8d6eb8fcfa3aa11bdc1a1ae3afdd0514617d508"
|
||||
checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868"
|
||||
|
||||
[[package]]
|
||||
name = "futures-task"
|
||||
version = "0.3.19"
|
||||
version = "0.3.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6ee7c6485c30167ce4dfb83ac568a849fe53274c831081476ee13e0dce1aad72"
|
||||
checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a"
|
||||
|
||||
[[package]]
|
||||
name = "futures-test"
|
||||
version = "0.3.19"
|
||||
version = "0.3.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4e741bc851e1e90ad08901b329389ae77e02d5e9a0ec61955b80834630fbdc2f"
|
||||
checksum = "8c3e9379dbbfb35dd6df79e895d73c0f75558827fe68eb853b858ff417a8ee98"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-executor",
|
||||
|
@ -1353,9 +1409,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "futures-util"
|
||||
version = "0.3.19"
|
||||
version = "0.3.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d9b5cf40b47a271f77a8b1bec03ca09044d99d2372c0de244e66430761127164"
|
||||
checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a"
|
||||
dependencies = [
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
|
@ -1428,6 +1484,36 @@ version = "0.26.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "78cc372d058dcf6d5ecd98510e7fbc9e5aec4d21de70f65fea8fecebcd881bd4"
|
||||
|
||||
[[package]]
|
||||
name = "gitops_adapter"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"assert_matches",
|
||||
"async-trait",
|
||||
"chrono",
|
||||
"clap 3.0.13",
|
||||
"dotenv",
|
||||
"futures",
|
||||
"glob",
|
||||
"k8s-openapi",
|
||||
"kube",
|
||||
"kube-derive",
|
||||
"kube-runtime",
|
||||
"parking_lot 0.11.2",
|
||||
"pbjson-build",
|
||||
"prost",
|
||||
"schemars",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tonic",
|
||||
"tonic-build",
|
||||
"tracing",
|
||||
"trogging",
|
||||
"workspace-hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
version = "0.3.0"
|
||||
|
@ -1690,6 +1776,12 @@ dependencies = [
|
|||
"tokio-native-tls",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ident_case"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
|
||||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "0.2.3"
|
||||
|
@ -1929,6 +2021,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"arrow",
|
||||
"arrow_util",
|
||||
"async-trait",
|
||||
"base64 0.13.0",
|
||||
"bytes",
|
||||
"chrono",
|
||||
|
@ -2021,6 +2114,7 @@ dependencies = [
|
|||
"schema",
|
||||
"snafu",
|
||||
"sqlx",
|
||||
"test_helpers",
|
||||
"tokio",
|
||||
"uuid",
|
||||
"workspace-hack",
|
||||
|
@ -2129,6 +2223,28 @@ dependencies = [
|
|||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "json-patch"
|
||||
version = "0.2.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f995a3c8f2bc3dd52a18a583e90f9ec109c047fa1603a853e46bcda14d2e279d"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
"treediff",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jsonpath_lib"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eaa63191d68230cccb81c5aa23abd53ed64d83337cacbb25a7b8c7979523774f"
|
||||
dependencies = [
|
||||
"log",
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jsonwebtoken"
|
||||
version = "7.2.0"
|
||||
|
@ -2136,13 +2252,126 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "afabcc15e437a6484fc4f12d0fd63068fe457bf93f1c148d3d9649c60b103f32"
|
||||
dependencies = [
|
||||
"base64 0.12.3",
|
||||
"pem",
|
||||
"pem 0.8.3",
|
||||
"ring",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"simple_asn1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "k8s-openapi"
|
||||
version = "0.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4f8de9873b904e74b3533f77493731ee26742418077503683db44e1b3c54aa5c"
|
||||
dependencies = [
|
||||
"base64 0.13.0",
|
||||
"bytes",
|
||||
"chrono",
|
||||
"schemars",
|
||||
"serde",
|
||||
"serde-value",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kube"
|
||||
version = "0.64.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "84dcc2f8ca3f2427a72acc31fa9538159f6b33a97002e315a3fcd5323cf51a2b"
|
||||
dependencies = [
|
||||
"k8s-openapi",
|
||||
"kube-client",
|
||||
"kube-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kube-client"
|
||||
version = "0.64.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8957106140aa24a76de3f7d005966f381b30a4cd6a9c003b3bba6828e9617535"
|
||||
dependencies = [
|
||||
"base64 0.13.0",
|
||||
"bytes",
|
||||
"chrono",
|
||||
"dirs-next",
|
||||
"either",
|
||||
"futures",
|
||||
"http",
|
||||
"http-body",
|
||||
"hyper",
|
||||
"hyper-timeout",
|
||||
"hyper-tls",
|
||||
"jsonpath_lib",
|
||||
"k8s-openapi",
|
||||
"kube-core",
|
||||
"openssl",
|
||||
"pem 1.0.2",
|
||||
"pin-project",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_yaml",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-native-tls",
|
||||
"tokio-util",
|
||||
"tower",
|
||||
"tower-http",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kube-core"
|
||||
version = "0.64.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2ec73e7d8e937dd055d962af06e635e262fdb6ed341c36ecf659d4fece0a8005"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"form_urlencoded",
|
||||
"http",
|
||||
"json-patch",
|
||||
"k8s-openapi",
|
||||
"once_cell",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kube-derive"
|
||||
version = "0.64.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c6651bfae82bc23439da1099174b52bcbf68df065dc33317c912e3c5c5cea43c"
|
||||
dependencies = [
|
||||
"darling",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"serde_json",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kube-runtime"
|
||||
version = "0.64.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6b090d3d7b43e2d60fa93ca51b19fe9f2e05a5252c97880fe834f8fa9f2de605"
|
||||
dependencies = [
|
||||
"dashmap",
|
||||
"derivative",
|
||||
"futures",
|
||||
"json-patch",
|
||||
"k8s-openapi",
|
||||
"kube-client",
|
||||
"pin-project",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"smallvec",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.4.0"
|
||||
|
@ -2221,9 +2450,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.116"
|
||||
version = "0.2.117"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "565dbd88872dbe4cc8a46e527f26483c1d1f7afa6b884a3bd6cd893d4f98da74"
|
||||
checksum = "e74d72e0f9b65b5b4ca49a346af3976df0f9c61d550727f349ecd559f251a26c"
|
||||
|
||||
[[package]]
|
||||
name = "libloading"
|
||||
|
@ -2257,6 +2486,12 @@ dependencies = [
|
|||
"workspace-hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "linked-hash-map"
|
||||
version = "0.5.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3"
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.0.37"
|
||||
|
@ -3149,6 +3384,15 @@ dependencies = [
|
|||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pem"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e9a3b09a20e374558580a4914d3b7d89bd61b954a5a5e1dcbea98753addb1947"
|
||||
dependencies = [
|
||||
"base64 0.13.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "percent-encoding"
|
||||
version = "2.1.0"
|
||||
|
@ -4083,6 +4327,30 @@ dependencies = [
|
|||
"workspace-hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "schemars"
|
||||
version = "0.8.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c6b5a3c80cea1ab61f4260238409510e814e38b4b563c06044edf91e7dc070e3"
|
||||
dependencies = [
|
||||
"dyn-clone",
|
||||
"schemars_derive",
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "schemars_derive"
|
||||
version = "0.8.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "41ae4dce13e8614c46ac3c38ef1c0d668b101df6ac39817aebdaa26642ddae9b"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"serde_derive_internals",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scopeguard"
|
||||
version = "1.1.0"
|
||||
|
@ -4137,6 +4405,16 @@ dependencies = [
|
|||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde-value"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c"
|
||||
dependencies = [
|
||||
"ordered-float 2.10.0",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde-xml-rs"
|
||||
version = "0.4.1"
|
||||
|
@ -4170,6 +4448,17 @@ dependencies = [
|
|||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive_internals"
|
||||
version = "0.25.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1dbab34ca63057a1f15280bdf3c39f2b1eb1b54c17e98360e511637aef7418c6"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.78"
|
||||
|
@ -4203,6 +4492,18 @@ dependencies = [
|
|||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_yaml"
|
||||
version = "0.8.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4a521f2940385c165a24ee286aa8599633d162077a54bdcae2a6fd5a7bfa7a0"
|
||||
dependencies = [
|
||||
"indexmap",
|
||||
"ryu",
|
||||
"serde",
|
||||
"yaml-rust",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "server"
|
||||
version = "0.1.0"
|
||||
|
@ -4905,6 +5206,7 @@ dependencies = [
|
|||
"futures-sink",
|
||||
"log",
|
||||
"pin-project-lite",
|
||||
"slab",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
|
@ -5011,6 +5313,24 @@ dependencies = [
|
|||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tower-http"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "81eca72647e58054bbfa41e6f297c23436f1c60aff6e5eb38455a0f9ca420bb5"
|
||||
dependencies = [
|
||||
"base64 0.13.0",
|
||||
"bytes",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"http",
|
||||
"http-body",
|
||||
"pin-project",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tower-layer"
|
||||
version = "0.3.1"
|
||||
|
@ -5095,11 +5415,12 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "tracing-core"
|
||||
version = "0.1.21"
|
||||
version = "0.1.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1f4ed65637b8390770814083d20756f87bfa2c21bf2f110babdc5438351746e4"
|
||||
checksum = "03cfcb51380632a72d3111cb8d3447a8d908e577d31beeac006f836383d29a23"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
"valuable",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -5171,6 +5492,15 @@ dependencies = [
|
|||
"workspace-hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "treediff"
|
||||
version = "3.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "761e8d5ad7ce14bb82b7e61ccc0ca961005a275a060b9644a2431aa11553c2ff"
|
||||
dependencies = [
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "trogging"
|
||||
version = "0.1.0"
|
||||
|
@ -5276,6 +5606,12 @@ dependencies = [
|
|||
"getrandom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "valuable"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
|
||||
|
||||
[[package]]
|
||||
name = "vcpkg"
|
||||
version = "0.2.15"
|
||||
|
@ -5669,6 +6005,15 @@ version = "0.8.4"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d2d7d3948613f75c98fd9328cfdcc45acc4d360655289d0a7d4ec931392200a3"
|
||||
|
||||
[[package]]
|
||||
name = "yaml-rust"
|
||||
version = "0.4.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85"
|
||||
dependencies = [
|
||||
"linked-hash-map",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zeroize"
|
||||
version = "1.5.2"
|
||||
|
|
|
@@ -9,6 +9,7 @@ members = [
    "db",
    "dml",
    "generated_types",
    "gitops_adapter",
    "grpc-router",
    "grpc-router-test-gen",
    "influxdb_iox",
@@ -14,11 +14,11 @@ use job_registry::JobRegistry;
use metric::{Attributes, DurationCounter, Metric, U64Counter};
use observability_deps::tracing::debug;
use parking_lot::Mutex;
use predicate::{predicate::Predicate, rpc_predicate::QueryDatabaseMeta};
use predicate::{rpc_predicate::QueryDatabaseMeta, Predicate};
use query::{
    provider::{ChunkPruner, ProviderBuilder},
    pruning::{prune_chunks, PruningObserver},
    QueryChunkMeta, QueryCompletedToken, QueryDatabase, DEFAULT_SCHEMA,
    QueryChunkMeta, QueryCompletedToken, QueryDatabase, QueryText, DEFAULT_SCHEMA,
};
use schema::Schema;
use std::time::Instant;

@@ -27,7 +27,7 @@ use system_tables::{SystemSchemaProvider, SYSTEM_SCHEMA};
use time::TimeProvider;

/// The number of entries to store in the circular query buffer log
const QUERY_LOG_SIZE: usize = 100;
const QUERY_LOG_SIZE: usize = 10_000;

/// Metrics related to chunk access (pruning specifically)
#[derive(Debug)]

@@ -290,7 +290,7 @@ impl QueryDatabase for QueryCatalogAccess {
    fn record_query(
        &self,
        query_type: impl Into<String>,
        query_text: impl Into<String>,
        query_text: QueryText,
    ) -> QueryCompletedToken<'_> {
        // When the query token is dropped the query entry's completion time
        // will be set.

@@ -398,7 +398,7 @@ mod tests {
    use super::*;
    use crate::test_helpers::write_lp;
    use crate::utils::make_db;
    use predicate::predicate::PredicateBuilder;
    use predicate::PredicateBuilder;

    #[tokio::test]
    async fn test_filtered_chunks() {
@@ -15,7 +15,7 @@ use mutable_buffer::snapshot::ChunkSnapshot;
use observability_deps::tracing::debug;
use parquet_file::chunk::ParquetChunk;
use partition_metadata::TableSummary;
use predicate::predicate::{Predicate, PredicateMatch};
use predicate::{Predicate, PredicateMatch};
use query::{exec::stringset::StringSet, QueryChunk, QueryChunkMeta};
use read_buffer::RBChunk;
use schema::InfluxColumnType;
@@ -16,7 +16,7 @@ use crate::{
};
use ::lifecycle::select_persistable_chunks;
pub use ::lifecycle::{LifecycleChunk, LockableChunk, LockablePartition};
use ::write_buffer::core::WriteBufferReading;
use ::write_buffer::core::{WriteBufferReading, WriteBufferStreamHandler};
use async_trait::async_trait;
use data_types::{
    chunk_metadata::{ChunkId, ChunkLifecycleAction, ChunkOrder, ChunkSummary},

@@ -42,10 +42,10 @@ use parquet_catalog::{
    prune::prune_history as prune_catalog_transaction_history,
};
use persistence_windows::{checkpoint::ReplayPlan, persistence_windows::PersistenceWindows};
use predicate::{predicate::Predicate, rpc_predicate::QueryDatabaseMeta};
use predicate::{rpc_predicate::QueryDatabaseMeta, Predicate};
use query::{
    exec::{ExecutionContextProvider, Executor, ExecutorType, IOxExecutionContext},
    QueryCompletedToken, QueryDatabase,
    QueryCompletedToken, QueryDatabase, QueryText,
};
use rand_distr::{Distribution, Poisson};
use schema::selection::Selection;

@@ -53,7 +53,7 @@ use schema::Schema;
use snafu::{ensure, OptionExt, ResultExt, Snafu};
use std::{
    any::Any,
    collections::{HashMap, HashSet},
    collections::{BTreeMap, HashMap, HashSet},
    sync::{
        atomic::{AtomicUsize, Ordering},
        Arc,

@@ -112,6 +112,11 @@ pub enum Error {
        source: persistence_windows::checkpoint::Error,
    },

    #[snafu(display("Cannot setup write buffer: {}", source))]
    WriteBuffer {
        source: ::write_buffer::core::WriteBufferError,
    },

    #[snafu(display("Cannot replay: {}", source))]
    ReplayError { source: crate::replay::Error },

@@ -889,16 +894,23 @@ impl Db {
    pub async fn perform_replay(
        &self,
        replay_plan: Option<&ReplayPlan>,
        consumer: &mut dyn WriteBufferReading,
    ) -> Result<()> {
        consumer: Arc<dyn WriteBufferReading>,
    ) -> Result<BTreeMap<u32, Box<dyn WriteBufferStreamHandler>>> {
        use crate::replay::{perform_replay, seek_to_end};
        if let Some(replay_plan) = replay_plan {
            perform_replay(self, replay_plan, consumer)

        let streams = consumer.stream_handlers().await.context(WriteBufferSnafu)?;

        let streams = if let Some(replay_plan) = replay_plan {
            perform_replay(self, replay_plan, streams)
                .await
                .context(ReplaySnafu)
                .context(ReplaySnafu)?
        } else {
            seek_to_end(self, consumer).await.context(ReplaySnafu)
        }
            seek_to_end(self, consumer.as_ref(), streams)
                .await
                .context(ReplaySnafu)?
        };

        Ok(streams)
    }

    /// Background worker function

@@ -1218,7 +1230,7 @@ impl QueryDatabase for Db {
    fn record_query(
        &self,
        query_type: impl Into<String>,
        query_text: impl Into<String>,
        query_text: QueryText,
    ) -> QueryCompletedToken<'_> {
        self.catalog_access.record_query(query_type, query_text)
    }
@@ -30,6 +30,9 @@ pub enum Error {
    #[snafu(context(false))]
    Aborted { source: futures::future::Aborted },

    #[snafu(context(false))]
    Timeout { source: tokio::time::error::Elapsed },

    #[snafu(display("Read Buffer Error in chunk {}{} : {}", chunk_id, table_name, source))]
    ReadBufferChunkError {
        source: read_buffer::Error,
@@ -14,6 +14,7 @@ use crate::{
    DbChunk,
};
use ::lifecycle::LifecycleWriteGuard;
use data_types::error::ErrorLogger;
use data_types::{chunk_metadata::ChunkLifecycleAction, job::Job};
use observability_deps::tracing::{debug, warn};
use parquet_catalog::interface::CatalogParquetInfo;

@@ -29,9 +30,13 @@ use persistence_windows::{
use query::QueryChunk;
use schema::selection::Selection;
use snafu::ResultExt;
use std::time::Duration;
use std::{future::Future, sync::Arc};
use tokio::time::timeout;
use tracker::{TaskTracker, TrackedFuture, TrackedFutureExt};

const TIMEOUT: Duration = Duration::from_secs(300);

/// The implementation for writing a chunk to the object store
///
/// `flush_handle` describes both what to persist and also acts as a transaction

@@ -111,7 +116,9 @@ pub(super) fn write_chunk_to_object_store(
    // catalog-level transaction for preservation layer
    {
        // fetch shared (= read) guard preventing the cleanup job from deleting our files
        let _guard = db.cleanup_lock.read().await;
        let _guard = timeout(TIMEOUT, db.cleanup_lock.read())
            .await
            .log_if_error("write chunk cleanup lock")?;

        // Write this table data into the object store
        //

@@ -128,10 +135,14 @@ pub(super) fn write_chunk_to_object_store(
            time_of_last_write,
            chunk_order,
        };
        let written_result = storage
            .write_to_object_store(addr.clone(), stream, metadata)
            .await
            .context(WritingToObjectStoreSnafu)?;

        let written_result = timeout(
            TIMEOUT,
            storage.write_to_object_store(addr.clone(), stream, metadata),
        )
        .await
        .log_if_error("write chunk to object store")?
        .context(WritingToObjectStoreSnafu)?;

        // the stream was empty
        if written_result.is_none() {

@@ -160,13 +171,17 @@ pub(super) fn write_chunk_to_object_store(
        //
        // This ensures that any deletes encountered during or prior to the replay window
        // must have been made durable within the catalog for any persisted chunks
        let delete_handle = db.delete_predicates_mailbox.consume().await;
        let delete_handle = timeout(TIMEOUT, db.delete_predicates_mailbox.consume())
            .await
            .log_if_error("delete handle")?;

        // IMPORTANT: Start transaction AFTER writing the actual parquet file so we do not hold
        // the transaction lock (that is part of the PreservedCatalog) for too long.
        // By using the cleanup lock (see above) it is ensured that the file that we
        // have written is not deleted in between.
        let mut transaction = db.preserved_catalog.open_transaction().await;
        let mut transaction = timeout(TIMEOUT, db.preserved_catalog.open_transaction())
            .await
            .log_if_error("preserved catalog transaction")?;

        // add parquet file
        let info = CatalogParquetInfo {

@@ -194,7 +209,10 @@ pub(super) fn write_chunk_to_object_store(
        }

        // preserved commit
        let ckpt_handle = transaction.commit().await.context(CommitSnafu)?;
        let ckpt_handle = timeout(TIMEOUT, transaction.commit())
            .await
            .log_if_error("preserved catalog commit")?
            .context(CommitSnafu)?;

        // Deletes persisted correctly
        delete_handle.flush();

@@ -216,10 +234,14 @@ pub(super) fn write_chunk_to_object_store(
        // NOTE: There can only be a single transaction in this section because the checkpoint handle holds
        // transaction lock. Therefore we don't need to worry about concurrent modifications of
        // preserved chunks.
        if let Err(e) = ckpt_handle
            .create_checkpoint(checkpoint_data_from_catalog(&db.catalog))
            .await
        {
        let checkpoint_result = timeout(
            TIMEOUT,
            ckpt_handle.create_checkpoint(checkpoint_data_from_catalog(&db.catalog)),
        )
        .await
        .log_if_error("create checkpoint")?;

        if let Err(e) = checkpoint_result {
            warn!(%e, "cannot create catalog checkpoint");

            // That's somewhat OK. Don't fail the entire task, because the actual preservation was completed
@@ -3,7 +3,7 @@

use std::convert::TryFrom;

use predicate::predicate::Predicate;
use predicate::Predicate;
use snafu::Snafu;

#[derive(Debug, Snafu)]

@@ -55,7 +55,7 @@ pub mod test {
    use datafusion::logical_plan::{col, lit, Expr};

    use datafusion::scalar::ScalarValue;
    use predicate::predicate::PredicateBuilder;
    use predicate::PredicateBuilder;
    use read_buffer::BinaryExpr as RBBinaryExpr;
    use read_buffer::Predicate as RBPredicate;

@@ -7,19 +7,19 @@ use std::{
};

use parking_lot::Mutex;
use query::QueryText;
use time::{Time, TimeProvider};

// The query duration used for queries still running.
const UNCOMPLETED_DURATION: i64 = -1;

/// Information about a single query that was executed
#[derive(Debug)]
pub struct QueryLogEntry {
    /// The type of query
    pub query_type: String,

    /// The text of the query (SQL for sql queries, pbjson for storage rpc queries)
    pub query_text: String,
    pub query_text: QueryText,

    /// Time at which the query was run
    pub issue_time: Time,

@@ -29,9 +29,20 @@ pub struct QueryLogEntry {
    query_completed_duration: atomic::AtomicI64,
}

impl std::fmt::Debug for QueryLogEntry {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("QueryLogEntry")
            .field("query_type", &self.query_type)
            .field("query_text", &self.query_text.to_string())
            .field("issue_time", &self.issue_time)
            .field("query_completed_duration", &self.query_completed_duration)
            .finish()
    }
}

impl QueryLogEntry {
    /// Creates a new QueryLogEntry -- use `QueryLog::push` to add new entries to the log
    fn new(query_type: String, query_text: String, issue_time: Time) -> Self {
    fn new(query_type: String, query_text: QueryText, issue_time: Time) -> Self {
        Self {
            query_type,
            query_text,

@@ -77,14 +88,10 @@ impl QueryLog {
        }
    }

    pub fn push(
        &self,
        query_type: impl Into<String>,
        query_text: impl Into<String>,
    ) -> Arc<QueryLogEntry> {
    pub fn push(&self, query_type: impl Into<String>, query_text: QueryText) -> Arc<QueryLogEntry> {
        let entry = Arc::new(QueryLogEntry::new(
            query_type.into(),
            query_text.into(),
            query_text,
            self.time_provider.now(),
        ));

@@ -126,7 +133,7 @@ mod test_super {

        let entry = Arc::new(QueryLogEntry::new(
            "sql".into(),
            "SELECT 1".into(),
            Box::new("SELECT 1"),
            time_provider.now(),
        ));
        // query has not completed
176  db/src/replay.rs
@ -20,7 +20,7 @@ use std::{
|
|||
time::Duration,
|
||||
};
|
||||
use time::Time;
|
||||
use write_buffer::core::WriteBufferReading;
|
||||
use write_buffer::core::{WriteBufferReading, WriteBufferStreamHandler};
|
||||
|
||||
#[allow(clippy::enum_variant_names)]
|
||||
#[derive(Debug, Snafu)]
|
||||
|
@ -85,22 +85,34 @@ pub type Result<T, E = Error> = std::result::Result<T, E>;
|
|||
/// operation fails. In that case some of the sequencers in the write buffers might already be seeked and others not.
|
||||
/// The caller must NOT use the write buffer in that case without ensuring that it is put into some proper state, e.g.
|
||||
/// by retrying this function.
|
||||
pub async fn seek_to_end(db: &Db, write_buffer: &mut dyn WriteBufferReading) -> Result<()> {
|
||||
let mut watermarks = vec![];
|
||||
for (sequencer_id, stream) in write_buffer.streams() {
|
||||
let watermark = (stream.fetch_high_watermark)()
|
||||
.await
|
||||
.context(SeekSnafu { sequencer_id })?;
|
||||
watermarks.push((sequencer_id, watermark));
|
||||
}
|
||||
pub async fn seek_to_end(
|
||||
db: &Db,
|
||||
write_buffer: &dyn WriteBufferReading,
|
||||
write_buffer_streams: BTreeMap<u32, Box<dyn WriteBufferStreamHandler>>,
|
||||
) -> Result<BTreeMap<u32, Box<dyn WriteBufferStreamHandler>>> {
|
||||
// need to convert the btree into a vec because the btree iterator is not `Send`
|
||||
let write_buffer_streams: Vec<_> = write_buffer_streams.into_iter().collect();
|
||||
|
||||
for (sequencer_id, watermark) in &watermarks {
|
||||
write_buffer
|
||||
.seek(*sequencer_id, *watermark)
|
||||
let mut watermarks = vec![];
|
||||
for (sequencer_id, _handler) in &write_buffer_streams {
|
||||
let watermark = write_buffer
|
||||
.fetch_high_watermark(*sequencer_id)
|
||||
.await
|
||||
.context(SeekSnafu {
|
||||
sequencer_id: *sequencer_id,
|
||||
})?;
|
||||
watermarks.push((*sequencer_id, watermark));
|
||||
}
|
||||
|
||||
let mut write_buffer_streams_res = BTreeMap::new();
|
||||
for ((sequencer_id, watermark), (sequencer_id_2, mut handler)) in
|
||||
watermarks.iter().zip(write_buffer_streams)
|
||||
{
|
||||
assert_eq!(*sequencer_id, sequencer_id_2);
|
||||
handler.seek(*watermark).await.context(SeekSnafu {
|
||||
sequencer_id: *sequencer_id,
|
||||
})?;
|
||||
write_buffer_streams_res.insert(*sequencer_id, handler);
|
||||
}
|
||||
|
||||
// remember max seen sequence numbers
|
||||
|
@ -142,24 +154,20 @@ pub async fn seek_to_end(db: &Db, write_buffer: &mut dyn WriteBufferReading) ->
|
|||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
Ok(write_buffer_streams_res)
|
||||
}
|
||||
|
||||
/// Perform sequencer-driven replay for this DB.
|
||||
pub async fn perform_replay(
|
||||
db: &Db,
|
||||
replay_plan: &ReplayPlan,
|
||||
write_buffer: &mut dyn WriteBufferReading,
|
||||
) -> Result<()> {
|
||||
write_buffer_streams: BTreeMap<u32, Box<dyn WriteBufferStreamHandler>>,
|
||||
) -> Result<BTreeMap<u32, Box<dyn WriteBufferStreamHandler>>> {
|
||||
let db_name = db.rules.read().db_name().to_string();
|
||||
info!(%db_name, "starting replay");
|
||||
|
||||
// check if write buffer and replay plan agree on the set of sequencer ids
|
||||
let sequencer_ids: BTreeSet<_> = write_buffer
|
||||
.streams()
|
||||
.into_iter()
|
||||
.map(|(sequencer_id, _stream)| sequencer_id)
|
||||
.collect();
|
||||
let sequencer_ids: BTreeSet<_> = write_buffer_streams.keys().copied().collect();
|
||||
for sequencer_id in replay_plan.sequencer_ids() {
|
||||
if !sequencer_ids.contains(&sequencer_id) {
|
||||
return Err(Error::UnknownSequencer {
|
||||
|
@ -179,31 +187,30 @@ pub async fn perform_replay(
|
|||
})
|
||||
.collect();
|
||||
|
||||
// need to convert the btree into a vec because the btree iterator is not `Send`
|
||||
let mut write_buffer_streams: Vec<_> = write_buffer_streams.into_iter().collect();
|
||||
|
||||
// seek write buffer according to the plan
|
||||
for (sequencer_id, min_max) in &replay_ranges {
|
||||
if let Some(min) = min_max.min() {
|
||||
info!(%db_name, sequencer_id, sequence_number=min, "seek sequencer in preperation for replay");
|
||||
write_buffer
|
||||
.seek(*sequencer_id, min)
|
||||
.await
|
||||
.context(SeekSnafu {
|
||||
for (sequencer_id, handler) in write_buffer_streams.iter_mut() {
|
||||
if let Some(min_max) = replay_ranges.get(sequencer_id) {
|
||||
if let Some(min) = min_max.min() {
|
||||
info!(%db_name, sequencer_id, sequence_number=min, "seek sequencer in preperation for replay");
|
||||
handler.seek(min).await.context(SeekSnafu {
|
||||
sequencer_id: *sequencer_id,
|
||||
})?;
|
||||
} else {
|
||||
let sequence_number = min_max.max() + 1;
|
||||
info!(%db_name, sequencer_id, sequence_number, "seek sequencer that did not require replay");
|
||||
write_buffer
|
||||
.seek(*sequencer_id, sequence_number)
|
||||
.await
|
||||
.context(SeekSnafu {
|
||||
} else {
|
||||
let sequence_number = min_max.max() + 1;
|
||||
info!(%db_name, sequencer_id, sequence_number, "seek sequencer that did not require replay");
|
||||
handler.seek(sequence_number).await.context(SeekSnafu {
|
||||
sequencer_id: *sequencer_id,
|
||||
})?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// replay ranges
|
||||
for (sequencer_id, mut stream) in write_buffer.streams() {
|
||||
if let Some(min_max) = replay_ranges.get(&sequencer_id) {
|
||||
for (sequencer_id, handler) in write_buffer_streams.iter_mut() {
|
||||
if let Some(min_max) = replay_ranges.get(sequencer_id) {
|
||||
if min_max.min().is_none() {
|
||||
// no replay required
|
||||
continue;
|
||||
|
@ -216,19 +223,17 @@ pub async fn perform_replay(
|
|||
"replay sequencer",
|
||||
);
|
||||
|
||||
while let Some(dml_operation) = stream
|
||||
.stream
|
||||
.try_next()
|
||||
.await
|
||||
.context(EntrySnafu { sequencer_id })?
|
||||
{
|
||||
let mut stream = handler.stream();
|
||||
while let Some(dml_operation) = stream.try_next().await.context(EntrySnafu {
|
||||
sequencer_id: *sequencer_id,
|
||||
})? {
|
||||
let sequence = *dml_operation
|
||||
.meta()
|
||||
.sequence()
|
||||
.expect("entry must be sequenced");
|
||||
if sequence.number > min_max.max() {
|
||||
return Err(Error::EntryLostError {
|
||||
sequencer_id,
|
||||
sequencer_id: *sequencer_id,
|
||||
actual_sequence_number: sequence.number,
|
||||
expected_sequence_number: min_max.max(),
|
||||
});
|
||||
|
@ -253,6 +258,7 @@ pub async fn perform_replay(
|
|||
}
|
||||
Err(crate::DmlError::HardLimitReached {}) if n_try < n_tries => {
|
||||
if !logged_hard_limit {
|
||||
let sequencer_id: u32 = *sequencer_id;
|
||||
info!(
|
||||
%db_name,
|
||||
sequencer_id,
|
||||
|
@ -313,7 +319,7 @@ pub async fn perform_replay(
|
|||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
Ok(write_buffer_streams.into_iter().collect())
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
|
@ -610,8 +616,12 @@ mod tests {
|
|||
|
||||
let mut lifecycle = LifecycleWorker::new(Arc::clone(&test_db.db));
|
||||
|
||||
let write_buffer =
|
||||
Arc::new(MockBufferForReading::new(write_buffer_state.clone(), None).unwrap());
|
||||
let streams = write_buffer.stream_handlers().await.unwrap();
|
||||
let mut maybe_consumer = Some(WriteBufferConsumer::new(
|
||||
Box::new(MockBufferForReading::new(write_buffer_state.clone(), None).unwrap()),
|
||||
write_buffer,
|
||||
streams,
|
||||
Arc::clone(&test_db.db),
|
||||
®istry,
|
||||
));
|
||||
|
@ -664,16 +674,17 @@ mod tests {
|
|||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let mut write_buffer =
|
||||
MockBufferForReading::new(write_buffer_state.clone(), None).unwrap();
|
||||
let write_buffer: Arc<dyn WriteBufferReading> = Arc::new(
|
||||
MockBufferForReading::new(write_buffer_state.clone(), None).unwrap(),
|
||||
);
|
||||
|
||||
test_db
|
||||
let streams = test_db
|
||||
.db
|
||||
.perform_replay(replay_plan, &mut write_buffer)
|
||||
.perform_replay(replay_plan, Arc::clone(&write_buffer))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
maybe_write_buffer = Some(write_buffer);
|
||||
maybe_write_buffer = Some((write_buffer, streams));
|
||||
}
|
||||
Step::Persist(partitions) => {
|
||||
let db = &test_db.db;
|
||||
|
@ -736,13 +747,20 @@ mod tests {
|
|||
}
|
||||
Step::Await(checks) => {
|
||||
if maybe_consumer.is_none() {
|
||||
let write_buffer = match maybe_write_buffer.take() {
|
||||
Some(write_buffer) => write_buffer,
|
||||
None => MockBufferForReading::new(write_buffer_state.clone(), None)
|
||||
.unwrap(),
|
||||
let (write_buffer, streams) = match maybe_write_buffer.take() {
|
||||
Some(x) => x,
|
||||
None => {
|
||||
let write_buffer: Arc<dyn WriteBufferReading> = Arc::new(
|
||||
MockBufferForReading::new(write_buffer_state.clone(), None)
|
||||
.unwrap(),
|
||||
);
|
||||
let streams = write_buffer.stream_handlers().await.unwrap();
|
||||
(write_buffer, streams)
|
||||
}
|
||||
};
|
||||
maybe_consumer = Some(WriteBufferConsumer::new(
|
||||
Box::new(write_buffer),
|
||||
write_buffer,
|
||||
streams,
|
||||
Arc::clone(&test_db.db),
|
||||
®istry,
|
||||
));
|
||||
|
@ -981,6 +999,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn replay_ok_two_partitions_persist_second() {
|
||||
test_helpers::maybe_start_logging();
|
||||
// acts as regression test for the following PRs:
|
||||
// - https://github.com/influxdata/influxdb_iox/pull/2079
|
||||
// - https://github.com/influxdata/influxdb_iox/pull/2084
|
||||
|
@ -1087,6 +1106,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn replay_ok_two_partitions_persist_first() {
|
||||
test_helpers::maybe_start_logging();
|
||||
// acts as regression test for the following PRs:
|
||||
// - https://github.com/influxdata/influxdb_iox/pull/2079
|
||||
// - https://github.com/influxdata/influxdb_iox/pull/2084
|
||||
|
@ -1193,6 +1213,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn replay_ok_nothing_to_replay() {
|
||||
test_helpers::maybe_start_logging();
|
||||
ReplayTest {
|
||||
steps: vec![
|
||||
Step::Restart,
|
||||
|
@ -1227,6 +1248,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn replay_ok_different_sequencer_situations() {
|
||||
test_helpers::maybe_start_logging();
|
||||
// three sequencers:
|
||||
// 0: no data at all
|
||||
// 1: replay required, additional incoming data during downtime
|
||||
|
@ -1338,6 +1360,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn replay_ok_interleaved_writes() {
|
||||
test_helpers::maybe_start_logging();
|
||||
ReplayTest {
|
||||
steps: vec![
|
||||
// let's ingest some data for two partitions a and b
|
||||
|
@ -1581,6 +1604,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn replay_compacts() {
|
||||
test_helpers::maybe_start_logging();
|
||||
let tracing_capture = TracingCapture::new();
|
||||
|
||||
// these numbers are handtuned to trigger hard buffer limits w/o making the test too big
|
||||
|
@ -1635,6 +1659,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn replay_prune_full_partition() {
|
||||
test_helpers::maybe_start_logging();
|
||||
// there the following entries:
|
||||
//
|
||||
// 0. table 2, partition a:
|
||||
|
@ -1723,6 +1748,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn replay_prune_some_sequences_partition() {
|
||||
test_helpers::maybe_start_logging();
|
||||
// there the following entries:
|
||||
//
|
||||
// 0. table 2, partition a:
|
||||
|
@ -1814,6 +1840,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn replay_prune_rows() {
|
||||
test_helpers::maybe_start_logging();
|
||||
ReplayTest {
|
||||
steps: vec![
|
||||
Step::Ingest(vec![
|
||||
|
@ -1923,6 +1950,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn replay_works_with_checkpoints_all_full_persisted_1() {
|
||||
test_helpers::maybe_start_logging();
|
||||
ReplayTest {
|
||||
catalog_transactions_until_checkpoint: NonZeroU64::new(2).unwrap(),
|
||||
steps: vec![
|
||||
|
@ -1962,6 +1990,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn replay_works_with_checkpoints_all_full_persisted_2() {
|
||||
test_helpers::maybe_start_logging();
|
||||
// try to provoke an catalog checkpoints that lists database checkpoints in the wrong order
|
||||
ReplayTest {
|
||||
catalog_transactions_until_checkpoint: NonZeroU64::new(2).unwrap(),
|
||||
|
@ -2050,6 +2079,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn replay_works_partially_persisted_1() {
|
||||
test_helpers::maybe_start_logging();
|
||||
// regression test for https://github.com/influxdata/influxdb_iox/issues/2185
|
||||
let tracing_capture = TracingCapture::new();
|
||||
|
||||
|
@ -2121,6 +2151,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn replay_works_partially_persisted_2() {
|
||||
test_helpers::maybe_start_logging();
|
||||
// regression test for https://github.com/influxdata/influxdb_iox/issues/2185
|
||||
let tracing_capture = TracingCapture::new();
|
||||
|
||||
|
@ -2202,6 +2233,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn replay_works_after_skip() {
|
||||
test_helpers::maybe_start_logging();
|
||||
let tracing_capture = TracingCapture::new();
|
||||
|
||||
ReplayTest {
|
||||
|
@ -2272,6 +2304,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn replay_initializes_max_seen_sequence_numbers() {
|
||||
test_helpers::maybe_start_logging();
|
||||
// Ensures that either replay or the catalog loading initializes the maximum seen sequence numbers (per
|
||||
// partition) correctly. Before this test (and its fix), sequence numbers were only written if there was any
|
||||
// unpersisted range during replay.
|
||||
|
@ -2402,6 +2435,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn skip_replay_initializes_max_seen_sequence_numbers() {
|
||||
test_helpers::maybe_start_logging();
|
||||
// Similar case to `replay_initializes_max_seen_sequence_numbers` but instead of replaying, we skip replay to
|
||||
// provoke a similar outcome.
|
||||
//
|
||||
|
@ -2528,6 +2562,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn replay_after_drop() {
|
||||
test_helpers::maybe_start_logging();
|
||||
ReplayTest {
|
||||
steps: vec![
|
||||
Step::Ingest(vec![
|
||||
|
@ -2630,6 +2665,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn replay_delete() {
|
||||
test_helpers::maybe_start_logging();
|
||||
ReplayTest {
|
||||
steps: vec![
|
||||
Step::Ingest(vec![TestSequencedEntry {
|
||||
|
@ -2696,6 +2732,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn replay_delete_persisted_chunks() {
|
||||
test_helpers::maybe_start_logging();
|
||||
ReplayTest {
|
||||
steps: vec![
|
||||
Step::Ingest(vec![TestSequencedEntry {
|
||||
|
@ -2751,6 +2788,7 @@ mod tests {
|
|||
// This test replay compact os chunks with deletes and duplicates
|
||||
#[tokio::test]
|
||||
async fn replay_delete_compact_os_chunks() {
|
||||
test_helpers::maybe_start_logging();
|
||||
ReplayTest {
|
||||
steps: vec![
|
||||
// --------------------------
|
||||
|
@ -2913,10 +2951,12 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn replay_fail_sequencers_change() {
|
||||
test_helpers::maybe_start_logging();
|
||||
// create write buffer w/ sequencer 0 and 1
|
||||
let write_buffer_state =
|
||||
MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(2).unwrap());
|
||||
let mut write_buffer = MockBufferForReading::new(write_buffer_state, None).unwrap();
|
||||
let write_buffer: Arc<dyn WriteBufferReading> =
|
||||
Arc::new(MockBufferForReading::new(write_buffer_state, None).unwrap());
|
||||
|
||||
// create DB
|
||||
let db = TestDb::builder().build().await.db;
|
||||
|
@ -2940,9 +2980,7 @@ mod tests {
|
|||
let replay_plan = replay_planner.build().unwrap();
|
||||
|
||||
// replay fails
|
||||
let res = db
|
||||
.perform_replay(Some(&replay_plan), &mut write_buffer)
|
||||
.await;
|
||||
let res = db.perform_replay(Some(&replay_plan), write_buffer).await;
|
||||
assert_contains!(
|
||||
res.unwrap_err().to_string(),
|
||||
"Replay plan references unknown sequencer"
|
||||
|
@ -2951,12 +2989,14 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn replay_fail_lost_entry() {
|
||||
test_helpers::maybe_start_logging();
|
||||
// create write buffer state with sequence number 0 and 2, 1 is missing
|
||||
let write_buffer_state =
|
||||
MockBufferSharedState::empty_with_n_sequencers(NonZeroU32::try_from(1).unwrap());
|
||||
write_buffer_state.push_lp(Sequence::new(0, 0), "cpu bar=1 0");
|
||||
write_buffer_state.push_lp(Sequence::new(0, 2), "cpu bar=1 10");
|
||||
let mut write_buffer = MockBufferForReading::new(write_buffer_state, None).unwrap();
|
||||
let write_buffer: Arc<dyn WriteBufferReading> =
|
||||
Arc::new(MockBufferForReading::new(write_buffer_state, None).unwrap());
|
||||
|
||||
// create DB
|
||||
let db = TestDb::builder().build().await.db;
|
||||
|
@ -2979,9 +3019,7 @@ mod tests {
|
|||
let replay_plan = replay_planner.build().unwrap();
|
||||
|
||||
// replay fails
|
||||
let res = db
|
||||
.perform_replay(Some(&replay_plan), &mut write_buffer)
|
||||
.await;
|
||||
let res = db.perform_replay(Some(&replay_plan), write_buffer).await;
|
||||
assert_contains!(
|
||||
res.unwrap_err().to_string(),
|
||||
"Cannot replay: For sequencer 0 expected to find sequence 1 but replay jumped to 2"
|
||||
|
@ -2990,6 +3028,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn seek_to_end_works() {
|
||||
test_helpers::maybe_start_logging();
|
||||
// setup watermarks:
|
||||
// 0 -> 3 + 1 = 4
|
||||
// 1 -> 1 + 1 = 2
|
||||
|
@ -2999,14 +3038,18 @@ mod tests {
|
|||
write_buffer_state.push_lp(Sequence::new(0, 0), "cpu bar=0 0");
|
||||
write_buffer_state.push_lp(Sequence::new(0, 3), "cpu bar=3 3");
|
||||
write_buffer_state.push_lp(Sequence::new(1, 1), "cpu bar=11 11");
|
||||
let mut write_buffer = MockBufferForReading::new(write_buffer_state.clone(), None).unwrap();
|
||||
let write_buffer: Arc<dyn WriteBufferReading> =
|
||||
Arc::new(MockBufferForReading::new(write_buffer_state.clone(), None).unwrap());
|
||||
|
||||
// create DB
|
||||
let test_db = TestDb::builder().build().await;
|
||||
let db = &test_db.db;
|
||||
|
||||
// seek
|
||||
db.perform_replay(None, &mut write_buffer).await.unwrap();
|
||||
let streams = db
|
||||
.perform_replay(None, Arc::clone(&write_buffer))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// add more data
|
||||
write_buffer_state.push_lp(Sequence::new(0, 4), "cpu bar=4 4");
|
||||
|
@ -3021,7 +3064,7 @@ mod tests {
|
|||
tokio::spawn(async move { db_captured.background_worker(shutdown_captured).await });
|
||||
|
||||
let consumer =
|
||||
WriteBufferConsumer::new(Box::new(write_buffer), Arc::clone(db), &Default::default());
|
||||
WriteBufferConsumer::new(write_buffer, streams, Arc::clone(db), &Default::default());
|
||||
|
||||
// wait until checks pass
|
||||
let checks = vec![Check::Query(
|
||||
|
@ -3040,6 +3083,7 @@ mod tests {
|
|||
loop {
|
||||
println!("Try checks...");
|
||||
if ReplayTest::eval_checks(&checks, false, &test_db).await {
|
||||
println!("checks passed...");
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -8,19 +8,19 @@
|
|||
//! For example `SELECT * FROM system.chunks`
|
||||
|
||||
use super::{catalog::Catalog, query_log::QueryLog};
|
||||
use arrow::{
|
||||
datatypes::{Field, Schema, SchemaRef},
|
||||
error::Result,
|
||||
record_batch::RecordBatch,
|
||||
};
|
||||
use arrow::{datatypes::SchemaRef, error::Result, record_batch::RecordBatch};
|
||||
use async_trait::async_trait;
|
||||
use datafusion::execution::runtime_env::RuntimeEnv;
|
||||
use datafusion::physical_plan::{
|
||||
Partitioning, RecordBatchStream, SendableRecordBatchStream, Statistics,
|
||||
};
|
||||
use datafusion::{
|
||||
catalog::schema::SchemaProvider,
|
||||
datasource::TableProvider,
|
||||
error::{DataFusionError, Result as DataFusionResult},
|
||||
physical_plan::{memory::MemoryExec, ExecutionPlan},
|
||||
catalog::schema::SchemaProvider, datasource::TableProvider, error::Result as DataFusionResult,
|
||||
physical_plan::ExecutionPlan,
|
||||
};
|
||||
use job_registry::JobRegistry;
|
||||
use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
use std::{any::Any, sync::Arc};
|
||||
|
||||
mod chunks;
|
||||
|
@ -65,22 +65,22 @@ impl SystemSchemaProvider {
|
|||
) -> Self {
|
||||
let db_name = db_name.into();
|
||||
let chunks = Arc::new(SystemTableProvider {
|
||||
inner: chunks::ChunksTable::new(Arc::clone(&catalog)),
|
||||
table: Arc::new(chunks::ChunksTable::new(Arc::clone(&catalog))),
|
||||
});
|
||||
let columns = Arc::new(SystemTableProvider {
|
||||
inner: columns::ColumnsTable::new(Arc::clone(&catalog)),
|
||||
table: Arc::new(columns::ColumnsTable::new(Arc::clone(&catalog))),
|
||||
});
|
||||
let chunk_columns = Arc::new(SystemTableProvider {
|
||||
inner: columns::ChunkColumnsTable::new(Arc::clone(&catalog)),
|
||||
table: Arc::new(columns::ChunkColumnsTable::new(Arc::clone(&catalog))),
|
||||
});
|
||||
let operations = Arc::new(SystemTableProvider {
|
||||
inner: operations::OperationsTable::new(db_name, jobs),
|
||||
table: Arc::new(operations::OperationsTable::new(db_name, jobs)),
|
||||
});
|
||||
let persistence_windows = Arc::new(SystemTableProvider {
|
||||
inner: persistence::PersistenceWindowsTable::new(catalog),
|
||||
table: Arc::new(persistence::PersistenceWindowsTable::new(catalog)),
|
||||
});
|
||||
let queries = Arc::new(SystemTableProvider {
|
||||
inner: queries::QueriesTable::new(query_log),
|
||||
table: Arc::new(queries::QueriesTable::new(query_log)),
|
||||
});
|
||||
Self {
|
||||
chunks,
|
||||
|
@ -133,21 +133,20 @@ impl SchemaProvider for SystemSchemaProvider {
|
|||
}
|
||||
}
|
||||
|
||||
type BatchIterator = Box<dyn Iterator<Item = Result<RecordBatch>> + Send + Sync>;
|
||||
|
||||
/// The minimal thing that a system table needs to implement
|
||||
trait IoxSystemTable: Send + Sync {
|
||||
/// Produce the schema from this system table
|
||||
fn schema(&self) -> SchemaRef;
|
||||
|
||||
/// Get the contents of the system table as a single RecordBatch
|
||||
fn batch(&self) -> Result<RecordBatch>;
|
||||
/// Get the contents of the system table
|
||||
fn scan(&self, batch_size: usize) -> Result<BatchIterator>;
|
||||
}
|
||||
|
||||
/// Adapter that makes any `IoxSystemTable` a DataFusion `TableProvider`
|
||||
struct SystemTableProvider<T>
|
||||
where
|
||||
T: IoxSystemTable,
|
||||
{
|
||||
inner: T,
|
||||
struct SystemTableProvider<T: IoxSystemTable> {
|
||||
table: Arc<T>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
|
@ -160,7 +159,7 @@ where
|
|||
}
|
||||
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.inner.schema()
|
||||
self.table.schema()
|
||||
}
|
||||
|
||||
async fn scan(
|
||||
|
@ -170,134 +169,97 @@ where
|
|||
_filters: &[datafusion::logical_plan::Expr],
|
||||
_limit: Option<usize>,
|
||||
) -> DataFusionResult<Arc<dyn ExecutionPlan>> {
|
||||
scan_batch(self.inner.batch()?, self.schema(), projection.as_ref())
|
||||
let schema = self.table.schema();
|
||||
let projected_schema = match projection.as_ref() {
|
||||
Some(projection) => Arc::new(schema.project(projection)?),
|
||||
None => schema,
|
||||
};
|
||||
|
||||
Ok(Arc::new(SystemTableExecutionPlan {
|
||||
table: Arc::clone(&self.table),
|
||||
projection: projection.clone(),
|
||||
projected_schema,
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a DataFusion ExecutionPlan node that scans a single batch
|
||||
/// of records.
|
||||
fn scan_batch(
|
||||
batch: RecordBatch,
|
||||
schema: SchemaRef,
|
||||
projection: Option<&Vec<usize>>,
|
||||
) -> DataFusionResult<Arc<dyn ExecutionPlan>> {
|
||||
// apply projection, if any
|
||||
let (schema, batch) = match projection {
|
||||
None => (schema, batch),
|
||||
Some(projection) => {
|
||||
let projected_columns: DataFusionResult<Vec<Field>> = projection
|
||||
.iter()
|
||||
.map(|i| {
|
||||
if *i < schema.fields().len() {
|
||||
Ok(schema.field(*i).clone())
|
||||
} else {
|
||||
Err(DataFusionError::Internal(format!(
|
||||
"Projection index out of range in ChunksProvider: {}",
|
||||
i
|
||||
)))
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
let projected_schema = Arc::new(Schema::new(projected_columns?));
|
||||
|
||||
let columns = projection
|
||||
.iter()
|
||||
.map(|i| Arc::clone(batch.column(*i)))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let projected_batch = RecordBatch::try_new(Arc::clone(&projected_schema), columns)?;
|
||||
(projected_schema, projected_batch)
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Arc::new(MemoryExec::try_new(&[vec![batch]], schema, None)?))
|
||||
struct SystemTableExecutionPlan<T> {
|
||||
table: Arc<T>,
|
||||
projected_schema: SchemaRef,
|
||||
projection: Option<Vec<usize>>,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use arrow::array::{ArrayRef, UInt64Array};
|
||||
use arrow_util::assert_batches_eq;
|
||||
use datafusion_util::test_collect;
|
||||
|
||||
fn seq_array(start: u64, end: u64) -> ArrayRef {
|
||||
Arc::new(UInt64Array::from_iter_values(start..end))
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_scan_batch_no_projection() {
|
||||
let batch = RecordBatch::try_from_iter(vec![
|
||||
("col1", seq_array(0, 3)),
|
||||
("col2", seq_array(1, 4)),
|
||||
("col3", seq_array(2, 5)),
|
||||
("col4", seq_array(3, 6)),
|
||||
])
|
||||
.unwrap();
|
||||
|
||||
let projection = None;
|
||||
let scan = scan_batch(batch.clone(), batch.schema(), projection).unwrap();
|
||||
let collected = test_collect(scan).await;
|
||||
|
||||
let expected = vec![
|
||||
"+------+------+------+------+",
|
||||
"| col1 | col2 | col3 | col4 |",
|
||||
"+------+------+------+------+",
|
||||
"| 0 | 1 | 2 | 3 |",
|
||||
"| 1 | 2 | 3 | 4 |",
|
||||
"| 2 | 3 | 4 | 5 |",
|
||||
"+------+------+------+------+",
|
||||
];
|
||||
|
||||
assert_batches_eq!(&expected, &collected);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_scan_batch_good_projection() {
|
||||
let batch = RecordBatch::try_from_iter(vec![
|
||||
("col1", seq_array(0, 3)),
|
||||
("col2", seq_array(1, 4)),
|
||||
("col3", seq_array(2, 5)),
|
||||
("col4", seq_array(3, 6)),
|
||||
])
|
||||
.unwrap();
|
||||
|
||||
let projection = Some(vec![3, 1]);
|
||||
let scan = scan_batch(batch.clone(), batch.schema(), projection.as_ref()).unwrap();
|
||||
let collected = test_collect(scan).await;
|
||||
|
||||
let expected = vec![
|
||||
"+------+------+",
|
||||
"| col4 | col2 |",
|
||||
"+------+------+",
|
||||
"| 3 | 1 |",
|
||||
"| 4 | 2 |",
|
||||
"| 5 | 3 |",
|
||||
"+------+------+",
|
||||
];
|
||||
|
||||
assert_batches_eq!(&expected, &collected);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_scan_batch_bad_projection() {
|
||||
let batch = RecordBatch::try_from_iter(vec![
|
||||
("col1", seq_array(0, 3)),
|
||||
("col2", seq_array(1, 4)),
|
||||
("col3", seq_array(2, 5)),
|
||||
("col4", seq_array(3, 6)),
|
||||
])
|
||||
.unwrap();
|
||||
|
||||
// no column index 5
|
||||
let projection = Some(vec![3, 1, 5]);
|
||||
let result = scan_batch(batch.clone(), batch.schema(), projection.as_ref());
|
||||
let err_string = result.unwrap_err().to_string();
|
||||
assert!(
|
||||
err_string
|
||||
.contains("Internal error: Projection index out of range in ChunksProvider: 5"),
|
||||
"Actual error: {}",
|
||||
err_string
|
||||
);
|
||||
impl<T> std::fmt::Debug for SystemTableExecutionPlan<T> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("SystemTableExecutionPlan")
|
||||
.field("projection", &self.projection)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: IoxSystemTable + 'static> ExecutionPlan for SystemTableExecutionPlan<T> {
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn schema(&self) -> SchemaRef {
|
||||
Arc::clone(&self.projected_schema)
|
||||
}
|
||||
|
||||
fn output_partitioning(&self) -> Partitioning {
|
||||
Partitioning::UnknownPartitioning(1)
|
||||
}
|
||||
|
||||
fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
|
||||
vec![]
|
||||
}
|
||||
|
||||
fn with_new_children(
|
||||
&self,
|
||||
_children: Vec<Arc<dyn ExecutionPlan>>,
|
||||
) -> DataFusionResult<Arc<dyn ExecutionPlan>> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn execute(
|
||||
&self,
|
||||
_partition: usize,
|
||||
runtime: Arc<RuntimeEnv>,
|
||||
) -> DataFusionResult<SendableRecordBatchStream> {
|
||||
Ok(Box::pin(SystemTableStream {
|
||||
projected_schema: Arc::clone(&self.projected_schema),
|
||||
batches: self.table.scan(runtime.batch_size)?,
|
||||
projection: self.projection.clone(),
|
||||
}))
|
||||
}
|
||||
|
||||
fn statistics(&self) -> Statistics {
|
||||
Statistics::default()
|
||||
}
|
||||
}
|
||||
|
||||
struct SystemTableStream {
|
||||
projected_schema: SchemaRef,
|
||||
projection: Option<Vec<usize>>,
|
||||
batches: BatchIterator,
|
||||
}
|
||||
|
||||
impl RecordBatchStream for SystemTableStream {
|
||||
fn schema(&self) -> SchemaRef {
|
||||
Arc::clone(&self.projected_schema)
|
||||
}
|
||||
}
|
||||
|
||||
impl futures::Stream for SystemTableStream {
|
||||
type Item = Result<RecordBatch>;
|
||||
|
||||
fn poll_next(mut self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
||||
Poll::Ready(self.batches.next().map(|maybe_batch| {
|
||||
maybe_batch.and_then(|batch| match &self.projection {
|
||||
Some(projection) => batch.project(projection),
|
||||
None => Ok(batch),
|
||||
})
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
use crate::system_tables::BatchIterator;
|
||||
use crate::{catalog::Catalog, system_tables::IoxSystemTable};
|
||||
use arrow::{
|
||||
array::{StringArray, TimestampNanosecondArray, UInt32Array, UInt64Array},
|
||||
|
@ -30,9 +31,14 @@ impl IoxSystemTable for ChunksTable {
|
|||
Arc::clone(&self.schema)
|
||||
}
|
||||
|
||||
fn batch(&self) -> Result<RecordBatch> {
|
||||
from_chunk_summaries(self.schema(), self.catalog.chunk_summaries())
|
||||
.log_if_error("system.chunks table")
|
||||
fn scan(&self, _batch_size: usize) -> Result<BatchIterator> {
|
||||
let schema = Arc::clone(&self.schema);
|
||||
let catalog = Arc::clone(&self.catalog);
|
||||
|
||||
Ok(Box::new(std::iter::once_with(move || {
|
||||
from_chunk_summaries(schema, catalog.chunk_summaries())
|
||||
.log_if_error("system.chunks table")
|
||||
})))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
use crate::system_tables::BatchIterator;
|
||||
use crate::{catalog::Catalog, system_tables::IoxSystemTable};
|
||||
use arrow::array::UInt32Array;
|
||||
use arrow::{
|
||||
|
@ -33,9 +34,13 @@ impl IoxSystemTable for ColumnsTable {
|
|||
fn schema(&self) -> SchemaRef {
|
||||
Arc::clone(&self.schema)
|
||||
}
|
||||
fn batch(&self) -> Result<RecordBatch> {
|
||||
from_partition_summaries(self.schema(), self.catalog.partition_summaries())
|
||||
.log_if_error("system.columns table")
|
||||
fn scan(&self, _batch_size: usize) -> Result<BatchIterator> {
|
||||
let schema = Arc::clone(&self.schema);
|
||||
let catalog = Arc::clone(&self.catalog);
|
||||
Ok(Box::new(std::iter::once_with(move || {
|
||||
from_partition_summaries(schema, catalog.partition_summaries())
|
||||
.log_if_error("system.columns table")
|
||||
})))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -113,9 +118,13 @@ impl IoxSystemTable for ChunkColumnsTable {
|
|||
Arc::clone(&self.schema)
|
||||
}
|
||||
|
||||
fn batch(&self) -> Result<RecordBatch> {
|
||||
assemble_chunk_columns(self.schema(), self.catalog.detailed_chunk_summaries())
|
||||
.log_if_error("system.column_chunks table")
|
||||
fn scan(&self, _batch_size: usize) -> Result<BatchIterator> {
|
||||
let schema = Arc::clone(&self.schema);
|
||||
let catalog = Arc::clone(&self.catalog);
|
||||
Ok(Box::new(std::iter::once_with(move || {
|
||||
assemble_chunk_columns(schema, catalog.detailed_chunk_summaries())
|
||||
.log_if_error("system.column_chunks table")
|
||||
})))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
use crate::system_tables::IoxSystemTable;
|
||||
use crate::system_tables::{BatchIterator, IoxSystemTable};
|
||||
use arrow::{
|
||||
array::{ArrayRef, StringArray, Time64NanosecondArray, TimestampNanosecondArray},
|
||||
datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit},
|
||||
|
@ -34,9 +34,15 @@ impl IoxSystemTable for OperationsTable {
|
|||
Arc::clone(&self.schema)
|
||||
}
|
||||
|
||||
fn batch(&self) -> Result<RecordBatch> {
|
||||
from_task_trackers(self.schema(), &self.db_name, self.jobs.tracked())
|
||||
.log_if_error("system.operations table")
|
||||
fn scan(&self, _batch_size: usize) -> Result<BatchIterator> {
|
||||
let schema = Arc::clone(&self.schema);
|
||||
let jobs = Arc::clone(&self.jobs);
|
||||
let db_name = self.db_name.clone();
|
||||
|
||||
Ok(Box::new(std::iter::once_with(move || {
|
||||
from_task_trackers(schema, &db_name, jobs.tracked())
|
||||
.log_if_error("system.operations table")
|
||||
})))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
use crate::system_tables::BatchIterator;
|
||||
use crate::{catalog::Catalog, system_tables::IoxSystemTable};
|
||||
use arrow::{
|
||||
array::{StringArray, TimestampNanosecondArray, UInt64Array},
|
||||
|
@ -31,9 +32,14 @@ impl IoxSystemTable for PersistenceWindowsTable {
|
|||
Arc::clone(&self.schema)
|
||||
}
|
||||
|
||||
fn batch(&self) -> Result<RecordBatch> {
|
||||
from_write_summaries(self.schema(), self.catalog.persistence_summaries())
|
||||
.log_if_error("system.persistence_windows table")
|
||||
fn scan(&self, _batch_size: usize) -> Result<BatchIterator> {
|
||||
let schema = Arc::clone(&self.schema);
|
||||
let catalog = Arc::clone(&self.catalog);
|
||||
|
||||
Ok(Box::new(std::iter::once_with(move || {
|
||||
from_write_summaries(schema, catalog.persistence_summaries())
|
||||
.log_if_error("system.persistence_windows table")
|
||||
})))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
use crate::system_tables::BatchIterator;
|
||||
use crate::{
|
||||
query_log::{QueryLog, QueryLogEntry},
|
||||
system_tables::IoxSystemTable,
|
||||
|
@ -8,7 +9,7 @@ use arrow::{
|
|||
error::Result,
|
||||
record_batch::RecordBatch,
|
||||
};
|
||||
use data_types::error::ErrorLogger;
|
||||
use observability_deps::tracing::error;
|
||||
use std::{collections::VecDeque, sync::Arc};
|
||||
|
||||
/// Implementation of system.queries table
|
||||
|
@ -32,9 +33,27 @@ impl IoxSystemTable for QueriesTable {
|
|||
Arc::clone(&self.schema)
|
||||
}
|
||||
|
||||
fn batch(&self) -> Result<RecordBatch> {
|
||||
from_query_log_entries(self.schema(), self.query_log.entries())
|
||||
.log_if_error("system.chunks table")
|
||||
fn scan(&self, batch_size: usize) -> Result<BatchIterator> {
|
||||
let schema = self.schema();
|
||||
let entries = self.query_log.entries();
|
||||
let mut offset = 0;
|
||||
Ok(Box::new(std::iter::from_fn(move || {
|
||||
if offset >= entries.len() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let len = batch_size.min(entries.len() - offset);
|
||||
match from_query_log_entries(schema.clone(), &entries, offset, len) {
|
||||
Ok(batch) => {
|
||||
offset += len;
|
||||
Some(Ok(batch))
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Error system.chunks table: {:?}", e);
|
||||
Some(Err(e))
|
||||
}
|
||||
}
|
||||
})))
|
||||
}
|
||||
}
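For orientation, the offset arithmetic above plays out like this (a hand-worked example; the numbers are invented, not taken from this diff):

// With 5 log entries and batch_size = 2, successive iterator calls use
// (offset, len) = (0, 2), (2, 2), (4, 1); offset then reaches entries.len()
// and the iterator returns None, so the scan yields three RecordBatches.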
|
||||
|
||||
|
@ -57,26 +76,36 @@ fn queries_schema() -> SchemaRef {
|
|||
|
||||
fn from_query_log_entries(
|
||||
schema: SchemaRef,
|
||||
entries: VecDeque<Arc<QueryLogEntry>>,
|
||||
entries: &VecDeque<Arc<QueryLogEntry>>,
|
||||
offset: usize,
|
||||
len: usize,
|
||||
) -> Result<RecordBatch> {
|
||||
let issue_time = entries
|
||||
.iter()
|
||||
.skip(offset)
|
||||
.take(len)
|
||||
.map(|e| e.issue_time)
|
||||
.map(|ts| Some(ts.timestamp_nanos()))
|
||||
.collect::<TimestampNanosecondArray>();
|
||||
|
||||
let query_type = entries
|
||||
.iter()
|
||||
.skip(offset)
|
||||
.take(len)
|
||||
.map(|e| Some(&e.query_type))
|
||||
.collect::<StringArray>();
|
||||
|
||||
let query_text = entries
|
||||
.iter()
|
||||
.map(|e| Some(&e.query_text))
|
||||
.skip(offset)
|
||||
.take(len)
|
||||
.map(|e| Some(e.query_text.to_string()))
|
||||
.collect::<StringArray>();
|
||||
|
||||
let query_runtime = entries
|
||||
.iter()
|
||||
.skip(offset)
|
||||
.take(len)
|
||||
.map(|e| e.query_completed_duration().map(|d| d.as_nanos() as i64))
|
||||
.collect::<DurationNanosecondArray>();
|
||||
|
||||
|
@ -101,11 +130,14 @@ mod tests {
|
|||
fn test_from_query_log() {
|
||||
let now = Time::from_rfc3339("1996-12-19T16:39:57+00:00").unwrap();
|
||||
let time_provider = Arc::new(time::MockProvider::new(now));
|
||||
|
||||
let query_log = QueryLog::new(10, Arc::clone(&time_provider) as Arc<dyn TimeProvider>);
|
||||
query_log.push("sql", "select * from foo");
|
||||
query_log.push("sql", Box::new("select * from foo"));
|
||||
time_provider.inc(std::time::Duration::from_secs(24 * 60 * 60));
|
||||
query_log.push("sql", "select * from bar");
|
||||
let read_filter_entry = query_log.push("read_filter", "json goop");
|
||||
query_log.push("sql", Box::new("select * from bar"));
|
||||
let read_filter_entry = query_log.push("read_filter", Box::new("json goop"));
|
||||
|
||||
let table = QueriesTable::new(Arc::new(query_log));
|
||||
|
||||
let expected = vec![
|
||||
"+----------------------+-------------+-------------------+--------------------+",
|
||||
|
@ -117,9 +149,9 @@ mod tests {
|
|||
"+----------------------+-------------+-------------------+--------------------+",
|
||||
];
|
||||
|
||||
let schema = queries_schema();
|
||||
let batch = from_query_log_entries(schema.clone(), query_log.entries()).unwrap();
|
||||
assert_batches_eq!(&expected, &[batch]);
|
||||
let entries = table.scan(3).unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
assert_eq!(entries.len(), 1);
|
||||
assert_batches_eq!(&expected, &entries);
|
||||
|
||||
// mark one of the queries completed after 4s
|
||||
let now = Time::from_rfc3339("1996-12-20T16:40:01+00:00").unwrap();
|
||||
|
@ -135,7 +167,8 @@ mod tests {
|
|||
"+----------------------+-------------+-------------------+--------------------+",
|
||||
];
|
||||
|
||||
let batch = from_query_log_entries(schema, query_log.entries()).unwrap();
|
||||
assert_batches_eq!(&expected, &[batch]);
|
||||
let entries = table.scan(2).unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
assert_eq!(entries.len(), 2);
|
||||
assert_batches_eq!(&expected, &entries);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,11 +2,12 @@ use crate::Db;
|
|||
use dml::DmlOperation;
|
||||
use futures::{
|
||||
future::{BoxFuture, Shared},
|
||||
stream::{BoxStream, FuturesUnordered},
|
||||
stream::FuturesUnordered,
|
||||
FutureExt, StreamExt, TryFutureExt,
|
||||
};
|
||||
use observability_deps::tracing::{debug, error, info, warn};
|
||||
use std::{
|
||||
collections::BTreeMap,
|
||||
future::Future,
|
||||
sync::Arc,
|
||||
time::{Duration, Instant},
|
||||
|
@ -14,7 +15,7 @@ use std::{
|
|||
use tokio::task::JoinError;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use trace::span::SpanRecorder;
|
||||
use write_buffer::core::{FetchHighWatermark, WriteBufferError, WriteBufferReading};
|
||||
use write_buffer::core::{WriteBufferErrorKind, WriteBufferReading, WriteBufferStreamHandler};
|
||||
|
||||
use self::metrics::{SequencerMetrics, WriteBufferIngestMetrics};
|
||||
pub mod metrics;
|
||||
|
@ -32,7 +33,8 @@ pub struct WriteBufferConsumer {
|
|||
|
||||
impl WriteBufferConsumer {
|
||||
pub fn new(
|
||||
mut write_buffer: Box<dyn WriteBufferReading>,
|
||||
write_buffer: Arc<dyn WriteBufferReading>,
|
||||
write_buffer_streams: BTreeMap<u32, Box<dyn WriteBufferStreamHandler>>,
|
||||
db: Arc<Db>,
|
||||
registry: &metric::Registry,
|
||||
) -> Self {
|
||||
|
@ -42,16 +44,15 @@ impl WriteBufferConsumer {
|
|||
|
||||
let shutdown_captured = shutdown.clone();
|
||||
let join = tokio::spawn(async move {
|
||||
let mut futures: FuturesUnordered<_> = write_buffer
|
||||
.streams()
|
||||
let mut futures: FuturesUnordered<_> = write_buffer_streams
|
||||
.into_iter()
|
||||
.map(|(sequencer_id, stream)| {
|
||||
.map(|(sequencer_id, handler)| {
|
||||
let metrics = ingest_metrics.new_sequencer_metrics(sequencer_id);
|
||||
stream_in_sequenced_entries(
|
||||
Arc::clone(&db),
|
||||
Arc::clone(&write_buffer),
|
||||
sequencer_id,
|
||||
stream.stream,
|
||||
stream.fetch_high_watermark,
|
||||
handler,
|
||||
metrics,
|
||||
)
|
||||
})
|
||||
|
@ -100,14 +101,15 @@ impl Drop for WriteBufferConsumer {
|
|||
/// buffer are ignored.
|
||||
async fn stream_in_sequenced_entries<'a>(
|
||||
db: Arc<Db>,
|
||||
write_buffer: Arc<dyn WriteBufferReading>,
|
||||
sequencer_id: u32,
|
||||
mut stream: BoxStream<'a, Result<DmlOperation, WriteBufferError>>,
|
||||
f_mark: FetchHighWatermark<'a>,
|
||||
mut stream_handler: Box<dyn WriteBufferStreamHandler>,
|
||||
mut metrics: SequencerMetrics,
|
||||
) {
|
||||
let db_name = db.rules().name.to_string();
|
||||
let mut watermark_last_updated: Option<Instant> = None;
|
||||
let mut watermark = 0_u64;
|
||||
let mut stream = stream_handler.stream();
|
||||
|
||||
while let Some(db_write_result) = stream.next().await {
|
||||
// maybe update sequencer watermark
|
||||
|
@ -118,7 +120,7 @@ async fn stream_in_sequenced_entries<'a>(
|
|||
.map(|ts| now.duration_since(ts) > Duration::from_secs(10))
|
||||
.unwrap_or(true)
|
||||
{
|
||||
match f_mark().await {
|
||||
match write_buffer.fetch_high_watermark(sequencer_id).await {
|
||||
Ok(w) => {
|
||||
watermark = w;
|
||||
}
|
||||
|
@ -140,14 +142,27 @@ async fn stream_in_sequenced_entries<'a>(
|
|||
// get entry from sequencer
|
||||
let dml_operation = match db_write_result {
|
||||
Ok(db_write) => db_write,
|
||||
// skip over invalid data in the write buffer so recovery can succeed
|
||||
Err(e) => {
|
||||
warn!(
|
||||
error!(
|
||||
%e,
|
||||
%db_name,
|
||||
sequencer_id,
|
||||
"Error converting write buffer data to SequencedEntry",
|
||||
"Error reading record from write buffer",
|
||||
);
|
||||
|
||||
match e.kind() {
|
||||
// If invalid data, simply skip over it
|
||||
WriteBufferErrorKind::InvalidData => {}
|
||||
|
||||
// Otherwise backoff for a period
|
||||
WriteBufferErrorKind::Unknown
|
||||
| WriteBufferErrorKind::IO
|
||||
// TODO: Should probably bail on invalid input error
|
||||
| WriteBufferErrorKind::InvalidInput => {
|
||||
// TODO: Exponential backoff
|
||||
tokio::time::sleep(std::time::Duration::from_secs(10)).await;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
@ -251,11 +266,10 @@ mod tests {
|
|||
let join_handle =
|
||||
tokio::spawn(async move { db_captured.background_worker(shutdown_captured).await });
|
||||
|
||||
let consumer = WriteBufferConsumer::new(
|
||||
Box::new(MockBufferForReading::new(write_buffer_state, None).unwrap()),
|
||||
Arc::clone(&db),
|
||||
&Default::default(),
|
||||
);
|
||||
let write_buffer = Arc::new(MockBufferForReading::new(write_buffer_state, None).unwrap());
|
||||
let streams = write_buffer.stream_handlers().await.unwrap();
|
||||
let consumer =
|
||||
WriteBufferConsumer::new(write_buffer, streams, Arc::clone(&db), &Default::default());
|
||||
|
||||
// check: after a while the persistence windows should have the expected data
|
||||
let t_0 = Instant::now();
|
||||
|
@ -314,8 +328,11 @@ mod tests {
|
|||
let join_handle =
|
||||
tokio::spawn(async move { db_captured.background_worker(shutdown_captured).await });
|
||||
|
||||
let write_buffer = Arc::new(MockBufferForReading::new(write_buffer_state, None).unwrap());
|
||||
let streams = write_buffer.stream_handlers().await.unwrap();
|
||||
let consumer = WriteBufferConsumer::new(
|
||||
Box::new(MockBufferForReading::new(write_buffer_state, None).unwrap()),
|
||||
write_buffer,
|
||||
streams,
|
||||
Arc::clone(&db),
|
||||
test_db.metric_registry.as_ref(),
|
||||
);
|
||||
|
@ -457,8 +474,11 @@ mod tests {
|
|||
let join_handle =
|
||||
tokio::spawn(async move { db_captured.background_worker(shutdown_captured).await });
|
||||
|
||||
let write_buffer = Arc::new(MockBufferForReading::new(write_buffer_state, None).unwrap());
|
||||
let streams = write_buffer.stream_handlers().await.unwrap();
|
||||
let consumer = WriteBufferConsumer::new(
|
||||
Box::new(MockBufferForReading::new(write_buffer_state, None).unwrap()),
|
||||
write_buffer,
|
||||
streams,
|
||||
Arc::clone(&db),
|
||||
metric_registry.as_ref(),
|
||||
);
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
[package]
|
||||
name = "gitops_adapter"
|
||||
version = "0.1.0"
|
||||
authors = ["Luke Bond <luke.n.bond@gmail.com>"]
|
||||
edition = "2021"
|
||||
|
||||
# Prevent this from being published to crates.io!
|
||||
publish = false
|
||||
|
||||
[[bin]]
|
||||
name = "iox-gitops-adapter"
|
||||
path = "src/main.rs"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
async-trait = "0.1"
|
||||
chrono = "0.4.15"
|
||||
clap = { version = "3", features = ["derive", "env"] }
|
||||
dotenv = "0.15"
|
||||
futures = "0.3"
|
||||
k8s-openapi = { version = "0.13.1", features = ["v1_17", "schemars"], default-features = false }
|
||||
kube = "0.64"
|
||||
kube-derive = { version = "0.64", default-features = false } # only needed to opt out of schema
|
||||
kube-runtime = "0.64"
|
||||
prost = "0.9"
|
||||
schemars = "0.8.3"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
thiserror = "1.0"
|
||||
tokio = { version = "1.0", features = ["rt-multi-thread", "macros", "parking_lot"] }
|
||||
tonic = "0.6"
|
||||
tracing = { version = "0.1", features = ["release_max_level_debug"] }
|
||||
workspace-hack = { path = "../workspace-hack"}
|
||||
trogging = { path = "../trogging", default-features = false, features = ["clap"] }
|
||||
|
||||
[build-dependencies]
|
||||
glob = "0.3.0"
|
||||
pbjson-build = "0.2"
|
||||
tonic-build = "0.6"
|
||||
|
||||
[dev-dependencies]
|
||||
assert_matches = "1.5"
|
||||
parking_lot = { version = "0.11.1" }
|
|
@ -0,0 +1,25 @@
|
|||
use std::process::Command;
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// Populate env!(GIT_HASH) with the current git commit
|
||||
println!("cargo:rustc-env=GIT_HASH={}", get_git_hash());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_git_hash() -> String {
|
||||
let out = match std::env::var("VERSION_HASH") {
|
||||
Ok(v) => v,
|
||||
Err(_) => {
|
||||
let output = Command::new("git")
|
||||
.args(&["describe", "--always", "--dirty", "--abbrev=64"])
|
||||
.output()
|
||||
.expect("failed to execute git rev-parse to read the current git hash");
|
||||
|
||||
String::from_utf8(output.stdout).expect("non-utf8 found in git hash")
|
||||
}
|
||||
};
|
||||
|
||||
assert!(!out.is_empty(), "attempting to embed empty git hash");
|
||||
out
|
||||
}
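For reference, the kinds of values this ends up embedding (assumed, illustrative outputs rather than anything taken from this repository):

// VERSION_HASH=abc123                 -> "abc123" (the env override wins)
// git describe --always --dirty ...   -> a full-length commit hash, suffixed with
//                                        "-dirty" when the working tree has changes,
//                                        or "<tag>-<n>-g<hash>" when a tag is reachable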
|
|
@ -0,0 +1,49 @@
|
|||
use async_trait::async_trait;
|
||||
use kube::{
|
||||
api::{Patch, PatchParams},
|
||||
Api,
|
||||
};
|
||||
use serde_json::json;
|
||||
|
||||
use crate::kafka_topic_list::resources::{KafkaTopicList, KafkaTopicListStatus};
|
||||
|
||||
#[async_trait]
|
||||
pub trait KafkaTopicListApi: Send + Sync + Clone + 'static {
|
||||
/// Gets a KafkaTopicList resource by name.
|
||||
async fn get_kafka_topic_list(
|
||||
&self,
|
||||
kafka_topic_list_name: String,
|
||||
) -> Result<KafkaTopicList, kube::Error>;
|
||||
|
||||
/// Patch status block, if it exists, with the given status.
|
||||
async fn patch_resource_status(
|
||||
&self,
|
||||
kafka_topic_list_name: String,
|
||||
status: KafkaTopicListStatus,
|
||||
) -> Result<KafkaTopicList, kube::Error>;
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl KafkaTopicListApi for Api<KafkaTopicList> {
|
||||
async fn get_kafka_topic_list(
|
||||
&self,
|
||||
kafka_topic_list_name: String,
|
||||
) -> Result<KafkaTopicList, kube::Error> {
|
||||
self.get(kafka_topic_list_name.as_str()).await
|
||||
}
|
||||
|
||||
async fn patch_resource_status(
|
||||
&self,
|
||||
kafka_topic_list_name: String,
|
||||
status: KafkaTopicListStatus,
|
||||
) -> Result<KafkaTopicList, kube::Error> {
|
||||
let patch_params = PatchParams::default();
|
||||
let s = json!({ "status": status });
|
||||
self.patch_status(
|
||||
kafka_topic_list_name.as_str(),
|
||||
&patch_params,
|
||||
&Patch::Merge(&s),
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
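A hedged usage sketch (the function name and namespace literal are illustrative assumptions; only `Api::namespaced` and the trait method come from this change):

async fn fetch_topics_example(client: kube::Client) -> Result<KafkaTopicList, kube::Error> {
    // Scope the client to a namespace, then call through the trait so the same
    // code path can also be exercised against the mock in tests.
    let api: Api<KafkaTopicList> = Api::namespaced(client, "conductor");
    api.get_kafka_topic_list("iox".to_string()).await
}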
|
|
@ -0,0 +1,129 @@
|
|||
#![allow(missing_docs)]
|
||||
|
||||
use std::sync::{mpsc::SyncSender, Arc};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use parking_lot::Mutex;
|
||||
|
||||
use crate::kafka_topic_list::{
|
||||
api::KafkaTopicListApi,
|
||||
resources::{KafkaTopicList, KafkaTopicListStatus},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub enum MockKafkaTopicListApiCall {
|
||||
Get(String),
|
||||
PatchStatus {
|
||||
kafka_topic_list_name: String,
|
||||
status: KafkaTopicListStatus,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct ClientInner {
|
||||
/// A channel to push call notifications into as they occur.
|
||||
pub notify: Option<SyncSender<MockKafkaTopicListApiCall>>,
|
||||
|
||||
/// A vector of calls in call order for assertions.
|
||||
pub calls: Vec<MockKafkaTopicListApiCall>,
|
||||
|
||||
// Return values
|
||||
pub get_ret: Vec<Result<KafkaTopicList, kube::Error>>,
|
||||
pub patch_status_ret: Vec<Result<KafkaTopicList, kube::Error>>,
|
||||
}
|
||||
|
||||
impl ClientInner {
|
||||
fn record_call(&mut self, c: MockKafkaTopicListApiCall) {
|
||||
self.calls.push(c.clone());
|
||||
if let Some(ref n) = self.notify {
|
||||
let _ = n.send(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ClientInner> for MockKafkaTopicListApi {
|
||||
fn from(state: ClientInner) -> Self {
|
||||
Self {
|
||||
state: Arc::new(Mutex::new(state)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Mock helper to record a call and return the pre-configured value.
|
||||
///
|
||||
/// Pushes `$call` onto the call record, popping `self.$return` and returning it to
|
||||
/// the caller. If no value exists, the pop attempt causes a panic.
|
||||
macro_rules! record_and_return {
|
||||
($self:ident, $call:expr, $return:ident) => {{
|
||||
let mut state = $self.state.lock();
|
||||
state.record_call($call);
|
||||
state.$return.pop().expect("no mock result to return")
|
||||
}};
|
||||
}
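Hand-expanded for illustration (not literal compiler output), the call in `get_kafka_topic_list` below becomes roughly:

// {
//     let mut state = self.state.lock();
//     state.record_call(MockKafkaTopicListApiCall::Get(kafka_topic_list_name));
//     state.get_ret.pop().expect("no mock result to return")
// }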
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct MockKafkaTopicListApi {
|
||||
pub state: Arc<Mutex<ClientInner>>,
|
||||
}
|
||||
|
||||
impl MockKafkaTopicListApi {
|
||||
pub fn with_notify(self, s: SyncSender<MockKafkaTopicListApiCall>) -> Self {
|
||||
self.state.lock().notify = Some(s);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_get_ret(self, ret: Vec<Result<KafkaTopicList, kube::Error>>) -> Self {
|
||||
self.state.lock().get_ret = ret;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_patch_status_ret(self, ret: Vec<Result<KafkaTopicList, kube::Error>>) -> Self {
|
||||
self.state.lock().patch_status_ret = ret;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn get_calls(&self) -> Vec<MockKafkaTopicListApiCall> {
|
||||
self.state.lock().calls.clone()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl KafkaTopicListApi for Arc<MockKafkaTopicListApi> {
|
||||
/// Gets a KafkaTopicList resource by name.
|
||||
async fn get_kafka_topic_list(
|
||||
&self,
|
||||
kafka_topic_list_name: String,
|
||||
) -> Result<KafkaTopicList, kube::Error> {
|
||||
record_and_return!(
|
||||
self,
|
||||
MockKafkaTopicListApiCall::Get(kafka_topic_list_name,),
|
||||
get_ret
|
||||
)
|
||||
}
|
||||
|
||||
/// Patch status block, if it exists, with the given status.
|
||||
async fn patch_resource_status(
|
||||
&self,
|
||||
kafka_topic_list_name: String,
|
||||
status: KafkaTopicListStatus,
|
||||
) -> Result<KafkaTopicList, kube::Error> {
|
||||
record_and_return!(
|
||||
self,
|
||||
MockKafkaTopicListApiCall::PatchStatus {
|
||||
kafka_topic_list_name,
|
||||
status,
|
||||
},
|
||||
patch_status_ret
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Cloning a client shares the same mock state across both client instances.
|
||||
impl Clone for MockKafkaTopicListApi {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
state: Arc::clone(&self.state),
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
pub mod api;
|
||||
pub mod resources;
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod mock_api;
|
|
@ -0,0 +1,108 @@
|
|||
use kube_derive::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(CustomResource, Debug, Serialize, Deserialize, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
group = "iox.influxdata.com",
|
||||
version = "v1alpha1",
|
||||
kind = "KafkaTopicList",
|
||||
namespaced,
|
||||
shortname = "topics"
|
||||
)]
|
||||
#[kube(status = "KafkaTopicListStatus")]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct KafkaTopicListSpec {
|
||||
topics: Vec<String>,
|
||||
}
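For orientation, the derive above corresponds to a manifest shaped roughly like this (field values invented for illustration):

// apiVersion: iox.influxdata.com/v1alpha1
// kind: KafkaTopicList
// metadata:
//   name: iox
//   namespace: conductor
// spec:
//   topics:
//     - iox_shared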
|
||||
|
||||
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema, PartialEq)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct KafkaTopicListStatus {
|
||||
conditions: Vec<KafkaTopicListStatusCondition>,
|
||||
observed_generation: i64, // type matches that of metadata.generation
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Serialize, Clone, Debug, Default, JsonSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct KafkaTopicListStatusCondition {
|
||||
type_: String,
|
||||
status: String,
|
||||
message: String,
|
||||
last_transition_time: String,
|
||||
last_update_time: String,
|
||||
}
|
||||
|
||||
impl KafkaTopicListSpec {
|
||||
pub fn new(topics: Vec<String>) -> Self {
|
||||
Self { topics }
|
||||
}
|
||||
|
||||
pub fn topics(&self) -> &Vec<String> {
|
||||
&self.topics
|
||||
}
|
||||
}
|
||||
|
||||
impl KafkaTopicListStatus {
|
||||
pub fn conditions(&self) -> &Vec<KafkaTopicListStatusCondition> {
|
||||
&self.conditions
|
||||
}
|
||||
|
||||
pub fn conditions_mut(&mut self) -> &mut Vec<KafkaTopicListStatusCondition> {
|
||||
&mut self.conditions
|
||||
}
|
||||
|
||||
pub fn observed_generation(&self) -> i64 {
|
||||
self.observed_generation
|
||||
}
|
||||
|
||||
pub fn set_observed_generation(&mut self, observed_generation: i64) {
|
||||
self.observed_generation = observed_generation;
|
||||
}
|
||||
}
|
||||
|
||||
impl KafkaTopicListStatusCondition {
|
||||
pub fn new(
|
||||
type_: String,
|
||||
status: String,
|
||||
message: String,
|
||||
last_transition_time: String,
|
||||
last_update_time: String,
|
||||
) -> Self {
|
||||
Self {
|
||||
type_,
|
||||
status,
|
||||
message,
|
||||
last_transition_time,
|
||||
last_update_time,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn type_(&self) -> &String {
|
||||
&self.type_
|
||||
}
|
||||
|
||||
pub fn status(&self) -> &String {
|
||||
&self.status
|
||||
}
|
||||
|
||||
pub fn message(&self) -> &String {
|
||||
&self.message
|
||||
}
|
||||
|
||||
pub fn last_transition_time(&self) -> &String {
|
||||
&self.last_transition_time
|
||||
}
|
||||
|
||||
pub fn last_update_time(&self) -> &String {
|
||||
&self.last_update_time
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for KafkaTopicListStatusCondition {
|
||||
// just for assertions in tests; requiring the time fields to match as well would be too tedious
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.type_ == other.type_ && self.status == other.status && self.message == other.message
|
||||
}
|
||||
}
|
|
@ -0,0 +1,537 @@
|
|||
use std::{
|
||||
io::ErrorKind,
|
||||
sync::Arc,
|
||||
time::{Duration, SystemTime},
|
||||
};
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use dotenv::dotenv;
|
||||
use futures::StreamExt;
|
||||
use kube::{api::ListParams, Api, Client as K8sClient};
|
||||
use kube_runtime::controller::{Context, Controller, ReconcilerAction};
|
||||
use std::process::Command as Cmd;
|
||||
use thiserror::Error;
|
||||
use tracing::*;
|
||||
use trogging::{cli::LoggingConfig, LogFormat};
|
||||
|
||||
use crate::kafka_topic_list::{
|
||||
api::KafkaTopicListApi,
|
||||
resources::{KafkaTopicList, KafkaTopicListStatus, KafkaTopicListStatusCondition},
|
||||
};
|
||||
|
||||
pub mod kafka_topic_list;
|
||||
|
||||
static CONDITION_TYPE_RECONCILED: &str = "Reconciled";
|
||||
static CONDITION_STATUS_TRUE: &str = "True";
|
||||
static CONDITION_STATUS_FALSE: &str = "False";
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
enum CatalogError {
|
||||
#[error("Malformed KafkaTopicList resource: {message}")]
|
||||
MalformedKafkaTopicListResource { message: String },
|
||||
|
||||
#[error("Request to patch status of k8s custom resource failed: {0}")]
|
||||
PatchStatusError(#[from] kube::Error),
|
||||
|
||||
#[error("Failed to execute iox binary to update catalog: {0}")]
|
||||
IOxBinaryExecFailed(#[from] std::io::Error),
|
||||
|
||||
#[error("Request to update catalog with topic failed: {stderr}")]
|
||||
UpdateTopicError { stderr: String },
|
||||
|
||||
#[error("Failed to parse stdout of catalog update command to ID: {0}")]
|
||||
TopicIdParseError(#[from] std::num::ParseIntError),
|
||||
}
|
||||
|
||||
// Config defines the runtime configuration variables settable on the command
|
||||
// line.
|
||||
//
|
||||
// These fields are automatically converted into a [Clap] CLI.
|
||||
//
|
||||
// This has an `allow(missing_docs)` annotation as otherwise the comment is
|
||||
// added to the CLI help text.
|
||||
//
|
||||
// [Clap]: https://github.com/clap-rs/clap
|
||||
#[derive(Debug, clap::Parser)]
|
||||
#[clap(
|
||||
name = "iox-gitops-adapter",
|
||||
about = "Adapter to configure IOx Catalog from Kubernetes Custom Resources",
|
||||
long_about = r#"Kubernetes controller responsible for synchronising the IOx Catalog to cluster configuration in a Kubernetes Custom Resource.
|
||||
|
||||
Examples:
|
||||
# Run the gitops adapter server:
|
||||
iox-gitops-adapter
|
||||
|
||||
# See all configuration options
|
||||
iox-gitops-adapter --help
|
||||
"#,
|
||||
version = concat!(env!("CARGO_PKG_VERSION"), " - ", env!("GIT_HASH"))
|
||||
)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct Config {
|
||||
/// Configure the log level & filter.
|
||||
///
|
||||
/// Example values:
|
||||
/// iox_gitops_adapter=debug
|
||||
#[clap(flatten)]
|
||||
logging_config: LoggingConfig,
|
||||
|
||||
/// Configure the Kubernetes namespace where custom resources are found.
|
||||
///
|
||||
/// Example values:
|
||||
/// namespace=conductor
|
||||
#[clap(long = "--namespace", env = "GITOPS_ADAPTER_NAMESPACE")]
|
||||
namespace: String,
|
||||
|
||||
/// Configure the Catalog's Postgres DSN.
|
||||
///
|
||||
/// Example values:
|
||||
/// catalog-dsn=postgres://postgres:postgres@localhost:5432/iox_shared
|
||||
#[clap(long = "--catalog-dsn", env = "GITOPS_ADAPTER_CATALOG_DSN")]
|
||||
catalog_dsn: String,
|
||||
|
||||
/// Configure the path to the IOx CLI.
|
||||
///
|
||||
/// Example values:
|
||||
/// iox-cli=/usr/bin/influxdb_iox
|
||||
#[clap(long = "--iox-cli", env = "GITOPS_ADAPTER_IOX_CLI")]
|
||||
iox_cli: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, clap::Parser)]
|
||||
enum Command {
|
||||
Config,
|
||||
}
|
||||
|
||||
impl Config {
|
||||
/// Returns the (possibly invalid) log filter string.
|
||||
pub fn log_filter(&self) -> &Option<String> {
|
||||
&self.logging_config.log_filter
|
||||
}
|
||||
|
||||
/// Returns the (possibly invalid) log format string.
|
||||
pub fn log_format(&self) -> &LogFormat {
|
||||
&self.logging_config.log_format
|
||||
}
|
||||
}
|
||||
|
||||
/// Load the config.
|
||||
///
|
||||
/// This pulls in config from the following sources, in order of precedence:
|
||||
///
|
||||
/// - command line arguments
|
||||
/// - user set environment variables
|
||||
/// - .env file contents
|
||||
/// - pre-configured default values
|
||||
pub fn load_config() -> Result<Config, Box<dyn std::error::Error>> {
|
||||
// Source the .env file before initialising the Config struct - this sets
|
||||
// any envs in the file, which the Config struct then uses.
|
||||
//
|
||||
// Precedence is given to existing env variables.
|
||||
match dotenv() {
|
||||
Ok(_) => {}
|
||||
Err(dotenv::Error::Io(err)) if err.kind() == ErrorKind::NotFound => {
|
||||
// Ignore this - a missing env file is not an error,
|
||||
// defaults will be applied when initialising the Config struct.
|
||||
}
|
||||
Err(e) => return Err(Box::new(e)),
|
||||
};
|
||||
|
||||
// Load the Config struct - this pulls in any envs set by the user or
|
||||
// sourced above, and applies any defaults.
|
||||
Ok(clap::Parser::parse())
|
||||
}
|
||||
|
||||
/// Initialise the tracing subscribers.
|
||||
fn setup_tracing(
|
||||
logging_config: &LoggingConfig,
|
||||
log_env_var: Option<String>,
|
||||
) -> Result<trogging::TroggingGuard, trogging::Error> {
|
||||
let drop_handle = logging_config
|
||||
.to_builder()
|
||||
.with_default_log_filter(log_env_var.unwrap_or_else(|| "info".to_string()))
|
||||
.install_global()?;
|
||||
|
||||
trace!("logging initialised!");
|
||||
|
||||
Ok(drop_handle)
|
||||
}
|
||||
|
||||
async fn reconcile_topics(
|
||||
path_to_iox_binary: &str,
|
||||
catalog_dsn: &str,
|
||||
topics: &[String],
|
||||
) -> Result<Vec<u32>, CatalogError> {
|
||||
trace!(
|
||||
"calling out to {} for topics {:?}",
|
||||
path_to_iox_binary,
|
||||
topics
|
||||
);
|
||||
topics
|
||||
.iter()
|
||||
.map(|topic| {
|
||||
match Cmd::new(path_to_iox_binary)
|
||||
.arg("catalog")
|
||||
.arg("topic")
|
||||
.arg("update")
|
||||
.arg("--catalog-dsn")
|
||||
.arg(catalog_dsn)
|
||||
.arg(topic)
|
||||
.output()
|
||||
{
|
||||
Ok(output) => match output.status.success() {
|
||||
true => {
|
||||
trace!(
|
||||
"Updated catalog with kafka topic {}. stdout: {}",
|
||||
topic,
|
||||
String::from_utf8_lossy(&output.stdout).trim()
|
||||
);
|
||||
// The CLI prints an ID on success; parse it here to confirm the update really worked,
// since a zero exit code alone is not conclusive (e.g. --help also returns 0).
// We also want to print the IDs out later.
|
||||
String::from_utf8_lossy(&output.stdout)
|
||||
.trim()
|
||||
.parse::<u32>()
|
||||
.map_err(CatalogError::TopicIdParseError)
|
||||
}
|
||||
false => Err(CatalogError::UpdateTopicError {
|
||||
stderr: String::from_utf8_lossy(&output.stderr).into(),
|
||||
}),
|
||||
},
|
||||
Err(e) => Err(CatalogError::IOxBinaryExecFailed(e)),
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
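Per topic this shells out to something like the following (binary path and DSN are the illustrative values from the CLI help above; the mock scripts further down stand in for the real CLI by printing an ID such as 42 or exiting non-zero):

// /usr/bin/influxdb_iox catalog topic update \
//     --catalog-dsn postgres://postgres:postgres@localhost:5432/iox_shared iox_shared
// => stdout "42", which is parsed into a u32 topic ID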
|
||||
|
||||
/// Controller triggers this whenever our main object or our children changed
|
||||
async fn reconcile<T>(
|
||||
topics: KafkaTopicList,
|
||||
ctx: Context<Data<T>>,
|
||||
) -> Result<ReconcilerAction, CatalogError>
|
||||
where
|
||||
T: KafkaTopicListApi,
|
||||
{
|
||||
debug!(
|
||||
"got a change to the kafka topic list custom resource: {:?}",
|
||||
topics.spec
|
||||
);
|
||||
let kafka_topic_list_api = ctx.get_ref().kafka_topic_list_api.clone();
|
||||
let topics = Arc::new(topics);
|
||||
|
||||
// if CR doesn't contain status field, add it
|
||||
let mut topics_status = match &topics.status {
|
||||
Some(status) => status.clone(),
|
||||
None => KafkaTopicListStatus::default(),
|
||||
};
|
||||
let kafka_topic_list_name = match &topics.metadata.name {
|
||||
Some(n) => n.clone(),
|
||||
None => {
|
||||
return Err(CatalogError::MalformedKafkaTopicListResource {
|
||||
message: "Missing metadata.name field".to_string(),
|
||||
})
|
||||
}
|
||||
};
|
||||
|
||||
// have we seen this update before?
|
||||
// NOTE: we may find that we'd prefer to do the reconcile anyway, if it's cheap.
|
||||
// for now this seems okay
|
||||
let generation = match topics.metadata.generation {
|
||||
Some(gen) => {
|
||||
if topics_status.observed_generation() == gen {
|
||||
info!("Nothing to reconcile; observedGeneration == generation");
|
||||
return Ok(ReconcilerAction {
|
||||
requeue_after: None,
|
||||
});
|
||||
}
|
||||
gen
|
||||
}
|
||||
_ => {
|
||||
return Err(CatalogError::MalformedKafkaTopicListResource {
|
||||
message: "Missing metadata.generation field".to_string(),
|
||||
})
|
||||
}
|
||||
};
|
||||
// make a note that we've seen this update
|
||||
topics_status.set_observed_generation(generation);
|
||||
|
||||
// call out to the iox CLI to update the catalog for each topic name in the list
|
||||
let reconcile_result = reconcile_topics(
|
||||
&ctx.get_ref().path_to_iox_binary,
|
||||
&ctx.get_ref().catalog_dsn,
|
||||
topics.spec.topics(),
|
||||
)
|
||||
.await;
|
||||
|
||||
// update status subresource based on outcome of reconcile
|
||||
let now: DateTime<Utc> = SystemTime::now().into();
|
||||
let now_str = now.to_rfc3339();
|
||||
let prev_condition = topics_status.conditions().get(0);
|
||||
let last_transition_time = match prev_condition {
|
||||
Some(c) if c.status() == CONDITION_STATUS_TRUE => c.last_transition_time().clone(),
|
||||
_ => now_str.clone(),
|
||||
};
|
||||
let new_status = match &reconcile_result {
|
||||
Ok(v) => {
|
||||
debug!(
|
||||
"Updated catalog with kafka topic list: {:?}. IDs returned: {:?}.",
|
||||
topics.spec.topics(),
|
||||
v
|
||||
);
|
||||
KafkaTopicListStatusCondition::new(
|
||||
CONDITION_TYPE_RECONCILED.to_string(),
|
||||
CONDITION_STATUS_TRUE.to_string(),
|
||||
"".to_string(),
|
||||
last_transition_time,
|
||||
now_str.clone(),
|
||||
)
|
||||
}
|
||||
Err(e) => KafkaTopicListStatusCondition::new(
|
||||
CONDITION_TYPE_RECONCILED.to_string(),
|
||||
CONDITION_STATUS_FALSE.to_string(),
|
||||
e.to_string(),
|
||||
last_transition_time,
|
||||
now_str.clone(),
|
||||
),
|
||||
};
|
||||
if topics_status.conditions().is_empty() {
|
||||
topics_status.conditions_mut().insert(0, new_status);
|
||||
} else {
|
||||
topics_status.conditions_mut()[0] = new_status;
|
||||
}
|
||||
|
||||
// patch the status field with the updated condition and observed generation
|
||||
match kafka_topic_list_api
|
||||
.patch_resource_status(kafka_topic_list_name.clone(), topics_status)
|
||||
.await
|
||||
{
|
||||
Ok(_) => {}
|
||||
Err(e) => {
|
||||
// Not great to silently swallow the error here but doesn't feel warranted to requeue
|
||||
// just because the status wasn't updated
|
||||
error!("Failed to patch KafkaTopicList status subresource: {}", e);
|
||||
}
|
||||
}
|
||||
|
||||
reconcile_result.map(|_| ReconcilerAction {
|
||||
requeue_after: None,
|
||||
})
|
||||
}
|
||||
|
||||
/// an error handler that will be called when the reconciler fails
|
||||
fn error_policy<T>(error: &CatalogError, _ctx: Context<Data<T>>) -> ReconcilerAction
|
||||
where
|
||||
T: KafkaTopicListApi,
|
||||
{
|
||||
error!(%error, "reconciliation error");
|
||||
ReconcilerAction {
|
||||
// if a sync fails we want to retry; it could simply be in the process of
|
||||
// doing another redeploy. there may be a deeper problem, in which case it'll keep trying
|
||||
// and we'll see errors and investigate. arbitrary duration chosen ¯\_(ツ)_/¯
|
||||
requeue_after: Some(Duration::from_secs(5)),
|
||||
}
|
||||
}
|
||||
|
||||
// Data we want access to in error/reconcile calls
|
||||
struct Data<T>
|
||||
where
|
||||
T: KafkaTopicListApi,
|
||||
{
|
||||
path_to_iox_binary: String,
|
||||
catalog_dsn: String,
|
||||
kafka_topic_list_api: T,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let config = load_config().expect("failed to load config");
|
||||
let _drop_handle = setup_tracing(&config.logging_config, None).unwrap();
|
||||
debug!(?config, "loaded config");
|
||||
|
||||
info!(git_hash = env!("GIT_HASH"), "starting iox-gitops-adapter");
|
||||
|
||||
let k8s_client = K8sClient::try_default()
|
||||
.await
|
||||
.expect("couldn't create k8s client");
|
||||
let topics = Api::<KafkaTopicList>::namespaced(k8s_client.clone(), config.namespace.as_str());
|
||||
info!("initialised Kubernetes API client");
|
||||
|
||||
info!("starting IOx GitOps Adapter");
|
||||
Controller::new(topics.clone(), ListParams::default())
|
||||
.run(
|
||||
reconcile,
|
||||
error_policy,
|
||||
Context::new(Data {
|
||||
path_to_iox_binary: config.iox_cli.clone(),
|
||||
catalog_dsn: config.catalog_dsn.clone(),
|
||||
kafka_topic_list_api: topics,
|
||||
}),
|
||||
)
|
||||
.for_each(|res| async move {
|
||||
match res {
|
||||
Ok(o) => info!("reconciled {:?}", o),
|
||||
Err(e) => info!("reconcile failed: {:?}", e),
|
||||
}
|
||||
})
|
||||
.await; // controller does nothing unless polled
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use assert_matches::assert_matches;
|
||||
use kafka_topic_list::{
|
||||
mock_api::{MockKafkaTopicListApi, MockKafkaTopicListApiCall},
|
||||
resources::KafkaTopicListSpec,
|
||||
};
|
||||
|
||||
use super::*;
|
||||
|
||||
fn create_topics(
|
||||
name: &str,
|
||||
spec: KafkaTopicListSpec,
|
||||
generation: i64,
|
||||
status: KafkaTopicListStatus,
|
||||
) -> KafkaTopicList {
|
||||
let mut c = KafkaTopicList::new(name, spec);
|
||||
c.metadata.generation = Some(generation);
|
||||
c.status = Some(status);
|
||||
c
|
||||
}
|
||||
|
||||
fn create_topics_status(
|
||||
observed_generation: i64,
|
||||
reconciled: bool,
|
||||
message: String,
|
||||
t: SystemTime,
|
||||
) -> KafkaTopicListStatus {
|
||||
let now: DateTime<Utc> = t.into();
|
||||
let now_str = now.to_rfc3339();
|
||||
let mut status = KafkaTopicListStatus::default();
|
||||
status
|
||||
.conditions_mut()
|
||||
.push(KafkaTopicListStatusCondition::new(
|
||||
CONDITION_TYPE_RECONCILED.to_string(),
|
||||
if reconciled {
|
||||
CONDITION_STATUS_TRUE.to_string()
|
||||
} else {
|
||||
CONDITION_STATUS_FALSE.to_string()
|
||||
},
|
||||
message,
|
||||
now_str.clone(),
|
||||
now_str,
|
||||
));
|
||||
status.set_observed_generation(observed_generation);
|
||||
status
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_single_topic_success() {
|
||||
let now = SystemTime::now();
|
||||
let mock_topics_api = Arc::new(MockKafkaTopicListApi::default().with_patch_status_ret(
|
||||
vec![Ok(create_topics(
|
||||
"iox",
|
||||
KafkaTopicListSpec::new(vec!["iox_shared".to_string()]),
|
||||
1,
|
||||
create_topics_status(0, true, "".to_string(), now),
|
||||
))],
|
||||
));
|
||||
let data = Data {
|
||||
path_to_iox_binary: "test/mock-iox-single-topic.sh".to_string(),
|
||||
catalog_dsn: "unused".to_string(),
|
||||
kafka_topic_list_api: Arc::clone(&mock_topics_api),
|
||||
};
|
||||
let c = create_topics(
|
||||
"iox",
|
||||
KafkaTopicListSpec::new(vec!["iox_shared".to_string()]),
|
||||
1,
|
||||
create_topics_status(0, true, "".to_string(), now),
|
||||
);
|
||||
let result = reconcile(c, Context::new(data)).await;
|
||||
// whole operation returns a successful result.
|
||||
assert_matches!(result, Ok(ReconcilerAction { .. }));
|
||||
// ensure status was updated accordingly.
|
||||
// alas, we don't have a success patch result either, due to the above
|
||||
assert_eq!(
|
||||
mock_topics_api.get_calls(),
|
||||
vec![MockKafkaTopicListApiCall::PatchStatus {
|
||||
kafka_topic_list_name: "iox".to_string(),
|
||||
status: create_topics_status(1, true, "".to_string(), now),
|
||||
}]
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_multi_topic_success() {
|
||||
let now = SystemTime::now();
|
||||
let mock_topics_api = Arc::new(MockKafkaTopicListApi::default().with_patch_status_ret(
|
||||
vec![Ok(create_topics(
|
||||
"iox",
|
||||
KafkaTopicListSpec::new(vec!["one".to_string(), "two".to_string()]),
|
||||
1,
|
||||
create_topics_status(0, true, "".to_string(), now),
|
||||
))],
|
||||
));
|
||||
let data = Data {
|
||||
path_to_iox_binary: "test/mock-iox-single-topic.sh".to_string(),
|
||||
catalog_dsn: "unused".to_string(),
|
||||
kafka_topic_list_api: Arc::clone(&mock_topics_api),
|
||||
};
|
||||
let c = create_topics(
|
||||
"iox",
|
||||
KafkaTopicListSpec::new(vec!["one".to_string(), "two".to_string()]),
|
||||
1,
|
||||
create_topics_status(0, true, "".to_string(), now),
|
||||
);
|
||||
let result = reconcile(c, Context::new(data)).await;
|
||||
// whole operation returns a successful result.
|
||||
assert_matches!(result, Ok(ReconcilerAction { .. }));
|
||||
// ensure status was updated accordingly.
|
||||
assert_eq!(
|
||||
mock_topics_api.get_calls(),
|
||||
vec![MockKafkaTopicListApiCall::PatchStatus {
|
||||
kafka_topic_list_name: "iox".to_string(),
|
||||
status: create_topics_status(1, true, "".to_string(), now),
|
||||
}]
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_single_topic_error() {
|
||||
let now = SystemTime::now();
|
||||
let mock_topics_api = Arc::new(MockKafkaTopicListApi::default().with_patch_status_ret(
|
||||
vec![Ok(create_topics(
|
||||
"iox",
|
||||
KafkaTopicListSpec::new(vec!["iox_shared".to_string()]),
|
||||
1,
|
||||
create_topics_status(0, true, "".to_string(), now),
|
||||
))],
|
||||
));
|
||||
let data = Data {
|
||||
path_to_iox_binary: "test/mock-iox-failure.sh".to_string(),
|
||||
catalog_dsn: "unused".to_string(),
|
||||
kafka_topic_list_api: Arc::clone(&mock_topics_api),
|
||||
};
|
||||
let c = create_topics(
|
||||
"iox",
|
||||
KafkaTopicListSpec::new(vec!["iox_shared".to_string()]),
|
||||
1,
|
||||
create_topics_status(0, false, "".to_string(), now),
|
||||
);
|
||||
let result = reconcile(c, Context::new(data)).await;
|
||||
// whole operation returns an error because the mock iox binary exits non-zero
|
||||
assert_matches!(result, Err(CatalogError::UpdateTopicError { .. }));
|
||||
// Ensure status was updated accordingly
|
||||
assert_eq!(
|
||||
mock_topics_api.get_calls(),
|
||||
vec![MockKafkaTopicListApiCall::PatchStatus {
|
||||
kafka_topic_list_name: "iox".to_string(),
|
||||
status: create_topics_status(
|
||||
1,
|
||||
false,
|
||||
"Request to update catalog with topic failed: ".to_string(),
|
||||
now
|
||||
),
|
||||
}]
|
||||
);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,2 @@
|
|||
#!/bin/bash
|
||||
exit 1
|
|
@ -0,0 +1,3 @@
|
|||
#!/bin/bash
|
||||
echo 42
|
||||
echo 93
|
|
@ -0,0 +1,2 @@
|
|||
#!/bin/bash
|
||||
echo 42
|
|
@ -13,6 +13,7 @@ db = { path = "../db" }
|
|||
dml = { path = "../dml" }
|
||||
generated_types = { path = "../generated_types" }
|
||||
influxdb_iox_client = { path = "../influxdb_iox_client", features = ["flight", "format", "write_lp"] }
|
||||
influxdb_storage_client = { path = "../influxdb_storage_client" }
|
||||
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
|
||||
ingester = { path = "../ingester" }
|
||||
internal_types = { path = "../internal_types" }
|
||||
|
@ -36,6 +37,7 @@ query = { path = "../query" }
|
|||
read_buffer = { path = "../read_buffer" }
|
||||
router = { path = "../router" }
|
||||
router2 = { path = "../router2" }
|
||||
schema = { path = "../schema" }
|
||||
server = { path = "../server" }
|
||||
time = { path = "../time" }
|
||||
trace = { path = "../trace" }
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
use std::sync::Arc;
|
||||
use std::{collections::BTreeMap, sync::Arc};
|
||||
|
||||
use data_types::write_buffer::WriteBufferConnection;
|
||||
use time::SystemProvider;
|
||||
use trace::TraceCollector;
|
||||
use write_buffer::{
|
||||
config::WriteBufferConfigFactory,
|
||||
core::{WriteBufferError, WriteBufferWriting},
|
||||
core::{WriteBufferError, WriteBufferReading, WriteBufferWriting},
|
||||
};
|
||||
|
||||
#[derive(Debug, clap::Parser)]
|
||||
|
@ -27,27 +27,114 @@ pub struct WriteBufferConfig {
|
|||
default_value = "iox-shared"
|
||||
)]
|
||||
pub(crate) topic: String,
|
||||
|
||||
/// Write buffer connection config.
|
||||
///
|
||||
/// The concrete options depend on the write buffer type.
|
||||
///
|
||||
/// Command line arguments are passed as `--write-buffer-connection-config key1=value1 key2=value2` or
|
||||
/// `--write-buffer-connection-config key1=value1,key2=value2`.
|
||||
///
|
||||
/// Environment variables are passed as `key1=value1,key2=value2,...`.
|
||||
#[clap(
|
||||
long = "--write-buffer-connection-config",
|
||||
env = "INFLUXDB_IOX_WRITE_BUFFER_CONNECTION_CONFIG",
|
||||
default_value = "",
|
||||
multiple_values = true,
|
||||
use_delimiter = true
|
||||
)]
|
||||
pub(crate) connection_config: Vec<String>,
|
||||
}
|
||||
|
||||
impl WriteBufferConfig {
|
||||
/// Initialize the [`WriteBufferWriting`].
|
||||
pub async fn init_write_buffer(
|
||||
/// Initialize a [`WriteBufferWriting`].
|
||||
pub async fn writing(
|
||||
&self,
|
||||
metrics: Arc<metric::Registry>,
|
||||
trace_collector: Option<Arc<dyn TraceCollector>>,
|
||||
) -> Result<Arc<dyn WriteBufferWriting>, WriteBufferError> {
|
||||
let write_buffer_config = WriteBufferConnection {
|
||||
let conn = self.conn();
|
||||
let factory = Self::factory(metrics);
|
||||
factory
|
||||
.new_config_write(&self.topic, trace_collector.as_ref(), &conn)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Initialize a [`WriteBufferReading`].
|
||||
pub async fn reading(
|
||||
&self,
|
||||
metrics: Arc<metric::Registry>,
|
||||
trace_collector: Option<Arc<dyn TraceCollector>>,
|
||||
) -> Result<Arc<dyn WriteBufferReading>, WriteBufferError> {
|
||||
let conn = self.conn();
|
||||
let factory = Self::factory(metrics);
|
||||
factory
|
||||
.new_config_read(&self.topic, trace_collector.as_ref(), &conn)
|
||||
.await
|
||||
}
|
||||
|
||||
fn connection_config(&self) -> BTreeMap<String, String> {
|
||||
let mut cfg = BTreeMap::new();
|
||||
|
||||
for s in &self.connection_config {
|
||||
if s.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some((k, v)) = s.split_once('=') {
|
||||
cfg.insert(k.to_owned(), v.to_owned());
|
||||
} else {
|
||||
cfg.insert(s.clone(), String::from(""));
|
||||
}
|
||||
}
|
||||
|
||||
cfg
|
||||
}
|
||||
|
||||
fn conn(&self) -> WriteBufferConnection {
|
||||
WriteBufferConnection {
|
||||
type_: self.type_.clone(),
|
||||
connection: self.connection_string.clone(),
|
||||
connection_config: Default::default(),
|
||||
connection_config: self.connection_config(),
|
||||
creation_config: None,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
let write_buffer =
|
||||
WriteBufferConfigFactory::new(Arc::new(SystemProvider::default()), metrics);
|
||||
let write_buffer = write_buffer
|
||||
.new_config_write(&self.topic, trace_collector.as_ref(), &write_buffer_config)
|
||||
.await?;
|
||||
Ok(write_buffer)
|
||||
fn factory(metrics: Arc<metric::Registry>) -> WriteBufferConfigFactory {
|
||||
WriteBufferConfigFactory::new(Arc::new(SystemProvider::default()), metrics)
|
||||
}
|
||||
}
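A minimal usage sketch of the split producer/consumer construction (the function name is invented; it assumes a `metric::Registry` and optional trace collector are already available, as elsewhere in this diff):

async fn connect_write_buffer(
    cfg: &WriteBufferConfig,
    metrics: Arc<metric::Registry>,
    trace_collector: Option<Arc<dyn TraceCollector>>,
) -> Result<(), WriteBufferError> {
    // Both halves are derived from the same parsed topic + connection config.
    let _producer = cfg.writing(Arc::clone(&metrics), trace_collector.clone()).await?;
    let _consumer = cfg.reading(metrics, trace_collector).await?;
    Ok(())
}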
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use clap::StructOpt;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_connection_config() {
|
||||
let cfg = WriteBufferConfig::try_parse_from([
|
||||
"my_binary",
|
||||
"--write-buffer",
|
||||
"kafka",
|
||||
"--write-buffer-addr",
|
||||
"localhost:1234",
|
||||
"--write-buffer-connection-config",
|
||||
"foo=bar",
|
||||
"",
|
||||
"x=",
|
||||
"y",
|
||||
"foo=baz",
|
||||
"so=many=args",
|
||||
])
|
||||
.unwrap();
|
||||
let actual = cfg.connection_config();
|
||||
let expected = BTreeMap::from([
|
||||
(String::from("foo"), String::from("baz")),
|
||||
(String::from("x"), String::from("")),
|
||||
(String::from("y"), String::from("")),
|
||||
(String::from("so"), String::from("many=args")),
|
||||
]);
|
||||
assert_eq!(actual, expected);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -44,8 +44,9 @@ pub async fn command(config: Config) -> Result<(), Error> {
|
|||
match config.command {
|
||||
Command::Update(update) => {
|
||||
let catalog = update.catalog_dsn.get_catalog("cli").await?;
|
||||
let topics_repo = catalog.kafka_topics();
|
||||
let topic = topics_repo.create_or_get(&update.db_name).await?;
|
||||
let mut txn = catalog.start_transaction().await?;
|
||||
let topic = txn.kafka_topics().create_or_get(&update.db_name).await?;
|
||||
txn.commit().await?;
|
||||
println!("{}", topic.id);
|
||||
Ok(())
|
||||
}
@ -12,9 +12,9 @@ use crate::{
|
|||
},
|
||||
},
|
||||
};
|
||||
use data_types::write_buffer::WriteBufferConnection;
|
||||
use ingester::{
|
||||
handler::IngestHandlerImpl,
|
||||
lifecycle::LifecycleConfig,
|
||||
server::{grpc::GrpcDelegate, http::HttpDelegate, IngesterServer},
|
||||
};
|
||||
use iox_catalog::interface::KafkaPartition;
|
||||
|
@ -23,9 +23,8 @@ use observability_deps::tracing::*;
|
|||
use std::collections::BTreeMap;
|
||||
use std::convert::TryFrom;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use thiserror::Error;
|
||||
use time::TimeProvider;
|
||||
use write_buffer::config::WriteBufferConfigFactory;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum Error {
|
||||
|
@ -50,6 +49,9 @@ pub enum Error {
|
|||
#[error("sequencer record not found for partition {0}")]
|
||||
SequencerNotFound(KafkaPartition),
|
||||
|
||||
#[error("error initializing ingester: {0}")]
|
||||
Ingester(#[from] ingester::handler::Error),
|
||||
|
||||
#[error("error initializing write buffer {0}")]
|
||||
WriteBuffer(#[from] write_buffer::core::WriteBufferError),
|
||||
}
|
||||
|
@ -93,6 +95,45 @@ pub struct Config {
|
|||
env = "INFLUXDB_IOX_WRITE_BUFFER_PARTITION_RANGE_END"
|
||||
)]
|
||||
pub write_buffer_partition_range_end: i32,
|
||||
|
||||
/// The ingester will continue to pull data from Kafka and buffer it
/// as long as the buffered data is below this size. If it hits this size it will pause
/// ingest from Kafka until persistence brings the buffered data back below this threshold.
|
||||
#[clap(
|
||||
long = "--pause-ingest-size-bytes",
|
||||
env = "INFLUXDB_IOX_PAUSE_INGEST_SIZE_BYTES"
|
||||
)]
|
||||
pub pause_ingest_size_bytes: usize,
|
||||
|
||||
/// Once the ingester crosses this threshold of data buffered across
|
||||
/// all sequencers, it will pick the largest partitions and persist
|
||||
/// them until it falls below this threshold. An ingester running in
|
||||
/// a steady state is expected to take up this much memory.
|
||||
#[clap(
|
||||
long = "--persist-memory-threshold-bytes",
|
||||
env = "INFLUXDB_IOX_PERSIST_MEMORY_THRESHOLD_BYTES"
|
||||
)]
|
||||
pub persist_memory_threshold_bytes: usize,
|
||||
|
||||
/// If an individual partition crosses this size threshold, it will be persisted.
|
||||
/// The default value is 300MB (in bytes).
|
||||
#[clap(
|
||||
long = "--persist-partition-size-threshold-bytes",
|
||||
env = "INFLUXDB_IOX_PERSIST_PARTITION_SIZE_THRESHOLD_BYTES",
|
||||
default_value = "314572800"
|
||||
)]
|
||||
pub persist_partition_size_threshold_bytes: usize,
|
||||
|
||||
/// If a partition has had data buffered for longer than this period of time
|
||||
/// it will be persisted. This puts an upper bound on how far back the
|
||||
/// ingester may need to read in Kafka on restart or recovery. The default value
|
||||
/// is 30 minutes (in seconds).
|
||||
#[clap(
|
||||
long = "--persist-partition-age-threshold-seconds",
|
||||
env = "INFLUXDB_IOX_PERSIST_PARTITION_AGE_THRESHOLD_SECONDS",
|
||||
default_value = "1800"
|
||||
)]
|
||||
pub persist_partition_age_threshold_seconds: u64,
|
||||
}
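The four thresholds above feed the ingester's lifecycle manager; further down in this diff they are passed to `LifecycleConfig::new`. A rough sketch of that wiring, with made-up values standing in for the parsed flags (illustrative only):

use std::time::Duration;

// Stand-in for the parsed clap flags; the real values come from the CLI/env vars.
struct LifecycleThresholds {
    pause_ingest_size_bytes: usize,
    persist_memory_threshold_bytes: usize,
    persist_partition_size_threshold_bytes: usize,
    persist_partition_age_threshold_seconds: u64,
}

fn main() {
    let cfg = LifecycleThresholds {
        pause_ingest_size_bytes: 100 * 1024 * 1024 * 1024,
        persist_memory_threshold_bytes: 10 * 1024 * 1024 * 1024,
        persist_partition_size_threshold_bytes: 314_572_800, // 300 MiB default
        persist_partition_age_threshold_seconds: 1800,       // 30 minute default
    };

    // Mirrors LifecycleConfig::new(pause, memory, partition size, partition age)
    // as called later in this diff.
    let age = Duration::from_secs(cfg.persist_partition_age_threshold_seconds);
    assert!(cfg.persist_memory_threshold_bytes < cfg.pause_ingest_size_bytes);
    println!("persist partitions older than {:?}", age);
}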
|
||||
|
||||
pub async fn command(config: Config) -> Result<()> {
|
||||
|
@ -100,11 +141,12 @@ pub async fn command(config: Config) -> Result<()> {
|
|||
|
||||
let catalog = config.catalog_dsn.get_catalog("ingester").await?;
|
||||
|
||||
let kafka_topic = catalog
|
||||
let mut txn = catalog.start_transaction().await?;
|
||||
let kafka_topic = txn
|
||||
.kafka_topics()
|
||||
.get_by_name(&config.write_buffer_config.topic)
|
||||
.await?
|
||||
.ok_or(Error::KafkaTopicNotFound(config.write_buffer_config.topic))?;
|
||||
.ok_or_else(|| Error::KafkaTopicNotFound(config.write_buffer_config.topic.clone()))?;
|
||||
|
||||
if config.write_buffer_partition_range_start > config.write_buffer_partition_range_end {
|
||||
return Err(Error::KafkaRange);
|
||||
|
@ -122,46 +164,45 @@ pub async fn command(config: Config) -> Result<()> {
|
|||
|
||||
let mut sequencers = BTreeMap::new();
|
||||
for k in kafka_partitions {
|
||||
let s = catalog
|
||||
let s = txn
|
||||
.sequencers()
|
||||
.get_by_topic_id_and_partition(kafka_topic.id, k)
|
||||
.await?
|
||||
.ok_or(Error::SequencerNotFound(k))?;
|
||||
sequencers.insert(k, s);
|
||||
}
|
||||
txn.commit().await?;
|
||||
|
||||
let metric_registry: Arc<metric::Registry> = Default::default();
|
||||
let trace_collector = common_state.trace_collector();
|
||||
let time_provider: Arc<dyn TimeProvider> = Arc::new(time::SystemProvider::new());
|
||||
let write_buffer_factory =
|
||||
WriteBufferConfigFactory::new(Arc::clone(&time_provider), Arc::clone(&metric_registry));
|
||||
|
||||
let write_buffer_cfg = WriteBufferConnection {
|
||||
type_: config.write_buffer_config.type_,
|
||||
connection: config.write_buffer_config.connection_string,
|
||||
connection_config: Default::default(),
|
||||
creation_config: None,
|
||||
};
|
||||
let write_buffer = write_buffer_factory
|
||||
.new_config_read(
|
||||
&kafka_topic.name,
|
||||
trace_collector.as_ref(),
|
||||
&write_buffer_cfg,
|
||||
)
|
||||
let write_buffer = config
|
||||
.write_buffer_config
|
||||
.reading(Arc::clone(&metric_registry), trace_collector.clone())
|
||||
.await?;
|
||||
|
||||
let ingest_handler = Arc::new(IngestHandlerImpl::new(
|
||||
kafka_topic,
|
||||
sequencers,
|
||||
catalog,
|
||||
object_store,
|
||||
write_buffer,
|
||||
&metric_registry,
|
||||
));
|
||||
let lifecycle_config = LifecycleConfig::new(
|
||||
config.pause_ingest_size_bytes,
|
||||
config.persist_memory_threshold_bytes,
|
||||
config.persist_partition_size_threshold_bytes,
|
||||
Duration::from_secs(config.persist_partition_age_threshold_seconds),
|
||||
);
|
||||
let ingest_handler = Arc::new(
|
||||
IngestHandlerImpl::new(
|
||||
lifecycle_config,
|
||||
kafka_topic,
|
||||
sequencers,
|
||||
catalog,
|
||||
object_store,
|
||||
write_buffer,
|
||||
&metric_registry,
|
||||
)
|
||||
.await?,
|
||||
);
|
||||
let http = HttpDelegate::new(Arc::clone(&ingest_handler));
|
||||
let grpc = GrpcDelegate::new(ingest_handler);
|
||||
|
||||
let ingester = IngesterServer::new(http, grpc);
|
||||
let ingester = IngesterServer::new(metric_registry, http, grpc);
|
||||
let server_type = Arc::new(IngesterServerType::new(ingester, &common_state));
|
||||
|
||||
info!("starting ingester");
|
||||
|
|
|
@ -111,7 +111,8 @@ pub async fn command(config: Config) -> Result<()> {
|
|||
// This code / auto-creation is for architecture testing purposes only - a
|
||||
// prod deployment would expect namespaces to be explicitly created and this
|
||||
// layer would be removed.
|
||||
let topic_id = catalog
|
||||
let mut txn = catalog.start_transaction().await?;
|
||||
let topic_id = txn
|
||||
.kafka_topics()
|
||||
.get_by_name(&config.write_buffer_config.topic)
|
||||
.await?
|
||||
|
@ -122,7 +123,7 @@ pub async fn command(config: Config) -> Result<()> {
|
|||
&config.write_buffer_config.topic
|
||||
)
|
||||
});
|
||||
let query_id = catalog
|
||||
let query_id = txn
|
||||
.query_pools()
|
||||
.create_or_get(&config.query_pool_name)
|
||||
.await
|
||||
|
@ -133,6 +134,8 @@ pub async fn command(config: Config) -> Result<()> {
|
|||
&config.write_buffer_config.topic, e
|
||||
)
|
||||
});
|
||||
txn.commit().await?;
|
||||
|
||||
let handler_stack = NamespaceAutocreation::new(
|
||||
catalog,
|
||||
ns_cache,
|
||||
|
@ -169,7 +172,7 @@ async fn init_write_buffer(
|
|||
let write_buffer = Arc::new(
|
||||
config
|
||||
.write_buffer_config
|
||||
.init_write_buffer(metrics, trace_collector)
|
||||
.writing(metrics, trace_collector)
|
||||
.await?,
|
||||
);
|
||||
|
||||
|
|
|
@ -102,7 +102,13 @@ async fn load_remote_system_tables(
|
|||
connection: Connection,
|
||||
) -> Result<()> {
|
||||
// all prefixed with "system."
|
||||
let table_names = vec!["chunks", "chunk_columns", "columns", "operations"];
|
||||
let table_names = vec![
|
||||
"chunks",
|
||||
"chunk_columns",
|
||||
"columns",
|
||||
"operations",
|
||||
"queries",
|
||||
];
|
||||
|
||||
let start = Instant::now();
|
||||
|
||||
|
|
|
@ -1,19 +1,35 @@
|
|||
pub(crate) mod request;
|
||||
pub(crate) mod response;
|
||||
|
||||
use std::num::NonZeroU64;
|
||||
|
||||
use snafu::{ResultExt, Snafu};
|
||||
use tonic::Status;
|
||||
|
||||
use generated_types::Predicate;
|
||||
use influxdb_storage_client::{connection::Connection, Client, OrgAndBucket};
|
||||
use influxrpc_parser::predicate;
|
||||
use time;
|
||||
|
||||
use snafu::{ResultExt, Snafu};
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
#[snafu(display("Unable to parse timestamp '{:?}'", t))]
|
||||
TimestampParseError { t: String },
|
||||
pub enum ParseError {
|
||||
#[snafu(display("unable to parse timestamp '{:?}'", t))]
|
||||
Timestamp { t: String },
|
||||
|
||||
#[snafu(display("Unable to parse predicate: {:?}", source))]
|
||||
PredicateParseError { source: predicate::Error },
|
||||
#[snafu(display("unable to parse database name '{:?}'", db_name))]
|
||||
DBName { db_name: String },
|
||||
|
||||
#[snafu(display("unable to parse predicate: {:?}", source))]
|
||||
Predicate { source: predicate::Error },
|
||||
|
||||
#[snafu(display("server error: {:?}", source))]
|
||||
ServerError { source: Status },
|
||||
|
||||
#[snafu(display("error building response: {:?}", source))]
|
||||
ResponseError { source: response::Error },
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
pub type Result<T, E = ParseError> = std::result::Result<T, E>;
|
||||
|
||||
/// Craft and submit different types of storage read requests
|
||||
#[derive(Debug, clap::Parser)]
|
||||
|
@ -21,6 +37,10 @@ pub struct Config {
|
|||
#[clap(subcommand)]
|
||||
command: Command,
|
||||
|
||||
/// The name of the database
|
||||
#[clap(parse(try_from_str = parse_db_name))]
|
||||
db_name: OrgAndBucket,
|
||||
|
||||
/// The requested start time (inclusive) of the time-range (also accepts RFC3339 format).
|
||||
#[clap(long, default_value = "-9223372036854775806", parse(try_from_str = parse_range))]
|
||||
start: i64,
|
||||
|
@ -37,12 +57,12 @@ pub struct Config {
|
|||
// Attempts to parse either a stringified `i64` value, or alternatively an
// RFC3339-formatted timestamp, into an `i64` value representing nanoseconds
// since the epoch.
|
||||
fn parse_range(s: &str) -> Result<i64, Error> {
|
||||
fn parse_range(s: &str) -> Result<i64, ParseError> {
|
||||
match s.parse::<i64>() {
|
||||
Ok(v) => Ok(v),
|
||||
Err(_) => {
|
||||
// try to parse timestamp
|
||||
let t = time::Time::from_rfc3339(s).or_else(|_| TimestampParseSnafu { t: s }.fail())?;
|
||||
let t = time::Time::from_rfc3339(s).or_else(|_| TimestampSnafu { t: s }.fail())?;
|
||||
Ok(t.timestamp_nanos())
|
||||
}
|
||||
}
|
||||
|
@ -50,30 +70,90 @@ fn parse_range(s: &str) -> Result<i64, Error> {
|
|||
|
||||
// Attempts to parse the optional predicate into a `Predicate` RPC node. This
// node is then used as part of a read request.
|
||||
fn parse_predicate(expr: &str) -> Result<Predicate, Error> {
|
||||
fn parse_predicate(expr: &str) -> Result<Predicate, ParseError> {
|
||||
if expr.is_empty() {
|
||||
return Ok(Predicate::default());
|
||||
}
|
||||
|
||||
predicate::expr_to_rpc_predicate(expr).context(PredicateParseSnafu)
|
||||
predicate::expr_to_rpc_predicate(expr).context(PredicateSnafu)
|
||||
}
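A standalone sketch of the `parse_range` idea, accepting either a raw nanosecond integer or an RFC3339 timestamp. It uses the `chrono` crate in place of IOx's internal `time` crate, purely as an assumption for illustration:

use chrono::DateTime;

// Parse either an i64 (nanoseconds since the epoch) or an RFC3339 timestamp.
fn parse_range(s: &str) -> Result<i64, String> {
    match s.parse::<i64>() {
        Ok(v) => Ok(v),
        Err(_) => DateTime::parse_from_rfc3339(s)
            .map(|t| t.timestamp_nanos())
            .map_err(|_| format!("unable to parse timestamp '{:?}'", s)),
    }
}

fn main() {
    assert_eq!(parse_range("123").unwrap(), 123);
    assert_eq!(parse_range("1970-01-01T00:00:01Z").unwrap(), 1_000_000_000);
    assert!(parse_range("not-a-time").is_err());
}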
|
||||
|
||||
// Attempts to parse the database name into an org and bucket ID.
|
||||
fn parse_db_name(db_name: &str) -> Result<OrgAndBucket, ParseError> {
|
||||
let parts = db_name.split('_').collect::<Vec<_>>();
|
||||
if parts.len() != 2 {
|
||||
return DBNameSnafu {
|
||||
db_name: db_name.to_owned(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
|
||||
let org_id = usize::from_str_radix(parts[0], 16).map_err(|_| ParseError::DBName {
|
||||
db_name: db_name.to_owned(),
|
||||
})?;
|
||||
|
||||
let bucket_id = usize::from_str_radix(parts[1], 16).map_err(|_| ParseError::DBName {
|
||||
db_name: db_name.to_owned(),
|
||||
})?;
|
||||
|
||||
Ok(OrgAndBucket::new(
|
||||
NonZeroU64::new(org_id as u64).ok_or_else(|| ParseError::DBName {
|
||||
db_name: db_name.to_owned(),
|
||||
})?,
|
||||
NonZeroU64::new(bucket_id as u64).ok_or_else(|| ParseError::DBName {
|
||||
db_name: db_name.to_owned(),
|
||||
})?,
|
||||
))
|
||||
}
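`parse_db_name` reduced to its essentials: a database name is `<org-hex>_<bucket-hex>`, each half a non-zero hex-encoded ID. The sketch below returns plain `NonZeroU64`s since `OrgAndBucket` itself is IOx-internal:

use std::num::NonZeroU64;

// Split the name on '_' and hex-decode each half into a non-zero ID.
fn parse_db_name(db_name: &str) -> Result<(NonZeroU64, NonZeroU64), String> {
    let err = || format!("unable to parse database name '{:?}'", db_name);
    let parts: Vec<_> = db_name.split('_').collect();
    if parts.len() != 2 {
        return Err(err());
    }
    let org_id = u64::from_str_radix(parts[0], 16).map_err(|_| err())?;
    let bucket_id = u64::from_str_radix(parts[1], 16).map_err(|_| err())?;
    Ok((
        NonZeroU64::new(org_id).ok_or_else(err)?,
        NonZeroU64::new(bucket_id).ok_or_else(err)?,
    ))
}

fn main() {
    let (org, bucket) = parse_db_name("1a_2b").unwrap();
    assert_eq!((org.get(), bucket.get()), (0x1a, 0x2b));
    assert!(parse_db_name("deadbeef").is_err());
}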
|
||||
|
||||
/// All possible subcommands for storage
|
||||
#[derive(Debug, clap::Parser)]
|
||||
enum Command {
|
||||
/// Issue a read_filter request
|
||||
ReadFilter(ReadFilter),
|
||||
ReadFilter,
|
||||
TagValues(TagValues),
|
||||
}
|
||||
|
||||
/// A tag values request
|
||||
#[derive(Debug, clap::Parser)]
|
||||
struct ReadFilter {}
|
||||
struct TagValues {
|
||||
// The tag key to interrogate for tag values.
|
||||
tag_key: String,
|
||||
}
|
||||
|
||||
/// Create and issue a read request
|
||||
pub async fn command(config: Config) -> Result<()> {
|
||||
// TODO(edd): handle command/config and execute request
|
||||
println!("Unimplemented: config is {:?}", config);
|
||||
Ok(())
|
||||
pub async fn command(connection: Connection, config: Config) -> Result<()> {
|
||||
let mut client = influxdb_storage_client::Client::new(connection);
|
||||
|
||||
// convert predicate with no root node into None.
|
||||
let predicate = config.predicate.root.is_some().then(|| config.predicate);
|
||||
|
||||
let source = Client::read_source(&config.db_name, 0);
|
||||
match config.command {
|
||||
Command::ReadFilter => {
|
||||
let result = client
|
||||
.read_filter(request::read_filter(
|
||||
source,
|
||||
config.start,
|
||||
config.stop,
|
||||
predicate,
|
||||
))
|
||||
.await
|
||||
.context(ServerSnafu)?;
|
||||
response::pretty_print_frames(&result).context(ResponseSnafu)
|
||||
}
|
||||
Command::TagValues(tv) => {
|
||||
let result = client
|
||||
.tag_values(request::tag_values(
|
||||
source,
|
||||
config.start,
|
||||
config.stop,
|
||||
predicate,
|
||||
tv.tag_key,
|
||||
))
|
||||
.await
|
||||
.context(ServerSnafu)?;
|
||||
response::pretty_print_strings(result).context(ResponseSnafu)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
@ -0,0 +1,58 @@
|
|||
pub mod generated_types {
|
||||
pub use generated_types::influxdata::platform::storage::*;
|
||||
}
|
||||
|
||||
use self::generated_types::*;
|
||||
use super::response::{
|
||||
tag_key_is_field, tag_key_is_measurement, FIELD_TAG_KEY_BIN, MEASUREMENT_TAG_KEY_BIN,
|
||||
};
|
||||
use ::generated_types::google::protobuf::*;
|
||||
|
||||
pub fn read_filter(
|
||||
org_bucket: Any,
|
||||
start: i64,
|
||||
stop: i64,
|
||||
predicate: std::option::Option<Predicate>,
|
||||
) -> ReadFilterRequest {
|
||||
generated_types::ReadFilterRequest {
|
||||
predicate,
|
||||
read_source: Some(org_bucket),
|
||||
range: Some(TimestampRange { start, end: stop }),
|
||||
key_sort: read_filter_request::KeySort::Unspecified as i32, // IOx doesn't support any other sort
|
||||
tag_key_meta_names: TagKeyMetaNames::Text as i32,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn tag_values(
|
||||
org_bucket: Any,
|
||||
start: i64,
|
||||
stop: i64,
|
||||
predicate: std::option::Option<Predicate>,
|
||||
tag_key: String,
|
||||
) -> TagValuesRequest {
|
||||
let tag_key = if tag_key_is_measurement(tag_key.as_bytes()) {
|
||||
MEASUREMENT_TAG_KEY_BIN.to_vec()
|
||||
} else if tag_key_is_field(tag_key.as_bytes()) {
|
||||
FIELD_TAG_KEY_BIN.to_vec()
|
||||
} else {
|
||||
tag_key.as_bytes().to_vec()
|
||||
};
|
||||
|
||||
generated_types::TagValuesRequest {
|
||||
predicate,
|
||||
tags_source: Some(org_bucket),
|
||||
range: Some(TimestampRange { start, end: stop }),
|
||||
tag_key,
|
||||
}
|
||||
}
|
||||
|
||||
// TODO Add the following helpers for building requests:
|
||||
//
|
||||
// * read_group
|
||||
// * read_window_aggregate
|
||||
// * tag_keys
|
||||
// * tag_values_with_measurement_and_key
|
||||
// * measurement_names
|
||||
// * measurement_tag_keys
|
||||
// * measurement_tag_values
|
||||
// * measurement_fields
|
|
@ -0,0 +1,805 @@
|
|||
use arrow::{record_batch::RecordBatch, util::pretty::print_batches};
|
||||
use hashbrown::HashMap;
|
||||
use std::{
|
||||
collections::{BTreeMap, BTreeSet},
|
||||
iter,
|
||||
string::FromUtf8Error,
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use generated_types::{
|
||||
read_response::{frame::Data, DataType, SeriesFrame},
|
||||
Tag,
|
||||
};
|
||||
use schema::{builder::SchemaBuilder, InfluxColumnType, InfluxFieldType, Schema};
|
||||
use snafu::{ResultExt, Snafu};
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
#[snafu(display("arrow error: {:?}", source))]
|
||||
Arrow { source: arrow::error::ArrowError },
|
||||
|
||||
#[snafu(display("frame type currently unsupported: {:?}", frame))]
|
||||
UnsupportedFrameType { frame: String },
|
||||
|
||||
#[snafu(display("tag keys must be valid UTF-8: {:?}", source))]
|
||||
InvalidTagKey { source: FromUtf8Error },
|
||||
|
||||
#[snafu(display("tag values must be valid UTF-8: {:?}", source))]
|
||||
InvalidTagValue { source: FromUtf8Error },
|
||||
|
||||
#[snafu(display("measurement name must be valid UTF-8: {:?}", source))]
|
||||
InvalidMeasurementName { source: FromUtf8Error },
|
||||
|
||||
#[snafu(display("unable to build schema: {:?}", source))]
|
||||
SchemaBuilding { source: schema::builder::Error },
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
// Prints the provided data frames in a tabular format grouped into tables per
|
||||
// distinct measurement.
|
||||
pub fn pretty_print_frames(frames: &[Data]) -> Result<()> {
|
||||
let rbs = frames_to_record_batches(frames)?;
|
||||
for (k, rb) in rbs {
|
||||
println!("\n_measurement: {}", k);
|
||||
println!("rows: {:?}", &rb.num_rows());
|
||||
print_batches(&[rb]).context(ArrowSnafu)?;
|
||||
println!("\n");
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Prints the provided set of strings in a tabular format.
|
||||
pub fn pretty_print_strings(values: Vec<String>) -> Result<()> {
|
||||
let schema = SchemaBuilder::new()
|
||||
.influx_field("values", InfluxFieldType::String)
|
||||
.build()
|
||||
.context(SchemaBuildingSnafu)?;
|
||||
|
||||
let arrow_schema: arrow::datatypes::SchemaRef = schema.into();
|
||||
let rb_columns: Vec<Arc<dyn arrow::array::Array>> =
|
||||
vec![Arc::new(arrow::array::StringArray::from(
|
||||
values.iter().map(|x| Some(x.as_str())).collect::<Vec<_>>(),
|
||||
))];
|
||||
|
||||
let rb = RecordBatch::try_new(arrow_schema, rb_columns).context(ArrowSnafu)?;
|
||||
|
||||
println!("\ntag values: {:?}", &rb.num_rows());
|
||||
print_batches(&[rb]).context(ArrowSnafu)?;
|
||||
println!("\n");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// This function takes a set of InfluxRPC data frames and converts them into
// Arrow record batches, which are suitable for pretty printing.
|
||||
fn frames_to_record_batches(frames: &[Data]) -> Result<BTreeMap<String, RecordBatch>> {
|
||||
// Run through all the frames once to build the schema of each table we need
|
||||
// to build as a record batch.
|
||||
let mut table_column_mapping = determine_tag_columns(frames);
|
||||
|
||||
let mut all_tables = BTreeMap::new();
|
||||
let mut current_table_frame: Option<(IntermediateTable, SeriesFrame)> = None;
|
||||
|
||||
if frames.is_empty() {
|
||||
return Ok(all_tables);
|
||||
}
|
||||
|
||||
for frame in frames {
|
||||
match frame {
|
||||
generated_types::read_response::frame::Data::Group(_) => {
|
||||
return UnsupportedFrameTypeSnafu {
|
||||
frame: "group_frame".to_owned(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
generated_types::read_response::frame::Data::Series(sf) => {
|
||||
let cur_frame_measurement = &sf.tags[0].value;
|
||||
|
||||
// First series frame in result set.
|
||||
if current_table_frame.is_none() {
|
||||
let table = IntermediateTable::try_new(
|
||||
table_column_mapping
|
||||
.remove(cur_frame_measurement)
|
||||
.expect("table column mappings exists for measurement"),
|
||||
)?;
|
||||
|
||||
current_table_frame = Some((table, sf.clone()));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Subsequent series frames in results.
|
||||
let (mut current_table, prev_series_frame) = current_table_frame.take().unwrap();
|
||||
|
||||
// Series frame has moved on to a different measurement. Push
|
||||
// this table into a record batch and onto final results, then
|
||||
// create a new table.
|
||||
if measurement(&prev_series_frame) != cur_frame_measurement {
|
||||
let rb: RecordBatch = current_table.try_into()?;
|
||||
all_tables.insert(
|
||||
String::from_utf8(measurement(&prev_series_frame).to_owned())
|
||||
.context(InvalidMeasurementNameSnafu)?,
|
||||
rb,
|
||||
);
|
||||
|
||||
// Initialise next intermediate table to fill.
|
||||
current_table = IntermediateTable::try_new(
|
||||
table_column_mapping
|
||||
.remove(cur_frame_measurement)
|
||||
.expect("table column mappings exists for measurement"),
|
||||
)?;
|
||||
}
|
||||
|
||||
// Put current table (which may have been replaced with a new
|
||||
// table if _measurement has changed) and series frame back. The
|
||||
// field key can change on each series frame, so it's important
|
||||
// to update it each time we see a new series frame, so that the
|
||||
// value frames know where to push their data.
|
||||
current_table_frame = Some((current_table, sf.clone()));
|
||||
|
||||
// no new column values written so no need to pad.
|
||||
continue;
|
||||
}
|
||||
generated_types::read_response::frame::Data::FloatPoints(f) => {
|
||||
// Get field key associated with previous series frame.
|
||||
let (current_table, prev_series_frame) = current_table_frame.as_mut().unwrap();
|
||||
let column = current_table.field_column(field_name(prev_series_frame));
|
||||
|
||||
let values = f.values.iter().copied().map(Some).collect::<Vec<_>>();
|
||||
column.extend_f64(&values);
|
||||
|
||||
let time_column = &mut current_table.time_column;
|
||||
time_column.extend_from_slice(&f.timestamps);
|
||||
}
|
||||
generated_types::read_response::frame::Data::IntegerPoints(f) => {
|
||||
// Get field key associated with previous series frame.
|
||||
let (current_table, prev_series_frame) = current_table_frame.as_mut().unwrap();
|
||||
let column = current_table.field_column(field_name(prev_series_frame));
|
||||
|
||||
let values = f.values.iter().copied().map(Some).collect::<Vec<_>>();
|
||||
column.extend_i64(&values);
|
||||
|
||||
let time_column = &mut current_table.time_column;
|
||||
time_column.extend_from_slice(&f.timestamps);
|
||||
}
|
||||
generated_types::read_response::frame::Data::UnsignedPoints(f) => {
|
||||
// Get field key associated with previous series frame.
|
||||
let (current_table, prev_series_frame) = current_table_frame.as_mut().unwrap();
|
||||
let column = current_table.field_column(field_name(prev_series_frame));
|
||||
|
||||
let values = f.values.iter().copied().map(Some).collect::<Vec<_>>();
|
||||
column.extend_u64(&values);
|
||||
|
||||
let time_column = &mut current_table.time_column;
|
||||
time_column.extend_from_slice(&f.timestamps);
|
||||
}
|
||||
generated_types::read_response::frame::Data::BooleanPoints(f) => {
|
||||
// Get field key associated with previous series frame.
|
||||
let (current_table, prev_series_frame) = current_table_frame.as_mut().unwrap();
|
||||
let column = current_table.field_column(field_name(prev_series_frame));
|
||||
|
||||
let values = f.values.iter().copied().map(Some).collect::<Vec<_>>();
|
||||
column.extend_bool(&values);
|
||||
|
||||
let time_column = &mut current_table.time_column;
|
||||
time_column.extend_from_slice(&f.timestamps);
|
||||
}
|
||||
generated_types::read_response::frame::Data::StringPoints(f) => {
|
||||
// Get field key associated with previous series frame.
|
||||
let (current_table, prev_series_frame) = current_table_frame.as_mut().unwrap();
|
||||
let column = current_table.field_column(field_name(prev_series_frame));
|
||||
|
||||
let values = f
|
||||
.values
|
||||
.iter()
|
||||
.map(|x| Some(x.to_owned()))
|
||||
.collect::<Vec<_>>();
|
||||
column.extend_string(&values);
|
||||
|
||||
let time_column = &mut current_table.time_column;
|
||||
time_column.extend_from_slice(&f.timestamps);
|
||||
}
|
||||
};
|
||||
|
||||
// If the current frame contained field values/timestamps then we need to
// pad all the other columns with either values or NULL so that all
// columns remain the same length.
//
|
||||
let (current_table, prev_series_frame) = current_table_frame.as_mut().unwrap();
|
||||
let max_rows = current_table.max_rows();
|
||||
|
||||
// Pad all tag columns with keys present in the previous series frame
|
||||
// with identical values.
|
||||
for Tag { key, value } in &prev_series_frame.tags {
|
||||
if tag_key_is_measurement(key) || tag_key_is_field(key) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let idx = current_table
|
||||
.tag_columns
|
||||
.get(key)
|
||||
.expect("tag column mapping to be present");
|
||||
|
||||
let column = &mut current_table.column_data[*idx];
|
||||
let column_rows = column.len();
|
||||
assert!(max_rows >= column_rows);
|
||||
column.pad_tag(
|
||||
String::from_utf8(value.to_owned()).context(InvalidTagValueSnafu)?,
|
||||
max_rows - column_rows,
|
||||
);
|
||||
}
|
||||
|
||||
// Pad all tag columns that were not present in the previous series
|
||||
// frame with NULL.
|
||||
for (_, &idx) in &current_table.tag_columns {
|
||||
let column = &mut current_table.column_data[idx];
|
||||
let column_rows = column.len();
|
||||
if column_rows < max_rows {
|
||||
column.pad_none(max_rows - column_rows);
|
||||
}
|
||||
}
|
||||
|
||||
// Pad all field columns with NULL such that they're the same length as
|
||||
// the largest column.
|
||||
for (_, &idx) in &current_table.field_columns {
|
||||
let column = &mut current_table.column_data[idx];
|
||||
let column_rows = column.len();
|
||||
if column_rows < max_rows {
|
||||
column.pad_none(max_rows - column_rows);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Convert and insert current table
|
||||
let (current_table, prev_series_frame) = current_table_frame.take().unwrap();
|
||||
let rb: RecordBatch = current_table.try_into()?;
|
||||
all_tables.insert(
|
||||
String::from_utf8(measurement(&prev_series_frame).to_owned())
|
||||
.context(InvalidMeasurementNameSnafu)?,
|
||||
rb,
|
||||
);
|
||||
|
||||
Ok(all_tables)
|
||||
}
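The padding logic above keeps every column the same length as frames of different shapes are appended; the core of that technique in isolation:

// Pad shorter columns with None so every column matches the longest one,
// as frames_to_record_batches does after each frame is processed.
fn pad_to_max(columns: &mut [Vec<Option<i64>>]) {
    let max_rows = columns.iter().map(|c| c.len()).max().unwrap_or_default();
    for column in columns.iter_mut() {
        let missing = max_rows - column.len();
        column.extend(std::iter::repeat(None).take(missing));
    }
}

fn main() {
    let mut columns = vec![vec![Some(1), Some(2), Some(3)], vec![Some(10)]];
    pad_to_max(&mut columns);
    assert_eq!(columns[1], vec![Some(10), None, None]);
}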
|
||||
|
||||
#[derive(Debug)]
|
||||
enum ColumnData {
|
||||
Float(Vec<Option<f64>>),
|
||||
Integer(Vec<Option<i64>>),
|
||||
Unsigned(Vec<Option<u64>>),
|
||||
Boolean(Vec<Option<bool>>),
|
||||
String(Vec<Option<String>>),
|
||||
Tag(Vec<Option<String>>),
|
||||
}
|
||||
|
||||
impl ColumnData {
|
||||
fn pad_tag(&mut self, value: String, additional: usize) {
|
||||
if let Self::Tag(data) = self {
|
||||
data.extend(iter::repeat(Some(value)).take(additional));
|
||||
} else {
|
||||
unreachable!("can't pad strings into {:?} column", self)
|
||||
}
|
||||
}
|
||||
|
||||
fn pad_none(&mut self, additional: usize) {
|
||||
match self {
|
||||
ColumnData::Float(data) => data.extend(iter::repeat(None).take(additional)),
|
||||
ColumnData::Integer(data) => data.extend(iter::repeat(None).take(additional)),
|
||||
ColumnData::Unsigned(data) => data.extend(iter::repeat(None).take(additional)),
|
||||
ColumnData::Boolean(data) => data.extend(iter::repeat(None).take(additional)),
|
||||
ColumnData::String(data) => data.extend(iter::repeat(None).take(additional)),
|
||||
ColumnData::Tag(data) => data.extend(iter::repeat(None).take(additional)),
|
||||
}
|
||||
}
|
||||
|
||||
fn extend_f64(&mut self, arr: &[Option<f64>]) {
|
||||
if let Self::Float(data) = self {
|
||||
data.extend_from_slice(arr);
|
||||
} else {
|
||||
unreachable!("can't extend {:?} column with floats", self)
|
||||
}
|
||||
}
|
||||
|
||||
fn extend_i64(&mut self, arr: &[Option<i64>]) {
|
||||
if let Self::Integer(data) = self {
|
||||
data.extend_from_slice(arr);
|
||||
} else {
|
||||
unreachable!("can't extend {:?} column with integers", self)
|
||||
}
|
||||
}
|
||||
|
||||
fn extend_u64(&mut self, arr: &[Option<u64>]) {
|
||||
if let Self::Unsigned(data) = self {
|
||||
data.extend_from_slice(arr);
|
||||
} else {
|
||||
unreachable!("can't extend {:?} column with unsigned integers", self)
|
||||
}
|
||||
}
|
||||
|
||||
fn extend_bool(&mut self, arr: &[Option<bool>]) {
|
||||
if let Self::Boolean(data) = self {
|
||||
data.extend_from_slice(arr);
|
||||
} else {
|
||||
unreachable!("can't extend {:?} column with bools", self)
|
||||
}
|
||||
}
|
||||
|
||||
fn extend_string(&mut self, arr: &[Option<String>]) {
|
||||
if let Self::String(data) = self {
|
||||
data.extend_from_slice(arr);
|
||||
} else {
|
||||
unreachable!("can't extend {:?} column with strings", self)
|
||||
}
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
match self {
|
||||
ColumnData::Float(arr) => arr.len(),
|
||||
ColumnData::Integer(arr) => arr.len(),
|
||||
ColumnData::Unsigned(arr) => arr.len(),
|
||||
ColumnData::Boolean(arr) => arr.len(),
|
||||
ColumnData::String(arr) => arr.len(),
|
||||
ColumnData::Tag(arr) => arr.len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct IntermediateTable {
|
||||
schema: Schema,
|
||||
|
||||
// constant-time access to the correct column from a tag or field key
|
||||
tag_columns: HashMap<Vec<u8>, usize>,
|
||||
field_columns: HashMap<Vec<u8>, usize>,
|
||||
|
||||
column_data: Vec<ColumnData>,
|
||||
time_column: Vec<i64>,
|
||||
}
|
||||
|
||||
impl IntermediateTable {
|
||||
fn try_new(table_columns: TableColumns) -> Result<Self, Error> {
|
||||
let mut schema_builder = SchemaBuilder::new();
|
||||
let mut tag_columns = HashMap::new();
|
||||
let mut field_columns = HashMap::new();
|
||||
let mut column_data = vec![];
|
||||
|
||||
// First add the tag columns to the schema and column data.
|
||||
for tag_key in table_columns.tag_columns {
|
||||
let column_name = String::from_utf8(tag_key.clone()).context(InvalidTagKeySnafu)?;
|
||||
schema_builder.influx_column(&column_name, InfluxColumnType::Tag);
|
||||
|
||||
// track position of column
|
||||
tag_columns.insert(tag_key, column_data.len());
|
||||
column_data.push(ColumnData::Tag(vec![]));
|
||||
}
|
||||
|
||||
// Then add the field columns to the schema and column data.
|
||||
for (field_key, data_type) in table_columns.field_columns {
|
||||
let column_name = String::from_utf8(field_key.clone()).context(InvalidTagKeySnafu)?;
|
||||
schema_builder.influx_column(
|
||||
&column_name,
|
||||
InfluxColumnType::Field(match data_type {
|
||||
DataType::Float => InfluxFieldType::Float,
|
||||
DataType::Integer => InfluxFieldType::Integer,
|
||||
DataType::Unsigned => InfluxFieldType::UInteger,
|
||||
DataType::Boolean => InfluxFieldType::Boolean,
|
||||
DataType::String => InfluxFieldType::String,
|
||||
}),
|
||||
);
|
||||
|
||||
// track position of column
|
||||
field_columns.insert(field_key, column_data.len());
|
||||
column_data.push(match data_type {
|
||||
DataType::Float => ColumnData::Float(vec![]),
|
||||
DataType::Integer => ColumnData::Integer(vec![]),
|
||||
DataType::Unsigned => ColumnData::Unsigned(vec![]),
|
||||
DataType::Boolean => ColumnData::Boolean(vec![]),
|
||||
DataType::String => ColumnData::String(vec![]),
|
||||
});
|
||||
}
|
||||
|
||||
// Finally add the timestamp column.
|
||||
schema_builder.influx_column("time", InfluxColumnType::Timestamp);
|
||||
let time_column = vec![];
|
||||
|
||||
Ok(Self {
|
||||
schema: schema_builder.build().context(SchemaBuildingSnafu)?,
|
||||
tag_columns,
|
||||
field_columns,
|
||||
column_data,
|
||||
time_column,
|
||||
})
|
||||
}
|
||||
|
||||
fn field_column(&mut self, field: &[u8]) -> &mut ColumnData {
|
||||
let idx = self
|
||||
.field_columns
|
||||
.get(field)
|
||||
.expect("field column mapping to be present");
|
||||
|
||||
&mut self.column_data[*idx]
|
||||
}
|
||||
|
||||
// Returns the number of rows in the largest column. Useful for padding the
|
||||
// rest of the columns out.
|
||||
fn max_rows(&self) -> usize {
|
||||
self.column_data
|
||||
.iter()
|
||||
.map(|c| c.len())
|
||||
.max()
|
||||
.unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<IntermediateTable> for RecordBatch {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(table: IntermediateTable) -> Result<Self, Self::Error> {
|
||||
let arrow_schema: arrow::datatypes::SchemaRef = table.schema.into();
|
||||
|
||||
let mut rb_columns: Vec<Arc<dyn arrow::array::Array>> =
|
||||
Vec::with_capacity(&table.column_data.len() + 1); // + time column
|
||||
|
||||
for col in table.column_data {
|
||||
match col {
|
||||
ColumnData::Integer(v) => {
|
||||
rb_columns.push(Arc::new(arrow::array::Int64Array::from(v)));
|
||||
}
|
||||
ColumnData::Unsigned(v) => {
|
||||
rb_columns.push(Arc::new(arrow::array::UInt64Array::from(v)));
|
||||
}
|
||||
ColumnData::Float(v) => {
|
||||
rb_columns.push(Arc::new(arrow::array::Float64Array::from(v)));
|
||||
}
|
||||
ColumnData::String(v) => {
|
||||
rb_columns.push(Arc::new(arrow::array::StringArray::from(
|
||||
v.iter().map(|s| s.as_deref()).collect::<Vec<_>>(),
|
||||
)));
|
||||
}
|
||||
ColumnData::Boolean(v) => {
|
||||
rb_columns.push(Arc::new(arrow::array::BooleanArray::from(v)));
|
||||
}
|
||||
ColumnData::Tag(v) => {
|
||||
rb_columns.push(Arc::new(arrow::array::DictionaryArray::<
|
||||
arrow::datatypes::Int32Type,
|
||||
>::from_iter(
|
||||
v.iter().map(|s| s.as_deref())
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// time column
|
||||
rb_columns.push(Arc::new(arrow::array::TimestampNanosecondArray::from(
|
||||
table.time_column,
|
||||
)));
|
||||
|
||||
Self::try_new(arrow_schema, rb_columns).context(ArrowSnafu)
|
||||
}
|
||||
}
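Tag columns become Arrow dictionary arrays in the conversion above; a small standalone example of that step, assuming the same `arrow` 8.x `FromIterator` impl the diff relies on:

use arrow::array::{Array, DictionaryArray};
use arrow::datatypes::Int32Type;

fn main() {
    // Tag values collected as Option<String>; None is a row where the tag was
    // absent (padded to NULL earlier in the pipeline).
    let tags: Vec<Option<String>> = vec![Some("west".into()), None, Some("west".into())];

    // Same conversion the TryFrom impl above performs for ColumnData::Tag.
    let dict = DictionaryArray::<Int32Type>::from_iter(tags.iter().map(|s| s.as_deref()));

    assert_eq!(dict.len(), 3);
    assert_eq!(dict.null_count(), 1);
}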
|
||||
|
||||
// These constants describe known values for the keys associated with
|
||||
// measurements and fields.
|
||||
const MEASUREMENT_TAG_KEY_TEXT: [u8; 12] = [
|
||||
b'_', b'm', b'e', b'a', b's', b'u', b'r', b'e', b'm', b'e', b'n', b't',
|
||||
];
|
||||
pub(crate) const MEASUREMENT_TAG_KEY_BIN: [u8; 1] = [0_u8];
|
||||
const FIELD_TAG_KEY_TEXT: [u8; 6] = [b'_', b'f', b'i', b'e', b'l', b'd'];
|
||||
pub(crate) const FIELD_TAG_KEY_BIN: [u8; 1] = [255_u8];
|
||||
|
||||
// Store a collection of column names and types for a single table (measurement).
|
||||
#[derive(Debug, Default, PartialEq, Eq)]
|
||||
struct TableColumns {
|
||||
tag_columns: BTreeSet<Vec<u8>>,
|
||||
field_columns: BTreeMap<Vec<u8>, DataType>,
|
||||
}
|
||||
|
||||
// Given a set of data frames, determine from the series frames within the set
// the tag columns for each distinct table (measurement).
|
||||
fn determine_tag_columns(frames: &[Data]) -> BTreeMap<Vec<u8>, TableColumns> {
|
||||
let mut schema: BTreeMap<Vec<u8>, TableColumns> = BTreeMap::new();
|
||||
for frame in frames {
|
||||
if let Data::Series(sf) = frame {
|
||||
assert!(!sf.tags.is_empty(), "expected _measurement and _field tags");
|
||||
|
||||
assert!(tag_key_is_measurement(&sf.tags[0].key));
|
||||
// PERF: avoid clone of value
|
||||
let measurement_name = sf.tags[0].value.clone();
|
||||
let table = schema.entry(measurement_name).or_default();
|
||||
|
||||
for Tag { key, value } in sf.tags.iter().skip(1) {
|
||||
if tag_key_is_field(key) {
|
||||
table.field_columns.insert(value.clone(), sf.data_type());
|
||||
continue;
|
||||
}
|
||||
|
||||
// PERF: avoid clone of key
|
||||
table.tag_columns.insert(key.clone()); // Add column to table schema
|
||||
}
|
||||
}
|
||||
}
|
||||
schema
|
||||
}
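A simplified analog of `determine_tag_columns`: walk the series metadata once and accumulate, per measurement, its tag columns and its field columns with their types. String keys stand in for the raw byte vectors purely for readability:

use std::collections::{BTreeMap, BTreeSet};

#[derive(Debug, Default)]
struct Columns {
    tag_columns: BTreeSet<String>,
    field_columns: BTreeMap<String, &'static str>, // field name -> data type name
}

// Each input entry is (measurement, tag key, field name, field type).
fn determine_columns(series: &[(&str, &str, &str, &'static str)]) -> BTreeMap<String, Columns> {
    let mut schema: BTreeMap<String, Columns> = BTreeMap::new();
    for &(measurement, tag, field, data_type) in series {
        let table = schema.entry(measurement.to_owned()).or_default();
        table.tag_columns.insert(tag.to_owned());
        table.field_columns.insert(field.to_owned(), data_type);
    }
    schema
}

fn main() {
    let schema = determine_columns(&[
        ("cpu", "host", "temp", "float"),
        ("cpu", "server", "voltage", "integer"),
    ]);
    assert_eq!(schema["cpu"].tag_columns.len(), 2);
    assert_eq!(schema["cpu"].field_columns["voltage"], "integer");
}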
|
||||
|
||||
// Extract a reference to the measurement name from a Series frame.
|
||||
fn measurement(frame: &SeriesFrame) -> &Vec<u8> {
|
||||
assert!(tag_key_is_measurement(&frame.tags[0].key));
|
||||
&frame.tags[0].value
|
||||
}
|
||||
|
||||
// Extract a reference to the field name from a Series frame.
|
||||
fn field_name(frame: &SeriesFrame) -> &Vec<u8> {
|
||||
let idx = frame.tags.len() - 1;
|
||||
assert!(tag_key_is_field(&frame.tags[idx].key));
|
||||
&frame.tags[idx].value
|
||||
}
|
||||
|
||||
pub(crate) fn tag_key_is_measurement(key: &[u8]) -> bool {
|
||||
(key == MEASUREMENT_TAG_KEY_TEXT) || (key == MEASUREMENT_TAG_KEY_BIN)
|
||||
}
|
||||
|
||||
pub(crate) fn tag_key_is_field(key: &[u8]) -> bool {
|
||||
(key == FIELD_TAG_KEY_TEXT) || (key == FIELD_TAG_KEY_BIN)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test_super {
|
||||
use arrow::util::pretty::pretty_format_batches;
|
||||
use generated_types::read_response::{
|
||||
BooleanPointsFrame, FloatPointsFrame, IntegerPointsFrame, SeriesFrame, StringPointsFrame,
|
||||
UnsignedPointsFrame,
|
||||
};
|
||||
|
||||
use super::*;
|
||||
|
||||
// converts a vector of key/value pairs into a vector of `Tag`.
|
||||
fn make_tags(pairs: &[(&str, &str)]) -> Vec<Tag> {
|
||||
pairs
|
||||
.iter()
|
||||
.map(|(key, value)| Tag {
|
||||
key: key.as_bytes().to_vec(),
|
||||
value: value.as_bytes().to_vec(),
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
struct TableColumnInput<'a> {
|
||||
measurement: &'a str,
|
||||
tags: &'a [&'a str],
|
||||
fields: &'a [(&'a str, DataType)],
|
||||
}
|
||||
|
||||
impl<'a> TableColumnInput<'a> {
|
||||
fn new(measurement: &'a str, tags: &'a [&str], fields: &'a [(&str, DataType)]) -> Self {
|
||||
Self {
|
||||
measurement,
|
||||
tags,
|
||||
fields,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// converts a vector of key/value tag pairs and a field datatype into a
|
||||
// collection of `TableColumns` objects.
|
||||
fn make_table_columns(input: &'_ [TableColumnInput<'_>]) -> BTreeMap<Vec<u8>, TableColumns> {
|
||||
let mut all_table_columns = BTreeMap::new();
|
||||
for TableColumnInput {
|
||||
measurement,
|
||||
tags,
|
||||
fields,
|
||||
} in input
|
||||
{
|
||||
let tag_columns = tags
|
||||
.iter()
|
||||
.map(|c| c.as_bytes().to_vec())
|
||||
.collect::<Vec<Vec<u8>>>();
|
||||
|
||||
let mut tag_columns_set = BTreeSet::new();
|
||||
for c in tag_columns {
|
||||
tag_columns_set.insert(c);
|
||||
}
|
||||
|
||||
let mut field_columns = BTreeMap::new();
|
||||
for (field, data_type) in *fields {
|
||||
field_columns.insert(field.as_bytes().to_vec(), *data_type);
|
||||
}
|
||||
|
||||
let table_columns = TableColumns {
|
||||
tag_columns: tag_columns_set,
|
||||
field_columns,
|
||||
};
|
||||
|
||||
all_table_columns.insert(measurement.as_bytes().to_vec(), table_columns);
|
||||
}
|
||||
all_table_columns
|
||||
}
|
||||
|
||||
// generate a substantial set of frames across multiple tables.
|
||||
fn gen_frames() -> Vec<Data> {
|
||||
vec![
|
||||
Data::Series(SeriesFrame {
|
||||
tags: make_tags(&[
|
||||
("_measurement", "cpu"),
|
||||
("host", "foo"),
|
||||
("server", "a"),
|
||||
("_field", "temp"),
|
||||
]),
|
||||
data_type: DataType::Float as i32,
|
||||
}),
|
||||
Data::FloatPoints(FloatPointsFrame {
|
||||
timestamps: vec![1, 2, 3, 4],
|
||||
values: vec![1.1, 2.2, 3.3, 4.4],
|
||||
}),
|
||||
Data::FloatPoints(FloatPointsFrame {
|
||||
timestamps: vec![5, 6, 7, 10],
|
||||
values: vec![5.1, 5.2, 5.3, 10.4],
|
||||
}),
|
||||
Data::Series(SeriesFrame {
|
||||
tags: make_tags(&[
|
||||
("_measurement", "cpu"),
|
||||
("host", "foo"),
|
||||
("server", "a"),
|
||||
("_field", "voltage"),
|
||||
]),
|
||||
data_type: DataType::Integer as i32,
|
||||
}),
|
||||
Data::IntegerPoints(IntegerPointsFrame {
|
||||
timestamps: vec![1, 2],
|
||||
values: vec![22, 22],
|
||||
}),
|
||||
Data::Series(SeriesFrame {
|
||||
tags: make_tags(&[
|
||||
("_measurement", "cpu"),
|
||||
("host", "foo"),
|
||||
("new_column", "a"),
|
||||
("_field", "voltage"),
|
||||
]),
|
||||
data_type: DataType::Integer as i32,
|
||||
}),
|
||||
Data::IntegerPoints(IntegerPointsFrame {
|
||||
timestamps: vec![100, 200],
|
||||
values: vec![1000, 2000],
|
||||
}),
|
||||
Data::Series(SeriesFrame {
|
||||
tags: make_tags(&[("_measurement", "another table"), ("_field", "voltage")]),
|
||||
data_type: DataType::String as i32,
|
||||
}),
|
||||
Data::StringPoints(StringPointsFrame {
|
||||
timestamps: vec![200, 201],
|
||||
values: vec!["hello".to_string(), "abc".to_string()],
|
||||
}),
|
||||
Data::Series(SeriesFrame {
|
||||
tags: make_tags(&[
|
||||
("_measurement", "another table"),
|
||||
("region", "west"),
|
||||
("_field", "voltage"),
|
||||
]),
|
||||
data_type: DataType::String as i32,
|
||||
}),
|
||||
Data::StringPoints(StringPointsFrame {
|
||||
timestamps: vec![302, 304],
|
||||
values: vec!["foo".to_string(), "bar".to_string()],
|
||||
}),
|
||||
Data::Series(SeriesFrame {
|
||||
tags: make_tags(&[
|
||||
("_measurement", "another table"),
|
||||
("region", "north"),
|
||||
("_field", "bool_field"),
|
||||
]),
|
||||
data_type: DataType::Boolean as i32,
|
||||
}),
|
||||
Data::BooleanPoints(BooleanPointsFrame {
|
||||
timestamps: vec![1000],
|
||||
values: vec![true],
|
||||
}),
|
||||
Data::Series(SeriesFrame {
|
||||
tags: make_tags(&[
|
||||
("_measurement", "another table"),
|
||||
("region", "south"),
|
||||
("_field", "unsigned_field"),
|
||||
]),
|
||||
data_type: DataType::Unsigned as i32,
|
||||
}),
|
||||
Data::UnsignedPoints(UnsignedPointsFrame {
|
||||
timestamps: vec![2000],
|
||||
values: vec![600],
|
||||
}),
|
||||
]
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_determine_tag_columns() {
|
||||
assert!(determine_tag_columns(&[]).is_empty());
|
||||
|
||||
let frame = Data::Series(SeriesFrame {
|
||||
tags: make_tags(&[("_measurement", "cpu"), ("server", "a"), ("_field", "temp")]),
|
||||
data_type: DataType::Float as i32,
|
||||
});
|
||||
|
||||
let exp = make_table_columns(&[TableColumnInput::new(
|
||||
"cpu",
|
||||
&["server"],
|
||||
&[("temp", DataType::Float)],
|
||||
)]);
|
||||
assert_eq!(determine_tag_columns(&[frame]), exp);
|
||||
|
||||
// larger example
|
||||
let frames = gen_frames();
|
||||
|
||||
let exp = make_table_columns(&[
|
||||
TableColumnInput::new(
|
||||
"cpu",
|
||||
&["host", "new_column", "server"],
|
||||
&[("temp", DataType::Float), ("voltage", DataType::Integer)],
|
||||
),
|
||||
TableColumnInput::new(
|
||||
"another table",
|
||||
&["region"],
|
||||
&[
|
||||
("bool_field", DataType::Boolean),
|
||||
("unsigned_field", DataType::Unsigned),
|
||||
("voltage", DataType::String),
|
||||
],
|
||||
),
|
||||
]);
|
||||
assert_eq!(determine_tag_columns(&frames), exp);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_frames_to_into_record_batches() {
|
||||
let frames = gen_frames();
|
||||
|
||||
let rbs = frames_to_record_batches(&frames);
|
||||
let exp = vec![
|
||||
(
|
||||
"another table",
|
||||
vec![
|
||||
"+--------+------------+----------------+---------+-------------------------------+",
|
||||
"| region | bool_field | unsigned_field | voltage | time |",
|
||||
"+--------+------------+----------------+---------+-------------------------------+",
|
||||
"| | | | hello | 1970-01-01 00:00:00.000000200 |",
|
||||
"| | | | abc | 1970-01-01 00:00:00.000000201 |",
|
||||
"| west | | | foo | 1970-01-01 00:00:00.000000302 |",
|
||||
"| west | | | bar | 1970-01-01 00:00:00.000000304 |",
|
||||
"| north | true | | | 1970-01-01 00:00:00.000001 |",
|
||||
"| south | | 600 | | 1970-01-01 00:00:00.000002 |",
|
||||
"+--------+------------+----------------+---------+-------------------------------+",
|
||||
],
|
||||
),
|
||||
(
|
||||
"cpu",
|
||||
vec![
|
||||
"+------+------------+--------+------+---------+-------------------------------+",
|
||||
"| host | new_column | server | temp | voltage | time |",
|
||||
"+------+------------+--------+------+---------+-------------------------------+",
|
||||
"| foo | | a | 1.1 | | 1970-01-01 00:00:00.000000001 |",
|
||||
"| foo | | a | 2.2 | | 1970-01-01 00:00:00.000000002 |",
|
||||
"| foo | | a | 3.3 | | 1970-01-01 00:00:00.000000003 |",
|
||||
"| foo | | a | 4.4 | | 1970-01-01 00:00:00.000000004 |",
|
||||
"| foo | | a | 5.1 | | 1970-01-01 00:00:00.000000005 |",
|
||||
"| foo | | a | 5.2 | | 1970-01-01 00:00:00.000000006 |",
|
||||
"| foo | | a | 5.3 | | 1970-01-01 00:00:00.000000007 |",
|
||||
"| foo | | a | 10.4 | | 1970-01-01 00:00:00.000000010 |",
|
||||
"| foo | | a | | 22 | 1970-01-01 00:00:00.000000001 |",
|
||||
"| foo | | a | | 22 | 1970-01-01 00:00:00.000000002 |",
|
||||
"| foo | a | | | 1000 | 1970-01-01 00:00:00.000000100 |",
|
||||
"| foo | a | | | 2000 | 1970-01-01 00:00:00.000000200 |",
|
||||
"+------+------------+--------+------+---------+-------------------------------+",
|
||||
],
|
||||
),
|
||||
]
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k.to_owned(), v.join("\n")))
|
||||
.collect::<BTreeMap<String, String>>();
|
||||
|
||||
let got = rbs
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
.map(|(k, v)| {
|
||||
let table: String = pretty_format_batches(&[v]).unwrap().to_string();
|
||||
(k, table)
|
||||
})
|
||||
.collect::<BTreeMap<String, String>>();
|
||||
assert_eq!(got, exp);
|
||||
}
|
||||
}
|
|
@ -252,7 +252,7 @@ async fn query(
|
|||
|
||||
let db = server.db(&db_name)?;
|
||||
|
||||
let _query_completed_token = db.record_query("sql", &q);
|
||||
let _query_completed_token = db.record_query("sql", Box::new(q.clone()));
|
||||
|
||||
let ctx = db.new_query_context(req.extensions().get().cloned());
|
||||
let physical_plan = Planner::new(&ctx).sql(&q).await.context(PlanningSnafu)?;
|
||||
|
|
|
@ -172,7 +172,7 @@ impl Flight for FlightService {
|
|||
.db(&database)
|
||||
.map_err(default_server_error_handler)?;
|
||||
|
||||
let _query_completed_token = db.record_query("sql", &read_info.sql_query);
|
||||
let _query_completed_token = db.record_query("sql", Box::new(read_info.sql_query.clone()));
|
||||
|
||||
let ctx = db.new_query_context(span_ctx);
|
||||
|
||||
|
|
|
@ -26,9 +26,9 @@ use super::{TAG_KEY_FIELD, TAG_KEY_MEASUREMENT};
|
|||
use observability_deps::tracing::warn;
|
||||
use predicate::rpc_predicate::InfluxRpcPredicate;
|
||||
use predicate::{
|
||||
predicate::PredicateBuilder,
|
||||
regex::regex_match_expr,
|
||||
rpc_predicate::{FIELD_COLUMN_NAME, MEASUREMENT_COLUMN_NAME},
|
||||
PredicateBuilder,
|
||||
};
|
||||
use query::group_by::{Aggregate as QueryAggregate, WindowDuration};
|
||||
use snafu::{OptionExt, ResultExt, Snafu};
|
||||
|
@ -867,7 +867,7 @@ fn format_comparison(v: i32, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use generated_types::node::Type as RPCNodeType;
|
||||
use predicate::{predicate::Predicate, rpc_predicate::QueryDatabaseMeta};
|
||||
use predicate::{rpc_predicate::QueryDatabaseMeta, Predicate};
|
||||
use std::{collections::BTreeSet, sync::Arc};
|
||||
|
||||
use super::*;
|
||||
|
|
|
@ -30,7 +30,7 @@ use query::{
|
|||
fieldlist::FieldList, seriesset::converter::Error as SeriesSetError,
|
||||
ExecutionContextProvider,
|
||||
},
|
||||
QueryDatabase,
|
||||
QueryDatabase, QueryText,
|
||||
};
|
||||
use server::DatabaseStore;
|
||||
|
||||
|
@ -1303,31 +1303,29 @@ where
|
|||
|
||||
/// Return something which can be formatted as json ("pbjson"
|
||||
/// specifically)
|
||||
fn defer_json<S>(s: &S) -> impl Into<String> + '_
|
||||
fn defer_json<S>(s: &S) -> QueryText
|
||||
where
|
||||
S: serde::Serialize,
|
||||
S: serde::Serialize + Send + Sync + Clone + 'static,
|
||||
{
|
||||
/// Defers conversion into a String
|
||||
struct DeferredToJson<'a, S>
|
||||
struct DeferredToJson<S>
|
||||
where
|
||||
S: serde::Serialize,
|
||||
{
|
||||
s: &'a S,
|
||||
s: S,
|
||||
}
|
||||
|
||||
impl<S> From<DeferredToJson<'_, S>> for String
|
||||
where
|
||||
S: serde::Serialize,
|
||||
{
|
||||
fn from(w: DeferredToJson<'_, S>) -> Self {
|
||||
match serde_json::to_string_pretty(&w.s) {
|
||||
Ok(json) => json,
|
||||
Err(e) => e.to_string(),
|
||||
impl<S: serde::Serialize> std::fmt::Display for DeferredToJson<S> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
// This buffering is unfortunate but `Formatter` doesn't implement `std::io::Write`
|
||||
match serde_json::to_string_pretty(&self.s) {
|
||||
Ok(s) => f.write_str(&s),
|
||||
Err(e) => write!(f, "error formatting: {}", e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DeferredToJson { s }
|
||||
Box::new(DeferredToJson { s: s.clone() })
|
||||
}
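The change above replaces a `From<…> for String` conversion with a `Display` impl that serializes only when the value is actually formatted. A self-contained version of that pattern, assuming `serde` (with the derive feature) and `serde_json`; the `ReadInfo` type here is made up for the example:

use serde::Serialize;

// Render any serializable value as pretty JSON lazily, at Display time.
struct DeferredToJson<S: Serialize>(S);

impl<S: Serialize> std::fmt::Display for DeferredToJson<S> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match serde_json::to_string_pretty(&self.0) {
            Ok(json) => f.write_str(&json),
            Err(e) => write!(f, "error formatting: {}", e),
        }
    }
}

#[derive(Serialize, Clone)]
struct ReadInfo {
    database: String,
    sql_query: String,
}

fn main() {
    let info = ReadInfo {
        database: "mydb".to_string(),
        sql_query: "select 1".to_string(),
    };
    // Nothing is serialized until the wrapper is formatted here.
    println!("{}", DeferredToJson(info));
}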
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -1351,7 +1349,7 @@ mod tests {
|
|||
Client as StorageClient, OrgAndBucket,
|
||||
};
|
||||
use panic_logging::SendPanicsToTracing;
|
||||
use predicate::predicate::{PredicateBuilder, PredicateMatch};
|
||||
use predicate::{PredicateBuilder, PredicateMatch};
|
||||
use query::{
|
||||
exec::Executor,
|
||||
test::{TestChunk, TestDatabase, TestError},
|
||||
|
@ -2971,7 +2969,7 @@ mod tests {
|
|||
db_name: &str,
|
||||
partition_key: &str,
|
||||
chunk_id: u128,
|
||||
expected_predicate: &predicate::predicate::Predicate,
|
||||
expected_predicate: &predicate::Predicate,
|
||||
) {
|
||||
let actual_predicates = self
|
||||
.test_storage
|
||||
|
|
|
@ -264,7 +264,8 @@ fn main() -> Result<(), std::io::Error> {
|
|||
}
|
||||
Command::Storage(config) => {
|
||||
let _tracing_guard = handle_init_logs(init_simple_logs(log_verbose_count));
|
||||
if let Err(e) = commands::storage::command(config).await {
|
||||
let connection = connection().await;
|
||||
if let Err(e) = commands::storage::command(connection, config).await {
|
||||
eprintln!("{}", e);
|
||||
std::process::exit(ReturnCode::Failure as _)
|
||||
}
@ -162,7 +162,7 @@ pub async fn test_delete_on_router() {
|
|||
let fixture = ServerFixture::create_shared(ServerType::Router).await;
|
||||
|
||||
let db_name = rand_name();
|
||||
let (_tmpdir, mut write_buffer) = create_router_to_write_buffer(&fixture, &db_name).await;
|
||||
let (_tmpdir, write_buffer) = create_router_to_write_buffer(&fixture, &db_name).await;
|
||||
|
||||
let table = "cpu";
|
||||
let pred = DeletePredicate {
|
||||
|
@ -179,8 +179,10 @@ pub async fn test_delete_on_router() {
|
|||
.await
|
||||
.expect("cannot delete");
|
||||
|
||||
let mut stream = write_buffer.streams().into_values().next().unwrap();
|
||||
let delete_actual = stream.stream.next().await.unwrap().unwrap();
|
||||
let sequencer_id = write_buffer.sequencer_ids().into_iter().next().unwrap();
|
||||
let mut handler = write_buffer.stream_handler(sequencer_id).await.unwrap();
|
||||
let mut stream = handler.stream();
|
||||
let delete_actual = stream.next().await.unwrap().unwrap();
|
||||
let delete_expected = DmlDelete::new(
|
||||
&db_name,
|
||||
pred,
|
||||
|
|
|
@ -45,7 +45,7 @@ pub async fn test_write_pb_router() {
|
|||
let fixture = ServerFixture::create_shared(ServerType::Router).await;
|
||||
|
||||
let db_name = rand_name();
|
||||
let (_tmpdir, mut write_buffer) = create_router_to_write_buffer(&fixture, &db_name).await;
|
||||
let (_tmpdir, write_buffer) = create_router_to_write_buffer(&fixture, &db_name).await;
|
||||
|
||||
fixture
|
||||
.write_client()
|
||||
|
@ -53,8 +53,10 @@ pub async fn test_write_pb_router() {
|
|||
.await
|
||||
.expect("cannot write");
|
||||
|
||||
let mut stream = write_buffer.streams().into_values().next().unwrap();
|
||||
let write_actual = stream.stream.next().await.unwrap().unwrap();
|
||||
let sequencer_id = write_buffer.sequencer_ids().into_iter().next().unwrap();
|
||||
let mut handler = write_buffer.stream_handler(sequencer_id).await.unwrap();
|
||||
let mut stream = handler.stream();
|
||||
let write_actual = stream.next().await.unwrap().unwrap();
|
||||
let write_expected = DmlWrite::new(
|
||||
&db_name,
|
||||
lines_to_batches("mytable mycol1=5 3", 0).unwrap(),
|
||||
|
|
|
@ -7,6 +7,7 @@ edition = "2021"
|
|||
[dependencies]
|
||||
arrow = { version = "8.0", features = ["prettyprint"] }
|
||||
arrow_util = { path = "../arrow_util" }
|
||||
async-trait = "0.1.42"
|
||||
base64 = "0.13"
|
||||
bytes = "1.0"
|
||||
datafusion = { path = "../datafusion" }
@ -3,6 +3,7 @@
|
|||
use arrow::record_batch::RecordBatch;
|
||||
use data_types::delete_predicate::DeletePredicate;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use chrono::{format::StrftimeItems, TimeZone, Utc};
|
||||
use dml::DmlOperation;
|
||||
use iox_catalog::interface::{
|
||||
|
@ -100,6 +101,25 @@ impl IngesterData {
|
|||
}
|
||||
}
|
||||
|
||||
/// The Persister has a single function that will persist a given partition ID. It is expected
/// that the persist function will retry forever until it succeeds.
|
||||
#[async_trait]
|
||||
pub(crate) trait Persister: Send + Sync + 'static {
|
||||
async fn persist(&self, partition_id: PartitionId);
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Persister for IngesterData {
|
||||
async fn persist(&self, _partition_id: PartitionId) {
|
||||
// lookup the TableData
|
||||
// let persisting_batch = table_data.create_persisting_batch(partition.partition_key);
|
||||
// do the persist with this persisting batch
|
||||
// update the catalog
|
||||
// table_data.clear_persisting_batch() (behind the scenes this will remove the persisting batch
|
||||
// and if the partition is empty, remove it from the map in table_data)
|
||||
}
|
||||
}
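The doc comment above says `persist` is expected to retry forever until it succeeds; the impl here is still a stub. The usual shape is a capped-backoff loop, sketched below with tokio. This is illustrative only and `do_persist` is a placeholder, not the diff's implementation:

use std::time::Duration;

// Retry a fallible persist step until it succeeds, backing off between attempts.
async fn persist_with_retries(partition_id: i64) {
    let mut backoff = Duration::from_millis(100);
    let max_backoff = Duration::from_secs(10);

    loop {
        match do_persist(partition_id).await {
            Ok(()) => return,
            Err(e) => {
                eprintln!("persist of partition {} failed: {}; retrying", partition_id, e);
                tokio::time::sleep(backoff).await;
                backoff = (backoff * 2).min(max_backoff);
            }
        }
    }
}

// Placeholder for the real work: write the parquet file, then update the catalog.
async fn do_persist(_partition_id: i64) -> Result<(), String> {
    Ok(())
}

#[tokio::main]
async fn main() {
    persist_with_retries(42).await;
}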
|
||||
|
||||
/// Data of a Shard
|
||||
#[derive(Default)]
|
||||
pub struct SequencerData {
|
||||
|
@ -144,12 +164,15 @@ impl SequencerData {
|
|||
namespace: &str,
|
||||
catalog: &dyn Catalog,
|
||||
) -> Result<Arc<NamespaceData>> {
|
||||
let namespace = catalog
|
||||
let mut txn = catalog.start_transaction().await.context(CatalogSnafu)?;
|
||||
let namespace = txn
|
||||
.namespaces()
|
||||
.get_by_name(namespace)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
.context(NamespaceNotFoundSnafu { namespace })?;
|
||||
txn.commit().await.context(CatalogSnafu)?;
|
||||
|
||||
let mut n = self.namespaces.write();
|
||||
let data = Arc::clone(
|
||||
n.entry(namespace.name)
|
||||
|
@ -230,11 +253,14 @@ impl NamespaceData {
|
|||
table_name: &str,
|
||||
catalog: &dyn Catalog,
|
||||
) -> Result<Arc<TableData>> {
|
||||
let table = catalog
|
||||
let mut txn = catalog.start_transaction().await.context(CatalogSnafu)?;
|
||||
let table = txn
|
||||
.tables()
|
||||
.create_or_get(table_name, self.namespace_id)
|
||||
.await
|
||||
.context(CatalogSnafu)?;
|
||||
txn.commit().await.context(CatalogSnafu)?;
|
||||
|
||||
let mut t = self.tables.write();
|
||||
let data = Arc::clone(
|
||||
t.entry(table.name)
|
||||
|
@ -306,7 +332,8 @@ impl TableData {
|
|||
let min_time = Timestamp::new(predicate.range.start());
|
||||
let max_time = Timestamp::new(predicate.range.end());
|
||||
|
||||
let tombstone = catalog
|
||||
let mut txn = catalog.start_transaction().await.context(CatalogSnafu)?;
|
||||
let tombstone = txn
|
||||
.tombstones()
|
||||
.create_or_get(
|
||||
self.table_id,
|
||||
|
@ -318,6 +345,7 @@ impl TableData {
|
|||
)
|
||||
.await
|
||||
.context(CatalogSnafu)?;
|
||||
txn.commit().await.context(CatalogSnafu)?;
|
||||
|
||||
let partitions = self.partition_data.read();
|
||||
for data in partitions.values() {
|
||||
|
@ -339,11 +367,13 @@ impl TableData {
|
|||
sequencer_id: SequencerId,
|
||||
catalog: &dyn Catalog,
|
||||
) -> Result<Arc<PartitionData>> {
|
||||
let partition = catalog
|
||||
let mut txn = catalog.start_transaction().await.context(CatalogSnafu)?;
|
||||
let partition = txn
|
||||
.partitions()
|
||||
.create_or_get(partition_key, sequencer_id, self.table_id)
|
||||
.await
|
||||
.context(CatalogSnafu)?;
|
||||
txn.commit().await.context(CatalogSnafu)?;
|
||||
let mut p = self.partition_data.write();
|
||||
let data = Arc::new(PartitionData::new(partition.id));
|
||||
p.insert(partition.partition_key, Arc::clone(&data));
|
||||
|
|
|
@ -3,21 +3,24 @@
|
|||
use iox_catalog::interface::{Catalog, KafkaPartition, KafkaTopic, Sequencer, SequencerId};
|
||||
use object_store::ObjectStore;
|
||||
|
||||
use crate::data::{IngesterData, SequencerData};
|
||||
use crate::{
|
||||
data::{IngesterData, SequencerData},
|
||||
lifecycle::{run_lifecycle_manager, LifecycleConfig, LifecycleManager},
|
||||
};
|
||||
use db::write_buffer::metrics::{SequencerMetrics, WriteBufferIngestMetrics};
|
||||
use dml::DmlOperation;
|
||||
use futures::{stream::BoxStream, StreamExt};
|
||||
use futures::StreamExt;
|
||||
use observability_deps::tracing::{debug, warn};
|
||||
use snafu::Snafu;
|
||||
use snafu::{ResultExt, Snafu};
|
||||
use std::collections::BTreeMap;
|
||||
use std::{
|
||||
fmt::Formatter,
|
||||
sync::Arc,
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
use time::SystemProvider;
|
||||
use tokio::task::JoinHandle;
|
||||
use trace::span::SpanRecorder;
|
||||
use write_buffer::core::{FetchHighWatermark, WriteBufferError, WriteBufferReading};
|
||||
use write_buffer::core::{WriteBufferReading, WriteBufferStreamHandler};
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
#[allow(missing_copy_implementations, missing_docs)]
|
||||
|
@ -31,6 +34,11 @@ pub enum Error {
|
|||
kafka_topic: String,
|
||||
kafka_partition: KafkaPartition,
|
||||
},
|
||||
|
||||
#[snafu(display("Write buffer error: {}", source))]
|
||||
WriteBuffer {
|
||||
source: write_buffer::core::WriteBufferError,
|
||||
},
|
||||
}
|
||||
|
||||
/// A specialized `Error` for Catalog errors
|
||||
|
@ -45,11 +53,11 @@ pub struct IngestHandlerImpl {
|
|||
#[allow(dead_code)]
|
||||
kafka_topic: KafkaTopic,
|
||||
/// Future that resolves when the background worker exits
|
||||
#[allow(dead_code)]
|
||||
join_handles: Vec<JoinHandle<()>>,
|
||||
/// The cache and buffered data for the ingester
|
||||
#[allow(dead_code)]
|
||||
data: Arc<IngesterData>,
|
||||
/// The lifecycle manager, keeping state of partitions across all sequencers
|
||||
lifecycle_manager: Arc<LifecycleManager>,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for IngestHandlerImpl {
|
||||
|
@ -60,14 +68,15 @@ impl std::fmt::Debug for IngestHandlerImpl {
|
|||
|
||||
impl IngestHandlerImpl {
|
||||
/// Initialize the Ingester
|
||||
pub fn new(
|
||||
pub async fn new(
|
||||
lifecycle_config: LifecycleConfig,
|
||||
topic: KafkaTopic,
|
||||
mut sequencer_states: BTreeMap<KafkaPartition, Sequencer>,
|
||||
sequencer_states: BTreeMap<KafkaPartition, Sequencer>,
|
||||
catalog: Arc<dyn Catalog>,
|
||||
object_store: Arc<ObjectStore>,
|
||||
write_buffer: Box<dyn WriteBufferReading>,
|
||||
write_buffer: Arc<dyn WriteBufferReading>,
|
||||
registry: &metric::Registry,
|
||||
) -> Self {
|
||||
) -> Result<Self> {
|
||||
// build the initial ingester data state
|
||||
let mut sequencers = BTreeMap::new();
|
||||
for s in sequencer_states.values() {
|
||||
|
@ -83,40 +92,46 @@ impl IngestHandlerImpl {
|
|||
let kafka_topic_name = topic.name.clone();
|
||||
let ingest_metrics = WriteBufferIngestMetrics::new(registry, &topic.name);
|
||||
|
||||
let write_buffer: &'static mut _ = Box::leak(write_buffer);
|
||||
let join_handles: Vec<_> = write_buffer
|
||||
.streams()
|
||||
.into_iter()
|
||||
.filter_map(|(kafka_partition_id, stream)| {
|
||||
// streams may return a stream for every partition in the kafka topic. We only want
|
||||
// to process streams for those specified by the call to new.
|
||||
let kafka_partition = KafkaPartition::new(kafka_partition_id as i32);
|
||||
sequencer_states.remove(&kafka_partition).map(|sequencer| {
|
||||
let metrics = ingest_metrics.new_sequencer_metrics(kafka_partition_id);
|
||||
let ingester_data = Arc::clone(&ingester_data);
|
||||
let kafka_topic_name = kafka_topic_name.clone();
|
||||
let mut join_handles = Vec::with_capacity(sequencer_states.len());
|
||||
for (kafka_partition, sequencer) in sequencer_states {
|
||||
let metrics = ingest_metrics.new_sequencer_metrics(kafka_partition.get() as u32);
|
||||
let ingester_data = Arc::clone(&ingester_data);
|
||||
let kafka_topic_name = kafka_topic_name.clone();
|
||||
|
||||
tokio::task::spawn(async move {
|
||||
stream_in_sequenced_entries(
|
||||
ingester_data,
|
||||
sequencer.id,
|
||||
kafka_topic_name,
|
||||
kafka_partition,
|
||||
stream.stream,
|
||||
stream.fetch_high_watermark,
|
||||
metrics,
|
||||
)
|
||||
.await;
|
||||
})
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
let stream_handler = write_buffer
|
||||
.stream_handler(kafka_partition.get() as u32)
|
||||
.await
|
||||
.context(WriteBufferSnafu)?;
|
||||
|
||||
Self {
|
||||
join_handles.push(tokio::task::spawn(stream_in_sequenced_entries(
|
||||
ingester_data,
|
||||
sequencer.id,
|
||||
kafka_topic_name,
|
||||
kafka_partition,
|
||||
Arc::clone(&write_buffer),
|
||||
stream_handler,
|
||||
metrics,
|
||||
)));
|
||||
}
|
||||
|
||||
// start the lifecycle manager
|
||||
let persister = Arc::clone(&data);
|
||||
let lifecycle_manager = Arc::new(LifecycleManager::new(
|
||||
lifecycle_config,
|
||||
Arc::new(SystemProvider::new()),
|
||||
));
|
||||
let manager = Arc::clone(&lifecycle_manager);
|
||||
let handle = tokio::task::spawn(async move {
|
||||
run_lifecycle_manager(manager, persister).await;
|
||||
});
|
||||
join_handles.push(handle);
|
||||
|
||||
Ok(Self {
|
||||
data,
|
||||
kafka_topic: topic,
|
||||
join_handles,
|
||||
}
|
||||
lifecycle_manager,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -135,17 +150,18 @@ impl Drop for IngestHandlerImpl {
|
|||
///
|
||||
/// Note all errors reading / parsing / writing entries from the write
|
||||
/// buffer are ignored.
|
||||
async fn stream_in_sequenced_entries<'a>(
|
||||
async fn stream_in_sequenced_entries(
|
||||
ingester_data: Arc<IngesterData>,
|
||||
sequencer_id: SequencerId,
|
||||
kafka_topic: String,
|
||||
kafka_partition: KafkaPartition,
|
||||
mut stream: BoxStream<'a, Result<DmlOperation, WriteBufferError>>,
|
||||
f_mark: FetchHighWatermark<'a>,
|
||||
write_buffer: Arc<dyn WriteBufferReading>,
|
||||
mut write_buffer_stream: Box<dyn WriteBufferStreamHandler>,
|
||||
mut metrics: SequencerMetrics,
|
||||
) {
|
||||
let mut watermark_last_updated: Option<Instant> = None;
|
||||
let mut watermark = 0_u64;
|
||||
let mut stream = write_buffer_stream.stream();
|
||||
|
||||
while let Some(db_write_result) = stream.next().await {
|
||||
// maybe update sequencer watermark
|
||||
|
@ -156,7 +172,10 @@ async fn stream_in_sequenced_entries<'a>(
|
|||
.map(|ts| now.duration_since(ts) > Duration::from_secs(10))
|
||||
.unwrap_or(true)
|
||||
{
|
||||
match f_mark().await {
|
||||
match write_buffer
|
||||
.fetch_high_watermark(sequencer_id.get() as u32)
|
||||
.await
|
||||
{
|
||||
Ok(w) => {
|
||||
watermark = w;
|
||||
}
|
||||
|
@ -233,34 +252,28 @@ mod tests {
|
|||
use iox_catalog::validate_or_insert_schema;
|
||||
use metric::{Attributes, Metric, U64Counter, U64Gauge};
|
||||
use mutable_batch_lp::lines_to_batches;
|
||||
use std::num::NonZeroU32;
|
||||
use std::{num::NonZeroU32, ops::DerefMut};
|
||||
use time::Time;
|
||||
use write_buffer::mock::{MockBufferForReading, MockBufferSharedState};
|
||||
|
||||
#[tokio::test]
|
||||
async fn read_from_write_buffer_write_to_mutable_buffer() {
|
||||
let catalog = MemCatalog::new();
|
||||
let kafka_topic = catalog
|
||||
.kafka_topics()
|
||||
.create_or_get("whatevs")
|
||||
.await
|
||||
.unwrap();
|
||||
let query_pool = catalog
|
||||
.query_pools()
|
||||
.create_or_get("whatevs")
|
||||
.await
|
||||
.unwrap();
|
||||
let mut txn = catalog.start_transaction().await.unwrap();
|
||||
let kafka_topic = txn.kafka_topics().create_or_get("whatevs").await.unwrap();
|
||||
let query_pool = txn.query_pools().create_or_get("whatevs").await.unwrap();
|
||||
let kafka_partition = KafkaPartition::new(0);
|
||||
let namespace = catalog
|
||||
let namespace = txn
|
||||
.namespaces()
|
||||
.create("foo", "inf", kafka_topic.id, query_pool.id)
|
||||
.await
|
||||
.unwrap();
|
||||
let sequencer = catalog
|
||||
let sequencer = txn
|
||||
.sequencers()
|
||||
.create_or_get(&kafka_topic, kafka_partition)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let mut sequencer_states = BTreeMap::new();
|
||||
sequencer_states.insert(kafka_partition, sequencer);
|
||||
|
||||
|
@ -275,7 +288,7 @@ mod tests {
|
|||
lines_to_batches("mem foo=1 10", 0).unwrap(),
|
||||
DmlMeta::sequenced(Sequence::new(0, 0), ingest_ts1, None, 50),
|
||||
);
|
||||
let schema = validate_or_insert_schema(w1.tables(), &schema, &catalog)
|
||||
let schema = validate_or_insert_schema(w1.tables(), &schema, txn.deref_mut())
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
@ -285,23 +298,29 @@ mod tests {
|
|||
lines_to_batches("cpu bar=2 20\ncpu bar=3 30", 0).unwrap(),
|
||||
DmlMeta::sequenced(Sequence::new(0, 7), ingest_ts2, None, 150),
|
||||
);
|
||||
let _schema = validate_or_insert_schema(w2.tables(), &schema, &catalog)
|
||||
let _schema = validate_or_insert_schema(w2.tables(), &schema, txn.deref_mut())
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
txn.commit().await.unwrap();
|
||||
write_buffer_state.push_write(w2);
|
||||
let reading = Box::new(MockBufferForReading::new(write_buffer_state, None).unwrap());
|
||||
let reading: Arc<dyn WriteBufferReading> =
|
||||
Arc::new(MockBufferForReading::new(write_buffer_state, None).unwrap());
|
||||
let object_store = Arc::new(ObjectStore::new_in_memory());
|
||||
let metrics: Arc<metric::Registry> = Default::default();
|
||||
|
||||
let lifecycle_config = LifecycleConfig::new(1000000, 1000, 1000, Duration::from_secs(10));
|
||||
let ingester = IngestHandlerImpl::new(
|
||||
lifecycle_config,
|
||||
kafka_topic,
|
||||
sequencer_states,
|
||||
Arc::new(catalog),
|
||||
object_store,
|
||||
reading,
|
||||
&metrics,
|
||||
);
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// give the writes some time to go through the buffer. Exit once we've verified there's
|
||||
// data in there from both writes.
@ -16,6 +16,7 @@
|
|||
pub mod compact;
|
||||
pub mod data;
|
||||
pub mod handler;
|
||||
pub mod lifecycle;
|
||||
pub mod persist;
|
||||
pub mod query;
|
||||
pub mod server;
@ -0,0 +1,475 @@
|
|||
//! Manages the persistence and eviction lifecycle of data in the buffer across all sequencers.
//! Note that the byte counts logged by the lifecycle manager and when exactly persistence gets
//! triggered aren't required to be absolutely accurate. The byte count is just an estimate
//! anyway; this just needs to keep things moving along so that memory use stays roughly under
//! some absolute number and individual Parquet files that get persisted stay below some size. It
//! is expected that they may be above or below the absolute thresholds.
|
||||
|
||||
use crate::data::Persister;
|
||||
use iox_catalog::interface::PartitionId;
|
||||
use parking_lot::Mutex;
|
||||
use std::collections::BTreeMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use time::{Time, TimeProvider};
|
||||
|
||||
/// The lifecycle manager keeps track of the size and age of partitions across all sequencers.
|
||||
/// It triggers persistence based on keeping total memory usage around a set amount while
|
||||
/// ensuring that partitions don't get too old or large before being persisted.
|
||||
pub(crate) struct LifecycleManager {
|
||||
config: LifecycleConfig,
|
||||
time_provider: Arc<dyn TimeProvider>,
|
||||
state: Mutex<LifecycleState>,
|
||||
persist_running: tokio::sync::Mutex<()>,
|
||||
}
|
||||
|
||||
/// The configuration options for the lifecycle on the ingester.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct LifecycleConfig {
|
||||
/// The ingester will pause pulling data from Kafka if it hits this amount of memory used, waiting
|
||||
/// until persistence evicts partitions from memory.
|
||||
pause_ingest_size: usize,
|
||||
/// When the ingester hits this threshold, the lifecycle manager will persist the largest
|
||||
/// partitions currently buffered until it falls below this threshold. An ingester running
|
||||
/// in a steady state should operate around this amount of memory usage.
|
||||
persist_memory_threshold: usize,
|
||||
/// If an individual partition crosses this threshold, it will be persisted. The purpose of this
/// setting is to ensure the ingester doesn't create Parquet files that are too large.
|
||||
partition_size_threshold: usize,
|
||||
/// If an individual partition has had data buffered for longer than this period of time, the
/// manager will persist it. This setting is to ensure we have an upper bound on how far back
/// we will need to read in Kafka on restart or recovery.
|
||||
partition_age_threshold: Duration,
|
||||
}
|
||||
|
||||
impl LifecycleConfig {
|
||||
/// Initialize a new LifecycleConfig. Panics if the passed `pause_ingest_size` is not greater
/// than the `persist_memory_threshold`.
|
||||
pub fn new(
|
||||
pause_ingest_size: usize,
|
||||
persist_memory_threshold: usize,
|
||||
partition_size_threshold: usize,
|
||||
partition_age_threshold: Duration,
|
||||
) -> Self {
|
||||
// this must be true to ensure that persistence will get triggered, freeing up memory
|
||||
assert!(pause_ingest_size > persist_memory_threshold);
|
||||
|
||||
Self {
|
||||
pause_ingest_size,
|
||||
persist_memory_threshold,
|
||||
partition_size_threshold,
|
||||
partition_age_threshold,
|
||||
}
|
||||
}
|
||||
}
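// A minimal sketch of building this config through `new`; the byte sizes and the
// 30 minute age below are illustrative assumptions, not values taken from this
// crate.
#[allow(dead_code)]
fn example_lifecycle_config() -> LifecycleConfig {
    LifecycleConfig::new(
        100 * 1024 * 1024,            // pause_ingest_size: hard ceiling that pauses ingest
        50 * 1024 * 1024,             // persist_memory_threshold: steady-state memory target
        10 * 1024 * 1024,             // partition_size_threshold: caps individual Parquet files
        Duration::from_secs(30 * 60), // partition_age_threshold: bounds Kafka replay on restart
    )
    // Swapping the first two arguments would panic, because persistence could then
    // never free enough memory for ingest to resume.
}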
|
||||
|
||||
#[derive(Default, Debug)]
|
||||
struct LifecycleState {
|
||||
total_bytes: usize,
|
||||
partition_stats: BTreeMap<PartitionId, PartitionLifecycleStats>,
|
||||
}
|
||||
|
||||
impl LifecycleState {
|
||||
fn remove(&mut self, partition_id: &PartitionId) -> Option<PartitionLifecycleStats> {
|
||||
self.partition_stats.remove(partition_id).map(|stats| {
|
||||
self.total_bytes -= stats.bytes_written;
|
||||
stats
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// A snapshot of the stats for the lifecycle manager
|
||||
#[derive(Debug)]
|
||||
pub struct LifecycleStats {
|
||||
/// total number of bytes the lifecycle manager is aware of across all sequencers and
|
||||
/// partitions. Based on the mutable batch sizes received into all partitions.
|
||||
pub total_bytes: usize,
|
||||
/// the stats for every partition the lifecycle manager is tracking.
|
||||
pub partition_stats: Vec<PartitionLifecycleStats>,
|
||||
}
|
||||
|
||||
/// The stats for a partition
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct PartitionLifecycleStats {
|
||||
/// The partition identifier
|
||||
partition_id: PartitionId,
|
||||
/// Time that the partition received its first write. This is reset anytime
|
||||
/// the partition is persisted.
|
||||
first_write: Time,
|
||||
/// The number of bytes in the partition as estimated by the mutable batch sizes.
|
||||
bytes_written: usize,
|
||||
}
|
||||
|
||||
impl LifecycleManager {
|
||||
/// Initialize a new lifecycle manager that will persist when `maybe_persist` is called
|
||||
/// if anything is over the size or age threshold.
|
||||
pub(crate) fn new(config: LifecycleConfig, time_provider: Arc<dyn TimeProvider>) -> Self {
|
||||
Self {
|
||||
config,
|
||||
time_provider,
|
||||
state: Default::default(),
|
||||
persist_running: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Logs bytes written into a partition so that it can be tracked for the manager to
|
||||
/// trigger persistence. Returns true if the ingester should pause consuming from the
|
||||
/// write buffer so that persistence can catch up and free up memory.
|
||||
pub fn log_write(&self, partition_id: PartitionId, bytes_written: usize) -> bool {
|
||||
let mut s = self.state.lock();
|
||||
s.partition_stats
|
||||
.entry(partition_id)
|
||||
.or_insert_with(|| PartitionLifecycleStats {
|
||||
partition_id,
|
||||
first_write: self.time_provider.now(),
|
||||
bytes_written: 0,
|
||||
})
|
||||
.bytes_written += bytes_written;
|
||||
s.total_bytes += bytes_written;
|
||||
|
||||
s.total_bytes > self.config.pause_ingest_size
|
||||
}
|
||||
|
||||
/// Returns true if the `total_bytes` tracked by the manager is less than the pause amount.
|
||||
/// As persistence runs, the `total_bytes` go down.
|
||||
pub fn can_resume_ingest(&self) -> bool {
|
||||
let s = self.state.lock();
|
||||
s.total_bytes < self.config.pause_ingest_size
|
||||
}
|
||||
|
||||
/// This will persist any partitions that are over their size or age thresholds and
|
||||
/// persist as many partitions as necessary (largest first) to get below the memory threshold.
|
||||
/// The persist operations are spawned in new tasks and run at the same time, but the
|
||||
/// function waits for all to return before completing.
|
||||
pub async fn maybe_persist<P: Persister>(&self, persister: &Arc<P>) {
|
||||
// ensure that this is only running one at a time
|
||||
self.persist_running.lock().await;
|
||||
|
||||
let LifecycleStats {
|
||||
mut total_bytes,
|
||||
partition_stats,
|
||||
} = self.stats();
|
||||
|
||||
// get anything over the threshold size or age to persist
|
||||
let now = self.time_provider.now();
|
||||
|
||||
let (to_persist, mut rest): (Vec<PartitionLifecycleStats>, Vec<PartitionLifecycleStats>) =
|
||||
partition_stats.into_iter().partition(|s| {
|
||||
let aged_out = now
|
||||
.checked_duration_since(s.first_write)
|
||||
.map(|age| age > self.config.partition_age_threshold)
|
||||
.unwrap_or(false);
|
||||
let sized_out = s.bytes_written > self.config.partition_size_threshold;
|
||||
|
||||
aged_out || sized_out
|
||||
});
|
||||
|
||||
let mut persist_tasks: Vec<_> = to_persist
|
||||
.into_iter()
|
||||
.map(|s| {
|
||||
let bytes_removed = self
|
||||
.remove(s.partition_id)
|
||||
.map(|s| s.bytes_written)
|
||||
.unwrap_or(0);
|
||||
total_bytes -= bytes_removed;
|
||||
let persister = Arc::clone(persister);
|
||||
|
||||
tokio::task::spawn(async move {
|
||||
persister.persist(s.partition_id).await;
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
// if we're still over the memory threshold, persist as many of the largest partitions
|
||||
// until we're under. It's ok if this is stale, it'll just get handled on the next pass
|
||||
// through.
|
||||
if total_bytes > self.config.persist_memory_threshold {
|
||||
let mut to_persist = vec![];
|
||||
|
||||
rest.sort_by(|a, b| b.bytes_written.cmp(&a.bytes_written));
|
||||
|
||||
for s in rest {
|
||||
total_bytes -= s.bytes_written;
|
||||
to_persist.push(s);
|
||||
if total_bytes < self.config.persist_memory_threshold {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let mut to_persist: Vec<_> = to_persist
|
||||
.into_iter()
|
||||
.map(|s| {
|
||||
self.remove(s.partition_id);
|
||||
let persister = Arc::clone(persister);
|
||||
tokio::task::spawn(async move {
|
||||
persister.persist(s.partition_id).await;
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
persist_tasks.append(&mut to_persist);
|
||||
}
|
||||
|
||||
let persists = futures::future::join_all(persist_tasks.into_iter());
|
||||
persists.await;
|
||||
}
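// Worked example of the selection above, with illustrative numbers: given
// persist_memory_threshold = 20, partition_size_threshold = 10 and partitions of
// 12, 9 and 6 bytes (none aged out), the first pass persists the 12-byte partition
// for exceeding the size threshold, dropping the total to 15, so the largest-first
// pass does nothing. With partitions of 9, 8 and 7 bytes instead, the first pass
// selects nothing and the largest-first pass persists only the 9-byte partition
// (24 - 9 = 15, which is below the threshold).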
|
||||
|
||||
/// Returns a point in time snapshot of the lifecycle state.
|
||||
pub fn stats(&self) -> LifecycleStats {
|
||||
let s = self.state.lock();
|
||||
let partition_stats: Vec<_> = s.partition_stats.values().cloned().collect();
|
||||
|
||||
LifecycleStats {
|
||||
total_bytes: s.total_bytes,
|
||||
partition_stats,
|
||||
}
|
||||
}
|
||||
|
||||
/// Removes the partition from the state
|
||||
pub fn remove(&self, partition_id: PartitionId) -> Option<PartitionLifecycleStats> {
|
||||
let mut s = self.state.lock();
|
||||
s.remove(&partition_id)
|
||||
}
|
||||
}
|
||||
|
||||
const CHECK_INTERVAL: Duration = Duration::from_secs(1);
|
||||
|
||||
/// Runs the lifecycle manager to trigger persistence every second.
|
||||
pub(crate) async fn run_lifecycle_manager<P: Persister>(
|
||||
manager: Arc<LifecycleManager>,
|
||||
persister: Arc<P>,
|
||||
) {
|
||||
loop {
|
||||
manager.maybe_persist(&persister).await;
|
||||
tokio::time::sleep(CHECK_INTERVAL).await;
|
||||
}
|
||||
}
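// A minimal sketch of how the write path is expected to drive the manager:
// `log_write` records the estimated bytes buffered for a partition and returns
// true once memory use crosses the pause threshold, after which the caller polls
// `can_resume_ingest` until persistence has freed enough memory. The function
// name and the 100ms poll interval are assumptions for illustration only.
#[allow(dead_code)]
async fn pause_until_persistence_catches_up(
    manager: &LifecycleManager,
    partition_id: PartitionId,
    bytes_buffered: usize,
) {
    if manager.log_write(partition_id, bytes_buffered) {
        while !manager.can_resume_ingest() {
            tokio::time::sleep(Duration::from_millis(100)).await;
        }
    }
}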
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use async_trait::async_trait;
|
||||
use std::collections::BTreeSet;
|
||||
use time::{MockProvider, SystemProvider};
|
||||
|
||||
#[derive(Default)]
|
||||
struct TestPersister {
|
||||
persist_called: Mutex<BTreeSet<PartitionId>>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Persister for TestPersister {
|
||||
async fn persist(&self, partition_id: PartitionId) {
|
||||
let mut p = self.persist_called.lock();
|
||||
p.insert(partition_id);
|
||||
}
|
||||
}
|
||||
|
||||
impl TestPersister {
|
||||
fn persist_called_for(&self, partition_id: PartitionId) -> bool {
|
||||
let p = self.persist_called.lock();
|
||||
p.contains(&partition_id)
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn logs_write() {
|
||||
let config = LifecycleConfig {
|
||||
pause_ingest_size: 20,
|
||||
persist_memory_threshold: 10,
|
||||
partition_size_threshold: 5,
|
||||
partition_age_threshold: Duration::from_nanos(0),
|
||||
};
|
||||
let time_provider = Arc::new(MockProvider::new(Time::from_timestamp_nanos(0)));
|
||||
let tp = Arc::clone(&time_provider);
|
||||
let m = LifecycleManager::new(config, tp);
|
||||
|
||||
// log first two writes at different times
|
||||
assert!(!m.log_write(PartitionId::new(1), 1));
|
||||
time_provider.inc(Duration::from_nanos(10));
|
||||
assert!(!m.log_write(PartitionId::new(1), 1));
|
||||
|
||||
// log another write for different partition
|
||||
assert!(!m.log_write(PartitionId::new(2), 3));
|
||||
|
||||
let stats = m.stats();
|
||||
assert_eq!(stats.total_bytes, 5);
|
||||
|
||||
let p1 = stats.partition_stats.get(0).unwrap();
|
||||
assert_eq!(p1.bytes_written, 2);
|
||||
assert_eq!(p1.partition_id, PartitionId::new(1));
|
||||
assert_eq!(p1.first_write, Time::from_timestamp_nanos(0));
|
||||
|
||||
let p2 = stats.partition_stats.get(1).unwrap();
|
||||
assert_eq!(p2.bytes_written, 3);
|
||||
assert_eq!(p2.partition_id, PartitionId::new(2));
|
||||
assert_eq!(p2.first_write, Time::from_timestamp_nanos(10));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn pausing_and_resuming_ingest() {
|
||||
let config = LifecycleConfig {
|
||||
pause_ingest_size: 20,
|
||||
persist_memory_threshold: 10,
|
||||
partition_size_threshold: 5,
|
||||
partition_age_threshold: Duration::from_nanos(0),
|
||||
};
|
||||
let time_provider = Arc::new(SystemProvider::new());
|
||||
let m = LifecycleManager::new(config, time_provider);
|
||||
|
||||
assert!(!m.log_write(PartitionId::new(1), 15));
|
||||
|
||||
// now it should indicate a pause
|
||||
assert!(m.log_write(PartitionId::new(1), 10));
|
||||
assert!(!m.can_resume_ingest());
|
||||
|
||||
m.remove(PartitionId::new(1));
|
||||
assert!(m.can_resume_ingest());
|
||||
assert!(!m.log_write(PartitionId::new(1), 3));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn persists_based_on_age() {
|
||||
let config = LifecycleConfig {
|
||||
pause_ingest_size: 30,
|
||||
persist_memory_threshold: 20,
|
||||
partition_size_threshold: 10,
|
||||
partition_age_threshold: Duration::from_nanos(5),
|
||||
};
|
||||
let time_provider = Arc::new(MockProvider::new(Time::from_timestamp_nanos(0)));
|
||||
let tp = Arc::clone(&time_provider);
|
||||
let m = LifecycleManager::new(config, tp);
|
||||
let partition_id = PartitionId::new(1);
|
||||
let persister = Arc::new(TestPersister::default());
|
||||
m.log_write(partition_id, 10);
|
||||
|
||||
m.maybe_persist(&persister).await;
|
||||
let stats = m.stats();
|
||||
assert_eq!(stats.total_bytes, 10);
|
||||
assert_eq!(stats.partition_stats[0].partition_id, partition_id);
|
||||
|
||||
// age out the partition
|
||||
time_provider.inc(Duration::from_nanos(6));
|
||||
|
||||
// validate that persist hasn't been called for the partition yet
|
||||
assert!(!persister.persist_called_for(partition_id));
|
||||
|
||||
// write in data for a new partition so we can be sure it isn't persisted, but the older one is
|
||||
m.log_write(PartitionId::new(2), 6);
|
||||
|
||||
m.maybe_persist(&persister).await;
|
||||
|
||||
assert!(persister.persist_called_for(partition_id));
|
||||
assert!(!persister.persist_called_for(PartitionId::new(2)));
|
||||
|
||||
let stats = m.stats();
|
||||
assert_eq!(stats.total_bytes, 6);
|
||||
assert_eq!(stats.partition_stats.len(), 1);
|
||||
assert_eq!(stats.partition_stats[0].partition_id, PartitionId::new(2));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn persists_based_on_partition_size() {
|
||||
let config = LifecycleConfig {
|
||||
pause_ingest_size: 30,
|
||||
persist_memory_threshold: 20,
|
||||
partition_size_threshold: 5,
|
||||
partition_age_threshold: Duration::from_millis(100),
|
||||
};
|
||||
let time_provider = Arc::new(MockProvider::new(Time::from_timestamp_nanos(0)));
|
||||
|
||||
let m = LifecycleManager::new(config, time_provider);
|
||||
let partition_id = PartitionId::new(1);
|
||||
let persister = Arc::new(TestPersister::default());
|
||||
m.log_write(partition_id, 4);
|
||||
|
||||
m.maybe_persist(&persister).await;
|
||||
|
||||
let stats = m.stats();
|
||||
assert_eq!(stats.total_bytes, 4);
|
||||
assert_eq!(stats.partition_stats[0].partition_id, partition_id);
|
||||
assert!(!persister.persist_called_for(partition_id));
|
||||
|
||||
// introduce a new partition under the limit to verify it doesn't get taken with the other
|
||||
m.log_write(PartitionId::new(2), 3);
|
||||
m.log_write(partition_id, 5);
|
||||
|
||||
m.maybe_persist(&persister).await;
|
||||
|
||||
assert!(persister.persist_called_for(partition_id));
|
||||
assert!(!persister.persist_called_for(PartitionId::new(2)));
|
||||
|
||||
let stats = m.stats();
|
||||
assert_eq!(stats.total_bytes, 3);
|
||||
assert_eq!(stats.partition_stats.len(), 1);
|
||||
assert_eq!(stats.partition_stats[0].partition_id, PartitionId::new(2));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn persists_based_on_memory_size() {
|
||||
let config = LifecycleConfig {
|
||||
pause_ingest_size: 60,
|
||||
persist_memory_threshold: 20,
|
||||
partition_size_threshold: 20,
|
||||
partition_age_threshold: Duration::from_millis(1000),
|
||||
};
|
||||
let time_provider = Arc::new(MockProvider::new(Time::from_timestamp_nanos(0)));
|
||||
let m = LifecycleManager::new(config, time_provider);
|
||||
let partition_id = PartitionId::new(1);
|
||||
let persister = Arc::new(TestPersister::default());
|
||||
m.log_write(partition_id, 8);
|
||||
m.log_write(PartitionId::new(2), 13);
|
||||
|
||||
m.maybe_persist(&persister).await;
|
||||
|
||||
// the bigger of the two partitions should have been persisted, leaving the smaller behind
|
||||
let stats = m.stats();
|
||||
assert_eq!(stats.total_bytes, 8);
|
||||
assert_eq!(stats.partition_stats[0].partition_id, partition_id);
|
||||
assert!(!persister.persist_called_for(partition_id));
|
||||
assert!(persister.persist_called_for(PartitionId::new(2)));
|
||||
|
||||
// add that partition back in over size
|
||||
m.log_write(partition_id, 20);
|
||||
m.log_write(PartitionId::new(2), 21);
|
||||
|
||||
// both partitions should now need to be persisted to bring us below the mem threshold of 20.
|
||||
m.maybe_persist(&persister).await;
|
||||
|
||||
assert!(persister.persist_called_for(partition_id));
|
||||
assert!(persister.persist_called_for(PartitionId::new(2)));
|
||||
|
||||
let stats = m.stats();
|
||||
assert_eq!(stats.total_bytes, 0);
|
||||
assert_eq!(stats.partition_stats.len(), 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn persist_based_on_partition_and_memory_size() {
|
||||
let config = LifecycleConfig {
|
||||
pause_ingest_size: 60,
|
||||
persist_memory_threshold: 6,
|
||||
partition_size_threshold: 5,
|
||||
partition_age_threshold: Duration::from_millis(1000),
|
||||
};
|
||||
let time_provider = Arc::new(MockProvider::new(Time::from_timestamp_nanos(0)));
|
||||
let tp = Arc::clone(&time_provider);
|
||||
let m = LifecycleManager::new(config, tp);
|
||||
let persister = Arc::new(TestPersister::default());
|
||||
m.log_write(PartitionId::new(1), 4);
|
||||
time_provider.inc(Duration::from_nanos(1));
|
||||
m.log_write(PartitionId::new(2), 6);
|
||||
time_provider.inc(Duration::from_nanos(1));
|
||||
m.log_write(PartitionId::new(3), 3);
|
||||
|
||||
m.maybe_persist(&persister).await;
|
||||
|
||||
// the two larger partitions should have been persisted, leaving the smallest behind
|
||||
let stats = m.stats();
|
||||
assert_eq!(stats.total_bytes, 3);
|
||||
assert_eq!(stats.partition_stats[0].partition_id, PartitionId::new(3));
|
||||
assert!(!persister.persist_called_for(PartitionId::new(3)));
|
||||
assert!(persister.persist_called_for(PartitionId::new(2)));
|
||||
assert!(persister.persist_called_for(PartitionId::new(1)));
|
||||
}
|
||||
}
@ -15,10 +15,7 @@ use datafusion::physical_plan::{
|
|||
SendableRecordBatchStream,
|
||||
};
|
||||
use iox_catalog::interface::{SequenceNumber, Tombstone};
|
||||
use predicate::{
|
||||
delete_predicate::parse_delete_predicate,
|
||||
predicate::{Predicate, PredicateMatch},
|
||||
};
|
||||
use predicate::{delete_predicate::parse_delete_predicate, Predicate, PredicateMatch};
|
||||
use query::{exec::stringset::StringSet, QueryChunk, QueryChunkMeta};
|
||||
use schema::{merge::merge_record_batch_schemas, selection::Selection, sort::SortKey, Schema};
|
||||
use snafu::{ResultExt, Snafu};
@ -22,9 +22,13 @@ pub struct IngesterServer<I: IngestHandler> {
|
|||
impl<I: IngestHandler> IngesterServer<I> {
|
||||
/// Initialise a new [`IngesterServer`] using the provided HTTP and gRPC
|
||||
/// handlers.
|
||||
pub fn new(http: HttpDelegate<I>, grpc: GrpcDelegate<I>) -> Self {
|
||||
pub fn new(
|
||||
metrics: Arc<metric::Registry>,
|
||||
http: HttpDelegate<I>,
|
||||
grpc: GrpcDelegate<I>,
|
||||
) -> Self {
|
||||
Self {
|
||||
metrics: Default::default(),
|
||||
metrics,
|
||||
http,
|
||||
grpc,
|
||||
}
@ -22,5 +22,6 @@ dotenv = "0.15.0"
|
|||
mutable_batch_lp = { path = "../mutable_batch_lp" }
|
||||
paste = "1.0.6"
|
||||
pretty_assertions = "1.0.0"
|
||||
test_helpers = { path = "../test_helpers" }
|
||||
|
||||
[features]
File diff suppressed because it is too large
@ -12,11 +12,11 @@
|
|||
)]
|
||||
|
||||
use crate::interface::{
|
||||
Catalog, ColumnType, Error, KafkaPartition, KafkaTopic, NamespaceSchema, QueryPool, Result,
|
||||
Sequencer, SequencerId, TableSchema,
|
||||
ColumnType, Error, KafkaPartition, KafkaTopic, NamespaceSchema, QueryPool, Result, Sequencer,
|
||||
SequencerId, TableSchema, Transaction,
|
||||
};
|
||||
use futures::{stream::FuturesOrdered, StreamExt};
|
||||
|
||||
use interface::{ParquetFile, ProcessedTombstone, Tombstone};
|
||||
use mutable_batch::MutableBatch;
|
||||
use std::{borrow::Cow, collections::BTreeMap};
|
||||
|
||||
|
@ -43,7 +43,7 @@ pub mod postgres;
|
|||
pub async fn validate_or_insert_schema<'a, T, U>(
|
||||
tables: T,
|
||||
schema: &NamespaceSchema,
|
||||
catalog: &dyn Catalog,
|
||||
txn: &mut dyn Transaction,
|
||||
) -> Result<Option<NamespaceSchema>>
|
||||
where
|
||||
T: IntoIterator<IntoIter = U, Item = (&'a str, &'a MutableBatch)> + Send + Sync,
|
||||
|
@ -55,7 +55,7 @@ where
|
|||
let mut schema = Cow::Borrowed(schema);
|
||||
|
||||
for (table_name, batch) in tables {
|
||||
validate_mutable_batch(batch, table_name, &mut schema, catalog).await?;
|
||||
validate_mutable_batch(batch, table_name, &mut schema, txn).await?;
|
||||
}
|
||||
|
||||
match schema {
|
||||
|
@ -68,7 +68,7 @@ async fn validate_mutable_batch(
|
|||
mb: &MutableBatch,
|
||||
table_name: &str,
|
||||
schema: &mut Cow<'_, NamespaceSchema>,
|
||||
catalog: &dyn Catalog,
|
||||
txn: &mut dyn Transaction,
|
||||
) -> Result<()> {
|
||||
// Check if the table exists in the schema.
|
||||
//
|
||||
|
@ -81,14 +81,14 @@ async fn validate_mutable_batch(
|
|||
//
|
||||
// Attempt to create the table in the catalog, or load an existing
|
||||
// table from the catalog to populate the cache.
|
||||
let mut table = catalog
|
||||
let mut table = txn
|
||||
.tables()
|
||||
.create_or_get(table_name, schema.id)
|
||||
.await
|
||||
.map(|t| TableSchema::new(t.id))?;
|
||||
|
||||
// Always add a time column to all new tables.
|
||||
let time_col = catalog
|
||||
let time_col = txn
|
||||
.columns()
|
||||
.create_or_get(TIME_COLUMN, table.id, ColumnType::Time)
|
||||
.await?;
|
||||
|
@ -134,7 +134,7 @@ async fn validate_mutable_batch(
|
|||
None => {
|
||||
// The column does not exist in the cache, create/get it from
|
||||
// the catalog, and add it to the table.
|
||||
let column = catalog
|
||||
let column = txn
|
||||
.columns()
|
||||
.create_or_get(name.as_str(), table.id, ColumnType::from(col.influx_type()))
|
||||
.await?;
|
||||
|
@ -161,34 +161,53 @@ async fn validate_mutable_batch(
|
|||
/// each of the partitions.
|
||||
pub async fn create_or_get_default_records(
|
||||
kafka_partition_count: i32,
|
||||
catalog: &dyn Catalog,
|
||||
txn: &mut dyn Transaction,
|
||||
) -> Result<(KafkaTopic, QueryPool, BTreeMap<SequencerId, Sequencer>)> {
|
||||
let kafka_topic = catalog
|
||||
.kafka_topics()
|
||||
.create_or_get(SHARED_KAFKA_TOPIC)
|
||||
.await?;
|
||||
let query_pool = catalog
|
||||
.query_pools()
|
||||
.create_or_get(SHARED_QUERY_POOL)
|
||||
.await?;
|
||||
let kafka_topic = txn.kafka_topics().create_or_get(SHARED_KAFKA_TOPIC).await?;
|
||||
let query_pool = txn.query_pools().create_or_get(SHARED_QUERY_POOL).await?;
|
||||
|
||||
let sequencers = (1..=kafka_partition_count)
|
||||
.map(|partition| {
|
||||
catalog
|
||||
.sequencers()
|
||||
.create_or_get(&kafka_topic, KafkaPartition::new(partition))
|
||||
})
|
||||
.collect::<FuturesOrdered<_>>()
|
||||
.map(|v| {
|
||||
let v = v.expect("failed to create sequencer");
|
||||
(v.id, v)
|
||||
})
|
||||
.collect::<BTreeMap<_, _>>()
|
||||
.await;
|
||||
let mut sequencers = BTreeMap::new();
|
||||
for partition in 1..=kafka_partition_count {
|
||||
let sequencer = txn
|
||||
.sequencers()
|
||||
.create_or_get(&kafka_topic, KafkaPartition::new(partition))
|
||||
.await?;
|
||||
sequencers.insert(sequencer.id, sequencer);
|
||||
}
|
||||
|
||||
Ok((kafka_topic, query_pool, sequencers))
|
||||
}
|
||||
|
||||
/// Insert the compacted parquet file and its tombstones
|
||||
pub async fn add_parquet_file_with_tombstones(
|
||||
parquet_file: &ParquetFile,
|
||||
tombstones: &[Tombstone],
|
||||
txn: &mut dyn Transaction,
|
||||
) -> Result<(ParquetFile, Vec<ProcessedTombstone>), Error> {
|
||||
// create a parquet file in the catalog first
|
||||
let parquet = txn
|
||||
.parquet_files()
|
||||
.create(
|
||||
parquet_file.sequencer_id,
|
||||
parquet_file.table_id,
|
||||
parquet_file.partition_id,
|
||||
parquet_file.object_store_id,
|
||||
parquet_file.min_sequence_number,
|
||||
parquet_file.max_sequence_number,
|
||||
parquet_file.min_time,
|
||||
parquet_file.max_time,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Now that the parquet file is available, create its processed tombstones
|
||||
let processed_tombstones = txn
|
||||
.processed_tombstones()
|
||||
.create_many(parquet.id, tombstones)
|
||||
.await?;
|
||||
|
||||
Ok((parquet, processed_tombstones))
|
||||
}
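// A minimal sketch of the calling convention introduced by this change: repository
// access goes through an explicit transaction that must be committed (or aborted)
// rather than through `&dyn Catalog` directly. The function and namespace names
// here are illustrative only; real callers wrap errors in their own snafu contexts.
#[allow(dead_code)]
async fn example_transaction_usage(catalog: &dyn crate::interface::Catalog) -> Result<()> {
    use std::ops::DerefMut;

    let mut txn = catalog.start_transaction().await?;
    let (kafka_topic, query_pool, _sequencers) =
        create_or_get_default_records(2, txn.deref_mut()).await?;
    let _namespace = txn
        .namespaces()
        .create("example_ns", "inf", kafka_topic.id, query_pool.id)
        .await?;
    txn.commit().await?;
    Ok(())
}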
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
@ -211,13 +230,16 @@ mod tests {
|
|||
#[allow(clippy::bool_assert_comparison)]
|
||||
#[tokio::test]
|
||||
async fn [<test_validate_schema_ $name>]() {
|
||||
use crate::interface::Catalog;
|
||||
use std::ops::DerefMut;
|
||||
use pretty_assertions::assert_eq;
|
||||
const NAMESPACE_NAME: &str = "bananas";
|
||||
|
||||
let repo = MemCatalog::new();
|
||||
let (kafka_topic, query_pool, _) = create_or_get_default_records(2, &repo).await.unwrap();
|
||||
let mut txn = repo.start_transaction().await.unwrap();
|
||||
let (kafka_topic, query_pool, _) = create_or_get_default_records(2, txn.deref_mut()).await.unwrap();
|
||||
|
||||
let namespace = repo
|
||||
let namespace = txn
|
||||
.namespaces()
|
||||
.create(NAMESPACE_NAME, "inf", kafka_topic.id, query_pool.id)
|
||||
.await
|
||||
|
@ -240,7 +262,7 @@ mod tests {
|
|||
let (writes, _) = mutable_batch_lp::lines_to_batches_stats(lp.as_str(), 42)
|
||||
.expect("failed to build test writes from LP");
|
||||
|
||||
let got = validate_or_insert_schema(writes.iter().map(|(k, v)| (k.as_str(), v)), &schema, &repo)
|
||||
let got = validate_or_insert_schema(writes.iter().map(|(k, v)| (k.as_str(), v)), &schema, txn.deref_mut())
|
||||
.await;
|
||||
|
||||
match got {
|
||||
|
@ -260,7 +282,7 @@ mod tests {
|
|||
// Invariant: in absence of concurrency, the schema within
|
||||
// the database must always match the incrementally built
|
||||
// cached schema.
|
||||
let db_schema = get_schema_by_name(NAMESPACE_NAME, &repo)
|
||||
let db_schema = get_schema_by_name(NAMESPACE_NAME, txn.deref_mut())
|
||||
.await
|
||||
.expect("database failed to query for namespace schema");
|
||||
assert_eq!(schema, db_schema, "schema in DB and cached schema differ");
@ -2,23 +2,26 @@
|
|||
//! used for testing or for an IOx designed to run without catalog persistence.
|
||||
|
||||
use crate::interface::{
|
||||
Catalog, Column, ColumnId, ColumnRepo, ColumnType, Error, KafkaPartition, KafkaTopic,
|
||||
KafkaTopicId, KafkaTopicRepo, Namespace, NamespaceId, NamespaceRepo, ParquetFile,
|
||||
ParquetFileId, ParquetFileRepo, Partition, PartitionId, PartitionRepo, QueryPool, QueryPoolId,
|
||||
QueryPoolRepo, Result, SequenceNumber, Sequencer, SequencerId, SequencerRepo, Table, TableId,
|
||||
TableRepo, Timestamp, Tombstone, TombstoneId, TombstoneRepo,
|
||||
sealed::TransactionFinalize, Catalog, Column, ColumnId, ColumnRepo, ColumnType, Error,
|
||||
KafkaPartition, KafkaTopic, KafkaTopicId, KafkaTopicRepo, Namespace, NamespaceId,
|
||||
NamespaceRepo, ParquetFile, ParquetFileId, ParquetFileRepo, Partition, PartitionId,
|
||||
PartitionInfo, PartitionRepo, ProcessedTombstone, ProcessedTombstoneRepo, QueryPool,
|
||||
QueryPoolId, QueryPoolRepo, Result, SequenceNumber, Sequencer, SequencerId, SequencerRepo,
|
||||
Table, TableId, TableRepo, Timestamp, Tombstone, TombstoneId, TombstoneRepo, Transaction,
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use observability_deps::tracing::warn;
|
||||
use std::convert::TryFrom;
|
||||
use std::fmt::Formatter;
|
||||
use std::sync::Mutex;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::{Mutex, OwnedMutexGuard};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// In-memory catalog that implements the `RepoCollection` and individual repo traits from
|
||||
/// the catalog interface.
|
||||
#[derive(Default)]
|
||||
pub struct MemCatalog {
|
||||
collections: Mutex<MemCollections>,
|
||||
collections: Arc<Mutex<MemCollections>>,
|
||||
}
|
||||
|
||||
impl MemCatalog {
|
||||
|
@ -30,12 +33,11 @@ impl MemCatalog {
|
|||
|
||||
impl std::fmt::Debug for MemCatalog {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
let c = self.collections.lock().expect("mutex poisoned");
|
||||
write!(f, "MemCatalog[ {:?} ]", c)
|
||||
f.debug_struct("MemCatalog").finish_non_exhaustive()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Debug)]
|
||||
#[derive(Default, Debug, Clone)]
|
||||
struct MemCollections {
|
||||
kafka_topics: Vec<KafkaTopic>,
|
||||
query_pools: Vec<QueryPool>,
|
||||
|
@ -46,6 +48,23 @@ struct MemCollections {
|
|||
partitions: Vec<Partition>,
|
||||
tombstones: Vec<Tombstone>,
|
||||
parquet_files: Vec<ParquetFile>,
|
||||
processed_tombstones: Vec<ProcessedTombstone>,
|
||||
}
|
||||
|
||||
/// Transaction bound to an in-memory catalog.
|
||||
#[derive(Debug)]
|
||||
pub struct MemTxn {
|
||||
guard: OwnedMutexGuard<MemCollections>,
|
||||
stage: MemCollections,
|
||||
finalized: bool,
|
||||
}
|
||||
|
||||
impl Drop for MemTxn {
|
||||
fn drop(&mut self) {
|
||||
if !self.finalized {
|
||||
warn!("Dropping MemTxn w/o finalizing (commit or abort)");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
|
@ -55,66 +74,95 @@ impl Catalog for MemCatalog {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn kafka_topics(&self) -> &dyn KafkaTopicRepo {
|
||||
async fn start_transaction(&self) -> Result<Box<dyn Transaction>, Error> {
|
||||
let guard = Arc::clone(&self.collections).lock_owned().await;
|
||||
let stage = guard.clone();
|
||||
Ok(Box::new(MemTxn {
|
||||
guard,
|
||||
stage,
|
||||
finalized: false,
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl TransactionFinalize for MemTxn {
|
||||
async fn commit_inplace(&mut self) -> Result<(), Error> {
|
||||
*self.guard = std::mem::take(&mut self.stage);
|
||||
self.finalized = true;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn abort_inplace(&mut self) -> Result<(), Error> {
|
||||
self.finalized = true;
|
||||
Ok(())
|
||||
}
|
||||
}
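// A small sketch of the copy-on-write semantics above: a transaction works on the
// cloned `stage`, and only `commit_inplace` moves that clone back into the shared,
// mutex-guarded collections; an aborted transaction simply discards the staged
// copy. The function and topic names are illustrative only.
#[allow(dead_code)]
async fn example_commit_semantics(catalog: &MemCatalog) {
    let mut txn = catalog.start_transaction().await.unwrap();
    // Visible only in this transaction's `stage` until commit.
    txn.kafka_topics().create_or_get("example_topic").await.unwrap();
    // Swaps the staged collections into the shared `MemCatalog` state.
    txn.commit().await.unwrap();
}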
|
||||
|
||||
#[async_trait]
|
||||
impl Transaction for MemTxn {
|
||||
fn kafka_topics(&mut self) -> &mut dyn KafkaTopicRepo {
|
||||
self
|
||||
}
|
||||
|
||||
fn query_pools(&self) -> &dyn QueryPoolRepo {
|
||||
fn query_pools(&mut self) -> &mut dyn QueryPoolRepo {
|
||||
self
|
||||
}
|
||||
|
||||
fn namespaces(&self) -> &dyn NamespaceRepo {
|
||||
fn namespaces(&mut self) -> &mut dyn NamespaceRepo {
|
||||
self
|
||||
}
|
||||
|
||||
fn tables(&self) -> &dyn TableRepo {
|
||||
fn tables(&mut self) -> &mut dyn TableRepo {
|
||||
self
|
||||
}
|
||||
|
||||
fn columns(&self) -> &dyn ColumnRepo {
|
||||
fn columns(&mut self) -> &mut dyn ColumnRepo {
|
||||
self
|
||||
}
|
||||
|
||||
fn sequencers(&self) -> &dyn SequencerRepo {
|
||||
fn sequencers(&mut self) -> &mut dyn SequencerRepo {
|
||||
self
|
||||
}
|
||||
|
||||
fn partitions(&self) -> &dyn PartitionRepo {
|
||||
fn partitions(&mut self) -> &mut dyn PartitionRepo {
|
||||
self
|
||||
}
|
||||
|
||||
fn tombstones(&self) -> &dyn TombstoneRepo {
|
||||
fn tombstones(&mut self) -> &mut dyn TombstoneRepo {
|
||||
self
|
||||
}
|
||||
|
||||
fn parquet_files(&self) -> &dyn ParquetFileRepo {
|
||||
fn parquet_files(&mut self) -> &mut dyn ParquetFileRepo {
|
||||
self
|
||||
}
|
||||
|
||||
fn processed_tombstones(&mut self) -> &mut dyn ProcessedTombstoneRepo {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl KafkaTopicRepo for MemCatalog {
|
||||
async fn create_or_get(&self, name: &str) -> Result<KafkaTopic> {
|
||||
let mut collections = self.collections.lock().expect("mutex poisoned");
|
||||
|
||||
let topic = match collections.kafka_topics.iter().find(|t| t.name == name) {
|
||||
impl KafkaTopicRepo for MemTxn {
|
||||
async fn create_or_get(&mut self, name: &str) -> Result<KafkaTopic> {
|
||||
let topic = match self.stage.kafka_topics.iter().find(|t| t.name == name) {
|
||||
Some(t) => t,
|
||||
None => {
|
||||
let topic = KafkaTopic {
|
||||
id: KafkaTopicId::new(collections.kafka_topics.len() as i32 + 1),
|
||||
id: KafkaTopicId::new(self.stage.kafka_topics.len() as i32 + 1),
|
||||
name: name.to_string(),
|
||||
};
|
||||
collections.kafka_topics.push(topic);
|
||||
collections.kafka_topics.last().unwrap()
|
||||
self.stage.kafka_topics.push(topic);
|
||||
self.stage.kafka_topics.last().unwrap()
|
||||
}
|
||||
};
|
||||
|
||||
Ok(topic.clone())
|
||||
}
|
||||
|
||||
async fn get_by_name(&self, name: &str) -> Result<Option<KafkaTopic>> {
|
||||
let collections = self.collections.lock().expect("mutex poisoned");
|
||||
let kafka_topic = collections
|
||||
async fn get_by_name(&mut self, name: &str) -> Result<Option<KafkaTopic>> {
|
||||
let kafka_topic = self
|
||||
.stage
|
||||
.kafka_topics
|
||||
.iter()
|
||||
.find(|t| t.name == name)
|
||||
|
@ -124,19 +172,17 @@ impl KafkaTopicRepo for MemCatalog {
|
|||
}
|
||||
|
||||
#[async_trait]
|
||||
impl QueryPoolRepo for MemCatalog {
|
||||
async fn create_or_get(&self, name: &str) -> Result<QueryPool> {
|
||||
let mut collections = self.collections.lock().expect("mutex poisoned");
|
||||
|
||||
let pool = match collections.query_pools.iter().find(|t| t.name == name) {
|
||||
impl QueryPoolRepo for MemTxn {
|
||||
async fn create_or_get(&mut self, name: &str) -> Result<QueryPool> {
|
||||
let pool = match self.stage.query_pools.iter().find(|t| t.name == name) {
|
||||
Some(t) => t,
|
||||
None => {
|
||||
let pool = QueryPool {
|
||||
id: QueryPoolId::new(collections.query_pools.len() as i16 + 1),
|
||||
id: QueryPoolId::new(self.stage.query_pools.len() as i16 + 1),
|
||||
name: name.to_string(),
|
||||
};
|
||||
collections.query_pools.push(pool);
|
||||
collections.query_pools.last().unwrap()
|
||||
self.stage.query_pools.push(pool);
|
||||
self.stage.query_pools.last().unwrap()
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -145,35 +191,34 @@ impl QueryPoolRepo for MemCatalog {
|
|||
}
|
||||
|
||||
#[async_trait]
|
||||
impl NamespaceRepo for MemCatalog {
|
||||
impl NamespaceRepo for MemTxn {
|
||||
async fn create(
|
||||
&self,
|
||||
&mut self,
|
||||
name: &str,
|
||||
retention_duration: &str,
|
||||
kafka_topic_id: KafkaTopicId,
|
||||
query_pool_id: QueryPoolId,
|
||||
) -> Result<Namespace> {
|
||||
let mut collections = self.collections.lock().expect("mutex poisoned");
|
||||
if collections.namespaces.iter().any(|n| n.name == name) {
|
||||
if self.stage.namespaces.iter().any(|n| n.name == name) {
|
||||
return Err(Error::NameExists {
|
||||
name: name.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
let namespace = Namespace {
|
||||
id: NamespaceId::new(collections.namespaces.len() as i32 + 1),
|
||||
id: NamespaceId::new(self.stage.namespaces.len() as i32 + 1),
|
||||
name: name.to_string(),
|
||||
kafka_topic_id,
|
||||
query_pool_id,
|
||||
retention_duration: Some(retention_duration.to_string()),
|
||||
};
|
||||
collections.namespaces.push(namespace);
|
||||
Ok(collections.namespaces.last().unwrap().clone())
|
||||
self.stage.namespaces.push(namespace);
|
||||
Ok(self.stage.namespaces.last().unwrap().clone())
|
||||
}
|
||||
|
||||
async fn get_by_name(&self, name: &str) -> Result<Option<Namespace>> {
|
||||
let collections = self.collections.lock().expect("mutex poisoned");
|
||||
Ok(collections
|
||||
async fn get_by_name(&mut self, name: &str) -> Result<Option<Namespace>> {
|
||||
Ok(self
|
||||
.stage
|
||||
.namespaces
|
||||
.iter()
|
||||
.find(|n| n.name == name)
|
||||
|
@ -182,11 +227,10 @@ impl NamespaceRepo for MemCatalog {
|
|||
}
|
||||
|
||||
#[async_trait]
|
||||
impl TableRepo for MemCatalog {
|
||||
async fn create_or_get(&self, name: &str, namespace_id: NamespaceId) -> Result<Table> {
|
||||
let mut collections = self.collections.lock().expect("mutex poisoned");
|
||||
|
||||
let table = match collections
|
||||
impl TableRepo for MemTxn {
|
||||
async fn create_or_get(&mut self, name: &str, namespace_id: NamespaceId) -> Result<Table> {
|
||||
let table = match self
|
||||
.stage
|
||||
.tables
|
||||
.iter()
|
||||
.find(|t| t.name == name && t.namespace_id == namespace_id)
|
||||
|
@ -194,21 +238,21 @@ impl TableRepo for MemCatalog {
|
|||
Some(t) => t,
|
||||
None => {
|
||||
let table = Table {
|
||||
id: TableId::new(collections.tables.len() as i32 + 1),
|
||||
id: TableId::new(self.stage.tables.len() as i32 + 1),
|
||||
namespace_id,
|
||||
name: name.to_string(),
|
||||
};
|
||||
collections.tables.push(table);
|
||||
collections.tables.last().unwrap()
|
||||
self.stage.tables.push(table);
|
||||
self.stage.tables.last().unwrap()
|
||||
}
|
||||
};
|
||||
|
||||
Ok(table.clone())
|
||||
}
|
||||
|
||||
async fn list_by_namespace_id(&self, namespace_id: NamespaceId) -> Result<Vec<Table>> {
|
||||
let collections = self.collections.lock().expect("mutex poisoned");
|
||||
let tables: Vec<_> = collections
|
||||
async fn list_by_namespace_id(&mut self, namespace_id: NamespaceId) -> Result<Vec<Table>> {
|
||||
let tables: Vec<_> = self
|
||||
.stage
|
||||
.tables
|
||||
.iter()
|
||||
.filter(|t| t.namespace_id == namespace_id)
|
||||
|
@ -219,16 +263,15 @@ impl TableRepo for MemCatalog {
|
|||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ColumnRepo for MemCatalog {
|
||||
impl ColumnRepo for MemTxn {
|
||||
async fn create_or_get(
|
||||
&self,
|
||||
&mut self,
|
||||
name: &str,
|
||||
table_id: TableId,
|
||||
column_type: ColumnType,
|
||||
) -> Result<Column> {
|
||||
let mut collections = self.collections.lock().expect("mutex poisoned");
|
||||
|
||||
let column = match collections
|
||||
let column = match self
|
||||
.stage
|
||||
.columns
|
||||
.iter()
|
||||
.find(|t| t.name == name && t.table_id == table_id)
|
||||
|
@ -246,31 +289,31 @@ impl ColumnRepo for MemCatalog {
|
|||
}
|
||||
None => {
|
||||
let column = Column {
|
||||
id: ColumnId::new(collections.columns.len() as i32 + 1),
|
||||
id: ColumnId::new(self.stage.columns.len() as i32 + 1),
|
||||
table_id,
|
||||
name: name.to_string(),
|
||||
column_type: column_type as i16,
|
||||
};
|
||||
collections.columns.push(column);
|
||||
collections.columns.last().unwrap()
|
||||
self.stage.columns.push(column);
|
||||
self.stage.columns.last().unwrap()
|
||||
}
|
||||
};
|
||||
|
||||
Ok(column.clone())
|
||||
}
|
||||
|
||||
async fn list_by_namespace_id(&self, namespace_id: NamespaceId) -> Result<Vec<Column>> {
|
||||
let collections = self.collections.lock().expect("mutex poisoned");
|
||||
|
||||
let table_ids: Vec<_> = collections
|
||||
async fn list_by_namespace_id(&mut self, namespace_id: NamespaceId) -> Result<Vec<Column>> {
|
||||
let table_ids: Vec<_> = self
|
||||
.stage
|
||||
.tables
|
||||
.iter()
|
||||
.filter(|t| t.namespace_id == namespace_id)
|
||||
.map(|t| t.id)
|
||||
.collect();
|
||||
println!("tables: {:?}", collections.tables);
|
||||
println!("tables: {:?}", self.stage.tables);
|
||||
println!("table_ids: {:?}", table_ids);
|
||||
let columns: Vec<_> = collections
|
||||
let columns: Vec<_> = self
|
||||
.stage
|
||||
.columns
|
||||
.iter()
|
||||
.filter(|c| table_ids.contains(&c.table_id))
|
||||
|
@ -282,15 +325,14 @@ impl ColumnRepo for MemCatalog {
|
|||
}
|
||||
|
||||
#[async_trait]
|
||||
impl SequencerRepo for MemCatalog {
|
||||
impl SequencerRepo for MemTxn {
|
||||
async fn create_or_get(
|
||||
&self,
|
||||
&mut self,
|
||||
topic: &KafkaTopic,
|
||||
partition: KafkaPartition,
|
||||
) -> Result<Sequencer> {
|
||||
let mut collections = self.collections.lock().expect("mutex poisoned");
|
||||
|
||||
let sequencer = match collections
|
||||
let sequencer = match self
|
||||
.stage
|
||||
.sequencers
|
||||
.iter()
|
||||
.find(|s| s.kafka_topic_id == topic.id && s.kafka_partition == partition)
|
||||
|
@ -298,13 +340,13 @@ impl SequencerRepo for MemCatalog {
|
|||
Some(t) => t,
|
||||
None => {
|
||||
let sequencer = Sequencer {
|
||||
id: SequencerId::new(collections.sequencers.len() as i16 + 1),
|
||||
id: SequencerId::new(self.stage.sequencers.len() as i16 + 1),
|
||||
kafka_topic_id: topic.id,
|
||||
kafka_partition: partition,
|
||||
min_unpersisted_sequence_number: 0,
|
||||
};
|
||||
collections.sequencers.push(sequencer);
|
||||
collections.sequencers.last().unwrap()
|
||||
self.stage.sequencers.push(sequencer);
|
||||
self.stage.sequencers.last().unwrap()
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -312,12 +354,12 @@ impl SequencerRepo for MemCatalog {
|
|||
}
|
||||
|
||||
async fn get_by_topic_id_and_partition(
|
||||
&self,
|
||||
&mut self,
|
||||
topic_id: KafkaTopicId,
|
||||
partition: KafkaPartition,
|
||||
) -> Result<Option<Sequencer>> {
|
||||
let collections = self.collections.lock().expect("mutex poisoned");
|
||||
let sequencer = collections
|
||||
let sequencer = self
|
||||
.stage
|
||||
.sequencers
|
||||
.iter()
|
||||
.find(|s| s.kafka_topic_id == topic_id && s.kafka_partition == partition)
|
||||
|
@ -325,14 +367,13 @@ impl SequencerRepo for MemCatalog {
|
|||
Ok(sequencer)
|
||||
}
|
||||
|
||||
async fn list(&self) -> Result<Vec<Sequencer>> {
|
||||
let collections = self.collections.lock().expect("mutex poisoned");
|
||||
Ok(collections.sequencers.clone())
|
||||
async fn list(&mut self) -> Result<Vec<Sequencer>> {
|
||||
Ok(self.stage.sequencers.clone())
|
||||
}
|
||||
|
||||
async fn list_by_kafka_topic(&self, topic: &KafkaTopic) -> Result<Vec<Sequencer>> {
|
||||
let collections = self.collections.lock().expect("mutex poisoned");
|
||||
let sequencers: Vec<_> = collections
|
||||
async fn list_by_kafka_topic(&mut self, topic: &KafkaTopic) -> Result<Vec<Sequencer>> {
|
||||
let sequencers: Vec<_> = self
|
||||
.stage
|
||||
.sequencers
|
||||
.iter()
|
||||
.filter(|s| s.kafka_topic_id == topic.id)
|
||||
|
@ -343,36 +384,35 @@ impl SequencerRepo for MemCatalog {
|
|||
}
|
||||
|
||||
#[async_trait]
|
||||
impl PartitionRepo for MemCatalog {
|
||||
impl PartitionRepo for MemTxn {
|
||||
async fn create_or_get(
|
||||
&self,
|
||||
&mut self,
|
||||
key: &str,
|
||||
sequencer_id: SequencerId,
|
||||
table_id: TableId,
|
||||
) -> Result<Partition> {
|
||||
let mut collections = self.collections.lock().expect("mutex poisoned");
|
||||
let partition = match collections.partitions.iter().find(|p| {
|
||||
let partition = match self.stage.partitions.iter().find(|p| {
|
||||
p.partition_key == key && p.sequencer_id == sequencer_id && p.table_id == table_id
|
||||
}) {
|
||||
Some(p) => p,
|
||||
None => {
|
||||
let p = Partition {
|
||||
id: PartitionId::new(collections.partitions.len() as i64 + 1),
|
||||
id: PartitionId::new(self.stage.partitions.len() as i64 + 1),
|
||||
sequencer_id,
|
||||
table_id,
|
||||
partition_key: key.to_string(),
|
||||
};
|
||||
collections.partitions.push(p);
|
||||
collections.partitions.last().unwrap()
|
||||
self.stage.partitions.push(p);
|
||||
self.stage.partitions.last().unwrap()
|
||||
}
|
||||
};
|
||||
|
||||
Ok(partition.clone())
|
||||
}
|
||||
|
||||
async fn list_by_sequencer(&self, sequencer_id: SequencerId) -> Result<Vec<Partition>> {
|
||||
let collections = self.collections.lock().expect("mutex poisoned");
|
||||
let partitions: Vec<_> = collections
|
||||
async fn list_by_sequencer(&mut self, sequencer_id: SequencerId) -> Result<Vec<Partition>> {
|
||||
let partitions: Vec<_> = self
|
||||
.stage
|
||||
.partitions
|
||||
.iter()
|
||||
.filter(|p| p.sequencer_id == sequencer_id)
|
||||
|
@ -380,12 +420,50 @@ impl PartitionRepo for MemCatalog {
|
|||
.collect();
|
||||
Ok(partitions)
|
||||
}
|
||||
|
||||
async fn partition_info_by_id(
|
||||
&mut self,
|
||||
partition_id: PartitionId,
|
||||
) -> Result<Option<PartitionInfo>> {
|
||||
let partition = self
|
||||
.stage
|
||||
.partitions
|
||||
.iter()
|
||||
.find(|p| p.id == partition_id)
|
||||
.cloned();
|
||||
|
||||
if let Some(partition) = partition {
|
||||
let table = self
|
||||
.stage
|
||||
.tables
|
||||
.iter()
|
||||
.find(|t| t.id == partition.table_id)
|
||||
.cloned();
|
||||
if let Some(table) = table {
|
||||
let namespace = self
|
||||
.stage
|
||||
.namespaces
|
||||
.iter()
|
||||
.find(|n| n.id == table.namespace_id)
|
||||
.cloned();
|
||||
if let Some(namespace) = namespace {
|
||||
return Ok(Some(PartitionInfo {
|
||||
namespace_name: namespace.name,
|
||||
table_name: table.name,
|
||||
partition,
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl TombstoneRepo for MemCatalog {
|
||||
impl TombstoneRepo for MemTxn {
|
||||
async fn create_or_get(
|
||||
&self,
|
||||
&mut self,
|
||||
table_id: TableId,
|
||||
sequencer_id: SequencerId,
|
||||
sequence_number: SequenceNumber,
|
||||
|
@ -393,8 +471,7 @@ impl TombstoneRepo for MemCatalog {
|
|||
max_time: Timestamp,
|
||||
predicate: &str,
|
||||
) -> Result<Tombstone> {
|
||||
let mut collections = self.collections.lock().expect("mutex poisoned");
|
||||
let tombstone = match collections.tombstones.iter().find(|t| {
|
||||
let tombstone = match self.stage.tombstones.iter().find(|t| {
|
||||
t.table_id == table_id
|
||||
&& t.sequencer_id == sequencer_id
|
||||
&& t.sequence_number == sequence_number
|
||||
|
@ -402,7 +479,7 @@ impl TombstoneRepo for MemCatalog {
|
|||
Some(t) => t,
|
||||
None => {
|
||||
let t = Tombstone {
|
||||
id: TombstoneId::new(collections.tombstones.len() as i64 + 1),
|
||||
id: TombstoneId::new(self.stage.tombstones.len() as i64 + 1),
|
||||
table_id,
|
||||
sequencer_id,
|
||||
sequence_number,
|
||||
|
@ -410,8 +487,8 @@ impl TombstoneRepo for MemCatalog {
|
|||
max_time,
|
||||
serialized_predicate: predicate.to_string(),
|
||||
};
|
||||
collections.tombstones.push(t);
|
||||
collections.tombstones.last().unwrap()
|
||||
self.stage.tombstones.push(t);
|
||||
self.stage.tombstones.last().unwrap()
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -419,12 +496,12 @@ impl TombstoneRepo for MemCatalog {
|
|||
}
|
||||
|
||||
async fn list_tombstones_by_sequencer_greater_than(
|
||||
&self,
|
||||
&mut self,
|
||||
sequencer_id: SequencerId,
|
||||
sequence_number: SequenceNumber,
|
||||
) -> Result<Vec<Tombstone>> {
|
||||
let collections = self.collections.lock().expect("mutex poisoned");
|
||||
let tombstones: Vec<_> = collections
|
||||
let tombstones: Vec<_> = self
|
||||
.stage
|
||||
.tombstones
|
||||
.iter()
|
||||
.filter(|t| t.sequencer_id == sequencer_id && t.sequence_number > sequence_number)
|
||||
|
@ -435,9 +512,9 @@ impl TombstoneRepo for MemCatalog {
|
|||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ParquetFileRepo for MemCatalog {
|
||||
impl ParquetFileRepo for MemTxn {
|
||||
async fn create(
|
||||
&self,
|
||||
&mut self,
|
||||
sequencer_id: SequencerId,
|
||||
table_id: TableId,
|
||||
partition_id: PartitionId,
|
||||
|
@ -447,8 +524,8 @@ impl ParquetFileRepo for MemCatalog {
|
|||
min_time: Timestamp,
|
||||
max_time: Timestamp,
|
||||
) -> Result<ParquetFile> {
|
||||
let mut collections = self.collections.lock().expect("mutex poisoned");
|
||||
if collections
|
||||
if self
|
||||
.stage
|
||||
.parquet_files
|
||||
.iter()
|
||||
.any(|f| f.object_store_id == object_store_id)
|
||||
|
@ -457,7 +534,7 @@ impl ParquetFileRepo for MemCatalog {
|
|||
}
|
||||
|
||||
let parquet_file = ParquetFile {
|
||||
id: ParquetFileId::new(collections.parquet_files.len() as i64 + 1),
|
||||
id: ParquetFileId::new(self.stage.parquet_files.len() as i64 + 1),
|
||||
sequencer_id,
|
||||
table_id,
|
||||
partition_id,
|
||||
|
@ -468,14 +545,12 @@ impl ParquetFileRepo for MemCatalog {
|
|||
max_time,
|
||||
to_delete: false,
|
||||
};
|
||||
collections.parquet_files.push(parquet_file);
|
||||
Ok(*collections.parquet_files.last().unwrap())
|
||||
self.stage.parquet_files.push(parquet_file);
|
||||
Ok(*self.stage.parquet_files.last().unwrap())
|
||||
}
|
||||
|
||||
async fn flag_for_delete(&self, id: ParquetFileId) -> Result<()> {
|
||||
let mut collections = self.collections.lock().expect("mutex poisoned");
|
||||
|
||||
match collections.parquet_files.iter_mut().find(|p| p.id == id) {
|
||||
async fn flag_for_delete(&mut self, id: ParquetFileId) -> Result<()> {
|
||||
match self.stage.parquet_files.iter_mut().find(|p| p.id == id) {
|
||||
Some(f) => f.to_delete = true,
|
||||
None => return Err(Error::ParquetRecordNotFound { id }),
|
||||
}
|
||||
|
@ -484,12 +559,12 @@ impl ParquetFileRepo for MemCatalog {
|
|||
}
|
||||
|
||||
async fn list_by_sequencer_greater_than(
|
||||
&self,
|
||||
&mut self,
|
||||
sequencer_id: SequencerId,
|
||||
sequence_number: SequenceNumber,
|
||||
) -> Result<Vec<ParquetFile>> {
|
||||
let collections = self.collections.lock().expect("mutex poisoned");
|
||||
let files: Vec<_> = collections
|
||||
let files: Vec<_> = self
|
||||
.stage
|
||||
.parquet_files
|
||||
.iter()
|
||||
.filter(|f| f.sequencer_id == sequencer_id && f.max_sequence_number > sequence_number)
|
||||
|
@ -497,6 +572,100 @@ impl ParquetFileRepo for MemCatalog {
|
|||
.collect();
|
||||
Ok(files)
|
||||
}
|
||||
|
||||
async fn exist(&mut self, id: ParquetFileId) -> Result<bool> {
|
||||
Ok(self.stage.parquet_files.iter().any(|f| f.id == id))
|
||||
}
|
||||
|
||||
async fn count(&mut self) -> Result<i64> {
|
||||
let count = self.stage.parquet_files.len();
|
||||
let count_i64 = i64::try_from(count);
|
||||
if count_i64.is_err() {
|
||||
return Err(Error::InvalidValue { value: count });
|
||||
}
|
||||
Ok(count_i64.unwrap())
|
||||
}
|
||||
}
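The manual is_err/unwrap pair in count() above can be expressed with map_err; a minimal sketch of the same conversion, with `files` standing in for `self.stage.parquet_files`:

fn count_as_i64(files: &[ParquetFile]) -> Result<i64> {
    let count = files.len();
    // try_from only fails if the length exceeds i64::MAX; surface that as InvalidValue.
    i64::try_from(count).map_err(|_| Error::InvalidValue { value: count })
}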
|
||||
|
||||
#[async_trait]
|
||||
impl ProcessedTombstoneRepo for MemTxn {
|
||||
async fn create_many(
|
||||
&mut self,
|
||||
parquet_file_id: ParquetFileId,
|
||||
tombstones: &[Tombstone],
|
||||
) -> Result<Vec<ProcessedTombstone>> {
|
||||
// check if the parquet file is available
|
||||
if !self
|
||||
.stage
|
||||
.parquet_files
|
||||
.iter()
|
||||
.any(|f| f.id == parquet_file_id)
|
||||
{
|
||||
return Err(Error::FileNotFound {
|
||||
id: parquet_file_id.get(),
|
||||
});
|
||||
}
|
||||
|
||||
let mut processed_tombstones = vec![];
|
||||
for tombstone in tombstones {
|
||||
// check if the tombstone exists
|
||||
if !self.stage.tombstones.iter().any(|f| f.id == tombstone.id) {
|
||||
return Err(Error::TombstoneNotFound {
|
||||
id: tombstone.id.get(),
|
||||
});
|
||||
}
|
||||
|
||||
if self
|
||||
.stage
|
||||
.processed_tombstones
|
||||
.iter()
|
||||
.any(|pt| pt.tombstone_id == tombstone.id && pt.parquet_file_id == parquet_file_id)
|
||||
{
|
||||
// The tombstone was already processed for this file
|
||||
return Err(Error::ProcessTombstoneExists {
|
||||
parquet_file_id: parquet_file_id.get(),
|
||||
tombstone_id: tombstone.id.get(),
|
||||
});
|
||||
}
|
||||
|
||||
let processed_tombstone = ProcessedTombstone {
|
||||
tombstone_id: tombstone.id,
|
||||
parquet_file_id,
|
||||
};
|
||||
processed_tombstones.push(processed_tombstone);
|
||||
}
|
||||
|
||||
// save for returning
|
||||
let return_processed_tombstones = processed_tombstones.clone();
|
||||
|
||||
// Add to the catalog
|
||||
self.stage
|
||||
.processed_tombstones
|
||||
.append(&mut processed_tombstones);
|
||||
|
||||
Ok(return_processed_tombstones)
|
||||
}
|
||||
|
||||
async fn exist(
|
||||
&mut self,
|
||||
parquet_file_id: ParquetFileId,
|
||||
tombstone_id: TombstoneId,
|
||||
) -> Result<bool> {
|
||||
Ok(self
|
||||
.stage
|
||||
.processed_tombstones
|
||||
.iter()
|
||||
.any(|f| f.parquet_file_id == parquet_file_id && f.tombstone_id == tombstone_id))
|
||||
}
|
||||
|
||||
async fn count(&mut self) -> Result<i64> {
|
||||
let count = self.stage.processed_tombstones.len();
|
||||
let count_i64 = i64::try_from(count);
|
||||
if count_i64.is_err() {
|
||||
return Err(Error::InvalidValue { value: count });
|
||||
}
|
||||
Ok(count_i64.unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@ -1,15 +1,16 @@
|
|||
//! A Postgres backed implementation of the Catalog
|
||||
|
||||
use crate::interface::{
|
||||
Catalog, Column, ColumnRepo, ColumnType, Error, KafkaPartition, KafkaTopic, KafkaTopicId,
|
||||
KafkaTopicRepo, Namespace, NamespaceId, NamespaceRepo, ParquetFile, ParquetFileId,
|
||||
ParquetFileRepo, Partition, PartitionId, PartitionRepo, QueryPool, QueryPoolId, QueryPoolRepo,
|
||||
Result, SequenceNumber, Sequencer, SequencerId, SequencerRepo, Table, TableId, TableRepo,
|
||||
Timestamp, Tombstone, TombstoneRepo,
|
||||
sealed::TransactionFinalize, Catalog, Column, ColumnRepo, ColumnType, Error, KafkaPartition,
|
||||
KafkaTopic, KafkaTopicId, KafkaTopicRepo, Namespace, NamespaceId, NamespaceRepo, ParquetFile,
|
||||
ParquetFileId, ParquetFileRepo, Partition, PartitionId, PartitionInfo, PartitionRepo,
|
||||
ProcessedTombstone, ProcessedTombstoneRepo, QueryPool, QueryPoolId, QueryPoolRepo, Result,
|
||||
SequenceNumber, Sequencer, SequencerId, SequencerRepo, Table, TableId, TableRepo, Timestamp,
|
||||
Tombstone, TombstoneId, TombstoneRepo, Transaction,
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use observability_deps::tracing::info;
|
||||
use sqlx::{migrate::Migrator, postgres::PgPoolOptions, Executor, Pool, Postgres};
|
||||
use observability_deps::tracing::{info, warn};
|
||||
use sqlx::{migrate::Migrator, postgres::PgPoolOptions, Executor, Pool, Postgres, Row};
|
||||
use std::time::Duration;
|
||||
use uuid::Uuid;
|
||||
|
||||
|
@ -21,12 +22,18 @@ pub const SCHEMA_NAME: &str = "iox_catalog";
|
|||
|
||||
static MIGRATOR: Migrator = sqlx::migrate!();
|
||||
|
||||
/// In-memory catalog that implements the `RepoCollection` and individual repo traits.
|
||||
/// PostgreSQL catalog.
|
||||
#[derive(Debug)]
|
||||
pub struct PostgresCatalog {
|
||||
pool: Pool<Postgres>,
|
||||
}
|
||||
|
||||
// struct to get the return value from a "SELECT count(*) ..." query
|
||||
#[derive(sqlx::FromRow)]
|
||||
struct Count {
|
||||
count: i64,
|
||||
}
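`Count` pairs with `sqlx::query_as` so the single `count` column deserializes straight into the struct; a minimal sketch of the pattern (the table name here is illustrative):

async fn count_rows(txn: &mut sqlx::Transaction<'static, Postgres>) -> Result<i64> {
    // query_as maps the single `count` column of the result row onto Count::count.
    let read_result = sqlx::query_as::<_, Count>(r#"SELECT count(*) as count FROM kafka_topic;"#)
        .fetch_one(txn)
        .await
        .map_err(|e| Error::SqlxError { source: e })?;
    Ok(read_result.count)
}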
|
||||
|
||||
impl PostgresCatalog {
|
||||
/// Connect to the catalog store.
|
||||
pub async fn connect(
|
||||
|
@ -63,6 +70,50 @@ impl PostgresCatalog {
|
|||
}
|
||||
}
|
||||
|
||||
/// Transaction for [`PostgresCatalog`].
|
||||
#[derive(Debug)]
|
||||
pub struct PostgresTxn {
|
||||
transaction: Option<sqlx::Transaction<'static, Postgres>>,
|
||||
}
|
||||
|
||||
impl PostgresTxn {
|
||||
fn transaction(&mut self) -> &mut sqlx::Transaction<'static, Postgres> {
|
||||
self.transaction.as_mut().expect("Not yet finalized")
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for PostgresTxn {
|
||||
fn drop(&mut self) {
|
||||
if self.transaction.is_some() {
|
||||
warn!("Dropping PostgresTxn w/o finalizing (commit or abort)");
|
||||
|
||||
// SQLx ensures that the inner transaction enqueues a rollback when it is dropped, so we don't need to spawn
|
||||
// a task here to call `rollback` manually.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl TransactionFinalize for PostgresTxn {
|
||||
async fn commit_inplace(&mut self) -> Result<(), Error> {
|
||||
self.transaction
|
||||
.take()
|
||||
.expect("Not yet finalized")
|
||||
.commit()
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })
|
||||
}
|
||||
|
||||
async fn abort_inplace(&mut self) -> Result<(), Error> {
|
||||
self.transaction
|
||||
.take()
|
||||
.expect("Not yet finalized")
|
||||
.rollback()
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })
|
||||
}
|
||||
}
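For orientation, a sketch of how a caller drives this transactional API; it assumes the `Transaction` trait exposes a `commit()` wrapper over the `commit_inplace` finalizer above, and the repo calls are illustrative:

async fn example(catalog: &dyn Catalog) -> Result<(), Error> {
    // Begin a transaction; every repo call below runs on the same connection.
    let mut txn = catalog.start_transaction().await?;
    let topic = txn.kafka_topics().create_or_get("example_topic").await?;
    let _sequencers = txn.sequencers().list_by_kafka_topic(&topic).await?;
    // Committing finalizes the transaction; dropping it uncommitted takes the
    // rollback-and-warn path in `Drop for PostgresTxn` above.
    txn.commit().await?;
    Ok(())
}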
|
||||
|
||||
#[async_trait]
|
||||
impl Catalog for PostgresCatalog {
|
||||
async fn setup(&self) -> Result<(), Error> {
|
||||
|
@ -74,46 +125,65 @@ impl Catalog for PostgresCatalog {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn kafka_topics(&self) -> &dyn KafkaTopicRepo {
|
||||
async fn start_transaction(&self) -> Result<Box<dyn Transaction>, Error> {
|
||||
let transaction = self
|
||||
.pool
|
||||
.begin()
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })?;
|
||||
|
||||
Ok(Box::new(PostgresTxn {
|
||||
transaction: Some(transaction),
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Transaction for PostgresTxn {
|
||||
fn kafka_topics(&mut self) -> &mut dyn KafkaTopicRepo {
|
||||
self
|
||||
}
|
||||
|
||||
fn query_pools(&self) -> &dyn QueryPoolRepo {
|
||||
fn query_pools(&mut self) -> &mut dyn QueryPoolRepo {
|
||||
self
|
||||
}
|
||||
|
||||
fn namespaces(&self) -> &dyn NamespaceRepo {
|
||||
fn namespaces(&mut self) -> &mut dyn NamespaceRepo {
|
||||
self
|
||||
}
|
||||
|
||||
fn tables(&self) -> &dyn TableRepo {
|
||||
fn tables(&mut self) -> &mut dyn TableRepo {
|
||||
self
|
||||
}
|
||||
|
||||
fn columns(&self) -> &dyn ColumnRepo {
|
||||
fn columns(&mut self) -> &mut dyn ColumnRepo {
|
||||
self
|
||||
}
|
||||
|
||||
fn sequencers(&self) -> &dyn SequencerRepo {
|
||||
fn sequencers(&mut self) -> &mut dyn SequencerRepo {
|
||||
self
|
||||
}
|
||||
|
||||
fn partitions(&self) -> &dyn PartitionRepo {
|
||||
fn partitions(&mut self) -> &mut dyn PartitionRepo {
|
||||
self
|
||||
}
|
||||
|
||||
fn tombstones(&self) -> &dyn TombstoneRepo {
|
||||
fn tombstones(&mut self) -> &mut dyn TombstoneRepo {
|
||||
self
|
||||
}
|
||||
|
||||
fn parquet_files(&self) -> &dyn ParquetFileRepo {
|
||||
fn parquet_files(&mut self) -> &mut dyn ParquetFileRepo {
|
||||
self
|
||||
}
|
||||
|
||||
fn processed_tombstones(&mut self) -> &mut dyn ProcessedTombstoneRepo {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl KafkaTopicRepo for PostgresCatalog {
|
||||
async fn create_or_get(&self, name: &str) -> Result<KafkaTopic> {
|
||||
impl KafkaTopicRepo for PostgresTxn {
|
||||
async fn create_or_get(&mut self, name: &str) -> Result<KafkaTopic> {
|
||||
let rec = sqlx::query_as::<_, KafkaTopic>(
|
||||
r#"
|
||||
INSERT INTO kafka_topic ( name )
|
||||
|
@ -123,21 +193,21 @@ DO UPDATE SET name = kafka_topic.name RETURNING *;
|
|||
"#,
|
||||
)
|
||||
.bind(&name) // $1
|
||||
.fetch_one(&self.pool)
|
||||
.fetch_one(self.transaction())
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })?;
|
||||
|
||||
Ok(rec)
|
||||
}
|
||||
|
||||
async fn get_by_name(&self, name: &str) -> Result<Option<KafkaTopic>> {
|
||||
async fn get_by_name(&mut self, name: &str) -> Result<Option<KafkaTopic>> {
|
||||
let rec = sqlx::query_as::<_, KafkaTopic>(
|
||||
r#"
|
||||
SELECT * FROM kafka_topic WHERE name = $1;
|
||||
"#,
|
||||
)
|
||||
.bind(&name) // $1
|
||||
.fetch_one(&self.pool)
|
||||
.fetch_one(self.transaction())
|
||||
.await;
|
||||
|
||||
if let Err(sqlx::Error::RowNotFound) = rec {
|
||||
|
@ -151,8 +221,8 @@ SELECT * FROM kafka_topic WHERE name = $1;
|
|||
}
|
||||
|
||||
#[async_trait]
|
||||
impl QueryPoolRepo for PostgresCatalog {
|
||||
async fn create_or_get(&self, name: &str) -> Result<QueryPool> {
|
||||
impl QueryPoolRepo for PostgresTxn {
|
||||
async fn create_or_get(&mut self, name: &str) -> Result<QueryPool> {
|
||||
let rec = sqlx::query_as::<_, QueryPool>(
|
||||
r#"
|
||||
INSERT INTO query_pool ( name )
|
||||
|
@ -162,7 +232,7 @@ DO UPDATE SET name = query_pool.name RETURNING *;
|
|||
"#,
|
||||
)
|
||||
.bind(&name) // $1
|
||||
.fetch_one(&self.pool)
|
||||
.fetch_one(self.transaction())
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })?;
|
||||
|
||||
|
@ -171,9 +241,9 @@ DO UPDATE SET name = query_pool.name RETURNING *;
|
|||
}
|
||||
|
||||
#[async_trait]
|
||||
impl NamespaceRepo for PostgresCatalog {
|
||||
impl NamespaceRepo for PostgresTxn {
|
||||
async fn create(
|
||||
&self,
|
||||
&mut self,
|
||||
name: &str,
|
||||
retention_duration: &str,
|
||||
kafka_topic_id: KafkaTopicId,
|
||||
|
@ -190,7 +260,7 @@ RETURNING *
|
|||
.bind(&retention_duration) // $2
|
||||
.bind(kafka_topic_id) // $3
|
||||
.bind(query_pool_id) // $4
|
||||
.fetch_one(&self.pool)
|
||||
.fetch_one(self.transaction())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
if is_unique_violation(&e) {
|
||||
|
@ -207,14 +277,14 @@ RETURNING *
|
|||
Ok(rec)
|
||||
}
|
||||
|
||||
async fn get_by_name(&self, name: &str) -> Result<Option<Namespace>> {
|
||||
async fn get_by_name(&mut self, name: &str) -> Result<Option<Namespace>> {
|
||||
let rec = sqlx::query_as::<_, Namespace>(
|
||||
r#"
|
||||
SELECT * FROM namespace WHERE name = $1;
|
||||
"#,
|
||||
)
|
||||
.bind(&name) // $1
|
||||
.fetch_one(&self.pool)
|
||||
.fetch_one(self.transaction())
|
||||
.await;
|
||||
|
||||
if let Err(sqlx::Error::RowNotFound) = rec {
|
||||
|
@ -228,8 +298,8 @@ SELECT * FROM namespace WHERE name = $1;
|
|||
}
|
||||
|
||||
#[async_trait]
|
||||
impl TableRepo for PostgresCatalog {
|
||||
async fn create_or_get(&self, name: &str, namespace_id: NamespaceId) -> Result<Table> {
|
||||
impl TableRepo for PostgresTxn {
|
||||
async fn create_or_get(&mut self, name: &str, namespace_id: NamespaceId) -> Result<Table> {
|
||||
let rec = sqlx::query_as::<_, Table>(
|
||||
r#"
|
||||
INSERT INTO table_name ( name, namespace_id )
|
||||
|
@ -240,7 +310,7 @@ DO UPDATE SET name = table_name.name RETURNING *;
|
|||
)
|
||||
.bind(&name) // $1
|
||||
.bind(&namespace_id) // $2
|
||||
.fetch_one(&self.pool)
|
||||
.fetch_one(self.transaction())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
if is_fk_violation(&e) {
|
||||
|
@ -253,7 +323,7 @@ DO UPDATE SET name = table_name.name RETURNING *;
|
|||
Ok(rec)
|
||||
}
|
||||
|
||||
async fn list_by_namespace_id(&self, namespace_id: NamespaceId) -> Result<Vec<Table>> {
|
||||
async fn list_by_namespace_id(&mut self, namespace_id: NamespaceId) -> Result<Vec<Table>> {
|
||||
let rec = sqlx::query_as::<_, Table>(
|
||||
r#"
|
||||
SELECT * FROM table_name
|
||||
|
@ -261,7 +331,7 @@ WHERE namespace_id = $1;
|
|||
"#,
|
||||
)
|
||||
.bind(&namespace_id)
|
||||
.fetch_all(&self.pool)
|
||||
.fetch_all(self.transaction())
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })?;
|
||||
|
||||
|
@ -270,9 +340,9 @@ WHERE namespace_id = $1;
|
|||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ColumnRepo for PostgresCatalog {
|
||||
impl ColumnRepo for PostgresTxn {
|
||||
async fn create_or_get(
|
||||
&self,
|
||||
&mut self,
|
||||
name: &str,
|
||||
table_id: TableId,
|
||||
column_type: ColumnType,
|
||||
|
@ -290,7 +360,7 @@ DO UPDATE SET name = column_name.name RETURNING *;
|
|||
.bind(&name) // $1
|
||||
.bind(&table_id) // $2
|
||||
.bind(&ct) // $3
|
||||
.fetch_one(&self.pool)
|
||||
.fetch_one(self.transaction())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
if is_fk_violation(&e) {
|
||||
|
@ -311,7 +381,7 @@ DO UPDATE SET name = column_name.name RETURNING *;
|
|||
Ok(rec)
|
||||
}
|
||||
|
||||
async fn list_by_namespace_id(&self, namespace_id: NamespaceId) -> Result<Vec<Column>> {
|
||||
async fn list_by_namespace_id(&mut self, namespace_id: NamespaceId) -> Result<Vec<Column>> {
|
||||
let rec = sqlx::query_as::<_, Column>(
|
||||
r#"
|
||||
SELECT column_name.* FROM table_name
|
||||
|
@ -320,7 +390,7 @@ WHERE table_name.namespace_id = $1;
|
|||
"#,
|
||||
)
|
||||
.bind(&namespace_id)
|
||||
.fetch_all(&self.pool)
|
||||
.fetch_all(self.transaction())
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })?;
|
||||
|
||||
|
@ -329,9 +399,9 @@ WHERE table_name.namespace_id = $1;
|
|||
}
|
||||
|
||||
#[async_trait]
|
||||
impl SequencerRepo for PostgresCatalog {
|
||||
impl SequencerRepo for PostgresTxn {
|
||||
async fn create_or_get(
|
||||
&self,
|
||||
&mut self,
|
||||
topic: &KafkaTopic,
|
||||
partition: KafkaPartition,
|
||||
) -> Result<Sequencer> {
|
||||
|
@ -347,7 +417,7 @@ impl SequencerRepo for PostgresCatalog {
|
|||
)
|
||||
.bind(&topic.id) // $1
|
||||
.bind(&partition) // $2
|
||||
.fetch_one(&self.pool)
|
||||
.fetch_one(self.transaction())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
if is_fk_violation(&e) {
|
||||
|
@ -359,7 +429,7 @@ impl SequencerRepo for PostgresCatalog {
|
|||
}
|
||||
|
||||
async fn get_by_topic_id_and_partition(
|
||||
&self,
|
||||
&mut self,
|
||||
topic_id: KafkaTopicId,
|
||||
partition: KafkaPartition,
|
||||
) -> Result<Option<Sequencer>> {
|
||||
|
@ -370,7 +440,7 @@ SELECT * FROM sequencer WHERE kafka_topic_id = $1 AND kafka_partition = $2;
|
|||
)
|
||||
.bind(topic_id) // $1
|
||||
.bind(partition) // $2
|
||||
.fetch_one(&self.pool)
|
||||
.fetch_one(self.transaction())
|
||||
.await;
|
||||
|
||||
if let Err(sqlx::Error::RowNotFound) = rec {
|
||||
|
@ -382,26 +452,26 @@ SELECT * FROM sequencer WHERE kafka_topic_id = $1 AND kafka_partition = $2;
|
|||
Ok(Some(sequencer))
|
||||
}
|
||||
|
||||
async fn list(&self) -> Result<Vec<Sequencer>> {
|
||||
async fn list(&mut self) -> Result<Vec<Sequencer>> {
|
||||
sqlx::query_as::<_, Sequencer>(r#"SELECT * FROM sequencer;"#)
|
||||
.fetch_all(&self.pool)
|
||||
.fetch_all(self.transaction())
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })
|
||||
}
|
||||
|
||||
async fn list_by_kafka_topic(&self, topic: &KafkaTopic) -> Result<Vec<Sequencer>> {
|
||||
async fn list_by_kafka_topic(&mut self, topic: &KafkaTopic) -> Result<Vec<Sequencer>> {
|
||||
sqlx::query_as::<_, Sequencer>(r#"SELECT * FROM sequencer WHERE kafka_topic_id = $1;"#)
|
||||
.bind(&topic.id) // $1
|
||||
.fetch_all(&self.pool)
|
||||
.fetch_all(self.transaction())
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl PartitionRepo for PostgresCatalog {
|
||||
impl PartitionRepo for PostgresTxn {
|
||||
async fn create_or_get(
|
||||
&self,
|
||||
&mut self,
|
||||
key: &str,
|
||||
sequencer_id: SequencerId,
|
||||
table_id: TableId,
|
||||
|
@ -419,7 +489,7 @@ impl PartitionRepo for PostgresCatalog {
|
|||
.bind(key) // $1
|
||||
.bind(&sequencer_id) // $2
|
||||
.bind(&table_id) // $3
|
||||
.fetch_one(&self.pool)
|
||||
.fetch_one(self.transaction())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
if is_fk_violation(&e) {
|
||||
|
@ -430,19 +500,53 @@ impl PartitionRepo for PostgresCatalog {
|
|||
})
|
||||
}
|
||||
|
||||
async fn list_by_sequencer(&self, sequencer_id: SequencerId) -> Result<Vec<Partition>> {
|
||||
async fn list_by_sequencer(&mut self, sequencer_id: SequencerId) -> Result<Vec<Partition>> {
|
||||
sqlx::query_as::<_, Partition>(r#"SELECT * FROM partition WHERE sequencer_id = $1;"#)
|
||||
.bind(&sequencer_id) // $1
|
||||
.fetch_all(&self.pool)
|
||||
.fetch_all(self.transaction())
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })
|
||||
}
|
||||
|
||||
async fn partition_info_by_id(
|
||||
&mut self,
|
||||
partition_id: PartitionId,
|
||||
) -> Result<Option<PartitionInfo>> {
|
||||
let info = sqlx::query(
|
||||
r#"
|
||||
SELECT namespace.name as namespace_name, table_name.name as table_name, partition.id,
|
||||
partition.sequencer_id, partition.table_id, partition.partition_key
|
||||
FROM partition
|
||||
INNER JOIN table_name on table_name.id = partition.table_id
|
||||
INNER JOIN namespace on namespace.id = table_name.namespace_id
|
||||
WHERE partition.id = $1;"#,
|
||||
)
|
||||
.bind(&partition_id) // $1
|
||||
.fetch_one(self.transaction())
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })?;
|
||||
|
||||
let namespace_name = info.get("namespace_name");
|
||||
let table_name = info.get("table_name");
|
||||
let partition = Partition {
|
||||
id: info.get("id"),
|
||||
sequencer_id: info.get("sequencer_id"),
|
||||
table_id: info.get("table_id"),
|
||||
partition_key: info.get("partition_key"),
|
||||
};
|
||||
|
||||
Ok(Some(PartitionInfo {
|
||||
namespace_name,
|
||||
table_name,
|
||||
partition,
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl TombstoneRepo for PostgresCatalog {
|
||||
impl TombstoneRepo for PostgresTxn {
|
||||
async fn create_or_get(
|
||||
&self,
|
||||
&mut self,
|
||||
table_id: TableId,
|
||||
sequencer_id: SequencerId,
|
||||
sequence_number: SequenceNumber,
|
||||
|
@ -466,7 +570,7 @@ impl TombstoneRepo for PostgresCatalog {
|
|||
.bind(&min_time) // $4
|
||||
.bind(&max_time) // $5
|
||||
.bind(predicate) // $6
|
||||
.fetch_one(&self.pool)
|
||||
.fetch_one(self.transaction())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
if is_fk_violation(&e) {
|
||||
|
@ -478,23 +582,23 @@ impl TombstoneRepo for PostgresCatalog {
|
|||
}
|
||||
|
||||
async fn list_tombstones_by_sequencer_greater_than(
|
||||
&self,
|
||||
&mut self,
|
||||
sequencer_id: SequencerId,
|
||||
sequence_number: SequenceNumber,
|
||||
) -> Result<Vec<Tombstone>> {
|
||||
sqlx::query_as::<_, Tombstone>(r#"SELECT * FROM tombstone WHERE sequencer_id = $1 AND sequence_number > $2 ORDER BY id;"#)
|
||||
.bind(&sequencer_id) // $1
|
||||
.bind(&sequence_number) // $2
|
||||
.fetch_all(&self.pool)
|
||||
.fetch_all(self.transaction())
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ParquetFileRepo for PostgresCatalog {
|
||||
impl ParquetFileRepo for PostgresTxn {
|
||||
async fn create(
|
||||
&self,
|
||||
&mut self,
|
||||
sequencer_id: SequencerId,
|
||||
table_id: TableId,
|
||||
partition_id: PartitionId,
|
||||
|
@ -519,7 +623,7 @@ RETURNING *
|
|||
.bind(max_sequence_number) // $6
|
||||
.bind(min_time) // $7
|
||||
.bind(max_time) // $8
|
||||
.fetch_one(&self.pool)
|
||||
.fetch_one(self.transaction())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
if is_unique_violation(&e) {
|
||||
|
@ -536,10 +640,10 @@ RETURNING *
|
|||
Ok(rec)
|
||||
}
|
||||
|
||||
async fn flag_for_delete(&self, id: ParquetFileId) -> Result<()> {
|
||||
async fn flag_for_delete(&mut self, id: ParquetFileId) -> Result<()> {
|
||||
let _ = sqlx::query(r#"UPDATE parquet_file SET to_delete = true WHERE id = $1;"#)
|
||||
.bind(&id) // $1
|
||||
.execute(&self.pool)
|
||||
.execute(self.transaction())
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })?;
|
||||
|
||||
|
@ -547,17 +651,109 @@ RETURNING *
|
|||
}
|
||||
|
||||
async fn list_by_sequencer_greater_than(
|
||||
&self,
|
||||
&mut self,
|
||||
sequencer_id: SequencerId,
|
||||
sequence_number: SequenceNumber,
|
||||
) -> Result<Vec<ParquetFile>> {
|
||||
sqlx::query_as::<_, ParquetFile>(r#"SELECT * FROM parquet_file WHERE sequencer_id = $1 AND max_sequence_number > $2 ORDER BY id;"#)
|
||||
.bind(&sequencer_id) // $1
|
||||
.bind(&sequence_number) // $2
|
||||
.fetch_all(&self.pool)
|
||||
.fetch_all(self.transaction())
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })
|
||||
}
|
||||
|
||||
async fn exist(&mut self, id: ParquetFileId) -> Result<bool> {
|
||||
let read_result = sqlx::query_as::<_, Count>(
|
||||
r#"SELECT count(*) as count FROM parquet_file WHERE id = $1;"#,
|
||||
)
|
||||
.bind(&id) // $1
|
||||
.fetch_one(self.transaction())
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })?;
|
||||
|
||||
Ok(read_result.count > 0)
|
||||
}
|
||||
|
||||
async fn count(&mut self) -> Result<i64> {
|
||||
let read_result =
|
||||
sqlx::query_as::<_, Count>(r#"SELECT count(*) as count FROM parquet_file;"#)
|
||||
.fetch_one(self.transaction())
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })?;
|
||||
|
||||
Ok(read_result.count)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ProcessedTombstoneRepo for PostgresTxn {
|
||||
async fn create_many(
|
||||
&mut self,
|
||||
parquet_file_id: ParquetFileId,
|
||||
tombstones: &[Tombstone],
|
||||
) -> Result<Vec<ProcessedTombstone>> {
|
||||
// no transaction provided
|
||||
// todo: we should never need this, but right now we implement two catalogs,
|
||||
// postgres (for production) and mem (for testing only), and the mem one does not need to provide a txn;
|
||||
// this will be refactored when Marco's new abstraction is done
|
||||
let mut processed_tombstones = vec![];
|
||||
for tombstone in tombstones {
|
||||
let processed_tombstone = sqlx::query_as::<_, ProcessedTombstone>(
|
||||
r#"
|
||||
INSERT INTO processed_tombstone ( tombstone_id, parquet_file_id )
|
||||
VALUES ( $1, $2 )
|
||||
RETURNING *
|
||||
"#,
|
||||
)
|
||||
.bind(tombstone.id) // $1
|
||||
.bind(parquet_file_id) // $2
|
||||
.fetch_one(self.transaction())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
if is_unique_violation(&e) {
|
||||
Error::ProcessTombstoneExists {
|
||||
tombstone_id: tombstone.id.get(),
|
||||
parquet_file_id: parquet_file_id.get(),
|
||||
}
|
||||
} else if is_fk_violation(&e) {
|
||||
Error::ForeignKeyViolation { source: e }
|
||||
} else {
|
||||
Error::SqlxError { source: e }
|
||||
}
|
||||
})?;
|
||||
|
||||
processed_tombstones.push(processed_tombstone);
|
||||
}
|
||||
|
||||
Ok(processed_tombstones)
|
||||
}
|
||||
|
||||
async fn exist(
|
||||
&mut self,
|
||||
parquet_file_id: ParquetFileId,
|
||||
tombstone_id: TombstoneId,
|
||||
) -> Result<bool> {
|
||||
let read_result = sqlx::query_as::<_, Count>(
|
||||
r#"SELECT count(*) as count FROM processed_tombstone WHERE parquet_file_id = $1 AND tombstone_id = $2;"#)
|
||||
.bind(&parquet_file_id) // $1
|
||||
.bind(&tombstone_id) // $2
|
||||
.fetch_one(self.transaction())
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })?;
|
||||
|
||||
Ok(read_result.count > 0)
|
||||
}
|
||||
|
||||
async fn count(&mut self) -> Result<i64> {
|
||||
let read_result =
|
||||
sqlx::query_as::<_, Count>(r#"SELECT count(*) as count FROM processed_tombstone;"#)
|
||||
.fetch_one(self.transaction())
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })?;
|
||||
|
||||
Ok(read_result.count)
|
||||
}
|
||||
}
|
||||
|
||||
/// The error code returned by Postgres for a unique constraint violation.
|
||||
|
@ -659,6 +855,10 @@ mod tests {
|
|||
}
|
||||
|
||||
async fn clear_schema(pool: &Pool<Postgres>) {
|
||||
sqlx::query("delete from processed_tombstone;")
|
||||
.execute(pool)
|
||||
.await
|
||||
.unwrap();
|
||||
sqlx::query("delete from tombstone;")
|
||||
.execute(pool)
|
||||
.await
|
||||
@ -5,7 +5,7 @@ use data_types::{
|
|||
};
|
||||
use datafusion::physical_plan::SendableRecordBatchStream;
|
||||
use iox_object_store::{IoxObjectStore, ParquetFilePath};
|
||||
use predicate::predicate::Predicate;
|
||||
use predicate::Predicate;
|
||||
use schema::selection::Selection;
|
||||
use schema::{Schema, TIME_COLUMN_NAME};
|
||||
use snafu::{ResultExt, Snafu};
|
||||
@ -92,7 +92,9 @@ use data_types::{
|
|||
};
|
||||
use generated_types::influxdata::iox::ingest::v1 as proto;
|
||||
use generated_types::influxdata::iox::preserved_catalog::v1 as preserved_catalog;
|
||||
use iox_catalog::interface::{NamespaceId, PartitionId, SequenceNumber, SequencerId, TableId};
|
||||
use iox_catalog::interface::{
|
||||
NamespaceId, ParquetFile, ParquetFileId, PartitionId, SequenceNumber, SequencerId, TableId,
|
||||
};
|
||||
use parquet::{
|
||||
arrow::parquet_to_arrow_schema,
|
||||
file::{
|
||||
|
@ -589,6 +591,26 @@ impl IoxMetadata {
|
|||
pub fn match_object_store_id(&self, uuid: Uuid) -> bool {
|
||||
uuid == self.object_store_id
|
||||
}
|
||||
|
||||
// create the corresponding iox catalog ParquetFile
|
||||
pub fn to_parquet_file(&self) -> ParquetFile {
|
||||
ParquetFile {
|
||||
id: ParquetFileId::new(0), // this will be created in the DB. This 0 won't be used anywhere
|
||||
sequencer_id: self.sequencer_id,
|
||||
table_id: self.table_id,
|
||||
partition_id: self.partition_id,
|
||||
object_store_id: self.object_store_id,
|
||||
min_sequence_number: self.min_sequence_number,
|
||||
max_sequence_number: self.max_sequence_number,
|
||||
min_time: iox_catalog::interface::Timestamp::new(
|
||||
self.time_of_first_write.timestamp_nanos(),
|
||||
),
|
||||
max_time: iox_catalog::interface::Timestamp::new(
|
||||
self.time_of_last_write.timestamp_nanos(),
|
||||
),
|
||||
to_delete: false,
|
||||
}
|
||||
}
|
||||
}
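A small sketch of the intended use: the returned `ParquetFile` is only a template, and the placeholder id of 0 is replaced by the catalog on insert (the function name below is illustrative):

fn parquet_file_template(metadata: &IoxMetadata) -> ParquetFile {
    let template = metadata.to_parquet_file();
    // The id is a placeholder until the catalog's create() returns the stored row.
    assert_eq!(template.id.get(), 0);
    assert!(!template.to_delete);
    template
}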
|
||||
|
||||
/// Parse big-endian UUID from protobuf.
|
||||
@ -22,7 +22,7 @@ use parquet::{
|
|||
basic::Compression,
|
||||
file::{metadata::KeyValue, properties::WriterProperties, writer::TryClone},
|
||||
};
|
||||
use predicate::predicate::Predicate;
|
||||
use predicate::Predicate;
|
||||
use schema::selection::Selection;
|
||||
use snafu::{OptionExt, ResultExt, Snafu};
|
||||
use std::{
|
||||
@ -68,7 +68,7 @@ pub enum Error {
|
|||
/// Result type for Parser Client
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
impl From<DeletePredicate> for crate::predicate::Predicate {
|
||||
impl From<DeletePredicate> for crate::Predicate {
|
||||
fn from(pred: DeletePredicate) -> Self {
|
||||
Self {
|
||||
field_columns: None,
|
||||
@ -10,7 +10,648 @@
|
|||
|
||||
pub mod delete_expr;
|
||||
pub mod delete_predicate;
|
||||
pub mod predicate;
|
||||
pub mod regex;
|
||||
pub mod rewrite;
|
||||
pub mod rpc_predicate;
|
||||
|
||||
use data_types::timestamp::{TimestampRange, MAX_NANO_TIME, MIN_NANO_TIME};
|
||||
use datafusion::{
|
||||
error::DataFusionError,
|
||||
logical_plan::{col, lit_timestamp_nano, Column, Expr, Operator},
|
||||
optimizer::utils,
|
||||
};
|
||||
use datafusion_util::{make_range_expr, AndExprBuilder};
|
||||
use observability_deps::tracing::debug;
|
||||
use schema::TIME_COLUMN_NAME;
|
||||
use std::{
|
||||
collections::{BTreeSet, HashSet},
|
||||
fmt,
|
||||
};
|
||||
|
||||
/// This `Predicate` represents the empty predicate (aka that evaluates to true for all rows).
|
||||
pub const EMPTY_PREDICATE: Predicate = Predicate {
|
||||
field_columns: None,
|
||||
exprs: vec![],
|
||||
range: None,
|
||||
partition_key: None,
|
||||
value_expr: vec![],
|
||||
};
|
||||
|
||||
/// A unified Predicate structure for IOx queries that can select and filter Fields and Tags from
|
||||
/// the InfluxDB data mode, as well as for arbitrary other predicates that are expressed by
|
||||
/// DataFusion's [`Expr`] type.
|
||||
///
|
||||
/// Note that the InfluxDB data model (e.g. ParsedLine's) distinguishes between some types of
|
||||
/// columns (tags and fields), and likewise the semantics of this structure can express some types
|
||||
/// of restrictions that only apply to certain types of columns.
|
||||
#[derive(Clone, Debug, Default, PartialEq, PartialOrd)]
|
||||
pub struct Predicate {
|
||||
/// Optional field restriction. If present, restricts the results to only
|
||||
/// tables which have *at least one* of the fields in field_columns.
|
||||
pub field_columns: Option<BTreeSet<String>>,
|
||||
|
||||
/// Optional partition key filter
|
||||
pub partition_key: Option<String>,
|
||||
|
||||
/// Optional timestamp range: only rows within this range are included in
|
||||
/// results. Other rows are excluded
|
||||
pub range: Option<TimestampRange>,
|
||||
|
||||
/// Optional arbitrary predicates, represented as list of
|
||||
/// DataFusion expressions applied a logical conjunction (aka they
|
||||
/// are 'AND'ed together). Only rows that evaluate to TRUE for all
|
||||
/// these expressions should be returned. Other rows are excluded
|
||||
/// from the results.
|
||||
pub exprs: Vec<Expr>,
|
||||
|
||||
/// Optional arbitrary predicates on the special `_value` column. These
|
||||
/// expressions are applied to `field_columns` projections in the form of
|
||||
/// `CASE` statement conditions.
|
||||
pub value_expr: Vec<BinaryExpr>,
|
||||
}
|
||||
|
||||
impl Predicate {
|
||||
/// Return true if this predicate has any general purpose predicates
|
||||
pub fn has_exprs(&self) -> bool {
|
||||
!self.exprs.is_empty()
|
||||
}
|
||||
|
||||
/// Return a DataFusion `Expr` predicate representing the
|
||||
/// combination of all predicate (`exprs`) and timestamp
|
||||
/// restriction in this Predicate. Returns None if there are no
|
||||
/// `Expr`'s restricting the data
|
||||
pub fn filter_expr(&self) -> Option<Expr> {
|
||||
let mut builder =
|
||||
AndExprBuilder::default().append_opt(self.make_timestamp_predicate_expr());
|
||||
|
||||
for expr in &self.exprs {
|
||||
builder = builder.append_expr(expr.clone());
|
||||
}
|
||||
|
||||
builder.build()
|
||||
}
|
||||
|
||||
/// Return true if the field should be included in results
|
||||
pub fn should_include_field(&self, field_name: &str) -> bool {
|
||||
match &self.field_columns {
|
||||
None => true, // No field restriction on predicate
|
||||
Some(field_names) => field_names.contains(field_name),
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a DataFusion predicate for applying a timestamp range:
|
||||
///
|
||||
/// `range.start <= time and time < range.end`
|
||||
fn make_timestamp_predicate_expr(&self) -> Option<Expr> {
|
||||
self.range
|
||||
.map(|range| make_range_expr(range.start(), range.end(), TIME_COLUMN_NAME))
|
||||
}
|
||||
|
||||
/// Returns true if this predicate evaluates to true for all rows
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self == &EMPTY_PREDICATE
|
||||
}
|
||||
|
||||
/// Return a negated DF logical expression for the given delete predicates
|
||||
pub fn negated_expr<S>(delete_predicates: &[S]) -> Option<Expr>
|
||||
where
|
||||
S: AsRef<Self>,
|
||||
{
|
||||
if delete_predicates.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut pred = PredicateBuilder::default().build();
|
||||
pred.merge_delete_predicates(delete_predicates);
|
||||
|
||||
// Make a conjunctive expression of the pred.exprs
|
||||
let mut val = None;
|
||||
for e in pred.exprs {
|
||||
match val {
|
||||
None => val = Some(e),
|
||||
Some(expr) => val = Some(expr.and(e)),
|
||||
}
|
||||
}
|
||||
|
||||
val
|
||||
}
|
||||
|
||||
/// Merge the given delete predicates into this select predicate.
|
||||
/// Since we want to eliminate data filtered by the delete predicates,
|
||||
/// they are first converted into their negated form: NOT(delete_predicate)
|
||||
/// then added/merged into the selection one
|
||||
pub fn merge_delete_predicates<S>(&mut self, delete_predicates: &[S])
|
||||
where
|
||||
S: AsRef<Self>,
|
||||
{
|
||||
// Create a list of disjunctive negated expressions.
|
||||
// Example: there are two deletes as follows (note that time_range is stored separated in the Predicate
|
||||
// but we need to put it together with the exprs here)
|
||||
// . Delete_1: WHERE city != "Boston" AND temp = 70 AND time_range in [10, 30)
|
||||
// . Delete 2: WHERE state = "NY" AND route != "I90" AND time_range in [20, 50)
|
||||
// The negated list will be "NOT(Delete_1)", NOT(Delete_2)" which means
|
||||
// NOT(city != "Boston" AND temp = 70 AND time_range in [10, 30]), NOT(state = "NY" AND route != "I90" AND time_range in [20, 50]) which means
|
||||
// [NOT(city = Boston") OR NOT(temp = 70) OR NOT(time_range in [10, 30])], [NOT(state = "NY") OR NOT(route != "I90") OR NOT(time_range in [20, 50])]
|
||||
// Note that the "NOT(time_range in [20, 50])]" or "NOT(20 <= time <= 50)"" is replaced with "time < 20 OR time > 50"
|
||||
|
||||
for pred in delete_predicates {
|
||||
let pred = pred.as_ref();
|
||||
|
||||
let mut expr: Option<Expr> = None;
|
||||
|
||||
// Time range
|
||||
if let Some(range) = pred.range {
|
||||
// time_expr = NOT(start <= time_range <= end)
|
||||
// Equivalent to: (time < start OR time > end)
|
||||
let time_expr = col(TIME_COLUMN_NAME)
|
||||
.lt(lit_timestamp_nano(range.start()))
|
||||
.or(col(TIME_COLUMN_NAME).gt(lit_timestamp_nano(range.end())));
|
||||
|
||||
match expr {
|
||||
None => expr = Some(time_expr),
|
||||
Some(e) => expr = Some(e.or(time_expr)),
|
||||
}
|
||||
}
|
||||
|
||||
// Exprs
|
||||
for exp in &pred.exprs {
|
||||
match expr {
|
||||
None => expr = Some(exp.clone().not()),
|
||||
Some(e) => expr = Some(e.or(exp.clone().not())),
|
||||
}
|
||||
}
|
||||
|
||||
// Push the negated expression of the delete predicate into the list exprs of the select predicate
|
||||
if let Some(e) = expr {
|
||||
self.exprs.push(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Removes the timestamp range from this predicate, if the range
|
||||
/// is for the entire min/max valid range.
|
||||
///
|
||||
/// This is used in certain cases to retain compatibility with the
|
||||
/// existing storage engine
|
||||
pub(crate) fn clear_timestamp_if_max_range(mut self) -> Self {
|
||||
self.range = self.range.take().and_then(|range| {
|
||||
if range.start() <= MIN_NANO_TIME && range.end() >= MAX_NANO_TIME {
|
||||
None
|
||||
} else {
|
||||
Some(range)
|
||||
}
|
||||
});
|
||||
|
||||
self
|
||||
}
|
||||
}
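A short sketch of the negation described in `merge_delete_predicates` above; the `Arc` wrapper is just one type satisfying the `AsRef<Predicate>` bound:

fn merge_example() {
    use datafusion::logical_plan::{col, lit};
    use std::sync::Arc;

    // Delete: city != "Boston" AND time in [10, 30)
    let delete = Arc::new(
        PredicateBuilder::new()
            .timestamp_range(10, 30)
            .add_expr(col("city").not_eq(lit("Boston")))
            .build(),
    );

    let mut select = PredicateBuilder::default().build();
    select.merge_delete_predicates(&[delete]);

    // One pushed expression: (time < 10 OR time > 30) OR NOT(city != "Boston")
    assert_eq!(select.exprs.len(), 1);
}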
|
||||
|
||||
impl fmt::Display for Predicate {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fn iter_to_str<S>(s: impl IntoIterator<Item = S>) -> String
|
||||
where
|
||||
S: ToString,
|
||||
{
|
||||
s.into_iter()
|
||||
.map(|v| v.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
}
|
||||
|
||||
write!(f, "Predicate")?;
|
||||
|
||||
if let Some(field_columns) = &self.field_columns {
|
||||
write!(f, " field_columns: {{{}}}", iter_to_str(field_columns))?;
|
||||
}
|
||||
|
||||
if let Some(partition_key) = &self.partition_key {
|
||||
write!(f, " partition_key: '{}'", partition_key)?;
|
||||
}
|
||||
|
||||
if let Some(range) = &self.range {
|
||||
// TODO: could be nice to show this as actual timestamps (not just numbers)?
|
||||
write!(f, " range: [{} - {}]", range.start(), range.end())?;
|
||||
}
|
||||
|
||||
if !self.exprs.is_empty() {
|
||||
write!(f, " exprs: [")?;
|
||||
for (i, expr) in self.exprs.iter().enumerate() {
|
||||
write!(f, "{}", expr)?;
|
||||
if i < self.exprs.len() - 1 {
|
||||
write!(f, ", ")?;
|
||||
}
|
||||
}
|
||||
write!(f, "]")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
/// The result of evaluating a predicate on a set of rows
|
||||
pub enum PredicateMatch {
|
||||
/// There is at least one row that matches the predicate that has
|
||||
/// at least one non null value in each field of the predicate
|
||||
AtLeastOneNonNullField,
|
||||
|
||||
/// There are exactly zero rows that match the predicate
|
||||
Zero,
|
||||
|
||||
/// There *may* be rows that match, OR there *may* be no rows that
|
||||
/// match
|
||||
Unknown,
|
||||
}
|
||||
|
||||
/// Structure for building [`Predicate`]s
|
||||
///
|
||||
/// Example:
|
||||
/// ```
|
||||
/// use predicate::PredicateBuilder;
|
||||
/// use datafusion::logical_plan::{col, lit};
|
||||
///
|
||||
/// let p = PredicateBuilder::new()
|
||||
/// .timestamp_range(1, 100)
|
||||
/// .add_expr(col("foo").eq(lit(42)))
|
||||
/// .build();
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// p.to_string(),
|
||||
/// "Predicate range: [1 - 100] exprs: [#foo = Int32(42)]"
|
||||
/// );
|
||||
/// ```
|
||||
#[derive(Debug, Default)]
|
||||
pub struct PredicateBuilder {
|
||||
inner: Predicate,
|
||||
}
|
||||
|
||||
impl From<Predicate> for PredicateBuilder {
|
||||
fn from(inner: Predicate) -> Self {
|
||||
Self { inner }
|
||||
}
|
||||
}
|
||||
|
||||
impl PredicateBuilder {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Sets the timestamp range
|
||||
pub fn timestamp_range(mut self, start: i64, end: i64) -> Self {
|
||||
// Without more thought, redefining the timestamp range would
|
||||
// lose the old range. Assert that that cannot happen.
|
||||
assert!(
|
||||
self.inner.range.is_none(),
|
||||
"Unexpected re-definition of timestamp range"
|
||||
);
|
||||
|
||||
self.inner.range = Some(TimestampRange::new(start, end));
|
||||
self
|
||||
}
|
||||
|
||||
/// sets the optional timestamp range, if any
|
||||
pub fn timestamp_range_option(mut self, range: Option<TimestampRange>) -> Self {
|
||||
// Without more thought, redefining the timestamp range would
|
||||
// lose the old range. Assert that that cannot happen.
|
||||
assert!(
|
||||
range.is_none() || self.inner.range.is_none(),
|
||||
"Unexpected re-definition of timestamp range"
|
||||
);
|
||||
self.inner.range = range;
|
||||
self
|
||||
}
|
||||
|
||||
/// Adds an expression to the list of general purpose predicates
|
||||
pub fn add_expr(mut self, expr: Expr) -> Self {
|
||||
self.inner.exprs.push(expr);
|
||||
self
|
||||
}
|
||||
|
||||
/// Builds a regex matching expression from the provided column name and
|
||||
/// pattern. Values not matching the regex will be filtered out.
|
||||
pub fn build_regex_match_expr(self, column: &str, pattern: impl Into<String>) -> Self {
|
||||
self.regex_match_expr(column, pattern, true)
|
||||
}
|
||||
|
||||
/// Builds a regex "not matching" expression from the provided column name
|
||||
/// and pattern. Values *matching* the regex will be filtered out.
|
||||
pub fn build_regex_not_match_expr(self, column: &str, pattern: impl Into<String>) -> Self {
|
||||
self.regex_match_expr(column, pattern, false)
|
||||
}
|
||||
|
||||
fn regex_match_expr(mut self, column: &str, pattern: impl Into<String>, matches: bool) -> Self {
|
||||
let expr = crate::regex::regex_match_expr(col(column), pattern.into(), matches);
|
||||
self.inner.exprs.push(expr);
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets field_column restriction
|
||||
pub fn field_columns(mut self, columns: Vec<impl Into<String>>) -> Self {
|
||||
// We need to distinguish predicates like `column_name In
|
||||
// (foo, bar)` and `column_name = foo and column_name = bar` in order to handle
|
||||
// this
|
||||
if self.inner.field_columns.is_some() {
|
||||
unimplemented!("Complex/Multi field predicates are not yet supported");
|
||||
}
|
||||
|
||||
let column_names = columns
|
||||
.into_iter()
|
||||
.map(|s| s.into())
|
||||
.collect::<BTreeSet<_>>();
|
||||
|
||||
self.inner.field_columns = Some(column_names);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the partition key restriction
|
||||
pub fn partition_key(mut self, partition_key: impl Into<String>) -> Self {
|
||||
assert!(
|
||||
self.inner.partition_key.is_none(),
|
||||
"multiple partition key predicates not suported"
|
||||
);
|
||||
self.inner.partition_key = Some(partition_key.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Create a predicate, consuming this builder
|
||||
pub fn build(self) -> Predicate {
|
||||
self.inner
|
||||
}
|
||||
|
||||
/// Adds only the expressions from `filters` that can be pushed down to
|
||||
/// execution engines.
|
||||
pub fn add_pushdown_exprs(mut self, filters: &[Expr]) -> Self {
|
||||
// For each expression of the filters, recursively split it if it is an AND conjunction
|
||||
// For example, expression (x AND y) will be split into a vector of 2 expressions [x, y]
|
||||
let mut exprs = vec![];
|
||||
filters
|
||||
.iter()
|
||||
.for_each(|expr| Self::split_members(expr, &mut exprs));
|
||||
|
||||
// Only keep single_column and primitive binary expressions
|
||||
let mut pushdown_exprs: Vec<Expr> = vec![];
|
||||
let exprs_result = exprs
|
||||
.into_iter()
|
||||
.try_for_each::<_, Result<_, DataFusionError>>(|expr| {
|
||||
let mut columns = HashSet::new();
|
||||
utils::expr_to_columns(&expr, &mut columns)?;
|
||||
|
||||
if columns.len() == 1 && Self::primitive_binary_expr(&expr) {
|
||||
pushdown_exprs.push(expr);
|
||||
}
|
||||
Ok(())
|
||||
});
|
||||
|
||||
match exprs_result {
|
||||
Ok(()) => {
|
||||
// Return the builder with only the pushdownable expressions on it.
|
||||
self.inner.exprs.append(&mut pushdown_exprs);
|
||||
}
|
||||
Err(e) => {
|
||||
debug!("Error, {}, building push-down predicates for filters: {:#?}. No predicates are pushed down", e, filters);
|
||||
}
|
||||
}
|
||||
|
||||
self
|
||||
}
|
||||
|
||||
/// Recursively split all "AND" expressions into smaller ones
|
||||
/// Example: "A AND B AND C" => [A, B, C]
|
||||
pub fn split_members(predicate: &Expr, predicates: &mut Vec<Expr>) {
|
||||
match predicate {
|
||||
Expr::BinaryExpr {
|
||||
right,
|
||||
op: Operator::And,
|
||||
left,
|
||||
} => {
|
||||
Self::split_members(left, predicates);
|
||||
Self::split_members(right, predicates);
|
||||
}
|
||||
other => predicates.push(other.clone()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return true if the given expression is a primitive binary expression of the form `column op constant`
|
||||
/// where `op` is a comparison operator.
|
||||
pub fn primitive_binary_expr(expr: &Expr) -> bool {
|
||||
match expr {
|
||||
Expr::BinaryExpr { left, op, right } => {
|
||||
matches!(
|
||||
(&**left, &**right),
|
||||
(Expr::Column(_), Expr::Literal(_)) | (Expr::Literal(_), Expr::Column(_))
|
||||
) && matches!(
|
||||
op,
|
||||
Operator::Eq
|
||||
| Operator::NotEq
|
||||
| Operator::Lt
|
||||
| Operator::LtEq
|
||||
| Operator::Gt
|
||||
| Operator::GtEq
|
||||
)
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
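A quick sketch of the splitting and pushdown-eligibility helpers above (illustrative only):

fn split_example() {
    use datafusion::logical_plan::{col, lit, Expr};

    // "a < 10 AND b >= 50" splits into its two conjuncts.
    let conjunction = col("a").lt(lit(10)).and(col("b").gt_eq(lit(50)));
    let mut members: Vec<Expr> = vec![];
    PredicateBuilder::split_members(&conjunction, &mut members);
    assert_eq!(members.len(), 2);

    // Both members are primitive `column op constant` comparisons, so both
    // would survive add_pushdown_exprs.
    assert!(members.iter().all(PredicateBuilder::primitive_binary_expr));
}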
|
||||
|
||||
// A representation of the `BinaryExpr` variant of a Datafusion expression.
|
||||
#[derive(Clone, Debug, PartialEq, PartialOrd)]
|
||||
pub struct BinaryExpr {
|
||||
pub left: Column,
|
||||
pub op: Operator,
|
||||
pub right: Expr,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use datafusion::logical_plan::{col, lit};
|
||||
|
||||
#[test]
|
||||
fn test_default_predicate_is_empty() {
|
||||
let p = Predicate::default();
|
||||
assert!(p.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_non_default_predicate_is_not_empty() {
|
||||
let p = PredicateBuilder::new().timestamp_range(1, 100).build();
|
||||
|
||||
assert!(!p.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_pushdown_predicates() {
|
||||
let mut filters = vec![];
|
||||
|
||||
// state = CA
|
||||
let expr1 = col("state").eq(lit("CA"));
|
||||
filters.push(expr1);
|
||||
|
||||
// "price > 10"
|
||||
let expr2 = col("price").gt(lit(10));
|
||||
filters.push(expr2);
|
||||
|
||||
// a < 10 AND b >= 50 --> will be split to [a < 10, b >= 50]
|
||||
let expr3 = col("a").lt(lit(10)).and(col("b").gt_eq(lit(50)));
|
||||
filters.push(expr3);
|
||||
|
||||
// c != 3 OR d = 8 --> won't be pushed down
|
||||
let expr4 = col("c").not_eq(lit(3)).or(col("d").eq(lit(8)));
|
||||
filters.push(expr4);
|
||||
|
||||
// e is null --> won't be pushed down
|
||||
let expr5 = col("e").is_null();
|
||||
filters.push(expr5);
|
||||
|
||||
// f <= 60
|
||||
let expr6 = col("f").lt_eq(lit(60));
|
||||
filters.push(expr6);
|
||||
|
||||
// g is not null --> won't be pushed down
|
||||
let expr7 = col("g").is_not_null();
|
||||
filters.push(expr7);
|
||||
|
||||
// h + i --> won't be pushed down
|
||||
let expr8 = col("h") + col("i");
|
||||
filters.push(expr8);
|
||||
|
||||
// city = Boston
|
||||
let expr9 = col("city").eq(lit("Boston"));
|
||||
filters.push(expr9);
|
||||
|
||||
// city != Braintree
|
||||
let expr9 = col("city").not_eq(lit("Braintree"));
|
||||
filters.push(expr9);
|
||||
|
||||
// city != state --> won't be pushed down
|
||||
let expr10 = col("city").not_eq(col("state"));
|
||||
filters.push(expr10);
|
||||
|
||||
// city = state --> won't be pushed down
|
||||
let expr11 = col("city").eq(col("state"));
|
||||
filters.push(expr11);
|
||||
|
||||
// city_state = city + state --> won't be pushed down
|
||||
let expr12 = col("city_sate").eq(col("city") + col("state"));
|
||||
filters.push(expr12);
|
||||
|
||||
// city = city + 5 --> won't be pushed down
|
||||
let expr13 = col("city").eq(col("city") + lit(5));
|
||||
filters.push(expr13);
|
||||
|
||||
// city = city --> won't be pushed down
|
||||
let expr14 = col("city").eq(col("city"));
|
||||
filters.push(expr14);
|
||||
|
||||
// city + 5 = city --> won't be pushed down
|
||||
let expr15 = (col("city") + lit(5)).eq(col("city"));
|
||||
filters.push(expr15);
|
||||
|
||||
// 5 = city
|
||||
let expr16 = lit(5).eq(col("city"));
|
||||
filters.push(expr16);
|
||||
|
||||
println!(" --------------- Filters: {:#?}", filters);
|
||||
|
||||
// Expected pushdown predicates: [state = CA, price > 10, a < 10, b >= 50, f <= 60, city = Boston, city != Braintree, 5 = city]
|
||||
let predicate = PredicateBuilder::default()
|
||||
.add_pushdown_exprs(&filters)
|
||||
.build();
|
||||
|
||||
println!(" ------------- Predicates: {:#?}", predicate);
|
||||
assert_eq!(predicate.exprs.len(), 8);
|
||||
assert_eq!(predicate.exprs[0], col("state").eq(lit("CA")));
|
||||
assert_eq!(predicate.exprs[1], col("price").gt(lit(10)));
|
||||
assert_eq!(predicate.exprs[2], col("a").lt(lit(10)));
|
||||
assert_eq!(predicate.exprs[3], col("b").gt_eq(lit(50)));
|
||||
assert_eq!(predicate.exprs[4], col("f").lt_eq(lit(60)));
|
||||
assert_eq!(predicate.exprs[5], col("city").eq(lit("Boston")));
|
||||
assert_eq!(predicate.exprs[6], col("city").not_eq(lit("Braintree")));
|
||||
assert_eq!(predicate.exprs[7], lit(5).eq(col("city")));
|
||||
}
|
||||
#[test]
|
||||
fn predicate_display_ts() {
|
||||
// TODO make this a doc example?
|
||||
let p = PredicateBuilder::new().timestamp_range(1, 100).build();
|
||||
|
||||
assert_eq!(p.to_string(), "Predicate range: [1 - 100]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn predicate_display_ts_and_expr() {
|
||||
let p = PredicateBuilder::new()
|
||||
.timestamp_range(1, 100)
|
||||
.add_expr(col("foo").eq(lit(42)).and(col("bar").lt(lit(11))))
|
||||
.build();
|
||||
|
||||
assert_eq!(
|
||||
p.to_string(),
|
||||
"Predicate range: [1 - 100] exprs: [#foo = Int32(42) AND #bar < Int32(11)]"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn predicate_display_full() {
|
||||
let p = PredicateBuilder::new()
|
||||
.timestamp_range(1, 100)
|
||||
.add_expr(col("foo").eq(lit(42)))
|
||||
.field_columns(vec!["f1", "f2"])
|
||||
.partition_key("the_key")
|
||||
.build();
|
||||
|
||||
assert_eq!(p.to_string(), "Predicate field_columns: {f1, f2} partition_key: 'the_key' range: [1 - 100] exprs: [#foo = Int32(42)]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_clear_timestamp_if_max_range_out_of_range() {
|
||||
let p = PredicateBuilder::new()
|
||||
.timestamp_range(1, 100)
|
||||
.add_expr(col("foo").eq(lit(42)))
|
||||
.build();
|
||||
|
||||
let expected = p.clone();
|
||||
|
||||
// no rewrite
|
||||
assert_eq!(p.clear_timestamp_if_max_range(), expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_clear_timestamp_if_max_range_out_of_range_low() {
|
||||
let p = PredicateBuilder::new()
|
||||
.timestamp_range(MIN_NANO_TIME, 100)
|
||||
.add_expr(col("foo").eq(lit(42)))
|
||||
.build();
|
||||
|
||||
let expected = p.clone();
|
||||
|
||||
// no rewrite
|
||||
assert_eq!(p.clear_timestamp_if_max_range(), expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_clear_timestamp_if_max_range_out_of_range_high() {
|
||||
let p = PredicateBuilder::new()
|
||||
.timestamp_range(0, MAX_NANO_TIME)
|
||||
.add_expr(col("foo").eq(lit(42)))
|
||||
.build();
|
||||
|
||||
let expected = p.clone();
|
||||
|
||||
// no rewrite
|
||||
assert_eq!(p.clear_timestamp_if_max_range(), expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_clear_timestamp_if_max_range_in_range() {
|
||||
let p = PredicateBuilder::new()
|
||||
.timestamp_range(MIN_NANO_TIME, MAX_NANO_TIME)
|
||||
.add_expr(col("foo").eq(lit(42)))
|
||||
.build();
|
||||
|
||||
let expected = PredicateBuilder::new()
|
||||
.add_expr(col("foo").eq(lit(42)))
|
||||
.build();
|
||||
// rewrite
|
||||
assert_eq!(p.clear_timestamp_if_max_range(), expected);
|
||||
}
|
||||
}
|
||||
@ -1,648 +0,0 @@
|
|||
//! This module contains a unified Predicate structure for IOx qieries
|
||||
//! that can select and filter Fields and Tags from the InfluxDB data
|
||||
//! mode as well as for arbitrary other predicates that are expressed
|
||||
//! by DataFusion's `Expr` type.
|
||||
|
||||
use std::{
|
||||
collections::{BTreeSet, HashSet},
|
||||
fmt,
|
||||
};
|
||||
|
||||
use data_types::timestamp::{TimestampRange, MAX_NANO_TIME, MIN_NANO_TIME};
|
||||
use datafusion::{
|
||||
error::DataFusionError,
|
||||
logical_plan::{col, lit_timestamp_nano, Column, Expr, Operator},
|
||||
optimizer::utils,
|
||||
};
|
||||
use datafusion_util::{make_range_expr, AndExprBuilder};
|
||||
use observability_deps::tracing::debug;
|
||||
use schema::TIME_COLUMN_NAME;
|
||||
|
||||
/// This `Predicate` represents the empty predicate (aka that
|
||||
/// evaluates to true for all rows).
|
||||
pub const EMPTY_PREDICATE: Predicate = Predicate {
|
||||
field_columns: None,
|
||||
exprs: vec![],
|
||||
range: None,
|
||||
partition_key: None,
|
||||
value_expr: vec![],
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
/// The result of evaluating a predicate on a set of rows
|
||||
pub enum PredicateMatch {
|
||||
/// There is at least one row that matches the predicate that has
|
||||
/// at least one non null value in each field of the predicate
|
||||
AtLeastOneNonNullField,
|
||||
|
||||
/// There are exactly zero rows that match the predicate
|
||||
Zero,
|
||||
|
||||
/// There *may* be rows that match, OR there *may* be no rows that
|
||||
/// match
|
||||
Unknown,
|
||||
}
|
||||
|
||||
/// Represents a parsed predicate for evaluation by the InfluxDB IOx
|
||||
/// query engine.
|
||||
///
|
||||
/// Note that the InfluxDB data model (e.g. ParsedLine's)
|
||||
/// distinguishes between some types of columns (tags and fields), and
|
||||
/// likewise the semantics of this structure can express some types of
|
||||
/// restrictions that only apply to certain types of columns.
|
||||
#[derive(Clone, Debug, Default, PartialEq, PartialOrd)]
|
||||
pub struct Predicate {
|
||||
/// Optional field restriction. If present, restricts the results to only
|
||||
/// tables which have *at least one* of the fields in field_columns.
|
||||
pub field_columns: Option<BTreeSet<String>>,
|
||||
|
||||
/// Optional partition key filter
|
||||
pub partition_key: Option<String>,
|
||||
|
||||
/// Optional timestamp range: only rows within this range are included in
|
||||
/// results. Other rows are excluded
|
||||
pub range: Option<TimestampRange>,
|
||||
|
||||
/// Optional arbitrary predicates, represented as list of
|
||||
/// DataFusion expressions applied a logical conjunction (aka they
|
||||
/// are 'AND'ed together). Only rows that evaluate to TRUE for all
|
||||
/// these expressions should be returned. Other rows are excluded
|
||||
/// from the results.
|
||||
pub exprs: Vec<Expr>,
|
||||
|
||||
/// Optional arbitrary predicates on the special `_value` column. These
|
||||
/// expressions are applied to `field_columns` projections in the form of
|
||||
/// `CASE` statement conditions.
|
||||
pub value_expr: Vec<BinaryExpr>,
|
||||
}
|
||||
|
||||
impl Predicate {
|
||||
/// Return true if this predicate has any general purpose predicates
|
||||
pub fn has_exprs(&self) -> bool {
|
||||
!self.exprs.is_empty()
|
||||
}
|
||||
|
||||
/// Return a DataFusion `Expr` predicate representing the
|
||||
/// combination of all predicate (`exprs`) and timestamp
|
||||
/// restriction in this Predicate. Returns None if there are no
|
||||
/// `Expr`'s restricting the data
|
||||
pub fn filter_expr(&self) -> Option<Expr> {
|
||||
let mut builder =
|
||||
AndExprBuilder::default().append_opt(self.make_timestamp_predicate_expr());
|
||||
|
||||
for expr in &self.exprs {
|
||||
builder = builder.append_expr(expr.clone());
|
||||
}
|
||||
|
||||
builder.build()
|
||||
}
|
||||
|
||||
/// Return true if the field should be included in results
|
||||
pub fn should_include_field(&self, field_name: &str) -> bool {
|
||||
match &self.field_columns {
|
||||
None => true, // No field restriction on predicate
|
||||
Some(field_names) => field_names.contains(field_name),
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a DataFusion predicate for appliying a timestamp range:
|
||||
///
|
||||
/// `range.start <= time and time < range.end`
|
||||
fn make_timestamp_predicate_expr(&self) -> Option<Expr> {
|
||||
self.range
|
||||
.map(|range| make_range_expr(range.start(), range.end(), TIME_COLUMN_NAME))
|
||||
}
|
||||
|
||||
/// Returns true if ths predicate evaluates to true for all rows
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self == &EMPTY_PREDICATE
|
||||
}
|
||||
|
||||
/// Return a negated DF logical expression for the given delete predicates
|
||||
pub fn negated_expr<S>(delete_predicates: &[S]) -> Option<Expr>
|
||||
where
|
||||
S: AsRef<Self>,
|
||||
{
|
||||
if delete_predicates.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut pred = PredicateBuilder::default().build();
|
||||
pred.merge_delete_predicates(delete_predicates);
|
||||
|
||||
// Make a conjunctive expression of the pred.exprs
|
||||
let mut val = None;
|
||||
for e in pred.exprs {
|
||||
match val {
|
||||
None => val = Some(e),
|
||||
Some(expr) => val = Some(expr.and(e)),
|
||||
}
|
||||
}
|
||||
|
||||
val
|
||||
}
|
||||
|
||||
/// Merge the given delete predicates into this select predicate.
|
||||
/// Since we want to eliminate data filtered by the delete predicates,
|
||||
/// they are first converted into their negated form: NOT(delete_predicate)
|
||||
/// then added/merged into the selection one
|
||||
pub fn merge_delete_predicates<S>(&mut self, delete_predicates: &[S])
|
||||
where
|
||||
S: AsRef<Self>,
|
||||
{
|
||||
// Create a list of disjunctive negated expressions.
|
||||
// Example: there are two deletes as follows (note that time_range is stored separately in the Predicate
|
||||
// but we need to put it together with the exprs here)
|
||||
// . Delete_1: WHERE city != "Boston" AND temp = 70 AND time_range in [10, 30)
|
||||
// . Delete 2: WHERE state = "NY" AND route != "I90" AND time_range in [20, 50)
|
||||
// The negated list will be "NOT(Delete_1)", "NOT(Delete_2)" which means
|
||||
// NOT(city != "Boston" AND temp = 70 AND time_range in [10, 30]), NOT(state = "NY" AND route != "I90" AND time_range in [20, 50]) which means
|
||||
// [NOT(city = Boston") OR NOT(temp = 70) OR NOT(time_range in [10, 30])], [NOT(state = "NY") OR NOT(route != "I90") OR NOT(time_range in [20, 50])]
|
||||
// Note that the "NOT(time_range in [20, 50])]" or "NOT(20 <= time <= 50)"" is replaced with "time < 20 OR time > 50"
|
||||
|
||||
for pred in delete_predicates {
|
||||
let pred = pred.as_ref();
|
||||
|
||||
let mut expr: Option<Expr> = None;
|
||||
|
||||
// Time range
|
||||
if let Some(range) = pred.range {
|
||||
// time_expr = NOT(start <= time_range <= end)
|
||||
// Equivalent to: (time < start OR time > end)
|
||||
let time_expr = col(TIME_COLUMN_NAME)
|
||||
.lt(lit_timestamp_nano(range.start()))
|
||||
.or(col(TIME_COLUMN_NAME).gt(lit_timestamp_nano(range.end())));
|
||||
|
||||
match expr {
|
||||
None => expr = Some(time_expr),
|
||||
Some(e) => expr = Some(e.or(time_expr)),
|
||||
}
|
||||
}
|
||||
|
||||
// Exprs
|
||||
for exp in &pred.exprs {
|
||||
match expr {
|
||||
None => expr = Some(exp.clone().not()),
|
||||
Some(e) => expr = Some(e.or(exp.clone().not())),
|
||||
}
|
||||
}
|
||||
|
||||
// Push the negated expression of the delete predicate into the list exprs of the select predicate
|
||||
if let Some(e) = expr {
|
||||
self.exprs.push(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Removes the timestamp range from this predicate, if the range
|
||||
/// is for the entire min/max valid range.
|
||||
///
|
||||
/// This is used in certain cases to retain compatibility with the
|
||||
/// existing storage engine
|
||||
pub(crate) fn clear_timestamp_if_max_range(mut self) -> Self {
|
||||
self.range = self.range.take().and_then(|range| {
|
||||
if range.start() <= MIN_NANO_TIME && range.end() >= MAX_NANO_TIME {
|
||||
None
|
||||
} else {
|
||||
Some(range)
|
||||
}
|
||||
});
|
||||
|
||||
self
|
||||
}
|
||||
}
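// Illustrative sketch (not part of the upstream change): a small, hedged example of how
// `filter_expr` and `merge_delete_predicates` above behave, using only the builder API
// defined later in this file. Column names and literal values are made up.
#[cfg(test)]
mod predicate_expr_sketch {
    use super::*;
    use datafusion::logical_plan::{col, lit};
    use std::sync::Arc;

    #[test]
    fn filter_expr_combines_range_and_exprs() {
        let p = PredicateBuilder::new()
            .timestamp_range(1, 100)
            .add_expr(col("foo").eq(lit(42)))
            .build();

        // The timestamp range becomes a time-column restriction and is AND'ed
        // with `foo = 42`, so there is something to filter on.
        assert!(p.filter_expr().is_some());
    }

    #[test]
    fn delete_predicate_is_negated_into_a_disjunction() {
        // Delete: city != 'Boston' AND time in [10, 30)
        let delete = PredicateBuilder::new()
            .timestamp_range(10, 30)
            .add_expr(col("city").not_eq(lit("Boston")))
            .build();

        let mut select = PredicateBuilder::new().build();
        select.merge_delete_predicates(&[Arc::new(delete)]);

        // Exactly one negated, OR'ed expression is pushed onto the select predicate.
        assert_eq!(select.exprs.len(), 1);
    }
}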
|
||||
|
||||
impl fmt::Display for Predicate {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fn iter_to_str<S>(s: impl IntoIterator<Item = S>) -> String
|
||||
where
|
||||
S: ToString,
|
||||
{
|
||||
s.into_iter()
|
||||
.map(|v| v.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
}
|
||||
|
||||
write!(f, "Predicate")?;
|
||||
|
||||
if let Some(field_columns) = &self.field_columns {
|
||||
write!(f, " field_columns: {{{}}}", iter_to_str(field_columns))?;
|
||||
}
|
||||
|
||||
if let Some(partition_key) = &self.partition_key {
|
||||
write!(f, " partition_key: '{}'", partition_key)?;
|
||||
}
|
||||
|
||||
if let Some(range) = &self.range {
|
||||
// TODO: could be nice to show this as actual timestamps (not just numbers)?
|
||||
write!(f, " range: [{} - {}]", range.start(), range.end())?;
|
||||
}
|
||||
|
||||
if !self.exprs.is_empty() {
|
||||
write!(f, " exprs: [")?;
|
||||
for (i, expr) in self.exprs.iter().enumerate() {
|
||||
write!(f, "{}", expr)?;
|
||||
if i < self.exprs.len() - 1 {
|
||||
write!(f, ", ")?;
|
||||
}
|
||||
}
|
||||
write!(f, "]")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
/// Structure for building [`Predicate`]s
|
||||
///
|
||||
/// Example:
|
||||
/// ```
|
||||
/// use predicate::predicate::PredicateBuilder;
|
||||
/// use datafusion::logical_plan::{col, lit};
|
||||
///
|
||||
/// let p = PredicateBuilder::new()
|
||||
/// .timestamp_range(1, 100)
|
||||
/// .add_expr(col("foo").eq(lit(42)))
|
||||
/// .build();
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// p.to_string(),
|
||||
/// "Predicate range: [1 - 100] exprs: [#foo = Int32(42)]"
|
||||
/// );
|
||||
/// ```
|
||||
pub struct PredicateBuilder {
|
||||
inner: Predicate,
|
||||
}
|
||||
|
||||
impl From<Predicate> for PredicateBuilder {
|
||||
fn from(inner: Predicate) -> Self {
|
||||
Self { inner }
|
||||
}
|
||||
}
|
||||
|
||||
impl PredicateBuilder {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Sets the timestamp range
|
||||
pub fn timestamp_range(mut self, start: i64, end: i64) -> Self {
|
||||
// Without more thought, redefining the timestamp range would
|
||||
// lose the old range. Assert that that cannot happen.
|
||||
assert!(
|
||||
self.inner.range.is_none(),
|
||||
"Unexpected re-definition of timestamp range"
|
||||
);
|
||||
|
||||
self.inner.range = Some(TimestampRange::new(start, end));
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the optional timestamp range, if any
|
||||
pub fn timestamp_range_option(mut self, range: Option<TimestampRange>) -> Self {
|
||||
// Without more thought, redefining the timestamp range would
|
||||
// lose the old range. Assert that that cannot happen.
|
||||
assert!(
|
||||
range.is_none() || self.inner.range.is_none(),
|
||||
"Unexpected re-definition of timestamp range"
|
||||
);
|
||||
self.inner.range = range;
|
||||
self
|
||||
}
|
||||
|
||||
/// Adds an expression to the list of general purpose predicates
|
||||
pub fn add_expr(mut self, expr: Expr) -> Self {
|
||||
self.inner.exprs.push(expr);
|
||||
self
|
||||
}
|
||||
|
||||
/// Builds a regex matching expression from the provided column name and
|
||||
/// pattern. Values not matching the regex will be filtered out.
|
||||
pub fn build_regex_match_expr(self, column: &str, pattern: impl Into<String>) -> Self {
|
||||
self.regex_match_expr(column, pattern, true)
|
||||
}
|
||||
|
||||
/// Builds a regex "not matching" expression from the provided column name
|
||||
/// and pattern. Values *matching* the regex will be filtered out.
|
||||
pub fn build_regex_not_match_expr(self, column: &str, pattern: impl Into<String>) -> Self {
|
||||
self.regex_match_expr(column, pattern, false)
|
||||
}
|
||||
|
||||
fn regex_match_expr(mut self, column: &str, pattern: impl Into<String>, matches: bool) -> Self {
|
||||
let expr = crate::regex::regex_match_expr(col(column), pattern.into(), matches);
|
||||
self.inner.exprs.push(expr);
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets field_column restriction
|
||||
pub fn field_columns(mut self, columns: Vec<impl Into<String>>) -> Self {
|
||||
// We need to distinguish predicates like `column_name In
|
||||
// (foo, bar)` and `column_name = foo and column_name = bar` in order to handle
|
||||
// this
|
||||
if self.inner.field_columns.is_some() {
|
||||
unimplemented!("Complex/Multi field predicates are not yet supported");
|
||||
}
|
||||
|
||||
let column_names = columns
|
||||
.into_iter()
|
||||
.map(|s| s.into())
|
||||
.collect::<BTreeSet<_>>();
|
||||
|
||||
self.inner.field_columns = Some(column_names);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the partition key restriction
|
||||
pub fn partition_key(mut self, partition_key: impl Into<String>) -> Self {
|
||||
assert!(
|
||||
self.inner.partition_key.is_none(),
|
||||
"multiple partition key predicates not suported"
|
||||
);
|
||||
self.inner.partition_key = Some(partition_key.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Create a predicate, consuming this builder
|
||||
pub fn build(self) -> Predicate {
|
||||
self.inner
|
||||
}
|
||||
|
||||
/// Adds only the expressions from `filters` that can be pushed down to
|
||||
/// execution engines.
|
||||
pub fn add_pushdown_exprs(mut self, filters: &[Expr]) -> Self {
|
||||
// For each expression of the filters, recursively split it if it is an AND conjunction
|
||||
// For example, expression (x AND y) will be split into a vector of 2 expressions [x, y]
|
||||
let mut exprs = vec![];
|
||||
filters
|
||||
.iter()
|
||||
.for_each(|expr| Self::split_members(expr, &mut exprs));
|
||||
|
||||
// Only keep single_column and primitive binary expressions
|
||||
let mut pushdown_exprs: Vec<Expr> = vec![];
|
||||
let exprs_result = exprs
|
||||
.into_iter()
|
||||
.try_for_each::<_, Result<_, DataFusionError>>(|expr| {
|
||||
let mut columns = HashSet::new();
|
||||
utils::expr_to_columns(&expr, &mut columns)?;
|
||||
|
||||
if columns.len() == 1 && Self::primitive_binary_expr(&expr) {
|
||||
pushdown_exprs.push(expr);
|
||||
}
|
||||
Ok(())
|
||||
});
|
||||
|
||||
match exprs_result {
|
||||
Ok(()) => {
|
||||
// Return the builder with only the pushdownable expressions on it.
|
||||
self.inner.exprs.append(&mut pushdown_exprs);
|
||||
}
|
||||
Err(e) => {
|
||||
debug!("Error, {}, building push-down predicates for filters: {:#?}. No predicates are pushed down", e, filters);
|
||||
}
|
||||
}
|
||||
|
||||
self
|
||||
}
|
||||
|
||||
/// Recursively split all "AND" expressions into smaller one
|
||||
/// Example: "A AND B AND C" => [A, B, C]
|
||||
pub fn split_members(predicate: &Expr, predicates: &mut Vec<Expr>) {
|
||||
match predicate {
|
||||
Expr::BinaryExpr {
|
||||
right,
|
||||
op: Operator::And,
|
||||
left,
|
||||
} => {
|
||||
Self::split_members(left, predicates);
|
||||
Self::split_members(right, predicates);
|
||||
}
|
||||
other => predicates.push(other.clone()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return true if the given expression is a primitive binary expression of the form `column op constant`,
|
||||
/// where `op` is a comparison operator
|
||||
pub fn primitive_binary_expr(expr: &Expr) -> bool {
|
||||
match expr {
|
||||
Expr::BinaryExpr { left, op, right } => {
|
||||
matches!(
|
||||
(&**left, &**right),
|
||||
(Expr::Column(_), Expr::Literal(_)) | (Expr::Literal(_), Expr::Column(_))
|
||||
) && matches!(
|
||||
op,
|
||||
Operator::Eq
|
||||
| Operator::NotEq
|
||||
| Operator::Lt
|
||||
| Operator::LtEq
|
||||
| Operator::Gt
|
||||
| Operator::GtEq
|
||||
)
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
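// Illustrative sketch (not part of the upstream change): `split_members` flattens nested
// AND conjunctions, which is what lets `add_pushdown_exprs` keep only the pushable
// `column op constant` pieces of a mixed filter; the regex builder methods append one
// expression per call. Column names and the regex pattern below are made up.
#[cfg(test)]
mod builder_sketch {
    use super::*;
    use datafusion::logical_plan::{col, lit};

    #[test]
    fn split_members_flattens_conjunctions() {
        // (a < 10 AND b >= 50) AND c = 3  -->  [a < 10, b >= 50, c = 3]
        let expr = col("a")
            .lt(lit(10))
            .and(col("b").gt_eq(lit(50)))
            .and(col("c").eq(lit(3)));

        let mut parts = vec![];
        PredicateBuilder::split_members(&expr, &mut parts);

        assert_eq!(parts.len(), 3);
        assert_eq!(parts[0], col("a").lt(lit(10)));
    }

    #[test]
    fn regex_match_builder_appends_one_expr() {
        let p = PredicateBuilder::new()
            .build_regex_match_expr("host", "^server-0[1-9]$")
            .build();

        assert_eq!(p.exprs.len(), 1);
    }
}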
|
||||
|
||||
/// A representation of the `BinaryExpr` variant of a DataFusion expression.
|
||||
#[derive(Clone, Debug, PartialEq, PartialOrd)]
|
||||
pub struct BinaryExpr {
|
||||
pub left: Column,
|
||||
pub op: Operator,
|
||||
pub right: Expr,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use datafusion::logical_plan::{col, lit};
|
||||
|
||||
#[test]
|
||||
fn test_default_predicate_is_empty() {
|
||||
let p = Predicate::default();
|
||||
assert!(p.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_non_default_predicate_is_not_empty() {
|
||||
let p = PredicateBuilder::new().timestamp_range(1, 100).build();
|
||||
|
||||
assert!(!p.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_pushdown_predicates() {
|
||||
let mut filters = vec![];
|
||||
|
||||
// state = CA
|
||||
let expr1 = col("state").eq(lit("CA"));
|
||||
filters.push(expr1);
|
||||
|
||||
// "price > 10"
|
||||
let expr2 = col("price").gt(lit(10));
|
||||
filters.push(expr2);
|
||||
|
||||
// a < 10 AND b >= 50 --> will be split to [a < 10, b >= 50]
|
||||
let expr3 = col("a").lt(lit(10)).and(col("b").gt_eq(lit(50)));
|
||||
filters.push(expr3);
|
||||
|
||||
// c != 3 OR d = 8 --> won't be pushed down
|
||||
let expr4 = col("c").not_eq(lit(3)).or(col("d").eq(lit(8)));
|
||||
filters.push(expr4);
|
||||
|
||||
// e is null --> won't be pushed down
|
||||
let expr5 = col("e").is_null();
|
||||
filters.push(expr5);
|
||||
|
||||
// f <= 60
|
||||
let expr6 = col("f").lt_eq(lit(60));
|
||||
filters.push(expr6);
|
||||
|
||||
// g is not null --> won't be pushed down
|
||||
let expr7 = col("g").is_not_null();
|
||||
filters.push(expr7);
|
||||
|
||||
// h + i --> won't be pushed down
|
||||
let expr8 = col("h") + col("i");
|
||||
filters.push(expr8);
|
||||
|
||||
// city = Boston
|
||||
let expr9 = col("city").eq(lit("Boston"));
|
||||
filters.push(expr9);
|
||||
|
||||
// city != Braintree
|
||||
let expr9 = col("city").not_eq(lit("Braintree"));
|
||||
filters.push(expr9);
|
||||
|
||||
// city != state --> won't be pushed down
|
||||
let expr10 = col("city").not_eq(col("state"));
|
||||
filters.push(expr10);
|
||||
|
||||
// city = state --> won't be pushed down
|
||||
let expr11 = col("city").eq(col("state"));
|
||||
filters.push(expr11);
|
||||
|
||||
// city_state = city + state --> won't be pushed down
|
||||
let expr12 = col("city_sate").eq(col("city") + col("state"));
|
||||
filters.push(expr12);
|
||||
|
||||
// city = city + 5 --> won't be pushed down
|
||||
let expr13 = col("city").eq(col("city") + lit(5));
|
||||
filters.push(expr13);
|
||||
|
||||
// city = city --> won't be pushed down
|
||||
let expr14 = col("city").eq(col("city"));
|
||||
filters.push(expr14);
|
||||
|
||||
// city + 5 = city --> won't be pushed down
|
||||
let expr15 = (col("city") + lit(5)).eq(col("city"));
|
||||
filters.push(expr15);
|
||||
|
||||
// 5 = city
|
||||
let expr16 = lit(5).eq(col("city"));
|
||||
filters.push(expr16);
|
||||
|
||||
println!(" --------------- Filters: {:#?}", filters);
|
||||
|
||||
// Expected pushdown predicates: [state = CA, price > 10, a < 10, b >= 50, f <= 60, city = Boston, city != Braintree, 5 = city]
|
||||
let predicate = PredicateBuilder::default()
|
||||
.add_pushdown_exprs(&filters)
|
||||
.build();
|
||||
|
||||
println!(" ------------- Predicates: {:#?}", predicate);
|
||||
assert_eq!(predicate.exprs.len(), 8);
|
||||
assert_eq!(predicate.exprs[0], col("state").eq(lit("CA")));
|
||||
assert_eq!(predicate.exprs[1], col("price").gt(lit(10)));
|
||||
assert_eq!(predicate.exprs[2], col("a").lt(lit(10)));
|
||||
assert_eq!(predicate.exprs[3], col("b").gt_eq(lit(50)));
|
||||
assert_eq!(predicate.exprs[4], col("f").lt_eq(lit(60)));
|
||||
assert_eq!(predicate.exprs[5], col("city").eq(lit("Boston")));
|
||||
assert_eq!(predicate.exprs[6], col("city").not_eq(lit("Braintree")));
|
||||
assert_eq!(predicate.exprs[7], lit(5).eq(col("city")));
|
||||
}
|
||||
#[test]
|
||||
fn predicate_display_ts() {
|
||||
// TODO make this a doc example?
|
||||
let p = PredicateBuilder::new().timestamp_range(1, 100).build();
|
||||
|
||||
assert_eq!(p.to_string(), "Predicate range: [1 - 100]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn predicate_display_ts_and_expr() {
|
||||
let p = PredicateBuilder::new()
|
||||
.timestamp_range(1, 100)
|
||||
.add_expr(col("foo").eq(lit(42)).and(col("bar").lt(lit(11))))
|
||||
.build();
|
||||
|
||||
assert_eq!(
|
||||
p.to_string(),
|
||||
"Predicate range: [1 - 100] exprs: [#foo = Int32(42) AND #bar < Int32(11)]"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn predicate_display_full() {
|
||||
let p = PredicateBuilder::new()
|
||||
.timestamp_range(1, 100)
|
||||
.add_expr(col("foo").eq(lit(42)))
|
||||
.field_columns(vec!["f1", "f2"])
|
||||
.partition_key("the_key")
|
||||
.build();
|
||||
|
||||
assert_eq!(p.to_string(), "Predicate field_columns: {f1, f2} partition_key: 'the_key' range: [1 - 100] exprs: [#foo = Int32(42)]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_clear_timestamp_if_max_range_out_of_range() {
|
||||
let p = PredicateBuilder::new()
|
||||
.timestamp_range(1, 100)
|
||||
.add_expr(col("foo").eq(lit(42)))
|
||||
.build();
|
||||
|
||||
let expected = p.clone();
|
||||
|
||||
// no rewrite
|
||||
assert_eq!(p.clear_timestamp_if_max_range(), expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_clear_timestamp_if_max_range_out_of_range_low() {
|
||||
let p = PredicateBuilder::new()
|
||||
.timestamp_range(MIN_NANO_TIME, 100)
|
||||
.add_expr(col("foo").eq(lit(42)))
|
||||
.build();
|
||||
|
||||
let expected = p.clone();
|
||||
|
||||
// no rewrite
|
||||
assert_eq!(p.clear_timestamp_if_max_range(), expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_clear_timestamp_if_max_range_out_of_range_high() {
|
||||
let p = PredicateBuilder::new()
|
||||
.timestamp_range(0, MAX_NANO_TIME)
|
||||
.add_expr(col("foo").eq(lit(42)))
|
||||
.build();
|
||||
|
||||
let expected = p.clone();
|
||||
|
||||
// no rewrite
|
||||
assert_eq!(p.clear_timestamp_if_max_range(), expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_clear_timestamp_if_max_range_in_range() {
|
||||
let p = PredicateBuilder::new()
|
||||
.timestamp_range(MIN_NANO_TIME, MAX_NANO_TIME)
|
||||
.add_expr(col("foo").eq(lit(42)))
|
||||
.build();
|
||||
|
||||
let expected = PredicateBuilder::new()
|
||||
.add_expr(col("foo").eq(lit(42)))
|
||||
.build();
|
||||
// rewrite
|
||||
assert_eq!(p.clear_timestamp_if_max_range(), expected);
|
||||
}
|
||||
}
|
|
@ -1,7 +1,6 @@
|
|||
//! Interface logic between IOx [`Predicate`] and predicates used by the
|
||||
//! InfluxDB Storage gRPC API
|
||||
use crate::predicate::{BinaryExpr, Predicate};
|
||||
use crate::rewrite;
|
||||
use crate::{rewrite, BinaryExpr, Predicate};
|
||||
|
||||
use datafusion::error::Result as DataFusionResult;
|
||||
use datafusion::execution::context::ExecutionProps;
|
||||
|
|
|
@ -18,8 +18,8 @@ use datafusion_util::AsExpr;
|
|||
|
||||
use hashbrown::HashSet;
|
||||
use observability_deps::tracing::{debug, trace};
|
||||
use predicate::predicate::{BinaryExpr, Predicate, PredicateMatch};
|
||||
use predicate::rpc_predicate::{InfluxRpcPredicate, FIELD_COLUMN_NAME, MEASUREMENT_COLUMN_NAME};
|
||||
use predicate::{BinaryExpr, Predicate, PredicateMatch};
|
||||
use schema::selection::Selection;
|
||||
use schema::{InfluxColumnType, Schema, TIME_COLUMN_NAME};
|
||||
use snafu::{ensure, OptionExt, ResultExt, Snafu};
|
||||
|
@ -219,6 +219,9 @@ impl InfluxRpcPlanner {
|
|||
{
|
||||
debug!(?rpc_predicate, "planning table_names");
|
||||
|
||||
// Special case predicates that span the entire valid timestamp range
|
||||
let rpc_predicate = rpc_predicate.clear_timestamp_if_max_range();
|
||||
|
||||
let mut builder = StringSetPlanBuilder::new();
|
||||
|
||||
// Mapping between table and chunks that need full plan
|
||||
|
@ -617,6 +620,9 @@ impl InfluxRpcPlanner {
|
|||
{
|
||||
debug!(?rpc_predicate, "planning field_columns");
|
||||
|
||||
// Special case predicates that span the entire valid timestamp range
|
||||
let rpc_predicate = rpc_predicate.clear_timestamp_if_max_range();
|
||||
|
||||
// Algorithm is to run a "select field_cols from table where
|
||||
// <predicate> type plan for each table in the chunks"
|
||||
//
|
||||
|
@ -1834,7 +1840,7 @@ impl<'a> ExprRewriter for MissingColumnsToNull<'a> {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use datafusion::logical_plan::lit;
|
||||
use predicate::predicate::PredicateBuilder;
|
||||
use predicate::PredicateBuilder;
|
||||
use schema::builder::SchemaBuilder;
|
||||
|
||||
use crate::{
|
||||
|
@ -1953,7 +1959,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn test_predicate_rewrite_table_names() {
|
||||
run_test::<_, TestDatabase>(|test_db, rpc_predicate| {
|
||||
run_test(&|test_db, rpc_predicate| {
|
||||
InfluxRpcPlanner::new()
|
||||
.table_names(test_db, rpc_predicate)
|
||||
.expect("creating plan");
|
||||
|
@ -1963,7 +1969,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn test_predicate_rewrite_tag_keys() {
|
||||
run_test::<_, TestDatabase>(|test_db, rpc_predicate| {
|
||||
run_test(&|test_db, rpc_predicate| {
|
||||
InfluxRpcPlanner::new()
|
||||
.tag_keys(test_db, rpc_predicate)
|
||||
.expect("creating plan");
|
||||
|
@ -1973,7 +1979,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn test_predicate_rewrite_tag_values() {
|
||||
run_test::<_, TestDatabase>(|test_db, rpc_predicate| {
|
||||
run_test(&|test_db, rpc_predicate| {
|
||||
InfluxRpcPlanner::new()
|
||||
.tag_values(test_db, "foo", rpc_predicate)
|
||||
.expect("creating plan");
|
||||
|
@ -1983,7 +1989,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn test_predicate_rewrite_field_columns() {
|
||||
run_test::<_, TestDatabase>(|test_db, rpc_predicate| {
|
||||
run_test(&|test_db, rpc_predicate| {
|
||||
InfluxRpcPlanner::new()
|
||||
.field_columns(test_db, rpc_predicate)
|
||||
.expect("creating plan");
|
||||
|
@ -1993,7 +1999,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn test_predicate_rewrite_read_filter() {
|
||||
run_test::<_, TestDatabase>(|test_db, rpc_predicate| {
|
||||
run_test(&|test_db, rpc_predicate| {
|
||||
InfluxRpcPlanner::new()
|
||||
.read_filter(test_db, rpc_predicate)
|
||||
.expect("creating plan");
|
||||
|
@ -2003,7 +2009,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn test_predicate_read_group() {
|
||||
run_test::<_, TestDatabase>(|test_db, rpc_predicate| {
|
||||
run_test(&|test_db, rpc_predicate| {
|
||||
let agg = Aggregate::None;
|
||||
let group_columns = &["foo"];
|
||||
InfluxRpcPlanner::new()
|
||||
|
@ -2015,7 +2021,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn test_predicate_read_window_aggregate() {
|
||||
run_test::<_, TestDatabase>(|test_db, rpc_predicate| {
|
||||
run_test(&|test_db, rpc_predicate| {
|
||||
let agg = Aggregate::First;
|
||||
let every = WindowDuration::from_months(1, false);
|
||||
let offset = WindowDuration::from_months(1, false);
|
||||
|
@ -2026,17 +2032,15 @@ mod tests {
|
|||
.await
|
||||
}
|
||||
|
||||
/// Runs func() and checks that predicates are simplified prior to sending them off
|
||||
async fn run_test<F, D>(f: F)
|
||||
where
|
||||
F: FnOnce(&TestDatabase, InfluxRpcPredicate) + Send,
|
||||
{
|
||||
let chunk0 = Arc::new(
|
||||
TestChunk::new("h2o")
|
||||
.with_id(0)
|
||||
.with_tag_column("foo")
|
||||
.with_time_column(),
|
||||
);
|
||||
/// Given a `TestDatabase` plans a InfluxRPC query
|
||||
/// (e.g. read_filter, read_window_aggregate, etc). The test below
|
||||
/// ensures that predicates are simplified during query planning.
|
||||
type PlanRPCFunc = dyn Fn(&TestDatabase, InfluxRpcPredicate) + Send + Sync;
|
||||
|
||||
/// Runs func() and checks that predicates are simplified prior to
|
||||
/// sending them down to the chunks for processing.
|
||||
async fn run_test(func: &'static PlanRPCFunc) {
|
||||
// ------------- Test 1 ----------------
|
||||
|
||||
// this is what happens with a grpc predicate on a tag
|
||||
//
|
||||
|
@ -2053,22 +2057,74 @@ mod tests {
|
|||
.add_expr(expr.eq(lit("bar")))
|
||||
.build();
|
||||
|
||||
// verify that the predicate was rewritten to `foo = 'bar'`
|
||||
let expr = col("foo").eq(lit("bar"));
|
||||
let expected_predicate = PredicateBuilder::new().add_expr(expr).build();
|
||||
|
||||
run_test_with_predicate(&func, silly_predicate, expected_predicate).await;
|
||||
|
||||
// ------------- Test 2 ----------------
|
||||
// Validate that _measurement predicates are translated
|
||||
//
|
||||
// https://github.com/influxdata/influxdb_iox/issues/3601
|
||||
// _measurement = 'foo'
|
||||
let silly_predicate = PredicateBuilder::new()
|
||||
.add_expr(col("_measurement").eq(lit("foo")))
|
||||
.build();
|
||||
|
||||
// verify that the predicate was rewritten to `false` as the
|
||||
// measurement name is `h2o`
|
||||
let expr = lit(false);
|
||||
|
||||
let expected_predicate = PredicateBuilder::new().add_expr(expr).build();
|
||||
run_test_with_predicate(&func, silly_predicate, expected_predicate).await;
|
||||
|
||||
// ------------- Test 3 ----------------
|
||||
// more complicated _measurement predicates are translated
|
||||
//
|
||||
// https://github.com/influxdata/influxdb_iox/issues/3601
|
||||
// (_measurement = 'foo' OR _measurement = 'h2o') AND time > 5
|
||||
let silly_predicate = PredicateBuilder::new()
|
||||
.add_expr(
|
||||
col("_measurement")
|
||||
.eq(lit("foo"))
|
||||
.or(col("_measurement").eq(lit("h2o")))
|
||||
.and(col("time").gt(lit(5))),
|
||||
)
|
||||
.build();
|
||||
|
||||
// verify that the predicate was rewritten to time > 5
|
||||
let expr = col("time").gt(lit(5));
|
||||
|
||||
let expected_predicate = PredicateBuilder::new().add_expr(expr).build();
|
||||
run_test_with_predicate(&func, silly_predicate, expected_predicate).await;
|
||||
}
|
||||
|
||||
/// Runs func() with the specified predicate and verifies
|
||||
/// `expected_predicate` is received by the chunk
|
||||
async fn run_test_with_predicate(
|
||||
func: &PlanRPCFunc,
|
||||
predicate: Predicate,
|
||||
expected_predicate: Predicate,
|
||||
) {
|
||||
let chunk0 = Arc::new(
|
||||
TestChunk::new("h2o")
|
||||
.with_id(0)
|
||||
.with_tag_column("foo")
|
||||
.with_time_column(),
|
||||
);
|
||||
|
||||
let executor = Arc::new(Executor::new(1));
|
||||
let test_db = TestDatabase::new(Arc::clone(&executor));
|
||||
test_db.add_chunk("my_partition_key", Arc::clone(&chunk0));
|
||||
|
||||
let rpc_predicate = InfluxRpcPredicate::new(None, silly_predicate);
|
||||
let rpc_predicate = InfluxRpcPredicate::new(None, predicate);
|
||||
|
||||
// run the function
|
||||
f(&test_db, rpc_predicate);
|
||||
func(&test_db, rpc_predicate);
|
||||
|
||||
let actual_predicate = test_db.get_chunks_predicate();
|
||||
|
||||
// verify that the predicate was rewritten to `foo = 'bar'`
|
||||
let expr = col("foo").eq(lit("bar"));
|
||||
|
||||
let expected_predicate = PredicateBuilder::new().add_expr(expr).build();
|
||||
|
||||
assert_eq!(
|
||||
actual_predicate, expected_predicate,
|
||||
"\nActual: {:?}\nExpected: {:?}",
|
||||
|
|
|
@ -16,10 +16,7 @@ use data_types::{
|
|||
use datafusion::physical_plan::SendableRecordBatchStream;
|
||||
use exec::stringset::StringSet;
|
||||
use observability_deps::tracing::{debug, trace};
|
||||
use predicate::{
|
||||
predicate::{Predicate, PredicateMatch},
|
||||
rpc_predicate::QueryDatabaseMeta,
|
||||
};
|
||||
use predicate::{rpc_predicate::QueryDatabaseMeta, Predicate, PredicateMatch};
|
||||
use schema::selection::Selection;
|
||||
use schema::{sort::SortKey, Schema, TIME_COLUMN_NAME};
|
||||
|
||||
|
@ -109,6 +106,11 @@ impl<'a> Drop for QueryCompletedToken<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Boxed description of a query that knows how to render to a string
|
||||
///
|
||||
/// This avoids storing potentially large strings
|
||||
pub type QueryText = Box<dyn std::fmt::Display + Send + Sync>;
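// Illustrative sketch (not part of the upstream change): because `QueryText` is just a
// boxed `Display`, callers can hand over something that renders the query description
// lazily instead of materialising a potentially large string up front. `PlanDescription`
// below is a hypothetical example type.
#[cfg(test)]
mod query_text_sketch {
    use super::QueryText;

    struct PlanDescription {
        table: &'static str,
        n_chunks: usize,
    }

    impl std::fmt::Display for PlanDescription {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            // Only rendered if someone actually formats the query text.
            write!(f, "read_filter on {} ({} chunks)", self.table, self.n_chunks)
        }
    }

    #[test]
    fn query_text_renders_lazily() {
        let q: QueryText = Box::new(PlanDescription {
            table: "h2o",
            n_chunks: 3,
        });
        assert_eq!(q.to_string(), "read_filter on h2o (3 chunks)");
    }
}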
|
||||
|
||||
/// A `Database` is the main trait implemented by the IOx subsystems
|
||||
/// that store actual data.
|
||||
///
|
||||
|
@ -132,7 +134,7 @@ pub trait QueryDatabase: QueryDatabaseMeta + Debug + Send + Sync {
|
|||
fn record_query(
|
||||
&self,
|
||||
query_type: impl Into<String>,
|
||||
query_text: impl Into<String>,
|
||||
query_text: QueryText,
|
||||
) -> QueryCompletedToken<'_>;
|
||||
}
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@ use datafusion::{
|
|||
},
|
||||
};
|
||||
use observability_deps::tracing::{debug, trace};
|
||||
use predicate::predicate::{Predicate, PredicateBuilder};
|
||||
use predicate::{Predicate, PredicateBuilder};
|
||||
use schema::{merge::SchemaMerger, sort::SortKey, Schema};
|
||||
|
||||
use crate::{
|
||||
|
|
|
@ -16,7 +16,7 @@ use schema::selection::Selection;
|
|||
use schema::Schema;
|
||||
|
||||
use crate::QueryChunk;
|
||||
use predicate::predicate::Predicate;
|
||||
use predicate::Predicate;
|
||||
|
||||
use async_trait::async_trait;
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@ use datafusion::{
|
|||
physical_optimizer::pruning::{PruningPredicate, PruningStatistics},
|
||||
};
|
||||
use observability_deps::tracing::{debug, trace};
|
||||
use predicate::predicate::Predicate;
|
||||
use predicate::Predicate;
|
||||
use schema::Schema;
|
||||
|
||||
use crate::{group_by::Aggregate, QueryChunkMeta};
|
||||
|
@ -228,7 +228,7 @@ mod test {
|
|||
use std::{cell::RefCell, sync::Arc};
|
||||
|
||||
use datafusion::logical_plan::{col, lit};
|
||||
use predicate::predicate::PredicateBuilder;
|
||||
use predicate::PredicateBuilder;
|
||||
use schema::merge::SchemaMerger;
|
||||
|
||||
use crate::{test::TestChunk, QueryChunk};
|
||||
|
|
|
@ -4,11 +4,11 @@
|
|||
//! AKA it is a Mock
|
||||
|
||||
use crate::exec::{ExecutionContextProvider, Executor, ExecutorType, IOxExecutionContext};
|
||||
use crate::QueryCompletedToken;
|
||||
use crate::{
|
||||
exec::stringset::{StringSet, StringSetRef},
|
||||
Predicate, PredicateMatch, QueryChunk, QueryChunkMeta, QueryDatabase,
|
||||
};
|
||||
use crate::{QueryCompletedToken, QueryText};
|
||||
use arrow::array::UInt64Array;
|
||||
use arrow::{
|
||||
array::{ArrayRef, DictionaryArray, Int64Array, StringArray, TimestampNanosecondArray},
|
||||
|
@ -155,7 +155,7 @@ impl QueryDatabase for TestDatabase {
|
|||
fn record_query(
|
||||
&self,
|
||||
_query_type: impl Into<String>,
|
||||
_query_text: impl Into<String>,
|
||||
_query_text: QueryText,
|
||||
) -> QueryCompletedToken<'_> {
|
||||
QueryCompletedToken::new(|| {})
|
||||
}
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
use arrow::datatypes::DataType;
|
||||
use data_types::timestamp::{MAX_NANO_TIME, MIN_NANO_TIME};
|
||||
use datafusion::logical_plan::{col, lit};
|
||||
use predicate::predicate::PredicateBuilder;
|
||||
use predicate::rpc_predicate::InfluxRpcPredicate;
|
||||
use predicate::PredicateBuilder;
|
||||
use query::{
|
||||
exec::fieldlist::{Field, FieldList},
|
||||
frontend::influxrpc::InfluxRpcPlanner,
|
||||
|
@ -216,3 +217,64 @@ async fn test_field_name_plan_with_delete() {
|
|||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_field_columns_max_time() {
|
||||
let predicate = PredicateBuilder::default()
|
||||
.timestamp_range(MIN_NANO_TIME, MAX_NANO_TIME)
|
||||
.build();
|
||||
let predicate = InfluxRpcPredicate::new(None, predicate);
|
||||
|
||||
let expected_fields = FieldList {
|
||||
fields: vec![Field {
|
||||
name: "value".into(),
|
||||
data_type: DataType::Float64,
|
||||
last_timestamp: MAX_NANO_TIME,
|
||||
}],
|
||||
};
|
||||
|
||||
run_field_columns_test_case(MeasurementWithMaxTime {}, predicate, expected_fields).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_field_columns_max_i64() {
|
||||
let predicate = PredicateBuilder::default()
|
||||
.timestamp_range(i64::MIN, i64::MAX)
|
||||
.build();
|
||||
let predicate = InfluxRpcPredicate::new(None, predicate);
|
||||
|
||||
let expected_fields = FieldList {
|
||||
fields: vec![Field {
|
||||
name: "value".into(),
|
||||
data_type: DataType::Float64,
|
||||
last_timestamp: MAX_NANO_TIME,
|
||||
}],
|
||||
};
|
||||
|
||||
run_field_columns_test_case(MeasurementWithMaxTime {}, predicate, expected_fields).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_field_columns_max_time_less_one() {
|
||||
let predicate = PredicateBuilder::default()
|
||||
// one less than max timestamp
|
||||
.timestamp_range(MIN_NANO_TIME, MAX_NANO_TIME - 1)
|
||||
.build();
|
||||
let predicate = InfluxRpcPredicate::new(None, predicate);
|
||||
|
||||
let expected_fields = FieldList { fields: vec![] };
|
||||
|
||||
run_field_columns_test_case(MeasurementWithMaxTime {}, predicate, expected_fields).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_field_columns_max_time_greater_one() {
|
||||
let predicate = PredicateBuilder::default()
|
||||
.timestamp_range(MIN_NANO_TIME + 1, MAX_NANO_TIME)
|
||||
.build();
|
||||
let predicate = InfluxRpcPredicate::new(None, predicate);
|
||||
|
||||
let expected_fields = FieldList { fields: vec![] };
|
||||
|
||||
run_field_columns_test_case(MeasurementWithMaxTime {}, predicate, expected_fields).await;
|
||||
}
|
||||
|
|
|
@ -13,8 +13,8 @@ use crate::{
|
|||
},
|
||||
};
|
||||
use datafusion::logical_plan::{col, lit};
|
||||
use predicate::predicate::PredicateBuilder;
|
||||
use predicate::rpc_predicate::InfluxRpcPredicate;
|
||||
use predicate::PredicateBuilder;
|
||||
use query::frontend::influxrpc::InfluxRpcPlanner;
|
||||
|
||||
/// runs read_filter(predicate) and compares it to the expected
|
||||
|
|
|
@ -14,8 +14,8 @@ use datafusion::{
|
|||
logical_plan::{binary_expr, Operator},
|
||||
prelude::*,
|
||||
};
|
||||
use predicate::predicate::PredicateBuilder;
|
||||
use predicate::rpc_predicate::InfluxRpcPredicate;
|
||||
use predicate::PredicateBuilder;
|
||||
use query::{frontend::influxrpc::InfluxRpcPlanner, group_by::Aggregate};
|
||||
|
||||
/// runs read_group(predicate) and compares it to the expected
|
||||
|
@ -360,8 +360,6 @@ async fn test_grouped_series_set_plan_count_measurement_pred() {
|
|||
.await;
|
||||
}
|
||||
|
||||
// NGA todo: add delete DbSetup after all scenarios are done for 2 chunks
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_grouped_series_set_plan_first() {
|
||||
let predicate = PredicateBuilder::default()
|
||||
|
@ -486,8 +484,6 @@ async fn test_grouped_series_set_plan_last_with_nulls() {
|
|||
.await;
|
||||
}
|
||||
|
||||
// NGA todo: add delete DbSetup after all scenarios are done for 2 chunks
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_grouped_series_set_plan_min() {
|
||||
let predicate = PredicateBuilder::default()
|
||||
|
|
|
@ -10,8 +10,8 @@ use async_trait::async_trait;
|
|||
use data_types::{delete_predicate::DeletePredicate, timestamp::TimestampRange};
|
||||
use datafusion::prelude::*;
|
||||
use db::{test_helpers::write_lp, utils::make_db};
|
||||
use predicate::predicate::PredicateBuilder;
|
||||
use predicate::rpc_predicate::InfluxRpcPredicate;
|
||||
use predicate::PredicateBuilder;
|
||||
use query::{
|
||||
frontend::influxrpc::InfluxRpcPlanner,
|
||||
group_by::{Aggregate, WindowDuration},
|
||||
|
@ -108,8 +108,6 @@ impl DbSetup for MeasurementForWindowAggregate {
|
|||
}
|
||||
}
|
||||
|
||||
// NGA todo: add delete DbSetup after all scenarios are done for 2 chunks
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_read_window_aggregate_nanoseconds() {
|
||||
let predicate = PredicateBuilder::default()
|
||||
|
@ -256,8 +254,6 @@ impl DbSetup for MeasurementForWindowAggregateMonths {
|
|||
}
|
||||
}
|
||||
|
||||
// NGA todo: add delete DbSetup
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_read_window_aggregate_months() {
|
||||
let agg = Aggregate::Mean;
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
//! Tests for the Influx gRPC queries
|
||||
use data_types::timestamp::{MAX_NANO_TIME, MIN_NANO_TIME};
|
||||
use datafusion::logical_plan::{col, lit};
|
||||
use predicate::predicate::PredicateBuilder;
|
||||
use predicate::rpc_predicate::InfluxRpcPredicate;
|
||||
use predicate::PredicateBuilder;
|
||||
use query::{
|
||||
exec::stringset::{IntoStringSet, StringSetRef},
|
||||
frontend::influxrpc::InfluxRpcPlanner,
|
||||
|
@ -207,6 +208,48 @@ async fn list_table_names_data_pred_250_300_with_delete_all() {
|
|||
run_table_names_test_case(TwoMeasurementsWithDeleteAll {}, tsp(250, 300), vec![]).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_table_names_max_time() {
|
||||
run_table_names_test_case(
|
||||
MeasurementWithMaxTime {},
|
||||
tsp(MIN_NANO_TIME, MAX_NANO_TIME),
|
||||
vec!["cpu"],
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_table_names_max_i64() {
|
||||
run_table_names_test_case(
|
||||
MeasurementWithMaxTime {},
|
||||
// outside valid timestamp range
|
||||
tsp(i64::MIN, i64::MAX),
|
||||
vec!["cpu"],
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_table_names_time_less_one() {
|
||||
run_table_names_test_case(
|
||||
MeasurementWithMaxTime {},
|
||||
tsp(MIN_NANO_TIME, MAX_NANO_TIME - 1),
|
||||
vec![],
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_table_names_max_time_greater_one() {
|
||||
run_table_names_test_case(
|
||||
MeasurementWithMaxTime {},
|
||||
// one more than max timestamp
|
||||
tsp(MIN_NANO_TIME + 1, MAX_NANO_TIME),
|
||||
vec![],
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
// Note when table names supports general purpose predicates, add a
|
||||
// test here with a `_measurement` predicate
|
||||
// https://github.com/influxdata/influxdb_iox/issues/762
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
use data_types::timestamp::{MAX_NANO_TIME, MIN_NANO_TIME};
|
||||
use datafusion::logical_plan::{col, lit};
|
||||
use predicate::predicate::PredicateBuilder;
|
||||
use predicate::rpc_predicate::InfluxRpcPredicate;
|
||||
use predicate::PredicateBuilder;
|
||||
use query::{
|
||||
exec::stringset::{IntoStringSet, StringSetRef},
|
||||
frontend::influxrpc::InfluxRpcPlanner,
|
||||
|
@ -186,7 +187,7 @@ async fn list_tag_name_end_to_end_with_delete() {
|
|||
async fn list_tag_name_max_time() {
|
||||
test_helpers::maybe_start_logging();
|
||||
let predicate = PredicateBuilder::default()
|
||||
.timestamp_range(-9223372036854775806, 9223372036854775806)
|
||||
.timestamp_range(MIN_NANO_TIME, MAX_NANO_TIME)
|
||||
.build();
|
||||
let predicate = InfluxRpcPredicate::new(None, predicate);
|
||||
let expected_tag_keys = vec!["host"];
|
||||
|
@ -209,7 +210,7 @@ async fn list_tag_name_max_i64() {
|
|||
async fn list_tag_name_max_time_less_one() {
|
||||
test_helpers::maybe_start_logging();
|
||||
let predicate = PredicateBuilder::default()
|
||||
.timestamp_range(-9223372036854775806, 9223372036854775805) // one less than max timestamp
|
||||
.timestamp_range(MIN_NANO_TIME, MAX_NANO_TIME - 1) // one less than max timestamp
|
||||
.build();
|
||||
let predicate = InfluxRpcPredicate::new(None, predicate);
|
||||
let expected_tag_keys = vec![];
|
||||
|
@ -220,7 +221,7 @@ async fn list_tag_name_max_time_less_one() {
|
|||
async fn list_tag_name_max_time_greater_one() {
|
||||
test_helpers::maybe_start_logging();
|
||||
let predicate = PredicateBuilder::default()
|
||||
.timestamp_range(-9223372036854775805, 9223372036854775806) // one more than min timestamp
|
||||
.timestamp_range(MIN_NANO_TIME + 1, MAX_NANO_TIME) // one more than min timestamp
|
||||
.build();
|
||||
let predicate = InfluxRpcPredicate::new(None, predicate);
|
||||
let expected_tag_keys = vec![];
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use datafusion::logical_plan::{col, lit};
|
||||
use predicate::predicate::PredicateBuilder;
|
||||
use predicate::rpc_predicate::InfluxRpcPredicate;
|
||||
use predicate::PredicateBuilder;
|
||||
use query::{
|
||||
exec::stringset::{IntoStringSet, StringSetRef},
|
||||
frontend::influxrpc::InfluxRpcPlanner,
|
||||
|
|
|
@ -5,8 +5,8 @@ use db::{
|
|||
utils::{make_db, TestDb},
|
||||
};
|
||||
use metric::{Attributes, Metric, U64Counter};
|
||||
use predicate::predicate::PredicateBuilder;
|
||||
use predicate::rpc_predicate::InfluxRpcPredicate;
|
||||
use predicate::PredicateBuilder;
|
||||
use query::{
|
||||
exec::{stringset::StringSet, ExecutionContextProvider},
|
||||
frontend::{influxrpc::InfluxRpcPlanner, sql::SqlQueryPlanner},
|
||||
|
|
|
@ -1308,8 +1308,6 @@ impl DbSetup for OneMeasurementForAggs {
|
|||
}
|
||||
}
|
||||
|
||||
// NGA todo: add delete DbSetup after all scenarios are done for 2 chunks
|
||||
|
||||
pub struct AnotherMeasurementForAggs {}
|
||||
#[async_trait]
|
||||
impl DbSetup for AnotherMeasurementForAggs {
|
||||
|
@ -1332,8 +1330,6 @@ impl DbSetup for AnotherMeasurementForAggs {
|
|||
}
|
||||
}
|
||||
|
||||
// NGA todo: add delete DbSetup after all scenarios are done for 2 chunks
|
||||
|
||||
pub struct TwoMeasurementForAggs {}
|
||||
#[async_trait]
|
||||
impl DbSetup for TwoMeasurementForAggs {
|
||||
|
@ -1353,8 +1349,6 @@ impl DbSetup for TwoMeasurementForAggs {
|
|||
}
|
||||
}
|
||||
|
||||
// NGA todo: add delete DbSetup after all scenarios are done for 2 chunks
|
||||
|
||||
pub struct MeasurementForSelectors {}
|
||||
#[async_trait]
|
||||
impl DbSetup for MeasurementForSelectors {
|
||||
|
@ -1408,8 +1402,6 @@ impl DbSetup for MeasurementForMax {
|
|||
}
|
||||
}
|
||||
|
||||
// NGA todo: add delete DbSetup after all scenarios are done for 2 chunks
|
||||
|
||||
pub struct MeasurementForGroupKeys {}
|
||||
#[async_trait]
|
||||
impl DbSetup for MeasurementForGroupKeys {
|
||||
|
@ -1432,8 +1424,6 @@ impl DbSetup for MeasurementForGroupKeys {
|
|||
}
|
||||
}
|
||||
|
||||
// NGA todo: add delete DbSetup after all scenarios are done for 2 chunks
|
||||
|
||||
pub struct MeasurementForGroupByField {}
|
||||
#[async_trait]
|
||||
impl DbSetup for MeasurementForGroupByField {
|
||||
|
|
|
@ -104,7 +104,9 @@ impl VariantWriteBuffer {
|
|||
write_buffer
|
||||
.store_operation(0, operation)
|
||||
.await
|
||||
.context(WriteFailureSnafu)?;
|
||||
.map_err(|e| Error::WriteFailure {
|
||||
source: Box::new(e),
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -91,8 +91,13 @@ where
|
|||
if self.cache.get_schema(&namespace).is_none() {
|
||||
trace!(%namespace, "namespace auto-create cache miss");
|
||||
|
||||
match self
|
||||
let mut txn = self
|
||||
.catalog
|
||||
.start_transaction()
|
||||
.await
|
||||
.map_err(NamespaceCreationError::Create)?;
|
||||
|
||||
match txn
|
||||
.namespaces()
|
||||
.create(
|
||||
namespace.as_str(),
|
||||
|
@ -103,6 +108,8 @@ where
|
|||
.await
|
||||
{
|
||||
Ok(_) => {
|
||||
txn.commit().await.map_err(NamespaceCreationError::Create)?;
|
||||
|
||||
debug!(%namespace, "created namespace");
|
||||
}
|
||||
Err(iox_catalog::interface::Error::NameExists { .. }) => {
|
||||
|
@ -110,9 +117,11 @@ where
|
|||
// namespace, or another thread raced populating the catalog
|
||||
// and beat this thread to it.
|
||||
debug!(%namespace, "spurious namespace create failed");
|
||||
txn.abort().await.map_err(NamespaceCreationError::Create)?;
|
||||
}
|
||||
Err(e) => {
|
||||
error!(error=%e, %namespace, "failed to auto-create namespace");
|
||||
txn.abort().await.map_err(NamespaceCreationError::Create)?;
|
||||
return Err(NamespaceCreationError::Create(e));
|
||||
}
|
||||
}
|
||||
|
@ -190,15 +199,19 @@ mod tests {
|
|||
|
||||
// The cache hit should mean the catalog SHOULD NOT see a create request
|
||||
// for the namespace.
|
||||
let mut txn = catalog
|
||||
.start_transaction()
|
||||
.await
|
||||
.expect("failed to start UoW");
|
||||
assert!(
|
||||
catalog
|
||||
.namespaces()
|
||||
txn.namespaces()
|
||||
.get_by_name(ns.as_str())
|
||||
.await
|
||||
.expect("lookup should not error")
|
||||
.is_none(),
|
||||
"expected no request to the catalog"
|
||||
);
|
||||
txn.abort().await.expect("failed to abort UoW");
|
||||
|
||||
// And the DML handler must be called.
|
||||
assert_matches!(mock_handler.calls().as_slice(), [MockDmlHandlerCall::Write { namespace, .. }] => {
|
||||
|
@ -230,12 +243,17 @@ mod tests {
|
|||
|
||||
// The cache miss should mean the catalog MUST see a create request for
|
||||
// the namespace.
|
||||
let got = catalog
|
||||
let mut txn = catalog
|
||||
.start_transaction()
|
||||
.await
|
||||
.expect("failed to start UoW");
|
||||
let got = txn
|
||||
.namespaces()
|
||||
.get_by_name(ns.as_str())
|
||||
.await
|
||||
.expect("lookup should not error")
|
||||
.expect("creation request should be sent to catalog");
|
||||
txn.abort().await.expect("failed to abort UoW");
|
||||
|
||||
assert_eq!(
|
||||
got,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
use std::sync::Arc;
|
||||
use std::{ops::DerefMut, sync::Arc};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use data_types::{delete_predicate::DeletePredicate, DatabaseName};
|
||||
|
@ -135,6 +135,12 @@ where
|
|||
batches: HashMap<String, MutableBatch>,
|
||||
span_ctx: Option<SpanContext>,
|
||||
) -> Result<(), Self::WriteError> {
|
||||
let mut txn = self
|
||||
.catalog
|
||||
.start_transaction()
|
||||
.await
|
||||
.map_err(SchemaError::NamespaceLookup)?;
|
||||
|
||||
// Load the namespace schema from the cache, falling back to pulling it
|
||||
// from the global catalog (if it exists).
|
||||
let schema = self.cache.get_schema(&namespace);
|
||||
|
@ -143,7 +149,7 @@ where
|
|||
None => {
|
||||
// Pull the schema from the global catalog or error if it does
|
||||
// not exist.
|
||||
let schema = get_schema_by_name(&namespace, &*self.catalog)
|
||||
let schema = get_schema_by_name(&namespace, txn.deref_mut())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
warn!(error=%e, %namespace, "failed to retrieve namespace schema");
|
||||
|
@ -162,7 +168,7 @@ where
|
|||
let maybe_new_schema = validate_or_insert_schema(
|
||||
batches.iter().map(|(k, v)| (k.as_str(), v)),
|
||||
&schema,
|
||||
&*self.catalog,
|
||||
txn.deref_mut(),
|
||||
)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
|
@ -171,6 +177,8 @@ where
|
|||
})?
|
||||
.map(Arc::new);
|
||||
|
||||
txn.commit().await.map_err(SchemaError::NamespaceLookup)?;
|
||||
|
||||
trace!(%namespace, "schema validation complete");
|
||||
|
||||
// If the schema has been updated, immediately add it to the cache
|
||||
|
@ -246,8 +254,12 @@ mod tests {
|
|||
/// named [`NAMESPACE`].
|
||||
async fn create_catalog() -> Arc<dyn Catalog> {
|
||||
let catalog: Arc<dyn Catalog> = Arc::new(MemCatalog::new());
|
||||
catalog
|
||||
.namespaces()
|
||||
|
||||
let mut txn = catalog
|
||||
.start_transaction()
|
||||
.await
|
||||
.expect("failed to start UoW");
|
||||
txn.namespaces()
|
||||
.create(
|
||||
NAMESPACE,
|
||||
"inf",
|
||||
|
@ -256,6 +268,8 @@ mod tests {
|
|||
)
|
||||
.await
|
||||
.expect("failed to create test namespace");
|
||||
txn.commit().await.expect("failed to commit UoW");
|
||||
|
||||
catalog
|
||||
}
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ async-trait = "0.1"
|
|||
bytes = "1.0"
|
||||
chrono = { version = "0.4", default-features = false }
|
||||
cache_loader_async = { version = "0.1.2", features = ["ttl-cache"] }
|
||||
crc32fast = "1.3.0"
|
||||
crc32fast = "1.3.2"
|
||||
data_types = { path = "../data_types" }
|
||||
db = { path = "../db" }
|
||||
futures = "0.3"
|
||||
|
|
|
@ -511,7 +511,7 @@ impl DatabaseStateCatalogLoaded {
|
|||
};
|
||||
let write_buffer_consumer = match rules.write_buffer_connection.as_ref() {
|
||||
Some(connection) => {
|
||||
let mut consumer = write_buffer_factory
|
||||
let consumer = write_buffer_factory
|
||||
.new_config_read(db_name.as_str(), trace_collector.as_ref(), connection)
|
||||
.await
|
||||
.context(CreateWriteBufferSnafu)?;
|
||||
|
@ -522,12 +522,14 @@ impl DatabaseStateCatalogLoaded {
|
|||
self.replay_plan.as_ref().as_ref()
|
||||
};
|
||||
|
||||
db.perform_replay(replay_plan, consumer.as_mut())
|
||||
let streams = db
|
||||
.perform_replay(replay_plan, Arc::clone(&consumer))
|
||||
.await
|
||||
.context(ReplaySnafu)?;
|
||||
|
||||
Some(Arc::new(WriteBufferConsumer::new(
|
||||
consumer,
|
||||
streams,
|
||||
Arc::clone(&db),
|
||||
shared.application.metric_registry().as_ref(),
|
||||
)))
|
||||
|
|
|
@ -5,8 +5,8 @@ use std::io::Read;
|
|||
// current-thread executor
|
||||
use db::Db;
|
||||
use flate2::read::GzDecoder;
|
||||
use predicate::predicate::PredicateBuilder;
|
||||
use predicate::rpc_predicate::InfluxRpcPredicate;
|
||||
use predicate::PredicateBuilder;
|
||||
use query::{
|
||||
exec::{Executor, ExecutorType},
|
||||
frontend::influxrpc::InfluxRpcPlanner,
|
||||
|
|
|
@ -5,8 +5,8 @@ use std::io::Read;
|
|||
// current-thread executor
|
||||
use db::Db;
|
||||
use flate2::read::GzDecoder;
|
||||
use predicate::predicate::PredicateBuilder;
|
||||
use predicate::rpc_predicate::InfluxRpcPredicate;
|
||||
use predicate::PredicateBuilder;
|
||||
use query::{
|
||||
exec::{Executor, ExecutorType},
|
||||
frontend::influxrpc::InfluxRpcPlanner,
|
||||
|
|
|
@ -5,8 +5,8 @@ use std::io::Read;
|
|||
// current-thread executor
|
||||
use db::Db;
|
||||
use flate2::read::GzDecoder;
|
||||
use predicate::predicate::PredicateBuilder;
|
||||
use predicate::rpc_predicate::InfluxRpcPredicate;
|
||||
use predicate::PredicateBuilder;
|
||||
use query::{
|
||||
exec::{Executor, ExecutorType},
|
||||
frontend::influxrpc::InfluxRpcPlanner,
|
||||
|
|
|
@ -19,7 +19,7 @@ base64 = { version = "0.13", features = ["std"] }
|
|||
bitflags = { version = "1" }
|
||||
byteorder = { version = "1", features = ["std"] }
|
||||
bytes = { version = "1", features = ["std"] }
|
||||
chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "libc", "std", "winapi"] }
|
||||
chrono = { version = "0.4", features = ["alloc", "clock", "libc", "oldtime", "serde", "std", "time", "winapi"] }
|
||||
digest = { version = "0.9", default-features = false, features = ["alloc", "std"] }
|
||||
either = { version = "1", features = ["use_std"] }
|
||||
futures-channel = { version = "0.3", features = ["alloc", "futures-sink", "sink", "std"] }
|
||||
|
@ -52,10 +52,10 @@ sha2 = { version = "0.9", features = ["std"] }
|
|||
smallvec = { version = "1", default-features = false, features = ["union"] }
|
||||
tokio = { version = "1", features = ["bytes", "fs", "full", "io-std", "io-util", "libc", "macros", "memchr", "mio", "net", "num_cpus", "once_cell", "parking_lot", "process", "rt", "rt-multi-thread", "signal", "signal-hook-registry", "sync", "time", "tokio-macros", "winapi"] }
|
||||
tokio-stream = { version = "0.1", features = ["fs", "net", "time"] }
|
||||
tokio-util = { version = "0.6", features = ["codec", "io"] }
|
||||
tokio-util = { version = "0.6", features = ["codec", "io", "slab", "time"] }
|
||||
tower = { version = "0.4", features = ["balance", "buffer", "discover", "futures-util", "indexmap", "limit", "load", "log", "make", "rand", "ready-cache", "slab", "timeout", "tokio", "tokio-stream", "tokio-util", "tracing", "util"] }
|
||||
tracing = { version = "0.1", features = ["attributes", "log", "max_level_trace", "release_max_level_debug", "std", "tracing-attributes"] }
|
||||
tracing-core = { version = "0.1", features = ["lazy_static", "std"] }
|
||||
tracing-core = { version = "0.1", features = ["lazy_static", "std", "valuable"] }
|
||||
tracing-log = { version = "0.1", features = ["log-tracer", "std", "trace-logger"] }
|
||||
tracing-subscriber = { version = "0.3", features = ["alloc", "ansi", "ansi_term", "env-filter", "fmt", "json", "lazy_static", "matchers", "regex", "registry", "serde", "serde_json", "sharded-slab", "smallvec", "std", "thread_local", "tracing", "tracing-log", "tracing-serde"] }
|
||||
uuid = { version = "0.8", features = ["getrandom", "std", "v4"] }
|
||||
|
@ -66,6 +66,7 @@ base64 = { version = "0.13", features = ["std"] }
|
|||
bitflags = { version = "1" }
|
||||
byteorder = { version = "1", features = ["std"] }
|
||||
bytes = { version = "1", features = ["std"] }
|
||||
cc = { version = "1", default-features = false, features = ["jobserver", "parallel"] }
|
||||
digest = { version = "0.9", default-features = false, features = ["alloc", "std"] }
|
||||
either = { version = "1", features = ["use_std"] }
|
||||
futures-channel = { version = "0.3", features = ["alloc", "futures-sink", "sink", "std"] }
|
||||
|
@ -98,7 +99,6 @@ uuid = { version = "0.8", features = ["getrandom", "std", "v4"] }
|
|||
libc = { version = "0.2", features = ["extra_traits", "std"] }
|
||||
|
||||
[target.x86_64-unknown-linux-gnu.build-dependencies]
|
||||
cc = { version = "1", default-features = false, features = ["jobserver", "parallel"] }
|
||||
libc = { version = "0.2", features = ["extra_traits", "std"] }
|
||||
|
||||
[target.x86_64-apple-darwin.dependencies]
|
||||
|
@ -115,10 +115,10 @@ libc = { version = "0.2", features = ["extra_traits", "std"] }
|
|||
|
||||
[target.x86_64-pc-windows-msvc.dependencies]
|
||||
scopeguard = { version = "1", features = ["use_std"] }
|
||||
winapi = { version = "0.3", default-features = false, features = ["basetsd", "cfg", "consoleapi", "errhandlingapi", "evntrace", "fileapi", "handleapi", "impl-debug", "impl-default", "in6addr", "inaddr", "ioapiset", "knownfolders", "libloaderapi", "lmcons", "minschannel", "minwinbase", "minwindef", "mstcpip", "mswsock", "namedpipeapi", "ntdef", "ntsecapi", "ntstatus", "objbase", "processenv", "schannel", "securitybaseapi", "shellapi", "shlobj", "sspi", "std", "stringapiset", "synchapi", "sysinfoapi", "threadpoollegacyapiset", "timezoneapi", "winbase", "wincon", "wincrypt", "windef", "winerror", "winioctl", "winnt", "winreg", "winsock2", "winuser", "ws2def", "ws2ipdef", "ws2tcpip"] }
|
||||
winapi = { version = "0.3", default-features = false, features = ["basetsd", "cfg", "consoleapi", "errhandlingapi", "evntrace", "fileapi", "handleapi", "impl-debug", "impl-default", "in6addr", "inaddr", "ioapiset", "knownfolders", "libloaderapi", "lmcons", "minschannel", "minwinbase", "minwindef", "mstcpip", "mswsock", "namedpipeapi", "ntdef", "ntsecapi", "ntstatus", "objbase", "processenv", "profileapi", "schannel", "securitybaseapi", "shellapi", "shlobj", "sspi", "std", "stringapiset", "synchapi", "sysinfoapi", "threadpoollegacyapiset", "timezoneapi", "winbase", "wincon", "wincrypt", "windef", "winerror", "winioctl", "winnt", "winreg", "winsock2", "winuser", "ws2def", "ws2ipdef", "ws2tcpip"] }
|
||||
|
||||
[target.x86_64-pc-windows-msvc.build-dependencies]
|
||||
scopeguard = { version = "1", features = ["use_std"] }
|
||||
winapi = { version = "0.3", default-features = false, features = ["basetsd", "cfg", "consoleapi", "errhandlingapi", "evntrace", "fileapi", "handleapi", "impl-debug", "impl-default", "in6addr", "inaddr", "ioapiset", "knownfolders", "libloaderapi", "lmcons", "minschannel", "minwinbase", "minwindef", "mstcpip", "mswsock", "namedpipeapi", "ntdef", "ntsecapi", "ntstatus", "objbase", "processenv", "schannel", "securitybaseapi", "shellapi", "shlobj", "sspi", "std", "stringapiset", "synchapi", "sysinfoapi", "threadpoollegacyapiset", "timezoneapi", "winbase", "wincon", "wincrypt", "windef", "winerror", "winioctl", "winnt", "winreg", "winsock2", "winuser", "ws2def", "ws2ipdef", "ws2tcpip"] }
|
||||
winapi = { version = "0.3", default-features = false, features = ["basetsd", "cfg", "consoleapi", "errhandlingapi", "evntrace", "fileapi", "handleapi", "impl-debug", "impl-default", "in6addr", "inaddr", "ioapiset", "knownfolders", "libloaderapi", "lmcons", "minschannel", "minwinbase", "minwindef", "mstcpip", "mswsock", "namedpipeapi", "ntdef", "ntsecapi", "ntstatus", "objbase", "processenv", "profileapi", "schannel", "securitybaseapi", "shellapi", "shlobj", "sspi", "std", "stringapiset", "synchapi", "sysinfoapi", "threadpoollegacyapiset", "timezoneapi", "winbase", "wincon", "wincrypt", "windef", "winerror", "winioctl", "winnt", "winreg", "winsock2", "winuser", "ws2def", "ws2ipdef", "ws2tcpip"] }
|
||||
|
||||
### END HAKARI SECTION
|
||||
|
|
|
@ -76,9 +76,17 @@ impl IoxHeaders {
|
|||
if name.eq_ignore_ascii_case(HEADER_CONTENT_TYPE) {
|
||||
content_type = match std::str::from_utf8(value.as_ref()) {
|
||||
Ok(CONTENT_TYPE_PROTOBUF) => Some(ContentType::Protobuf),
|
||||
Ok(c) => return Err(format!("Unknown message format: {}", c).into()),
|
||||
Ok(c) => {
|
||||
return Err(WriteBufferError::invalid_data(format!(
|
||||
"Unknown message format: {}",
|
||||
c
|
||||
)))
|
||||
}
|
||||
Err(e) => {
|
||||
return Err(format!("Error decoding content type header: {}", e).into())
|
||||
return Err(WriteBufferError::invalid_data(format!(
|
||||
"Error decoding content type header: {}",
|
||||
e
|
||||
)))
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -95,7 +103,10 @@ impl IoxHeaders {
|
|||
span_context = match parser.parse(trace_collector, &headers) {
|
||||
Ok(ctx) => ctx,
|
||||
Err(e) => {
|
||||
return Err(format!("Error decoding trace context: {}", e).into())
|
||||
return Err(WriteBufferError::invalid_data(format!(
|
||||
"Error decoding trace context: {}",
|
||||
e
|
||||
)))
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -103,15 +114,20 @@ impl IoxHeaders {
|
|||
}
|
||||
|
||||
if name.eq_ignore_ascii_case(HEADER_NAMESPACE) {
|
||||
namespace = Some(
|
||||
String::from_utf8(value.as_ref().to_vec())
|
||||
.map_err(|e| format!("Error decoding namespace header: {}", e))?,
|
||||
);
|
||||
namespace = Some(String::from_utf8(value.as_ref().to_vec()).map_err(|e| {
|
||||
WriteBufferError::invalid_data(format!(
|
||||
"Error decoding namespace header: {}",
|
||||
e
|
||||
))
|
||||
})?);
|
||||
}
|
||||
}
|
||||
|
||||
let content_type =
|
||||
content_type.ok_or_else(|| WriteBufferError::invalid_data("No content type header"))?;
|
||||
|
||||
Ok(Self {
|
||||
content_type: content_type.ok_or_else(|| "No content type header".to_string())?,
|
||||
content_type,
|
||||
span_context,
|
||||
namespace: namespace.unwrap_or_default(),
|
||||
})
|
||||
|
@ -173,8 +189,12 @@ pub fn decode(
|
|||
|
||||
match payload {
|
||||
Payload::Write(write) => {
|
||||
let tables = decode_database_batch(&write)
|
||||
.map_err(|e| format!("failed to decode database batch: {}", e))?;
|
||||
let tables = decode_database_batch(&write).map_err(|e| {
|
||||
WriteBufferError::invalid_data(format!(
|
||||
"failed to decode database batch: {}",
|
||||
e
|
||||
))
|
||||
})?;
|
||||
|
||||
Ok(DmlOperation::Write(DmlWrite::new(
|
||||
headers.namespace,
|
||||
|
@ -183,7 +203,11 @@ pub fn decode(
|
|||
)))
|
||||
}
|
||||
Payload::Delete(delete) => {
|
||||
let predicate = delete.predicate.required("predicate")?;
|
||||
let predicate = delete
|
||||
.predicate
|
||||
.required("predicate")
|
||||
.map_err(WriteBufferError::invalid_data)?;
|
||||
|
||||
Ok(DmlOperation::Delete(DmlDelete::new(
|
||||
headers.namespace,
|
||||
predicate,
|
||||
|
@ -220,7 +244,8 @@ pub fn encode_operation(
|
|||
let payload = WriteBufferPayload {
|
||||
payload: Some(payload),
|
||||
};
|
||||
Ok(payload.encode(buf).map_err(Box::new)?)
|
||||
|
||||
payload.encode(buf).map_err(WriteBufferError::invalid_input)
|
||||
}
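For `encode_operation`, the encode error is now classified as invalid input rather than wrapped in an extra `Box::new`. The constructor can be passed straight to `map_err` as a function path because it accepts anything convertible into a boxed error. A small self-contained sketch of that shape (`BufferError` here is a toy stand-in):

use std::error::Error;

#[derive(Debug)]
struct BufferError(Box<dyn Error + Send + Sync>);

impl BufferError {
    // Mirrors `WriteBufferError::invalid_input`: anything error-like converts in.
    fn invalid_input(e: impl Into<Box<dyn Error + Send + Sync>>) -> Self {
        Self(e.into())
    }
}

// Encode a length prefix; the fallible u64 -> u32 conversion error is classified
// as "invalid input" via a plain function path instead of an extra Box::new.
fn encode_len(len: u64, buf: &mut Vec<u8>) -> Result<(), BufferError> {
    let len = u32::try_from(len).map_err(BufferError::invalid_input)?;
    buf.extend_from_slice(&len.to_be_bytes());
    Ok(())
}

fn main() {
    let mut buf = Vec::new();
    encode_len(4, &mut buf).unwrap();
    assert_eq!(buf, vec![0, 0, 0, 4]);
    assert!(encode_len(u64::MAX, &mut Vec::new()).is_err());
}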
|
||||
|
||||
#[cfg(test)]
@ -142,7 +142,7 @@ impl WriteBufferConfigFactory {
|
|||
db_name: &str,
|
||||
trace_collector: Option<&Arc<dyn TraceCollector>>,
|
||||
cfg: &WriteBufferConnection,
|
||||
) -> Result<Box<dyn WriteBufferReading>, WriteBufferError> {
|
||||
) -> Result<Arc<dyn WriteBufferReading>, WriteBufferError> {
|
||||
let reader = match &cfg.type_[..] {
|
||||
"file" => {
|
||||
let root = PathBuf::from(&cfg.connection);
|
||||
|
@ -153,7 +153,7 @@ impl WriteBufferConfigFactory {
|
|||
trace_collector,
|
||||
)
|
||||
.await?;
|
||||
Box::new(file_buffer) as _
|
||||
Arc::new(file_buffer) as _
|
||||
}
|
||||
"kafka" => {
|
||||
let rskafka_buffer = RSKafkaConsumer::new(
|
||||
|
@ -164,17 +164,17 @@ impl WriteBufferConfigFactory {
|
|||
trace_collector.map(Arc::clone),
|
||||
)
|
||||
.await?;
|
||||
Box::new(rskafka_buffer) as _
|
||||
Arc::new(rskafka_buffer) as _
|
||||
}
|
||||
"mock" => match self.get_mock(&cfg.connection)? {
|
||||
Mock::Normal(state) => {
|
||||
let mock_buffer =
|
||||
MockBufferForReading::new(state, cfg.creation_config.as_ref())?;
|
||||
Box::new(mock_buffer) as _
|
||||
Arc::new(mock_buffer) as _
|
||||
}
|
||||
Mock::AlwaysFailing => {
|
||||
let mock_buffer = MockBufferForReadingThatAlwaysErrors {};
|
||||
Box::new(mock_buffer) as _
|
||||
Arc::new(mock_buffer) as _
|
||||
}
|
||||
},
|
||||
other => {
|
||||
|
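The factory above now returns `Arc<dyn WriteBufferReading>` instead of `Box<dyn WriteBufferReading>`, so one reader can be shared by several consumers; the new reading API only needs `&self`. A minimal sketch of the `Arc::new(x) as _` coercion and the sharing it enables, using a toy trait rather than the real one:

use std::sync::Arc;

trait Reader: Send + Sync {
    fn name(&self) -> &'static str;
}

struct FileReader;

impl Reader for FileReader {
    fn name(&self) -> &'static str {
        "file"
    }
}

fn build(kind: &str) -> Arc<dyn Reader> {
    match kind {
        // `as _` lets the compiler coerce Arc<FileReader> into Arc<dyn Reader>,
        // the same shape as `Arc::new(file_buffer) as _` in the factory above.
        "file" => Arc::new(FileReader) as _,
        other => panic!("unknown reader type: {}", other),
    }
}

fn main() {
    let reader = build("file");
    // Cloning the Arc is cheap; each task gets a handle to the same reader.
    let for_task = Arc::clone(&reader);
    let handle = std::thread::spawn(move || for_task.name());
    assert_eq!(handle.join().unwrap(), "file");
    assert_eq!(reader.name(), "file");
}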
@ -267,7 +267,7 @@ mod tests {
|
|||
.new_config_write(db_name.as_str(), None, &cfg)
|
||||
.await
|
||||
.unwrap_err();
|
||||
assert!(err.to_string().starts_with("Unknown mock ID:"));
|
||||
assert!(err.to_string().contains("Unknown mock ID:"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
@ -302,7 +302,7 @@ mod tests {
|
|||
.new_config_read(db_name.as_str(), None, &cfg)
|
||||
.await
|
||||
.unwrap_err();
|
||||
assert!(err.to_string().starts_with("Unknown mock ID:"));
|
||||
assert!(err.to_string().contains("Unknown mock ID:"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
@ -335,7 +335,7 @@ mod tests {
|
|||
.new_config_write(db_name.as_str(), None, &cfg)
|
||||
.await
|
||||
.unwrap_err();
|
||||
assert!(err.to_string().starts_with("Unknown mock ID:"));
|
||||
assert!(err.to_string().contains("Unknown mock ID:"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
@ -368,7 +368,7 @@ mod tests {
|
|||
.new_config_read(db_name.as_str(), None, &cfg)
|
||||
.await
|
||||
.unwrap_err();
|
||||
assert!(err.to_string().starts_with("Unknown mock ID:"));
|
||||
assert!(err.to_string().contains("Unknown mock ID:"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -1,3 +1,5 @@
|
|||
use std::fmt::{Display, Formatter};
|
||||
use std::io::Error;
|
||||
use std::{
|
||||
collections::{BTreeMap, BTreeSet},
|
||||
fmt::Debug,
|
||||
|
@ -5,12 +7,111 @@ use std::{
|
|||
|
||||
use async_trait::async_trait;
|
||||
use dml::{DmlMeta, DmlOperation, DmlWrite};
|
||||
use futures::{future::BoxFuture, stream::BoxStream};
|
||||
use futures::stream::BoxStream;
|
||||
|
||||
/// Generic boxed error type that is used in this crate.
///
/// The dynamic boxing makes it easier to deal with error from different implementations.
pub type WriteBufferError = Box<dyn std::error::Error + Sync + Send>;
#[derive(Debug)]
pub struct WriteBufferError {
    inner: Box<dyn std::error::Error + Sync + Send>,
    kind: WriteBufferErrorKind,
}
|
||||
|
||||
impl WriteBufferError {
|
||||
pub fn new(
|
||||
kind: WriteBufferErrorKind,
|
||||
e: impl Into<Box<dyn std::error::Error + Sync + Send>>,
|
||||
) -> Self {
|
||||
Self {
|
||||
inner: e.into(),
|
||||
kind,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn invalid_data(e: impl Into<Box<dyn std::error::Error + Sync + Send>>) -> Self {
|
||||
Self::new(WriteBufferErrorKind::InvalidData, e)
|
||||
}
|
||||
|
||||
pub fn invalid_input(e: impl Into<Box<dyn std::error::Error + Sync + Send>>) -> Self {
|
||||
Self::new(WriteBufferErrorKind::InvalidInput, e)
|
||||
}
|
||||
|
||||
/// Returns the kind of error this was
|
||||
pub fn kind(&self) -> WriteBufferErrorKind {
|
||||
self.kind
|
||||
}
|
||||
|
||||
/// Returns the inner error
|
||||
pub fn inner(&self) -> &dyn std::error::Error {
|
||||
self.inner.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for WriteBufferError {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "WriteBufferError({:?}): {}", self.kind, self.inner)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for WriteBufferError {}
|
||||
|
||||
impl From<std::io::Error> for WriteBufferError {
|
||||
fn from(e: Error) -> Self {
|
||||
Self {
|
||||
inner: Box::new(e),
|
||||
kind: WriteBufferErrorKind::IO,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<rskafka::client::error::Error> for WriteBufferError {
|
||||
fn from(e: rskafka::client::error::Error) -> Self {
|
||||
Self {
|
||||
inner: Box::new(e),
|
||||
kind: WriteBufferErrorKind::IO,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<rskafka::client::producer::Error> for WriteBufferError {
|
||||
fn from(e: rskafka::client::producer::Error) -> Self {
|
||||
Self {
|
||||
inner: Box::new(e),
|
||||
kind: WriteBufferErrorKind::IO,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<String> for WriteBufferError {
|
||||
fn from(e: String) -> Self {
|
||||
Self {
|
||||
inner: e.into(),
|
||||
kind: WriteBufferErrorKind::Unknown,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&'static str> for WriteBufferError {
|
||||
fn from(e: &'static str) -> Self {
|
||||
Self {
|
||||
inner: e.into(),
|
||||
kind: WriteBufferErrorKind::Unknown,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone)]
pub enum WriteBufferErrorKind {
    /// This operation failed for an unknown reason
    Unknown,
    /// This operation was provided with invalid input data
    InvalidInput,
    /// This operation encountered invalid data
    InvalidData,
    /// A fatal IO error occurred - non-fatal errors should be retried internally
    IO,
}
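With a typed error, callers can branch on `kind()` instead of string-matching messages. A usage sketch, assuming the `WriteBufferError` and `WriteBufferErrorKind` defined above are in scope; the retry policy itself is illustrative, not something the crate prescribes:

// Hypothetical retry classification built on the error kind above.
fn should_retry(e: &WriteBufferError) -> bool {
    match e.kind() {
        // Bad requests or bad payloads will not get better by retrying.
        WriteBufferErrorKind::InvalidInput | WriteBufferErrorKind::InvalidData => false,
        // IO errors surfaced here are fatal per the docs, but an outer layer
        // may still choose to re-create the client and try again.
        WriteBufferErrorKind::IO => true,
        WriteBufferErrorKind::Unknown => false,
    }
}

// e.g.:
//   let err = WriteBufferError::invalid_data("bad payload");
//   assert!(!should_retry(&err));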
|
||||
|
||||
/// Writing to a Write Buffer takes a [`DmlWrite`] and returns the [`DmlMeta`] for the
|
||||
/// payload that was written
|
||||
|
@ -18,7 +119,7 @@ pub type WriteBufferError = Box<dyn std::error::Error + Sync + Send>;
|
|||
pub trait WriteBufferWriting: Sync + Send + Debug + 'static {
|
||||
/// List all known sequencers.
|
||||
///
|
||||
/// This set is not empty.
/// This set is not empty.
|
||||
fn sequencer_ids(&self) -> BTreeSet<u32>;
|
||||
|
||||
/// Send a [`DmlOperation`] to the write buffer using the specified sequencer ID.
|
||||
|
@ -44,7 +145,9 @@ pub trait WriteBufferWriting: Sync + Send + Debug + 'static {
|
|||
lp: &str,
|
||||
default_time: i64,
|
||||
) -> Result<DmlMeta, WriteBufferError> {
|
||||
let tables = mutable_batch_lp::lines_to_batches(lp, default_time).map_err(Box::new)?;
|
||||
let tables = mutable_batch_lp::lines_to_batches(lp, default_time)
|
||||
.map_err(WriteBufferError::invalid_input)?;
|
||||
|
||||
self.store_operation(
|
||||
sequencer_id,
|
||||
&DmlOperation::Write(DmlWrite::new("test_db", tables, Default::default())),
|
||||
|
@ -63,47 +166,60 @@ pub trait WriteBufferWriting: Sync + Send + Debug + 'static {
|
|||
fn type_name(&self) -> &'static str;
|
||||
}
|
||||
|
||||
pub type FetchHighWatermarkFut<'a> = BoxFuture<'a, Result<u64, WriteBufferError>>;
|
||||
pub type FetchHighWatermark<'a> = Box<dyn (Fn() -> FetchHighWatermarkFut<'a>) + Send + Sync>;
|
||||
|
||||
/// Output stream of [`WriteBufferReading`].
|
||||
pub struct WriteStream<'a> {
|
||||
/// Stream that produces entries.
|
||||
pub stream: BoxStream<'a, Result<DmlOperation, WriteBufferError>>,
|
||||
|
||||
/// Get high watermark (= what we believe is the next sequence number to be added).
|
||||
/// Handles a stream of a specific sequencer.
|
||||
///
|
||||
/// This can be used to consume data via a stream or to seek the stream to a given offset.
|
||||
#[async_trait]
|
||||
pub trait WriteBufferStreamHandler: Sync + Send + Debug + 'static {
|
||||
/// Stream that produces DML operations.
|
||||
///
|
||||
/// Can be used to calculate lag. Note that since the watermark is "next sequence ID number to be added", it starts
|
||||
/// at 0 and after the entry with sequence number 0 is added to the buffer, it is 1.
|
||||
pub fetch_high_watermark: FetchHighWatermark<'a>,
|
||||
}
|
||||
/// Note that due to the mutable borrow, it is not possible to have multiple streams from the same
|
||||
/// [`WriteBufferStreamHandler`] instance at the same time. If all streams are dropped and requested again, the last
|
||||
/// offsets of the old streams will be the start offsets for the new streams. If you want to prevent that either
|
||||
/// create a new [`WriteBufferStreamHandler`] or use [`seek`](Self::seek).
|
||||
fn stream(&mut self) -> BoxStream<'_, Result<DmlOperation, WriteBufferError>>;
|
||||
|
||||
impl<'a> Debug for WriteStream<'a> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("EntryStream").finish_non_exhaustive()
|
||||
}
|
||||
/// Seek sequencer to given sequence number. The next output of related streams will be an entry with at least
|
||||
/// the given sequence number (the actual sequence number might be skipped due to "holes" in the stream).
|
||||
///
|
||||
/// Note that due to the mutable borrow, it is not possible to seek while streams exists.
|
||||
async fn seek(&mut self, sequence_number: u64) -> Result<(), WriteBufferError>;
|
||||
}
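A hedged sketch of how a consumer might drive the handler API above: seek once, then pull DML operations from the stream. `process` is a placeholder for whatever the caller does with each operation, and the loop only returns if the stream ends or an operation fails to decode (a live buffer stream normally stays pending instead of ending). Assumes the trait, `DmlOperation` and `WriteBufferError` are in scope:

use futures::StreamExt;

async fn consume_from(
    handler: &mut dyn WriteBufferStreamHandler,
    start: u64,
    mut process: impl FnMut(DmlOperation),
) -> Result<(), WriteBufferError> {
    // Position the handler first; seeking while a stream exists is not possible
    // because `stream()` takes `&mut self`.
    handler.seek(start).await?;

    let mut stream = handler.stream();
    while let Some(op) = stream.next().await {
        process(op?);
    }
    Ok(())
}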
|
||||
|
||||
/// Produce streams (one per sequencer) of [`DmlWrite`]s.
|
||||
#[async_trait]
|
||||
pub trait WriteBufferReading: Sync + Send + Debug + 'static {
|
||||
/// Returns a stream per sequencer.
|
||||
/// List all known sequencers.
|
||||
///
|
||||
/// Note that due to the mutable borrow, it is not possible to have multiple streams from the same
|
||||
/// [`WriteBufferReading`] instance at the same time. If all streams are dropped and requested again, the last
|
||||
/// offsets of the old streams will be the start offsets for the new streams. If you want to prevent that either
|
||||
/// create a new [`WriteBufferReading`] or use [`seek`](Self::seek).
|
||||
fn streams(&mut self) -> BTreeMap<u32, WriteStream<'_>>;
|
||||
/// This set is not empty.
|
||||
fn sequencer_ids(&self) -> BTreeSet<u32>;
|
||||
|
||||
/// Seek given sequencer to given sequence number. The next output of related streams will be an entry with at least
|
||||
/// the given sequence number (the actual sequence number might be skipped due to "holes" in the stream).
|
||||
/// Get stream handler for a dedicated sequencer.
|
||||
///
|
||||
/// Note that due to the mutable borrow, it is not possible to seek while streams exists.
|
||||
async fn seek(
|
||||
&mut self,
|
||||
/// Handlers do NOT share any state (e.g. last offsets).
|
||||
async fn stream_handler(
|
||||
&self,
|
||||
sequencer_id: u32,
|
||||
sequence_number: u64,
|
||||
) -> Result<(), WriteBufferError>;
|
||||
) -> Result<Box<dyn WriteBufferStreamHandler>, WriteBufferError>;
|
||||
|
||||
/// Get stream handlers for all streams.
|
||||
async fn stream_handlers(
|
||||
&self,
|
||||
) -> Result<BTreeMap<u32, Box<dyn WriteBufferStreamHandler>>, WriteBufferError> {
|
||||
let mut handlers = BTreeMap::new();
|
||||
|
||||
for sequencer_id in self.sequencer_ids() {
|
||||
handlers.insert(sequencer_id, self.stream_handler(sequencer_id).await?);
|
||||
}
|
||||
|
||||
Ok(handlers)
|
||||
}
|
||||
|
||||
/// Get high watermark (= what we believe is the next sequence number to be added).
|
||||
///
|
||||
/// Can be used to calculate lag. Note that since the watermark is "next sequence ID number to be added", it starts
|
||||
/// at 0 and after the entry with sequence number 0 is added to the buffer, it is 1.
|
||||
async fn fetch_high_watermark(&self, sequencer_id: u32) -> Result<u64, WriteBufferError>;
|
||||
|
||||
/// Return type (like `"mock"` or `"kafka"`) of this reader.
|
||||
fn type_name(&self) -> &'static str;
|
||||
|
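Because the watermark is the next sequence number to be added, consumer lag for a sequencer is `watermark - (last_seen + 1)`, or the full watermark if nothing has been consumed yet. A small sketch, assuming the `WriteBufferReading` trait above is in scope; the lag helper is illustrative, not part of the crate:

async fn lag(
    reader: &dyn WriteBufferReading,
    sequencer_id: u32,
    last_seen: Option<u64>,
) -> Result<u64, WriteBufferError> {
    let watermark = reader.fetch_high_watermark(sequencer_id).await?;
    // watermark == 0 means nothing was ever written; after sequence number N
    // was written the watermark is N + 1.
    let consumed = last_seen.map(|n| n + 1).unwrap_or(0);
    Ok(watermark.saturating_sub(consumed))
}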
@ -111,16 +227,14 @@ pub trait WriteBufferReading: Sync + Send + Debug + 'static {
|
|||
|
||||
pub mod test_utils {
|
||||
//! Generic tests for all write buffer implementations.
|
||||
use super::{WriteBufferError, WriteBufferReading, WriteBufferWriting};
|
||||
use super::{
|
||||
WriteBufferError, WriteBufferReading, WriteBufferStreamHandler, WriteBufferWriting,
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use dml::{test_util::assert_write_op_eq, DmlMeta, DmlOperation, DmlWrite};
|
||||
use futures::{stream::FuturesUnordered, StreamExt, TryStreamExt};
|
||||
use std::{
|
||||
collections::{BTreeMap, BTreeSet},
|
||||
convert::TryFrom,
|
||||
num::NonZeroU32,
|
||||
sync::Arc,
|
||||
time::Duration,
|
||||
collections::BTreeSet, convert::TryFrom, num::NonZeroU32, sync::Arc, time::Duration,
|
||||
};
|
||||
use time::{Time, TimeProvider};
|
||||
use trace::{ctx::SpanContext, span::Span, RingBufferTraceCollector};
|
||||
|
@ -246,40 +360,41 @@ pub mod test_utils {
|
|||
let entry_3 = "upc user=3 300";
|
||||
|
||||
let writer = context.writing(true).await.unwrap();
|
||||
let mut reader = context.reading(true).await.unwrap();
|
||||
let reader = context.reading(true).await.unwrap();
|
||||
|
||||
let mut streams = reader.streams();
|
||||
assert_eq!(streams.len(), 1);
|
||||
let (sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap();
|
||||
let sequencer_id = set_pop_first(&mut reader.sequencer_ids()).unwrap();
|
||||
let mut stream_handler = reader.stream_handler(sequencer_id).await.unwrap();
|
||||
let mut stream = stream_handler.stream();
|
||||
|
||||
let waker = futures::task::noop_waker();
|
||||
let mut cx = futures::task::Context::from_waker(&waker);
|
||||
|
||||
// empty stream is pending
|
||||
assert!(stream.stream.poll_next_unpin(&mut cx).is_pending());
|
||||
assert!(stream.poll_next_unpin(&mut cx).is_pending());
|
||||
|
||||
// adding content allows us to get results
|
||||
let w1 = write("namespace", &writer, entry_1, sequencer_id, None).await;
|
||||
assert_write_op_eq(&stream.stream.next().await.unwrap().unwrap(), &w1);
|
||||
assert_write_op_eq(&stream.next().await.unwrap().unwrap(), &w1);
|
||||
|
||||
// stream is pending again
|
||||
assert!(stream.stream.poll_next_unpin(&mut cx).is_pending());
|
||||
assert!(stream.poll_next_unpin(&mut cx).is_pending());
|
||||
|
||||
// adding more data unblocks the stream
|
||||
let w2 = write("namespace", &writer, entry_2, sequencer_id, None).await;
|
||||
let w3 = write("namespace", &writer, entry_3, sequencer_id, None).await;
|
||||
|
||||
assert_write_op_eq(&stream.stream.next().await.unwrap().unwrap(), &w2);
|
||||
assert_write_op_eq(&stream.stream.next().await.unwrap().unwrap(), &w3);
|
||||
assert_write_op_eq(&stream.next().await.unwrap().unwrap(), &w2);
|
||||
assert_write_op_eq(&stream.next().await.unwrap().unwrap(), &w3);
|
||||
|
||||
// stream is pending again
|
||||
assert!(stream.stream.poll_next_unpin(&mut cx).is_pending());
|
||||
assert!(stream.poll_next_unpin(&mut cx).is_pending());
|
||||
}
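The tests above assert that a stream is pending by polling it once with a no-op waker. The idiom works on any `Stream + Unpin`; a standalone example using a `futures` channel, with no write buffer types involved:

use futures::{channel::mpsc, StreamExt};

fn main() {
    let (tx, mut rx) = mpsc::unbounded::<u32>();

    let waker = futures::task::noop_waker();
    let mut cx = futures::task::Context::from_waker(&waker);

    // Nothing was sent yet, so a single poll reports Pending without blocking.
    assert!(rx.poll_next_unpin(&mut cx).is_pending());

    // After sending, the same poll yields the item.
    tx.unbounded_send(42).unwrap();
    assert!(matches!(
        rx.poll_next_unpin(&mut cx),
        std::task::Poll::Ready(Some(42))
    ));
}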
|
||||
|
||||
/// Tests multiple subsequently created streams from a single reader.
|
||||
/// Tests multiple subsequently created streams from a single [`WriteBufferStreamHandler`].
|
||||
///
|
||||
/// This tests that:
|
||||
/// - readers remember their offset (and "pending" state) even when streams are dropped
|
||||
/// - state is not shared between handlers
|
||||
async fn test_multi_stream_io<T>(adapter: &T)
|
||||
where
|
||||
T: TestAdapter,
|
||||
|
@ -291,7 +406,7 @@ pub mod test_utils {
|
|||
let entry_3 = "upc user=3 300";
|
||||
|
||||
let writer = context.writing(true).await.unwrap();
|
||||
let mut reader = context.reading(true).await.unwrap();
|
||||
let reader = context.reading(true).await.unwrap();
|
||||
|
||||
let waker = futures::task::noop_waker();
|
||||
let mut cx = futures::task::Context::from_waker(&waker);
|
||||
|
@ -301,35 +416,31 @@ pub mod test_utils {
|
|||
let w3 = write("namespace", &writer, entry_3, 0, None).await;
|
||||
|
||||
// creating stream, drop stream, re-create it => still starts at first entry
|
||||
let mut streams = reader.streams();
|
||||
assert_eq!(streams.len(), 1);
|
||||
let (_sequencer_id, stream) = map_pop_first(&mut streams).unwrap();
|
||||
let sequencer_id = set_pop_first(&mut reader.sequencer_ids()).unwrap();
|
||||
let mut stream_handler = reader.stream_handler(sequencer_id).await.unwrap();
|
||||
let stream = stream_handler.stream();
|
||||
drop(stream);
|
||||
drop(streams);
|
||||
let mut streams = reader.streams();
|
||||
assert_eq!(streams.len(), 1);
|
||||
let (_sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap();
|
||||
assert_write_op_eq(&stream.stream.next().await.unwrap().unwrap(), &w1);
|
||||
let mut stream = stream_handler.stream();
|
||||
assert_write_op_eq(&stream.next().await.unwrap().unwrap(), &w1);
|
||||
|
||||
// re-creating stream after reading remembers offset, but wait a bit to provoke the stream to buffer some
|
||||
// entries
|
||||
tokio::time::sleep(Duration::from_millis(10)).await;
|
||||
drop(stream);
|
||||
drop(streams);
|
||||
let mut streams = reader.streams();
|
||||
assert_eq!(streams.len(), 1);
|
||||
let (_sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap();
|
||||
|
||||
assert_write_op_eq(&stream.stream.next().await.unwrap().unwrap(), &w2);
|
||||
assert_write_op_eq(&stream.stream.next().await.unwrap().unwrap(), &w3);
|
||||
let mut stream = stream_handler.stream();
|
||||
assert_write_op_eq(&stream.next().await.unwrap().unwrap(), &w2);
|
||||
assert_write_op_eq(&stream.next().await.unwrap().unwrap(), &w3);
|
||||
|
||||
// re-creating stream after reading everything makes it pending
|
||||
drop(stream);
|
||||
drop(streams);
|
||||
let mut streams = reader.streams();
|
||||
assert_eq!(streams.len(), 1);
|
||||
let (_sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap();
|
||||
assert!(stream.stream.poll_next_unpin(&mut cx).is_pending());
|
||||
let mut stream = stream_handler.stream();
|
||||
assert!(stream.poll_next_unpin(&mut cx).is_pending());
|
||||
|
||||
// use a different handler => stream starts from beginning
|
||||
let mut stream_handler2 = reader.stream_handler(sequencer_id).await.unwrap();
|
||||
let mut stream2 = stream_handler2.stream();
|
||||
assert_write_op_eq(&stream2.next().await.unwrap().unwrap(), &w1);
|
||||
assert!(stream.poll_next_unpin(&mut cx).is_pending());
|
||||
}
|
||||
|
||||
/// Test single reader-writer IO w/ multiple sequencers.
|
||||
|
@ -348,37 +459,43 @@ pub mod test_utils {
|
|||
let entry_3 = "upc user=3 300";
|
||||
|
||||
let writer = context.writing(true).await.unwrap();
|
||||
let mut reader = context.reading(true).await.unwrap();
|
||||
let reader = context.reading(true).await.unwrap();
|
||||
|
||||
let mut streams = reader.streams();
|
||||
assert_eq!(streams.len(), 2);
|
||||
let (sequencer_id_1, mut stream_1) = map_pop_first(&mut streams).unwrap();
|
||||
let (sequencer_id_2, mut stream_2) = map_pop_first(&mut streams).unwrap();
|
||||
// check that we have two different sequencer IDs
|
||||
let mut sequencer_ids = reader.sequencer_ids();
|
||||
assert_eq!(sequencer_ids.len(), 2);
|
||||
let sequencer_id_1 = set_pop_first(&mut sequencer_ids).unwrap();
|
||||
let sequencer_id_2 = set_pop_first(&mut sequencer_ids).unwrap();
|
||||
assert_ne!(sequencer_id_1, sequencer_id_2);
|
||||
|
||||
let waker = futures::task::noop_waker();
|
||||
let mut cx = futures::task::Context::from_waker(&waker);
|
||||
|
||||
let mut stream_handler_1 = reader.stream_handler(sequencer_id_1).await.unwrap();
|
||||
let mut stream_handler_2 = reader.stream_handler(sequencer_id_2).await.unwrap();
|
||||
let mut stream_1 = stream_handler_1.stream();
|
||||
let mut stream_2 = stream_handler_2.stream();
|
||||
|
||||
// empty streams are pending
|
||||
assert!(stream_1.stream.poll_next_unpin(&mut cx).is_pending());
|
||||
assert!(stream_2.stream.poll_next_unpin(&mut cx).is_pending());
|
||||
assert!(stream_1.poll_next_unpin(&mut cx).is_pending());
|
||||
assert!(stream_2.poll_next_unpin(&mut cx).is_pending());
|
||||
|
||||
// entries arrive at the right target stream
|
||||
let w1 = write("namespace", &writer, entry_1, sequencer_id_1, None).await;
|
||||
assert_write_op_eq(&stream_1.stream.next().await.unwrap().unwrap(), &w1);
|
||||
assert!(stream_2.stream.poll_next_unpin(&mut cx).is_pending());
|
||||
assert_write_op_eq(&stream_1.next().await.unwrap().unwrap(), &w1);
|
||||
assert!(stream_2.poll_next_unpin(&mut cx).is_pending());
|
||||
|
||||
let w2 = write("namespace", &writer, entry_2, sequencer_id_2, None).await;
|
||||
assert!(stream_1.stream.poll_next_unpin(&mut cx).is_pending());
|
||||
assert_write_op_eq(&stream_2.stream.next().await.unwrap().unwrap(), &w2);
|
||||
assert!(stream_1.poll_next_unpin(&mut cx).is_pending());
|
||||
assert_write_op_eq(&stream_2.next().await.unwrap().unwrap(), &w2);
|
||||
|
||||
let w3 = write("namespace", &writer, entry_3, sequencer_id_1, None).await;
|
||||
assert!(stream_2.stream.poll_next_unpin(&mut cx).is_pending());
|
||||
assert_write_op_eq(&stream_1.stream.next().await.unwrap().unwrap(), &w3);
|
||||
assert!(stream_2.poll_next_unpin(&mut cx).is_pending());
|
||||
assert_write_op_eq(&stream_1.next().await.unwrap().unwrap(), &w3);
|
||||
|
||||
// streams are pending again
|
||||
assert!(stream_1.stream.poll_next_unpin(&mut cx).is_pending());
|
||||
assert!(stream_2.stream.poll_next_unpin(&mut cx).is_pending());
|
||||
assert!(stream_1.poll_next_unpin(&mut cx).is_pending());
|
||||
assert!(stream_2.poll_next_unpin(&mut cx).is_pending());
|
||||
}
|
||||
|
||||
/// Test multiple writers and multiple readers on multiple sequencers.
|
||||
|
@ -400,8 +517,8 @@ pub mod test_utils {
|
|||
|
||||
let writer_1 = context.writing(true).await.unwrap();
|
||||
let writer_2 = context.writing(true).await.unwrap();
|
||||
let mut reader_1 = context.reading(true).await.unwrap();
|
||||
let mut reader_2 = context.reading(true).await.unwrap();
|
||||
let reader_1 = context.reading(true).await.unwrap();
|
||||
let reader_2 = context.reading(true).await.unwrap();
|
||||
|
||||
let mut sequencer_ids_1 = writer_1.sequencer_ids();
|
||||
let sequencer_ids_2 = writer_2.sequencer_ids();
|
||||
|
@ -414,22 +531,15 @@ pub mod test_utils {
|
|||
let w_west_1 = write("namespace", &writer_1, entry_west_1, sequencer_id_2, None).await;
|
||||
let w_east_2 = write("namespace", &writer_2, entry_east_2, sequencer_id_1, None).await;
|
||||
|
||||
assert_reader_content(
|
||||
&mut reader_1,
|
||||
&[
|
||||
(sequencer_id_1, &[&w_east_1, &w_east_2]),
|
||||
(sequencer_id_2, &[&w_west_1]),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
assert_reader_content(
|
||||
&mut reader_2,
|
||||
&[
|
||||
(sequencer_id_1, &[&w_east_1, &w_east_2]),
|
||||
(sequencer_id_2, &[&w_west_1]),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
let mut handler_1_1 = reader_1.stream_handler(sequencer_id_1).await.unwrap();
|
||||
let mut handler_1_2 = reader_1.stream_handler(sequencer_id_2).await.unwrap();
|
||||
let mut handler_2_1 = reader_2.stream_handler(sequencer_id_1).await.unwrap();
|
||||
let mut handler_2_2 = reader_2.stream_handler(sequencer_id_2).await.unwrap();
|
||||
|
||||
assert_reader_content(&mut handler_1_1, &[&w_east_1, &w_east_2]).await;
|
||||
assert_reader_content(&mut handler_1_2, &[&w_west_1]).await;
|
||||
assert_reader_content(&mut handler_2_1, &[&w_east_1, &w_east_2]).await;
|
||||
assert_reader_content(&mut handler_2_2, &[&w_west_1]).await;
|
||||
}
|
||||
|
||||
/// Test seek implementation of readers.
|
||||
|
@ -455,46 +565,47 @@ pub mod test_utils {
|
|||
|
||||
let writer = context.writing(true).await.unwrap();
|
||||
|
||||
let w_east_1 = write("namespace", &writer, entry_east_1, 0, None).await;
|
||||
let w_east_2 = write("namespace", &writer, entry_east_2, 0, None).await;
|
||||
let w_west_1 = write("namespace", &writer, entry_west_1, 1, None).await;
|
||||
let mut sequencer_ids = writer.sequencer_ids();
|
||||
let sequencer_id_1 = set_pop_first(&mut sequencer_ids).unwrap();
|
||||
let sequencer_id_2 = set_pop_first(&mut sequencer_ids).unwrap();
|
||||
|
||||
let mut reader_1 = context.reading(true).await.unwrap();
|
||||
let mut reader_2 = context.reading(true).await.unwrap();
|
||||
let w_east_1 = write("namespace", &writer, entry_east_1, sequencer_id_1, None).await;
|
||||
let w_east_2 = write("namespace", &writer, entry_east_2, sequencer_id_1, None).await;
|
||||
let w_west_1 = write("namespace", &writer, entry_west_1, sequencer_id_2, None).await;
|
||||
|
||||
let reader_1 = context.reading(true).await.unwrap();
|
||||
let reader_2 = context.reading(true).await.unwrap();
|
||||
|
||||
let mut handler_1_1_a = reader_1.stream_handler(sequencer_id_1).await.unwrap();
|
||||
let mut handler_1_2_a = reader_1.stream_handler(sequencer_id_2).await.unwrap();
|
||||
let mut handler_1_1_b = reader_1.stream_handler(sequencer_id_1).await.unwrap();
|
||||
let mut handler_1_2_b = reader_1.stream_handler(sequencer_id_2).await.unwrap();
|
||||
let mut handler_2_1 = reader_2.stream_handler(sequencer_id_1).await.unwrap();
|
||||
let mut handler_2_2 = reader_2.stream_handler(sequencer_id_2).await.unwrap();
|
||||
|
||||
// forward seek
|
||||
reader_1
|
||||
.seek(0, w_east_2.meta().sequence().unwrap().number)
|
||||
handler_1_1_a
|
||||
.seek(w_east_2.meta().sequence().unwrap().number)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_reader_content(&mut reader_1, &[(0, &[&w_east_2]), (1, &[&w_west_1])]).await;
|
||||
assert_reader_content(
|
||||
&mut reader_2,
|
||||
&[(0, &[&w_east_1, &w_east_2]), (1, &[&w_west_1])],
|
||||
)
|
||||
.await;
|
||||
assert_reader_content(&mut handler_1_1_a, &[&w_east_2]).await;
|
||||
assert_reader_content(&mut handler_1_2_a, &[&w_west_1]).await;
|
||||
assert_reader_content(&mut handler_1_1_b, &[&w_east_1, &w_east_2]).await;
|
||||
assert_reader_content(&mut handler_1_2_b, &[&w_west_1]).await;
|
||||
assert_reader_content(&mut handler_2_1, &[&w_east_1, &w_east_2]).await;
|
||||
assert_reader_content(&mut handler_2_2, &[&w_west_1]).await;
|
||||
|
||||
// backward seek
|
||||
reader_1.seek(0, 0).await.unwrap();
|
||||
assert_reader_content(&mut reader_1, &[(0, &[&w_east_1, &w_east_2]), (1, &[])]).await;
|
||||
handler_1_1_a.seek(0).await.unwrap();
|
||||
assert_reader_content(&mut handler_1_1_a, &[&w_east_1, &w_east_2]).await;
|
||||
|
||||
// seek to far end and then add data
|
||||
reader_1.seek(0, 1_000_000).await.unwrap();
|
||||
handler_1_1_a.seek(1_000_000).await.unwrap();
|
||||
write("namespace", &writer, entry_east_3, 0, None).await;
|
||||
|
||||
let mut streams = reader_1.streams();
|
||||
assert_eq!(streams.len(), 2);
|
||||
let (_sequencer_id, mut stream_1) = map_pop_first(&mut streams).unwrap();
|
||||
let (_sequencer_id, mut stream_2) = map_pop_first(&mut streams).unwrap();
|
||||
assert!(stream_1.stream.poll_next_unpin(&mut cx).is_pending());
|
||||
assert!(stream_2.stream.poll_next_unpin(&mut cx).is_pending());
|
||||
drop(stream_1);
|
||||
drop(stream_2);
|
||||
drop(streams);
|
||||
|
||||
// seeking unknown sequencer is NOT an error
|
||||
reader_1.seek(0, 42).await.unwrap();
|
||||
assert!(handler_1_1_a.stream().poll_next_unpin(&mut cx).is_pending());
|
||||
assert!(handler_1_2_a.stream().poll_next_unpin(&mut cx).is_pending());
|
||||
}
|
||||
|
||||
/// Test watermark fetching.
|
||||
|
@ -513,28 +624,33 @@ pub mod test_utils {
|
|||
let entry_west_1 = "upc,region=west user=1 200";
|
||||
|
||||
let writer = context.writing(true).await.unwrap();
|
||||
let mut reader = context.reading(true).await.unwrap();
|
||||
let reader = context.reading(true).await.unwrap();
|
||||
|
||||
let mut streams = reader.streams();
|
||||
assert_eq!(streams.len(), 2);
|
||||
let (sequencer_id_1, stream_1) = map_pop_first(&mut streams).unwrap();
|
||||
let (sequencer_id_2, stream_2) = map_pop_first(&mut streams).unwrap();
|
||||
let mut sequencer_ids = writer.sequencer_ids();
|
||||
let sequencer_id_1 = set_pop_first(&mut sequencer_ids).unwrap();
|
||||
let sequencer_id_2 = set_pop_first(&mut sequencer_ids).unwrap();
|
||||
|
||||
// start at watermark 0
|
||||
assert_eq!((stream_1.fetch_high_watermark)().await.unwrap(), 0);
|
||||
assert_eq!((stream_2.fetch_high_watermark)().await.unwrap(), 0);
|
||||
assert_eq!(
|
||||
reader.fetch_high_watermark(sequencer_id_1).await.unwrap(),
|
||||
0
|
||||
);
|
||||
assert_eq!(
|
||||
reader.fetch_high_watermark(sequencer_id_2).await.unwrap(),
|
||||
0
|
||||
);
|
||||
|
||||
// high water mark moves
|
||||
write("namespace", &writer, entry_east_1, sequencer_id_1, None).await;
|
||||
let w1 = write("namespace", &writer, entry_east_2, sequencer_id_1, None).await;
|
||||
let w2 = write("namespace", &writer, entry_west_1, sequencer_id_2, None).await;
|
||||
assert_eq!(
|
||||
(stream_1.fetch_high_watermark)().await.unwrap(),
|
||||
reader.fetch_high_watermark(sequencer_id_1).await.unwrap(),
|
||||
w1.meta().sequence().unwrap().number + 1
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
(stream_2.fetch_high_watermark)().await.unwrap(),
|
||||
reader.fetch_high_watermark(sequencer_id_2).await.unwrap(),
|
||||
w2.meta().sequence().unwrap().number + 1
|
||||
);
|
||||
}
|
||||
|
@ -557,11 +673,11 @@ pub mod test_utils {
|
|||
let entry = "upc user=1 100";
|
||||
|
||||
let writer = context.writing(true).await.unwrap();
|
||||
let mut reader = context.reading(true).await.unwrap();
|
||||
let reader = context.reading(true).await.unwrap();
|
||||
|
||||
let mut streams = reader.streams();
|
||||
assert_eq!(streams.len(), 1);
|
||||
let (sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap();
|
||||
let mut sequencer_ids = writer.sequencer_ids();
|
||||
assert_eq!(sequencer_ids.len(), 1);
|
||||
let sequencer_id = set_pop_first(&mut sequencer_ids).unwrap();
|
||||
|
||||
let write = write("namespace", &writer, entry, sequencer_id, None).await;
|
||||
let reported_ts = write.meta().producer_ts().unwrap();
|
||||
|
@ -570,7 +686,8 @@ pub mod test_utils {
|
|||
time.inc(Duration::from_secs(10));
|
||||
|
||||
// check that the timestamp records the ingestion time, not the read time
|
||||
let sequenced_entry = stream.stream.next().await.unwrap().unwrap();
|
||||
let mut handler = reader.stream_handler(sequencer_id).await.unwrap();
|
||||
let sequenced_entry = handler.stream().next().await.unwrap().unwrap();
|
||||
let ts_entry = sequenced_entry.meta().producer_ts().unwrap();
|
||||
assert_eq!(ts_entry, t0);
|
||||
assert_eq!(reported_ts, t0);
|
||||
|
@ -603,7 +720,7 @@ pub mod test_utils {
|
|||
context.writing(false).await.unwrap();
|
||||
}
|
||||
|
||||
/// Test sequencer IDs reporting of writers.
|
||||
/// Test sequencer IDs reporting of readers and writers.
|
||||
///
|
||||
/// This tests that:
|
||||
/// - all sequencers are reported
|
||||
|
@ -618,11 +735,17 @@ pub mod test_utils {
|
|||
|
||||
let writer_1 = context.writing(true).await.unwrap();
|
||||
let writer_2 = context.writing(true).await.unwrap();
|
||||
let reader_1 = context.reading(true).await.unwrap();
|
||||
let reader_2 = context.reading(true).await.unwrap();
|
||||
|
||||
let sequencer_ids_1 = writer_1.sequencer_ids();
|
||||
let sequencer_ids_2 = writer_2.sequencer_ids();
|
||||
assert_eq!(sequencer_ids_1, sequencer_ids_2);
|
||||
let sequencer_ids_3 = reader_1.sequencer_ids();
|
||||
let sequencer_ids_4 = reader_2.sequencer_ids();
|
||||
assert_eq!(sequencer_ids_1.len(), n_sequencers as usize);
|
||||
assert_eq!(sequencer_ids_1, sequencer_ids_2);
|
||||
assert_eq!(sequencer_ids_1, sequencer_ids_3);
|
||||
assert_eq!(sequencer_ids_1, sequencer_ids_4);
|
||||
}
|
||||
|
||||
/// Test that span contexts are propagated through the system.
|
||||
|
@ -635,11 +758,13 @@ pub mod test_utils {
|
|||
let entry = "upc user=1 100";
|
||||
|
||||
let writer = context.writing(true).await.unwrap();
|
||||
let mut reader = context.reading(true).await.unwrap();
|
||||
let reader = context.reading(true).await.unwrap();
|
||||
|
||||
let mut streams = reader.streams();
|
||||
assert_eq!(streams.len(), 1);
|
||||
let (sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap();
|
||||
let mut sequencer_ids = writer.sequencer_ids();
|
||||
assert_eq!(sequencer_ids.len(), 1);
|
||||
let sequencer_id = set_pop_first(&mut sequencer_ids).unwrap();
|
||||
let mut handler = reader.stream_handler(sequencer_id).await.unwrap();
|
||||
let mut stream = handler.stream();
|
||||
|
||||
// 1: no context
|
||||
write("namespace", &writer, entry, sequencer_id, None).await;
|
||||
|
@ -669,16 +794,16 @@ pub mod test_utils {
|
|||
.await;
|
||||
|
||||
// check write 1
|
||||
let write_1 = stream.stream.next().await.unwrap().unwrap();
|
||||
let write_1 = stream.next().await.unwrap().unwrap();
|
||||
assert!(write_1.meta().span_context().is_none());
|
||||
|
||||
// check write 2
|
||||
let write_2 = stream.stream.next().await.unwrap().unwrap();
|
||||
let write_2 = stream.next().await.unwrap().unwrap();
|
||||
let actual_context_1 = write_2.meta().span_context().unwrap();
|
||||
assert_span_context_eq_or_linked(&span_context_1, actual_context_1, collector.spans());
|
||||
|
||||
// check write 3
|
||||
let write_3 = stream.stream.next().await.unwrap().unwrap();
|
||||
let write_3 = stream.next().await.unwrap().unwrap();
|
||||
let actual_context_2 = write_3.meta().span_context().unwrap();
|
||||
assert_span_context_eq_or_linked(&span_context_2, actual_context_2, collector.spans());
|
||||
}
|
||||
|
@ -719,7 +844,7 @@ pub mod test_utils {
|
|||
let entry_2 = "upc,region=east user=2 200";
|
||||
|
||||
let writer = context.writing(true).await.unwrap();
|
||||
let mut reader = context.reading(true).await.unwrap();
|
||||
let reader = context.reading(true).await.unwrap();
|
||||
|
||||
let mut sequencer_ids = writer.sequencer_ids();
|
||||
assert_eq!(sequencer_ids.len(), 1);
|
||||
|
@ -728,7 +853,8 @@ pub mod test_utils {
|
|||
let w1 = write("namespace_1", &writer, entry_2, sequencer_id, None).await;
|
||||
let w2 = write("namespace_2", &writer, entry_1, sequencer_id, None).await;
|
||||
|
||||
assert_reader_content(&mut reader, &[(sequencer_id, &[&w1, &w2])]).await;
|
||||
let mut handler = reader.stream_handler(sequencer_id).await.unwrap();
|
||||
assert_reader_content(&mut handler, &[&w1, &w2]).await;
|
||||
}
|
||||
|
||||
/// Dummy test to ensure that flushing somewhat works.
|
||||
|
@ -770,57 +896,30 @@ pub mod test_utils {
|
|||
|
||||
/// Assert that the content of the reader is as expected.
|
||||
///
|
||||
/// This will read `expected.len()` from the reader and then ensures that the stream is pending.
|
||||
async fn assert_reader_content<R>(reader: &mut R, expected: &[(u32, &[&DmlWrite])])
|
||||
where
|
||||
R: WriteBufferReading,
|
||||
{
|
||||
// normalize expected values
|
||||
let expected = {
|
||||
let mut expected = expected.to_vec();
|
||||
expected.sort_by_key(|(sequencer_id, _entries)| *sequencer_id);
|
||||
expected
|
||||
};
|
||||
/// This will read `expected_writes.len()` from the reader and then ensures that the stream is pending.
|
||||
async fn assert_reader_content(
|
||||
actual_stream_handler: &mut Box<dyn WriteBufferStreamHandler>,
|
||||
expected_writes: &[&DmlWrite],
|
||||
) {
|
||||
let actual_stream = actual_stream_handler.stream();
|
||||
|
||||
// Ensure content of the streams
|
||||
let streams = reader.streams();
|
||||
assert_eq!(streams.len(), expected.len());
|
||||
// we need to limit the stream to `expected_writes.len()` elements, otherwise it might be pending forever
|
||||
let actual_writes: Vec<_> = actual_stream
|
||||
.take(expected_writes.len())
|
||||
.try_collect()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
for ((actual_sequencer_id, actual_stream), (expected_sequencer_id, expected_writes)) in
|
||||
streams.into_iter().zip(expected.iter())
|
||||
{
|
||||
assert_eq!(actual_sequencer_id, *expected_sequencer_id);
|
||||
|
||||
// we need to limit the stream to `expected.len()` elements, otherwise it might be pending forever
|
||||
let results: Vec<_> = actual_stream
|
||||
.stream
|
||||
.take(expected_writes.len())
|
||||
.try_collect()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let actual_writes: Vec<_> = results.iter().collect();
|
||||
assert_eq!(actual_writes.len(), expected_writes.len());
|
||||
for (actual, expected) in actual_writes.iter().zip(expected_writes.iter()) {
|
||||
assert_write_op_eq(actual, expected);
|
||||
}
|
||||
assert_eq!(actual_writes.len(), expected_writes.len());
|
||||
for (actual, expected) in actual_writes.iter().zip(expected_writes.iter()) {
|
||||
assert_write_op_eq(actual, expected);
|
||||
}
|
||||
|
||||
// Ensure that streams a pending
|
||||
let streams = reader.streams();
|
||||
assert_eq!(streams.len(), expected.len());
|
||||
|
||||
// Ensure that stream is pending
|
||||
let waker = futures::task::noop_waker();
|
||||
let mut cx = futures::task::Context::from_waker(&waker);
|
||||
|
||||
for ((actual_sequencer_id, mut actual_stream), (expected_sequencer_id, _expected_writes)) in
|
||||
streams.into_iter().zip(expected.iter())
|
||||
{
|
||||
assert_eq!(actual_sequencer_id, *expected_sequencer_id);
|
||||
|
||||
// empty stream is pending
|
||||
assert!(actual_stream.stream.poll_next_unpin(&mut cx).is_pending());
|
||||
}
|
||||
let mut actual_stream = actual_stream_handler.stream();
|
||||
assert!(actual_stream.poll_next_unpin(&mut cx).is_pending());
|
||||
}
|
||||
|
||||
/// Asserts that given span context are the same or that `second` links back to `first`.
|
||||
|
@ -854,20 +953,6 @@ pub mod test_utils {
|
|||
assert_eq!(first.parent_span_id, second.parent_span_id);
|
||||
}
|
||||
|
||||
/// Pops first entry from map.
|
||||
///
|
||||
/// Helper until <https://github.com/rust-lang/rust/issues/62924> is stable.
|
||||
pub(crate) fn map_pop_first<K, V>(map: &mut BTreeMap<K, V>) -> Option<(K, V)>
|
||||
where
|
||||
K: Clone + Ord,
|
||||
{
|
||||
map.keys()
|
||||
.next()
|
||||
.cloned()
|
||||
.map(|k| map.remove_entry(&k))
|
||||
.flatten()
|
||||
}
|
||||
|
||||
/// Pops first entry from set.
|
||||
///
|
||||
/// Helper until <https://github.com/rust-lang/rust/issues/62924> is stable.
|
@ -119,21 +119,21 @@ use std::{
|
|||
},
|
||||
};
|
||||
|
||||
use crate::codec::{ContentType, IoxHeaders};
|
||||
use crate::{
|
||||
codec::{ContentType, IoxHeaders},
|
||||
core::WriteBufferStreamHandler,
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use data_types::{sequence::Sequence, write_buffer::WriteBufferCreationConfig};
|
||||
use dml::{DmlMeta, DmlOperation};
|
||||
use futures::{FutureExt, Stream, StreamExt};
|
||||
use futures::{stream::BoxStream, Stream, StreamExt};
|
||||
use pin_project::pin_project;
|
||||
use time::{Time, TimeProvider};
|
||||
use tokio_util::sync::ReusableBoxFuture;
|
||||
use trace::TraceCollector;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::core::{
|
||||
FetchHighWatermark, FetchHighWatermarkFut, WriteBufferError, WriteBufferReading,
|
||||
WriteBufferWriting, WriteStream,
|
||||
};
|
||||
use crate::core::{WriteBufferError, WriteBufferReading, WriteBufferWriting};
|
||||
|
||||
/// Header used to declare the creation time of the message.
|
||||
pub const HEADER_TIME: &str = "last-modified";
|
||||
|
@ -260,6 +260,35 @@ impl WriteBufferWriting for FileBufferProducer {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
pub struct FileBufferStreamHandler {
    sequencer_id: u32,
    path: PathBuf,
    next_sequence_number: Arc<AtomicU64>,
    trace_collector: Option<Arc<dyn TraceCollector>>,
}

#[async_trait]
impl WriteBufferStreamHandler for FileBufferStreamHandler {
    fn stream(&mut self) -> BoxStream<'_, Result<DmlOperation, WriteBufferError>> {
        let committed = self.path.join("committed");

        ConsumerStream::new(
            self.sequencer_id,
            committed,
            Arc::clone(&self.next_sequence_number),
            self.trace_collector.clone(),
        )
        .boxed()
    }

    async fn seek(&mut self, sequence_number: u64) -> Result<(), WriteBufferError> {
        self.next_sequence_number
            .store(sequence_number, Ordering::SeqCst);
        Ok(())
    }
}
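`seek` above only stores the target into the shared `AtomicU64`; the next `stream()` call hands that counter to `ConsumerStream::new`, so the new stream starts at the seeked position and keeps advancing the same cursor. A standalone sketch of that shared-cursor idea with toy types and std only:

use std::sync::{
    atomic::{AtomicU64, Ordering},
    Arc,
};

// Toy stand-ins: a handler owns the cursor, streams read and advance it.
struct Handler {
    next_sequence_number: Arc<AtomicU64>,
}

struct Cursor {
    next_sequence_number: Arc<AtomicU64>,
}

impl Handler {
    fn stream(&mut self) -> Cursor {
        Cursor {
            next_sequence_number: Arc::clone(&self.next_sequence_number),
        }
    }

    fn seek(&mut self, sequence_number: u64) {
        self.next_sequence_number
            .store(sequence_number, Ordering::SeqCst);
    }
}

impl Cursor {
    // Each read consumes the current position and bumps the shared counter,
    // so a later `stream()` continues where the previous one stopped.
    fn read(&mut self) -> u64 {
        self.next_sequence_number.fetch_add(1, Ordering::SeqCst)
    }
}

fn main() {
    let mut handler = Handler {
        next_sequence_number: Arc::new(AtomicU64::new(0)),
    };

    let mut s = handler.stream();
    assert_eq!(s.read(), 0);
    assert_eq!(s.read(), 1);
    drop(s);

    handler.seek(10);
    let mut s = handler.stream();
    assert_eq!(s.read(), 10);
}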
|
||||
|
||||
/// File-based write buffer reader.
|
||||
#[derive(Debug)]
|
||||
pub struct FileBufferConsumer {
|
||||
|
@ -291,56 +320,39 @@ impl FileBufferConsumer {
|
|||
|
||||
#[async_trait]
|
||||
impl WriteBufferReading for FileBufferConsumer {
|
||||
fn streams(&mut self) -> BTreeMap<u32, WriteStream<'_>> {
|
||||
let mut streams = BTreeMap::default();
|
||||
|
||||
for (sequencer_id, (sequencer_path, next_sequence_number)) in &self.dirs {
|
||||
let committed = sequencer_path.join("committed");
|
||||
|
||||
let stream = ConsumerStream::new(
|
||||
*sequencer_id,
|
||||
committed.clone(),
|
||||
Arc::clone(next_sequence_number),
|
||||
self.trace_collector.clone(),
|
||||
)
|
||||
.boxed();
|
||||
|
||||
let fetch_high_watermark = move || {
|
||||
let committed = committed.clone();
|
||||
|
||||
let fut = async move { watermark(&committed).await };
|
||||
fut.boxed() as FetchHighWatermarkFut<'_>
|
||||
};
|
||||
let fetch_high_watermark = Box::new(fetch_high_watermark) as FetchHighWatermark<'_>;
|
||||
|
||||
streams.insert(
|
||||
*sequencer_id,
|
||||
WriteStream {
|
||||
stream,
|
||||
fetch_high_watermark,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
streams
|
||||
fn sequencer_ids(&self) -> BTreeSet<u32> {
|
||||
self.dirs.keys().copied().collect()
|
||||
}
|
||||
|
||||
async fn seek(
|
||||
&mut self,
|
||||
async fn stream_handler(
|
||||
&self,
|
||||
sequencer_id: u32,
|
||||
sequence_number: u64,
|
||||
) -> Result<(), WriteBufferError> {
|
||||
let path_and_next_sequence_number = self
|
||||
) -> Result<Box<dyn WriteBufferStreamHandler>, WriteBufferError> {
|
||||
let (path, _next_sequence_number) = self
|
||||
.dirs
|
||||
.get(&sequencer_id)
|
||||
.ok_or_else::<WriteBufferError, _>(|| {
|
||||
format!("Unknown sequencer: {}", sequencer_id).into()
|
||||
})?;
|
||||
path_and_next_sequence_number
|
||||
.1
|
||||
.store(sequence_number, Ordering::SeqCst);
|
||||
|
||||
Ok(())
|
||||
Ok(Box::new(FileBufferStreamHandler {
|
||||
sequencer_id,
|
||||
path: path.clone(),
|
||||
next_sequence_number: Arc::new(AtomicU64::new(0)),
|
||||
trace_collector: self.trace_collector.clone(),
|
||||
}))
|
||||
}
|
||||
|
||||
async fn fetch_high_watermark(&self, sequencer_id: u32) -> Result<u64, WriteBufferError> {
|
||||
let (path, _next_sequence_number) = self
|
||||
.dirs
|
||||
.get(&sequencer_id)
|
||||
.ok_or_else::<WriteBufferError, _>(|| {
|
||||
format!("Unknown sequencer: {}", sequencer_id).into()
|
||||
})?;
|
||||
let committed = path.join("committed");
|
||||
|
||||
watermark(&committed).await
|
||||
}
|
||||
|
||||
fn type_name(&self) -> &'static str {
|
||||
|
@ -450,7 +462,7 @@ impl ConsumerStream {
|
|||
}
|
||||
_ => {
|
||||
// cannot read file => communicate to user
|
||||
Err(Box::new(error) as WriteBufferError)
|
||||
Err(error.into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -466,7 +478,10 @@ impl ConsumerStream {
|
|||
trace_collector: Option<Arc<dyn TraceCollector>>,
|
||||
) -> Result<DmlOperation, WriteBufferError> {
|
||||
let mut headers = [httparse::EMPTY_HEADER; 16];
|
||||
match httparse::parse_headers(&data, &mut headers)? {
|
||||
let status =
|
||||
httparse::parse_headers(&data, &mut headers).map_err(WriteBufferError::invalid_data)?;
|
||||
|
||||
match status {
|
||||
httparse::Status::Complete((offset, headers)) => {
|
||||
let iox_headers = IoxHeaders::from_headers(
|
||||
headers.iter().map(|header| (header.name, header.value)),
|
||||
|
@ -792,11 +807,12 @@ mod tests {
|
|||
)
|
||||
.await;
|
||||
|
||||
let mut reader = ctx.reading(true).await.unwrap();
|
||||
let mut stream = reader.streams().remove(&sequencer_id).unwrap();
|
||||
let reader = ctx.reading(true).await.unwrap();
|
||||
let mut handler = reader.stream_handler(sequencer_id).await.unwrap();
|
||||
let mut stream = handler.stream();
|
||||
|
||||
assert_write_op_eq(&stream.stream.next().await.unwrap().unwrap(), &w1);
|
||||
assert_write_op_eq(&stream.stream.next().await.unwrap().unwrap(), &w4);
|
||||
assert_write_op_eq(&stream.next().await.unwrap().unwrap(), &w1);
|
||||
assert_write_op_eq(&stream.next().await.unwrap().unwrap(), &w4);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
@ -820,9 +836,10 @@ mod tests {
|
|||
)
|
||||
.await;
|
||||
|
||||
let mut reader = ctx.reading(true).await.unwrap();
|
||||
let mut stream = reader.streams().remove(&sequencer_id).unwrap();
|
||||
let reader = ctx.reading(true).await.unwrap();
|
||||
let mut handler = reader.stream_handler(sequencer_id).await.unwrap();
|
||||
let mut stream = handler.stream();
|
||||
|
||||
assert_write_op_eq(&stream.stream.next().await.unwrap().unwrap(), &w2);
|
||||
assert_write_op_eq(&stream.next().await.unwrap().unwrap(), &w2);
|
||||
}
|
||||
}
@ -1,4 +1,4 @@
|
|||
use std::{collections::BTreeMap, time::Duration};
|
||||
use std::{collections::BTreeMap, fmt::Display, str::FromStr, time::Duration};
|
||||
|
||||
use data_types::write_buffer::WriteBufferCreationConfig;
|
||||
|
||||
|
@ -18,7 +18,7 @@ impl TryFrom<&BTreeMap<String, String>> for ClientConfig {
|
|||
|
||||
fn try_from(cfg: &BTreeMap<String, String>) -> Result<Self, Self::Error> {
|
||||
Ok(Self {
|
||||
max_message_size: cfg.get("max_message_size").map(|s| s.parse()).transpose()?,
|
||||
max_message_size: parse_key(cfg, "max_message_size")?,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -45,25 +45,16 @@ impl TryFrom<&WriteBufferCreationConfig> for TopicCreationConfig {
|
|||
|
||||
fn try_from(cfg: &WriteBufferCreationConfig) -> Result<Self, Self::Error> {
|
||||
Ok(Self {
|
||||
num_partitions: i32::try_from(cfg.n_sequencers.get())?,
|
||||
replication_factor: cfg
|
||||
.options
|
||||
.get("replication_factor")
|
||||
.map(|s| s.parse())
|
||||
.transpose()?
|
||||
.unwrap_or(1),
|
||||
timeout_ms: cfg
|
||||
.options
|
||||
.get("timeout_ms")
|
||||
.map(|s| s.parse())
|
||||
.transpose()?
|
||||
.unwrap_or(5_000),
|
||||
num_partitions: i32::try_from(cfg.n_sequencers.get())
|
||||
.map_err(WriteBufferError::invalid_input)?,
|
||||
replication_factor: parse_key(&cfg.options, "replication_factor")?.unwrap_or(1),
|
||||
timeout_ms: parse_key(&cfg.options, "timeout_ms")?.unwrap_or(5_000),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Config for consumers.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||
pub struct ConsumerConfig {
|
||||
/// Will wait for at least `min_batch_size` bytes of data
|
||||
///
|
||||
|
@ -86,18 +77,9 @@ impl TryFrom<&BTreeMap<String, String>> for ConsumerConfig {
|
|||
|
||||
fn try_from(cfg: &BTreeMap<String, String>) -> Result<Self, Self::Error> {
|
||||
Ok(Self {
|
||||
max_wait_ms: cfg
|
||||
.get("consumer_max_wait_ms")
|
||||
.map(|s| s.parse())
|
||||
.transpose()?,
|
||||
min_batch_size: cfg
|
||||
.get("consumer_min_batch_size")
|
||||
.map(|s| s.parse())
|
||||
.transpose()?,
|
||||
max_batch_size: cfg
|
||||
.get("consumer_max_batch_size")
|
||||
.map(|s| s.parse())
|
||||
.transpose()?,
|
||||
max_wait_ms: parse_key(cfg, "consumer_max_wait_ms")?,
|
||||
min_batch_size: parse_key(cfg, "consumer_min_batch_size")?,
|
||||
max_batch_size: parse_key(cfg, "consumer_max_batch_size")?,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -120,25 +102,33 @@ impl TryFrom<&BTreeMap<String, String>> for ProducerConfig {
|
|||
type Error = WriteBufferError;
|
||||
|
||||
fn try_from(cfg: &BTreeMap<String, String>) -> Result<Self, Self::Error> {
|
||||
let linger_ms: Option<u64> = cfg
|
||||
.get("producer_linger_ms")
|
||||
.map(|s| s.parse())
|
||||
.transpose()?;
|
||||
let linger_ms: Option<u64> = parse_key(cfg, "producer_linger_ms")?;
|
||||
|
||||
Ok(Self {
|
||||
linger: linger_ms.map(Duration::from_millis),
|
||||
max_batch_size: cfg
|
||||
.get("producer_max_batch_size")
|
||||
.map(|s| s.parse())
|
||||
.transpose()?
|
||||
.unwrap_or(100 * 1024),
|
||||
max_batch_size: parse_key(cfg, "producer_max_batch_size")?.unwrap_or(100 * 1024),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_key<T>(cfg: &BTreeMap<String, String>, key: &str) -> Result<Option<T>, WriteBufferError>
where
    T: FromStr,
    T::Err: Display,
{
    if let Some(s) = cfg.get(key) {
        s.parse()
            .map(Some)
            .map_err(|e| format!("Cannot parse `{key}` from '{s}': {e}").into())
    } else {
        Ok(None)
    }
}
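Usage of `parse_key`: a missing key yields `Ok(None)`, a present key is parsed with the type inferred at the call site, and a parse failure produces the `Cannot parse ...` message the tests below check. A short sketch, assuming `parse_key` and `WriteBufferError` are in scope:

use std::collections::BTreeMap;

fn demo(cfg: &BTreeMap<String, String>) -> Result<(), WriteBufferError> {
    // Missing key => Ok(None).
    let absent: Option<u64> = parse_key(cfg, "no_such_key")?;
    assert_eq!(absent, None);

    // Present key => parsed with the inferred type, here u64.
    let linger_ms: Option<u64> = parse_key(cfg, "producer_linger_ms")?;
    let _ = linger_ms.map(std::time::Duration::from_millis);

    // Defaults compose naturally with unwrap_or, as in the configs above.
    let _max_batch_size =
        parse_key::<usize>(cfg, "producer_max_batch_size")?.unwrap_or(100 * 1024);

    Ok(())
}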
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::{collections::BTreeMap, num::NonZeroU32};
|
||||
use test_helpers::assert_contains;
|
||||
|
||||
use super::*;
|
||||
|
||||
|
@ -164,6 +154,19 @@ mod tests {
|
|||
assert_eq!(actual, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_client_config_error() {
|
||||
let err = ClientConfig::try_from(&BTreeMap::from([(
|
||||
String::from("max_message_size"),
|
||||
String::from("xyz"),
|
||||
)]))
|
||||
.unwrap_err();
|
||||
assert_contains!(
|
||||
err.to_string(),
|
||||
"Cannot parse `max_message_size` from 'xyz': invalid digit found in string"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_topic_creation_config_default() {
|
||||
let actual = TopicCreationConfig::try_from(&WriteBufferCreationConfig {
|
||||
|
@ -198,6 +201,29 @@ mod tests {
|
|||
assert_eq!(actual, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_topic_creation_config_err() {
|
||||
let err = TopicCreationConfig::try_from(&WriteBufferCreationConfig {
|
||||
n_sequencers: NonZeroU32::new(2).unwrap(),
|
||||
options: BTreeMap::from([(String::from("replication_factor"), String::from("xyz"))]),
|
||||
})
|
||||
.unwrap_err();
|
||||
assert_contains!(
|
||||
err.to_string(),
|
||||
"Cannot parse `replication_factor` from 'xyz': invalid digit found in string"
|
||||
);
|
||||
|
||||
let err = TopicCreationConfig::try_from(&WriteBufferCreationConfig {
|
||||
n_sequencers: NonZeroU32::new(2).unwrap(),
|
||||
options: BTreeMap::from([(String::from("timeout_ms"), String::from("xyz"))]),
|
||||
})
|
||||
.unwrap_err();
|
||||
assert_contains!(
|
||||
err.to_string(),
|
||||
"Cannot parse `timeout_ms` from 'xyz': invalid digit found in string"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_consumer_config_default() {
|
||||
let actual = ConsumerConfig::try_from(&BTreeMap::default()).unwrap();
|
||||
|
@ -226,6 +252,39 @@ mod tests {
|
|||
assert_eq!(actual, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_consumer_config_err() {
|
||||
let err = ConsumerConfig::try_from(&BTreeMap::from([(
|
||||
String::from("consumer_max_wait_ms"),
|
||||
String::from("xyz"),
|
||||
)]))
|
||||
.unwrap_err();
|
||||
assert_contains!(
|
||||
err.to_string(),
|
||||
"Cannot parse `consumer_max_wait_ms` from 'xyz': invalid digit found in string"
|
||||
);
|
||||
|
||||
let err = ConsumerConfig::try_from(&BTreeMap::from([(
|
||||
String::from("consumer_min_batch_size"),
|
||||
String::from("xyz"),
|
||||
)]))
|
||||
.unwrap_err();
|
||||
assert_contains!(
|
||||
err.to_string(),
|
||||
"Cannot parse `consumer_min_batch_size` from 'xyz': invalid digit found in string"
|
||||
);
|
||||
|
||||
let err = ConsumerConfig::try_from(&BTreeMap::from([(
|
||||
String::from("consumer_max_batch_size"),
|
||||
String::from("xyz"),
|
||||
)]))
|
||||
.unwrap_err();
|
||||
assert_contains!(
|
||||
err.to_string(),
|
||||
"Cannot parse `consumer_max_batch_size` from 'xyz': invalid digit found in string"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_producer_config_default() {
|
||||
let actual = ProducerConfig::try_from(&BTreeMap::default()).unwrap();
|
||||
|
@ -253,4 +312,27 @@ mod tests {
|
|||
};
|
||||
assert_eq!(actual, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_producer_config_err() {
|
||||
let err = ProducerConfig::try_from(&BTreeMap::from([(
|
||||
String::from("producer_linger_ms"),
|
||||
String::from("xyz"),
|
||||
)]))
|
||||
.unwrap_err();
|
||||
assert_contains!(
|
||||
err.to_string(),
|
||||
"Cannot parse `producer_linger_ms` from 'xyz': invalid digit found in string"
|
||||
);
|
||||
|
||||
let err = ProducerConfig::try_from(&BTreeMap::from([(
|
||||
String::from("producer_max_batch_size"),
|
||||
String::from("xyz"),
|
||||
)]))
|
||||
.unwrap_err();
|
||||
assert_contains!(
|
||||
err.to_string(),
|
||||
"Cannot parse `producer_max_batch_size` from 'xyz': invalid digit found in string"
|
||||
);
|
||||
}
|
||||
}
@ -9,7 +9,7 @@ use std::{
|
|||
use async_trait::async_trait;
|
||||
use data_types::{sequence::Sequence, write_buffer::WriteBufferCreationConfig};
|
||||
use dml::{DmlMeta, DmlOperation};
|
||||
use futures::{FutureExt, StreamExt};
|
||||
use futures::{stream::BoxStream, StreamExt};
|
||||
use rskafka::client::{
|
||||
consumer::StreamConsumerBuilder,
|
||||
error::{Error as RSKafkaError, ProtocolError},
|
||||
|
@ -22,10 +22,7 @@ use trace::TraceCollector;
|
|||
|
||||
use crate::{
|
||||
codec::IoxHeaders,
|
||||
core::{
|
||||
FetchHighWatermark, FetchHighWatermarkFut, WriteBufferError, WriteBufferReading,
|
||||
WriteBufferWriting, WriteStream,
|
||||
},
|
||||
core::{WriteBufferError, WriteBufferReading, WriteBufferStreamHandler, WriteBufferWriting},
|
||||
};
|
||||
|
||||
use self::{
|
||||
|
@ -119,14 +116,86 @@ impl WriteBufferWriting for RSKafkaProducer {
|
|||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct ConsumerPartition {
|
||||
pub struct RSKafkaStreamHandler {
|
||||
partition_client: Arc<PartitionClient>,
|
||||
next_offset: Arc<AtomicI64>,
|
||||
trace_collector: Option<Arc<dyn TraceCollector>>,
|
||||
consumer_config: ConsumerConfig,
|
||||
sequencer_id: u32,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl WriteBufferStreamHandler for RSKafkaStreamHandler {
|
||||
fn stream(&mut self) -> BoxStream<'_, Result<DmlOperation, WriteBufferError>> {
|
||||
let trace_collector = self.trace_collector.clone();
|
||||
let next_offset = Arc::clone(&self.next_offset);
|
||||
|
||||
let mut stream_builder = StreamConsumerBuilder::new(
|
||||
Arc::clone(&self.partition_client),
|
||||
next_offset.load(Ordering::SeqCst),
|
||||
);
|
||||
if let Some(max_wait_ms) = self.consumer_config.max_wait_ms {
|
||||
stream_builder = stream_builder.with_max_wait_ms(max_wait_ms);
|
||||
}
|
||||
if let Some(min_batch_size) = self.consumer_config.min_batch_size {
|
||||
stream_builder = stream_builder.with_min_batch_size(min_batch_size);
|
||||
}
|
||||
if let Some(max_batch_size) = self.consumer_config.max_batch_size {
|
||||
stream_builder = stream_builder.with_max_batch_size(max_batch_size);
|
||||
}
|
||||
let stream = stream_builder.build();
|
||||
|
||||
let stream = stream.map(move |res| {
|
||||
let (record, _watermark) = res?;
|
||||
|
||||
// store new offset already so we don't get stuck on invalid records
|
||||
next_offset.store(record.offset + 1, Ordering::SeqCst);
|
||||
|
||||
let kafka_read_size = record.record.approximate_size();
|
||||
|
||||
let headers =
|
||||
IoxHeaders::from_headers(record.record.headers, trace_collector.as_ref())?;
|
||||
|
||||
let sequence = Sequence {
|
||||
id: self.sequencer_id,
|
||||
number: record
|
||||
.offset
|
||||
.try_into()
|
||||
.map_err(WriteBufferError::invalid_data)?,
|
||||
};
|
||||
|
||||
let timestamp_millis =
|
||||
i64::try_from(record.record.timestamp.unix_timestamp_nanos() / 1_000_000)
|
||||
.map_err(WriteBufferError::invalid_data)?;
|
||||
|
||||
let timestamp = Time::from_timestamp_millis_opt(timestamp_millis)
|
||||
.ok_or_else::<WriteBufferError, _>(|| {
|
||||
format!(
|
||||
"Cannot parse timestamp for milliseconds: {}",
|
||||
timestamp_millis
|
||||
)
|
||||
.into()
|
||||
})?;
|
||||
|
||||
let value = record
|
||||
.record
|
||||
.value
|
||||
.ok_or_else::<WriteBufferError, _>(|| "Value missing".to_string().into())?;
|
||||
crate::codec::decode(&value, headers, sequence, timestamp, kafka_read_size)
|
||||
});
|
||||
stream.boxed()
|
||||
}
|
||||
|
||||
async fn seek(&mut self, sequence_number: u64) -> Result<(), WriteBufferError> {
|
||||
let offset = i64::try_from(sequence_number).map_err(WriteBufferError::invalid_input)?;
|
||||
self.next_offset.store(offset, Ordering::SeqCst);
|
||||
Ok(())
|
||||
}
|
||||
}
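Two details of the handler above: `seek` converts the `u64` sequence number to a Kafka `i64` offset up front, rejecting out-of-range values as invalid input, and the stream stores `next_offset = record.offset + 1` before decoding, so a poison record is skipped on the next stream instead of being retried forever. A tiny standalone sketch of that offset bookkeeping with a toy record type:

use std::sync::atomic::{AtomicI64, Ordering};

struct RawRecord {
    offset: i64,
    payload: Vec<u8>,
}

fn seek(next_offset: &AtomicI64, sequence_number: u64) -> Result<(), String> {
    // Kafka offsets are i64; very large u64 sequence numbers are invalid input.
    let offset = i64::try_from(sequence_number)
        .map_err(|e| format!("invalid sequence number {}: {}", sequence_number, e))?;
    next_offset.store(offset, Ordering::SeqCst);
    Ok(())
}

fn handle(next_offset: &AtomicI64, record: RawRecord) -> Result<String, String> {
    // Advance the cursor *before* decoding, so a bad record is not re-read forever.
    next_offset.store(record.offset + 1, Ordering::SeqCst);
    String::from_utf8(record.payload).map_err(|e| format!("decode error: {}", e))
}

fn main() {
    let next_offset = AtomicI64::new(0);
    seek(&next_offset, 5).unwrap();
    assert_eq!(next_offset.load(Ordering::SeqCst), 5);

    let bad = RawRecord { offset: 5, payload: vec![0xff, 0xfe] };
    assert!(handle(&next_offset, bad).is_err());
    // Even though decoding failed, the cursor moved past the bad record.
    assert_eq!(next_offset.load(Ordering::SeqCst), 6);
}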
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RSKafkaConsumer {
|
||||
partitions: BTreeMap<u32, ConsumerPartition>,
|
||||
partition_clients: BTreeMap<u32, Arc<PartitionClient>>,
|
||||
trace_collector: Option<Arc<dyn TraceCollector>>,
|
||||
consumer_config: ConsumerConfig,
|
||||
}
|
||||
|
@ -147,24 +216,13 @@ impl RSKafkaConsumer {
)
.await?;

let partitions = partition_clients
let partition_clients = partition_clients
.into_iter()
.map(|(partition_id, partition_client)| {
let partition_client = Arc::new(partition_client);
let next_offset = Arc::new(AtomicI64::new(0));

(
partition_id,
ConsumerPartition {
partition_client,
next_offset,
},
)
})
.map(|(k, v)| (k, Arc::new(v)))
.collect();

Ok(Self {
partitions,
partition_clients,
trace_collector,
consumer_config: ConsumerConfig::try_from(connection_config)?,
})

@ -173,103 +231,40 @@ impl RSKafkaConsumer {

#[async_trait]
impl WriteBufferReading for RSKafkaConsumer {
fn streams(&mut self) -> BTreeMap<u32, WriteStream<'_>> {
let mut streams = BTreeMap::new();

for (sequencer_id, partition) in &self.partitions {
let trace_collector = self.trace_collector.clone();
let next_offset = Arc::clone(&partition.next_offset);

let mut stream_builder = StreamConsumerBuilder::new(
Arc::clone(&partition.partition_client),
next_offset.load(Ordering::SeqCst),
);
if let Some(max_wait_ms) = self.consumer_config.max_wait_ms {
stream_builder = stream_builder.with_max_wait_ms(max_wait_ms);
}
if let Some(min_batch_size) = self.consumer_config.min_batch_size {
stream_builder = stream_builder.with_min_batch_size(min_batch_size);
}
if let Some(max_batch_size) = self.consumer_config.max_batch_size {
stream_builder = stream_builder.with_max_batch_size(max_batch_size);
}
let stream = stream_builder.build();

let stream = stream.map(move |res| {
let (record, _watermark) = res?;

// store new offset already so we don't get stuck on invalid records
next_offset.store(record.offset + 1, Ordering::SeqCst);

let kafka_read_size = record.record.approximate_size();

let headers =
IoxHeaders::from_headers(record.record.headers, trace_collector.as_ref())?;

let sequence = Sequence {
id: *sequencer_id,
number: record.offset.try_into()?,
};

let timestamp_millis =
i64::try_from(record.record.timestamp.unix_timestamp_nanos() / 1_000_000)?;
let timestamp = Time::from_timestamp_millis_opt(timestamp_millis)
.ok_or_else::<WriteBufferError, _>(|| {
format!(
"Cannot parse timestamp for milliseconds: {}",
timestamp_millis
)
.into()
})?;

let value = record
.record
.value
.ok_or_else::<WriteBufferError, _>(|| "Value missing".to_string().into())?;
crate::codec::decode(&value, headers, sequence, timestamp, kafka_read_size)
});
let stream = stream.boxed();

let partition_client = Arc::clone(&partition.partition_client);
let fetch_high_watermark = move || {
let partition_client = Arc::clone(&partition_client);
let fut = async move {
let watermark = partition_client.get_high_watermark().await?;
u64::try_from(watermark).map_err(|e| Box::new(e) as WriteBufferError)
};

fut.boxed() as FetchHighWatermarkFut<'_>
};
let fetch_high_watermark = Box::new(fetch_high_watermark) as FetchHighWatermark<'_>;

streams.insert(
*sequencer_id,
WriteStream {
stream,
fetch_high_watermark,
},
);
}

streams
fn sequencer_ids(&self) -> BTreeSet<u32> {
self.partition_clients.keys().copied().collect()
}

async fn seek(
&mut self,
async fn stream_handler(
&self,
sequencer_id: u32,
sequence_number: u64,
) -> Result<(), WriteBufferError> {
let partition = self
.partitions
.get_mut(&sequencer_id)
) -> Result<Box<dyn WriteBufferStreamHandler>, WriteBufferError> {
let partition_client = self
.partition_clients
.get(&sequencer_id)
.ok_or_else::<WriteBufferError, _>(|| {
format!("Unknown partition: {}", sequencer_id).into()
})?;

let offset = i64::try_from(sequence_number)?;
partition.next_offset.store(offset, Ordering::SeqCst);
Ok(Box::new(RSKafkaStreamHandler {
partition_client: Arc::clone(partition_client),
next_offset: Arc::new(AtomicI64::new(0)),
trace_collector: self.trace_collector.clone(),
consumer_config: self.consumer_config.clone(),
sequencer_id,
}))
}

Ok(())
async fn fetch_high_watermark(&self, sequencer_id: u32) -> Result<u64, WriteBufferError> {
let partition_client = self
.partition_clients
.get(&sequencer_id)
.ok_or_else::<WriteBufferError, _>(|| {
format!("Unknown partition: {}", sequencer_id).into()
})?;

let watermark = partition_client.get_high_watermark().await?;
u64::try_from(watermark).map_err(WriteBufferError::invalid_data)
}

fn type_name(&self) -> &'static str {
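
For context, a minimal caller-side sketch of the per-sequencer API introduced above. The trait and method names (WriteBufferReading::stream_handler, WriteBufferStreamHandler::seek and ::stream, DmlOperation, WriteBufferError) are the ones shown in the hunks above; the function name and surrounding wiring are illustrative only, not part of this change:

    use futures::StreamExt;

    // Assumes `WriteBufferReading`, `WriteBufferStreamHandler`, `DmlOperation`
    // and `WriteBufferError` are in scope, as in the code above.
    async fn drain_one_sequencer(
        reader: &dyn WriteBufferReading,
        sequencer_id: u32,
        start_at: u64,
    ) -> Result<(), WriteBufferError> {
        // one handler per sequencer replaces the old streams() map + global seek()
        let mut handler = reader.stream_handler(sequencer_id).await?;
        handler.seek(start_at).await?;

        let mut stream = handler.stream();
        while let Some(res) = stream.next().await {
            let _op: DmlOperation = res?;
            // process the operation here; the stream stays pending once caught up
        }
        Ok(())
    }
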
@ -298,7 +293,7 @@ async fn setup_topic(
let mut partition_clients = BTreeMap::new();
for partition in topic.partitions {
let c = client.partition_client(&database_name, partition).await?;
let partition = u32::try_from(partition)?;
let partition = u32::try_from(partition).map_err(WriteBufferError::invalid_data)?;
partition_clients.insert(partition, c);
}
return Ok(partition_clients);

@ -340,12 +335,13 @@ mod tests {
use dml::{test_util::assert_write_op_eq, DmlDelete, DmlWrite};
use futures::{stream::FuturesUnordered, TryStreamExt};
use rskafka::{client::partition::Compression, record::Record};
use test_helpers::assert_contains;
use trace::{ctx::SpanContext, RingBufferTraceCollector};

use crate::{
core::test_utils::{
assert_span_context_eq_or_linked, map_pop_first, perform_generic_tests,
random_topic_name, set_pop_first, TestAdapter, TestContext,
assert_span_context_eq_or_linked, perform_generic_tests, random_topic_name,
set_pop_first, TestAdapter, TestContext,
},
maybe_skip_kafka_integration,
};

@ -506,22 +502,18 @@ mod tests {
)
.await;

let mut consumer = ctx.reading(true).await.unwrap();
let consumer = ctx.reading(true).await.unwrap();
let mut handler = consumer.stream_handler(sequencer_id).await.unwrap();

// read broken message from stream
let mut streams = consumer.streams();
assert_eq!(streams.len(), 1);
let (_sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap();
let err = stream.stream.next().await.unwrap().unwrap_err();
assert_eq!(err.to_string(), "No content type header");
let mut stream = handler.stream();
let err = stream.next().await.unwrap().unwrap_err();
assert_contains!(err.to_string(), "No content type header");

// re-creating the stream should advance past the broken message
drop(stream);
drop(streams);
let mut streams = consumer.streams();
assert_eq!(streams.len(), 1);
let (_sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap();
let op = stream.stream.next().await.unwrap().unwrap();
let mut stream = handler.stream();
let op = stream.next().await.unwrap().unwrap();
assert_write_op_eq(&op, &w);
}

@ -564,17 +556,16 @@ mod tests {
assert_ne!(w2_1.sequence().unwrap(), w1_1.sequence().unwrap());
assert_eq!(w2_1.sequence().unwrap(), w2_2.sequence().unwrap());

let mut consumer = ctx.reading(true).await.unwrap();
let mut streams = consumer.streams();
assert_eq!(streams.len(), 1);
let (_sequencer_id, mut stream) = map_pop_first(&mut streams).unwrap();
let consumer = ctx.reading(true).await.unwrap();
let mut handler = consumer.stream_handler(sequencer_id).await.unwrap();
let mut stream = handler.stream();

// get output, note that the write operations were fused
let op_w1_12 = stream.stream.next().await.unwrap().unwrap();
let op_d1_1 = stream.stream.next().await.unwrap().unwrap();
let op_d1_2 = stream.stream.next().await.unwrap().unwrap();
let op_w1_34 = stream.stream.next().await.unwrap().unwrap();
let op_w2_12 = stream.stream.next().await.unwrap().unwrap();
let op_w1_12 = stream.next().await.unwrap().unwrap();
let op_d1_1 = stream.next().await.unwrap().unwrap();
let op_d1_2 = stream.next().await.unwrap().unwrap();
let op_w1_34 = stream.next().await.unwrap().unwrap();
let op_w2_12 = stream.next().await.unwrap().unwrap();

// ensure that sequence numbers map as expected
assert_eq!(
@ -6,7 +6,7 @@ use std::{
};

use async_trait::async_trait;
use futures::{stream, FutureExt, StreamExt};
use futures::{stream::BoxStream, StreamExt};
use parking_lot::Mutex;

use data_types::sequence::Sequence;

@ -15,8 +15,7 @@ use dml::{DmlDelete, DmlMeta, DmlOperation, DmlWrite};
use time::TimeProvider;

use crate::core::{
FetchHighWatermark, FetchHighWatermarkFut, WriteBufferError, WriteBufferReading,
WriteBufferWriting, WriteStream,
WriteBufferError, WriteBufferReading, WriteBufferStreamHandler, WriteBufferWriting,
};

#[derive(Debug, Default)]

@ -344,18 +343,9 @@ impl WriteBufferWriting for MockBufferForWritingThatAlwaysErrors {
}
}

/// Sequencer-specific playback state
struct PlaybackState {
/// Index within the entry vector.
vector_index: usize,

/// Offset within the sequencer IDs.
offset: u64,
}

pub struct MockBufferForReading {
shared_state: MockBufferSharedState,
playback_states: Arc<Mutex<BTreeMap<u32, PlaybackState>>>,
n_sequencers: u32,
}

impl MockBufferForReading {

@ -375,21 +365,10 @@ impl MockBufferForReading {
};
entries.len() as u32
};
let playback_states: BTreeMap<_, _> = (0..n_sequencers)
.map(|sequencer_id| {
(
sequencer_id,
PlaybackState {
vector_index: 0,
offset: 0,
},
)
})
.collect();

Ok(Self {
shared_state: state,
playback_states: Arc::new(Mutex::new(playback_states)),
n_sequencers,
})
}
}

@ -400,104 +379,106 @@ impl std::fmt::Debug for MockBufferForReading {
}
}

/// Sequencer-specific playback state
#[derive(Debug)]
pub struct MockBufferStreamHandler {
/// Shared state.
shared_state: MockBufferSharedState,

/// Own sequencer ID.
sequencer_id: u32,

/// Index within the entry vector.
vector_index: usize,

/// Offset within the sequencer IDs.
offset: u64,
}

#[async_trait]
impl WriteBufferReading for MockBufferForReading {
fn streams(&mut self) -> BTreeMap<u32, WriteStream<'_>> {
let sequencer_ids: Vec<_> = {
let playback_states = self.playback_states.lock();
playback_states.keys().copied().collect()
};
impl WriteBufferStreamHandler for MockBufferStreamHandler {
fn stream(&mut self) -> BoxStream<'_, Result<DmlOperation, WriteBufferError>> {
futures::stream::poll_fn(|cx| {
let mut guard = self.shared_state.writes.lock();
let writes = guard.as_mut().unwrap();
let writes_vec = writes.get_mut(&self.sequencer_id).unwrap();

let mut streams = BTreeMap::new();
for sequencer_id in sequencer_ids {
let shared_state = self.shared_state.clone();
let playback_states = Arc::clone(&self.playback_states);
let entries = &writes_vec.writes;
while entries.len() > self.vector_index {
let write_result = &entries[self.vector_index];

let stream = stream::poll_fn(move |cx| {
let mut guard = shared_state.writes.lock();
let writes = guard.as_mut().unwrap();
let writes_vec = writes.get_mut(&sequencer_id).unwrap();
// consume entry
self.vector_index += 1;

let mut playback_states = playback_states.lock();
let playback_state = playback_states.get_mut(&sequencer_id).unwrap();

let entries = &writes_vec.writes;
while entries.len() > playback_state.vector_index {
let write_result = &entries[playback_state.vector_index];

// consume entry
playback_state.vector_index += 1;

match write_result {
Ok(write) => {
// found an entry => need to check if it is within the offset
let sequence = write.meta().sequence().unwrap();
if sequence.number >= playback_state.offset {
// within offset => return entry to caller
return Poll::Ready(Some(Ok(write.clone())));
} else {
// offset is larger then the current entry => ignore entry and try next
continue;
}
}
Err(e) => {
// found an error => return entry to caller
return Poll::Ready(Some(Err(e.to_string().into())));
match write_result {
Ok(write) => {
// found an entry => need to check if it is within the offset
let sequence = write.meta().sequence().unwrap();
if sequence.number >= self.offset {
// within offset => return entry to caller
return Poll::Ready(Some(Ok(write.clone())));
} else {
// offset is larger then the current entry => ignore entry and try next
continue;
}
}
Err(e) => {
// found an error => return entry to caller
return Poll::Ready(Some(Err(e.to_string().into())));
}
}
}

// we are at the end of the recorded entries => report pending
writes_vec.register_waker(cx.waker());
Poll::Pending
})
.boxed();

let shared_state = self.shared_state.clone();

let fetch_high_watermark = move || {
let shared_state = shared_state.clone();

let fut = async move {
let guard = shared_state.writes.lock();
let entries = guard.as_ref().unwrap();
let entry_vec = entries.get(&sequencer_id).unwrap();
let watermark = entry_vec.max_seqno.map(|n| n + 1).unwrap_or(0);

Ok(watermark)
};
fut.boxed() as FetchHighWatermarkFut<'_>
};
let fetch_high_watermark = Box::new(fetch_high_watermark) as FetchHighWatermark<'_>;

streams.insert(
sequencer_id,
WriteStream {
stream,
fetch_high_watermark,
},
);
}

streams
// we are at the end of the recorded entries => report pending
writes_vec.register_waker(cx.waker());
Poll::Pending
})
.boxed()
}

async fn seek(
&mut self,
sequencer_id: u32,
sequence_number: u64,
) -> Result<(), WriteBufferError> {
let mut playback_states = self.playback_states.lock();
async fn seek(&mut self, sequence_number: u64) -> Result<(), WriteBufferError> {
self.offset = sequence_number;

if let Some(playback_state) = playback_states.get_mut(&sequencer_id) {
playback_state.offset = sequence_number;

// reset position to start since seeking might go backwards
playback_state.vector_index = 0;
}
// reset position to start since seeking might go backwards
self.vector_index = 0;

Ok(())
}
}

#[async_trait]
impl WriteBufferReading for MockBufferForReading {
fn sequencer_ids(&self) -> BTreeSet<u32> {
(0..self.n_sequencers).into_iter().collect()
}
async fn stream_handler(
&self,
sequencer_id: u32,
) -> Result<Box<dyn WriteBufferStreamHandler>, WriteBufferError> {
if sequencer_id >= self.n_sequencers {
return Err(format!("Unknown sequencer: {}", sequencer_id).into());
}

Ok(Box::new(MockBufferStreamHandler {
shared_state: self.shared_state.clone(),
sequencer_id,
vector_index: 0,
offset: 0,
}))
}

async fn fetch_high_watermark(&self, sequencer_id: u32) -> Result<u64, WriteBufferError> {
let guard = self.shared_state.writes.lock();
let entries = guard.as_ref().unwrap();
let entry_vec = entries
.get(&sequencer_id)
.ok_or_else::<WriteBufferError, _>(|| {
format!("Unknown sequencer: {}", sequencer_id).into()
})?;
let watermark = entry_vec.max_seqno.map(|n| n + 1).unwrap_or(0);

Ok(watermark)
}

fn type_name(&self) -> &'static str {
"mock"

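For context, a hypothetical test-style sketch of the mock path above: push one line-protocol entry into the shared state, then read it back through a per-sequencer stream handler. MockBufferSharedState::push_lp, MockBufferForReading::new, Sequence::new and the handler methods are used exactly as in the tests further down; the helper function itself is illustrative and assumes the WriteBufferReading trait is in scope:

    use futures::StreamExt;

    // Hypothetical helper, mirroring the tests below.
    async fn read_first_op(state: MockBufferSharedState) -> DmlOperation {
        // seed sequencer 0 with one entry, as the tests below do
        state.push_lp(Sequence::new(0, 0), "mem foo=1 10");

        let read = MockBufferForReading::new(state, None).unwrap();
        let mut handler = read.stream_handler(0).await.unwrap();

        // the handler starts at offset 0, so this seek is shown only for symmetry
        handler.seek(0).await.unwrap();

        let mut stream = handler.stream();
        stream.next().await.unwrap().unwrap()
    }
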
@ -507,40 +488,42 @@ impl WriteBufferReading for MockBufferForReading {
#[derive(Debug, Default, Clone, Copy)]
pub struct MockBufferForReadingThatAlwaysErrors;

#[derive(Debug, Default, Clone, Copy)]
pub struct MockStreamHandlerThatAlwaysErrors;

#[async_trait]
impl WriteBufferReading for MockBufferForReadingThatAlwaysErrors {
fn streams(&mut self) -> BTreeMap<u32, WriteStream<'_>> {
let stream = stream::poll_fn(|_ctx| {
impl WriteBufferStreamHandler for MockStreamHandlerThatAlwaysErrors {
fn stream(&mut self) -> BoxStream<'_, Result<DmlOperation, WriteBufferError>> {
futures::stream::poll_fn(|_cx| {
Poll::Ready(Some(Err(String::from(
"Something bad happened while reading from stream",
)
.into())))
})
.boxed();
let fetch_high_watermark = move || {
let fut = async move {
Err(String::from("Something bad happened while fetching the high watermark").into())
};
fut.boxed() as FetchHighWatermarkFut<'_>
};
let fetch_high_watermark = Box::new(fetch_high_watermark) as FetchHighWatermark<'_>;
IntoIterator::into_iter([(
0,
WriteStream {
stream,
fetch_high_watermark,
},
)])
.collect()
.boxed()
}

async fn seek(
&mut self,
_sequencer_id: u32,
_sequence_number: u64,
) -> Result<(), WriteBufferError> {
async fn seek(&mut self, _sequence_number: u64) -> Result<(), WriteBufferError> {
Err(String::from("Something bad happened while seeking the stream").into())
}
}

#[async_trait]
impl WriteBufferReading for MockBufferForReadingThatAlwaysErrors {
fn sequencer_ids(&self) -> BTreeSet<u32> {
BTreeSet::from([0])
}

async fn stream_handler(
&self,
_sequencer_id: u32,
) -> Result<Box<dyn WriteBufferStreamHandler>, WriteBufferError> {
Ok(Box::new(MockStreamHandlerThatAlwaysErrors {}))
}

async fn fetch_high_watermark(&self, _sequencer_id: u32) -> Result<u64, WriteBufferError> {
Err(String::from("Something bad happened while fetching the high watermark").into())
}

fn type_name(&self) -> &'static str {
"mock_failing"

@ -552,11 +535,13 @@ mod tests {
use std::convert::TryFrom;
use std::time::Duration;

use futures::StreamExt;
use mutable_batch_lp::lines_to_batches;
use test_helpers::assert_contains;
use time::TimeProvider;
use trace::RingBufferTraceCollector;

use crate::core::test_utils::{map_pop_first, perform_generic_tests, TestAdapter, TestContext};
use crate::core::test_utils::{perform_generic_tests, TestAdapter, TestContext};

use super::*;

@ -739,26 +724,34 @@ mod tests {

#[tokio::test]
async fn test_always_error_read() {
let mut reader = MockBufferForReadingThatAlwaysErrors {};
let reader = MockBufferForReadingThatAlwaysErrors {};

assert_eq!(
reader.seek(0, 0).await.unwrap_err().to_string(),
"Something bad happened while seeking the stream"
);

let mut streams = reader.streams();
let (_id, mut stream) = map_pop_first(&mut streams).unwrap();
assert_eq!(
stream.stream.next().await.unwrap().unwrap_err().to_string(),
"Something bad happened while reading from stream"
);
assert_eq!(
(stream.fetch_high_watermark)()
assert_contains!(
reader
.fetch_high_watermark(0)
.await
.unwrap_err()
.to_string(),
"Something bad happened while fetching the high watermark"
);

let mut stream_handler = reader.stream_handler(0).await.unwrap();

assert_contains!(
stream_handler.seek(0).await.unwrap_err().to_string(),
"Something bad happened while seeking the stream"
);

assert_contains!(
stream_handler
.stream()
.next()
.await
.unwrap()
.unwrap_err()
.to_string(),
"Something bad happened while reading from stream"
);
}

#[tokio::test]

@ -768,7 +761,7 @@ mod tests {
let tables = lines_to_batches("upc user=1 100", 0).unwrap();
let operation = DmlOperation::Write(DmlWrite::new("test_db", tables, Default::default()));

assert_eq!(
assert_contains!(
writer
.store_operation(0, &operation)
.await

@ -823,19 +816,20 @@ mod tests {

state.push_lp(Sequence::new(0, 0), "mem foo=1 10");

let mut read = MockBufferForReading::new(state.clone(), None).unwrap();
let playback_state = Arc::clone(&read.playback_states);
let read = MockBufferForReading::new(state.clone(), None).unwrap();

let barrier = Arc::new(tokio::sync::Barrier::new(2));
let barrier_captured = Arc::clone(&barrier);
let consumer = tokio::spawn(async move {
let mut stream = map_pop_first(&mut read.streams()).unwrap().1.stream;
let mut stream_handler = read.stream_handler(0).await.unwrap();
let mut stream = stream_handler.stream();
stream.next().await.unwrap().unwrap();
barrier_captured.wait().await;
stream.next().await.unwrap().unwrap();
});

// Wait for consumer to read first entry
while playback_state.lock().get(&0).unwrap().vector_index < 1 {
tokio::time::sleep(Duration::from_millis(1)).await;
}
barrier.wait().await;

state.push_lp(Sequence::new(0, 1), "mem foo=2 20");

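For context, a self-contained sketch of the waker hand-off that MockBufferStreamHandler::stream relies on in the test above: register the consumer's waker under the same lock that guards the recorded entries, return Poll::Pending, and wake the consumer when a new entry is pushed. Only std, futures and tokio are used; the types and names here are illustrative, not the ones from this crate:

    use std::{
        sync::{Arc, Mutex},
        task::{Poll, Waker},
    };

    use futures::StreamExt;

    #[derive(Default, Clone)]
    struct Shared {
        // recorded entries plus the waker of a pending consumer, behind one lock
        inner: Arc<Mutex<(Vec<i64>, Option<Waker>)>>,
    }

    impl Shared {
        fn push(&self, v: i64) {
            let mut guard = self.inner.lock().unwrap();
            guard.0.push(v);
            // wake a consumer that reported Pending earlier
            if let Some(waker) = guard.1.take() {
                waker.wake();
            }
        }
    }

    #[tokio::main]
    async fn main() {
        let shared = Shared::default();
        let shared_captured = shared.clone();
        let mut next = 0usize;

        let mut stream = futures::stream::poll_fn(move |cx| {
            let mut guard = shared_captured.inner.lock().unwrap();
            if let Some(v) = guard.0.get(next).copied() {
                next += 1;
                return Poll::Ready(Some(v));
            }
            // nothing new => register waker and report pending
            guard.1 = Some(cx.waker().clone());
            Poll::Pending
        });

        shared.push(1);
        assert_eq!(stream.next().await, Some(1));

        let producer = {
            let shared = shared.clone();
            tokio::spawn(async move { shared.push(2) })
        };
        assert_eq!(stream.next().await, Some(2));
        producer.await.unwrap();
    }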