chore: merge main into branch

Merge branch 'main' into ntran/optimize_column_selection (pull/24376/head)
commit 18de3bdcab
@@ -86,7 +86,7 @@ jobs:
# out for parallel CI runs!
#
# To change the contents of the build container, modify docker/Dockerfile.ci
-# To change the final release container, modify docker/Dockerfile.perf
+# To change the final release container, modify docker/Dockerfile.iox
perf_image:
  docker:
    - image: quay.io/influxdb/rust:ci
@@ -105,7 +105,7 @@ jobs:
      echo "$QUAY_PASS" | docker login quay.io --username $QUAY_USER --password-stdin
- run: |
    BRANCH=$(git rev-parse --abbrev-ref HEAD | tr '/' '.')
-    docker build -t quay.io/influxdb/fusion:$BRANCH -f docker/Dockerfile.perf .
+    docker build -t quay.io/influxdb/fusion:$BRANCH -f docker/Dockerfile.iox .
    docker push quay.io/influxdb/fusion:$BRANCH
    echo "export BRANCH=${BRANCH}" >> $BASH_ENV
- run:
@@ -0,0 +1,4 @@
# Ignore everything
**
# Except
!target/release/influxdb_iox
@@ -71,7 +71,7 @@ jobs:
      args: --workspace

  lints:
-    name: Lints
+    name: Rust Lints
    runs-on: ubuntu-latest
    container:
      image: quay.io/influxdb/rust:ci
@@ -91,3 +91,13 @@ jobs:
    with:
      token: ${{ secrets.GITHUB_TOKEN }}
      args: --all-targets --workspace -- -D warnings
+
+  protobuf:
+    name: Protobuf Lints
+    runs-on: ubuntu-latest
+    container:
+      image: bufbuild/buf
+    steps:
+      - uses: actions/checkout@v2
+      - name: Lint IOx protobuf
+        run: buf lint
@@ -101,7 +101,7 @@ checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
[[package]]
name = "arrow"
version = "4.0.0-SNAPSHOT"
-source = "git+https://github.com/apache/arrow.git?rev=ad4504e8e85eb8e5babe0f01ca8cf9947499fc40#ad4504e8e85eb8e5babe0f01ca8cf9947499fc40"
+source = "git+https://github.com/apache/arrow.git?rev=b5ac048c75cc55f4039d279f554920be3112d7cd#b5ac048c75cc55f4039d279f554920be3112d7cd"
dependencies = [
 "cfg_aliases",
 "chrono",
@@ -124,7 +124,7 @@ dependencies = [
[[package]]
name = "arrow-flight"
version = "4.0.0-SNAPSHOT"
-source = "git+https://github.com/apache/arrow.git?rev=ad4504e8e85eb8e5babe0f01ca8cf9947499fc40#ad4504e8e85eb8e5babe0f01ca8cf9947499fc40"
+source = "git+https://github.com/apache/arrow.git?rev=b5ac048c75cc55f4039d279f554920be3112d7cd#b5ac048c75cc55f4039d279f554920be3112d7cd"
dependencies = [
 "arrow",
 "bytes",
@@ -411,9 +411,9 @@ dependencies = [

[[package]]
name = "bumpalo"
-version = "3.6.0"
+version = "3.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "099e596ef14349721d9016f6b80dd3419ea1bf289ab9b44df8e4dfd3a005d5d9"
+checksum = "63396b8a4b9de3f4fdfb320ab6080762242f66a8ef174c49d8e19b674db4cdbe"

[[package]]
name = "byteorder"
@@ -438,9 +438,9 @@ dependencies = [

[[package]]
name = "cc"
-version = "1.0.66"
+version = "1.0.67"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4c0496836a84f8d0495758516b8621a622beb77c0fed418570e50764093ced48"
+checksum = "e3c69b077ad434294d3ce9f1f6143a2a4b89a8a2d54ef813d85003a4fd1137fd"
dependencies = [
 "jobserver",
]
@@ -488,9 +488,9 @@ dependencies = [

[[package]]
name = "clang-sys"
-version = "1.1.0"
+version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5cb92721cb37482245ed88428f72253ce422b3b4ee169c70a0642521bb5db4cc"
+checksum = "f54d78e30b388d4815220c8dd03fea5656b6c6d32adb59e89061552a102f8da1"
dependencies = [
 "glob",
 "libc",
@@ -669,9 +669,9 @@ dependencies = [
 "cfg-if 1.0.0",
 "crossbeam-channel 0.5.0",
 "crossbeam-deque 0.8.0",
- "crossbeam-epoch 0.9.1",
+ "crossbeam-epoch 0.9.2",
 "crossbeam-queue 0.3.1",
- "crossbeam-utils 0.8.1",
+ "crossbeam-utils 0.8.2",
]

[[package]]
@@ -691,7 +691,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dca26ee1f8d361640700bde38b2c37d8c22b3ce2d360e1fc1c74ea4b0aa7d775"
dependencies = [
 "cfg-if 1.0.0",
- "crossbeam-utils 0.8.1",
+ "crossbeam-utils 0.8.2",
]

[[package]]
@@ -712,8 +712,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9"
dependencies = [
 "cfg-if 1.0.0",
- "crossbeam-epoch 0.9.1",
- "crossbeam-utils 0.8.1",
+ "crossbeam-epoch 0.9.2",
+ "crossbeam-utils 0.8.2",
]

[[package]]
@@ -733,14 +733,14 @@ dependencies = [

[[package]]
name = "crossbeam-epoch"
-version = "0.9.1"
+version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1aaa739f95311c2c7887a76863f500026092fb1dce0161dab577e559ef3569d"
+checksum = "d60ab4a8dba064f2fbb5aa270c28da5cf4bbd0e72dae1140a6b0353a779dbe00"
dependencies = [
 "cfg-if 1.0.0",
 "const_fn",
- "crossbeam-utils 0.8.1",
+ "crossbeam-utils 0.8.2",
 "lazy_static",
 "loom",
 "memoffset 0.6.1",
 "scopeguard",
]
@@ -763,7 +763,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f6cb3c7f5b8e51bc3ebb73a2327ad4abdbd119dc13223f14f961d2f38486756"
dependencies = [
 "cfg-if 1.0.0",
- "crossbeam-utils 0.8.1",
+ "crossbeam-utils 0.8.2",
]

[[package]]
@@ -779,13 +779,14 @@ dependencies = [

[[package]]
name = "crossbeam-utils"
-version = "0.8.1"
+version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "02d96d1e189ef58269ebe5b97953da3274d83a93af647c2ddd6f9dab28cedb8d"
+checksum = "bae8f328835f8f5a6ceb6a7842a7f2d0c03692adb5c889347235d59194731fe3"
dependencies = [
 "autocfg",
 "cfg-if 1.0.0",
 "lazy_static",
+ "loom",
]

[[package]]
@@ -850,7 +851,7 @@ dependencies = [
[[package]]
name = "datafusion"
version = "4.0.0-SNAPSHOT"
-source = "git+https://github.com/apache/arrow.git?rev=ad4504e8e85eb8e5babe0f01ca8cf9947499fc40#ad4504e8e85eb8e5babe0f01ca8cf9947499fc40"
+source = "git+https://github.com/apache/arrow.git?rev=b5ac048c75cc55f4039d279f554920be3112d7cd#b5ac048c75cc55f4039d279f554920be3112d7cd"
dependencies = [
 "ahash 0.7.0",
 "arrow",
@@ -871,6 +872,7 @@ dependencies = [
 "sha2",
 "sqlparser 0.8.0",
+ "tokio",
 "unicode-segmentation",
]

[[package]]
@@ -1115,9 +1117,9 @@ checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"

[[package]]
name = "form_urlencoded"
-version = "1.0.0"
+version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ece68d15c92e84fa4f19d3780f1294e5ca82a78a6d515f1efaabcc144688be00"
+checksum = "5fc25a87fa4fd2094bffb06925852034d90a17f0d1e05197d4956d3555752191"
dependencies = [
 "matches",
 "percent-encoding",
@@ -1135,9 +1137,9 @@ dependencies = [

[[package]]
name = "futures"
-version = "0.3.12"
+version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "da9052a1a50244d8d5aa9bf55cbc2fb6f357c86cc52e46c62ed390a7180cf150"
+checksum = "7f55667319111d593ba876406af7c409c0ebb44dc4be6132a783ccf163ea14c1"
dependencies = [
 "futures-channel",
 "futures-core",
@@ -1150,9 +1152,9 @@ dependencies = [

[[package]]
name = "futures-channel"
-version = "0.3.12"
+version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f2d31b7ec7efab6eefc7c57233bb10b847986139d88cc2f5a02a1ae6871a1846"
+checksum = "8c2dd2df839b57db9ab69c2c9d8f3e8c81984781937fe2807dc6dcf3b2ad2939"
dependencies = [
 "futures-core",
 "futures-sink",
@@ -1160,15 +1162,15 @@ dependencies = [

[[package]]
name = "futures-core"
-version = "0.3.12"
+version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "79e5145dde8da7d1b3892dad07a9c98fc04bc39892b1ecc9692cf53e2b780a65"
+checksum = "15496a72fabf0e62bdc3df11a59a3787429221dd0710ba8ef163d6f7a9112c94"

[[package]]
name = "futures-executor"
-version = "0.3.12"
+version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e9e59fdc009a4b3096bf94f740a0f2424c082521f20a9b08c5c07c48d90fd9b9"
+checksum = "891a4b7b96d84d5940084b2a37632dd65deeae662c114ceaa2c879629c9c0ad1"
dependencies = [
 "futures-core",
 "futures-task",
@@ -1177,15 +1179,15 @@ dependencies = [

[[package]]
name = "futures-io"
-version = "0.3.12"
+version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "28be053525281ad8259d47e4de5de657b25e7bac113458555bb4b70bc6870500"
+checksum = "d71c2c65c57704c32f5241c1223167c2c3294fd34ac020c807ddbe6db287ba59"

[[package]]
name = "futures-macro"
-version = "0.3.12"
+version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c287d25add322d9f9abdcdc5927ca398917996600182178774032e9f8258fedd"
+checksum = "ea405816a5139fb39af82c2beb921d52143f556038378d6db21183a5c37fbfb7"
dependencies = [
 "proc-macro-hack",
 "proc-macro2",
@@ -1195,24 +1197,21 @@ dependencies = [

[[package]]
name = "futures-sink"
-version = "0.3.12"
+version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "caf5c69029bda2e743fddd0582d1083951d65cc9539aebf8812f36c3491342d6"
+checksum = "85754d98985841b7d4f5e8e6fbfa4a4ac847916893ec511a2917ccd8525b8bb3"

[[package]]
name = "futures-task"
-version = "0.3.12"
+version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "13de07eb8ea81ae445aca7b69f5f7bf15d7bf4912d8ca37d6645c77ae8a58d86"
-dependencies = [
- "once_cell",
-]
+checksum = "fa189ef211c15ee602667a6fcfe1c1fd9e07d42250d2156382820fba33c9df80"

[[package]]
name = "futures-test"
-version = "0.3.12"
+version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b30f48f6b9cd26d8739965d6e3345c511718884fb223795b80dc71d24a9ea9a"
+checksum = "f1fe5e51002528907757d5f1648101086f7197f792112db43ba23b06b09e6bce"
dependencies = [
 "futures-core",
 "futures-executor",
@@ -1220,16 +1219,15 @@ dependencies = [
 "futures-sink",
 "futures-task",
 "futures-util",
- "once_cell",
 "pin-project 1.0.5",
 "pin-utils",
]

[[package]]
name = "futures-util"
-version = "0.3.12"
+version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "632a8cd0f2a4b3fdea1657f08bde063848c3bd00f9bbf6e256b8be78802e624b"
+checksum = "1812c7ab8aedf8d6f2701a43e1243acdbcc2b36ab26e2ad421eb99ac963d96d1"
dependencies = [
 "futures-channel",
 "futures-core",
@@ -1259,6 +1257,19 @@ dependencies = [
 "tonic-build",
]

+[[package]]
+name = "generator"
+version = "0.6.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a9fed24fd1e18827652b4d55652899a1e9da8e54d91624dc3437a5bc3a9f9a9c"
+dependencies = [
+ "cc",
+ "libc",
+ "log",
+ "rustversion",
+ "winapi",
+]
+
[[package]]
name = "generic-array"
version = "0.14.4"
@@ -1493,9 +1504,9 @@ dependencies = [

[[package]]
name = "idna"
-version = "0.2.1"
+version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "de910d521f7cc3135c4de8db1cb910e0b5ed1dc6f57c381cd07e8e661ce10094"
+checksum = "89829a5d69c23d348314a7ac337fe39173b61149a9864deabd260983aed48c21"
dependencies = [
 "matches",
 "unicode-bidi",
@@ -1579,6 +1590,7 @@ dependencies = [
 "tokio",
 "tokio-stream",
 "tonic",
+ "tonic-health",
 "tracing",
 "tracing-futures",
 "tracing-opentelemetry",
@@ -1593,6 +1605,7 @@ dependencies = [
 "arrow_deps",
 "data_types",
+ "futures-util",
 "generated_types",
 "rand 0.8.3",
 "reqwest",
 "serde",
@@ -1754,9 +1767,9 @@ checksum = "b7282d924be3275cec7f6756ff4121987bc6481325397dde6ba3e7802b1a8b1c"

[[package]]
name = "libloading"
-version = "0.6.7"
+version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "351a32417a12d5f7e82c368a66781e307834dae04c6ce0cd4456d52989229883"
+checksum = "6f84d96438c15fcd6c3f244c8fce01d1e2b9c6b5623e9c711dc9286d8fc92d6a"
dependencies = [
 "cfg-if 1.0.0",
 "winapi",
@@ -1803,6 +1816,17 @@ dependencies = [
 "tracing-subscriber",
]

+[[package]]
+name = "loom"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d44c73b4636e497b4917eb21c33539efa3816741a2d3ff26c6316f1b529481a4"
+dependencies = [
+ "cfg-if 1.0.0",
+ "generator",
+ "scoped-tls",
+]
+
[[package]]
name = "lz4"
version = "1.23.2"
@@ -1919,9 +1943,9 @@ dependencies = [

[[package]]
name = "mio"
-version = "0.7.7"
+version = "0.7.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e50ae3f04d169fcc9bde0b547d1c205219b7157e07ded9c5aff03e0637cb3ed7"
+checksum = "a5dede4e2065b3842b8b0af444119f3aa331cc7cc2dd20388bfb0f5d5a38823a"
dependencies = [
 "libc",
 "log",
@@ -2190,9 +2214,9 @@ dependencies = [

[[package]]
name = "once_cell"
-version = "1.5.2"
+version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "13bd41f508810a131401606d54ac32a467c97172d74ba7662562ebba5ad07fa0"
+checksum = "10acf907b94fc1b1a152d08ef97e7759650268cf986bf127f387e602b02c7e5a"
dependencies = [
 "parking_lot",
]
@@ -2351,7 +2375,7 @@ dependencies = [
[[package]]
name = "parquet"
version = "4.0.0-SNAPSHOT"
-source = "git+https://github.com/apache/arrow.git?rev=ad4504e8e85eb8e5babe0f01ca8cf9947499fc40#ad4504e8e85eb8e5babe0f01ca8cf9947499fc40"
+source = "git+https://github.com/apache/arrow.git?rev=b5ac048c75cc55f4039d279f554920be3112d7cd#b5ac048c75cc55f4039d279f554920be3112d7cd"
dependencies = [
 "arrow",
 "base64 0.12.3",
@@ -2798,7 +2822,7 @@ checksum = "9ab346ac5921dc62ffa9f89b7a773907511cdfa5490c572ae9be1be33e8afa4a"
dependencies = [
 "crossbeam-channel 0.5.0",
 "crossbeam-deque 0.8.0",
- "crossbeam-utils 0.8.1",
+ "crossbeam-utils 0.8.2",
 "lazy_static",
 "num_cpus",
]
@@ -2896,9 +2920,9 @@ dependencies = [

[[package]]
name = "reqwest"
-version = "0.11.0"
+version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fd281b1030aa675fb90aa994d07187645bb3c8fc756ca766e7c3070b439de9de"
+checksum = "0460542b551950620a3648c6aa23318ac6b3cd779114bd873209e6e8b5eb1c34"
dependencies = [
 "async-compression",
 "base64 0.13.0",
@@ -3053,7 +3077,7 @@ dependencies = [
 "base64 0.13.0",
 "blake2b_simd",
 "constant_time_eq",
- "crossbeam-utils 0.8.1",
+ "crossbeam-utils 0.8.2",
]

[[package]]
@@ -3102,6 +3126,12 @@ dependencies = [
 "security-framework",
]

+[[package]]
+name = "rustversion"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb5d2a036dc6d2d8fd16fde3498b04306e29bd193bf306a57427019b823d5acd"
+
[[package]]
name = "rustyline"
version = "7.1.0"
@@ -3148,6 +3178,12 @@ dependencies = [
 "winapi",
]

+[[package]]
+name = "scoped-tls"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ea6a9290e3c9cf0f18145ef7ffa62d68ee0bf5fcd651017e586dc7fd5da448c2"
+
[[package]]
name = "scopeguard"
version = "1.1.0"
@@ -3166,9 +3202,9 @@ dependencies = [

[[package]]
name = "security-framework"
-version = "2.0.0"
+version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c1759c2e3c8580017a484a7ac56d3abc5a6c1feadf88db2f3633f12ae4268c69"
+checksum = "c6af1b6204f89cf0069736daf8b852573e3bc34898eee600e95d3dd855c12e81"
dependencies = [
 "bitflags",
 "core-foundation",
@@ -3179,9 +3215,9 @@ dependencies = [

[[package]]
name = "security-framework-sys"
-version = "2.0.0"
+version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f99b9d5e26d2a71633cc4f2ebae7cc9f874044e0c351a27e17892d76dce5678b"
+checksum = "31531d257baab426203cf81c5ce1b0b55159dda7ed602ac81b582ccd62265741"
dependencies = [
 "core-foundation-sys",
 "libc",
@@ -3633,18 +3669,18 @@ dependencies = [

[[package]]
name = "thiserror"
-version = "1.0.23"
+version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "76cc616c6abf8c8928e2fdcc0dbfab37175edd8fb49a4641066ad1364fdab146"
+checksum = "e0f4a65597094d4483ddaed134f409b2cb7c1beccf25201a9f73c719254fa98e"
dependencies = [
 "thiserror-impl",
]

[[package]]
name = "thiserror-impl"
-version = "1.0.23"
+version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9be73a2caec27583d0046ef3796c3794f868a5bc813db689eed00c7631275cd1"
+checksum = "7765189610d8241a44529806d6fd1f2e0a08734313a35d5b3a556f92b381f3c0"
dependencies = [
 "proc-macro2",
 "quote",
@@ -3873,6 +3909,21 @@ dependencies = [
 "syn",
]

+[[package]]
+name = "tonic-health"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a93d6649c8f5436d65337af08887a516183a096d785ef1fc3acf69ed60dbec6b"
+dependencies = [
+ "async-stream",
+ "bytes",
+ "prost",
+ "tokio",
+ "tokio-stream",
+ "tonic",
+ "tonic-build",
+]
+
[[package]]
name = "tower"
version = "0.4.5"
@@ -3907,9 +3958,9 @@ checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6"

[[package]]
name = "tracing"
-version = "0.1.23"
+version = "0.1.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f7d40a22fd029e33300d8d89a5cc8ffce18bb7c587662f54629e94c9de5487f3"
+checksum = "01ebdc2bb4498ab1ab5f5b73c5803825e60199229ccba0698170e3be0e7f959f"
dependencies = [
 "cfg-if 1.0.0",
 "log",
@@ -3920,9 +3971,9 @@ dependencies = [

[[package]]
name = "tracing-attributes"
-version = "0.1.12"
+version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "43f080ea7e4107844ef4766459426fa2d5c1ada2e47edba05dc7fa99d9629f47"
+checksum = "a8a9bd1db7706f2373a190b0d067146caa39350c486f3d455b0e33b431f94c07"
dependencies = [
 "proc-macro2",
 "quote",
@@ -3940,19 +3991,19 @@ dependencies = [

[[package]]
name = "tracing-futures"
-version = "0.2.4"
+version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ab7bb6f14721aa00656086e9335d363c5c8747bae02ebe32ea2c7dece5689b4c"
+checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2"
dependencies = [
- "pin-project 0.4.27",
+ "pin-project 1.0.5",
 "tracing",
]

[[package]]
name = "tracing-log"
-version = "0.1.1"
+version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5e0f8c7178e13481ff6765bd169b33e8d554c5d2bbede5e32c356194be02b9b9"
+checksum = "a6923477a48e41c1951f1999ef8bb5a3023eb723ceadafe78ffb65dc366761e3"
dependencies = [
 "lazy_static",
 "log",
@@ -3984,9 +4035,9 @@ dependencies = [

[[package]]
name = "tracing-subscriber"
-version = "0.2.15"
+version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1fa8f0c8f4c594e4fc9debc1990deab13238077271ba84dd853d54902ee3401"
+checksum = "8ab8966ac3ca27126141f7999361cc97dd6fb4b71da04c02044fa9045d98bb96"
dependencies = [
 "ansi_term 0.12.1",
 "chrono",
@@ -4067,9 +4118,9 @@ checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a"

[[package]]
name = "url"
-version = "2.2.0"
+version = "2.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5909f2b0817350449ed73e8bcd81c8c3c8d9a7a5d8acba4b27db277f1868976e"
+checksum = "9ccd964113622c8e9322cfac19eb1004a07e636c545f325da085d5cdde6f1f8b"
dependencies = [
 "form_urlencoded",
 "idna",
@@ -81,6 +81,7 @@ structopt = "0.3.21"
tokio = { version = "1.0", features = ["macros", "rt-multi-thread", "parking_lot"] }
tokio-stream = { version = "0.1.2", features = ["net"] }
tonic = "0.4.0"
+tonic-health = "0.3.0"
tracing = { version = "0.1", features = ["release_max_level_debug"] }
tracing-futures = "0.2.4"
tracing-opentelemetry = "0.11.0"
README.md (40 changed lines)
@@ -176,6 +176,30 @@ The server will, by default, start an HTTP API server on port `8080` and a gRPC

### Writing and Reading Data

Each IOx instance requires a writer ID.
This can be set three ways:
- set an environment variable `INFLUXDB_IOX_ID=42`
- set a flag `--writer-id 42`
- send an HTTP PUT request:
```
curl --request PUT \
  --url http://localhost:8080/iox/api/v1/id \
  --header 'Content-Type: application/json' \
  --data '{
    "id": 42
  }'
```

To write data, you need a destination database.
This is set via HTTP PUT, identifying the database by org `company` and bucket `sensors`:
```
curl --request PUT \
  --url http://localhost:8080/iox/api/v1/databases/company_sensors \
  --header 'Content-Type: application/json' \
  --data '{
  }'
```

Data can be stored in InfluxDB IOx by sending it in [line protocol] format to the `/api/v2/write`
endpoint. Data is stored by organization and bucket names. Here's an example using [`curl`] with
the organization name `company` and the bucket name `sensors` that will send the data in the
@@ -196,6 +220,22 @@ all data in the `company` organization's `sensors` bucket for the `processes` me
curl -v -G -d 'org=company' -d 'bucket=sensors' --data-urlencode 'sql_query=select * from processes' "http://127.0.0.1:8080/api/v2/read"
```

### Health Checks

The HTTP API exposes a healthcheck endpoint at `/health`

```shell
$ curl http://127.0.0.1:8080/health
OK
```

The gRPC API implements the [gRPC Health Checking Protocol](https://github.com/grpc/grpc/blob/master/doc/health-checking.md). This can be tested with [grpc-health-probe](https://github.com/grpc-ecosystem/grpc-health-probe)

```shell
$ grpc_health_probe -addr 127.0.0.1:8082 -service influxdata.platform.storage.Storage
status: SERVING
```

## Contributing

We welcome community contributions from anyone!
@@ -8,11 +8,11 @@ description = "Apache Arrow / Parquet / DataFusion dependencies for InfluxDB IOx
[dependencies] # In alphabetical order
# We are using development version of arrow/parquet/datafusion and the dependencies are at the same rev

-# The version can be found here: https://github.com/apache/arrow/commit/ad4504e8e85eb8e5babe0f01ca8cf9947499fc40
+# The version can be found here: https://github.com/apache/arrow/commit/b5ac048c75cc55f4039d279f554920be3112d7cd
#
-arrow = { git = "https://github.com/apache/arrow.git", rev = "ad4504e8e85eb8e5babe0f01ca8cf9947499fc40" , features = ["simd"] }
-arrow-flight = { git = "https://github.com/apache/arrow.git", rev = "ad4504e8e85eb8e5babe0f01ca8cf9947499fc40" }
-datafusion = { git = "https://github.com/apache/arrow.git", rev = "ad4504e8e85eb8e5babe0f01ca8cf9947499fc40" }
+arrow = { git = "https://github.com/apache/arrow.git", rev = "b5ac048c75cc55f4039d279f554920be3112d7cd" , features = ["simd"] }
+arrow-flight = { git = "https://github.com/apache/arrow.git", rev = "b5ac048c75cc55f4039d279f554920be3112d7cd" }
+datafusion = { git = "https://github.com/apache/arrow.git", rev = "b5ac048c75cc55f4039d279f554920be3112d7cd" }
# Turn off the "arrow" feature; it currently has a bug that causes the crate to rebuild every time
# and we're not currently using it anyway
-parquet = { git = "https://github.com/apache/arrow.git", rev = "ad4504e8e85eb8e5babe0f01ca8cf9947499fc40", default-features = false, features = ["snap", "brotli", "flate2", "lz4", "zstd"] }
+parquet = { git = "https://github.com/apache/arrow.git", rev = "b5ac048c75cc55f4039d279f554920be3112d7cd", default-features = false, features = ["snap", "brotli", "flate2", "lz4", "zstd"] }
@@ -0,0 +1,18 @@
version: v1beta1
build:
  roots:
    - generated_types/protos/
  excludes:
    - generated_types/protos/com
    - generated_types/protos/influxdata/platform
    - generated_types/protos/grpc

lint:
  use:
    - DEFAULT
    - STYLE_DEFAULT

breaking:
  use:
    - WIRE
    - WIRE_JSON

(File diff suppressed because it is too large)
@@ -0,0 +1,112 @@
//! A collection of extension traits for types that
//! implement TryInto<U, Error=FieldViolation>
//!
//! Allows associating field context with the generated errors
//! as they propagate up the struct topology

use generated_types::google::FieldViolation;
use std::convert::TryInto;

/// An extension trait that adds the method `scope` to any type
/// implementing `TryInto<U, Error = FieldViolation>`
pub(crate) trait FromField<T> {
    fn scope(self, field: impl Into<String>) -> Result<T, FieldViolation>;
}

impl<T, U> FromField<U> for T
where
    T: TryInto<U, Error = FieldViolation>,
{
    /// Try to convert type using TryInto calling `FieldViolation::scope`
    /// on any returned error
    fn scope(self, field: impl Into<String>) -> Result<U, FieldViolation> {
        self.try_into().map_err(|e| e.scope(field))
    }
}

/// An extension trait that adds the methods `optional` and `required` to any
/// Option containing a type implementing `TryInto<U, Error = FieldViolation>`
pub(crate) trait FromFieldOpt<T> {
    /// Try to convert inner type, if any, using TryInto calling
    /// `FieldViolation::scope` on any error encountered
    ///
    /// Returns None if empty
    fn optional(self, field: impl Into<String>) -> Result<Option<T>, FieldViolation>;

    /// Try to convert inner type, using TryInto calling `FieldViolation::scope`
    /// on any error encountered
    ///
    /// Returns an error if empty
    fn required(self, field: impl Into<String>) -> Result<T, FieldViolation>;
}

impl<T, U> FromFieldOpt<U> for Option<T>
where
    T: TryInto<U, Error = FieldViolation>,
{
    fn optional(self, field: impl Into<String>) -> Result<Option<U>, FieldViolation> {
        self.map(|t| t.scope(field)).transpose()
    }

    fn required(self, field: impl Into<String>) -> Result<U, FieldViolation> {
        match self {
            None => Err(FieldViolation::required(field)),
            Some(t) => t.scope(field),
        }
    }
}

/// An extension trait that adds the methods `optional` and `required` to any
/// String
///
/// Prost will default string fields to empty, whereas IOx sometimes
/// uses Option<String>, this helper aids mapping between them
///
/// TODO: Review mixed use of Option<String> and String in IOX
pub(crate) trait FromFieldString {
    /// Returns a Ok if the String is not empty
    fn required(self, field: impl Into<String>) -> Result<String, FieldViolation>;

    /// Wraps non-empty strings in Some(_), returns None for empty strings
    fn optional(self) -> Option<String>;
}

impl FromFieldString for String {
    fn required(self, field: impl Into<String>) -> Result<String, FieldViolation> {
        if self.is_empty() {
            return Err(FieldViolation::required(field));
        }
        Ok(self)
    }

    fn optional(self) -> Option<String> {
        if self.is_empty() {
            return None;
        }
        Some(self)
    }
}

/// An extension trait that adds the method `vec_field` to any Vec of a type
/// implementing `TryInto<U, Error = FieldViolation>`
pub(crate) trait FromFieldVec<T> {
    /// Converts to a `Vec<U>`, short-circuiting on the first error and
    /// returning a correctly scoped `FieldViolation` for where the error
    /// was encountered
    fn vec_field(self, field: impl Into<String>) -> Result<T, FieldViolation>;
}

impl<T, U> FromFieldVec<Vec<U>> for Vec<T>
where
    T: TryInto<U, Error = FieldViolation>,
{
    fn vec_field(self, field: impl Into<String>) -> Result<Vec<U>, FieldViolation> {
        let res: Result<_, _> = self
            .into_iter()
            .enumerate()
            .map(|(i, t)| t.scope(i.to_string()))
            .collect();

        res.map_err(|e| e.scope(field))
    }
}
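A short usage sketch for the validation traits above. Everything here is illustrative: the `GrpcSubscription`/`Subscription` types are made up, and the code is written as if it lived inside the same crate, since the traits are `pub(crate)`; only `FieldViolation`, `required`, and `optional` come from the diff itself. The `Option` and `Vec` variants (`FromFieldOpt`, `FromFieldVec`) are used the same way.

```rust
// Hypothetical sketch; the message/domain types below are not from the diff.
use std::convert::TryFrom;

use generated_types::google::FieldViolation;

use crate::field_validation::FromFieldString;

/// A prost-generated message defaults a missing string field to "".
pub struct GrpcSubscription {
    pub name: String,
    pub host_group_id: String,
}

/// The corresponding domain type: the name is mandatory, the host group is optional.
pub struct Subscription {
    pub name: String,
    pub host_group_id: Option<String>,
}

impl TryFrom<GrpcSubscription> for Subscription {
    type Error = FieldViolation;

    fn try_from(s: GrpcSubscription) -> Result<Self, Self::Error> {
        Ok(Self {
            // An empty string becomes a FieldViolation scoped to the field name "name"
            name: s.name.required("name")?,
            // An empty string becomes None rather than Some("")
            host_group_id: s.host_group_id.optional(),
        })
    }
}
```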
@@ -32,3 +32,5 @@ pub mod wal;

mod database_name;
pub use database_name::*;
+
+pub(crate) mod field_validation;
@@ -299,10 +299,44 @@ impl Schema {
    /// Returns an iterator of (Option<InfluxColumnType>, &Field) for
    /// all the columns of this schema, in order
    pub fn iter(&self) -> SchemaIter<'_> {
-        SchemaIter {
-            schema: self,
-            idx: 0,
-        }
+        SchemaIter::new(self)
    }

+    /// Returns an iterator of `&Field` for all the tag columns of
+    /// this schema, in order
+    pub fn tags_iter(&self) -> impl Iterator<Item = &ArrowField> {
+        self.iter().filter_map(|(influx_column_type, field)| {
+            if matches!(influx_column_type, Some(InfluxColumnType::Tag)) {
+                Some(field)
+            } else {
+                None
+            }
+        })
+    }
+
+    /// Returns an iterator of `&Field` for all the field columns of
+    /// this schema, in order
+    pub fn fields_iter(&self) -> impl Iterator<Item = &ArrowField> {
+        self.iter().filter_map(|(influx_column_type, field)| {
+            if matches!(influx_column_type, Some(InfluxColumnType::Field(_))) {
+                Some(field)
+            } else {
+                None
+            }
+        })
+    }
+
+    /// Returns an iterator of `&Field` for all the timestamp columns
+    /// of this schema, in order. At the time of writing there should
+    /// be only one or 0 such columns
+    pub fn time_iter(&self) -> impl Iterator<Item = &ArrowField> {
+        self.iter().filter_map(|(influx_column_type, field)| {
+            if matches!(influx_column_type, Some(InfluxColumnType::Timestamp)) {
+                Some(field)
+            } else {
+                None
+            }
+        })
+    }
+
    /// Merges any new columns from new_schema, consuming self. If the
@@ -573,6 +607,12 @@ pub struct SchemaIter<'a> {
    idx: usize,
}

+impl<'a> SchemaIter<'a> {
+    fn new(schema: &'a Schema) -> Self {
+        Self { schema, idx: 0 }
+    }
+}
+
impl<'a> fmt::Debug for SchemaIter<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "SchemaIter<{}>", self.idx)
@@ -829,15 +869,47 @@ mod test {
        }
    }

+    /// Build an empty iterator
+    fn empty_schema() -> Schema {
+        SchemaBuilder::new().build().unwrap()
+    }
+
+    #[test]
+    fn test_iter_empty() {
+        assert_eq!(empty_schema().iter().count(), 0);
+    }
+
+    #[test]
+    fn test_tags_iter_empty() {
+        assert_eq!(empty_schema().tags_iter().count(), 0);
+    }
+
+    #[test]
+    fn test_fields_iter_empty() {
+        assert_eq!(empty_schema().fields_iter().count(), 0);
+    }
+
+    #[test]
+    fn test_time_iter_empty() {
+        assert_eq!(empty_schema().time_iter().count(), 0);
+    }
+
+    /// Build a schema for testing iterators
+    fn iter_schema() -> Schema {
+        SchemaBuilder::new()
+            .influx_field("field1", Float)
+            .tag("tag1")
+            .timestamp()
+            .influx_field("field2", String)
+            .influx_field("field3", String)
+            .tag("tag2")
+            .build()
+            .unwrap()
+    }
+
    #[test]
    fn test_iter() {
-        let schema = SchemaBuilder::new()
-            .influx_field("the_field", String)
-            .tag("the_tag")
-            .timestamp()
-            .measurement("the_measurement")
-            .build()
-            .unwrap();
+        let schema = iter_schema();

        // test schema iterator and field accessor match up
        for (i, (iter_col_type, iter_field)) in schema.iter().enumerate() {
@@ -845,7 +917,40 @@ mod test {
            assert_eq!(iter_col_type, col_type);
            assert_eq!(iter_field, field);
        }
-        assert_eq!(schema.iter().count(), 3);
+        assert_eq!(schema.iter().count(), 6);
    }

+    #[test]
+    fn test_tags_iter() {
+        let schema = iter_schema();
+
+        let mut iter = schema.tags_iter();
+        assert_eq!(iter.next().unwrap().name(), "tag1");
+        assert_eq!(iter.next().unwrap().name(), "tag2");
+        assert_eq!(iter.next(), None);
+        assert_eq!(iter.next(), None);
+    }
+
+    #[test]
+    fn test_fields_iter() {
+        let schema = iter_schema();
+
+        let mut iter = schema.fields_iter();
+        assert_eq!(iter.next().unwrap().name(), "field1");
+        assert_eq!(iter.next().unwrap().name(), "field2");
+        assert_eq!(iter.next().unwrap().name(), "field3");
+        assert_eq!(iter.next(), None);
+        assert_eq!(iter.next(), None);
+    }
+
+    #[test]
+    fn test_time_iter() {
+        let schema = iter_schema();
+
+        let mut iter = schema.time_iter();
+        assert_eq!(iter.next().unwrap().name(), "time");
+        assert_eq!(iter.next(), None);
+        assert_eq!(iter.next(), None);
+    }
+
    #[test]
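The new `tags_iter`/`fields_iter`/`time_iter` accessors simply filter the existing `iter()` by InfluxDB column type. A minimal sketch of how they might be used, written as if it sat next to the tests above so `Schema` is in scope (the helper itself is hypothetical):

```rust
// Sketch only: collects tag and field column names from a schema.
fn tag_and_field_names(schema: &Schema) -> (Vec<&str>, Vec<&str>) {
    // tags_iter()/fields_iter() yield `&ArrowField`; name() gives the column name
    let tags = schema.tags_iter().map(|f| f.name().as_str()).collect();
    let fields = schema.fields_iter().map(|f| f.name().as_str()).collect();
    (tags, fields)
}
```

For the `iter_schema()` fixture above this would return `(["tag1", "tag2"], ["field1", "field2", "field3"])`.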
@@ -1,7 +1,11 @@
###
-# Dockerfile for the image used in CI performance tests
+# Dockerfile used for deploying IOx
##
-FROM rust:slim-buster
+FROM debian:buster-slim
+
+RUN apt-get update \
+    && apt-get install -y libssl1.1 libgcc1 libc6 \
+    && rm -rf /var/lib/{apt,dpkg,cache,log}

RUN groupadd -g 1500 rust \
    && useradd -u 1500 -g rust -s /bin/bash -m rust
@@ -15,4 +19,4 @@ COPY target/release/influxdb_iox /usr/bin/influxdb_iox

EXPOSE 8080 8082

-CMD ["influxdb_iox"]
+ENTRYPOINT ["influxdb_iox"]
@@ -5,9 +5,11 @@ interest for those who wish to understand how the code works. It is
not intended to be general user facing documentation

## Table of Contents:

* Rust style and Idiom guide: [style_guide.md](style_guide.md)
* Tracing and logging Guide: [tracing.md](tracing.md)
* How InfluxDB IOx manages the lifecycle of time series data: [data_management.md](data_management.md)
* Thoughts on parquet encoding and compression for timeseries data: [encoding_thoughts.md](encoding_thoughts.md)
* Thoughts on using multiple cores: [multi_core_tasks.md](multi_core_tasks.md)
* [Query Engine Docs](../query/README.md)
* [Testing documentation](testing.md) for developers of IOx
@@ -28,10 +28,10 @@
# AWS_ACCESS_KEY_ID=access_key_value
# AWS_SECRET_ACCESS_KEY=secret_access_key_value
# AWS_DEFAULT_REGION=us-east-2
-# INFLUXDB_IOX_S3_BUCKET=bucket-name
+# INFLUXDB_IOX_BUCKET=bucket-name
#
# If using Google Cloud Storage as an object store:
-# INFLUXDB_IOX_GCP_BUCKET=bucket_name
+# INFLUXDB_IOX_BUCKET=bucket_name
# Set one of SERVICE_ACCOUNT or GOOGLE_APPLICATION_CREDENTIALS, either to a path of a filename
# containing Google credential JSON or to the JSON directly.
# SERVICE_ACCOUNT=/path/to/auth/info.json
@@ -41,7 +41,7 @@
# The name you see when going to All Services > Storage accounts > [name]
# AZURE_STORAGE_ACCOUNT=
# The name of a container you've created in the storage account, under Blob Service > Containers
-# AZURE_STORAGE_CONTAINER=
+# INFLUXDB_IOX_BUCKET=
# In the Storage account's Settings > Access keys, one of the Key values
# AZURE_STORAGE_MASTER_KEY=
#
@@ -0,0 +1,50 @@
# Testing

This document covers details that are only relevant if you are developing IOx and running the tests.

## Object storage

### To run the tests or not run the tests

If you are testing integration with some or all of the object storage options, you'll have more
setup to do.

By default, `cargo test -p object_store` does not run any tests that actually contact
any cloud services: tests that do contact the services will silently pass.

To ensure you've configured object storage integration testing correctly, you can run
`TEST_INTEGRATION=1 cargo test -p object_store`, which will run the tests that contact the cloud
services and fail them if the required environment variables aren't set.

If you don't specify the `TEST_INTEGRATION` environment variable but you do configure some or all
of the object stores, the relevant tests will run.

### Configuration differences when running the tests

When running `influxdb_iox server`, you can pick one object store to use. When running the tests,
you can run them against all the possible object stores. There's still only one
`INFLUXDB_IOX_BUCKET` variable, though, so that will set the bucket name for all configured object
stores. Use the same bucket name when setting up the different services.

Other than possibly configuring multiple object stores, configuring the tests to use the object
store services is the same as configuring the server to use an object store service. See the output
of `influxdb_iox server --help` for instructions.

## InfluxDB IOx Client

The `influxdb_iox_client` crate might be used by people who are using a managed IOx server. In
other words, they might only use the `influxdb_iox_client` crate and not the rest of the crates in
this workspace. The tests in `influxdb_iox_client` see an IOx server in the same way as IOx servers
see the object store services: sometimes you'll want to run the tests against an actual server, and
sometimes you won't.

Like in the `object_store` crate, the `influxdb_iox_client` crate's tests use the
`TEST_INTEGRATION` environment variable to enforce running tests that use an actual IOx server.
Running `cargo test -p influxdb_iox_client` will silently pass tests that contact a server.

Start an IOx server in one terminal and run `TEST_INTEGRATION=1
TEST_IOX_ENDPOINT=http://127.0.0.1:8080 cargo test -p influxdb_iox_client` in another (where
`http://127.0.0.1:8080` is the address to the IOx HTTP server) to run the client tests against the
server. If you set `TEST_INTEGRATION` but not `TEST_IOX_ENDPOINT`, the integration tests will fail
because of the missed configuration. If you set `TEST_IOX_ENDPOINT` but not `TEST_INTEGRATION`, the
integration tests will be run.
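The skip-unless-`TEST_INTEGRATION` behaviour described above is typically implemented with a small guard at the top of each integration test. A hedged sketch of such a guard (not the actual helper used in the IOx crates):

```rust
use std::env;

/// Returns the value of `var` if it is set.
/// If it is missing: panics when TEST_INTEGRATION is set (so misconfiguration
/// fails loudly), otherwise returns None so the caller can silently skip.
fn integration_var(var: &str) -> Option<String> {
    match (env::var(var).ok(), env::var("TEST_INTEGRATION").is_ok()) {
        (Some(v), _) => Some(v),
        (None, true) => panic!("TEST_INTEGRATION is set but {} is not", var),
        (None, false) => None,
    }
}

#[test]
fn reads_from_real_server() {
    // Hypothetical test: skip quietly unless an endpoint is configured.
    let endpoint = match integration_var("TEST_IOX_ENDPOINT") {
        Some(e) => e,
        None => return, // silently "pass", as described above
    };
    // ... connect to `endpoint` and exercise the client here ...
    let _ = endpoint;
}
```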
@@ -10,7 +10,7 @@ type Error = Box<dyn std::error::Error>;
type Result<T, E = Error> = std::result::Result<T, E>;

fn main() -> Result<()> {
-    let root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("protos");

    generate_grpc_types(&root)?;
    generate_wal_types(&root)?;
@@ -20,16 +20,27 @@ fn main() -> Result<()> {

/// Schema used with IOx specific gRPC requests
///
-/// Creates `influxdata.platform.storage.rs` and
-/// `com.github.influxdata.idpe.storage.read.rs`
+/// Creates
+/// - `influxdata.platform.storage.rs`
+/// - `com.github.influxdata.idpe.storage.read.rs`
+/// - `influxdata.iox.management.v1.rs`
fn generate_grpc_types(root: &Path) -> Result<()> {
+    let storage_path = root.join("influxdata/platform/storage");
+    let idpe_path = root.join("com/github/influxdata/idpe/storage/read");
+    let management_path = root.join("influxdata/iox/management/v1");
+    let grpc_path = root.join("grpc/health/v1");
+
    let proto_files = vec![
-        root.join("test.proto"),
-        root.join("predicate.proto"),
-        root.join("storage_common.proto"),
-        root.join("storage_common_idpe.proto"),
-        root.join("service.proto"),
-        root.join("source.proto"),
+        storage_path.join("test.proto"),
+        storage_path.join("predicate.proto"),
+        storage_path.join("storage_common.proto"),
+        storage_path.join("service.proto"),
+        storage_path.join("storage_common_idpe.proto"),
+        idpe_path.join("source.proto"),
+        management_path.join("base_types.proto"),
+        management_path.join("database_rules.proto"),
+        management_path.join("service.proto"),
+        grpc_path.join("service.proto"),
    ];

    // Tell cargo to recompile if any of these proto files are changed
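The hunk above ends just before the code generation itself. For orientation, a build script like this typically emits `cargo:rerun-if-changed` for each proto and hands the collected paths to `tonic_build`; the sketch below shows that shape, but the exact builder options IOx uses are not part of this diff:

```rust
use std::path::{Path, PathBuf};

type Error = Box<dyn std::error::Error>;
type Result<T, E = Error> = std::result::Result<T, E>;

// Hedged sketch: assumes a `tonic-build` build-dependency is available.
fn compile_protos(proto_files: &[PathBuf], root: &Path) -> Result<()> {
    // Re-run the build script when any proto file changes
    for proto in proto_files {
        println!("cargo:rerun-if-changed={}", proto.display());
    }

    // `root` is the single include directory; generated .rs files land in OUT_DIR
    let includes = vec![root.to_path_buf()];
    tonic_build::configure().compile(proto_files, &includes)?;

    Ok(())
}
```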
@@ -0,0 +1,23 @@
syntax = "proto3";

package grpc.health.v1;

message HealthCheckRequest {
  string service = 1;
}

message HealthCheckResponse {
  enum ServingStatus {
    UNKNOWN = 0;
    SERVING = 1;
    NOT_SERVING = 2;
    SERVICE_UNKNOWN = 3; // Used only by the Watch method.
  }
  ServingStatus status = 1;
}

service Health {
  rpc Check(HealthCheckRequest) returns (HealthCheckResponse);

  rpc Watch(HealthCheckRequest) returns (stream HealthCheckResponse);
}
@@ -0,0 +1,30 @@
syntax = "proto3";
package influxdata.iox.management.v1;

enum Order {
  ORDER_UNSPECIFIED = 0;
  ORDER_ASC = 1;
  ORDER_DESC = 2;
}

enum Aggregate {
  AGGREGATE_UNSPECIFIED = 0;
  AGGREGATE_MIN = 1;
  AGGREGATE_MAX = 2;
}

enum ColumnType {
  COLUMN_TYPE_UNSPECIFIED = 0;
  COLUMN_TYPE_I64 = 1;
  COLUMN_TYPE_U64 = 2;
  COLUMN_TYPE_F64 = 3;
  COLUMN_TYPE_STRING = 4;
  COLUMN_TYPE_BOOL = 5;
}

message HostGroup {
  string id = 1;

  // connection strings for remote hosts.
  repeated string hosts = 2;
}
@@ -0,0 +1,248 @@
syntax = "proto3";
package influxdata.iox.management.v1;

import "google/protobuf/duration.proto";
import "google/protobuf/empty.proto";
import "influxdata/iox/management/v1/base_types.proto";

// `PartitionTemplate` is used to compute the partition key of each row that
// gets written. It can consist of the table name, a column name and its value,
// a formatted time, or a string column and regex captures of its value. For
// columns that do not appear in the input row, a blank value is output.
//
// The key is constructed in order of the template parts; thus ordering changes
// what partition key is generated.
message PartitionTemplate {
  message Part {
    message ColumnFormat {
      string column = 1;
      string format = 2;
    }

    oneof part {
      google.protobuf.Empty table = 1;
      string column = 2;
      string time = 3;
      ColumnFormat regex = 4;
      ColumnFormat strf_time = 5;
    }
  }

  repeated Part parts = 1;
}

message Matcher {
  // A query predicate to filter rows
  string predicate = 1;
  // Restrict selection to a specific table or tables specified by a regex
  oneof table_matcher {
    google.protobuf.Empty all = 2;
    string table = 3;
    string regex = 4;
  }
}

message ReplicationConfig {
  // The set of host groups that data should be replicated to. Which host a
  // write goes to within a host group is determined by consistent hashing of
  // the partition key. We'd use this to create a host group per
  // availability zone, so you might have 5 availability zones with 2
  // hosts in each. Replication will ensure that N of those zones get a
  // write. For each zone, only a single host needs to get the write.
  // Replication is for ensuring a write exists across multiple hosts
  // before returning success. Its purpose is to ensure write durability,
  // rather than write availability for query (this is covered by
  // subscriptions).
  repeated string replications = 1;

  // The minimum number of host groups to replicate a write to before success
  // is returned. This can be overridden on a per request basis.
  // Replication will continue to write to the other host groups in the
  // background.
  uint32 replication_count = 2;

  // How long the replication queue can get before either rejecting writes or
  // dropping missed writes. The queue is kept in memory on a
  // per-database basis. A queue size of zero means it will only try to
  // replicate synchronously and drop any failures.
  uint64 replication_queue_max_size = 3;
}

message SubscriptionConfig {
  message Subscription {
    string name = 1;
    string host_group_id = 2;
    Matcher matcher = 3;
  }

  // `subscriptions` are used for query servers to get data via either push
  // or pull as it arrives. They are separate from replication as they
  // have a different purpose. They're for query servers or other clients
  // that want to subscribe to some subset of data being written in. This
  // could either be specific partitions, ranges of partitions, tables, or
  // rows matching some predicate.
  repeated Subscription subscriptions = 1;
}

message QueryConfig {
  // If set to `true`, this server should answer queries from one or more of
  // of its local write buffer and any read-only partitions that it knows
  // about. In this case, results will be merged with any others from the
  // remote goups or read-only partitions.
  bool query_local = 1;

  // Set `primary` to a host group if remote servers should be
  // issued queries for this database. All hosts in the group should be
  // queried with this server acting as the coordinator that merges
  // results together.
  string primary = 2;

  // If a specific host in the primary group is unavailable,
  // another host in the same position from a secondary group should be
  // queried. For example, imagine we've partitioned the data in this DB into
  // 4 partitions and we are replicating the data across 3 availability
  // zones. We have 4 hosts in each of those AZs, thus they each have 1
  // partition. We'd set the primary group to be the 4 hosts in the same
  // AZ as this one, and the secondary groups as the hosts in the other 2 AZs.
  repeated string secondaries = 3;

  // Use `readOnlyPartitions` when a server should answer queries for
  // partitions that come from object storage. This can be used to start
  // up a new query server to handle queries by pointing it at a
  // collection of partitions and then telling it to also pull
  // data from the replication servers (writes that haven't been snapshotted
  // into a partition).
  repeated string read_only_partitions = 4;
}

message WalBufferConfig {
  enum Rollover {
    ROLLOVER_UNSPECIFIED = 0;

    // Drop the old segment even though it hasn't been persisted. This part of
    // the WAL will be lost on this server.
    ROLLOVER_DROP_OLD_SEGMENT = 1;

    // Drop the incoming write and fail silently. This favors making sure that
    // older WAL data will be backed up.
    ROLLOVER_DROP_INCOMING = 2;

    // Reject the incoming write and return an error. The client may retry the
    // request, which will succeed once the oldest segment has been
    // persisted to object storage.
    ROLLOVER_RETURN_ERROR = 3;
  }

  // The size the WAL buffer should be limited to. Once the buffer gets to
  // this size it will drop old segments to remain below this size, but
  // still try to hold as much in memory as possible while remaining
  // below this threshold
  uint64 buffer_size = 1;

  // WAL segments become read-only after crossing over this size. Which means
  // that segments will always be >= this size. When old segments are
  // dropped from of memory, at least this much space will be freed from
  // the buffer.
  uint64 segment_size = 2;

  // What should happen if a write comes in that would exceed the WAL buffer
  // size and the oldest segment that could be dropped hasn't yet been
  // persisted to object storage. If the oldest segment has been
  // persisted, then it will be dropped from the buffer so that new writes
  // can be accepted. This option is only for defining the behavior of what
  // happens if that segment hasn't been persisted. If set to return an
  // error, new writes will be rejected until the oldest segment has been
  // persisted so that it can be cleared from memory. Alternatively, this
  // can be set so that old segments are dropped even if they haven't been
  // persisted. This setting is also useful for cases where persistence
  // isn't being used and this is only for in-memory buffering.
  Rollover buffer_rollover = 3;

  // If set to true, buffer segments will be written to object storage.
  bool persist_segments = 4;

  // If set, segments will be rolled over after this period of time even
  // if they haven't hit the size threshold. This allows them to be written
  // out to object storage as they must be immutable first.
  google.protobuf.Duration close_segment_after = 5;
}

message MutableBufferConfig {
  message PartitionDropOrder {
    message ColumnSort {
      string column_name = 1;
      ColumnType column_type = 2;
      Aggregate column_value = 3;
    }

    // Sort partitions by this order. Last will be dropped first.
    Order order = 1;

    // Configure sort key
    oneof sort {
      // The last time the partition received a write.
      google.protobuf.Empty last_write_time = 2;

      // When the partition was opened in the mutable buffer.
      google.protobuf.Empty created_at_time = 3;

      // A column name, its expected type, and whether to use the min or max
      // value. The ColumnType is necessary because the column can appear in
      // any number of tables and be of a different type. This specifies that
      // when sorting partitions, only columns with the given name and type
      // should be used for the purposes of determining the partition order. If a
      // partition doesn't have the given column in any way, the partition will
      // appear at the beginning of the list with a null value where all
      // partitions having null for that value will then be
      // sorted by created_at_time desc. So if none of the partitions in the
      // mutable buffer had this column with this type, then the partition
      // that was created first would appear last in the list and thus be the
      // first up to be dropped.
      ColumnSort column = 4;
    }
  }
  // The size the mutable buffer should be limited to. Once the buffer gets
  // to this size it will drop partitions in the given order. If unable
  // to drop partitions (because of later rules in this config) it will
  // reject writes until it is able to drop partitions.
  uint64 buffer_size = 1;

  // If set, the mutable buffer will not drop partitions that have chunks
  // that have not yet been persisted. Thus it will reject writes if it
  // is over size and is unable to drop partitions. The default is to
  // drop partitions in the sort order, regardless of whether they have
  // unpersisted chunks or not. The WAL Buffer can be used to ensure
  // persistence, but this may cause longer recovery times.
  bool reject_if_not_persisted = 2;

  // Configure order to drop partitions in
  PartitionDropOrder partition_drop_order = 3;

  // Attempt to persist partitions after they haven't received a write for
  // this number of seconds. If not set, partitions won't be
  // automatically persisted.
  uint32 persist_after_cold_seconds = 4;
}

message DatabaseRules {
  // The unencoded name of the database
  string name = 1;

  // Template that generates a partition key for each row inserted into the database
  PartitionTemplate partition_template = 2;

  // Synchronous replication configuration for this database
  ReplicationConfig replication_config = 3;

  // Asynchronous pull-based subscription configuration for this database
  SubscriptionConfig subscription_config = 4;

  // Query configuration for this database
  QueryConfig query_config = 5;

  // WAL configuration for this database
  WalBufferConfig wal_buffer_config = 6;

  // Mutable buffer configuration for this database
  MutableBufferConfig mutable_buffer_config = 7;
}
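The `PartitionTemplate` comment in the file above describes the key construction: each part contributes one component in template order, and a column missing from the row contributes a blank. A toy Rust sketch of that rule (the types, the helper, and the `-` separator are assumptions for illustration, not the IOx implementation):

```rust
use std::collections::HashMap;

// Illustrative only: a simplified template part (regex/strftime parts omitted).
enum TemplatePart {
    Table,
    Column(String),
}

/// Build a partition key for one row by joining the parts in template order.
/// A column the row does not contain contributes an empty component.
fn partition_key(parts: &[TemplatePart], table: &str, row: &HashMap<String, String>) -> String {
    parts
        .iter()
        .map(|p| match p {
            TemplatePart::Table => table.to_string(),
            TemplatePart::Column(name) => row.get(name).cloned().unwrap_or_default(),
        })
        .collect::<Vec<_>>()
        .join("-")
}
```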
@ -0,0 +1,49 @@
|
|||
syntax = "proto3";
|
||||
package influxdata.iox.management.v1;
|
||||
|
||||
import "google/protobuf/empty.proto";
|
||||
import "influxdata/iox/management/v1/database_rules.proto";
|
||||
|
||||
service ManagementService {
|
||||
rpc GetWriterId(GetWriterIdRequest) returns (GetWriterIdResponse);
|
||||
|
||||
rpc UpdateWriterId(UpdateWriterIdRequest) returns (UpdateWriterIdResponse);
|
||||
|
||||
rpc ListDatabases(ListDatabasesRequest) returns (ListDatabasesResponse);
|
||||
|
||||
rpc GetDatabase(GetDatabaseRequest) returns (GetDatabaseResponse);
|
||||
|
||||
rpc CreateDatabase(CreateDatabaseRequest) returns (CreateDatabaseResponse);
|
||||
}
|
||||
|
||||
message GetWriterIdRequest {}
|
||||
|
||||
message GetWriterIdResponse {
|
||||
uint32 id = 1;
|
||||
}
|
||||
|
||||
message UpdateWriterIdRequest {
|
||||
uint32 id = 1;
|
||||
}
|
||||
|
||||
message UpdateWriterIdResponse {}
|
||||
|
||||
message ListDatabasesRequest {}
|
||||
|
||||
message ListDatabasesResponse {
|
||||
repeated string names = 1;
|
||||
}
|
||||
|
||||
message GetDatabaseRequest {
|
||||
string name = 1;
|
||||
}
|
||||
|
||||
message GetDatabaseResponse {
|
||||
DatabaseRules rules = 1;
|
||||
}
|
||||
|
||||
message CreateDatabaseRequest {
|
||||
DatabaseRules rules = 1;
|
||||
}
|
||||
|
||||
message CreateDatabaseResponse {}
|
|
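A hedged sketch of calling this service, reusing the `rules` value sketched above; it assumes tonic client codegen is enabled for this proto, uses a placeholder address, and follows tonic's usual client module naming convention.

use generated_types::influxdata::iox::management::v1::{
    management_service_client::ManagementServiceClient, CreateDatabaseRequest, GetDatabaseRequest,
};

let mut client = ManagementServiceClient::connect("http://127.0.0.1:8082").await?;

// Create a database from the rules, then read them back.
client
    .create_database(CreateDatabaseRequest { rules: Some(rules) })
    .await?;
let fetched = client
    .get_database(GetDatabaseRequest { name: "mydb".to_string() })
    .await?
    .into_inner()
    .rules;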
@ -8,9 +8,8 @@ syntax = "proto3";
|
|||
package influxdata.platform.storage;
|
||||
|
||||
import "google/protobuf/empty.proto";
|
||||
import "storage_common.proto";
|
||||
import "storage_common_idpe.proto";
|
||||
|
||||
import "influxdata/platform/storage/storage_common.proto";
|
||||
import "influxdata/platform/storage/storage_common_idpe.proto";
|
||||
|
||||
service Storage {
|
||||
// ReadFilter performs a filter operation at storage
|
|
@ -8,7 +8,7 @@ syntax = "proto3";
|
|||
package influxdata.platform.storage;
|
||||
|
||||
import "google/protobuf/any.proto";
|
||||
import "predicate.proto";
|
||||
import "influxdata/platform/storage/predicate.proto";
|
||||
|
||||
|
||||
message ReadFilterRequest {
|
|
@ -10,8 +10,8 @@ syntax = "proto3";
|
|||
package influxdata.platform.storage;
|
||||
|
||||
import "google/protobuf/any.proto";
|
||||
import "predicate.proto";
|
||||
import "storage_common.proto";
|
||||
import "influxdata/platform/storage/predicate.proto";
|
||||
import "influxdata/platform/storage/storage_common.proto";
|
||||
|
||||
message ReadSeriesCardinalityRequest {
|
||||
google.protobuf.Any read_series_cardinality_source = 1;
|
|
@ -9,21 +9,71 @@
|
|||
clippy::clone_on_ref_ptr
|
||||
)]
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/influxdata.platform.storage.rs"));
|
||||
include!(concat!(
|
||||
env!("OUT_DIR"),
|
||||
"/com.github.influxdata.idpe.storage.read.rs"
|
||||
));
|
||||
include!(concat!(env!("OUT_DIR"), "/wal_generated.rs"));
|
||||
mod pb {
|
||||
pub mod influxdata {
|
||||
pub mod platform {
|
||||
pub mod storage {
|
||||
include!(concat!(env!("OUT_DIR"), "/influxdata.platform.storage.rs"));
|
||||
|
||||
// Can't implement `Default` because `prost::Message` implements `Default`
|
||||
impl TimestampRange {
|
||||
pub fn max() -> Self {
|
||||
TimestampRange {
|
||||
start: std::i64::MIN,
|
||||
end: std::i64::MAX,
|
||||
// Can't implement `Default` because `prost::Message` implements `Default`
|
||||
impl TimestampRange {
|
||||
pub fn max() -> Self {
|
||||
TimestampRange {
|
||||
start: std::i64::MIN,
|
||||
end: std::i64::MAX,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub mod iox {
|
||||
pub mod management {
|
||||
pub mod v1 {
|
||||
include!(concat!(env!("OUT_DIR"), "/influxdata.iox.management.v1.rs"));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub mod com {
|
||||
pub mod github {
|
||||
pub mod influxdata {
|
||||
pub mod idpe {
|
||||
pub mod storage {
|
||||
pub mod read {
|
||||
include!(concat!(
|
||||
env!("OUT_DIR"),
|
||||
"/com.github.influxdata.idpe.storage.read.rs"
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Needed because of https://github.com/hyperium/tonic/issues/471
|
||||
pub mod grpc {
|
||||
pub mod health {
|
||||
pub mod v1 {
|
||||
include!(concat!(env!("OUT_DIR"), "/grpc.health.v1.rs"));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/wal_generated.rs"));
|
||||
|
||||
/// gRPC Storage Service
|
||||
pub const STORAGE_SERVICE: &str = "influxdata.platform.storage.Storage";
|
||||
/// gRPC Testing Service
|
||||
pub const IOX_TESTING_SERVICE: &str = "influxdata.platform.storage.IOxTesting";
|
||||
/// gRPC Arrow Flight Service
|
||||
pub const ARROW_SERVICE: &str = "arrow.flight.protocol.FlightService";
|
||||
|
||||
pub use pb::com::github::influxdata::idpe::storage::read::*;
|
||||
pub use pb::influxdata::platform::storage::*;
|
||||
|
||||
pub use google_types as google;
|
||||
pub use pb::{grpc, influxdata};
|
||||
|
|
|
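A small sketch of the import paths this module layout produces for downstream code, assuming the nesting and re-exports behave exactly as written above.

// Platform storage types are re-exported at the crate root ...
use generated_types::{ReadFilterRequest, TimestampRange};
// ... while the management and health protos keep their full package paths.
use generated_types::influxdata::iox::management::v1::DatabaseRules;
use generated_types::grpc::health::v1::HealthCheckRequest;

let range = TimestampRange::max(); // helper implemented in the pb module above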
@ -5,12 +5,13 @@ authors = ["Dom Dwyer <dom@itsallbroken.com>"]
|
|||
edition = "2018"
|
||||
|
||||
[features]
|
||||
flight = ["arrow_deps", "serde/derive", "tonic", "serde_json", "futures-util"]
|
||||
flight = ["arrow_deps", "serde/derive", "serde_json", "futures-util"]
|
||||
|
||||
[dependencies]
|
||||
# Workspace dependencies, in alphabetical order
|
||||
arrow_deps = { path = "../arrow_deps", optional = true }
|
||||
data_types = { path = "../data_types" }
|
||||
generated_types = { path = "../generated_types" }
|
||||
|
||||
# Crates.io dependencies, in alphabetical order
|
||||
futures-util = { version = "0.3.1", optional = true }
|
||||
|
@ -19,7 +20,7 @@ serde = "1.0.118"
|
|||
serde_json = { version = "1.0.44", optional = true }
|
||||
thiserror = "1.0.23"
|
||||
tokio = { version = "1.0", features = ["macros"] }
|
||||
tonic = { version = "0.4.0", optional = true }
|
||||
tonic = { version = "0.4.0" }
|
||||
|
||||
[dev-dependencies] # In alphabetical order
|
||||
rand = "0.8.1"
|
||||
|
|
|
@ -9,6 +9,9 @@ use data_types::{http::ListDatabasesResponse, DatabaseName};
|
|||
#[cfg(feature = "flight")]
|
||||
mod flight;
|
||||
|
||||
/// Client for the gRPC health checking API
|
||||
pub mod health;
|
||||
|
||||
// can't combine these into one statement that uses `{}` because of this bug in
|
||||
// the `unreachable_pub` lint: https://github.com/rust-lang/rust/issues/64762
|
||||
#[cfg(feature = "flight")]
|
||||
|
|
|
@ -0,0 +1,70 @@
|
|||
use generated_types::grpc::health::v1::*;
|
||||
use thiserror::Error;
|
||||
|
||||
/// Error type for the health check client
|
||||
#[derive(Debug, Error)]
|
||||
pub enum Error {
|
||||
/// Service is not serving
|
||||
#[error("Service is not serving")]
|
||||
NotServing,
|
||||
|
||||
/// Service returned an unexpected variant for the status enumeration
|
||||
#[error("Received invalid response: {}", .0)]
|
||||
InvalidResponse(i32),
|
||||
|
||||
/// Error connecting to the server
|
||||
#[error("Connection error: {}", .0)]
|
||||
ConnectionError(#[from] tonic::transport::Error),
|
||||
|
||||
/// Client received an unexpected error from the server
|
||||
#[error("Unexpected server error: {}: {}", .0.code(), .0.message())]
|
||||
UnexpectedError(#[from] tonic::Status),
|
||||
}
|
||||
|
||||
/// Result type for the health check client
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
/// A client for the gRPC health checking API
|
||||
///
|
||||
/// Allows checking the status of a given service
|
||||
#[derive(Debug)]
|
||||
pub struct Client {
|
||||
inner: health_client::HealthClient<tonic::transport::Channel>,
|
||||
}
|
||||
|
||||
impl Client {
|
||||
/// Create a new client with the provided endpoint
|
||||
pub async fn connect<D>(dst: D) -> Result<Self>
|
||||
where
|
||||
D: std::convert::TryInto<tonic::transport::Endpoint>,
|
||||
D::Error: Into<tonic::codegen::StdError>,
|
||||
{
|
||||
Ok(Self {
|
||||
inner: health_client::HealthClient::connect(dst).await?,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns `Ok()` if the corresponding service is serving
|
||||
pub async fn check(&mut self, service: impl Into<String>) -> Result<()> {
|
||||
use health_check_response::ServingStatus;
|
||||
|
||||
let status = self
|
||||
.inner
|
||||
.check(HealthCheckRequest {
|
||||
service: service.into(),
|
||||
})
|
||||
.await?
|
||||
.into_inner();
|
||||
|
||||
match status.status() {
|
||||
ServingStatus::Serving => Ok(()),
|
||||
ServingStatus::NotServing => Err(Error::NotServing),
|
||||
_ => Err(Error::InvalidResponse(status.status)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `Ok()` if the storage service is serving
|
||||
pub async fn check_storage(&mut self) -> Result<()> {
|
||||
self.check(generated_types::STORAGE_SERVICE).await
|
||||
}
|
||||
}
|
|
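A hedged usage sketch for the health client above; the crate name and endpoint are placeholders, while `check` and `check_storage` are the methods defined in this file and the service name matches the STORAGE_SERVICE constant earlier in this merge.

use influxdb_iox_client::health::Client;

let mut health = Client::connect("http://127.0.0.1:8082").await?;

// Check the storage gRPC service via the convenience method ...
health.check_storage().await?;
// ... or any service by its fully qualified name.
health.check("influxdata.platform.storage.Storage").await?;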
@ -24,9 +24,11 @@ use query::{
|
|||
util::{make_range_expr, AndExprBuilder},
|
||||
};
|
||||
|
||||
use crate::dictionary::{Dictionary, Error as DictionaryError};
|
||||
use crate::table::Table;
|
||||
|
||||
use crate::{
|
||||
column::Column,
|
||||
dictionary::{Dictionary, Error as DictionaryError},
|
||||
table::Table,
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use snafu::{OptionExt, ResultExt, Snafu};
|
||||
|
||||
|
@ -50,6 +52,12 @@ pub enum Error {
|
|||
source: crate::table::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Error checking predicate in table '{}': {}", table_name, source))]
|
||||
NamedTablePredicateCheck {
|
||||
table_name: String,
|
||||
source: crate::table::Error,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Unsupported predicate when mutable buffer table names. Found a general expression: {:?}",
|
||||
exprs
|
||||
|
@ -85,12 +93,36 @@ pub enum Error {
|
|||
#[snafu(display("Attempt to write table batch without a name"))]
|
||||
TableWriteWithoutName,
|
||||
|
||||
#[snafu(display("Value ID {} not found in dictionary of chunk {}", value_id, chunk_id))]
|
||||
InternalColumnValueIdNotFoundInDictionary {
|
||||
value_id: u32,
|
||||
chunk_id: u64,
|
||||
source: DictionaryError,
|
||||
},
|
||||
|
||||
#[snafu(display("Column ID {} not found in dictionary of chunk {}", column_id, chunk))]
|
||||
ColumnIdNotFoundInDictionary {
|
||||
column_id: u32,
|
||||
chunk: u64,
|
||||
source: DictionaryError,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Column name {} not found in dictionary of chunk {}",
|
||||
column_name,
|
||||
chunk_id
|
||||
))]
|
||||
ColumnNameNotFoundInDictionary {
|
||||
column_name: String,
|
||||
chunk_id: u64,
|
||||
source: DictionaryError,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Column '{}' is not a string tag column and thus can not list values",
|
||||
column_name
|
||||
))]
|
||||
UnsupportedColumnTypeForListingValues { column_name: String },
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
@ -312,13 +344,7 @@ impl Chunk {
|
|||
return Ok(None);
|
||||
}
|
||||
|
||||
let table_name_id =
|
||||
self.dictionary
|
||||
.id(table_name)
|
||||
.context(InternalTableNotFoundInDictionary {
|
||||
table_name,
|
||||
chunk_id: self.id(),
|
||||
})?;
|
||||
let table_name_id = self.table_name_id(table_name)?;
|
||||
|
||||
let mut chunk_column_ids = BTreeSet::new();
|
||||
|
||||
|
@ -366,6 +392,115 @@ impl Chunk {
|
|||
Ok(Some(column_names))
|
||||
}
|
||||
|
||||
/// Return the id of the table in the chunk's dictionary
|
||||
fn table_name_id(&self, table_name: &str) -> Result<u32> {
|
||||
self.dictionary
|
||||
.id(table_name)
|
||||
.context(InternalTableNotFoundInDictionary {
|
||||
table_name,
|
||||
chunk_id: self.id(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the strings of the specified Tag column that satisfy
|
||||
/// the predicate, if they can be determined entirely using metadata.
|
||||
///
|
||||
/// If the predicate cannot be evaluated entirely with metadata,
|
||||
/// return `Ok(None)`.
|
||||
pub fn tag_column_values(
|
||||
&self,
|
||||
table_name: &str,
|
||||
column_name: &str,
|
||||
chunk_predicate: &ChunkPredicate,
|
||||
) -> Result<Option<BTreeSet<String>>> {
|
||||
// No support for general purpose expressions
|
||||
if !chunk_predicate.chunk_exprs.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
let chunk_id = self.id();
|
||||
|
||||
let table_name_id = self.table_name_id(table_name)?;
|
||||
|
||||
// Is this table even in the chunk?
|
||||
let table = self
|
||||
.tables
|
||||
.get(&table_name_id)
|
||||
.context(NamedTableNotFoundInChunk {
|
||||
table_name,
|
||||
chunk_id,
|
||||
})?;
|
||||
|
||||
// See if we can rule out the table entirely using metadata
|
||||
let could_match = table
|
||||
.could_match_predicate(chunk_predicate)
|
||||
.context(NamedTablePredicateCheck { table_name })?;
|
||||
|
||||
if !could_match {
|
||||
// No columns could match, return empty set
|
||||
return Ok(Default::default());
|
||||
}
|
||||
|
||||
let column_id =
|
||||
self.dictionary
|
||||
.lookup_value(column_name)
|
||||
.context(ColumnNameNotFoundInDictionary {
|
||||
column_name,
|
||||
chunk_id,
|
||||
})?;
|
||||
|
||||
let column = table
|
||||
.column(column_id)
|
||||
.context(NamedTableError { table_name })?;
|
||||
|
||||
if let Column::Tag(column, _) = column {
|
||||
// if we have a timestamp predicate, find all values
|
||||
// where the timestamp is within range. Otherwise take
|
||||
// all values.
|
||||
|
||||
// Collect matching ids into BTreeSet to deduplicate on
|
||||
// ids *before* looking up Strings
|
||||
let column_value_ids: BTreeSet<u32> = match chunk_predicate.range {
|
||||
None => {
|
||||
// take all non-null values
|
||||
column.iter().filter_map(|&s| s).collect()
|
||||
}
|
||||
Some(range) => {
|
||||
// filter out all values that don't match the timestamp
|
||||
let time_column = table
|
||||
.column_i64(chunk_predicate.time_column_id)
|
||||
.context(NamedTableError { table_name })?;
|
||||
|
||||
column
|
||||
.iter()
|
||||
.zip(time_column.iter())
|
||||
.filter_map(|(&column_value_id, ×tamp_value)| {
|
||||
if range.contains_opt(timestamp_value) {
|
||||
column_value_id
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
};
|
||||
|
||||
// convert all the (deduplicated) ids to Strings
|
||||
let column_values = column_value_ids
|
||||
.into_iter()
|
||||
.map(|value_id| {
|
||||
let value = self.dictionary.lookup_id(value_id).context(
|
||||
InternalColumnValueIdNotFoundInDictionary { value_id, chunk_id },
|
||||
)?;
|
||||
Ok(value.to_string())
|
||||
})
|
||||
.collect::<Result<BTreeSet<String>>>()?;
|
||||
|
||||
Ok(Some(column_values))
|
||||
} else {
|
||||
UnsupportedColumnTypeForListingValues { column_name }.fail()
|
||||
}
|
||||
}
|
||||
|
||||
/// Translates `predicate` into per-chunk ids that can be
|
||||
/// directly evaluated against tables in this chunk
|
||||
pub fn compile_predicate(&self, predicate: &Predicate) -> Result<ChunkPredicate> {
|
||||
|
@ -627,6 +762,15 @@ impl query::PartitionChunk for Chunk {
|
|||
) -> Result<Option<StringSet>, Self::Error> {
|
||||
unimplemented!("This function is slated for removal")
|
||||
}
|
||||
|
||||
async fn column_values(
|
||||
&self,
|
||||
_table_name: &str,
|
||||
_column_name: &str,
|
||||
_predicate: &Predicate,
|
||||
) -> Result<Option<StringSet>, Self::Error> {
|
||||
unimplemented!("This function is slated for removal")
|
||||
}
|
||||
}
|
||||
|
||||
/// Used to figure out if we know how to deal with this kind of
|
||||
|
|
|
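A short sketch of exercising the new tag_column_values path above; `chunk` and `predicate` are assumed to already exist, and compile_predicate is the method shown at the end of this hunk.

// Hedged sketch: `chunk` is a mutable buffer Chunk, `predicate` a query Predicate.
let chunk_predicate = chunk.compile_predicate(&predicate)?;

match chunk.tag_column_values("h2o", "state", &chunk_predicate)? {
    Some(values) => println!("resolved from metadata alone: {:?}", values),
    None => println!("general expressions present; a full query plan is needed"),
}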
@ -6,11 +6,10 @@ use generated_types::wal;
|
|||
use query::group_by::GroupByAndAggregate;
|
||||
use query::group_by::WindowDuration;
|
||||
use query::{
|
||||
exec::{stringset::StringSet, SeriesSetPlan, SeriesSetPlans},
|
||||
predicate::Predicate,
|
||||
Database,
|
||||
group_by::Aggregate,
|
||||
plan::seriesset::{SeriesSetPlan, SeriesSetPlans},
|
||||
};
|
||||
use query::{group_by::Aggregate, plan::stringset::StringSetPlan};
|
||||
use query::{predicate::Predicate, Database};
|
||||
|
||||
use crate::column::Column;
|
||||
use crate::table::Table;
|
||||
|
@ -19,10 +18,10 @@ use crate::{
|
|||
partition::Partition,
|
||||
};
|
||||
|
||||
use std::collections::{BTreeSet, HashMap, HashSet};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_deps::datafusion::{error::DataFusionError, logical_plan::LogicalPlan};
|
||||
use arrow_deps::datafusion::error::DataFusionError;
|
||||
|
||||
use crate::dictionary::Error as DictionaryError;
|
||||
|
||||
|
@ -46,30 +45,6 @@ pub enum Error {
|
|||
source: DictionaryError,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Column name {} not found in dictionary of chunk {}",
|
||||
column_name,
|
||||
chunk
|
||||
))]
|
||||
ColumnNameNotFoundInDictionary {
|
||||
column_name: String,
|
||||
chunk: u64,
|
||||
source: DictionaryError,
|
||||
},
|
||||
|
||||
#[snafu(display("Value ID {} not found in dictionary of chunk {}", value_id, chunk))]
|
||||
ColumnValueIdNotFoundInDictionary {
|
||||
value_id: u32,
|
||||
chunk: u64,
|
||||
source: DictionaryError,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Column '{}' is not a tag column and thus can not list values",
|
||||
column_name
|
||||
))]
|
||||
UnsupportedColumnTypeForListingValues { column_name: String },
|
||||
|
||||
#[snafu(display("id conversion error"))]
|
||||
IdConversionError { source: std::num::TryFromIntError },
|
||||
|
||||
|
@ -254,27 +229,6 @@ impl Database for MutableBufferDb {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// return all column values in this database, while applying optional
|
||||
/// predicates
|
||||
async fn column_values(
|
||||
&self,
|
||||
column_name: &str,
|
||||
predicate: Predicate,
|
||||
) -> Result<StringSetPlan, Self::Error> {
|
||||
let has_exprs = predicate.has_exprs();
|
||||
let mut filter = ChunkTableFilter::new(predicate);
|
||||
|
||||
if has_exprs {
|
||||
let mut visitor = ValuePredVisitor::new(column_name);
|
||||
self.accept(&mut filter, &mut visitor)?;
|
||||
Ok(visitor.plans.into())
|
||||
} else {
|
||||
let mut visitor = ValueVisitor::new(column_name);
|
||||
self.accept(&mut filter, &mut visitor)?;
|
||||
Ok(visitor.column_values.into())
|
||||
}
|
||||
}
|
||||
|
||||
async fn query_series(&self, predicate: Predicate) -> Result<SeriesSetPlans, Self::Error> {
|
||||
let mut filter = ChunkTableFilter::new(predicate);
|
||||
let mut visitor = SeriesVisitor::new();
|
||||
|
@ -569,152 +523,6 @@ impl ChunkTableFilter {
|
|||
}
|
||||
}
|
||||
|
||||
/// return all values in the `column_name` column
|
||||
/// in this database, while applying the timestamp range
|
||||
///
|
||||
/// Potential optimizations: Run this in parallel (in different
|
||||
/// futures) for each chunk / table, rather than a single one
|
||||
/// -- but that will require building up parallel hash tables.
|
||||
struct ValueVisitor<'a> {
|
||||
column_name: &'a str,
|
||||
// what column id we are looking for
|
||||
column_id: Option<u32>,
|
||||
chunk_value_ids: BTreeSet<u32>,
|
||||
column_values: StringSet,
|
||||
}
|
||||
|
||||
impl<'a> ValueVisitor<'a> {
|
||||
fn new(column_name: &'a str) -> Self {
|
||||
Self {
|
||||
column_name,
|
||||
column_id: None,
|
||||
column_values: StringSet::new(),
|
||||
chunk_value_ids: BTreeSet::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Visitor for ValueVisitor<'a> {
|
||||
fn pre_visit_chunk(&mut self, chunk: &Chunk) -> Result<()> {
|
||||
self.chunk_value_ids.clear();
|
||||
|
||||
self.column_id = Some(chunk.dictionary.lookup_value(self.column_name).context(
|
||||
ColumnNameNotFoundInDictionary {
|
||||
column_name: self.column_name,
|
||||
chunk: chunk.id,
|
||||
},
|
||||
)?);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn visit_column(
|
||||
&mut self,
|
||||
table: &Table,
|
||||
column_id: u32,
|
||||
column: &Column,
|
||||
filter: &mut ChunkTableFilter,
|
||||
) -> Result<()> {
|
||||
if Some(column_id) != self.column_id {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
match column {
|
||||
Column::Tag(column, _) => {
|
||||
// if we have a timestamp predicate, find all values
|
||||
// where the timestamp is within range. Otherwise take
|
||||
// all values.
|
||||
let chunk_predicate = filter.chunk_predicate();
|
||||
match chunk_predicate.range {
|
||||
None => {
|
||||
// take all non-null values
|
||||
column.iter().filter_map(|&s| s).for_each(|value_id| {
|
||||
self.chunk_value_ids.insert(value_id);
|
||||
});
|
||||
}
|
||||
Some(range) => {
|
||||
// filter out all values that don't match the timestamp
|
||||
let time_column = table.column_i64(chunk_predicate.time_column_id)?;
|
||||
|
||||
column
|
||||
.iter()
|
||||
.zip(time_column.iter())
|
||||
.filter_map(|(&column_value_id, ×tamp_value)| {
|
||||
if range.contains_opt(timestamp_value) {
|
||||
column_value_id
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.for_each(|value_id| {
|
||||
self.chunk_value_ids.insert(value_id);
|
||||
});
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
_ => UnsupportedColumnTypeForListingValues {
|
||||
column_name: self.column_name,
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
|
||||
fn post_visit_chunk(&mut self, chunk: &Chunk) -> Result<()> {
|
||||
// convert all the chunk's value_ids to Strings
|
||||
for &value_id in &self.chunk_value_ids {
|
||||
let value = chunk.dictionary.lookup_id(value_id).context(
|
||||
ColumnValueIdNotFoundInDictionary {
|
||||
value_id,
|
||||
chunk: chunk.id,
|
||||
},
|
||||
)?;
|
||||
|
||||
if !self.column_values.contains(value) {
|
||||
self.column_values.insert(value.to_string());
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// return all column values for the specified column in this
|
||||
/// database, while applying the timestamp range and predicate
|
||||
struct ValuePredVisitor<'a> {
|
||||
column_name: &'a str,
|
||||
plans: Vec<LogicalPlan>,
|
||||
}
|
||||
|
||||
impl<'a> ValuePredVisitor<'a> {
|
||||
fn new(column_name: &'a str) -> Self {
|
||||
Self {
|
||||
column_name,
|
||||
plans: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Visitor for ValuePredVisitor<'a> {
|
||||
// TODO try and rule out entire tables based on the same criteria
|
||||
// as explained in NamePredVisitor
|
||||
fn pre_visit_table(
|
||||
&mut self,
|
||||
table: &Table,
|
||||
chunk: &Chunk,
|
||||
filter: &mut ChunkTableFilter,
|
||||
) -> Result<()> {
|
||||
// skip table entirely if there are no rows that fall in the timestamp range
|
||||
if table.could_match_predicate(filter.chunk_predicate())? {
|
||||
self.plans.push(table.tag_values_plan(
|
||||
self.column_name,
|
||||
filter.chunk_predicate(),
|
||||
chunk,
|
||||
)?);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Return DataFusion plans to calculate which series pass the
|
||||
/// specified predicate.
|
||||
struct SeriesVisitor {
|
||||
|
@ -843,10 +651,6 @@ mod tests {
|
|||
type TestError = Box<dyn std::error::Error + Send + Sync + 'static>;
|
||||
type Result<T = (), E = TestError> = std::result::Result<T, E>;
|
||||
|
||||
fn to_set(v: &[&str]) -> BTreeSet<String> {
|
||||
v.iter().map(|s| s.to_string()).collect::<BTreeSet<_>>()
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn missing_tags_are_null() -> Result {
|
||||
let db = MutableBufferDb::new("mydb");
|
||||
|
@ -906,158 +710,6 @@ mod tests {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_column_values() -> Result {
|
||||
let db = MutableBufferDb::new("column_namedb");
|
||||
|
||||
let lp_data = "h2o,state=CA,city=LA temp=70.4 100\n\
|
||||
h2o,state=MA,city=Boston temp=72.4 250\n\
|
||||
o2,state=MA,city=Boston temp=50.4 200\n\
|
||||
o2,state=CA temp=79.0 300\n\
|
||||
o2,state=NY temp=60.8 400\n";
|
||||
|
||||
let lines: Vec<_> = parse_lines(lp_data).map(|l| l.unwrap()).collect();
|
||||
write_lines(&db, &lines).await;
|
||||
|
||||
#[derive(Debug)]
|
||||
struct TestCase<'a> {
|
||||
description: &'a str,
|
||||
column_name: &'a str,
|
||||
predicate: Predicate,
|
||||
expected_column_values: Result<Vec<&'a str>>,
|
||||
}
|
||||
|
||||
let test_cases = vec![
|
||||
TestCase {
|
||||
description: "No predicates, 'state' col",
|
||||
column_name: "state",
|
||||
predicate: PredicateBuilder::default().build(),
|
||||
expected_column_values: Ok(vec!["CA", "MA", "NY"]),
|
||||
},
|
||||
TestCase {
|
||||
description: "No predicates, 'city' col",
|
||||
column_name: "city",
|
||||
predicate: PredicateBuilder::default().build(),
|
||||
expected_column_values: Ok(vec!["Boston", "LA"]),
|
||||
},
|
||||
TestCase {
|
||||
description: "Restrictions: timestamp",
|
||||
column_name: "state",
|
||||
predicate: PredicateBuilder::default().timestamp_range(50, 201).build(),
|
||||
expected_column_values: Ok(vec!["CA", "MA"]),
|
||||
},
|
||||
TestCase {
|
||||
description: "Restrictions: predicate",
|
||||
column_name: "city",
|
||||
predicate: PredicateBuilder::default()
|
||||
.add_expr(col("state").eq(lit("MA"))) // state=MA
|
||||
.build(),
|
||||
expected_column_values: Ok(vec!["Boston"]),
|
||||
},
|
||||
TestCase {
|
||||
description: "Restrictions: timestamp and predicate",
|
||||
column_name: "state",
|
||||
predicate: PredicateBuilder::default()
|
||||
.timestamp_range(150, 301)
|
||||
.add_expr(col("state").eq(lit("MA"))) // state=MA
|
||||
.build(),
|
||||
expected_column_values: Ok(vec!["MA"]),
|
||||
},
|
||||
TestCase {
|
||||
description: "Restrictions: measurement name",
|
||||
column_name: "state",
|
||||
predicate: PredicateBuilder::default().table("h2o").build(),
|
||||
expected_column_values: Ok(vec!["CA", "MA"]),
|
||||
},
|
||||
TestCase {
|
||||
description: "Restrictions: measurement name, with nulls",
|
||||
column_name: "city",
|
||||
predicate: PredicateBuilder::default().table("o2").build(),
|
||||
expected_column_values: Ok(vec!["Boston"]),
|
||||
},
|
||||
TestCase {
|
||||
description: "Restrictions: measurement name and timestamp",
|
||||
column_name: "state",
|
||||
predicate: PredicateBuilder::default()
|
||||
.table("o2")
|
||||
.timestamp_range(50, 201)
|
||||
.build(),
|
||||
expected_column_values: Ok(vec!["MA"]),
|
||||
},
|
||||
TestCase {
|
||||
description: "Restrictions: measurement name and predicate",
|
||||
column_name: "state",
|
||||
predicate: PredicateBuilder::default()
|
||||
.table("o2")
|
||||
.add_expr(col("state").eq(lit("NY"))) // state=NY
|
||||
.build(),
|
||||
expected_column_values: Ok(vec!["NY"]),
|
||||
},
|
||||
TestCase {
|
||||
description: "Restrictions: measurement name, timestamp and predicate",
|
||||
column_name: "state",
|
||||
predicate: PredicateBuilder::default()
|
||||
.table("o2")
|
||||
.timestamp_range(1, 550)
|
||||
.add_expr(col("state").eq(lit("NY"))) // state=NY
|
||||
.build(),
|
||||
expected_column_values: Ok(vec!["NY"]),
|
||||
},
|
||||
TestCase {
|
||||
description: "Restrictions: measurement name, timestamp and predicate: no match",
|
||||
column_name: "state",
|
||||
predicate: PredicateBuilder::default()
|
||||
.table("o2")
|
||||
.timestamp_range(1, 300) // filters out the NY row
|
||||
.add_expr(col("state").eq(lit("NY"))) // state=NY
|
||||
.build(),
|
||||
expected_column_values: Ok(vec![]),
|
||||
},
|
||||
];
|
||||
|
||||
for test_case in test_cases.into_iter() {
|
||||
let test_case_str = format!("{:#?}", test_case);
|
||||
println!("Running test case: {:?}", test_case);
|
||||
|
||||
let column_values_plan = db
|
||||
.column_values(test_case.column_name, test_case.predicate)
|
||||
.await
|
||||
.expect("Created tag_values plan successfully");
|
||||
|
||||
// run the execution plan
|
||||
let executor = Executor::default();
|
||||
let actual_column_values = executor.to_string_set(column_values_plan).await;
|
||||
|
||||
let is_match = if let Ok(expected_column_values) = &test_case.expected_column_values {
|
||||
let expected_column_values = to_set(expected_column_values);
|
||||
if let Ok(actual_column_values) = &actual_column_values {
|
||||
**actual_column_values == expected_column_values
|
||||
} else {
|
||||
false
|
||||
}
|
||||
} else if let Err(e) = &actual_column_values {
|
||||
// use string compare to compare errors to avoid having to build exact errors
|
||||
format!("{:?}", e) == format!("{:?}", test_case.expected_column_values)
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
assert!(
|
||||
is_match,
|
||||
"Mismatch\n\
|
||||
actual_column_values: \n\
|
||||
{:?}\n\
|
||||
expected_column_values: \n\
|
||||
{:?}\n\
|
||||
Test_case: \n\
|
||||
{}",
|
||||
actual_column_values, test_case.expected_column_values, test_case_str
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_query_series() -> Result {
|
||||
// This test checks that everything is wired together
|
||||
|
@ -1088,7 +740,7 @@ mod tests {
|
|||
let plans = db
|
||||
.query_series(predicate)
|
||||
.await
|
||||
.expect("Created tag_values plan successfully");
|
||||
.expect("Created query_series plan successfully");
|
||||
|
||||
let results = run_and_gather_results(plans).await;
|
||||
|
||||
|
@ -1164,7 +816,7 @@ mod tests {
|
|||
let plans = db
|
||||
.query_series(predicate)
|
||||
.await
|
||||
.expect("Created tag_values plan successfully");
|
||||
.expect("Created query_series plan successfully");
|
||||
|
||||
let results = run_and_gather_results(plans).await;
|
||||
|
||||
|
@ -1207,7 +859,7 @@ mod tests {
|
|||
let plans = db
|
||||
.query_series(predicate)
|
||||
.await
|
||||
.expect("Created tag_values plan successfully");
|
||||
.expect("Created query_series plan successfully");
|
||||
|
||||
let results = run_and_gather_results(plans).await;
|
||||
assert!(results.is_empty());
|
||||
|
@ -1220,7 +872,7 @@ mod tests {
|
|||
let plans = db
|
||||
.query_series(predicate)
|
||||
.await
|
||||
.expect("Created tag_values plan successfully");
|
||||
.expect("Created query_series plan successfully");
|
||||
|
||||
let results = run_and_gather_results(plans).await;
|
||||
assert_eq!(results.len(), 1);
|
||||
|
@ -1234,7 +886,7 @@ mod tests {
|
|||
let plans = db
|
||||
.query_series(predicate)
|
||||
.await
|
||||
.expect("Created tag_values plan successfully");
|
||||
.expect("Created query_series plan successfully");
|
||||
|
||||
let results = run_and_gather_results(plans).await;
|
||||
assert!(results.is_empty());
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
use generated_types::wal as wb;
|
||||
use query::{
|
||||
exec::{field::FieldColumns, SeriesSetPlan},
|
||||
exec::field::FieldColumns,
|
||||
func::selectors::{selector_first, selector_last, selector_max, selector_min, SelectorOutput},
|
||||
func::window::make_window_bound_expr,
|
||||
group_by::{Aggregate, WindowDuration},
|
||||
plan::seriesset::SeriesSetPlan,
|
||||
};
|
||||
|
||||
use std::{
|
||||
|
@ -35,7 +36,7 @@ use arrow_deps::{
|
|||
},
|
||||
datafusion::{
|
||||
self,
|
||||
logical_plan::{Expr, LogicalPlan, LogicalPlanBuilder},
|
||||
logical_plan::{Expr, LogicalPlanBuilder},
|
||||
prelude::*,
|
||||
},
|
||||
};
|
||||
|
@ -223,7 +224,7 @@ impl Table {
|
|||
}
|
||||
|
||||
/// Returns a reference to the specified column
|
||||
fn column(&self, column_id: u32) -> Result<&Column> {
|
||||
pub(crate) fn column(&self, column_id: u32) -> Result<&Column> {
|
||||
self.columns.get(&column_id).context(ColumnIdNotFound {
|
||||
id: column_id,
|
||||
table_id: self.id,
|
||||
|
@ -271,32 +272,6 @@ impl Table {
|
|||
}
|
||||
}
|
||||
|
||||
/// Creates a DataFusion LogicalPlan that returns column *values* as a
|
||||
/// single column of Strings
|
||||
///
|
||||
/// The created plan looks like:
|
||||
///
|
||||
/// Projection
|
||||
/// Filter(predicate)
|
||||
/// InMemoryScan
|
||||
pub fn tag_values_plan(
|
||||
&self,
|
||||
column_name: &str,
|
||||
chunk_predicate: &ChunkPredicate,
|
||||
chunk: &Chunk,
|
||||
) -> Result<LogicalPlan> {
|
||||
// Scan and Filter
|
||||
let plan_builder = self.scan_with_predicates(chunk_predicate, chunk)?;
|
||||
|
||||
let select_exprs = vec![col(column_name)];
|
||||
|
||||
plan_builder
|
||||
.project(&select_exprs)
|
||||
.context(BuildingPlan)?
|
||||
.build()
|
||||
.context(BuildingPlan)
|
||||
}
|
||||
|
||||
/// Creates a SeriesSet plan that produces an output table with rows that
|
||||
/// match the predicate
|
||||
///
|
||||
|
@ -503,10 +478,7 @@ impl Table {
|
|||
column_name: col_name,
|
||||
chunk: chunk.id,
|
||||
})?;
|
||||
let column = self.columns.get(&column_id).context(ColumnIdNotFound {
|
||||
id: column_id,
|
||||
table_id: self.id,
|
||||
})?;
|
||||
let column = self.column(column_id)?;
|
||||
|
||||
Ok(column.data_type())
|
||||
})?;
|
||||
|
@ -735,10 +707,7 @@ impl Table {
|
|||
|
||||
for col in &selection.cols {
|
||||
let column_name = col.column_name;
|
||||
let column = self.columns.get(&col.column_id).context(ColumnIdNotFound {
|
||||
id: col.column_id,
|
||||
table_id: self.id,
|
||||
})?;
|
||||
let column = self.column(col.column_id)?;
|
||||
|
||||
schema_builder = match column {
|
||||
Column::String(_, _) => schema_builder.field(column_name, ArrowDataType::Utf8),
|
||||
|
@ -769,10 +738,7 @@ impl Table {
|
|||
let mut columns = Vec::with_capacity(selection.cols.len());
|
||||
|
||||
for col in &selection.cols {
|
||||
let column = self.columns.get(&col.column_id).context(ColumnIdNotFound {
|
||||
id: col.column_id,
|
||||
table_id: self.id,
|
||||
})?;
|
||||
let column = self.column(col.column_id)?;
|
||||
|
||||
let array = match column {
|
||||
Column::String(vals, _) => {
|
||||
|
@ -1221,6 +1187,7 @@ impl<'a> TableColSelection<'a> {
|
|||
mod tests {
|
||||
|
||||
use arrow::util::pretty::pretty_format_batches;
|
||||
use arrow_deps::datafusion::logical_plan::LogicalPlan;
|
||||
use data_types::data::split_lines_into_write_entry_partitions;
|
||||
use influxdb_line_protocol::{parse_lines, ParsedLine};
|
||||
use query::{
|
||||
|
|
|
@ -419,26 +419,26 @@ mod tests {
|
|||
dotenv::dotenv().ok();
|
||||
|
||||
let region = env::var("AWS_DEFAULT_REGION");
|
||||
let bucket_name = env::var("INFLUXDB_IOX_S3_BUCKET");
|
||||
let bucket_name = env::var("INFLUXDB_IOX_BUCKET");
|
||||
let force = std::env::var("TEST_INTEGRATION");
|
||||
|
||||
match (region.is_ok(), bucket_name.is_ok(), force.is_ok()) {
|
||||
(false, false, true) => {
|
||||
panic!(
|
||||
"TEST_INTEGRATION is set, \
|
||||
but AWS_DEFAULT_REGION and INFLUXDB_IOX_S3_BUCKET are not"
|
||||
but AWS_DEFAULT_REGION and INFLUXDB_IOX_BUCKET are not"
|
||||
)
|
||||
}
|
||||
(false, true, true) => {
|
||||
panic!("TEST_INTEGRATION is set, but AWS_DEFAULT_REGION is not")
|
||||
}
|
||||
(true, false, true) => {
|
||||
panic!("TEST_INTEGRATION is set, but INFLUXDB_IOX_S3_BUCKET is not")
|
||||
panic!("TEST_INTEGRATION is set, but INFLUXDB_IOX_BUCKET is not")
|
||||
}
|
||||
(false, false, false) => {
|
||||
eprintln!(
|
||||
"skipping integration test - set \
|
||||
AWS_DEFAULT_REGION and INFLUXDB_IOX_S3_BUCKET to run"
|
||||
AWS_DEFAULT_REGION and INFLUXDB_IOX_BUCKET to run"
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
|
@ -447,7 +447,7 @@ mod tests {
|
|||
return Ok(());
|
||||
}
|
||||
(true, false, false) => {
|
||||
eprintln!("skipping integration test - set INFLUXDB_IOX_S3_BUCKET to run");
|
||||
eprintln!("skipping integration test - set INFLUXDB_IOX_BUCKET to run");
|
||||
return Ok(());
|
||||
}
|
||||
_ => {}
|
||||
|
@ -466,8 +466,8 @@ mod tests {
|
|||
"The environment variable AWS_DEFAULT_REGION must be set \
|
||||
to a value like `us-east-2`"
|
||||
})?;
|
||||
let bucket_name = env::var("INFLUXDB_IOX_S3_BUCKET")
|
||||
.map_err(|_| "The environment variable INFLUXDB_IOX_S3_BUCKET must be set")?;
|
||||
let bucket_name = env::var("INFLUXDB_IOX_BUCKET")
|
||||
.map_err(|_| "The environment variable INFLUXDB_IOX_BUCKET must be set")?;
|
||||
|
||||
Ok((region.parse()?, bucket_name))
|
||||
}
|
||||
|
|
|
@ -299,7 +299,7 @@ mod tests {
|
|||
|
||||
let required_vars = [
|
||||
"AZURE_STORAGE_ACCOUNT",
|
||||
"AZURE_STORAGE_CONTAINER",
|
||||
"INFLUXDB_IOX_BUCKET",
|
||||
"AZURE_STORAGE_MASTER_KEY",
|
||||
];
|
||||
let unset_vars: Vec<_> = required_vars
|
||||
|
@ -334,8 +334,8 @@ mod tests {
|
|||
async fn azure_blob_test() -> Result<()> {
|
||||
maybe_skip_integration!();
|
||||
|
||||
let container_name = env::var("AZURE_STORAGE_CONTAINER")
|
||||
.map_err(|_| "The environment variable AZURE_STORAGE_CONTAINER must be set")?;
|
||||
let container_name = env::var("INFLUXDB_IOX_BUCKET")
|
||||
.map_err(|_| "The environment variable INFLUXDB_IOX_BUCKET must be set")?;
|
||||
let integration = MicrosoftAzure::new_from_env(container_name);
|
||||
|
||||
put_get_delete_list(&integration).await?;
|
||||
|
|
|
@ -267,15 +267,15 @@ mod test {
|
|||
() => {
|
||||
dotenv::dotenv().ok();
|
||||
|
||||
let bucket_name = env::var("GCS_BUCKET_NAME");
|
||||
let bucket_name = env::var("INFLUXDB_IOX_BUCKET");
|
||||
let force = std::env::var("TEST_INTEGRATION");
|
||||
|
||||
match (bucket_name.is_ok(), force.is_ok()) {
|
||||
(false, true) => {
|
||||
panic!("TEST_INTEGRATION is set, but GCS_BUCKET_NAME is not")
|
||||
panic!("TEST_INTEGRATION is set, but INFLUXDB_IOX_BUCKET is not")
|
||||
}
|
||||
(false, false) => {
|
||||
eprintln!("skipping integration test - set GCS_BUCKET_NAME to run");
|
||||
eprintln!("skipping integration test - set INFLUXDB_IOX_BUCKET to run");
|
||||
return Ok(());
|
||||
}
|
||||
_ => {}
|
||||
|
@ -284,8 +284,8 @@ mod test {
|
|||
}
|
||||
|
||||
fn bucket_name() -> Result<String> {
|
||||
Ok(env::var("GCS_BUCKET_NAME")
|
||||
.map_err(|_| "The environment variable GCS_BUCKET_NAME must be set")?)
|
||||
Ok(env::var("INFLUXDB_IOX_BUCKET")
|
||||
.map_err(|_| "The environment variable INFLUXDB_IOX_BUCKET must be set")?)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
|
@ -5,11 +5,17 @@ use super::DELIMITER;
|
|||
// percent_encode's API needs this as a byte
|
||||
const DELIMITER_BYTE: u8 = DELIMITER.as_bytes()[0];
|
||||
|
||||
// special encoding of the empty string part.
|
||||
// Using '%' is the safest character since it will always be used in the
|
||||
// output of percent_encode no matter how we evolve the INVALID AsciiSet over
|
||||
// time.
|
||||
const EMPTY: &str = "%";
|
||||
|
||||
/// The PathPart type exists to validate the directory/file names that form part
|
||||
/// of a path.
|
||||
///
|
||||
/// A PathPart instance is guaranteed to contain no `/` characters as it can
|
||||
/// only be constructed by going through the `try_from` impl.
|
||||
/// A PathPart instance is guaranteed to be non-empty and to contain no `/`
|
||||
/// characters as it can only be constructed by going through the `from` impl.
|
||||
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Default)]
|
||||
pub struct PathPart(pub(super) String);
|
||||
|
||||
|
@ -48,6 +54,12 @@ impl From<&str> for PathPart {
|
|||
// to be equal to `.` or `..` to prevent file system traversal shenanigans.
|
||||
"." => Self(String::from("%2E")),
|
||||
".." => Self(String::from("%2E%2E")),
|
||||
|
||||
// Every string except the empty string will be percent encoded.
|
||||
// The empty string will be transformed into a sentinel value EMPTY
|
||||
// which can safely be a prefix of an encoded value since it will be
|
||||
// fully matched at decode time (see impl Display for PathPart).
|
||||
"" => Self(String::from(EMPTY)),
|
||||
other => Self(percent_encode(other.as_bytes(), INVALID).to_string()),
|
||||
}
|
||||
}
|
||||
|
@ -55,10 +67,13 @@ impl From<&str> for PathPart {
|
|||
|
||||
impl std::fmt::Display for PathPart {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
percent_decode_str(&self.0)
|
||||
.decode_utf8()
|
||||
.expect("Valid UTF-8 that came from String")
|
||||
.fmt(f)
|
||||
match &self.0[..] {
|
||||
EMPTY => "".fmt(f),
|
||||
_ => percent_decode_str(&self.0)
|
||||
.decode_utf8()
|
||||
.expect("Valid UTF-8 that came from String")
|
||||
.fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -104,4 +119,21 @@ mod tests {
|
|||
assert_eq!(part, PathPart(String::from("%2E%2E")));
|
||||
assert_eq!(part.to_string(), "..");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn path_part_cant_be_empty() {
|
||||
let part: PathPart = "".into();
|
||||
assert_eq!(part, PathPart(String::from(EMPTY)));
|
||||
assert_eq!(part.to_string(), "");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty_is_safely_encoded() {
|
||||
let part: PathPart = EMPTY.into();
|
||||
assert_eq!(
|
||||
part,
|
||||
PathPart(percent_encode(EMPTY.as_bytes(), INVALID).to_string())
|
||||
);
|
||||
assert_eq!(part.to_string(), EMPTY);
|
||||
}
|
||||
}
|
||||
|
|
|
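The encoding rules above boil down to a round-trip guarantee; the following sketch (which would sit alongside the tests in this module, since it touches the pub(super) field) illustrates it without being part of the diff.

for &raw in &["", ".", "..", "foo/bar"] {
    let part: PathPart = raw.into();
    assert!(!part.0.is_empty());        // "" becomes the EMPTY sentinel, never the empty string
    assert!(!part.0.contains('/'));     // the delimiter is always percent-encoded away
    assert_eq!(part.to_string(), raw);  // Display decodes back to the original input
}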
@ -18,7 +18,6 @@ use arrow_deps::{
|
|||
use counters::ExecutionCounters;
|
||||
|
||||
use context::IOxExecutionContext;
|
||||
use field::FieldColumns;
|
||||
use schema_pivot::SchemaPivotNode;
|
||||
|
||||
use fieldlist::{FieldList, IntoFieldList};
|
||||
|
@ -28,7 +27,11 @@ use tokio::sync::mpsc::{self, error::SendError};
|
|||
|
||||
use snafu::{ResultExt, Snafu};
|
||||
|
||||
use crate::plan::{fieldlist::FieldListPlan, stringset::StringSetPlan};
|
||||
use crate::plan::{
|
||||
fieldlist::FieldListPlan,
|
||||
seriesset::{SeriesSetPlan, SeriesSetPlans},
|
||||
stringset::StringSetPlan,
|
||||
};
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
|
@ -85,91 +88,6 @@ pub enum Error {
|
|||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
/// A plan that can be run to produce a logical stream of time series,
|
||||
/// as represented as sequence of SeriesSets from a single DataFusion
|
||||
/// plan, optionally grouped in some way.
|
||||
#[derive(Debug)]
|
||||
pub struct SeriesSetPlan {
|
||||
/// The table name this came from
|
||||
pub table_name: Arc<String>,
|
||||
|
||||
/// Datafusion plan to execute. The plan must produce
|
||||
/// RecordBatches that have:
|
||||
///
|
||||
/// * fields for each name in `tag_columns` and `field_columns`
|
||||
/// * a timestamp column called 'time'
|
||||
/// * each column in tag_columns must be a String (Utf8)
|
||||
pub plan: LogicalPlan,
|
||||
|
||||
/// The names of the columns that define tags.
|
||||
///
|
||||
/// Note these are `Arc` strings because they are duplicated for
|
||||
/// *each* resulting `SeriesSet` that is produced when this type
|
||||
/// of plan is executed.
|
||||
pub tag_columns: Vec<Arc<String>>,
|
||||
|
||||
/// The names of the columns which are "fields"
|
||||
///
|
||||
/// Note these are `Arc` strings because they are duplicated for
|
||||
/// *each* resulting `SeriesSet` that is produced when this type
|
||||
/// of plan is executed.
|
||||
pub field_columns: FieldColumns,
|
||||
|
||||
/// If present, how many of the series_set_plan::tag_columns
|
||||
/// should be used to compute the group
|
||||
pub num_prefix_tag_group_columns: Option<usize>,
|
||||
}
|
||||
|
||||
impl SeriesSetPlan {
|
||||
/// Create a SeriesSetPlan that will not produce any Group items
|
||||
pub fn new_from_shared_timestamp(
|
||||
table_name: Arc<String>,
|
||||
plan: LogicalPlan,
|
||||
tag_columns: Vec<Arc<String>>,
|
||||
field_columns: Vec<Arc<String>>,
|
||||
) -> Self {
|
||||
Self::new(table_name, plan, tag_columns, field_columns.into())
|
||||
}
|
||||
|
||||
/// Create a SeriesSetPlan that will not produce any Group items
|
||||
pub fn new(
|
||||
table_name: Arc<String>,
|
||||
plan: LogicalPlan,
|
||||
tag_columns: Vec<Arc<String>>,
|
||||
field_columns: FieldColumns,
|
||||
) -> Self {
|
||||
let num_prefix_tag_group_columns = None;
|
||||
|
||||
Self {
|
||||
table_name,
|
||||
plan,
|
||||
tag_columns,
|
||||
field_columns,
|
||||
num_prefix_tag_group_columns,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a SeriesSetPlan that will produce Group items, according to
|
||||
/// num_prefix_tag_group_columns.
|
||||
pub fn grouped(mut self, num_prefix_tag_group_columns: usize) -> Self {
|
||||
self.num_prefix_tag_group_columns = Some(num_prefix_tag_group_columns);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// A container for plans which each produce a logical stream of
|
||||
/// timeseries (from across many potential tables).
|
||||
#[derive(Debug, Default)]
|
||||
pub struct SeriesSetPlans {
|
||||
pub plans: Vec<SeriesSetPlan>,
|
||||
}
|
||||
|
||||
impl From<Vec<SeriesSetPlan>> for SeriesSetPlans {
|
||||
fn from(plans: Vec<SeriesSetPlan>) -> Self {
|
||||
Self { plans }
|
||||
}
|
||||
}
|
||||
|
||||
/// Handles executing plans, and marshalling the results into rust
|
||||
/// native structures.
|
||||
#[derive(Debug, Default)]
|
||||
|
|
|
@ -3,16 +3,21 @@ use std::{
|
|||
sync::Arc,
|
||||
};
|
||||
|
||||
use arrow_deps::datafusion::{
|
||||
error::{DataFusionError, Result as DatafusionResult},
|
||||
logical_plan::{Expr, ExpressionVisitor, LogicalPlan, LogicalPlanBuilder, Operator, Recursion},
|
||||
prelude::col,
|
||||
use arrow_deps::{
|
||||
arrow::datatypes::DataType,
|
||||
datafusion::{
|
||||
error::{DataFusionError, Result as DatafusionResult},
|
||||
logical_plan::{
|
||||
Expr, ExpressionVisitor, LogicalPlan, LogicalPlanBuilder, Operator, Recursion,
|
||||
},
|
||||
prelude::col,
|
||||
},
|
||||
};
|
||||
use data_types::{
|
||||
schema::{InfluxColumnType, Schema},
|
||||
selection::Selection,
|
||||
};
|
||||
use snafu::{OptionExt, ResultExt, Snafu};
|
||||
use snafu::{ensure, OptionExt, ResultExt, Snafu};
|
||||
use tracing::debug;
|
||||
|
||||
use crate::{
|
||||
|
@ -44,6 +49,11 @@ pub enum Error {
|
|||
source: Box<dyn std::error::Error + Send + Sync>,
|
||||
},
|
||||
|
||||
#[snafu(display("gRPC planner got error finding column values: {}", source))]
|
||||
FindingColumnValues {
|
||||
source: Box<dyn std::error::Error + Send + Sync>,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"gRPC planner got internal error making table_name with default predicate: {}",
|
||||
source
|
||||
|
@ -68,7 +78,7 @@ pub enum Error {
|
|||
source: Box<dyn std::error::Error + Send + Sync>,
|
||||
},
|
||||
|
||||
#[snafu(display("gRPC planner got error creating string set: {}", source))]
|
||||
#[snafu(display("gRPC planner got error creating string set plan: {}", source))]
|
||||
CreatingStringSet { source: StringSetError },
|
||||
|
||||
#[snafu(display(
|
||||
|
@ -81,13 +91,13 @@ pub enum Error {
|
|||
source: crate::provider::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Error building plan: {}", source))]
|
||||
#[snafu(display("gRPC planner got error building plan: {}", source))]
|
||||
BuildingPlan {
|
||||
source: arrow_deps::datafusion::error::DataFusionError,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Error getting table schema for table '{}' in chunk {}: {}",
|
||||
"gRPC planner got error getting table schema for table '{}' in chunk {}: {}",
|
||||
table_name,
|
||||
chunk_id,
|
||||
source
|
||||
|
@ -98,8 +108,28 @@ pub enum Error {
|
|||
source: Box<dyn std::error::Error + Send + Sync>,
|
||||
},
|
||||
|
||||
#[snafu(display("Unsupported predicate: {}", source))]
|
||||
#[snafu(display("gRPC planner error: unsupported predicate: {}", source))]
|
||||
UnsupportedPredicate { source: DataFusionError },
|
||||
|
||||
#[snafu(display(
|
||||
"gRPC planner error: column '{}' is not a tag, it is {:?}",
|
||||
tag_name,
|
||||
influx_column_type
|
||||
))]
|
||||
InvalidTagColumn {
|
||||
tag_name: String,
|
||||
influx_column_type: Option<InfluxColumnType>,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Internal error: tag column '{}' is not Utf8 type, it is {:?} ",
|
||||
tag_name,
|
||||
data_type
|
||||
))]
|
||||
InternalInvalidTagType {
|
||||
tag_name: String,
|
||||
data_type: DataType,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
@ -272,6 +302,155 @@ impl InfluxRPCPlanner {
|
|||
.context(CreatingStringSet)
|
||||
}
|
||||
|
||||
/// Returns a plan which finds the distinct, non-null tag values
|
||||
/// in the specified `tag_name` column of this database which pass
|
||||
/// the conditions specified by `predicate`.
|
||||
pub async fn tag_values<D>(
|
||||
&self,
|
||||
database: &D,
|
||||
tag_name: &str,
|
||||
predicate: Predicate,
|
||||
) -> Result<StringSetPlan>
|
||||
where
|
||||
D: Database + 'static,
|
||||
{
|
||||
debug!(predicate=?predicate, tag_name, "planning tag_values");
|
||||
|
||||
// The basic algorithm is:
|
||||
//
|
||||
// 1. Find all the potential tables in the chunks
|
||||
//
|
||||
// 2. For each table/chunk pair, figure out which have
|
||||
// distinct values that can be found from only metadata and
|
||||
// which need full plans
|
||||
|
||||
// Key is table name, value is set of chunks which had data
|
||||
// for that table but that we couldn't evaluate the predicate
|
||||
// entirely using the metadata
|
||||
let mut need_full_plans = BTreeMap::new();
|
||||
|
||||
let mut known_values = BTreeSet::new();
|
||||
for chunk in self.filtered_chunks(database, &predicate).await? {
|
||||
let table_names = self.chunk_table_names(chunk.as_ref(), &predicate).await?;
|
||||
|
||||
for table_name in table_names {
|
||||
debug!(
|
||||
table_name = table_name.as_str(),
|
||||
chunk_id = chunk.id(),
|
||||
"finding columns in table"
|
||||
);
|
||||
|
||||
// use schema to validate column type
|
||||
let schema = chunk
|
||||
.table_schema(&table_name, Selection::All)
|
||||
.await
|
||||
.expect("to be able to get table schema");
|
||||
|
||||
// Skip this table if the tag_name is not a column in this table
|
||||
let idx = if let Some(idx) = schema.find_index_of(tag_name) {
|
||||
idx
|
||||
} else {
|
||||
continue;
|
||||
};
|
||||
|
||||
// Validate that this really is a Tag column
|
||||
let (influx_column_type, field) = schema.field(idx);
|
||||
ensure!(
|
||||
matches!(influx_column_type, Some(InfluxColumnType::Tag)),
|
||||
InvalidTagColumn {
|
||||
tag_name,
|
||||
influx_column_type,
|
||||
}
|
||||
);
|
||||
ensure!(
|
||||
field.data_type() == &DataType::Utf8,
|
||||
InternalInvalidTagType {
|
||||
tag_name,
|
||||
data_type: field.data_type().clone(),
|
||||
}
|
||||
);
|
||||
|
||||
// try and get the list of values directly from metadata
|
||||
let maybe_values = chunk
|
||||
.column_values(&table_name, tag_name, &predicate)
|
||||
.await
|
||||
.map_err(|e| Box::new(e) as _)
|
||||
.context(FindingColumnValues)?;
|
||||
|
||||
match maybe_values {
|
||||
Some(mut names) => {
|
||||
debug!(names=?names, chunk_id = chunk.id(), "column values found from metadata");
|
||||
known_values.append(&mut names);
|
||||
}
|
||||
None => {
|
||||
debug!(
|
||||
table_name = table_name.as_str(),
|
||||
chunk_id = chunk.id(),
|
||||
"need full plan to find column values"
|
||||
);
|
||||
// can't get columns only from metadata, need
|
||||
// a general purpose plan
|
||||
need_full_plans
|
||||
.entry(table_name)
|
||||
.or_insert_with(Vec::new)
|
||||
.push(Arc::clone(&chunk));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut builder = StringSetPlanBuilder::new();
|
||||
|
||||
let select_exprs = vec![col(tag_name)];
|
||||
|
||||
// At this point, we have a set of tag_values we know at plan
|
||||
// time in `known_values`, and some tables in chunks that we
|
||||
// need to run a plan to find what values pass the predicate.
|
||||
for (table_name, chunks) in need_full_plans.into_iter() {
|
||||
let scan_and_filter = self
|
||||
.scan_and_filter(&table_name, &predicate, chunks)
|
||||
.await?;
|
||||
|
||||
// if we have any data to scan, make a plan!
|
||||
if let Some(TableScanAndFilter {
|
||||
plan_builder,
|
||||
schema: _,
|
||||
}) = scan_and_filter
|
||||
{
|
||||
// TODO use Expr::is_null() here when this
|
||||
// https://issues.apache.org/jira/browse/ARROW-11742
|
||||
// is completed.
|
||||
let tag_name_is_not_null = Expr::IsNotNull(Box::new(col(tag_name)));
|
||||
|
||||
// TODO: optimize this to use "DISTINCT" or do
|
||||
// something more intelligent than simply fetching all
|
||||
// the values and reducing them in the query Executor
|
||||
//
|
||||
// Until then, simply use a plan which looks like:
|
||||
//
|
||||
// Projection
|
||||
// Filter(is not null)
|
||||
// Filter(predicate)
|
||||
// InMemoryScan
|
||||
let plan = plan_builder
|
||||
.project(&select_exprs)
|
||||
.context(BuildingPlan)?
|
||||
.filter(tag_name_is_not_null)
|
||||
.context(BuildingPlan)?
|
||||
.build()
|
||||
.context(BuildingPlan)?;
|
||||
|
||||
builder = builder.append(plan.into());
|
||||
}
|
||||
}
|
||||
|
||||
// add the known values we could find from metadata only
|
||||
builder
|
||||
.append(known_values.into())
|
||||
.build()
|
||||
.context(CreatingStringSet)
|
||||
}
|
||||
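A hedged sketch of driving this tag_values plan end to end; the planner constructor name is an assumption, while PredicateBuilder, Executor::default and to_string_set mirror their use in the mutable buffer tests elsewhere in this merge.

let planner = InfluxRPCPlanner::new();              // assumed constructor
let predicate = PredicateBuilder::default()
    .table("h2o")
    .timestamp_range(0, 1_000)
    .build();

// `db` is assumed to implement the Database trait.
let plan = planner.tag_values(&db, "state", predicate).await?;

// Execute the StringSetPlan to obtain the distinct, non-null tag values.
let executor = Executor::default();
let values = executor.to_string_set(plan).await?;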
|
||||
/// Returns a plan that produces a list of columns and their
|
||||
/// datatypes (as defined in the data written via `write_lines`),
|
||||
/// and which have more than zero rows which pass the conditions
|
||||
|
|
|
@ -11,8 +11,8 @@ use async_trait::async_trait;
|
|||
use data_types::{
|
||||
data::ReplicatedWrite, partition_metadata::TableSummary, schema::Schema, selection::Selection,
|
||||
};
|
||||
use exec::{stringset::StringSet, Executor, SeriesSetPlans};
|
||||
use plan::stringset::StringSetPlan;
|
||||
use exec::{stringset::StringSet, Executor};
|
||||
use plan::seriesset::SeriesSetPlans;
|
||||
|
||||
use std::{fmt::Debug, sync::Arc};
|
||||
|
||||
|
@ -55,15 +55,6 @@ pub trait Database: Debug + Send + Sync {
|
|||
// The functions below are slated for removal (migration into a gRPC query
|
||||
// frontend) ---------
|
||||
|
||||
/// Returns a plan which finds the distinct values in the
|
||||
/// `column_name` column of this database which pass the
|
||||
/// conditions specified by `predicate`.
|
||||
async fn column_values(
|
||||
&self,
|
||||
column_name: &str,
|
||||
predicate: Predicate,
|
||||
) -> Result<StringSetPlan, Self::Error>;
|
||||
|
||||
/// Returns a plan that finds all rows rows which pass the
|
||||
/// conditions specified by `predicate` in the form of logical
|
||||
/// time series.
|
||||
|
@ -132,7 +123,7 @@ pub trait PartitionChunk: Debug + Send + Sync {
|
|||
/// Returns a set of Strings with column names from the specified
|
||||
/// table that have at least one row that matches `predicate`, if
|
||||
/// the predicate can be evaluated entirely on the metadata of
|
||||
/// this Chunk.
|
||||
/// this Chunk. Returns `None` otherwise
|
||||
async fn column_names(
|
||||
&self,
|
||||
table_name: &str,
|
||||
|
@ -140,6 +131,18 @@ pub trait PartitionChunk: Debug + Send + Sync {
|
|||
columns: Selection<'_>,
|
||||
) -> Result<Option<StringSet>, Self::Error>;
|
||||
|
||||
/// Return a set of Strings containing the distinct values in the
|
||||
/// specified column, if the predicate can be evaluated entirely
|
||||
/// on the metadata of this Chunk. Returns `None` otherwise
|
||||
///
|
||||
/// The requested column must have String type.
|
||||
async fn column_values(
|
||||
&self,
|
||||
table_name: &str,
|
||||
column_name: &str,
|
||||
predicate: &Predicate,
|
||||
) -> Result<Option<StringSet>, Self::Error>;
|
||||
|
||||
/// Returns the Schema for a table in this chunk, with the
|
||||
/// specified column selection. An error is returned if the
|
||||
/// selection refers to columns that do not exist.
|
||||
|
|
|
@ -1,2 +1,3 @@
|
|||
pub mod fieldlist;
|
||||
pub mod seriesset;
|
||||
pub mod stringset;
|
||||
|
|
|
@ -0,0 +1,86 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use arrow_deps::datafusion::logical_plan::LogicalPlan;
|
||||
|
||||
use crate::exec::field::FieldColumns;
|
||||
|
||||
/// A plan that can be run to produce a logical stream of time series,
|
||||
/// as represented as sequence of SeriesSets from a single DataFusion
|
||||
/// plan, optionally grouped in some way.
|
||||
#[derive(Debug)]
|
||||
pub struct SeriesSetPlan {
|
||||
/// The table name this came from
|
||||
pub table_name: Arc<String>,
|
||||
|
||||
/// Datafusion plan to execute. The plan must produce
|
||||
/// RecordBatches that have:
|
||||
///
|
||||
/// * fields for each name in `tag_columns` and `field_columns`
|
||||
/// * a timestamp column called 'time'
|
||||
/// * each column in tag_columns must be a String (Utf8)
|
||||
pub plan: LogicalPlan,
|
||||
|
||||
/// The names of the columns that define tags.
|
||||
///
|
||||
/// Note these are `Arc` strings because they are duplicated for
|
||||
/// *each* resulting `SeriesSet` that is produced when this type
|
||||
/// of plan is executed.
|
||||
pub tag_columns: Vec<Arc<String>>,
|
||||
|
||||
/// The names of the columns which are "fields"
|
||||
pub field_columns: FieldColumns,
|
||||
|
||||
/// If present, how many of the series_set_plan::tag_columns
|
||||
/// should be used to compute the group
|
||||
pub num_prefix_tag_group_columns: Option<usize>,
|
||||
}
|
||||
|
||||
impl SeriesSetPlan {
|
||||
/// Create a SeriesSetPlan that will not produce any Group items
|
||||
pub fn new_from_shared_timestamp(
|
||||
table_name: Arc<String>,
|
||||
plan: LogicalPlan,
|
||||
tag_columns: Vec<Arc<String>>,
|
||||
field_columns: Vec<Arc<String>>,
|
||||
) -> Self {
|
||||
Self::new(table_name, plan, tag_columns, field_columns.into())
|
||||
}
|
||||
|
||||
/// Create a SeriesSetPlan that will not produce any Group items
|
||||
pub fn new(
|
||||
table_name: Arc<String>,
|
||||
plan: LogicalPlan,
|
||||
tag_columns: Vec<Arc<String>>,
|
||||
field_columns: FieldColumns,
|
||||
) -> Self {
|
||||
let num_prefix_tag_group_columns = None;
|
||||
|
||||
Self {
|
||||
table_name,
|
||||
plan,
|
||||
tag_columns,
|
||||
field_columns,
|
||||
num_prefix_tag_group_columns,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a SeriesSetPlan that will produce Group items, according to
|
||||
/// num_prefix_tag_group_columns.
|
||||
pub fn grouped(mut self, num_prefix_tag_group_columns: usize) -> Self {
|
||||
self.num_prefix_tag_group_columns = Some(num_prefix_tag_group_columns);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// A container for plans which each produce a logical stream of
|
||||
/// timeseries (from across many potential tables).
|
||||
#[derive(Debug, Default)]
|
||||
pub struct SeriesSetPlans {
|
||||
pub plans: Vec<SeriesSetPlan>,
|
||||
}
|
||||
|
||||
impl From<Vec<SeriesSetPlan>> for SeriesSetPlans {
|
||||
fn from(plans: Vec<SeriesSetPlan>) -> Self {
|
||||
Self { plans }
|
||||
}
|
||||
}
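A minimal sketch (not part of this diff) of how the pieces above compose: per-table `SeriesSetPlan`s can be collected into a `SeriesSetPlans` via the `From` impl, and a prefix of the tag columns can be marked as the group key with `grouped`. The helper name is invented for illustration.

// Hypothetical helper, shown only to illustrate the API above.
fn group_by_first_two_tags(per_table: Vec<SeriesSetPlan>) -> SeriesSetPlans {
    per_table
        .into_iter()
        // the first two tag columns of each plan form the group key
        .map(|plan| plan.grouped(2))
        .collect::<Vec<_>>()
        .into()
}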
|
|
@ -12,14 +12,11 @@ use arrow_deps::{
|
|||
datafusion::physical_plan::{common::SizedRecordBatchStream, SendableRecordBatchStream},
|
||||
};
|
||||
|
||||
use crate::{exec::Executor, group_by::GroupByAndAggregate, plan::stringset::StringSetPlan};
|
||||
use crate::{
|
||||
exec::{
|
||||
stringset::{StringSet, StringSetRef},
|
||||
SeriesSetPlans,
|
||||
},
|
||||
exec::stringset::{StringSet, StringSetRef},
|
||||
Database, DatabaseStore, PartitionChunk, Predicate,
|
||||
};
|
||||
use crate::{exec::Executor, group_by::GroupByAndAggregate, plan::seriesset::SeriesSetPlans};
|
||||
|
||||
use data_types::{
|
||||
data::{lines_to_replicated_write, ReplicatedWrite},
|
||||
|
@ -59,12 +56,6 @@ pub struct TestDatabase {
|
|||
/// `column_names` to return upon next request
|
||||
column_names: Arc<Mutex<Option<StringSetRef>>>,
|
||||
|
||||
/// `column_values` to return upon next request
|
||||
column_values: Arc<Mutex<Option<StringSetRef>>>,
|
||||
|
||||
/// The last request for `column_values`
|
||||
column_values_request: Arc<Mutex<Option<ColumnValuesRequest>>>,
|
||||
|
||||
/// Responses to return on the next request to `query_series`
|
||||
query_series_values: Arc<Mutex<Option<SeriesSetPlans>>>,
|
||||
|
||||
|
@ -78,16 +69,6 @@ pub struct TestDatabase {
|
|||
query_groups_request: Arc<Mutex<Option<QueryGroupsRequest>>>,
|
||||
}
|
||||
|
||||
/// Records the parameters passed to a column values request
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub struct ColumnValuesRequest {
|
||||
/// The name of the requested column
|
||||
pub column_name: String,
|
||||
|
||||
/// Stringified '{:?}' version of the predicate
|
||||
pub predicate: String,
|
||||
}
|
||||
|
||||
/// Records the parameters passed to a `query_series` request
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub struct QuerySeriesRequest {
|
||||
|
@ -178,20 +159,6 @@ impl TestDatabase {
|
|||
*Arc::clone(&self.column_names).lock() = Some(column_names)
|
||||
}
|
||||
|
||||
/// Set the list of column values that will be returned on a call to
|
||||
/// column_values
|
||||
pub fn set_column_values(&self, column_values: Vec<String>) {
|
||||
let column_values = column_values.into_iter().collect::<StringSet>();
|
||||
let column_values = Arc::new(column_values);
|
||||
|
||||
*Arc::clone(&self.column_values).lock() = Some(column_values)
|
||||
}
|
||||
|
||||
/// Get the parameters from the last column name request
|
||||
pub fn get_column_values_request(&self) -> Option<ColumnValuesRequest> {
|
||||
Arc::clone(&self.column_values_request).lock().take()
|
||||
}
|
||||
|
||||
/// Set the series that will be returned on a call to query_series
|
||||
pub fn set_query_series_values(&self, plan: SeriesSetPlans) {
|
||||
*Arc::clone(&self.query_series_values).lock() = Some(plan);
|
||||
|
@ -267,34 +234,6 @@ impl Database for TestDatabase {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Return the mocked out column values, recording the request
|
||||
async fn column_values(
|
||||
&self,
|
||||
column_name: &str,
|
||||
predicate: Predicate,
|
||||
) -> Result<StringSetPlan, Self::Error> {
|
||||
// save the request
|
||||
let predicate = predicate_to_test_string(&predicate);
|
||||
|
||||
let new_column_values_request = Some(ColumnValuesRequest {
|
||||
column_name: column_name.into(),
|
||||
predicate,
|
||||
});
|
||||
|
||||
*Arc::clone(&self.column_values_request).lock() = new_column_values_request;
|
||||
|
||||
// pull out the saved columns
|
||||
let column_values = Arc::clone(&self.column_values)
|
||||
.lock()
|
||||
.take()
|
||||
// Turn None into an error
|
||||
.context(General {
|
||||
message: "No saved column_values in TestDatabase",
|
||||
})?;
|
||||
|
||||
Ok(StringSetPlan::Known(column_values))
|
||||
}
|
||||
|
||||
async fn query_series(&self, predicate: Predicate) -> Result<SeriesSetPlans, Self::Error> {
|
||||
let predicate = predicate_to_test_string(&predicate);
|
||||
|
||||
|
@ -595,6 +534,16 @@ impl PartitionChunk for TestChunk {
|
|||
})
|
||||
}
|
||||
|
||||
async fn column_values(
|
||||
&self,
|
||||
_table_name: &str,
|
||||
_column_name: &str,
|
||||
_predicate: &Predicate,
|
||||
) -> Result<Option<StringSet>, Self::Error> {
|
||||
// Model not being able to get column values from metadata
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn has_table(&self, table_name: &str) -> bool {
|
||||
self.table_schemas.contains_key(table_name)
|
||||
}
|
||||
|
|
|
@ -13,7 +13,7 @@ use async_trait::async_trait;
|
|||
use data_types::{data::ReplicatedWrite, database_rules::DatabaseRules, selection::Selection};
|
||||
use mutable_buffer::MutableBufferDb;
|
||||
use parking_lot::Mutex;
|
||||
use query::{plan::stringset::StringSetPlan, Database, PartitionChunk};
|
||||
use query::{Database, PartitionChunk};
|
||||
use read_buffer::Database as ReadBufferDb;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{OptionExt, ResultExt, Snafu};
|
||||
|
@ -306,23 +306,10 @@ impl Database for Db {
|
|||
.context(MutableBufferWrite)
|
||||
}
|
||||
|
||||
async fn column_values(
|
||||
&self,
|
||||
column_name: &str,
|
||||
predicate: query::predicate::Predicate,
|
||||
) -> Result<StringSetPlan, Self::Error> {
|
||||
self.mutable_buffer
|
||||
.as_ref()
|
||||
.context(DatabaseNotReadable)?
|
||||
.column_values(column_name, predicate)
|
||||
.await
|
||||
.context(MutableBufferRead)
|
||||
}
|
||||
|
||||
async fn query_series(
|
||||
&self,
|
||||
predicate: query::predicate::Predicate,
|
||||
) -> Result<query::exec::SeriesSetPlans, Self::Error> {
|
||||
) -> Result<query::plan::seriesset::SeriesSetPlans, Self::Error> {
|
||||
self.mutable_buffer
|
||||
.as_ref()
|
||||
.context(DatabaseNotReadable)?
|
||||
|
@ -335,7 +322,7 @@ impl Database for Db {
|
|||
&self,
|
||||
predicate: query::predicate::Predicate,
|
||||
gby_agg: query::group_by::GroupByAndAggregate,
|
||||
) -> Result<query::exec::SeriesSetPlans, Self::Error> {
|
||||
) -> Result<query::plan::seriesset::SeriesSetPlans, Self::Error> {
|
||||
self.mutable_buffer
|
||||
.as_ref()
|
||||
.context(DatabaseNotReadable)?
|
||||
|
|
|
@ -350,4 +350,40 @@ impl PartitionChunk for DBChunk {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn column_values(
|
||||
&self,
|
||||
table_name: &str,
|
||||
column_name: &str,
|
||||
predicate: &Predicate,
|
||||
) -> Result<Option<StringSet>, Self::Error> {
|
||||
match self {
|
||||
Self::MutableBuffer { chunk } => {
|
||||
use mutable_buffer::chunk::Error::UnsupportedColumnTypeForListingValues;
|
||||
|
||||
let chunk_predicate = chunk
|
||||
.compile_predicate(predicate)
|
||||
.context(MutableBufferChunk)?;
|
||||
|
||||
let values = chunk.tag_column_values(table_name, column_name, &chunk_predicate);
|
||||
|
||||
// if the mutable buffer doesn't support getting
|
||||
// values for this kind of column, report back None
|
||||
if let Err(UnsupportedColumnTypeForListingValues { .. }) = values {
|
||||
Ok(None)
|
||||
} else {
|
||||
values.context(MutableBufferChunk)
|
||||
}
|
||||
}
|
||||
Self::ReadBuffer { .. } => {
|
||||
// TODO hook up read buffer API here when ready. Until
|
||||
// now, fallback to using a full plan
|
||||
// https://github.com/influxdata/influxdb_iox/issues/857
|
||||
Ok(None)
|
||||
}
|
||||
Self::ParquetFile => {
|
||||
unimplemented!("parquet file not implemented for column_values")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
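As a hedged illustration (names invented, not part of IOx), a caller of `PartitionChunk::column_values` treats `Some` as a metadata-only answer and `None` as a signal to fall back to a full query plan, which is exactly how the `UnsupportedColumnTypeForListingValues` case above is surfaced.

// Sketch only: logs which path was taken and hands the Option back to the planner.
async fn tag_values_from_metadata<C: PartitionChunk>(
    chunk: &C,
    table_name: &str,
    column_name: &str,
    predicate: &Predicate,
) -> Result<Option<StringSet>, C::Error> {
    let maybe_values = chunk.column_values(table_name, column_name, predicate).await?;
    match &maybe_values {
        // answered purely from chunk metadata, no scan needed
        Some(_) => println!("column values answered from chunk metadata"),
        // metadata was not enough; the planner must build a full scan plan instead
        None => println!("falling back to a full scan plan"),
    }
    Ok(maybe_values)
}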
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
pub mod field_columns;
|
||||
pub mod table_names;
|
||||
pub mod tag_keys;
|
||||
pub mod tag_values;
|
||||
|
|
|
@ -0,0 +1,253 @@
|
|||
use arrow_deps::datafusion::logical_plan::{col, lit};
|
||||
use query::{
|
||||
exec::{
|
||||
stringset::{IntoStringSet, StringSetRef},
|
||||
Executor,
|
||||
},
|
||||
frontend::influxrpc::InfluxRPCPlanner,
|
||||
predicate::PredicateBuilder,
|
||||
};
|
||||
|
||||
use crate::query_tests::scenarios::*;
|
||||
|
||||
/// runs tag_values(predicate) and compares it to the expected
|
||||
/// output
|
||||
macro_rules! run_tag_values_test_case {
|
||||
($DB_SETUP:expr, $TAG_NAME:expr, $PREDICATE:expr, $EXPECTED_VALUES:expr) => {
|
||||
test_helpers::maybe_start_logging();
|
||||
let predicate = $PREDICATE;
|
||||
let tag_name = $TAG_NAME;
|
||||
let expected_values = $EXPECTED_VALUES;
|
||||
for scenario in $DB_SETUP.make().await {
|
||||
let DBScenario {
|
||||
scenario_name, db, ..
|
||||
} = scenario;
|
||||
println!("Running scenario '{}'", scenario_name);
|
||||
println!("Predicate: '{:#?}'", predicate);
|
||||
let planner = InfluxRPCPlanner::new();
|
||||
let executor = Executor::new();
|
||||
|
||||
let plan = planner
|
||||
.tag_values(&db, &tag_name, predicate.clone())
|
||||
.await
|
||||
.expect("built plan successfully");
|
||||
let names = executor
|
||||
.to_string_set(plan)
|
||||
.await
|
||||
.expect("converted plan to strings successfully");
|
||||
|
||||
assert_eq!(
|
||||
names,
|
||||
to_stringset(&expected_values),
|
||||
"Error in scenario '{}'\n\nexpected:\n{:?}\nactual:\n{:?}",
|
||||
scenario_name,
|
||||
expected_values,
|
||||
names
|
||||
);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_tag_values_no_tag() {
|
||||
let predicate = PredicateBuilder::default().build();
|
||||
// If the tag is not present, expect no values back (not error)
|
||||
let tag_name = "tag_not_in_chunks";
|
||||
let expected_tag_keys = vec![];
|
||||
run_tag_values_test_case!(
|
||||
TwoMeasurementsManyNulls {},
|
||||
tag_name,
|
||||
predicate,
|
||||
expected_tag_keys
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_tag_values_no_predicate_state_col() {
|
||||
let predicate = PredicateBuilder::default().build();
|
||||
let tag_name = "state";
|
||||
let expected_tag_keys = vec!["CA", "MA", "NY"];
|
||||
run_tag_values_test_case!(
|
||||
TwoMeasurementsManyNulls {},
|
||||
tag_name,
|
||||
predicate,
|
||||
expected_tag_keys
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_tag_values_no_predicate_city_col() {
|
||||
let tag_name = "city";
|
||||
let predicate = PredicateBuilder::default().build();
|
||||
let expected_tag_keys = vec!["Boston", "LA", "NYC"];
|
||||
run_tag_values_test_case!(
|
||||
TwoMeasurementsManyNulls {},
|
||||
tag_name,
|
||||
predicate,
|
||||
expected_tag_keys
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_tag_values_timestamp_pred_state_col() {
|
||||
let tag_name = "state";
|
||||
let predicate = PredicateBuilder::default().timestamp_range(50, 201).build();
|
||||
let expected_tag_keys = vec!["CA", "MA"];
|
||||
run_tag_values_test_case!(
|
||||
TwoMeasurementsManyNulls {},
|
||||
tag_name,
|
||||
predicate,
|
||||
expected_tag_keys
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_tag_values_state_pred_state_col() {
|
||||
let tag_name = "city";
|
||||
let predicate = PredicateBuilder::default()
|
||||
.add_expr(col("state").eq(lit("MA"))) // state=MA
|
||||
.build();
|
||||
let expected_tag_keys = vec!["Boston"];
|
||||
run_tag_values_test_case!(
|
||||
TwoMeasurementsManyNulls {},
|
||||
tag_name,
|
||||
predicate,
|
||||
expected_tag_keys
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_tag_values_timestamp_and_state_pred_state_col() {
|
||||
let tag_name = "state";
|
||||
let predicate = PredicateBuilder::default()
|
||||
.timestamp_range(150, 301)
|
||||
.add_expr(col("state").eq(lit("MA"))) // state=MA
|
||||
.build();
|
||||
let expected_tag_keys = vec!["MA"];
|
||||
run_tag_values_test_case!(
|
||||
TwoMeasurementsManyNulls {},
|
||||
tag_name,
|
||||
predicate,
|
||||
expected_tag_keys
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_tag_values_table_pred_state_col() {
|
||||
let tag_name = "state";
|
||||
let predicate = PredicateBuilder::default().table("h2o").build();
|
||||
let expected_tag_keys = vec!["CA", "MA"];
|
||||
run_tag_values_test_case!(
|
||||
TwoMeasurementsManyNulls {},
|
||||
tag_name,
|
||||
predicate,
|
||||
expected_tag_keys
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_tag_values_table_pred_city_col() {
|
||||
let tag_name = "city";
|
||||
let predicate = PredicateBuilder::default().table("o2").build();
|
||||
let expected_tag_keys = vec!["Boston", "NYC"];
|
||||
run_tag_values_test_case!(
|
||||
TwoMeasurementsManyNulls {},
|
||||
tag_name,
|
||||
predicate,
|
||||
expected_tag_keys
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_tag_values_table_and_timestamp_and_table_pred_state_col() {
|
||||
let tag_name = "state";
|
||||
let predicate = PredicateBuilder::default()
|
||||
.table("o2")
|
||||
.timestamp_range(50, 201)
|
||||
.build();
|
||||
let expected_tag_keys = vec!["MA"];
|
||||
run_tag_values_test_case!(
|
||||
TwoMeasurementsManyNulls {},
|
||||
tag_name,
|
||||
predicate,
|
||||
expected_tag_keys
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_tag_values_table_and_state_pred_state_col() {
|
||||
let tag_name = "state";
|
||||
let predicate = PredicateBuilder::default()
|
||||
.table("o2")
|
||||
.add_expr(col("state").eq(lit("NY"))) // state=NY
|
||||
.build();
|
||||
let expected_tag_keys = vec!["NY"];
|
||||
run_tag_values_test_case!(
|
||||
TwoMeasurementsManyNulls {},
|
||||
tag_name,
|
||||
predicate,
|
||||
expected_tag_keys
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_tag_values_table_and_timestamp_and_state_pred_state_col() {
|
||||
let tag_name = "state";
|
||||
let predicate = PredicateBuilder::default()
|
||||
.table("o2")
|
||||
.timestamp_range(1, 550)
|
||||
.add_expr(col("state").eq(lit("NY"))) // state=NY
|
||||
.build();
|
||||
let expected_tag_keys = vec!["NY"];
|
||||
run_tag_values_test_case!(
|
||||
TwoMeasurementsManyNulls {},
|
||||
tag_name,
|
||||
predicate,
|
||||
expected_tag_keys
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_tag_values_table_and_timestamp_and_state_pred_state_col_no_rows() {
|
||||
let tag_name = "state";
|
||||
let predicate = PredicateBuilder::default()
|
||||
.table("o2")
|
||||
.timestamp_range(1, 300) // filters out the NY row
|
||||
.add_expr(col("state").eq(lit("NY"))) // state=NY
|
||||
.build();
|
||||
let expected_tag_keys = vec![];
|
||||
run_tag_values_test_case!(
|
||||
TwoMeasurementsManyNulls {},
|
||||
tag_name,
|
||||
predicate,
|
||||
expected_tag_keys
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_tag_values_field_col() {
|
||||
let db_setup = TwoMeasurementsManyNulls {};
|
||||
let predicate = PredicateBuilder::default().build();
|
||||
|
||||
for scenario in db_setup.make().await {
|
||||
let DBScenario {
|
||||
scenario_name, db, ..
|
||||
} = scenario;
|
||||
println!("Running scenario '{}'", scenario_name);
|
||||
println!("Predicate: '{:#?}'", predicate);
|
||||
let planner = InfluxRPCPlanner::new();
|
||||
|
||||
// Test: temp is a field, not a tag
|
||||
let tag_name = "temp";
|
||||
let plan_result = planner.tag_values(&db, &tag_name, predicate.clone()).await;
|
||||
|
||||
assert_eq!(
|
||||
plan_result.unwrap_err().to_string(),
|
||||
"gRPC planner error: column \'temp\' is not a tag, it is Some(Field(Float))"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn to_stringset(v: &[&str]) -> StringSetRef {
|
||||
v.into_stringset().unwrap()
|
||||
}
|
|
@ -1,8 +1,8 @@
|
|||
//! Implementation of command line option for manipulating and showing server
|
||||
//! config
|
||||
|
||||
use clap::arg_enum;
|
||||
use std::{net::SocketAddr, net::ToSocketAddrs, path::PathBuf};
|
||||
|
||||
use structopt::StructOpt;
|
||||
|
||||
/// The default bind address for the HTTP API.
|
||||
|
@ -91,16 +91,37 @@ pub struct Config {
|
|||
#[structopt(long = "--data-dir", env = "INFLUXDB_IOX_DB_DIR")]
|
||||
pub database_directory: Option<PathBuf>,
|
||||
|
||||
#[structopt(
|
||||
long = "--object-store",
|
||||
env = "INFLUXDB_IOX_OBJECT_STORE",
|
||||
possible_values = &ObjectStore::variants(),
|
||||
case_insensitive = true,
|
||||
long_help = r#"Which object storage to use. If not specified, defaults to memory.
|
||||
|
||||
Possible values (case insensitive):
|
||||
|
||||
* memory (default): Effectively no object persistence.
|
||||
* file: Stores objects in the local filesystem. Must also set `--data-dir`.
|
||||
* s3: Amazon S3. Must also set `--bucket`, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, and
|
||||
AWS_DEFAULT_REGION.
|
||||
* google: Google Cloud Storage. Must also set `--bucket` and SERVICE_ACCOUNT.
|
||||
* azure: Microsoft Azure blob storage. Must also set `--bucket`, AZURE_STORAGE_ACCOUNT,
|
||||
and AZURE_STORAGE_MASTER_KEY.
|
||||
"#,
|
||||
)]
|
||||
pub object_store: Option<ObjectStore>,
|
||||
|
||||
/// Name of the bucket to use for the object store. Must also set
|
||||
/// `--object_store` to a cloud object storage to have any effect.
|
||||
///
|
||||
/// If using Google Cloud Storage for the object store, this item, as well
|
||||
/// as SERVICE_ACCOUNT must be set.
|
||||
#[structopt(long = "--gcp-bucket", env = "INFLUXDB_IOX_GCP_BUCKET")]
|
||||
pub gcp_bucket: Option<String>,
|
||||
|
||||
///
|
||||
/// If using S3 for the object store, this item, as well
|
||||
/// as AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_DEFAULT_REGION must
|
||||
/// be set.
|
||||
#[structopt(long = "--s3-bucket", env = "INFLUXDB_IOX_S3_BUCKET")]
|
||||
pub s3_bucket: Option<String>,
|
||||
#[structopt(long = "--bucket", env = "INFLUXDB_IOX_BUCKET")]
|
||||
pub bucket: Option<String>,
|
||||
|
||||
/// If set, Jaeger traces are emitted to this host
|
||||
/// using the OpenTelemetry tracer.
|
||||
|
@ -167,6 +188,17 @@ fn strip_server(args: impl Iterator<Item = String>) -> Vec<String> {
|
|||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
arg_enum! {
|
||||
#[derive(Debug, Copy, Clone, PartialEq)]
|
||||
pub enum ObjectStore {
|
||||
Memory,
|
||||
File,
|
||||
S3,
|
||||
Google,
|
||||
Azure,
|
||||
}
|
||||
}
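As a rough sketch of how the new flag is exercised (assumption: no other arguments are required, matching the tests later in this file), the `--object-store`/`--bucket` pair parses into the fields above via `Config::from_iter_safe` and the `strip_server` helper defined just before this enum.

// Hedged sketch: parse an S3 object store configuration from CLI-style arguments.
fn parse_s3_config_sketch() -> Config {
    let args = ["cmd", "server", "--object-store", "s3", "--bucket", "my-bucket"]
        .iter()
        .map(ToString::to_string);
    let config = Config::from_iter_safe(strip_server(args)).expect("object store flags parse");
    assert_eq!(config.object_store, Some(ObjectStore::S3));
    assert_eq!(config.bucket, Some("my-bucket".to_string()));
    config
}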
|
||||
|
||||
/// How to format output logging messages
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum LogFormat {
|
||||
|
@ -301,15 +333,6 @@ mod tests {
|
|||
clap::ErrorKind::ValueValidation
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
Config::from_iter_safe(strip_server(
|
||||
to_vec(&["cmd", "server", "--api-bind", "badhost.badtld:1234"]).into_iter(),
|
||||
))
|
||||
.map_err(|e| e.kind)
|
||||
.expect_err("must fail"),
|
||||
clap::ErrorKind::ValueValidation
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
@ -12,7 +12,7 @@ use panic_logging::SendPanicsToTracing;
|
|||
use server::{ConnectionManagerImpl as ConnectionManager, Server as AppServer};
|
||||
|
||||
use crate::commands::{
|
||||
config::{load_config, Config},
|
||||
config::{load_config, Config, ObjectStore as ObjStoreOpt},
|
||||
logging::LoggingLevel,
|
||||
};
|
||||
|
||||
|
@ -64,6 +64,12 @@ pub enum Error {
|
|||
|
||||
#[snafu(display("Error serving RPC: {}", source))]
|
||||
ServingRPC { source: self::rpc::Error },
|
||||
|
||||
#[snafu(display("Specifed {} for the object store, but not a bucket", object_store))]
|
||||
InvalidCloudObjectStoreConfiguration { object_store: ObjStoreOpt },
|
||||
|
||||
#[snafu(display("Specified file for the object store, but not a database directory"))]
|
||||
InvalidFileObjectStoreConfiguration,
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
@ -92,22 +98,53 @@ pub async fn main(logging_level: LoggingLevel, config: Option<Config>) -> Result
|
|||
let f = SendPanicsToTracing::new();
|
||||
std::mem::forget(f);
|
||||
|
||||
let db_dir = &config.database_directory;
|
||||
|
||||
let object_store = if let Some(bucket_name) = &config.gcp_bucket {
|
||||
info!("Using GCP bucket {} for storage", bucket_name);
|
||||
ObjectStore::new_google_cloud_storage(GoogleCloudStorage::new(bucket_name))
|
||||
} else if let Some(bucket_name) = &config.s3_bucket {
|
||||
info!("Using S3 bucket {} for storage", bucket_name);
|
||||
// rusoto::Region's default takes the value from the AWS_DEFAULT_REGION env var.
|
||||
ObjectStore::new_amazon_s3(AmazonS3::new(Default::default(), bucket_name))
|
||||
} else if let Some(db_dir) = db_dir {
|
||||
info!("Using local dir {:?} for storage", db_dir);
|
||||
fs::create_dir_all(db_dir).context(CreatingDatabaseDirectory { path: db_dir })?;
|
||||
ObjectStore::new_file(object_store::disk::File::new(&db_dir))
|
||||
} else {
|
||||
warn!("NO PERSISTENCE: using memory for object storage");
|
||||
ObjectStore::new_in_memory(object_store::memory::InMemory::new())
|
||||
let object_store = match (
|
||||
config.object_store,
|
||||
config.bucket,
|
||||
config.database_directory,
|
||||
) {
|
||||
(Some(ObjStoreOpt::Google), Some(bucket), _) => {
|
||||
info!("Using GCP bucket {} for storage", bucket);
|
||||
ObjectStore::new_google_cloud_storage(GoogleCloudStorage::new(bucket))
|
||||
}
|
||||
(Some(ObjStoreOpt::Google), None, _) => {
|
||||
return InvalidCloudObjectStoreConfiguration {
|
||||
object_store: ObjStoreOpt::Google,
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
(Some(ObjStoreOpt::S3), Some(bucket), _) => {
|
||||
info!("Using S3 bucket {} for storage", bucket);
|
||||
// rusoto::Region's default takes the value from the AWS_DEFAULT_REGION env var.
|
||||
ObjectStore::new_amazon_s3(AmazonS3::new(Default::default(), bucket))
|
||||
}
|
||||
(Some(ObjStoreOpt::S3), None, _) => {
|
||||
return InvalidCloudObjectStoreConfiguration {
|
||||
object_store: ObjStoreOpt::S3,
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
(Some(ObjStoreOpt::File), _, Some(ref db_dir)) => {
|
||||
info!("Using local dir {:?} for storage", db_dir);
|
||||
fs::create_dir_all(db_dir).context(CreatingDatabaseDirectory { path: db_dir })?;
|
||||
ObjectStore::new_file(object_store::disk::File::new(&db_dir))
|
||||
}
|
||||
(Some(ObjStoreOpt::File), _, None) => {
|
||||
return InvalidFileObjectStoreConfiguration.fail();
|
||||
}
|
||||
(Some(ObjStoreOpt::Azure), Some(_bucket), _) => {
|
||||
unimplemented!();
|
||||
}
|
||||
(Some(ObjStoreOpt::Azure), None, _) => {
|
||||
return InvalidCloudObjectStoreConfiguration {
|
||||
object_store: ObjStoreOpt::Azure,
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
(Some(ObjStoreOpt::Memory), _, _) | (None, _, _) => {
|
||||
warn!("NO PERSISTENCE: using memory for object storage");
|
||||
ObjectStore::new_in_memory(object_store::memory::InMemory::new())
|
||||
}
|
||||
};
|
||||
let object_storage = Arc::new(object_store);
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
//! This module contains a partial implementation of the /v2 HTTP api
|
||||
//! routes for InfluxDB IOx.
|
||||
//! This module contains the HTTP api for InfluxDB IOx, including a
|
||||
//! partial implementation of the /v2 HTTP api routes from InfluxDB
|
||||
//! for compatibility.
|
||||
//!
|
||||
//! Note that these routes are designed to be just helpers for now,
|
||||
//! and "close enough" to the real /v2 api to be able to test InfluxDB IOx
|
||||
|
@ -10,7 +11,7 @@
|
|||
//! database names and may remove this quasi /v2 API.
|
||||
|
||||
// Influx crates
|
||||
use arrow_deps::{arrow, datafusion::physical_plan::collect};
|
||||
use arrow_deps::datafusion::physical_plan::collect;
|
||||
use data_types::{
|
||||
database_rules::DatabaseRules,
|
||||
http::{ListDatabasesResponse, WalMetadataQuery},
|
||||
|
@ -25,7 +26,7 @@ use server::{ConnectionManager, Server as AppServer};
|
|||
// External crates
|
||||
use bytes::{Bytes, BytesMut};
|
||||
use futures::{self, StreamExt};
|
||||
use http::header::CONTENT_ENCODING;
|
||||
use http::header::{CONTENT_ENCODING, CONTENT_TYPE};
|
||||
use hyper::{Body, Method, Request, Response, StatusCode};
|
||||
use routerify::{prelude::*, Middleware, RequestInfo, Router, RouterError, RouterService};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
@ -35,6 +36,9 @@ use tracing::{debug, error, info};
|
|||
use data_types::http::WalMetadataResponse;
|
||||
use std::{fmt::Debug, str, sync::Arc};
|
||||
|
||||
mod format;
|
||||
use format::QueryOutputFormat;
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum ApplicationError {
|
||||
// Internal (unexpected) errors
|
||||
|
@ -86,7 +90,9 @@ pub enum ApplicationError {
|
|||
#[snafu(display("Expected query string in request, but none was provided"))]
|
||||
ExpectedQueryString {},
|
||||
|
||||
#[snafu(display("Invalid query string '{}': {}", query_string, source))]
|
||||
/// Error for when we could not parse the http query uri (e.g.
|
||||
/// `?foo=bar&bar=baz`)
|
||||
#[snafu(display("Invalid query string in HTTP URI '{}': {}", query_string, source))]
|
||||
InvalidQueryString {
|
||||
query_string: String,
|
||||
source: serde_urlencoded::de::Error,
|
||||
|
@ -151,6 +157,21 @@ pub enum ApplicationError {
|
|||
|
||||
#[snafu(display("Database {} does not have a WAL", name))]
|
||||
WALNotFound { name: String },
|
||||
|
||||
#[snafu(display("Internal error creating HTTP response: {}", source))]
|
||||
CreatingResponse { source: http::Error },
|
||||
|
||||
#[snafu(display(
|
||||
"Error formatting results of SQL query '{}' using '{:?}': {}",
|
||||
q,
|
||||
format,
|
||||
source
|
||||
))]
|
||||
FormattingResult {
|
||||
q: String,
|
||||
format: QueryOutputFormat,
|
||||
source: format::Error,
|
||||
},
|
||||
}
|
||||
|
||||
impl ApplicationError {
|
||||
|
@ -181,6 +202,8 @@ impl ApplicationError {
|
|||
Self::DatabaseNameError { .. } => self.bad_request(),
|
||||
Self::DatabaseNotFound { .. } => self.not_found(),
|
||||
Self::WALNotFound { .. } => self.not_found(),
|
||||
Self::CreatingResponse { .. } => self.internal_error(),
|
||||
Self::FormattingResult { .. } => self.internal_error(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -259,10 +282,11 @@ where
|
|||
})) // this endpoint is for API backward compatibility with InfluxDB 2.x
|
||||
.post("/api/v2/write", write::<M>)
|
||||
.get("/ping", ping)
|
||||
.get("/api/v2/read", read::<M>)
|
||||
.get("/health", health)
|
||||
.get("/iox/api/v1/databases", list_databases::<M>)
|
||||
.put("/iox/api/v1/databases/:name", create_database::<M>)
|
||||
.get("/iox/api/v1/databases/:name", get_database::<M>)
|
||||
.get("/iox/api/v1/databases/:name/query", query::<M>)
|
||||
.get("/iox/api/v1/databases/:name/wal/meta", get_wal_meta::<M>)
|
||||
.put("/iox/api/v1/id", set_writer::<M>)
|
||||
.get("/iox/api/v1/id", get_writer::<M>)
|
||||
|
@ -406,53 +430,67 @@ where
|
|||
.unwrap())
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
/// Body of the request to the /read endpoint
|
||||
struct ReadInfo {
|
||||
org: String,
|
||||
bucket: String,
|
||||
// TODO This is currently a "SQL" request -- should be updated to conform
|
||||
// to the V2 API for reading (using timestamps, etc).
|
||||
sql_query: String,
|
||||
#[derive(Deserialize, Debug, PartialEq)]
|
||||
/// Parsed URI Parameters of the request to the .../query endpoint
|
||||
struct QueryParams {
|
||||
q: String,
|
||||
#[serde(default)]
|
||||
format: QueryOutputFormat,
|
||||
}
|
||||
|
||||
// TODO: figure out how to stream read results out rather than rendering the
|
||||
// whole thing in mem
|
||||
#[tracing::instrument(level = "debug")]
|
||||
async fn read<M: ConnectionManager + Send + Sync + Debug + 'static>(
|
||||
async fn query<M: ConnectionManager + Send + Sync + Debug + 'static>(
|
||||
req: Request<Body>,
|
||||
) -> Result<Response<Body>, ApplicationError> {
|
||||
let server = Arc::clone(&req.data::<Arc<AppServer<M>>>().expect("server state"));
|
||||
let query = req.uri().query().context(ExpectedQueryString {})?;
|
||||
|
||||
let read_info: ReadInfo = serde_urlencoded::from_str(query).context(InvalidQueryString {
|
||||
query_string: query,
|
||||
})?;
|
||||
let uri_query = req.uri().query().context(ExpectedQueryString {})?;
|
||||
|
||||
let QueryParams { q, format } =
|
||||
serde_urlencoded::from_str(uri_query).context(InvalidQueryString {
|
||||
query_string: uri_query,
|
||||
})?;
|
||||
|
||||
let db_name_str = req
|
||||
.param("name")
|
||||
.expect("db name must have been set by routerify")
|
||||
.clone();
|
||||
|
||||
let db_name = DatabaseName::new(&db_name_str).context(DatabaseNameError)?;
|
||||
debug!(uri = ?req.uri(), %q, ?format, %db_name, "running SQL query");
|
||||
|
||||
let db = server
|
||||
.db(&db_name)
|
||||
.await
|
||||
.context(DatabaseNotFound { name: &db_name_str })?;
|
||||
|
||||
let planner = SQLQueryPlanner::default();
|
||||
let executor = server.executor();
|
||||
|
||||
let db_name = org_and_bucket_to_database(&read_info.org, &read_info.bucket)
|
||||
.context(BucketMappingError)?;
|
||||
|
||||
let db = server.db(&db_name).await.context(BucketNotFound {
|
||||
org: read_info.org.clone(),
|
||||
bucket: read_info.bucket.clone(),
|
||||
})?;
|
||||
|
||||
let physical_plan = planner
|
||||
.query(db.as_ref(), &read_info.sql_query, executor.as_ref())
|
||||
.query(db.as_ref(), &q, executor.as_ref())
|
||||
.await
|
||||
.context(PlanningSQLQuery { query })?;
|
||||
.context(PlanningSQLQuery { query: &q })?;
|
||||
|
||||
// TODO: stream read results out rather than rendering the
|
||||
// whole thing in mem
|
||||
let batches = collect(physical_plan)
|
||||
.await
|
||||
.map_err(|e| Box::new(e) as _)
|
||||
.context(Query { db_name })?;
|
||||
|
||||
let results = arrow::util::pretty::pretty_format_batches(&batches).unwrap();
|
||||
let results = format
|
||||
.format(&batches)
|
||||
.context(FormattingResult { q, format })?;
|
||||
|
||||
Ok(Response::new(Body::from(results.into_bytes())))
|
||||
let body = Body::from(results.into_bytes());
|
||||
|
||||
let response = Response::builder()
|
||||
.header(CONTENT_TYPE, format.content_type())
|
||||
.body(body)
|
||||
.context(CreatingResponse)?;
|
||||
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "debug")]
|
||||
|
@ -637,11 +675,17 @@ async fn get_writer<M: ConnectionManager + Send + Sync + Debug + 'static>(
|
|||
|
||||
// Route to test that the server is alive
|
||||
#[tracing::instrument(level = "debug")]
|
||||
async fn ping(req: Request<Body>) -> Result<Response<Body>, ApplicationError> {
|
||||
async fn ping(_: Request<Body>) -> Result<Response<Body>, ApplicationError> {
|
||||
let response_body = "PONG";
|
||||
Ok(Response::new(Body::from(response_body.to_string())))
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "debug")]
|
||||
async fn health(_: Request<Body>) -> Result<Response<Body>, ApplicationError> {
|
||||
let response_body = "OK";
|
||||
Ok(Response::new(Body::from(response_body.to_string())))
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
/// Arguments in the query string of the request to /partitions
|
||||
struct DatabaseInfo {
|
||||
|
@ -749,7 +793,6 @@ mod tests {
|
|||
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
|
||||
|
||||
use arrow_deps::{arrow::record_batch::RecordBatch, assert_table_eq};
|
||||
use http::header;
|
||||
use query::exec::Executor;
|
||||
use reqwest::{Client, Response};
|
||||
|
||||
|
@ -783,6 +826,22 @@ mod tests {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_health() -> Result<()> {
|
||||
let test_storage = Arc::new(AppServer::new(
|
||||
ConnectionManagerImpl {},
|
||||
Arc::new(ObjectStore::new_in_memory(InMemory::new())),
|
||||
));
|
||||
let server_url = test_server(Arc::clone(&test_storage));
|
||||
|
||||
let client = Client::new();
|
||||
let response = client.get(&format!("{}/health", server_url)).send().await;
|
||||
|
||||
// Print the response so if the test fails, we have a log of what went wrong
|
||||
check_response("health", response, StatusCode::OK, "OK").await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_write() -> Result<()> {
|
||||
let test_storage = Arc::new(AppServer::new(
|
||||
|
@ -833,6 +892,139 @@ mod tests {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Sets up a test database with some data for testing the query endpoint
|
||||
/// returns a client for communicating with the server, and the server
|
||||
/// endpoint
|
||||
async fn setup_test_data() -> (Client, String) {
|
||||
let test_storage: Arc<AppServer<ConnectionManagerImpl>> = Arc::new(AppServer::new(
|
||||
ConnectionManagerImpl {},
|
||||
Arc::new(ObjectStore::new_in_memory(InMemory::new())),
|
||||
));
|
||||
test_storage.set_id(1);
|
||||
test_storage
|
||||
.create_database("MyOrg_MyBucket", DatabaseRules::new())
|
||||
.await
|
||||
.unwrap();
|
||||
let server_url = test_server(Arc::clone(&test_storage));
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
let lp_data = "h2o_temperature,location=santa_monica,state=CA surface_degrees=65.2,bottom_degrees=50.4 1568756160";
|
||||
|
||||
// send write data
|
||||
let bucket_name = "MyBucket";
|
||||
let org_name = "MyOrg";
|
||||
let response = client
|
||||
.post(&format!(
|
||||
"{}/api/v2/write?bucket={}&org={}",
|
||||
server_url, bucket_name, org_name
|
||||
))
|
||||
.body(lp_data)
|
||||
.send()
|
||||
.await;
|
||||
|
||||
check_response("write", response, StatusCode::NO_CONTENT, "").await;
|
||||
(client, server_url)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_query_pretty() -> Result<()> {
|
||||
let (client, server_url) = setup_test_data().await;
|
||||
|
||||
// send query data
|
||||
let response = client
|
||||
.get(&format!(
|
||||
"{}/iox/api/v1/databases/MyOrg_MyBucket/query?q={}",
|
||||
server_url, "select%20*%20from%20h2o_temperature"
|
||||
))
|
||||
.send()
|
||||
.await;
|
||||
|
||||
assert_eq!(get_content_type(&response), "text/plain");
|
||||
|
||||
let res = "+----------------+--------------+-------+-----------------+------------+\n\
|
||||
| bottom_degrees | location | state | surface_degrees | time |\n\
|
||||
+----------------+--------------+-------+-----------------+------------+\n\
|
||||
| 50.4 | santa_monica | CA | 65.2 | 1568756160 |\n\
|
||||
+----------------+--------------+-------+-----------------+------------+\n";
|
||||
check_response("query", response, StatusCode::OK, res).await;
|
||||
|
||||
// same response is expected if we explicitly request 'format=pretty'
|
||||
let response = client
|
||||
.get(&format!(
|
||||
"{}/iox/api/v1/databases/MyOrg_MyBucket/query?q={}&format=pretty",
|
||||
server_url, "select%20*%20from%20h2o_temperature"
|
||||
))
|
||||
.send()
|
||||
.await;
|
||||
assert_eq!(get_content_type(&response), "text/plain");
|
||||
|
||||
check_response("query", response, StatusCode::OK, res).await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_query_csv() -> Result<()> {
|
||||
let (client, server_url) = setup_test_data().await;
|
||||
|
||||
// send query data
|
||||
let response = client
|
||||
.get(&format!(
|
||||
"{}/iox/api/v1/databases/MyOrg_MyBucket/query?q={}&format=csv",
|
||||
server_url, "select%20*%20from%20h2o_temperature"
|
||||
))
|
||||
.send()
|
||||
.await;
|
||||
|
||||
assert_eq!(get_content_type(&response), "text/csv");
|
||||
|
||||
let res = "bottom_degrees,location,state,surface_degrees,time\n\
|
||||
50.4,santa_monica,CA,65.2,1568756160\n";
|
||||
check_response("query", response, StatusCode::OK, res).await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_query_json() -> Result<()> {
|
||||
let (client, server_url) = setup_test_data().await;
|
||||
|
||||
// send a second line of data to demonstrate how that works
|
||||
let lp_data = "h2o_temperature,location=Boston,state=MA surface_degrees=50.2 1568756160";
|
||||
|
||||
// send write data
|
||||
let bucket_name = "MyBucket";
|
||||
let org_name = "MyOrg";
|
||||
let response = client
|
||||
.post(&format!(
|
||||
"{}/api/v2/write?bucket={}&org={}",
|
||||
server_url, bucket_name, org_name
|
||||
))
|
||||
.body(lp_data)
|
||||
.send()
|
||||
.await;
|
||||
|
||||
check_response("write", response, StatusCode::NO_CONTENT, "").await;
|
||||
|
||||
// send query data
|
||||
let response = client
|
||||
.get(&format!(
|
||||
"{}/iox/api/v1/databases/MyOrg_MyBucket/query?q={}&format=json",
|
||||
server_url, "select%20*%20from%20h2o_temperature"
|
||||
))
|
||||
.send()
|
||||
.await;
|
||||
|
||||
assert_eq!(get_content_type(&response), "application/json");
|
||||
|
||||
// Note: two JSON records, one for each line of line protocol written
|
||||
let res = r#"[{"bottom_degrees":50.4,"location":"santa_monica","state":"CA","surface_degrees":65.2,"time":1568756160},{"location":"Boston","state":"MA","surface_degrees":50.2,"time":1568756160}]"#;
|
||||
check_response("query", response, StatusCode::OK, res).await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn gzip_str(s: &str) -> Vec<u8> {
|
||||
use flate2::{write::GzEncoder, Compression};
|
||||
use std::io::Write;
|
||||
|
@ -865,7 +1057,7 @@ mod tests {
|
|||
"{}/api/v2/write?bucket={}&org={}",
|
||||
server_url, bucket_name, org_name
|
||||
))
|
||||
.header(header::CONTENT_ENCODING, "gzip")
|
||||
.header(CONTENT_ENCODING, "gzip")
|
||||
.body(gzip_str(lp_data))
|
||||
.send()
|
||||
.await;
|
||||
|
@ -1119,6 +1311,19 @@ mod tests {
|
|||
assert_eq!(r4.segments.len(), 0);
|
||||
}
|
||||
|
||||
fn get_content_type(response: &Result<Response, reqwest::Error>) -> String {
|
||||
if let Ok(response) = response {
|
||||
response
|
||||
.headers()
|
||||
.get(CONTENT_TYPE)
|
||||
.map(|v| v.to_str().unwrap())
|
||||
.unwrap_or("")
|
||||
.to_string()
|
||||
} else {
|
||||
"".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
/// checks a http response against expected results
|
||||
async fn check_response(
|
||||
description: &str,
|
||||
|
@ -1191,4 +1396,59 @@ mod tests {
|
|||
|
||||
collect(physical_plan).await.unwrap()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_params_format_default() {
|
||||
// default to pretty format when not otherwise specified
|
||||
assert_eq!(
|
||||
serde_urlencoded::from_str("q=foo"),
|
||||
Ok(QueryParams {
|
||||
q: "foo".to_string(),
|
||||
format: QueryOutputFormat::Pretty
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_params_format_pretty() {
|
||||
assert_eq!(
|
||||
serde_urlencoded::from_str("q=foo&format=pretty"),
|
||||
Ok(QueryParams {
|
||||
q: "foo".to_string(),
|
||||
format: QueryOutputFormat::Pretty
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_params_format_csv() {
|
||||
assert_eq!(
|
||||
serde_urlencoded::from_str("q=foo&format=csv"),
|
||||
Ok(QueryParams {
|
||||
q: "foo".to_string(),
|
||||
format: QueryOutputFormat::CSV
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_params_format_json() {
|
||||
assert_eq!(
|
||||
serde_urlencoded::from_str("q=foo&format=json"),
|
||||
Ok(QueryParams {
|
||||
q: "foo".to_string(),
|
||||
format: QueryOutputFormat::JSON
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_params_bad_format() {
|
||||
assert_eq!(
|
||||
serde_urlencoded::from_str::<QueryParams>("q=foo&format=jsob")
|
||||
.unwrap_err()
|
||||
.to_string(),
|
||||
"unknown variant `jsob`, expected one of `pretty`, `csv`, `json`"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,242 @@
|
|||
//! Output formatting utilities for query endpoint
|
||||
|
||||
use serde::Deserialize;
|
||||
use snafu::{ResultExt, Snafu};
|
||||
use std::io::Write;
|
||||
|
||||
use serde_json::Value;
|
||||
|
||||
use arrow_deps::arrow::{
|
||||
self, csv::WriterBuilder, error::ArrowError, json::writer::record_batches_to_json_rows,
|
||||
record_batch::RecordBatch,
|
||||
};
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
#[snafu(display("Arrow pretty printing error: {}", source))]
|
||||
PrettyArrow { source: ArrowError },
|
||||
|
||||
#[snafu(display("Arrow csv printing error: {}", source))]
|
||||
CsvArrow { source: ArrowError },
|
||||
|
||||
#[snafu(display("Arrow json printing error: {}", source))]
|
||||
JsonArrow { source: ArrowError },
|
||||
|
||||
#[snafu(display("Json conversion error: {}", source))]
|
||||
JsonConversion { source: serde_json::Error },
|
||||
|
||||
#[snafu(display("IO error during Json conversion: {}", source))]
|
||||
JsonWrite { source: std::io::Error },
|
||||
|
||||
#[snafu(display("Error converting CSV output to UTF-8: {}", source))]
|
||||
CsvUtf8 { source: std::string::FromUtf8Error },
|
||||
|
||||
#[snafu(display("Error converting JSON output to UTF-8: {}", source))]
|
||||
JsonUtf8 { source: std::string::FromUtf8Error },
|
||||
}
|
||||
type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
#[derive(Deserialize, Debug, Copy, Clone, PartialEq)]
|
||||
/// Requested output format for the query endpoint
|
||||
pub enum QueryOutputFormat {
|
||||
/// Arrow pretty printer format (default)
|
||||
#[serde(rename = "pretty")]
|
||||
Pretty,
|
||||
/// Comma separated values
|
||||
#[serde(rename = "csv")]
|
||||
CSV,
|
||||
/// Arrow JSON format
|
||||
#[serde(rename = "json")]
|
||||
JSON,
|
||||
}
|
||||
|
||||
impl Default for QueryOutputFormat {
|
||||
fn default() -> Self {
|
||||
Self::Pretty
|
||||
}
|
||||
}
|
||||
|
||||
impl QueryOutputFormat {
|
||||
/// Return the content type of the relevant format
|
||||
pub fn content_type(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Pretty => "text/plain",
|
||||
Self::CSV => "text/csv",
|
||||
Self::JSON => "application/json",
|
||||
}
|
||||
}
|
||||
}
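For reference, a tiny hedged sketch tying the two halves of this type together: the same `QueryOutputFormat` value drives both the body encoding (via `format`, defined below) and the HTTP Content-Type header chosen by the query handler earlier in this diff.

// Sketch: produce a (body, content-type) pair for a CSV response.
fn csv_response_parts(batches: &[RecordBatch]) -> Result<(String, &'static str)> {
    let format = QueryOutputFormat::CSV;
    Ok((format.format(batches)?, format.content_type())) // content type is "text/csv"
}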
|
||||
|
||||
impl QueryOutputFormat {
|
||||
/// Format the [`RecordBatch`]es into a String in one of the
|
||||
/// following formats:
|
||||
///
|
||||
/// Pretty:
|
||||
/// ```text
|
||||
/// +----------------+--------------+-------+-----------------+------------+
|
||||
/// | bottom_degrees | location | state | surface_degrees | time |
|
||||
/// +----------------+--------------+-------+-----------------+------------+
|
||||
/// | 50.4 | santa_monica | CA | 65.2 | 1568756160 |
|
||||
/// +----------------+--------------+-------+-----------------+------------+
|
||||
/// ```
|
||||
///
|
||||
/// CSV:
|
||||
/// ```text
|
||||
/// bottom_degrees,location,state,surface_degrees,time
|
||||
/// 50.4,santa_monica,CA,65.2,1568756160
|
||||
/// ```
|
||||
///
|
||||
/// JSON:
|
||||
///
|
||||
/// Example (newline + whitespace added for clarity):
|
||||
/// ```text
|
||||
/// [
|
||||
/// {"bottom_degrees":50.4,"location":"santa_monica","state":"CA","surface_degrees":65.2,"time":1568756160},
|
||||
/// {"location":"Boston","state":"MA","surface_degrees":50.2,"time":1568756160}
|
||||
/// ]
|
||||
/// ```
|
||||
pub fn format(&self, batches: &[RecordBatch]) -> Result<String> {
|
||||
match self {
|
||||
Self::Pretty => batches_to_pretty(&batches),
|
||||
Self::CSV => batches_to_csv(&batches),
|
||||
Self::JSON => batches_to_json(&batches),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn batches_to_pretty(batches: &[RecordBatch]) -> Result<String> {
|
||||
arrow::util::pretty::pretty_format_batches(batches).context(PrettyArrow)
|
||||
}
|
||||
|
||||
fn batches_to_csv(batches: &[RecordBatch]) -> Result<String> {
|
||||
let mut bytes = vec![];
|
||||
|
||||
{
|
||||
let mut writer = WriterBuilder::new().has_headers(true).build(&mut bytes);
|
||||
|
||||
for batch in batches {
|
||||
writer.write(batch).context(CsvArrow)?;
|
||||
}
|
||||
}
|
||||
let csv = String::from_utf8(bytes).context(CsvUtf8)?;
|
||||
Ok(csv)
|
||||
}
|
||||
|
||||
fn batches_to_json(batches: &[RecordBatch]) -> Result<String> {
|
||||
let mut bytes = vec![];
|
||||
|
||||
{
|
||||
let mut writer = JsonArrayWriter::new(&mut bytes);
|
||||
writer.write_batches(batches)?;
|
||||
writer.finish()?;
|
||||
}
|
||||
|
||||
let json = String::from_utf8(bytes).context(JsonUtf8)?;
|
||||
|
||||
Ok(json)
|
||||
}
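A self-contained sketch of the whole pipeline (assumptions: a single Int64 column named `time`; the `arrow_deps` re-export path used at the top of this module): build a `RecordBatch` and render it with `QueryOutputFormat::format`.

// Illustrative only: end-to-end use of the JSON formatter defined above.
fn json_format_sketch() -> Result<String> {
    use std::sync::Arc;
    use arrow_deps::arrow::{
        array::Int64Array,
        datatypes::{DataType, Field, Schema},
    };

    // One Int64 column named "time" with two rows.
    let schema = Arc::new(Schema::new(vec![Field::new("time", DataType::Int64, false)]));
    let batch = RecordBatch::try_new(schema, vec![Arc::new(Int64Array::from(vec![1_i64, 2]))])
        .expect("built record batch");

    // Renders to: [{"time":1},{"time":2}]
    QueryOutputFormat::JSON.format(&[batch])
}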
|
||||
|
||||
/// Writes out well-formed JSON arrays in a streaming fashion
|
||||
///
|
||||
/// [{"foo": "bar"}, {"foo": "baz"}]
|
||||
///
|
||||
/// This is based on the arrow JSON writer (json::writer::Writer)
|
||||
///
|
||||
/// TODO contribute this back to arrow: https://issues.apache.org/jira/browse/ARROW-11773
|
||||
struct JsonArrayWriter<W>
|
||||
where
|
||||
W: Write,
|
||||
{
|
||||
started: bool,
|
||||
finished: bool,
|
||||
writer: W,
|
||||
}
|
||||
|
||||
impl<W> JsonArrayWriter<W>
|
||||
where
|
||||
W: Write,
|
||||
{
|
||||
fn new(writer: W) -> Self {
|
||||
Self {
|
||||
writer,
|
||||
started: false,
|
||||
finished: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Consume self and return the inner writer
|
||||
#[cfg(test)]
|
||||
pub fn into_inner(self) -> W {
|
||||
self.writer
|
||||
}
|
||||
|
||||
pub fn write_row(&mut self, row: &Value) -> Result<()> {
|
||||
if !self.started {
|
||||
self.writer.write_all(b"[").context(JsonWrite)?;
|
||||
self.started = true;
|
||||
} else {
|
||||
self.writer.write_all(b",").context(JsonWrite)?;
|
||||
}
|
||||
self.writer
|
||||
.write_all(&serde_json::to_vec(row).context(JsonConversion)?)
|
||||
.context(JsonWrite)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_batches(&mut self, batches: &[RecordBatch]) -> Result<()> {
|
||||
for row in record_batches_to_json_rows(batches) {
|
||||
self.write_row(&Value::Object(row))?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// tell the writer there is no more data to come so it can
|
||||
/// write the final `']'`
|
||||
pub fn finish(&mut self) -> Result<()> {
|
||||
if self.started && !self.finished {
|
||||
self.writer.write_all(b"]").context(JsonWrite)?;
|
||||
self.finished = true;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use serde_json::json;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn json_writer_empty() {
|
||||
let mut writer = JsonArrayWriter::new(vec![] as Vec<u8>);
|
||||
writer.finish().unwrap();
|
||||
assert_eq!(String::from_utf8(writer.into_inner()).unwrap(), "");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn json_writer_one_row() {
|
||||
let mut writer = JsonArrayWriter::new(vec![] as Vec<u8>);
|
||||
let v = json!({ "an": "object" });
|
||||
writer.write_row(&v).unwrap();
|
||||
writer.finish().unwrap();
|
||||
assert_eq!(
|
||||
String::from_utf8(writer.into_inner()).unwrap(),
|
||||
r#"[{"an":"object"}]"#
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn json_writer_two_rows() {
|
||||
let mut writer = JsonArrayWriter::new(vec![] as Vec<u8>);
|
||||
let v = json!({ "an": "object" });
|
||||
writer.write_row(&v).unwrap();
|
||||
let v = json!({ "another": "object" });
|
||||
writer.write_row(&v).unwrap();
|
||||
writer.finish().unwrap();
|
||||
assert_eq!(
|
||||
String::from_utf8(writer.into_inner()).unwrap(),
|
||||
r#"[{"an":"object"},{"another":"object"}]"#
|
||||
);
|
||||
}
|
||||
}
|
|
@ -30,7 +30,22 @@ where
|
|||
{
|
||||
let stream = TcpListenerStream::new(socket);
|
||||
|
||||
let (mut health_reporter, health_service) = tonic_health::server::health_reporter();
|
||||
|
||||
let services = [
|
||||
generated_types::STORAGE_SERVICE,
|
||||
generated_types::IOX_TESTING_SERVICE,
|
||||
generated_types::ARROW_SERVICE,
|
||||
];
|
||||
|
||||
for service in &services {
|
||||
health_reporter
|
||||
.set_service_status(service, tonic_health::ServingStatus::Serving)
|
||||
.await;
|
||||
}
|
||||
|
||||
tonic::transport::Server::builder()
|
||||
.add_service(health_service)
|
||||
.add_service(testing::make_server())
|
||||
.add_service(storage::make_server(Arc::clone(&server)))
|
||||
.add_service(flight::make_server(server))
|
||||
|
|
|
@ -852,7 +852,7 @@ async fn tag_values_impl<T>(
|
|||
rpc_predicate: Option<Predicate>,
|
||||
) -> Result<StringValuesResponse>
|
||||
where
|
||||
T: DatabaseStore,
|
||||
T: DatabaseStore + 'static,
|
||||
{
|
||||
let rpc_predicate_string = format!("{:?}", rpc_predicate);
|
||||
|
||||
|
@ -873,10 +873,12 @@ where
|
|||
.await
|
||||
.context(DatabaseNotFound { db_name })?;
|
||||
|
||||
let planner = InfluxRPCPlanner::new();
|
||||
|
||||
let executor = db_store.executor();
|
||||
|
||||
let tag_value_plan = db
|
||||
.column_values(tag_name, predicate)
|
||||
let tag_value_plan = planner
|
||||
.tag_values(db.as_ref(), tag_name, predicate)
|
||||
.await
|
||||
.map_err(|e| Box::new(e) as _)
|
||||
.context(ListingTagValues { db_name, tag_name })?;
|
||||
|
@ -1107,11 +1109,11 @@ mod tests {
|
|||
use arrow_deps::datafusion::logical_plan::{col, lit, Expr};
|
||||
use panic_logging::SendPanicsToTracing;
|
||||
use query::{
|
||||
exec::SeriesSetPlans,
|
||||
group_by::{Aggregate as QueryAggregate, WindowDuration as QueryWindowDuration},
|
||||
plan::seriesset::SeriesSetPlans,
|
||||
test::QueryGroupsRequest,
|
||||
test::TestDatabaseStore,
|
||||
test::{ColumnValuesRequest, QuerySeriesRequest, TestChunk},
|
||||
test::{QuerySeriesRequest, TestChunk},
|
||||
};
|
||||
use std::{
|
||||
convert::TryFrom,
|
||||
|
@ -1478,11 +1480,18 @@ mod tests {
|
|||
let db_info = OrgAndBucket::new(123, 456);
|
||||
let partition_id = 1;
|
||||
|
||||
let test_db = fixture
|
||||
// Add a chunk with a tag column
|
||||
let chunk = TestChunk::new(0)
|
||||
.with_time_column("TheMeasurement")
|
||||
.with_tag_column("TheMeasurement", "state")
|
||||
.with_one_row_of_null_data("TheMeasurement");
|
||||
|
||||
fixture
|
||||
.test_storage
|
||||
.db_or_create(&db_info.db_name)
|
||||
.await
|
||||
.expect("creating test database");
|
||||
.unwrap()
|
||||
.add_chunk("my_partition_key", Arc::new(chunk));
|
||||
|
||||
let source = Some(StorageClientWrapper::read_source(
|
||||
db_info.org_id,
|
||||
|
@ -1490,24 +1499,35 @@ mod tests {
|
|||
partition_id,
|
||||
));
|
||||
|
||||
let tag_values = vec!["k1", "k2", "k3", "k4"];
|
||||
let request = TagValuesRequest {
|
||||
tags_source: source.clone(),
|
||||
range: make_timestamp_range(150, 200),
|
||||
range: make_timestamp_range(150, 2000),
|
||||
predicate: make_state_ma_predicate(),
|
||||
tag_key: "the_tag_key".into(),
|
||||
tag_key: "state".into(),
|
||||
};
|
||||
|
||||
let expected_request = ColumnValuesRequest {
|
||||
predicate: "Predicate { exprs: [#state Eq Utf8(\"MA\")] range: TimestampRange { start: 150, end: 200 }}".into(),
|
||||
column_name: "the_tag_key".into(),
|
||||
};
|
||||
|
||||
test_db.set_column_values(to_string_vec(&tag_values));
|
||||
|
||||
let actual_tag_values = fixture.storage_client.tag_values(request).await.unwrap();
|
||||
assert_eq!(actual_tag_values, tag_values,);
|
||||
assert_eq!(test_db.get_column_values_request(), Some(expected_request),);
|
||||
assert_eq!(actual_tag_values, vec!["MA"]);
|
||||
}
|
||||
|
||||
/// test the plumbing of the RPC layer for tag_values
|
||||
///
|
||||
/// For the special case of
|
||||
///
|
||||
/// tag_key = _measurement means listing all measurement names
|
||||
#[tokio::test]
|
||||
async fn test_storage_rpc_tag_values_with_measurement() {
|
||||
// Start a test gRPC server on a randomly allocated port
|
||||
let mut fixture = Fixture::new().await.expect("Connecting to test server");
|
||||
|
||||
let db_info = OrgAndBucket::new(123, 456);
|
||||
let partition_id = 1;
|
||||
|
||||
let source = Some(StorageClientWrapper::read_source(
|
||||
db_info.org_id,
|
||||
db_info.bucket_id,
|
||||
partition_id,
|
||||
));
|
||||
|
||||
// ---
|
||||
// test tag_key = _measurement means listing all measurement names
|
||||
|
@ -1590,11 +1610,14 @@ mod tests {
|
|||
let db_info = OrgAndBucket::new(123, 456);
|
||||
let partition_id = 1;
|
||||
|
||||
let test_db = fixture
|
||||
let chunk = TestChunk::new(0).with_error("Sugar we are going down");
|
||||
|
||||
fixture
|
||||
.test_storage
|
||||
.db_or_create(&db_info.db_name)
|
||||
.await
|
||||
.expect("creating test database");
|
||||
.unwrap()
|
||||
.add_chunk("my_partition_key", Arc::new(chunk));
|
||||
|
||||
let source = Some(StorageClientWrapper::read_source(
|
||||
db_info.org_id,
|
||||
|
@ -1612,12 +1635,13 @@ mod tests {
|
|||
tag_key: "the_tag_key".into(),
|
||||
};
|
||||
|
||||
// Note we don't set the column_names on the test database, so we expect an
|
||||
// error
|
||||
let response = fixture.storage_client.tag_values(request).await;
|
||||
assert!(response.is_err());
|
||||
let response_string = format!("{:?}", response);
|
||||
let expected_error = "No saved column_values in TestDatabase";
|
||||
let response_string = fixture
|
||||
.storage_client
|
||||
.tag_values(request)
|
||||
.await
|
||||
.unwrap_err()
|
||||
.to_string();
|
||||
let expected_error = "Sugar we are going down";
|
||||
assert!(
|
||||
response_string.contains(expected_error),
|
||||
"'{}' did not contain expected content '{}'",
|
||||
|
@ -1625,12 +1649,6 @@ mod tests {
|
|||
expected_error
|
||||
);
|
||||
|
||||
let expected_request = Some(ColumnValuesRequest {
|
||||
predicate: "Predicate {}".into(),
|
||||
column_name: "the_tag_key".into(),
|
||||
});
|
||||
assert_eq!(test_db.get_column_values_request(), expected_request);
|
||||
|
||||
// ---
|
||||
// test error with non utf8 value
|
||||
// ---
|
||||
|
@ -1641,9 +1659,12 @@ mod tests {
|
|||
tag_key: [0, 255].into(), // this is not a valid UTF-8 string
|
||||
};
|
||||
|
||||
let response = fixture.storage_client.tag_values(request).await;
|
||||
assert!(response.is_err());
|
||||
let response_string = format!("{:?}", response);
|
||||
let response_string = fixture
|
||||
.storage_client
|
||||
.tag_values(request)
|
||||
.await
|
||||
.unwrap_err()
|
||||
.to_string();
|
||||
let expected_error = "Error converting tag_key to UTF-8 in tag_values request";
|
||||
assert!(
|
||||
response_string.contains(expected_error),
|
||||
|
@ -1653,22 +1674,27 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// test the plumbing of the RPC layer for measurement_tag_values--
|
||||
/// specifically that the right parameters are passed into the Database
|
||||
/// interface and that the returned values are sent back via gRPC.
|
||||
/// test the plumbing of the RPC layer for measurement_tag_values
|
||||
#[tokio::test]
|
||||
async fn test_storage_rpc_measurement_tag_values() {
|
||||
// Start a test gRPC server on a randomly allocated port
|
||||
test_helpers::maybe_start_logging();
|
||||
let mut fixture = Fixture::new().await.expect("Connecting to test server");
|
||||
|
||||
let db_info = OrgAndBucket::new(123, 456);
|
||||
let partition_id = 1;
|
||||
|
||||
let test_db = fixture
|
||||
// Add a chunk with a tag column
|
||||
let chunk = TestChunk::new(0)
|
||||
.with_time_column("TheMeasurement")
|
||||
.with_tag_column("TheMeasurement", "state")
|
||||
.with_one_row_of_null_data("TheMeasurement");
|
||||
|
||||
fixture
|
||||
.test_storage
|
||||
.db_or_create(&db_info.db_name)
|
||||
.await
|
||||
.expect("creating test database");
|
||||
.unwrap()
|
||||
.add_chunk("my_partition_key", Arc::new(chunk));
|
||||
|
||||
let source = Some(StorageClientWrapper::read_source(
|
||||
db_info.org_id,
|
||||
|
@ -1676,22 +1702,14 @@ mod tests {
|
|||
partition_id,
|
||||
));
|
||||
|
||||
let tag_values = vec!["k1", "k2", "k3", "k4"];
|
||||
let request = MeasurementTagValuesRequest {
|
||||
measurement: "m4".into(),
|
||||
measurement: "TheMeasurement".into(),
|
||||
source: source.clone(),
|
||||
range: make_timestamp_range(150, 200),
|
||||
range: make_timestamp_range(150, 2000),
|
||||
predicate: make_state_ma_predicate(),
|
||||
tag_key: "the_tag_key".into(),
|
||||
tag_key: "state".into(),
|
||||
};
|
||||
|
||||
let expected_request = ColumnValuesRequest {
|
||||
predicate: "Predicate { table_names: m4 exprs: [#state Eq Utf8(\"MA\")] range: TimestampRange { start: 150, end: 200 }}".into(),
|
||||
column_name: "the_tag_key".into(),
|
||||
};
|
||||
|
||||
test_db.set_column_values(to_string_vec(&tag_values));
|
||||
|
||||
let actual_tag_values = fixture
|
||||
.storage_client
|
||||
.measurement_tag_values(request)
|
||||
|
@ -1699,15 +1717,34 @@ mod tests {
|
|||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
actual_tag_values, tag_values,
|
||||
actual_tag_values,
|
||||
vec!["MA"],
|
||||
"unexpected tag values while getting tag values",
|
||||
);
|
||||
}
|
||||
|
||||
assert_eq!(
|
||||
test_db.get_column_values_request(),
|
||||
Some(expected_request),
|
||||
"unexpected request while getting tag values",
|
||||
);
|
||||
#[tokio::test]
|
||||
async fn test_storage_rpc_measurement_tag_values_error() {
|
||||
test_helpers::maybe_start_logging();
|
||||
let mut fixture = Fixture::new().await.expect("Connecting to test server");
|
||||
|
||||
let db_info = OrgAndBucket::new(123, 456);
|
||||
let partition_id = 1;
|
||||
|
||||
let chunk = TestChunk::new(0).with_error("Sugar we are going down");
|
||||
|
||||
fixture
|
||||
.test_storage
|
||||
.db_or_create(&db_info.db_name)
|
||||
.await
|
||||
.unwrap()
|
||||
.add_chunk("my_partition_key", Arc::new(chunk));
|
||||
|
||||
let source = Some(StorageClientWrapper::read_source(
|
||||
db_info.org_id,
|
||||
db_info.bucket_id,
|
||||
partition_id,
|
||||
));
|
||||
|
||||
// ---
|
||||
// test error
|
||||
|
@ -1722,22 +1759,19 @@ mod tests {
|
|||
|
||||
// Note we don't set the column_names on the test database, so we expect an
|
||||
// error
|
||||
let response = fixture.storage_client.measurement_tag_values(request).await;
|
||||
assert!(response.is_err());
|
||||
let response_string = format!("{:?}", response);
|
||||
let expected_error = "No saved column_values in TestDatabase";
|
||||
let response_string = fixture
|
||||
.storage_client
|
||||
.measurement_tag_values(request)
|
||||
.await
|
||||
.unwrap_err()
|
||||
.to_string();
|
||||
let expected_error = "Sugar we are going down";
|
||||
assert!(
|
||||
response_string.contains(expected_error),
|
||||
"'{}' did not contain expected content '{}'",
|
||||
response_string,
|
||||
expected_error
|
||||
);
|
||||
|
||||
let expected_request = Some(ColumnValuesRequest {
|
||||
predicate: "Predicate { table_names: m5}".into(),
|
||||
column_name: "the_tag_key".into(),
|
||||
});
|
||||
assert_eq!(test_db.get_column_values_request(), expected_request);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
|

@ -48,7 +48,7 @@ const HTTP_BIND_ADDR: &str = http_bind_addr!();
const GRPC_BIND_ADDR: &str = grpc_bind_addr!();

const HTTP_BASE: &str = concat!("http://", http_bind_addr!());
const API_BASE: &str = concat!("http://", http_bind_addr!(), "/api/v2");
const IOX_API_V1_BASE: &str = concat!("http://", http_bind_addr!(), "/iox/api/v1");
const GRPC_URL_BASE: &str = concat!("http://", grpc_bind_addr!(), "/");

const TOKEN: &str = "InfluxDB IOx doesn't have authentication yet";
@ -377,6 +377,27 @@ impl TestServer {
// different ports but both need to be up for the test to run
let try_grpc_connect = async {
let mut interval = tokio::time::interval(Duration::from_millis(500));

loop {
match influxdb_iox_client::health::Client::connect(GRPC_URL_BASE).await {
Ok(mut client) => {
println!("Successfully connected to server");

match client.check_storage().await {
Ok(_) => {
println!("Storage service is running");
break;
}
Err(e) => println!("Error checking storage service status: {}", e),
}
}
Err(e) => {
println!("Waiting for gRPC API to be up: {}", e);
}
}
interval.tick().await;
}

loop {
match StorageClient::connect(GRPC_URL_BASE).await {
Ok(storage_client) => {
@ -387,7 +408,7 @@ impl TestServer {
return;
}
Err(e) => {
println!("Waiting for gRPC server to be up: {}", e);
println!("Failed to create storage client: {}", e)
}
}
interval.tick().await;
@ -396,7 +417,7 @@ impl TestServer {

let try_http_connect = async {
let client = reqwest::Client::new();
let url = format!("{}/ping", HTTP_BASE);
let url = format!("{}/health", HTTP_BASE);
let mut interval = tokio::time::interval(Duration::from_millis(500));
loop {
match client.get(&url).send().await {

@ -1,4 +1,4 @@
use crate::{Scenario, API_BASE};
use crate::{Scenario, IOX_API_V1_BASE};

pub async fn test(
client: &reqwest::Client,
@ -6,7 +6,7 @@ pub async fn test(
sql_query: &str,
expected_read_data: &[String],
) {
let text = read_data_as_sql(&client, "/read", scenario, sql_query).await;
let text = read_data_as_sql(&client, scenario, sql_query).await;

assert_eq!(
text, expected_read_data,
@ -17,18 +17,15 @@ pub async fn test(

async fn read_data_as_sql(
client: &reqwest::Client,
path: &str,
scenario: &Scenario,
sql_query: &str,
) -> Vec<String> {
let url = format!("{}{}", API_BASE, path);
let db_name = format!("{}_{}", scenario.org_id_str(), scenario.bucket_id_str());
let path = format!("/databases/{}/query", db_name);
let url = format!("{}{}", IOX_API_V1_BASE, path);
let lines = client
.get(&url)
.query(&[
("bucket", scenario.bucket_id_str()),
("org", scenario.org_id_str()),
("sql_query", sql_query),
])
.query(&[("q", sql_query)])
.send()
.await
.unwrap()