chore: merge main into branch

Merge branch 'main' into ntran/optimize_column_selection
pull/24376/head
Nga Tran 2021-02-26 15:29:43 -05:00
commit 18de3bdcab
57 changed files with 3558 additions and 910 deletions


@ -86,7 +86,7 @@ jobs:
# out for parallel CI runs!
#
# To change the contents of the build container, modify docker/Dockerfile.ci
# To change the final release container, modify docker/Dockerfile.perf
# To change the final release container, modify docker/Dockerfile.iox
perf_image:
docker:
- image: quay.io/influxdb/rust:ci
@ -105,7 +105,7 @@ jobs:
echo "$QUAY_PASS" | docker login quay.io --username $QUAY_USER --password-stdin
- run: |
BRANCH=$(git rev-parse --abbrev-ref HEAD | tr '/' '.')
docker build -t quay.io/influxdb/fusion:$BRANCH -f docker/Dockerfile.perf .
docker build -t quay.io/influxdb/fusion:$BRANCH -f docker/Dockerfile.iox .
docker push quay.io/influxdb/fusion:$BRANCH
echo "export BRANCH=${BRANCH}" >> $BASH_ENV
- run:

.dockerignore (new file, 4 lines)

@ -0,0 +1,4 @@
# Ignore everything
**
# Except
!target/release/influxdb_iox


@ -71,7 +71,7 @@ jobs:
args: --workspace
lints:
name: Lints
name: Rust Lints
runs-on: ubuntu-latest
container:
image: quay.io/influxdb/rust:ci
@ -91,3 +91,13 @@ jobs:
with:
token: ${{ secrets.GITHUB_TOKEN }}
args: --all-targets --workspace -- -D warnings
protobuf:
name: Protobuf Lints
runs-on: ubuntu-latest
container:
image: bufbuild/buf
steps:
- uses: actions/checkout@v2
- name: Lint IOx protobuf
run: buf lint

Cargo.lock (generated, 213 lines changed)

@ -101,7 +101,7 @@ checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
[[package]]
name = "arrow"
version = "4.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow.git?rev=ad4504e8e85eb8e5babe0f01ca8cf9947499fc40#ad4504e8e85eb8e5babe0f01ca8cf9947499fc40"
source = "git+https://github.com/apache/arrow.git?rev=b5ac048c75cc55f4039d279f554920be3112d7cd#b5ac048c75cc55f4039d279f554920be3112d7cd"
dependencies = [
"cfg_aliases",
"chrono",
@ -124,7 +124,7 @@ dependencies = [
[[package]]
name = "arrow-flight"
version = "4.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow.git?rev=ad4504e8e85eb8e5babe0f01ca8cf9947499fc40#ad4504e8e85eb8e5babe0f01ca8cf9947499fc40"
source = "git+https://github.com/apache/arrow.git?rev=b5ac048c75cc55f4039d279f554920be3112d7cd#b5ac048c75cc55f4039d279f554920be3112d7cd"
dependencies = [
"arrow",
"bytes",
@ -411,9 +411,9 @@ dependencies = [
[[package]]
name = "bumpalo"
version = "3.6.0"
version = "3.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "099e596ef14349721d9016f6b80dd3419ea1bf289ab9b44df8e4dfd3a005d5d9"
checksum = "63396b8a4b9de3f4fdfb320ab6080762242f66a8ef174c49d8e19b674db4cdbe"
[[package]]
name = "byteorder"
@ -438,9 +438,9 @@ dependencies = [
[[package]]
name = "cc"
version = "1.0.66"
version = "1.0.67"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c0496836a84f8d0495758516b8621a622beb77c0fed418570e50764093ced48"
checksum = "e3c69b077ad434294d3ce9f1f6143a2a4b89a8a2d54ef813d85003a4fd1137fd"
dependencies = [
"jobserver",
]
@ -488,9 +488,9 @@ dependencies = [
[[package]]
name = "clang-sys"
version = "1.1.0"
version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5cb92721cb37482245ed88428f72253ce422b3b4ee169c70a0642521bb5db4cc"
checksum = "f54d78e30b388d4815220c8dd03fea5656b6c6d32adb59e89061552a102f8da1"
dependencies = [
"glob",
"libc",
@ -669,9 +669,9 @@ dependencies = [
"cfg-if 1.0.0",
"crossbeam-channel 0.5.0",
"crossbeam-deque 0.8.0",
"crossbeam-epoch 0.9.1",
"crossbeam-epoch 0.9.2",
"crossbeam-queue 0.3.1",
"crossbeam-utils 0.8.1",
"crossbeam-utils 0.8.2",
]
[[package]]
@ -691,7 +691,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dca26ee1f8d361640700bde38b2c37d8c22b3ce2d360e1fc1c74ea4b0aa7d775"
dependencies = [
"cfg-if 1.0.0",
"crossbeam-utils 0.8.1",
"crossbeam-utils 0.8.2",
]
[[package]]
@ -712,8 +712,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9"
dependencies = [
"cfg-if 1.0.0",
"crossbeam-epoch 0.9.1",
"crossbeam-utils 0.8.1",
"crossbeam-epoch 0.9.2",
"crossbeam-utils 0.8.2",
]
[[package]]
@ -733,14 +733,14 @@ dependencies = [
[[package]]
name = "crossbeam-epoch"
version = "0.9.1"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1aaa739f95311c2c7887a76863f500026092fb1dce0161dab577e559ef3569d"
checksum = "d60ab4a8dba064f2fbb5aa270c28da5cf4bbd0e72dae1140a6b0353a779dbe00"
dependencies = [
"cfg-if 1.0.0",
"const_fn",
"crossbeam-utils 0.8.1",
"crossbeam-utils 0.8.2",
"lazy_static",
"loom",
"memoffset 0.6.1",
"scopeguard",
]
@ -763,7 +763,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f6cb3c7f5b8e51bc3ebb73a2327ad4abdbd119dc13223f14f961d2f38486756"
dependencies = [
"cfg-if 1.0.0",
"crossbeam-utils 0.8.1",
"crossbeam-utils 0.8.2",
]
[[package]]
@ -779,13 +779,14 @@ dependencies = [
[[package]]
name = "crossbeam-utils"
version = "0.8.1"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02d96d1e189ef58269ebe5b97953da3274d83a93af647c2ddd6f9dab28cedb8d"
checksum = "bae8f328835f8f5a6ceb6a7842a7f2d0c03692adb5c889347235d59194731fe3"
dependencies = [
"autocfg",
"cfg-if 1.0.0",
"lazy_static",
"loom",
]
[[package]]
@ -850,7 +851,7 @@ dependencies = [
[[package]]
name = "datafusion"
version = "4.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow.git?rev=ad4504e8e85eb8e5babe0f01ca8cf9947499fc40#ad4504e8e85eb8e5babe0f01ca8cf9947499fc40"
source = "git+https://github.com/apache/arrow.git?rev=b5ac048c75cc55f4039d279f554920be3112d7cd#b5ac048c75cc55f4039d279f554920be3112d7cd"
dependencies = [
"ahash 0.7.0",
"arrow",
@ -871,6 +872,7 @@ dependencies = [
"sha2",
"sqlparser 0.8.0",
"tokio",
"unicode-segmentation",
]
[[package]]
@ -1115,9 +1117,9 @@ checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
[[package]]
name = "form_urlencoded"
version = "1.0.0"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ece68d15c92e84fa4f19d3780f1294e5ca82a78a6d515f1efaabcc144688be00"
checksum = "5fc25a87fa4fd2094bffb06925852034d90a17f0d1e05197d4956d3555752191"
dependencies = [
"matches",
"percent-encoding",
@ -1135,9 +1137,9 @@ dependencies = [
[[package]]
name = "futures"
version = "0.3.12"
version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da9052a1a50244d8d5aa9bf55cbc2fb6f357c86cc52e46c62ed390a7180cf150"
checksum = "7f55667319111d593ba876406af7c409c0ebb44dc4be6132a783ccf163ea14c1"
dependencies = [
"futures-channel",
"futures-core",
@ -1150,9 +1152,9 @@ dependencies = [
[[package]]
name = "futures-channel"
version = "0.3.12"
version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2d31b7ec7efab6eefc7c57233bb10b847986139d88cc2f5a02a1ae6871a1846"
checksum = "8c2dd2df839b57db9ab69c2c9d8f3e8c81984781937fe2807dc6dcf3b2ad2939"
dependencies = [
"futures-core",
"futures-sink",
@ -1160,15 +1162,15 @@ dependencies = [
[[package]]
name = "futures-core"
version = "0.3.12"
version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79e5145dde8da7d1b3892dad07a9c98fc04bc39892b1ecc9692cf53e2b780a65"
checksum = "15496a72fabf0e62bdc3df11a59a3787429221dd0710ba8ef163d6f7a9112c94"
[[package]]
name = "futures-executor"
version = "0.3.12"
version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9e59fdc009a4b3096bf94f740a0f2424c082521f20a9b08c5c07c48d90fd9b9"
checksum = "891a4b7b96d84d5940084b2a37632dd65deeae662c114ceaa2c879629c9c0ad1"
dependencies = [
"futures-core",
"futures-task",
@ -1177,15 +1179,15 @@ dependencies = [
[[package]]
name = "futures-io"
version = "0.3.12"
version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28be053525281ad8259d47e4de5de657b25e7bac113458555bb4b70bc6870500"
checksum = "d71c2c65c57704c32f5241c1223167c2c3294fd34ac020c807ddbe6db287ba59"
[[package]]
name = "futures-macro"
version = "0.3.12"
version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c287d25add322d9f9abdcdc5927ca398917996600182178774032e9f8258fedd"
checksum = "ea405816a5139fb39af82c2beb921d52143f556038378d6db21183a5c37fbfb7"
dependencies = [
"proc-macro-hack",
"proc-macro2",
@ -1195,24 +1197,21 @@ dependencies = [
[[package]]
name = "futures-sink"
version = "0.3.12"
version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "caf5c69029bda2e743fddd0582d1083951d65cc9539aebf8812f36c3491342d6"
checksum = "85754d98985841b7d4f5e8e6fbfa4a4ac847916893ec511a2917ccd8525b8bb3"
[[package]]
name = "futures-task"
version = "0.3.12"
version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13de07eb8ea81ae445aca7b69f5f7bf15d7bf4912d8ca37d6645c77ae8a58d86"
dependencies = [
"once_cell",
]
checksum = "fa189ef211c15ee602667a6fcfe1c1fd9e07d42250d2156382820fba33c9df80"
[[package]]
name = "futures-test"
version = "0.3.12"
version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b30f48f6b9cd26d8739965d6e3345c511718884fb223795b80dc71d24a9ea9a"
checksum = "f1fe5e51002528907757d5f1648101086f7197f792112db43ba23b06b09e6bce"
dependencies = [
"futures-core",
"futures-executor",
@ -1220,16 +1219,15 @@ dependencies = [
"futures-sink",
"futures-task",
"futures-util",
"once_cell",
"pin-project 1.0.5",
"pin-utils",
]
[[package]]
name = "futures-util"
version = "0.3.12"
version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "632a8cd0f2a4b3fdea1657f08bde063848c3bd00f9bbf6e256b8be78802e624b"
checksum = "1812c7ab8aedf8d6f2701a43e1243acdbcc2b36ab26e2ad421eb99ac963d96d1"
dependencies = [
"futures-channel",
"futures-core",
@ -1259,6 +1257,19 @@ dependencies = [
"tonic-build",
]
[[package]]
name = "generator"
version = "0.6.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9fed24fd1e18827652b4d55652899a1e9da8e54d91624dc3437a5bc3a9f9a9c"
dependencies = [
"cc",
"libc",
"log",
"rustversion",
"winapi",
]
[[package]]
name = "generic-array"
version = "0.14.4"
@ -1493,9 +1504,9 @@ dependencies = [
[[package]]
name = "idna"
version = "0.2.1"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de910d521f7cc3135c4de8db1cb910e0b5ed1dc6f57c381cd07e8e661ce10094"
checksum = "89829a5d69c23d348314a7ac337fe39173b61149a9864deabd260983aed48c21"
dependencies = [
"matches",
"unicode-bidi",
@ -1579,6 +1590,7 @@ dependencies = [
"tokio",
"tokio-stream",
"tonic",
"tonic-health",
"tracing",
"tracing-futures",
"tracing-opentelemetry",
@ -1593,6 +1605,7 @@ dependencies = [
"arrow_deps",
"data_types",
"futures-util",
"generated_types",
"rand 0.8.3",
"reqwest",
"serde",
@ -1754,9 +1767,9 @@ checksum = "b7282d924be3275cec7f6756ff4121987bc6481325397dde6ba3e7802b1a8b1c"
[[package]]
name = "libloading"
version = "0.6.7"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "351a32417a12d5f7e82c368a66781e307834dae04c6ce0cd4456d52989229883"
checksum = "6f84d96438c15fcd6c3f244c8fce01d1e2b9c6b5623e9c711dc9286d8fc92d6a"
dependencies = [
"cfg-if 1.0.0",
"winapi",
@ -1803,6 +1816,17 @@ dependencies = [
"tracing-subscriber",
]
[[package]]
name = "loom"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d44c73b4636e497b4917eb21c33539efa3816741a2d3ff26c6316f1b529481a4"
dependencies = [
"cfg-if 1.0.0",
"generator",
"scoped-tls",
]
[[package]]
name = "lz4"
version = "1.23.2"
@ -1919,9 +1943,9 @@ dependencies = [
[[package]]
name = "mio"
version = "0.7.7"
version = "0.7.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e50ae3f04d169fcc9bde0b547d1c205219b7157e07ded9c5aff03e0637cb3ed7"
checksum = "a5dede4e2065b3842b8b0af444119f3aa331cc7cc2dd20388bfb0f5d5a38823a"
dependencies = [
"libc",
"log",
@ -2190,9 +2214,9 @@ dependencies = [
[[package]]
name = "once_cell"
version = "1.5.2"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13bd41f508810a131401606d54ac32a467c97172d74ba7662562ebba5ad07fa0"
checksum = "10acf907b94fc1b1a152d08ef97e7759650268cf986bf127f387e602b02c7e5a"
dependencies = [
"parking_lot",
]
@ -2351,7 +2375,7 @@ dependencies = [
[[package]]
name = "parquet"
version = "4.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow.git?rev=ad4504e8e85eb8e5babe0f01ca8cf9947499fc40#ad4504e8e85eb8e5babe0f01ca8cf9947499fc40"
source = "git+https://github.com/apache/arrow.git?rev=b5ac048c75cc55f4039d279f554920be3112d7cd#b5ac048c75cc55f4039d279f554920be3112d7cd"
dependencies = [
"arrow",
"base64 0.12.3",
@ -2798,7 +2822,7 @@ checksum = "9ab346ac5921dc62ffa9f89b7a773907511cdfa5490c572ae9be1be33e8afa4a"
dependencies = [
"crossbeam-channel 0.5.0",
"crossbeam-deque 0.8.0",
"crossbeam-utils 0.8.1",
"crossbeam-utils 0.8.2",
"lazy_static",
"num_cpus",
]
@ -2896,9 +2920,9 @@ dependencies = [
[[package]]
name = "reqwest"
version = "0.11.0"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd281b1030aa675fb90aa994d07187645bb3c8fc756ca766e7c3070b439de9de"
checksum = "0460542b551950620a3648c6aa23318ac6b3cd779114bd873209e6e8b5eb1c34"
dependencies = [
"async-compression",
"base64 0.13.0",
@ -3053,7 +3077,7 @@ dependencies = [
"base64 0.13.0",
"blake2b_simd",
"constant_time_eq",
"crossbeam-utils 0.8.1",
"crossbeam-utils 0.8.2",
]
[[package]]
@ -3102,6 +3126,12 @@ dependencies = [
"security-framework",
]
[[package]]
name = "rustversion"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb5d2a036dc6d2d8fd16fde3498b04306e29bd193bf306a57427019b823d5acd"
[[package]]
name = "rustyline"
version = "7.1.0"
@ -3148,6 +3178,12 @@ dependencies = [
"winapi",
]
[[package]]
name = "scoped-tls"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea6a9290e3c9cf0f18145ef7ffa62d68ee0bf5fcd651017e586dc7fd5da448c2"
[[package]]
name = "scopeguard"
version = "1.1.0"
@ -3166,9 +3202,9 @@ dependencies = [
[[package]]
name = "security-framework"
version = "2.0.0"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1759c2e3c8580017a484a7ac56d3abc5a6c1feadf88db2f3633f12ae4268c69"
checksum = "c6af1b6204f89cf0069736daf8b852573e3bc34898eee600e95d3dd855c12e81"
dependencies = [
"bitflags",
"core-foundation",
@ -3179,9 +3215,9 @@ dependencies = [
[[package]]
name = "security-framework-sys"
version = "2.0.0"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f99b9d5e26d2a71633cc4f2ebae7cc9f874044e0c351a27e17892d76dce5678b"
checksum = "31531d257baab426203cf81c5ce1b0b55159dda7ed602ac81b582ccd62265741"
dependencies = [
"core-foundation-sys",
"libc",
@ -3633,18 +3669,18 @@ dependencies = [
[[package]]
name = "thiserror"
version = "1.0.23"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76cc616c6abf8c8928e2fdcc0dbfab37175edd8fb49a4641066ad1364fdab146"
checksum = "e0f4a65597094d4483ddaed134f409b2cb7c1beccf25201a9f73c719254fa98e"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.23"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9be73a2caec27583d0046ef3796c3794f868a5bc813db689eed00c7631275cd1"
checksum = "7765189610d8241a44529806d6fd1f2e0a08734313a35d5b3a556f92b381f3c0"
dependencies = [
"proc-macro2",
"quote",
@ -3873,6 +3909,21 @@ dependencies = [
"syn",
]
[[package]]
name = "tonic-health"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a93d6649c8f5436d65337af08887a516183a096d785ef1fc3acf69ed60dbec6b"
dependencies = [
"async-stream",
"bytes",
"prost",
"tokio",
"tokio-stream",
"tonic",
"tonic-build",
]
[[package]]
name = "tower"
version = "0.4.5"
@ -3907,9 +3958,9 @@ checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6"
[[package]]
name = "tracing"
version = "0.1.23"
version = "0.1.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7d40a22fd029e33300d8d89a5cc8ffce18bb7c587662f54629e94c9de5487f3"
checksum = "01ebdc2bb4498ab1ab5f5b73c5803825e60199229ccba0698170e3be0e7f959f"
dependencies = [
"cfg-if 1.0.0",
"log",
@ -3920,9 +3971,9 @@ dependencies = [
[[package]]
name = "tracing-attributes"
version = "0.1.12"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43f080ea7e4107844ef4766459426fa2d5c1ada2e47edba05dc7fa99d9629f47"
checksum = "a8a9bd1db7706f2373a190b0d067146caa39350c486f3d455b0e33b431f94c07"
dependencies = [
"proc-macro2",
"quote",
@ -3940,19 +3991,19 @@ dependencies = [
[[package]]
name = "tracing-futures"
version = "0.2.4"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab7bb6f14721aa00656086e9335d363c5c8747bae02ebe32ea2c7dece5689b4c"
checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2"
dependencies = [
"pin-project 0.4.27",
"pin-project 1.0.5",
"tracing",
]
[[package]]
name = "tracing-log"
version = "0.1.1"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e0f8c7178e13481ff6765bd169b33e8d554c5d2bbede5e32c356194be02b9b9"
checksum = "a6923477a48e41c1951f1999ef8bb5a3023eb723ceadafe78ffb65dc366761e3"
dependencies = [
"lazy_static",
"log",
@ -3984,9 +4035,9 @@ dependencies = [
[[package]]
name = "tracing-subscriber"
version = "0.2.15"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1fa8f0c8f4c594e4fc9debc1990deab13238077271ba84dd853d54902ee3401"
checksum = "8ab8966ac3ca27126141f7999361cc97dd6fb4b71da04c02044fa9045d98bb96"
dependencies = [
"ansi_term 0.12.1",
"chrono",
@ -4067,9 +4118,9 @@ checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a"
[[package]]
name = "url"
version = "2.2.0"
version = "2.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5909f2b0817350449ed73e8bcd81c8c3c8d9a7a5d8acba4b27db277f1868976e"
checksum = "9ccd964113622c8e9322cfac19eb1004a07e636c545f325da085d5cdde6f1f8b"
dependencies = [
"form_urlencoded",
"idna",


@ -81,6 +81,7 @@ structopt = "0.3.21"
tokio = { version = "1.0", features = ["macros", "rt-multi-thread", "parking_lot"] }
tokio-stream = { version = "0.1.2", features = ["net"] }
tonic = "0.4.0"
tonic-health = "0.3.0"
tracing = { version = "0.1", features = ["release_max_level_debug"] }
tracing-futures = "0.2.4"
tracing-opentelemetry = "0.11.0"


@ -176,6 +176,30 @@ The server will, by default, start an HTTP API server on port `8080` and a gRPC
### Writing and Reading Data
Each IOx instance requires a writer ID.
This can be set in three ways:
- set an environment variable `INFLUXDB_IOX_ID=42`
- set a flag `--writer-id 42`
- send an HTTP PUT request:
```
curl --request PUT \
--url http://localhost:8080/iox/api/v1/id \
--header 'Content-Type: application/json' \
--data '{
"id": 42
}'
```
To write data, you need a destination database.
This is set via HTTP PUT, identifying the database by org `company` and bucket `sensors`:
```
curl --request PUT \
--url http://localhost:8080/iox/api/v1/databases/company_sensors \
--header 'Content-Type: application/json' \
--data '{
}'
```
Data can be stored in InfluxDB IOx by sending it in [line protocol] format to the `/api/v2/write`
endpoint. Data is stored by organization and bucket names. Here's an example using [`curl`] with
the organization name `company` and the bucket name `sensors` that will send the data in the
@ -196,6 +220,22 @@ all data in the `company` organization's `sensors` bucket for the `processes` me
curl -v -G -d 'org=company' -d 'bucket=sensors' --data-urlencode 'sql_query=select * from processes' "http://127.0.0.1:8080/api/v2/read"
```
### Health Checks
The HTTP API exposes a healthcheck endpoint at `/health`
```shell
$ curl http://127.0.0.1:8080/health
OK
```
The gRPC API implements the [gRPC Health Checking Protocol](https://github.com/grpc/grpc/blob/master/doc/health-checking.md). This can be tested with [grpc-health-probe](https://github.com/grpc-ecosystem/grpc-health-probe)
```shell
$ grpc_health_probe -addr 127.0.0.1:8082 -service influxdata.platform.storage.Storage
status: SERVING
```
## Contributing
We welcome community contributions from anyone!


@ -8,11 +8,11 @@ description = "Apache Arrow / Parquet / DataFusion dependencies for InfluxDB IOx
[dependencies] # In alphabetical order
# We are using development version of arrow/parquet/datafusion and the dependencies are at the same rev
# The version can be found here: https://github.com/apache/arrow/commit/ad4504e8e85eb8e5babe0f01ca8cf9947499fc40
# The version can be found here: https://github.com/apache/arrow/commit/b5ac048c75cc55f4039d279f554920be3112d7cd
#
arrow = { git = "https://github.com/apache/arrow.git", rev = "ad4504e8e85eb8e5babe0f01ca8cf9947499fc40" , features = ["simd"] }
arrow-flight = { git = "https://github.com/apache/arrow.git", rev = "ad4504e8e85eb8e5babe0f01ca8cf9947499fc40" }
datafusion = { git = "https://github.com/apache/arrow.git", rev = "ad4504e8e85eb8e5babe0f01ca8cf9947499fc40" }
arrow = { git = "https://github.com/apache/arrow.git", rev = "b5ac048c75cc55f4039d279f554920be3112d7cd" , features = ["simd"] }
arrow-flight = { git = "https://github.com/apache/arrow.git", rev = "b5ac048c75cc55f4039d279f554920be3112d7cd" }
datafusion = { git = "https://github.com/apache/arrow.git", rev = "b5ac048c75cc55f4039d279f554920be3112d7cd" }
# Turn off the "arrow" feature; it currently has a bug that causes the crate to rebuild every time
# and we're not currently using it anyway
parquet = { git = "https://github.com/apache/arrow.git", rev = "ad4504e8e85eb8e5babe0f01ca8cf9947499fc40", default-features = false, features = ["snap", "brotli", "flate2", "lz4", "zstd"] }
parquet = { git = "https://github.com/apache/arrow.git", rev = "b5ac048c75cc55f4039d279f554920be3112d7cd", default-features = false, features = ["snap", "brotli", "flate2", "lz4", "zstd"] }

buf.yaml (new file, 18 lines)

@ -0,0 +1,18 @@
version: v1beta1
build:
roots:
- generated_types/protos/
excludes:
- generated_types/protos/com
- generated_types/protos/influxdata/platform
- generated_types/protos/grpc
lint:
use:
- DEFAULT
- STYLE_DEFAULT
breaking:
use:
- WIRE
- WIRE_JSON

File diff suppressed because it is too large.


@ -0,0 +1,112 @@
//! A collection of extension traits for types that
//! implement TryInto<U, Error=FieldViolation>
//!
//! Allows associating field context with the generated errors
//! as they propagate up the struct topology
use generated_types::google::FieldViolation;
use std::convert::TryInto;
/// An extension trait that adds the method `scope` to any type
/// implementing `TryInto<U, Error = FieldViolation>`
pub(crate) trait FromField<T> {
fn scope(self, field: impl Into<String>) -> Result<T, FieldViolation>;
}
impl<T, U> FromField<U> for T
where
T: TryInto<U, Error = FieldViolation>,
{
/// Try to convert type using TryInto calling `FieldViolation::scope`
/// on any returned error
fn scope(self, field: impl Into<String>) -> Result<U, FieldViolation> {
self.try_into().map_err(|e| e.scope(field))
}
}
/// An extension trait that adds the methods `optional` and `required` to any
/// Option containing a type implementing `TryInto<U, Error = FieldViolation>`
pub(crate) trait FromFieldOpt<T> {
/// Try to convert inner type, if any, using TryInto calling
/// `FieldViolation::scope` on any error encountered
///
/// Returns None if empty
fn optional(self, field: impl Into<String>) -> Result<Option<T>, FieldViolation>;
/// Try to convert inner type, using TryInto calling `FieldViolation::scope`
/// on any error encountered
///
/// Returns an error if empty
fn required(self, field: impl Into<String>) -> Result<T, FieldViolation>;
}
impl<T, U> FromFieldOpt<U> for Option<T>
where
T: TryInto<U, Error = FieldViolation>,
{
fn optional(self, field: impl Into<String>) -> Result<Option<U>, FieldViolation> {
self.map(|t| t.scope(field)).transpose()
}
fn required(self, field: impl Into<String>) -> Result<U, FieldViolation> {
match self {
None => Err(FieldViolation::required(field)),
Some(t) => t.scope(field),
}
}
}
/// An extension trait that adds the methods `optional` and `required` to any
/// String
///
/// Prost will default string fields to empty, whereas IOx sometimes
/// uses Option<String>; this helper aids mapping between them
///
/// TODO: Review mixed use of Option<String> and String in IOx
pub(crate) trait FromFieldString {
/// Returns Ok if the String is not empty
fn required(self, field: impl Into<String>) -> Result<String, FieldViolation>;
/// Wraps non-empty strings in Some(_), returns None for empty strings
fn optional(self) -> Option<String>;
}
impl FromFieldString for String {
fn required(self, field: impl Into<String>) -> Result<String, FieldViolation> {
if self.is_empty() {
return Err(FieldViolation::required(field));
}
Ok(self)
}
fn optional(self) -> Option<String> {
if self.is_empty() {
return None;
}
Some(self)
}
}
/// An extension trait that adds the method `vec_field` to any Vec of a type
/// implementing `TryInto<U, Error = FieldViolation>`
pub(crate) trait FromFieldVec<T> {
/// Converts to a `Vec<U>`, short-circuiting on the first error and
/// returning a correctly scoped `FieldViolation` for where the error
/// was encountered
fn vec_field(self, field: impl Into<String>) -> Result<T, FieldViolation>;
}
impl<T, U> FromFieldVec<Vec<U>> for Vec<T>
where
T: TryInto<U, Error = FieldViolation>,
{
fn vec_field(self, field: impl Into<String>) -> Result<Vec<U>, FieldViolation> {
let res: Result<_, _> = self
.into_iter()
.enumerate()
.map(|(i, t)| t.scope(i.to_string()))
.collect();
res.map_err(|e| e.scope(field))
}
}
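
An editor-added usage sketch for these helpers, showing how a prost-generated message might be converted into a domain type inside the same crate. `ProtoMatcher` and `Matcher` are hypothetical; only `FieldViolation` and the traits above come from this diff.

```rust
// Editor-added sketch, not part of this merge.
use std::convert::TryFrom;

use generated_types::google::FieldViolation;

// Trait import; the module is added as `pub(crate) mod field_validation`
// (see the lib.rs change below), so this path is an assumption.
use crate::field_validation::FromFieldString;

/// Hypothetical prost-generated message: missing strings default to ""
struct ProtoMatcher {
    table: String,
    predicate: String,
}

/// Hypothetical domain type that uses Option<String> for the optional field
struct Matcher {
    table: String,
    predicate: Option<String>,
}

impl TryFrom<ProtoMatcher> for Matcher {
    type Error = FieldViolation;

    fn try_from(proto: ProtoMatcher) -> Result<Self, Self::Error> {
        Ok(Self {
            // empty string -> Err(FieldViolation::required("table"))
            table: proto.table.required("table")?,
            // empty string -> None, otherwise Some(value)
            predicate: proto.predicate.optional(),
        })
    }
}
```

With that `TryFrom` in place, a parent conversion can use `FromField::scope` (or `FromFieldOpt::required` on an `Option<ProtoMatcher>`) so any error is reported against the enclosing field name.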


@ -32,3 +32,5 @@ pub mod wal;
mod database_name;
pub use database_name::*;
pub(crate) mod field_validation;


@ -299,10 +299,44 @@ impl Schema {
/// Returns an iterator of (Option<InfluxColumnType>, &Field) for
/// all the columns of this schema, in order
pub fn iter(&self) -> SchemaIter<'_> {
SchemaIter {
schema: self,
idx: 0,
}
SchemaIter::new(self)
}
/// Returns an iterator of `&Field` for all the tag columns of
/// this schema, in order
pub fn tags_iter(&self) -> impl Iterator<Item = &ArrowField> {
self.iter().filter_map(|(influx_column_type, field)| {
if matches!(influx_column_type, Some(InfluxColumnType::Tag)) {
Some(field)
} else {
None
}
})
}
/// Returns an iterator of `&Field` for all the field columns of
/// this schema, in order
pub fn fields_iter(&self) -> impl Iterator<Item = &ArrowField> {
self.iter().filter_map(|(influx_column_type, field)| {
if matches!(influx_column_type, Some(InfluxColumnType::Field(_))) {
Some(field)
} else {
None
}
})
}
/// Returns an iterator of `&Field` for all the timestamp columns
/// of this schema, in order. At the time of writing there should
/// be at most one such column
pub fn time_iter(&self) -> impl Iterator<Item = &ArrowField> {
self.iter().filter_map(|(influx_column_type, field)| {
if matches!(influx_column_type, Some(InfluxColumnType::Timestamp)) {
Some(field)
} else {
None
}
})
}
/// Merges any new columns from new_schema, consuming self. If the
@ -573,6 +607,12 @@ pub struct SchemaIter<'a> {
idx: usize,
}
impl<'a> SchemaIter<'a> {
fn new(schema: &'a Schema) -> Self {
Self { schema, idx: 0 }
}
}
impl<'a> fmt::Debug for SchemaIter<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "SchemaIter<{}>", self.idx)
@ -829,15 +869,47 @@ mod test {
}
}
/// Build an empty schema
fn empty_schema() -> Schema {
SchemaBuilder::new().build().unwrap()
}
#[test]
fn test_iter_empty() {
assert_eq!(empty_schema().iter().count(), 0);
}
#[test]
fn test_tags_iter_empty() {
assert_eq!(empty_schema().tags_iter().count(), 0);
}
#[test]
fn test_fields_iter_empty() {
assert_eq!(empty_schema().fields_iter().count(), 0);
}
#[test]
fn test_time_iter_empty() {
assert_eq!(empty_schema().time_iter().count(), 0);
}
/// Build a schema for testing iterators
fn iter_schema() -> Schema {
SchemaBuilder::new()
.influx_field("field1", Float)
.tag("tag1")
.timestamp()
.influx_field("field2", String)
.influx_field("field3", String)
.tag("tag2")
.build()
.unwrap()
}
#[test]
fn test_iter() {
let schema = SchemaBuilder::new()
.influx_field("the_field", String)
.tag("the_tag")
.timestamp()
.measurement("the_measurement")
.build()
.unwrap();
let schema = iter_schema();
// test schema iterator and field accessor match up
for (i, (iter_col_type, iter_field)) in schema.iter().enumerate() {
@ -845,7 +917,40 @@ mod test {
assert_eq!(iter_col_type, col_type);
assert_eq!(iter_field, field);
}
assert_eq!(schema.iter().count(), 3);
assert_eq!(schema.iter().count(), 6);
}
#[test]
fn test_tags_iter() {
let schema = iter_schema();
let mut iter = schema.tags_iter();
assert_eq!(iter.next().unwrap().name(), "tag1");
assert_eq!(iter.next().unwrap().name(), "tag2");
assert_eq!(iter.next(), None);
assert_eq!(iter.next(), None);
}
#[test]
fn test_fields_iter() {
let schema = iter_schema();
let mut iter = schema.fields_iter();
assert_eq!(iter.next().unwrap().name(), "field1");
assert_eq!(iter.next().unwrap().name(), "field2");
assert_eq!(iter.next().unwrap().name(), "field3");
assert_eq!(iter.next(), None);
assert_eq!(iter.next(), None);
}
#[test]
fn test_time_iter() {
let schema = iter_schema();
let mut iter = schema.time_iter();
assert_eq!(iter.next().unwrap().name(), "time");
assert_eq!(iter.next(), None);
assert_eq!(iter.next(), None);
}
#[test]


@ -1,7 +1,11 @@
###
# Dockerfile for the image used in CI performance tests
# Dockerfile used for deploying IOx
##
FROM rust:slim-buster
FROM debian:buster-slim
RUN apt-get update \
&& apt-get install -y libssl1.1 libgcc1 libc6 \
&& rm -rf /var/lib/{apt,dpkg,cache,log}
RUN groupadd -g 1500 rust \
&& useradd -u 1500 -g rust -s /bin/bash -m rust
@ -15,4 +19,4 @@ COPY target/release/influxdb_iox /usr/bin/influxdb_iox
EXPOSE 8080 8082
CMD ["influxdb_iox"]
ENTRYPOINT ["influxdb_iox"]


@ -5,9 +5,11 @@ interest for those who wish to understand how the code works. It is
not intended to be general user facing documentation
## Table of Contents:
* Rust style and Idiom guide: [style_guide.md](style_guide.md)
* Tracing and logging Guide: [tracing.md](tracing.md)
* How InfluxDB IOx manages the lifecycle of time series data: [data_management.md](data_management.md)
* Thoughts on parquet encoding and compression for timeseries data: [encoding_thoughts.md](encoding_thoughts.md)
* Thoughts on using multiple cores: [multi_core_tasks.md](multi_core_tasks.md)
* [Query Engine Docs](../query/README.md)
* [Testing documentation](testing.md) for developers of IOx


@ -28,10 +28,10 @@
# AWS_ACCESS_KEY_ID=access_key_value
# AWS_SECRET_ACCESS_KEY=secret_access_key_value
# AWS_DEFAULT_REGION=us-east-2
# INFLUXDB_IOX_S3_BUCKET=bucket-name
# INFLUXDB_IOX_BUCKET=bucket-name
#
# If using Google Cloud Storage as an object store:
# INFLUXDB_IOX_GCP_BUCKET=bucket_name
# INFLUXDB_IOX_BUCKET=bucket_name
# Set one of SERVICE_ACCOUNT or GOOGLE_APPLICATION_CREDENTIALS, either to a path of a filename
# containing Google credential JSON or to the JSON directly.
# SERVICE_ACCOUNT=/path/to/auth/info.json
@ -41,7 +41,7 @@
# The name you see when going to All Services > Storage accounts > [name]
# AZURE_STORAGE_ACCOUNT=
# The name of a container you've created in the storage account, under Blob Service > Containers
# AZURE_STORAGE_CONTAINER=
# INFLUXDB_IOX_BUCKET=
# In the Storage account's Settings > Access keys, one of the Key values
# AZURE_STORAGE_MASTER_KEY=
#

docs/testing.md (new file, 50 lines)

@ -0,0 +1,50 @@
# Testing
This document covers details that are only relevant if you are developing IOx and running the tests.
## Object storage
### To run the tests or not run the tests
If you are testing integration with some or all of the object storage options, you'll have more
setup to do.
By default, `cargo test -p object_store` does not run any tests that actually contact
any cloud services; the tests that would contact the services silently pass instead.
To ensure you've configured object storage integration testing correctly, you can run
`TEST_INTEGRATION=1 cargo test -p object_store`, which will run the tests that contact the cloud
services and fail them if the required environment variables aren't set.
If you don't specify the `TEST_INTEGRATION` environment variable but you do configure some or all
of the object stores, the relevant tests will run.
### Configuration differences when running the tests
When running `influxdb_iox server`, you can pick one object store to use. When running the tests,
you can run them against all the possible object stores. There's still only one
`INFLUXDB_IOX_BUCKET` variable, though, so that will set the bucket name for all configured object
stores. Use the same bucket name when setting up the different services.
Other than possibly configuring multiple object stores, configuring the tests to use the object
store services is the same as configuring the server to use an object store service. See the output
of `influxdb_iox server --help` for instructions.
## InfluxDB IOx Client
The `influxdb_iox_client` crate might be used by people who are using a managed IOx server. In
other words, they might only use the `influxdb_iox_client` crate and not the rest of the crates in
this workspace. The tests in `influxdb_iox_client` see an IOx server in the same way as IOx servers
see the object store services: sometimes you'll want to run the tests against an actual server, and
sometimes you won't.
Like in the `object_store` crate, the `influxdb_iox_client` crate's tests use the
`TEST_INTEGRATION` environment variable to enforce running tests that use an actual IOx server.
Running `cargo test -p influxdb_iox_client` will silently pass the tests that would contact a server.
Start an IOx server in one terminal and run `TEST_INTEGRATION=1
TEST_IOX_ENDPOINT=http://127.0.0.1:8080 cargo test -p influxdb_iox_client` in another (where
`http://127.0.0.1:8080` is the address to the IOx HTTP server) to run the client tests against the
server. If you set `TEST_INTEGRATION` but not `TEST_IOX_ENDPOINT`, the integration tests will fail
because of the missing configuration. If you set `TEST_IOX_ENDPOINT` but not `TEST_INTEGRATION`, the
integration tests will be run.


@ -10,7 +10,7 @@ type Error = Box<dyn std::error::Error>;
type Result<T, E = Error> = std::result::Result<T, E>;
fn main() -> Result<()> {
let root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("protos");
generate_grpc_types(&root)?;
generate_wal_types(&root)?;
@ -20,16 +20,27 @@ fn main() -> Result<()> {
/// Schema used with IOx specific gRPC requests
///
/// Creates `influxdata.platform.storage.rs` and
/// `com.github.influxdata.idpe.storage.read.rs`
/// Creates
/// - `influxdata.platform.storage.rs`
/// - `com.github.influxdata.idpe.storage.read.rs`
/// - `influxdata.iox.management.v1.rs`
fn generate_grpc_types(root: &Path) -> Result<()> {
let storage_path = root.join("influxdata/platform/storage");
let idpe_path = root.join("com/github/influxdata/idpe/storage/read");
let management_path = root.join("influxdata/iox/management/v1");
let grpc_path = root.join("grpc/health/v1");
let proto_files = vec![
root.join("test.proto"),
root.join("predicate.proto"),
root.join("storage_common.proto"),
root.join("storage_common_idpe.proto"),
root.join("service.proto"),
root.join("source.proto"),
storage_path.join("test.proto"),
storage_path.join("predicate.proto"),
storage_path.join("storage_common.proto"),
storage_path.join("service.proto"),
storage_path.join("storage_common_idpe.proto"),
idpe_path.join("source.proto"),
management_path.join("base_types.proto"),
management_path.join("database_rules.proto"),
management_path.join("service.proto"),
grpc_path.join("service.proto"),
];
// Tell cargo to recompile if any of these proto files are changed


@ -0,0 +1,23 @@
syntax = "proto3";
package grpc.health.v1;
message HealthCheckRequest {
string service = 1;
}
message HealthCheckResponse {
enum ServingStatus {
UNKNOWN = 0;
SERVING = 1;
NOT_SERVING = 2;
SERVICE_UNKNOWN = 3; // Used only by the Watch method.
}
ServingStatus status = 1;
}
service Health {
rpc Check(HealthCheckRequest) returns (HealthCheckResponse);
rpc Watch(HealthCheckRequest) returns (stream HealthCheckResponse);
}


@ -0,0 +1,30 @@
syntax = "proto3";
package influxdata.iox.management.v1;
enum Order {
ORDER_UNSPECIFIED = 0;
ORDER_ASC = 1;
ORDER_DESC = 2;
}
enum Aggregate {
AGGREGATE_UNSPECIFIED = 0;
AGGREGATE_MIN = 1;
AGGREGATE_MAX = 2;
}
enum ColumnType {
COLUMN_TYPE_UNSPECIFIED = 0;
COLUMN_TYPE_I64 = 1;
COLUMN_TYPE_U64 = 2;
COLUMN_TYPE_F64 = 3;
COLUMN_TYPE_STRING = 4;
COLUMN_TYPE_BOOL = 5;
}
message HostGroup {
string id = 1;
// connection strings for remote hosts.
repeated string hosts = 2;
}


@ -0,0 +1,248 @@
syntax = "proto3";
package influxdata.iox.management.v1;
import "google/protobuf/duration.proto";
import "google/protobuf/empty.proto";
import "influxdata/iox/management/v1/base_types.proto";
// `PartitionTemplate` is used to compute the partition key of each row that
// gets written. It can consist of the table name, a column name and its value,
// a formatted time, or a string column and regex captures of its value. For
// columns that do not appear in the input row, a blank value is output.
//
// The key is constructed in order of the template parts; thus ordering changes
// what partition key is generated.
message PartitionTemplate {
message Part {
message ColumnFormat {
string column = 1;
string format = 2;
}
oneof part {
google.protobuf.Empty table = 1;
string column = 2;
string time = 3;
ColumnFormat regex = 4;
ColumnFormat strf_time = 5;
}
}
repeated Part parts = 1;
}
message Matcher {
// A query predicate to filter rows
string predicate = 1;
// Restrict selection to a specific table or tables specified by a regex
oneof table_matcher {
google.protobuf.Empty all = 2;
string table = 3;
string regex = 4;
}
}
message ReplicationConfig {
// The set of host groups that data should be replicated to. Which host a
// write goes to within a host group is determined by consistent hashing of
// the partition key. We'd use this to create a host group per
// availability zone, so you might have 5 availability zones with 2
// hosts in each. Replication will ensure that N of those zones get a
// write. For each zone, only a single host needs to get the write.
// Replication is for ensuring a write exists across multiple hosts
// before returning success. Its purpose is to ensure write durability,
// rather than write availability for query (this is covered by
// subscriptions).
repeated string replications = 1;
// The minimum number of host groups to replicate a write to before success
// is returned. This can be overridden on a per request basis.
// Replication will continue to write to the other host groups in the
// background.
uint32 replication_count = 2;
// How long the replication queue can get before either rejecting writes or
// dropping missed writes. The queue is kept in memory on a
// per-database basis. A queue size of zero means it will only try to
// replicate synchronously and drop any failures.
uint64 replication_queue_max_size = 3;
}
message SubscriptionConfig {
message Subscription {
string name = 1;
string host_group_id = 2;
Matcher matcher = 3;
}
// `subscriptions` are used for query servers to get data via either push
// or pull as it arrives. They are separate from replication as they
// have a different purpose. They're for query servers or other clients
// that want to subscribe to some subset of data being written in. This
// could either be specific partitions, ranges of partitions, tables, or
// rows matching some predicate.
repeated Subscription subscriptions = 1;
}
message QueryConfig {
// If set to `true`, this server should answer queries from one or more
// of its local write buffer and any read-only partitions that it knows
// about. In this case, results will be merged with any others from the
// remote groups or read-only partitions.
bool query_local = 1;
// Set `primary` to a host group if remote servers should be
// issued queries for this database. All hosts in the group should be
// queried with this server acting as the coordinator that merges
// results together.
string primary = 2;
// If a specific host in the primary group is unavailable,
// another host in the same position from a secondary group should be
// queried. For example, imagine we've partitioned the data in this DB into
// 4 partitions and we are replicating the data across 3 availability
// zones. We have 4 hosts in each of those AZs, thus they each have 1
// partition. We'd set the primary group to be the 4 hosts in the same
// AZ as this one, and the secondary groups as the hosts in the other 2 AZs.
repeated string secondaries = 3;
// Use `readOnlyPartitions` when a server should answer queries for
// partitions that come from object storage. This can be used to start
// up a new query server to handle queries by pointing it at a
// collection of partitions and then telling it to also pull
// data from the replication servers (writes that haven't been snapshotted
// into a partition).
repeated string read_only_partitions = 4;
}
message WalBufferConfig {
enum Rollover {
ROLLOVER_UNSPECIFIED = 0;
// Drop the old segment even though it hasn't been persisted. This part of
// the WAL will be lost on this server.
ROLLOVER_DROP_OLD_SEGMENT = 1;
// Drop the incoming write and fail silently. This favors making sure that
// older WAL data will be backed up.
ROLLOVER_DROP_INCOMING = 2;
// Reject the incoming write and return an error. The client may retry the
// request, which will succeed once the oldest segment has been
// persisted to object storage.
ROLLOVER_RETURN_ERROR = 3;
}
// The size the WAL buffer should be limited to. Once the buffer gets to
// this size it will drop old segments to remain below this size, but
// still try to hold as much in memory as possible while remaining
// below this threshold
uint64 buffer_size = 1;
// WAL segments become read-only after crossing over this size, which means
// that segments will always be >= this size. When old segments are
// dropped from memory, at least this much space will be freed from
// the buffer.
uint64 segment_size = 2;
// What should happen if a write comes in that would exceed the WAL buffer
// size and the oldest segment that could be dropped hasn't yet been
// persisted to object storage. If the oldest segment has been
// persisted, then it will be dropped from the buffer so that new writes
// can be accepted. This option is only for defining the behavior of what
// happens if that segment hasn't been persisted. If set to return an
// error, new writes will be rejected until the oldest segment has been
// persisted so that it can be cleared from memory. Alternatively, this
// can be set so that old segments are dropped even if they haven't been
// persisted. This setting is also useful for cases where persistence
// isn't being used and this is only for in-memory buffering.
Rollover buffer_rollover = 3;
// If set to true, buffer segments will be written to object storage.
bool persist_segments = 4;
// If set, segments will be rolled over after this period of time even
// if they haven't hit the size threshold. This allows them to be written
// out to object storage as they must be immutable first.
google.protobuf.Duration close_segment_after = 5;
}
message MutableBufferConfig {
message PartitionDropOrder {
message ColumnSort {
string column_name = 1;
ColumnType column_type = 2;
Aggregate column_value = 3;
}
// Sort partitions by this order. Last will be dropped first.
Order order = 1;
// Configure sort key
oneof sort {
// The last time the partition received a write.
google.protobuf.Empty last_write_time = 2;
// When the partition was opened in the mutable buffer.
google.protobuf.Empty created_at_time = 3;
// A column name, its expected type, and whether to use the min or max
// value. The ColumnType is necessary because the column can appear in
// any number of tables and be of a different type. This specifies that
// when sorting partitions, only columns with the given name and type
// should be used for the purposes of determining the partition order. If a
// partition doesn't have the given column in any way, the partition will
// appear at the beginning of the list with a null value; all
// partitions having null for that value will then be
// sorted by created_at_time desc. So if none of the partitions in the
// mutable buffer had this column with this type, then the partition
// that was created first would appear last in the list and thus be the
// first up to be dropped.
ColumnSort column = 4;
}
}
// The size the mutable buffer should be limited to. Once the buffer gets
// to this size it will drop partitions in the given order. If unable
// to drop partitions (because of later rules in this config) it will
// reject writes until it is able to drop partitions.
uint64 buffer_size = 1;
// If set, the mutable buffer will not drop partitions that have chunks
// that have not yet been persisted. Thus it will reject writes if it
// is over size and is unable to drop partitions. The default is to
// drop partitions in the sort order, regardless of whether they have
// unpersisted chunks or not. The WAL Buffer can be used to ensure
// persistence, but this may cause longer recovery times.
bool reject_if_not_persisted = 2;
// Configure order to drop partitions in
PartitionDropOrder partition_drop_order = 3;
// Attempt to persist partitions after they haven't received a write for
// this number of seconds. If not set, partitions won't be
// automatically persisted.
uint32 persist_after_cold_seconds = 4;
}
message DatabaseRules {
// The unencoded name of the database
string name = 1;
// Template that generates a partition key for each row inserted into the database
PartitionTemplate partition_template = 2;
// Synchronous replication configuration for this database
ReplicationConfig replication_config = 3;
// Asynchronous pull-based subscription configuration for this database
SubscriptionConfig subscription_config = 4;
// Query configuration for this database
QueryConfig query_config = 5;
// WAL configuration for this database
WalBufferConfig wal_buffer_config = 6;
// Mutable buffer configuration for this database
MutableBufferConfig mutable_buffer_config = 7;
}
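
On the Rust side these rules surface as prost-generated structs re-exported by `generated_types` (see the `lib.rs` changes later in this diff). A small editor-added sketch, with field types assumed from prost's standard proto3 mapping:

```rust
// Editor-added sketch, not part of this merge.
use generated_types::influxdata::iox::management::v1::DatabaseRules;

fn example_rules() -> DatabaseRules {
    DatabaseRules {
        // the unencoded database name
        name: "company_sensors".to_string(),
        // leave partition template, replication, subscription, query,
        // WAL buffer and mutable buffer configs at their prost defaults
        ..Default::default()
    }
}
```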


@ -0,0 +1,49 @@
syntax = "proto3";
package influxdata.iox.management.v1;
import "google/protobuf/empty.proto";
import "influxdata/iox/management/v1/database_rules.proto";
service ManagementService {
rpc GetWriterId(GetWriterIdRequest) returns (GetWriterIdResponse);
rpc UpdateWriterId(UpdateWriterIdRequest) returns (UpdateWriterIdResponse);
rpc ListDatabases(ListDatabasesRequest) returns (ListDatabasesResponse);
rpc GetDatabase(GetDatabaseRequest) returns (GetDatabaseResponse);
rpc CreateDatabase(CreateDatabaseRequest) returns (CreateDatabaseResponse);
}
message GetWriterIdRequest {}
message GetWriterIdResponse {
uint32 id = 1;
}
message UpdateWriterIdRequest {
uint32 id = 1;
}
message UpdateWriterIdResponse {}
message ListDatabasesRequest {}
message ListDatabasesResponse {
repeated string names = 1;
}
message GetDatabaseRequest {
string name = 1;
}
message GetDatabaseResponse {
DatabaseRules rules = 1;
}
message CreateDatabaseRequest {
DatabaseRules rules = 1;
}
message CreateDatabaseResponse {}


@ -8,9 +8,8 @@ syntax = "proto3";
package influxdata.platform.storage;
import "google/protobuf/empty.proto";
import "storage_common.proto";
import "storage_common_idpe.proto";
import "influxdata/platform/storage/storage_common.proto";
import "influxdata/platform/storage/storage_common_idpe.proto";
service Storage {
// ReadFilter performs a filter operation at storage


@ -8,7 +8,7 @@ syntax = "proto3";
package influxdata.platform.storage;
import "google/protobuf/any.proto";
import "predicate.proto";
import "influxdata/platform/storage/predicate.proto";
message ReadFilterRequest {


@ -10,8 +10,8 @@ syntax = "proto3";
package influxdata.platform.storage;
import "google/protobuf/any.proto";
import "predicate.proto";
import "storage_common.proto";
import "influxdata/platform/storage/predicate.proto";
import "influxdata/platform/storage/storage_common.proto";
message ReadSeriesCardinalityRequest {
google.protobuf.Any read_series_cardinality_source = 1;


@ -9,21 +9,71 @@
clippy::clone_on_ref_ptr
)]
include!(concat!(env!("OUT_DIR"), "/influxdata.platform.storage.rs"));
include!(concat!(
env!("OUT_DIR"),
"/com.github.influxdata.idpe.storage.read.rs"
));
include!(concat!(env!("OUT_DIR"), "/wal_generated.rs"));
mod pb {
pub mod influxdata {
pub mod platform {
pub mod storage {
include!(concat!(env!("OUT_DIR"), "/influxdata.platform.storage.rs"));
// Can't implement `Default` because `prost::Message` implements `Default`
impl TimestampRange {
pub fn max() -> Self {
TimestampRange {
start: std::i64::MIN,
end: std::i64::MAX,
// Can't implement `Default` because `prost::Message` implements `Default`
impl TimestampRange {
pub fn max() -> Self {
TimestampRange {
start: std::i64::MIN,
end: std::i64::MAX,
}
}
}
}
}
pub mod iox {
pub mod management {
pub mod v1 {
include!(concat!(env!("OUT_DIR"), "/influxdata.iox.management.v1.rs"));
}
}
}
}
pub mod com {
pub mod github {
pub mod influxdata {
pub mod idpe {
pub mod storage {
pub mod read {
include!(concat!(
env!("OUT_DIR"),
"/com.github.influxdata.idpe.storage.read.rs"
));
}
}
}
}
}
}
// Needed because of https://github.com/hyperium/tonic/issues/471
pub mod grpc {
pub mod health {
pub mod v1 {
include!(concat!(env!("OUT_DIR"), "/grpc.health.v1.rs"));
}
}
}
}
include!(concat!(env!("OUT_DIR"), "/wal_generated.rs"));
/// gRPC Storage Service
pub const STORAGE_SERVICE: &str = "influxdata.platform.storage.Storage";
/// gRPC Testing Service
pub const IOX_TESTING_SERVICE: &str = "influxdata.platform.storage.IOxTesting";
/// gRPC Arrow Flight Service
pub const ARROW_SERVICE: &str = "arrow.flight.protocol.FlightService";
pub use pb::com::github::influxdata::idpe::storage::read::*;
pub use pb::influxdata::platform::storage::*;
pub use google_types as google;
pub use pb::{grpc, influxdata};
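
An editor-added sketch of what a consumer of the reorganized crate might write, using only the re-exports and constants shown above (an illustration, not code from this merge):

```rust
// Editor-added sketch; assumes the `pub use pb::{grpc, influxdata}` re-exports above.
use generated_types::{grpc, influxdata, STORAGE_SERVICE};

fn main() {
    // prost-generated types are reachable via their proto package paths
    let range = influxdata::platform::storage::TimestampRange::max();
    assert_eq!(range.start, i64::MIN);
    assert_eq!(range.end, i64::MAX);

    // health-check types generated from grpc/health/v1/service.proto
    let request = grpc::health::v1::HealthCheckRequest {
        service: STORAGE_SERVICE.to_string(),
    };
    println!("checking {}", request.service);
}
```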


@ -5,12 +5,13 @@ authors = ["Dom Dwyer <dom@itsallbroken.com>"]
edition = "2018"
[features]
flight = ["arrow_deps", "serde/derive", "tonic", "serde_json", "futures-util"]
flight = ["arrow_deps", "serde/derive", "serde_json", "futures-util"]
[dependencies]
# Workspace dependencies, in alphabetical order
arrow_deps = { path = "../arrow_deps", optional = true }
data_types = { path = "../data_types" }
generated_types = { path = "../generated_types" }
# Crates.io dependencies, in alphabetical order
futures-util = { version = "0.3.1", optional = true }
@ -19,7 +20,7 @@ serde = "1.0.118"
serde_json = { version = "1.0.44", optional = true }
thiserror = "1.0.23"
tokio = { version = "1.0", features = ["macros"] }
tonic = { version = "0.4.0", optional = true }
tonic = { version = "0.4.0" }
[dev-dependencies] # In alphabetical order
rand = "0.8.1"


@ -9,6 +9,9 @@ use data_types::{http::ListDatabasesResponse, DatabaseName};
#[cfg(feature = "flight")]
mod flight;
/// Client for the gRPC health checking API
pub mod health;
// can't combine these into one statement that uses `{}` because of this bug in
// the `unreachable_pub` lint: https://github.com/rust-lang/rust/issues/64762
#[cfg(feature = "flight")]


@ -0,0 +1,70 @@
use generated_types::grpc::health::v1::*;
use thiserror::Error;
/// Error type for the health check client
#[derive(Debug, Error)]
pub enum Error {
/// Service is not serving
#[error("Service is not serving")]
NotServing,
/// Service returned an unexpected variant for the status enumeration
#[error("Received invalid response: {}", .0)]
InvalidResponse(i32),
/// Error connecting to the server
#[error("Connection error: {}", .0)]
ConnectionError(#[from] tonic::transport::Error),
/// Client received an unexpected error from the server
#[error("Unexpected server error: {}: {}", .0.code(), .0.message())]
UnexpectedError(#[from] tonic::Status),
}
/// Result type for the health check client
pub type Result<T, E = Error> = std::result::Result<T, E>;
/// A client for the gRPC health checking API
///
/// Allows checking the status of a given service
#[derive(Debug)]
pub struct Client {
inner: health_client::HealthClient<tonic::transport::Channel>,
}
impl Client {
/// Create a new client with the provided endpoint
pub async fn connect<D>(dst: D) -> Result<Self>
where
D: std::convert::TryInto<tonic::transport::Endpoint>,
D::Error: Into<tonic::codegen::StdError>,
{
Ok(Self {
inner: health_client::HealthClient::connect(dst).await?,
})
}
/// Returns `Ok()` if the corresponding service is serving
pub async fn check(&mut self, service: impl Into<String>) -> Result<()> {
use health_check_response::ServingStatus;
let status = self
.inner
.check(HealthCheckRequest {
service: service.into(),
})
.await?
.into_inner();
match status.status() {
ServingStatus::Serving => Ok(()),
ServingStatus::NotServing => Err(Error::NotServing),
_ => Err(Error::InvalidResponse(status.status)),
}
}
/// Returns `Ok()` if the storage service is serving
pub async fn check_storage(&mut self) -> Result<()> {
self.check(generated_types::STORAGE_SERVICE).await
}
}
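
A short editor-added usage sketch for this client. The module path (`influxdb_iox_client::health`), the endpoint address, and the tokio runtime setup are assumptions; the gRPC port matches the README's default of 8082.

```rust
// Editor-added sketch, not part of this merge.
// Assumes tokio with the `rt-multi-thread` and `macros` features enabled.
use influxdb_iox_client::health;

#[tokio::main]
async fn main() -> Result<(), health::Error> {
    // connect to a locally running IOx gRPC endpoint
    let mut client = health::Client::connect("http://127.0.0.1:8082".to_string()).await?;

    // returns Ok(()) only if the storage service reports SERVING
    client.check_storage().await?;
    println!("influxdata.platform.storage.Storage is serving");
    Ok(())
}
```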


@ -24,9 +24,11 @@ use query::{
util::{make_range_expr, AndExprBuilder},
};
use crate::dictionary::{Dictionary, Error as DictionaryError};
use crate::table::Table;
use crate::{
column::Column,
dictionary::{Dictionary, Error as DictionaryError},
table::Table,
};
use async_trait::async_trait;
use snafu::{OptionExt, ResultExt, Snafu};
@ -50,6 +52,12 @@ pub enum Error {
source: crate::table::Error,
},
#[snafu(display("Error checking predicate in table '{}': {}", table_name, source))]
NamedTablePredicateCheck {
table_name: String,
source: crate::table::Error,
},
#[snafu(display(
"Unsupported predicate when mutable buffer table names. Found a general expression: {:?}",
exprs
@ -85,12 +93,36 @@ pub enum Error {
#[snafu(display("Attempt to write table batch without a name"))]
TableWriteWithoutName,
#[snafu(display("Value ID {} not found in dictionary of chunk {}", value_id, chunk_id))]
InternalColumnValueIdNotFoundInDictionary {
value_id: u32,
chunk_id: u64,
source: DictionaryError,
},
#[snafu(display("Column ID {} not found in dictionary of chunk {}", column_id, chunk))]
ColumnIdNotFoundInDictionary {
column_id: u32,
chunk: u64,
source: DictionaryError,
},
#[snafu(display(
"Column name {} not found in dictionary of chunk {}",
column_name,
chunk_id
))]
ColumnNameNotFoundInDictionary {
column_name: String,
chunk_id: u64,
source: DictionaryError,
},
#[snafu(display(
"Column '{}' is not a string tag column and thus can not list values",
column_name
))]
UnsupportedColumnTypeForListingValues { column_name: String },
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
@ -312,13 +344,7 @@ impl Chunk {
return Ok(None);
}
let table_name_id =
self.dictionary
.id(table_name)
.context(InternalTableNotFoundInDictionary {
table_name,
chunk_id: self.id(),
})?;
let table_name_id = self.table_name_id(table_name)?;
let mut chunk_column_ids = BTreeSet::new();
@ -366,6 +392,115 @@ impl Chunk {
Ok(Some(column_names))
}
/// Return the id of the table in the chunk's dictionary
fn table_name_id(&self, table_name: &str) -> Result<u32> {
self.dictionary
.id(table_name)
.context(InternalTableNotFoundInDictionary {
table_name,
chunk_id: self.id(),
})
}
/// Returns the strings of the specified Tag column that satisfy
/// the predicate, if they can be determined entirely using metadata.
///
/// If the predicate cannot be evaluated entirely with metadata,
/// return `Ok(None)`.
pub fn tag_column_values(
&self,
table_name: &str,
column_name: &str,
chunk_predicate: &ChunkPredicate,
) -> Result<Option<BTreeSet<String>>> {
// No support for general purpose expressions
if !chunk_predicate.chunk_exprs.is_empty() {
return Ok(None);
}
let chunk_id = self.id();
let table_name_id = self.table_name_id(table_name)?;
// Is this table even in the chunk?
let table = self
.tables
.get(&table_name_id)
.context(NamedTableNotFoundInChunk {
table_name,
chunk_id,
})?;
// See if we can rule out the table entirely on metadata
let could_match = table
.could_match_predicate(chunk_predicate)
.context(NamedTablePredicateCheck { table_name })?;
if !could_match {
// No columns could match, return empty set
return Ok(Default::default());
}
let column_id =
self.dictionary
.lookup_value(column_name)
.context(ColumnNameNotFoundInDictionary {
column_name,
chunk_id,
})?;
let column = table
.column(column_id)
.context(NamedTableError { table_name })?;
if let Column::Tag(column, _) = column {
// if we have a timestamp predicate, find all values
// where the timestamp is within range. Otherwise take
// all values.
// Collect matching ids into BTreeSet to deduplicate on
// ids *before* looking up Strings
let column_value_ids: BTreeSet<u32> = match chunk_predicate.range {
None => {
// take all non-null values
column.iter().filter_map(|&s| s).collect()
}
Some(range) => {
// filter out all values that don't match the timestamp range
let time_column = table
.column_i64(chunk_predicate.time_column_id)
.context(NamedTableError { table_name })?;
column
.iter()
.zip(time_column.iter())
.filter_map(|(&column_value_id, &timestamp_value)| {
if range.contains_opt(timestamp_value) {
column_value_id
} else {
None
}
})
.collect()
}
};
// convert all the (deduplicated) ids to Strings
let column_values = column_value_ids
.into_iter()
.map(|value_id| {
let value = self.dictionary.lookup_id(value_id).context(
InternalColumnValueIdNotFoundInDictionary { value_id, chunk_id },
)?;
Ok(value.to_string())
})
.collect::<Result<BTreeSet<String>>>()?;
Ok(Some(column_values))
} else {
UnsupportedColumnTypeForListingValues { column_name }.fail()
}
}
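As an aside, the dedup-before-lookup pattern used here can be sketched with plain std collections (a HashMap stands in for the chunk's Dictionary; this is illustrative only and not part of the change):
use std::collections::{BTreeSet, HashMap};

fn distinct_values(ids: &[Option<u32>], dict: &HashMap<u32, String>) -> BTreeSet<String> {
    // deduplicate on ids *before* resolving them to Strings
    let distinct_ids: BTreeSet<u32> = ids.iter().filter_map(|&id| id).collect();
    distinct_ids
        .into_iter()
        .filter_map(|id| dict.get(&id).cloned())
        .collect()
}

fn main() {
    let dict: HashMap<u32, String> =
        vec![(1, "MA".to_string()), (2, "CA".to_string())].into_iter().collect();
    // two rows of "MA", one null row, one row of "CA"
    let rows = vec![Some(1), Some(1), None, Some(2)];
    assert_eq!(distinct_values(&rows, &dict).len(), 2);
}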
/// Translates `predicate` into per-chunk ids that can be
/// directly evaluated against tables in this chunk
pub fn compile_predicate(&self, predicate: &Predicate) -> Result<ChunkPredicate> {
@ -627,6 +762,15 @@ impl query::PartitionChunk for Chunk {
) -> Result<Option<StringSet>, Self::Error> {
unimplemented!("This function is slated for removal")
}
async fn column_values(
&self,
_table_name: &str,
_column_name: &str,
_predicate: &Predicate,
) -> Result<Option<StringSet>, Self::Error> {
unimplemented!("This function is slated for removal")
}
}
/// Used to figure out if we know how to deal with this kind of

View File

@ -6,11 +6,10 @@ use generated_types::wal;
use query::group_by::GroupByAndAggregate;
use query::group_by::WindowDuration;
use query::{
exec::{stringset::StringSet, SeriesSetPlan, SeriesSetPlans},
predicate::Predicate,
Database,
group_by::Aggregate,
plan::seriesset::{SeriesSetPlan, SeriesSetPlans},
};
use query::{group_by::Aggregate, plan::stringset::StringSetPlan};
use query::{predicate::Predicate, Database};
use crate::column::Column;
use crate::table::Table;
@ -19,10 +18,10 @@ use crate::{
partition::Partition,
};
use std::collections::{BTreeSet, HashMap, HashSet};
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use arrow_deps::datafusion::{error::DataFusionError, logical_plan::LogicalPlan};
use arrow_deps::datafusion::error::DataFusionError;
use crate::dictionary::Error as DictionaryError;
@ -46,30 +45,6 @@ pub enum Error {
source: DictionaryError,
},
#[snafu(display(
"Column name {} not found in dictionary of chunk {}",
column_name,
chunk
))]
ColumnNameNotFoundInDictionary {
column_name: String,
chunk: u64,
source: DictionaryError,
},
#[snafu(display("Value ID {} not found in dictionary of chunk {}", value_id, chunk))]
ColumnValueIdNotFoundInDictionary {
value_id: u32,
chunk: u64,
source: DictionaryError,
},
#[snafu(display(
"Column '{}' is not a tag column and thus can not list values",
column_name
))]
UnsupportedColumnTypeForListingValues { column_name: String },
#[snafu(display("id conversion error"))]
IdConversionError { source: std::num::TryFromIntError },
@ -254,27 +229,6 @@ impl Database for MutableBufferDb {
Ok(())
}
/// return all column values in this database, while applying optional
/// predicates
async fn column_values(
&self,
column_name: &str,
predicate: Predicate,
) -> Result<StringSetPlan, Self::Error> {
let has_exprs = predicate.has_exprs();
let mut filter = ChunkTableFilter::new(predicate);
if has_exprs {
let mut visitor = ValuePredVisitor::new(column_name);
self.accept(&mut filter, &mut visitor)?;
Ok(visitor.plans.into())
} else {
let mut visitor = ValueVisitor::new(column_name);
self.accept(&mut filter, &mut visitor)?;
Ok(visitor.column_values.into())
}
}
async fn query_series(&self, predicate: Predicate) -> Result<SeriesSetPlans, Self::Error> {
let mut filter = ChunkTableFilter::new(predicate);
let mut visitor = SeriesVisitor::new();
@ -569,152 +523,6 @@ impl ChunkTableFilter {
}
}
/// return all values in the `column_name` column
/// in this database, while applying the timestamp range
///
/// Potential optimizations: Run this in parallel (in different
/// futures) for each chunk / table, rather than a single one
/// -- but that will require building up parallel hash tables.
struct ValueVisitor<'a> {
column_name: &'a str,
// what column id we are looking for
column_id: Option<u32>,
chunk_value_ids: BTreeSet<u32>,
column_values: StringSet,
}
impl<'a> ValueVisitor<'a> {
fn new(column_name: &'a str) -> Self {
Self {
column_name,
column_id: None,
column_values: StringSet::new(),
chunk_value_ids: BTreeSet::new(),
}
}
}
impl<'a> Visitor for ValueVisitor<'a> {
fn pre_visit_chunk(&mut self, chunk: &Chunk) -> Result<()> {
self.chunk_value_ids.clear();
self.column_id = Some(chunk.dictionary.lookup_value(self.column_name).context(
ColumnNameNotFoundInDictionary {
column_name: self.column_name,
chunk: chunk.id,
},
)?);
Ok(())
}
fn visit_column(
&mut self,
table: &Table,
column_id: u32,
column: &Column,
filter: &mut ChunkTableFilter,
) -> Result<()> {
if Some(column_id) != self.column_id {
return Ok(());
}
match column {
Column::Tag(column, _) => {
// if we have a timestamp predicate, find all values
// where the timestamp is within range. Otherwise take
// all values.
let chunk_predicate = filter.chunk_predicate();
match chunk_predicate.range {
None => {
// take all non-null values
column.iter().filter_map(|&s| s).for_each(|value_id| {
self.chunk_value_ids.insert(value_id);
});
}
Some(range) => {
// filter out all values that don't match the timestamp range
let time_column = table.column_i64(chunk_predicate.time_column_id)?;
column
.iter()
.zip(time_column.iter())
.filter_map(|(&column_value_id, &timestamp_value)| {
if range.contains_opt(timestamp_value) {
column_value_id
} else {
None
}
})
.for_each(|value_id| {
self.chunk_value_ids.insert(value_id);
});
}
}
Ok(())
}
_ => UnsupportedColumnTypeForListingValues {
column_name: self.column_name,
}
.fail(),
}
}
fn post_visit_chunk(&mut self, chunk: &Chunk) -> Result<()> {
// convert all the chunk's value ids to Strings
for &value_id in &self.chunk_value_ids {
let value = chunk.dictionary.lookup_id(value_id).context(
ColumnValueIdNotFoundInDictionary {
value_id,
chunk: chunk.id,
},
)?;
if !self.column_values.contains(value) {
self.column_values.insert(value.to_string());
}
}
Ok(())
}
}
/// return all column values for the specified column in this
/// database, while applying the timestamp range and predicate
struct ValuePredVisitor<'a> {
column_name: &'a str,
plans: Vec<LogicalPlan>,
}
impl<'a> ValuePredVisitor<'a> {
fn new(column_name: &'a str) -> Self {
Self {
column_name,
plans: Vec::new(),
}
}
}
impl<'a> Visitor for ValuePredVisitor<'a> {
// TODO try and rule out entire tables based on the same criteria
// as explained in NamePredVisitor
fn pre_visit_table(
&mut self,
table: &Table,
chunk: &Chunk,
filter: &mut ChunkTableFilter,
) -> Result<()> {
// skip table entirely if there are no rows that fall in the timestamp
if table.could_match_predicate(filter.chunk_predicate())? {
self.plans.push(table.tag_values_plan(
self.column_name,
filter.chunk_predicate(),
chunk,
)?);
}
Ok(())
}
}
/// Return DataFusion plans to calculate which series pass the
/// specified predicate.
struct SeriesVisitor {
@ -843,10 +651,6 @@ mod tests {
type TestError = Box<dyn std::error::Error + Send + Sync + 'static>;
type Result<T = (), E = TestError> = std::result::Result<T, E>;
fn to_set(v: &[&str]) -> BTreeSet<String> {
v.iter().map(|s| s.to_string()).collect::<BTreeSet<_>>()
}
#[tokio::test]
async fn missing_tags_are_null() -> Result {
let db = MutableBufferDb::new("mydb");
@ -906,158 +710,6 @@ mod tests {
Ok(())
}
#[tokio::test]
async fn list_column_values() -> Result {
let db = MutableBufferDb::new("column_namedb");
let lp_data = "h2o,state=CA,city=LA temp=70.4 100\n\
h2o,state=MA,city=Boston temp=72.4 250\n\
o2,state=MA,city=Boston temp=50.4 200\n\
o2,state=CA temp=79.0 300\n\
o2,state=NY temp=60.8 400\n";
let lines: Vec<_> = parse_lines(lp_data).map(|l| l.unwrap()).collect();
write_lines(&db, &lines).await;
#[derive(Debug)]
struct TestCase<'a> {
description: &'a str,
column_name: &'a str,
predicate: Predicate,
expected_column_values: Result<Vec<&'a str>>,
}
let test_cases = vec![
TestCase {
description: "No predicates, 'state' col",
column_name: "state",
predicate: PredicateBuilder::default().build(),
expected_column_values: Ok(vec!["CA", "MA", "NY"]),
},
TestCase {
description: "No predicates, 'city' col",
column_name: "city",
predicate: PredicateBuilder::default().build(),
expected_column_values: Ok(vec!["Boston", "LA"]),
},
TestCase {
description: "Restrictions: timestamp",
column_name: "state",
predicate: PredicateBuilder::default().timestamp_range(50, 201).build(),
expected_column_values: Ok(vec!["CA", "MA"]),
},
TestCase {
description: "Restrictions: predicate",
column_name: "city",
predicate: PredicateBuilder::default()
.add_expr(col("state").eq(lit("MA"))) // state=MA
.build(),
expected_column_values: Ok(vec!["Boston"]),
},
TestCase {
description: "Restrictions: timestamp and predicate",
column_name: "state",
predicate: PredicateBuilder::default()
.timestamp_range(150, 301)
.add_expr(col("state").eq(lit("MA"))) // state=MA
.build(),
expected_column_values: Ok(vec!["MA"]),
},
TestCase {
description: "Restrictions: measurement name",
column_name: "state",
predicate: PredicateBuilder::default().table("h2o").build(),
expected_column_values: Ok(vec!["CA", "MA"]),
},
TestCase {
description: "Restrictions: measurement name, with nulls",
column_name: "city",
predicate: PredicateBuilder::default().table("o2").build(),
expected_column_values: Ok(vec!["Boston"]),
},
TestCase {
description: "Restrictions: measurement name and timestamp",
column_name: "state",
predicate: PredicateBuilder::default()
.table("o2")
.timestamp_range(50, 201)
.build(),
expected_column_values: Ok(vec!["MA"]),
},
TestCase {
description: "Restrictions: measurement name and predicate",
column_name: "state",
predicate: PredicateBuilder::default()
.table("o2")
.add_expr(col("state").eq(lit("NY"))) // state=NY
.build(),
expected_column_values: Ok(vec!["NY"]),
},
TestCase {
description: "Restrictions: measurement name, timestamp and predicate",
column_name: "state",
predicate: PredicateBuilder::default()
.table("o2")
.timestamp_range(1, 550)
.add_expr(col("state").eq(lit("NY"))) // state=NY
.build(),
expected_column_values: Ok(vec!["NY"]),
},
TestCase {
description: "Restrictions: measurement name, timestamp and predicate: no match",
column_name: "state",
predicate: PredicateBuilder::default()
.table("o2")
.timestamp_range(1, 300) // filters out the NY row
.add_expr(col("state").eq(lit("NY"))) // state=NY
.build(),
expected_column_values: Ok(vec![]),
},
];
for test_case in test_cases.into_iter() {
let test_case_str = format!("{:#?}", test_case);
println!("Running test case: {:?}", test_case);
let column_values_plan = db
.column_values(test_case.column_name, test_case.predicate)
.await
.expect("Created tag_values plan successfully");
// run the execution plan
let executor = Executor::default();
let actual_column_values = executor.to_string_set(column_values_plan).await;
let is_match = if let Ok(expected_column_values) = &test_case.expected_column_values {
let expected_column_values = to_set(expected_column_values);
if let Ok(actual_column_values) = &actual_column_values {
**actual_column_values == expected_column_values
} else {
false
}
} else if let Err(e) = &actual_column_values {
// use string compare to compare errors to avoid having to build exact errors
format!("{:?}", e) == format!("{:?}", test_case.expected_column_values)
} else {
false
};
assert!(
is_match,
"Mismatch\n\
actual_column_values: \n\
{:?}\n\
expected_column_values: \n\
{:?}\n\
Test_case: \n\
{}",
actual_column_values, test_case.expected_column_values, test_case_str
);
}
Ok(())
}
#[tokio::test]
async fn test_query_series() -> Result {
// This test checks that everything is wired together
@ -1088,7 +740,7 @@ mod tests {
let plans = db
.query_series(predicate)
.await
.expect("Created tag_values plan successfully");
.expect("Created query_series plan successfully");
let results = run_and_gather_results(plans).await;
@ -1164,7 +816,7 @@ mod tests {
let plans = db
.query_series(predicate)
.await
.expect("Created tag_values plan successfully");
.expect("Created query_series plan successfully");
let results = run_and_gather_results(plans).await;
@ -1207,7 +859,7 @@ mod tests {
let plans = db
.query_series(predicate)
.await
.expect("Created tag_values plan successfully");
.expect("Created query_series plan successfully");
let results = run_and_gather_results(plans).await;
assert!(results.is_empty());
@ -1220,7 +872,7 @@ mod tests {
let plans = db
.query_series(predicate)
.await
.expect("Created tag_values plan successfully");
.expect("Created query_series plan successfully");
let results = run_and_gather_results(plans).await;
assert_eq!(results.len(), 1);
@ -1234,7 +886,7 @@ mod tests {
let plans = db
.query_series(predicate)
.await
.expect("Created tag_values plan successfully");
.expect("Created query_series plan successfully");
let results = run_and_gather_results(plans).await;
assert!(results.is_empty());

View File

@ -1,9 +1,10 @@
use generated_types::wal as wb;
use query::{
exec::{field::FieldColumns, SeriesSetPlan},
exec::field::FieldColumns,
func::selectors::{selector_first, selector_last, selector_max, selector_min, SelectorOutput},
func::window::make_window_bound_expr,
group_by::{Aggregate, WindowDuration},
plan::seriesset::SeriesSetPlan,
};
use std::{
@ -35,7 +36,7 @@ use arrow_deps::{
},
datafusion::{
self,
logical_plan::{Expr, LogicalPlan, LogicalPlanBuilder},
logical_plan::{Expr, LogicalPlanBuilder},
prelude::*,
},
};
@ -223,7 +224,7 @@ impl Table {
}
/// Returns a reference to the specified column
fn column(&self, column_id: u32) -> Result<&Column> {
pub(crate) fn column(&self, column_id: u32) -> Result<&Column> {
self.columns.get(&column_id).context(ColumnIdNotFound {
id: column_id,
table_id: self.id,
@ -271,32 +272,6 @@ impl Table {
}
}
/// Creates a DataFusion LogicalPlan that returns column *values* as a
/// single column of Strings
///
/// The created plan looks like:
///
/// Projection
/// Filter(predicate)
/// InMemoryScan
pub fn tag_values_plan(
&self,
column_name: &str,
chunk_predicate: &ChunkPredicate,
chunk: &Chunk,
) -> Result<LogicalPlan> {
// Scan and Filter
let plan_builder = self.scan_with_predicates(chunk_predicate, chunk)?;
let select_exprs = vec![col(column_name)];
plan_builder
.project(&select_exprs)
.context(BuildingPlan)?
.build()
.context(BuildingPlan)
}
/// Creates a SeriesSet plan that produces an output table with rows that
/// match the predicate
///
@ -503,10 +478,7 @@ impl Table {
column_name: col_name,
chunk: chunk.id,
})?;
let column = self.columns.get(&column_id).context(ColumnIdNotFound {
id: column_id,
table_id: self.id,
})?;
let column = self.column(column_id)?;
Ok(column.data_type())
})?;
@ -735,10 +707,7 @@ impl Table {
for col in &selection.cols {
let column_name = col.column_name;
let column = self.columns.get(&col.column_id).context(ColumnIdNotFound {
id: col.column_id,
table_id: self.id,
})?;
let column = self.column(col.column_id)?;
schema_builder = match column {
Column::String(_, _) => schema_builder.field(column_name, ArrowDataType::Utf8),
@ -769,10 +738,7 @@ impl Table {
let mut columns = Vec::with_capacity(selection.cols.len());
for col in &selection.cols {
let column = self.columns.get(&col.column_id).context(ColumnIdNotFound {
id: col.column_id,
table_id: self.id,
})?;
let column = self.column(col.column_id)?;
let array = match column {
Column::String(vals, _) => {
@ -1221,6 +1187,7 @@ impl<'a> TableColSelection<'a> {
mod tests {
use arrow::util::pretty::pretty_format_batches;
use arrow_deps::datafusion::logical_plan::LogicalPlan;
use data_types::data::split_lines_into_write_entry_partitions;
use influxdb_line_protocol::{parse_lines, ParsedLine};
use query::{

View File

@ -419,26 +419,26 @@ mod tests {
dotenv::dotenv().ok();
let region = env::var("AWS_DEFAULT_REGION");
let bucket_name = env::var("INFLUXDB_IOX_S3_BUCKET");
let bucket_name = env::var("INFLUXDB_IOX_BUCKET");
let force = std::env::var("TEST_INTEGRATION");
match (region.is_ok(), bucket_name.is_ok(), force.is_ok()) {
(false, false, true) => {
panic!(
"TEST_INTEGRATION is set, \
but AWS_DEFAULT_REGION and INFLUXDB_IOX_S3_BUCKET are not"
but AWS_DEFAULT_REGION and INFLUXDB_IOX_BUCKET are not"
)
}
(false, true, true) => {
panic!("TEST_INTEGRATION is set, but AWS_DEFAULT_REGION is not")
}
(true, false, true) => {
panic!("TEST_INTEGRATION is set, but INFLUXDB_IOX_S3_BUCKET is not")
panic!("TEST_INTEGRATION is set, but INFLUXDB_IOX_BUCKET is not")
}
(false, false, false) => {
eprintln!(
"skipping integration test - set \
AWS_DEFAULT_REGION and INFLUXDB_IOX_S3_BUCKET to run"
AWS_DEFAULT_REGION and INFLUXDB_IOX_BUCKET to run"
);
return Ok(());
}
@ -447,7 +447,7 @@ mod tests {
return Ok(());
}
(true, false, false) => {
eprintln!("skipping integration test - set INFLUXDB_IOX_S3_BUCKET to run");
eprintln!("skipping integration test - set INFLUXDB_IOX_BUCKET to run");
return Ok(());
}
_ => {}
@ -466,8 +466,8 @@ mod tests {
"The environment variable AWS_DEFAULT_REGION must be set \
to a value like `us-east-2`"
})?;
let bucket_name = env::var("INFLUXDB_IOX_S3_BUCKET")
.map_err(|_| "The environment variable INFLUXDB_IOX_S3_BUCKET must be set")?;
let bucket_name = env::var("INFLUXDB_IOX_BUCKET")
.map_err(|_| "The environment variable INFLUXDB_IOX_BUCKET must be set")?;
Ok((region.parse()?, bucket_name))
}

View File

@ -299,7 +299,7 @@ mod tests {
let required_vars = [
"AZURE_STORAGE_ACCOUNT",
"AZURE_STORAGE_CONTAINER",
"INFLUXDB_IOX_BUCKET",
"AZURE_STORAGE_MASTER_KEY",
];
let unset_vars: Vec<_> = required_vars
@ -334,8 +334,8 @@ mod tests {
async fn azure_blob_test() -> Result<()> {
maybe_skip_integration!();
let container_name = env::var("AZURE_STORAGE_CONTAINER")
.map_err(|_| "The environment variable AZURE_STORAGE_CONTAINER must be set")?;
let container_name = env::var("INFLUXDB_IOX_BUCKET")
.map_err(|_| "The environment variable INFLUXDB_IOX_BUCKET must be set")?;
let integration = MicrosoftAzure::new_from_env(container_name);
put_get_delete_list(&integration).await?;

View File

@ -267,15 +267,15 @@ mod test {
() => {
dotenv::dotenv().ok();
let bucket_name = env::var("GCS_BUCKET_NAME");
let bucket_name = env::var("INFLUXDB_IOX_BUCKET");
let force = std::env::var("TEST_INTEGRATION");
match (bucket_name.is_ok(), force.is_ok()) {
(false, true) => {
panic!("TEST_INTEGRATION is set, but GCS_BUCKET_NAME is not")
panic!("TEST_INTEGRATION is set, but INFLUXDB_IOX_BUCKET is not")
}
(false, false) => {
eprintln!("skipping integration test - set GCS_BUCKET_NAME to run");
eprintln!("skipping integration test - set INFLUXDB_IOX_BUCKET to run");
return Ok(());
}
_ => {}
@ -284,8 +284,8 @@ mod test {
}
fn bucket_name() -> Result<String> {
Ok(env::var("GCS_BUCKET_NAME")
.map_err(|_| "The environment variable GCS_BUCKET_NAME must be set")?)
Ok(env::var("INFLUXDB_IOX_BUCKET")
.map_err(|_| "The environment variable INFLUXDB_IOX_BUCKET must be set")?)
}
#[tokio::test]

View File

@ -5,11 +5,17 @@ use super::DELIMITER;
// percent_encode's API needs this as a byte
const DELIMITER_BYTE: u8 = DELIMITER.as_bytes()[0];
// special encoding of the empty string part.
// Using '%' is the safest character since it will always be used in the
// output of percent_encode no matter how we evolve the INVALID AsciiSet over
// time.
const EMPTY: &str = "%";
/// The PathPart type exists to validate the directory/file names that form part
/// of a path.
///
/// A PathPart instance is guaranteed to contain no `/` characters as it can
/// only be constructed by going through the `try_from` impl.
/// A PathPart instance is guaranteed to be non-empty and to contain no `/`
/// characters as it can only be constructed by going through the `from` impl.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Default)]
pub struct PathPart(pub(super) String);
@ -48,6 +54,12 @@ impl From<&str> for PathPart {
// to be equal to `.` or `..` to prevent file system traversal shenanigans.
"." => Self(String::from("%2E")),
".." => Self(String::from("%2E%2E")),
// Every string except the empty string will be percent encoded.
// The empty string will be transformed into a sentinel value EMPTY
// which can safely be a prefix of an encoded value since it will be
// fully matched at decode time (see impl Display for PathPart).
"" => Self(String::from(EMPTY)),
other => Self(percent_encode(other.as_bytes(), INVALID).to_string()),
}
}
@ -55,10 +67,13 @@ impl From<&str> for PathPart {
impl std::fmt::Display for PathPart {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
percent_decode_str(&self.0)
.decode_utf8()
.expect("Valid UTF-8 that came from String")
.fmt(f)
match &self.0[..] {
EMPTY => "".fmt(f),
_ => percent_decode_str(&self.0)
.decode_utf8()
.expect("Valid UTF-8 that came from String")
.fmt(f),
}
}
}
@ -104,4 +119,21 @@ mod tests {
assert_eq!(part, PathPart(String::from("%2E%2E")));
assert_eq!(part.to_string(), "..");
}
#[test]
fn path_part_cant_be_empty() {
let part: PathPart = "".into();
assert_eq!(part, PathPart(String::from(EMPTY)));
assert_eq!(part.to_string(), "");
}
#[test]
fn empty_is_safely_encoded() {
let part: PathPart = EMPTY.into();
assert_eq!(
part,
PathPart(percent_encode(EMPTY.as_bytes(), INVALID).to_string())
);
assert_eq!(part.to_string(), EMPTY);
}
}
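A simplified, self-contained sketch of the rule these tests exercise (the real code uses the percent-encoding crate with the INVALID set, which is expected to escape '%' itself; here the escaping is reduced to that single character just to show why the sentinel round-trips):
const EMPTY: &str = "%";

// Stand-in encoder: a literal "%" in the input is escaped, so it can never
// collide with the empty-string sentinel.
fn encode_part(s: &str) -> String {
    match s {
        "" => EMPTY.to_string(),
        other => other.replace('%', "%25"),
    }
}

fn decode_part(s: &str) -> String {
    match s {
        // the sentinel is matched in full, never as a prefix
        EMPTY => String::new(),
        other => other.replace("%25", "%"),
    }
}

fn main() {
    for input in ["", "%", "100%_utilization"] {
        assert_eq!(decode_part(&encode_part(input)), input);
    }
}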

View File

@ -18,7 +18,6 @@ use arrow_deps::{
use counters::ExecutionCounters;
use context::IOxExecutionContext;
use field::FieldColumns;
use schema_pivot::SchemaPivotNode;
use fieldlist::{FieldList, IntoFieldList};
@ -28,7 +27,11 @@ use tokio::sync::mpsc::{self, error::SendError};
use snafu::{ResultExt, Snafu};
use crate::plan::{fieldlist::FieldListPlan, stringset::StringSetPlan};
use crate::plan::{
fieldlist::FieldListPlan,
seriesset::{SeriesSetPlan, SeriesSetPlans},
stringset::StringSetPlan,
};
#[derive(Debug, Snafu)]
pub enum Error {
@ -85,91 +88,6 @@ pub enum Error {
pub type Result<T, E = Error> = std::result::Result<T, E>;
/// A plan that can be run to produce a logical stream of time series,
/// as represented as sequence of SeriesSets from a single DataFusion
/// plan, optionally grouped in some way.
#[derive(Debug)]
pub struct SeriesSetPlan {
/// The table name this came from
pub table_name: Arc<String>,
/// Datafusion plan to execute. The plan must produce
/// RecordBatches that have:
///
/// * fields for each name in `tag_columns` and `field_columns`
/// * a timestamp column called 'time'
/// * each column in tag_columns must be a String (Utf8)
pub plan: LogicalPlan,
/// The names of the columns that define tags.
///
/// Note these are `Arc` strings because they are duplicated for
/// *each* resulting `SeriesSet` that is produced when this type
/// of plan is executed.
pub tag_columns: Vec<Arc<String>>,
/// The names of the columns which are "fields"
///
/// Note these are `Arc` strings because they are duplicated for
/// *each* resulting `SeriesSet` that is produced when this type
/// of plan is executed.
pub field_columns: FieldColumns,
/// If present, how many of the series_set_plan::tag_columns
/// should be used to compute the group
pub num_prefix_tag_group_columns: Option<usize>,
}
impl SeriesSetPlan {
/// Create a SeriesSetPlan that will not produce any Group items
pub fn new_from_shared_timestamp(
table_name: Arc<String>,
plan: LogicalPlan,
tag_columns: Vec<Arc<String>>,
field_columns: Vec<Arc<String>>,
) -> Self {
Self::new(table_name, plan, tag_columns, field_columns.into())
}
/// Create a SeriesSetPlan that will not produce any Group items
pub fn new(
table_name: Arc<String>,
plan: LogicalPlan,
tag_columns: Vec<Arc<String>>,
field_columns: FieldColumns,
) -> Self {
let num_prefix_tag_group_columns = None;
Self {
table_name,
plan,
tag_columns,
field_columns,
num_prefix_tag_group_columns,
}
}
/// Create a SeriesSetPlan that will produce Group items, according to
/// num_prefix_tag_group_columns.
pub fn grouped(mut self, num_prefix_tag_group_columns: usize) -> Self {
self.num_prefix_tag_group_columns = Some(num_prefix_tag_group_columns);
self
}
}
/// A container for plans which each produce a logical stream of
/// timeseries (from across many potential tables).
#[derive(Debug, Default)]
pub struct SeriesSetPlans {
pub plans: Vec<SeriesSetPlan>,
}
impl From<Vec<SeriesSetPlan>> for SeriesSetPlans {
fn from(plans: Vec<SeriesSetPlan>) -> Self {
Self { plans }
}
}
/// Handles executing plans, and marshalling the results into rust
/// native structures.
#[derive(Debug, Default)]

View File

@ -3,16 +3,21 @@ use std::{
sync::Arc,
};
use arrow_deps::datafusion::{
error::{DataFusionError, Result as DatafusionResult},
logical_plan::{Expr, ExpressionVisitor, LogicalPlan, LogicalPlanBuilder, Operator, Recursion},
prelude::col,
use arrow_deps::{
arrow::datatypes::DataType,
datafusion::{
error::{DataFusionError, Result as DatafusionResult},
logical_plan::{
Expr, ExpressionVisitor, LogicalPlan, LogicalPlanBuilder, Operator, Recursion,
},
prelude::col,
},
};
use data_types::{
schema::{InfluxColumnType, Schema},
selection::Selection,
};
use snafu::{OptionExt, ResultExt, Snafu};
use snafu::{ensure, OptionExt, ResultExt, Snafu};
use tracing::debug;
use crate::{
@ -44,6 +49,11 @@ pub enum Error {
source: Box<dyn std::error::Error + Send + Sync>,
},
#[snafu(display("gRPC planner got error finding column values: {}", source))]
FindingColumnValues {
source: Box<dyn std::error::Error + Send + Sync>,
},
#[snafu(display(
"gRPC planner got internal error making table_name with default predicate: {}",
source
@ -68,7 +78,7 @@ pub enum Error {
source: Box<dyn std::error::Error + Send + Sync>,
},
#[snafu(display("gRPC planner got error creating string set: {}", source))]
#[snafu(display("gRPC planner got error creating string set plan: {}", source))]
CreatingStringSet { source: StringSetError },
#[snafu(display(
@ -81,13 +91,13 @@ pub enum Error {
source: crate::provider::Error,
},
#[snafu(display("Error building plan: {}", source))]
#[snafu(display("gRPC planner got error building plan: {}", source))]
BuildingPlan {
source: arrow_deps::datafusion::error::DataFusionError,
},
#[snafu(display(
"Error getting table schema for table '{}' in chunk {}: {}",
"gRPC planner got error getting table schema for table '{}' in chunk {}: {}",
table_name,
chunk_id,
source
@ -98,8 +108,28 @@ pub enum Error {
source: Box<dyn std::error::Error + Send + Sync>,
},
#[snafu(display("Unsupported predicate: {}", source))]
#[snafu(display("gRPC planner error: unsupported predicate: {}", source))]
UnsupportedPredicate { source: DataFusionError },
#[snafu(display(
"gRPC planner error: column '{}' is not a tag, it is {:?}",
tag_name,
influx_column_type
))]
InvalidTagColumn {
tag_name: String,
influx_column_type: Option<InfluxColumnType>,
},
#[snafu(display(
"Internal error: tag column '{}' is not Utf8 type, it is {:?} ",
tag_name,
data_type
))]
InternalInvalidTagType {
tag_name: String,
data_type: DataType,
},
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
@ -272,6 +302,155 @@ impl InfluxRPCPlanner {
.context(CreatingStringSet)
}
/// Returns a plan which finds the distinct, non-null tag values
/// in the specified `tag_name` column of this database which pass
/// the conditions specified by `predicate`.
pub async fn tag_values<D>(
&self,
database: &D,
tag_name: &str,
predicate: Predicate,
) -> Result<StringSetPlan>
where
D: Database + 'static,
{
debug!(predicate=?predicate, tag_name, "planning tag_values");
// The basic algorithm is:
//
// 1. Find all the potential tables in the chunks
//
// 2. For each table/chunk pair, figure out which have
// distinct values that can be found from only metadata and
// which need full plans
// Key is table name, value is set of chunks which had data
// for that table but that we couldn't evaluate the predicate
// entirely using the metadata
let mut need_full_plans = BTreeMap::new();
let mut known_values = BTreeSet::new();
for chunk in self.filtered_chunks(database, &predicate).await? {
let table_names = self.chunk_table_names(chunk.as_ref(), &predicate).await?;
for table_name in table_names {
debug!(
table_name = table_name.as_str(),
chunk_id = chunk.id(),
"finding columns in table"
);
// use schema to validate column type
let schema = chunk
.table_schema(&table_name, Selection::All)
.await
.expect("to be able to get table schema");
// Skip this table if the tag_name is not a column in this table
let idx = if let Some(idx) = schema.find_index_of(tag_name) {
idx
} else {
continue;
};
// Validate that this really is a Tag column
let (influx_column_type, field) = schema.field(idx);
ensure!(
matches!(influx_column_type, Some(InfluxColumnType::Tag)),
InvalidTagColumn {
tag_name,
influx_column_type,
}
);
ensure!(
field.data_type() == &DataType::Utf8,
InternalInvalidTagType {
tag_name,
data_type: field.data_type().clone(),
}
);
// try and get the list of values directly from metadata
let maybe_values = chunk
.column_values(&table_name, tag_name, &predicate)
.await
.map_err(|e| Box::new(e) as _)
.context(FindingColumnValues)?;
match maybe_values {
Some(mut names) => {
debug!(names=?names, chunk_id = chunk.id(), "column values found from metadata");
known_values.append(&mut names);
}
None => {
debug!(
table_name = table_name.as_str(),
chunk_id = chunk.id(),
"need full plan to find column values"
);
// can't get the values from metadata alone, need
// a general purpose plan
need_full_plans
.entry(table_name)
.or_insert_with(Vec::new)
.push(Arc::clone(&chunk));
}
}
}
}
let mut builder = StringSetPlanBuilder::new();
let select_exprs = vec![col(tag_name)];
// At this point, we have a set of tag_values we know at plan
// time in `known_values`, and some tables in chunks that we
// need to run a plan to find what values pass the predicate.
for (table_name, chunks) in need_full_plans.into_iter() {
let scan_and_filter = self
.scan_and_filter(&table_name, &predicate, chunks)
.await?;
// if we have any data to scan, make a plan!
if let Some(TableScanAndFilter {
plan_builder,
schema: _,
}) = scan_and_filter
{
// TODO use Expr::is_null() here when this
// https://issues.apache.org/jira/browse/ARROW-11742
// is completed.
let tag_name_is_not_null = Expr::IsNotNull(Box::new(col(tag_name)));
// TODO: optimize this to use "DISINCT" or do
// something more intelligent that simply fetching all
// the values and reducing them in the query Executor
//
// Until then, simply use a plan which looks like:
//
// Projection
// Filter(is not null)
// Filter(predicate)
// InMemoryScan
let plan = plan_builder
.project(&select_exprs)
.context(BuildingPlan)?
.filter(tag_name_is_not_null)
.context(BuildingPlan)?
.build()
.context(BuildingPlan)?;
builder = builder.append(plan.into());
}
}
// add the known values we could find from metadata only
builder
.append(known_values.into())
.build()
.context(CreatingStringSet)
}
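For orientation, a usage sketch of this entry point as exercised by the tag_values query tests added elsewhere in this diff (imports follow those tests; error handling is collapsed into expect):
use query::{
    exec::{stringset::StringSetRef, Executor},
    frontend::influxrpc::InfluxRPCPlanner,
    predicate::Predicate,
    Database,
};

/// Plan the distinct values of one tag column, then execute the plan to a StringSet.
async fn distinct_tag_values<D: Database + 'static>(
    db: &D,
    tag_name: &str,
    predicate: Predicate,
) -> StringSetRef {
    let planner = InfluxRPCPlanner::new();
    let executor = Executor::new();
    let plan = planner
        .tag_values(db, tag_name, predicate)
        .await
        .expect("built tag_values plan");
    executor
        .to_string_set(plan)
        .await
        .expect("executed plan to a string set")
}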
/// Returns a plan that produces a list of columns and their
/// datatypes (as defined in the data written via `write_lines`),
/// and which have more than zero rows which pass the conditions

View File

@ -11,8 +11,8 @@ use async_trait::async_trait;
use data_types::{
data::ReplicatedWrite, partition_metadata::TableSummary, schema::Schema, selection::Selection,
};
use exec::{stringset::StringSet, Executor, SeriesSetPlans};
use plan::stringset::StringSetPlan;
use exec::{stringset::StringSet, Executor};
use plan::seriesset::SeriesSetPlans;
use std::{fmt::Debug, sync::Arc};
@ -55,15 +55,6 @@ pub trait Database: Debug + Send + Sync {
// The functions below are slated for removal (migration into a gRPC query
// frontend) ---------
/// Returns a plan which finds the distinct values in the
/// `column_name` column of this database which pass the
/// conditions specified by `predicate`.
async fn column_values(
&self,
column_name: &str,
predicate: Predicate,
) -> Result<StringSetPlan, Self::Error>;
/// Returns a plan that finds all rows which pass the
/// conditions specified by `predicate` in the form of logical
/// time series.
@ -132,7 +123,7 @@ pub trait PartitionChunk: Debug + Send + Sync {
/// Returns a set of Strings with column names from the specified
/// table that have at least one row that matches `predicate`, if
/// the predicate can be evaluated entirely on the metadata of
/// this Chunk.
/// this Chunk. Returns `None` otherwise.
async fn column_names(
&self,
table_name: &str,
@ -140,6 +131,18 @@ pub trait PartitionChunk: Debug + Send + Sync {
columns: Selection<'_>,
) -> Result<Option<StringSet>, Self::Error>;
/// Return a set of Strings containing the distinct values in the
/// specified columns, if the predicate can be evaluated entirely
/// on the metadata of this Chunk. Returns `None` otherwise.
///
/// The requested columns must all have String type.
async fn column_values(
&self,
table_name: &str,
column_name: &str,
predicate: &Predicate,
) -> Result<Option<StringSet>, Self::Error>;
/// Returns the Schema for a table in this chunk, with the
/// specified column selection. An error is returned if the
/// selection refers to columns that do not exist.

View File

@ -1,2 +1,3 @@
pub mod fieldlist;
pub mod seriesset;
pub mod stringset;

View File

@ -0,0 +1,86 @@
use std::sync::Arc;
use arrow_deps::datafusion::logical_plan::LogicalPlan;
use crate::exec::field::FieldColumns;
/// A plan that can be run to produce a logical stream of time series,
/// represented as a sequence of SeriesSets from a single DataFusion
/// plan, optionally grouped in some way.
#[derive(Debug)]
pub struct SeriesSetPlan {
/// The table name this came from
pub table_name: Arc<String>,
/// Datafusion plan to execute. The plan must produce
/// RecordBatches that have:
///
/// * fields for each name in `tag_columns` and `field_columns`
/// * a timestamp column called 'time'
/// * each column in tag_columns must be a String (Utf8)
pub plan: LogicalPlan,
/// The names of the columns that define tags.
///
/// Note these are `Arc` strings because they are duplicated for
/// *each* resulting `SeriesSet` that is produced when this type
/// of plan is executed.
pub tag_columns: Vec<Arc<String>>,
/// The names of the columns which are "fields"
pub field_columns: FieldColumns,
/// If present, how many of the series_set_plan::tag_columns
/// should be used to compute the group
pub num_prefix_tag_group_columns: Option<usize>,
}
impl SeriesSetPlan {
/// Create a SeriesSetPlan that will not produce any Group items
pub fn new_from_shared_timestamp(
table_name: Arc<String>,
plan: LogicalPlan,
tag_columns: Vec<Arc<String>>,
field_columns: Vec<Arc<String>>,
) -> Self {
Self::new(table_name, plan, tag_columns, field_columns.into())
}
/// Create a SeriesSetPlan that will not produce any Group items
pub fn new(
table_name: Arc<String>,
plan: LogicalPlan,
tag_columns: Vec<Arc<String>>,
field_columns: FieldColumns,
) -> Self {
let num_prefix_tag_group_columns = None;
Self {
table_name,
plan,
tag_columns,
field_columns,
num_prefix_tag_group_columns,
}
}
/// Create a SeriesSetPlan that will produce Group items, according to
/// num_prefix_tag_group_columns.
pub fn grouped(mut self, num_prefix_tag_group_columns: usize) -> Self {
self.num_prefix_tag_group_columns = Some(num_prefix_tag_group_columns);
self
}
}
/// A container for plans which each produce a logical stream of
/// timeseries (from across many potential tables).
#[derive(Debug, Default)]
pub struct SeriesSetPlans {
pub plans: Vec<SeriesSetPlan>,
}
impl From<Vec<SeriesSetPlan>> for SeriesSetPlans {
fn from(plans: Vec<SeriesSetPlan>) -> Self {
Self { plans }
}
}

View File

@ -12,14 +12,11 @@ use arrow_deps::{
datafusion::physical_plan::{common::SizedRecordBatchStream, SendableRecordBatchStream},
};
use crate::{exec::Executor, group_by::GroupByAndAggregate, plan::stringset::StringSetPlan};
use crate::{
exec::{
stringset::{StringSet, StringSetRef},
SeriesSetPlans,
},
exec::stringset::{StringSet, StringSetRef},
Database, DatabaseStore, PartitionChunk, Predicate,
};
use crate::{exec::Executor, group_by::GroupByAndAggregate, plan::seriesset::SeriesSetPlans};
use data_types::{
data::{lines_to_replicated_write, ReplicatedWrite},
@ -59,12 +56,6 @@ pub struct TestDatabase {
/// `column_names` to return upon next request
column_names: Arc<Mutex<Option<StringSetRef>>>,
/// `column_values` to return upon next request
column_values: Arc<Mutex<Option<StringSetRef>>>,
/// The last request for `column_values`
column_values_request: Arc<Mutex<Option<ColumnValuesRequest>>>,
/// Responses to return on the next request to `query_series`
query_series_values: Arc<Mutex<Option<SeriesSetPlans>>>,
@ -78,16 +69,6 @@ pub struct TestDatabase {
query_groups_request: Arc<Mutex<Option<QueryGroupsRequest>>>,
}
/// Records the parameters passed to a column values request
#[derive(Debug, PartialEq, Clone)]
pub struct ColumnValuesRequest {
/// The name of the requested column
pub column_name: String,
/// Stringified '{:?}' version of the predicate
pub predicate: String,
}
/// Records the parameters passed to a `query_series` request
#[derive(Debug, PartialEq, Clone)]
pub struct QuerySeriesRequest {
@ -178,20 +159,6 @@ impl TestDatabase {
*Arc::clone(&self.column_names).lock() = Some(column_names)
}
/// Set the list of column values that will be returned on a call to
/// column_values
pub fn set_column_values(&self, column_values: Vec<String>) {
let column_values = column_values.into_iter().collect::<StringSet>();
let column_values = Arc::new(column_values);
*Arc::clone(&self.column_values).lock() = Some(column_values)
}
/// Get the parameters from the last column name request
pub fn get_column_values_request(&self) -> Option<ColumnValuesRequest> {
Arc::clone(&self.column_values_request).lock().take()
}
/// Set the series that will be returned on a call to query_series
pub fn set_query_series_values(&self, plan: SeriesSetPlans) {
*Arc::clone(&self.query_series_values).lock() = Some(plan);
@ -267,34 +234,6 @@ impl Database for TestDatabase {
Ok(())
}
/// Return the mocked out column values, recording the request
async fn column_values(
&self,
column_name: &str,
predicate: Predicate,
) -> Result<StringSetPlan, Self::Error> {
// save the request
let predicate = predicate_to_test_string(&predicate);
let new_column_values_request = Some(ColumnValuesRequest {
column_name: column_name.into(),
predicate,
});
*Arc::clone(&self.column_values_request).lock() = new_column_values_request;
// pull out the saved columns
let column_values = Arc::clone(&self.column_values)
.lock()
.take()
// Turn None into an error
.context(General {
message: "No saved column_values in TestDatabase",
})?;
Ok(StringSetPlan::Known(column_values))
}
async fn query_series(&self, predicate: Predicate) -> Result<SeriesSetPlans, Self::Error> {
let predicate = predicate_to_test_string(&predicate);
@ -595,6 +534,16 @@ impl PartitionChunk for TestChunk {
})
}
async fn column_values(
&self,
_table_name: &str,
_column_name: &str,
_predicate: &Predicate,
) -> Result<Option<StringSet>, Self::Error> {
// Model not being able to get column values from metadata
Ok(None)
}
fn has_table(&self, table_name: &str) -> bool {
self.table_schemas.contains_key(table_name)
}

View File

@ -13,7 +13,7 @@ use async_trait::async_trait;
use data_types::{data::ReplicatedWrite, database_rules::DatabaseRules, selection::Selection};
use mutable_buffer::MutableBufferDb;
use parking_lot::Mutex;
use query::{plan::stringset::StringSetPlan, Database, PartitionChunk};
use query::{Database, PartitionChunk};
use read_buffer::Database as ReadBufferDb;
use serde::{Deserialize, Serialize};
use snafu::{OptionExt, ResultExt, Snafu};
@ -306,23 +306,10 @@ impl Database for Db {
.context(MutableBufferWrite)
}
async fn column_values(
&self,
column_name: &str,
predicate: query::predicate::Predicate,
) -> Result<StringSetPlan, Self::Error> {
self.mutable_buffer
.as_ref()
.context(DatabaseNotReadable)?
.column_values(column_name, predicate)
.await
.context(MutableBufferRead)
}
async fn query_series(
&self,
predicate: query::predicate::Predicate,
) -> Result<query::exec::SeriesSetPlans, Self::Error> {
) -> Result<query::plan::seriesset::SeriesSetPlans, Self::Error> {
self.mutable_buffer
.as_ref()
.context(DatabaseNotReadable)?
@ -335,7 +322,7 @@ impl Database for Db {
&self,
predicate: query::predicate::Predicate,
gby_agg: query::group_by::GroupByAndAggregate,
) -> Result<query::exec::SeriesSetPlans, Self::Error> {
) -> Result<query::plan::seriesset::SeriesSetPlans, Self::Error> {
self.mutable_buffer
.as_ref()
.context(DatabaseNotReadable)?

View File

@ -350,4 +350,40 @@ impl PartitionChunk for DBChunk {
}
}
}
async fn column_values(
&self,
table_name: &str,
column_name: &str,
predicate: &Predicate,
) -> Result<Option<StringSet>, Self::Error> {
match self {
Self::MutableBuffer { chunk } => {
use mutable_buffer::chunk::Error::UnsupportedColumnTypeForListingValues;
let chunk_predicate = chunk
.compile_predicate(predicate)
.context(MutableBufferChunk)?;
let values = chunk.tag_column_values(table_name, column_name, &chunk_predicate);
// if the mutable buffer doesn't support getting
// values for this kind of column, report back None
if let Err(UnsupportedColumnTypeForListingValues { .. }) = values {
Ok(None)
} else {
values.context(MutableBufferChunk)
}
}
Self::ReadBuffer { .. } => {
// TODO hook up read buffer API here when ready. Until
// then, fall back to using a full plan
// https://github.com/influxdata/influxdb_iox/issues/857
Ok(None)
}
Self::ParquetFile => {
unimplemented!("parquet file not implemented for column_values")
}
}
}
}

View File

@ -1,3 +1,4 @@
pub mod field_columns;
pub mod table_names;
pub mod tag_keys;
pub mod tag_values;

View File

@ -0,0 +1,253 @@
use arrow_deps::datafusion::logical_plan::{col, lit};
use query::{
exec::{
stringset::{IntoStringSet, StringSetRef},
Executor,
},
frontend::influxrpc::InfluxRPCPlanner,
predicate::PredicateBuilder,
};
use crate::query_tests::scenarios::*;
/// runs tag_value(predicate) and compares it to the expected
/// output
macro_rules! run_tag_values_test_case {
($DB_SETUP:expr, $TAG_NAME:expr, $PREDICATE:expr, $EXPECTED_VALUES:expr) => {
test_helpers::maybe_start_logging();
let predicate = $PREDICATE;
let tag_name = $TAG_NAME;
let expected_values = $EXPECTED_VALUES;
for scenario in $DB_SETUP.make().await {
let DBScenario {
scenario_name, db, ..
} = scenario;
println!("Running scenario '{}'", scenario_name);
println!("Predicate: '{:#?}'", predicate);
let planner = InfluxRPCPlanner::new();
let executor = Executor::new();
let plan = planner
.tag_values(&db, &tag_name, predicate.clone())
.await
.expect("built plan successfully");
let names = executor
.to_string_set(plan)
.await
.expect("converted plan to strings successfully");
assert_eq!(
names,
to_stringset(&expected_values),
"Error in scenario '{}'\n\nexpected:\n{:?}\nactual:\n{:?}",
scenario_name,
expected_values,
names
);
}
};
}
#[tokio::test]
async fn list_tag_values_no_tag() {
let predicate = PredicateBuilder::default().build();
// If the tag is not present, expect no values back (not error)
let tag_name = "tag_not_in_chunks";
let expected_tag_keys = vec![];
run_tag_values_test_case!(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
}
#[tokio::test]
async fn list_tag_values_no_predicate_state_col() {
let predicate = PredicateBuilder::default().build();
let tag_name = "state";
let expected_tag_keys = vec!["CA", "MA", "NY"];
run_tag_values_test_case!(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
}
#[tokio::test]
async fn list_tag_values_no_predicate_city_col() {
let tag_name = "city";
let predicate = PredicateBuilder::default().build();
let expected_tag_keys = vec!["Boston", "LA", "NYC"];
run_tag_values_test_case!(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
}
#[tokio::test]
async fn list_tag_values_timestamp_pred_state_col() {
let tag_name = "state";
let predicate = PredicateBuilder::default().timestamp_range(50, 201).build();
let expected_tag_keys = vec!["CA", "MA"];
run_tag_values_test_case!(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
}
#[tokio::test]
async fn list_tag_values_state_pred_state_col() {
let tag_name = "city";
let predicate = PredicateBuilder::default()
.add_expr(col("state").eq(lit("MA"))) // state=MA
.build();
let expected_tag_keys = vec!["Boston"];
run_tag_values_test_case!(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
}
#[tokio::test]
async fn list_tag_values_timestamp_and_state_pred_state_col() {
let tag_name = "state";
let predicate = PredicateBuilder::default()
.timestamp_range(150, 301)
.add_expr(col("state").eq(lit("MA"))) // state=MA
.build();
let expected_tag_keys = vec!["MA"];
run_tag_values_test_case!(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
}
#[tokio::test]
async fn list_tag_values_table_pred_state_col() {
let tag_name = "state";
let predicate = PredicateBuilder::default().table("h2o").build();
let expected_tag_keys = vec!["CA", "MA"];
run_tag_values_test_case!(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
}
#[tokio::test]
async fn list_tag_values_table_pred_city_col() {
let tag_name = "city";
let predicate = PredicateBuilder::default().table("o2").build();
let expected_tag_keys = vec!["Boston", "NYC"];
run_tag_values_test_case!(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
}
#[tokio::test]
async fn list_tag_values_table_and_timestamp_and_table_pred_state_col() {
let tag_name = "state";
let predicate = PredicateBuilder::default()
.table("o2")
.timestamp_range(50, 201)
.build();
let expected_tag_keys = vec!["MA"];
run_tag_values_test_case!(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
}
#[tokio::test]
async fn list_tag_values_table_and_state_pred_state_col() {
let tag_name = "state";
let predicate = PredicateBuilder::default()
.table("o2")
.add_expr(col("state").eq(lit("NY"))) // state=NY
.build();
let expected_tag_keys = vec!["NY"];
run_tag_values_test_case!(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
}
#[tokio::test]
async fn list_tag_values_table_and_timestamp_and_state_pred_state_col() {
let tag_name = "state";
let predicate = PredicateBuilder::default()
.table("o2")
.timestamp_range(1, 550)
.add_expr(col("state").eq(lit("NY"))) // state=NY
.build();
let expected_tag_keys = vec!["NY"];
run_tag_values_test_case!(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
}
#[tokio::test]
async fn list_tag_values_table_and_timestamp_and_state_pred_state_col_no_rows() {
let tag_name = "state";
let predicate = PredicateBuilder::default()
.table("o2")
.timestamp_range(1, 300) // filters out the NY row
.add_expr(col("state").eq(lit("NY"))) // state=NY
.build();
let expected_tag_keys = vec![];
run_tag_values_test_case!(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
}
#[tokio::test]
async fn list_tag_values_field_col() {
let db_setup = TwoMeasurementsManyNulls {};
let predicate = PredicateBuilder::default().build();
for scenario in db_setup.make().await {
let DBScenario {
scenario_name, db, ..
} = scenario;
println!("Running scenario '{}'", scenario_name);
println!("Predicate: '{:#?}'", predicate);
let planner = InfluxRPCPlanner::new();
// Test: temp is a field, not a tag
let tag_name = "temp";
let plan_result = planner.tag_values(&db, &tag_name, predicate.clone()).await;
assert_eq!(
plan_result.unwrap_err().to_string(),
"gRPC planner error: column \'temp\' is not a tag, it is Some(Field(Float))"
);
}
}
fn to_stringset(v: &[&str]) -> StringSetRef {
v.into_stringset().unwrap()
}

View File

@ -1,8 +1,8 @@
//! Implementation of command line option for manipulating and showing server
//! config
use clap::arg_enum;
use std::{net::SocketAddr, net::ToSocketAddrs, path::PathBuf};
use structopt::StructOpt;
/// The default bind address for the HTTP API.
@ -91,16 +91,37 @@ pub struct Config {
#[structopt(long = "--data-dir", env = "INFLUXDB_IOX_DB_DIR")]
pub database_directory: Option<PathBuf>,
#[structopt(
long = "--object-store",
env = "INFLUXDB_IOX_OBJECT_STORE",
possible_values = &ObjectStore::variants(),
case_insensitive = true,
long_help = r#"Which object storage to use. If not specified, defaults to memory.
Possible values (case insensitive):
* memory (default): Effectively no object persistence.
* file: Stores objects in the local filesystem. Must also set `--data-dir`.
* s3: Amazon S3. Must also set `--bucket`, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, and
AWS_DEFAULT_REGION.
* google: Google Cloud Storage. Must also set `--bucket` and SERVICE_ACCOUNT.
* azure: Microsoft Azure blob storage. Must also set `--bucket`, AZURE_STORAGE_ACCOUNT,
and AZURE_STORAGE_MASTER_KEY.
"#,
)]
pub object_store: Option<ObjectStore>,
/// Name of the bucket to use for the object store. Must also set
/// `--object-store` to a cloud object storage to have any effect.
///
/// If using Google Cloud Storage for the object store, this item, as well
/// as SERVICE_ACCOUNT must be set.
#[structopt(long = "--gcp-bucket", env = "INFLUXDB_IOX_GCP_BUCKET")]
pub gcp_bucket: Option<String>,
///
/// If using S3 for the object store, this item, as well
/// as AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_DEFAULT_REGION must
/// be set.
#[structopt(long = "--s3-bucket", env = "INFLUXDB_IOX_S3_BUCKET")]
pub s3_bucket: Option<String>,
#[structopt(long = "--bucket", env = "INFLUXDB_IOX_BUCKET")]
pub bucket: Option<String>,
/// If set, Jaeger traces are emitted to this host
/// using the OpenTelemetry tracer.
@ -167,6 +188,17 @@ fn strip_server(args: impl Iterator<Item = String>) -> Vec<String> {
.collect::<Vec<_>>()
}
arg_enum! {
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum ObjectStore {
Memory,
File,
S3,
Google,
Azure,
}
}
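A sketch of how the new flags parse together, patterned on the Config tests further down in this file (the bucket name is made up; this is illustrative, not part of the change):
#[test]
fn object_store_s3_flags_parse() {
    // the first element is treated as the binary name by the parser
    let config = Config::from_iter_safe(vec![
        "server",
        "--object-store",
        "s3",
        "--bucket",
        "my-iox-bucket",
    ])
    .expect("valid config");

    assert_eq!(config.object_store, Some(ObjectStore::S3));
    assert_eq!(config.bucket, Some("my-iox-bucket".to_string()));
}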
/// How to format output logging messages
#[derive(Debug, Clone, Copy)]
pub enum LogFormat {
@ -301,15 +333,6 @@ mod tests {
clap::ErrorKind::ValueValidation
);
assert_eq!(
Config::from_iter_safe(strip_server(
to_vec(&["cmd", "server", "--api-bind", "badhost.badtld:1234"]).into_iter(),
))
.map_err(|e| e.kind)
.expect_err("must fail"),
clap::ErrorKind::ValueValidation
);
Ok(())
}
}

View File

@ -12,7 +12,7 @@ use panic_logging::SendPanicsToTracing;
use server::{ConnectionManagerImpl as ConnectionManager, Server as AppServer};
use crate::commands::{
config::{load_config, Config},
config::{load_config, Config, ObjectStore as ObjStoreOpt},
logging::LoggingLevel,
};
@ -64,6 +64,12 @@ pub enum Error {
#[snafu(display("Error serving RPC: {}", source))]
ServingRPC { source: self::rpc::Error },
#[snafu(display("Specifed {} for the object store, but not a bucket", object_store))]
InvalidCloudObjectStoreConfiguration { object_store: ObjStoreOpt },
#[snafu(display("Specified file for the object store, but not a database directory"))]
InvalidFileObjectStoreConfiguration,
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
@ -92,22 +98,53 @@ pub async fn main(logging_level: LoggingLevel, config: Option<Config>) -> Result
let f = SendPanicsToTracing::new();
std::mem::forget(f);
let db_dir = &config.database_directory;
let object_store = if let Some(bucket_name) = &config.gcp_bucket {
info!("Using GCP bucket {} for storage", bucket_name);
ObjectStore::new_google_cloud_storage(GoogleCloudStorage::new(bucket_name))
} else if let Some(bucket_name) = &config.s3_bucket {
info!("Using S3 bucket {} for storage", bucket_name);
// rusoto::Region's default takes the value from the AWS_DEFAULT_REGION env var.
ObjectStore::new_amazon_s3(AmazonS3::new(Default::default(), bucket_name))
} else if let Some(db_dir) = db_dir {
info!("Using local dir {:?} for storage", db_dir);
fs::create_dir_all(db_dir).context(CreatingDatabaseDirectory { path: db_dir })?;
ObjectStore::new_file(object_store::disk::File::new(&db_dir))
} else {
warn!("NO PERSISTENCE: using memory for object storage");
ObjectStore::new_in_memory(object_store::memory::InMemory::new())
let object_store = match (
config.object_store,
config.bucket,
config.database_directory,
) {
(Some(ObjStoreOpt::Google), Some(bucket), _) => {
info!("Using GCP bucket {} for storage", bucket);
ObjectStore::new_google_cloud_storage(GoogleCloudStorage::new(bucket))
}
(Some(ObjStoreOpt::Google), None, _) => {
return InvalidCloudObjectStoreConfiguration {
object_store: ObjStoreOpt::Google,
}
.fail();
}
(Some(ObjStoreOpt::S3), Some(bucket), _) => {
info!("Using S3 bucket {} for storage", bucket);
// rusoto::Region's default takes the value from the AWS_DEFAULT_REGION env var.
ObjectStore::new_amazon_s3(AmazonS3::new(Default::default(), bucket))
}
(Some(ObjStoreOpt::S3), None, _) => {
return InvalidCloudObjectStoreConfiguration {
object_store: ObjStoreOpt::S3,
}
.fail();
}
(Some(ObjStoreOpt::File), _, Some(ref db_dir)) => {
info!("Using local dir {:?} for storage", db_dir);
fs::create_dir_all(db_dir).context(CreatingDatabaseDirectory { path: db_dir })?;
ObjectStore::new_file(object_store::disk::File::new(&db_dir))
}
(Some(ObjStoreOpt::File), _, None) => {
return InvalidFileObjectStoreConfiguration.fail();
}
(Some(ObjStoreOpt::Azure), Some(_bucket), _) => {
unimplemented!();
}
(Some(ObjStoreOpt::Azure), None, _) => {
return InvalidCloudObjectStoreConfiguration {
object_store: ObjStoreOpt::Azure,
}
.fail();
}
(Some(ObjStoreOpt::Memory), _, _) | (None, _, _) => {
warn!("NO PERSISTENCE: using memory for object storage");
ObjectStore::new_in_memory(object_store::memory::InMemory::new())
}
};
let object_storage = Arc::new(object_store);

View File

@ -1,5 +1,6 @@
//! This module contains a partial implementation of the /v2 HTTP api
//! routes for InfluxDB IOx.
//! This module contains the HTTP api for InfluxDB IOx, including a
//! partial implementation of the /v2 HTTP api routes from InfluxDB
//! for compatibility.
//!
//! Note that these routes are designed to be just helpers for now,
//! and "close enough" to the real /v2 api to be able to test InfluxDB IOx
@ -10,7 +11,7 @@
//! database names and may remove this quasi /v2 API.
// Influx crates
use arrow_deps::{arrow, datafusion::physical_plan::collect};
use arrow_deps::datafusion::physical_plan::collect;
use data_types::{
database_rules::DatabaseRules,
http::{ListDatabasesResponse, WalMetadataQuery},
@ -25,7 +26,7 @@ use server::{ConnectionManager, Server as AppServer};
// External crates
use bytes::{Bytes, BytesMut};
use futures::{self, StreamExt};
use http::header::CONTENT_ENCODING;
use http::header::{CONTENT_ENCODING, CONTENT_TYPE};
use hyper::{Body, Method, Request, Response, StatusCode};
use routerify::{prelude::*, Middleware, RequestInfo, Router, RouterError, RouterService};
use serde::{Deserialize, Serialize};
@ -35,6 +36,9 @@ use tracing::{debug, error, info};
use data_types::http::WalMetadataResponse;
use std::{fmt::Debug, str, sync::Arc};
mod format;
use format::QueryOutputFormat;
#[derive(Debug, Snafu)]
pub enum ApplicationError {
// Internal (unexpected) errors
@ -86,7 +90,9 @@ pub enum ApplicationError {
#[snafu(display("Expected query string in request, but none was provided"))]
ExpectedQueryString {},
#[snafu(display("Invalid query string '{}': {}", query_string, source))]
/// Error for when we could not parse the HTTP query URI (e.g.
/// `?foo=bar&bar=baz`)
#[snafu(display("Invalid query string in HTTP URI '{}': {}", query_string, source))]
InvalidQueryString {
query_string: String,
source: serde_urlencoded::de::Error,
@ -151,6 +157,21 @@ pub enum ApplicationError {
#[snafu(display("Database {} does not have a WAL", name))]
WALNotFound { name: String },
#[snafu(display("Internal error creating HTTP response: {}", source))]
CreatingResponse { source: http::Error },
#[snafu(display(
"Error formatting results of SQL query '{}' using '{:?}': {}",
q,
format,
source
))]
FormattingResult {
q: String,
format: QueryOutputFormat,
source: format::Error,
},
}
impl ApplicationError {
@ -181,6 +202,8 @@ impl ApplicationError {
Self::DatabaseNameError { .. } => self.bad_request(),
Self::DatabaseNotFound { .. } => self.not_found(),
Self::WALNotFound { .. } => self.not_found(),
Self::CreatingResponse { .. } => self.internal_error(),
Self::FormattingResult { .. } => self.internal_error(),
}
}
@ -259,10 +282,11 @@ where
})) // this endpoint is for API backward compatibility with InfluxDB 2.x
.post("/api/v2/write", write::<M>)
.get("/ping", ping)
.get("/api/v2/read", read::<M>)
.get("/health", health)
.get("/iox/api/v1/databases", list_databases::<M>)
.put("/iox/api/v1/databases/:name", create_database::<M>)
.get("/iox/api/v1/databases/:name", get_database::<M>)
.get("/iox/api/v1/databases/:name/query", query::<M>)
.get("/iox/api/v1/databases/:name/wal/meta", get_wal_meta::<M>)
.put("/iox/api/v1/id", set_writer::<M>)
.get("/iox/api/v1/id", get_writer::<M>)
@ -406,53 +430,67 @@ where
.unwrap())
}
#[derive(Deserialize, Debug)]
/// Body of the request to the /read endpoint
struct ReadInfo {
org: String,
bucket: String,
// TODO This is currently a "SQL" request -- should be updated to conform
// to the V2 API for reading (using timestamps, etc).
sql_query: String,
#[derive(Deserialize, Debug, PartialEq)]
/// Parsed URI Parameters of the request to the .../query endpoint
struct QueryParams {
q: String,
#[serde(default)]
format: QueryOutputFormat,
}
// TODO: figure out how to stream read results out rather than rendering the
// whole thing in mem
#[tracing::instrument(level = "debug")]
async fn read<M: ConnectionManager + Send + Sync + Debug + 'static>(
async fn query<M: ConnectionManager + Send + Sync + Debug + 'static>(
req: Request<Body>,
) -> Result<Response<Body>, ApplicationError> {
let server = Arc::clone(&req.data::<Arc<AppServer<M>>>().expect("server state"));
let query = req.uri().query().context(ExpectedQueryString {})?;
let read_info: ReadInfo = serde_urlencoded::from_str(query).context(InvalidQueryString {
query_string: query,
})?;
let uri_query = req.uri().query().context(ExpectedQueryString {})?;
let QueryParams { q, format } =
serde_urlencoded::from_str(uri_query).context(InvalidQueryString {
query_string: uri_query,
})?;
let db_name_str = req
.param("name")
.expect("db name must have been set by routerify")
.clone();
let db_name = DatabaseName::new(&db_name_str).context(DatabaseNameError)?;
debug!(uri = ?req.uri(), %q, ?format, %db_name, "running SQL query");
let db = server
.db(&db_name)
.await
.context(DatabaseNotFound { name: &db_name_str })?;
let planner = SQLQueryPlanner::default();
let executor = server.executor();
let db_name = org_and_bucket_to_database(&read_info.org, &read_info.bucket)
.context(BucketMappingError)?;
let db = server.db(&db_name).await.context(BucketNotFound {
org: read_info.org.clone(),
bucket: read_info.bucket.clone(),
})?;
let physical_plan = planner
.query(db.as_ref(), &read_info.sql_query, executor.as_ref())
.query(db.as_ref(), &q, executor.as_ref())
.await
.context(PlanningSQLQuery { query })?;
.context(PlanningSQLQuery { query: &q })?;
// TODO: stream read results out rather than rendering the
// whole thing in mem
let batches = collect(physical_plan)
.await
.map_err(|e| Box::new(e) as _)
.context(Query { db_name })?;
let results = arrow::util::pretty::pretty_format_batches(&batches).unwrap();
let results = format
.format(&batches)
.context(FormattingResult { q, format })?;
Ok(Response::new(Body::from(results.into_bytes())))
let body = Body::from(results.into_bytes());
let response = Response::builder()
.header(CONTENT_TYPE, format.content_type())
.body(body)
.context(CreatingResponse)?;
Ok(response)
}
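// Editor's note: a minimal sketch (not part of this diff) of calling the new
// query endpoint with an explicit output format, mirroring the integration
// tests further below. The base URL and database name are placeholders.
async fn query_as_csv_example() -> Result<(), reqwest::Error> {
    let client = reqwest::Client::new();
    // GET /iox/api/v1/databases/:name/query?q=<sql>&format=csv
    let body = client
        .get("http://127.0.0.1:8080/iox/api/v1/databases/MyOrg_MyBucket/query")
        .query(&[("q", "select * from h2o_temperature"), ("format", "csv")])
        .send()
        .await?
        .text()
        .await?;
    println!("{}", body);
    Ok(())
}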
#[tracing::instrument(level = "debug")]
@ -637,11 +675,17 @@ async fn get_writer<M: ConnectionManager + Send + Sync + Debug + 'static>(
// Route to test that the server is alive
#[tracing::instrument(level = "debug")]
async fn ping(req: Request<Body>) -> Result<Response<Body>, ApplicationError> {
async fn ping(_: Request<Body>) -> Result<Response<Body>, ApplicationError> {
let response_body = "PONG";
Ok(Response::new(Body::from(response_body.to_string())))
}
#[tracing::instrument(level = "debug")]
async fn health(_: Request<Body>) -> Result<Response<Body>, ApplicationError> {
let response_body = "OK";
Ok(Response::new(Body::from(response_body.to_string())))
}
#[derive(Deserialize, Debug)]
/// Arguments in the query string of the request to /partitions
struct DatabaseInfo {
@ -749,7 +793,6 @@ mod tests {
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
use arrow_deps::{arrow::record_batch::RecordBatch, assert_table_eq};
use http::header;
use query::exec::Executor;
use reqwest::{Client, Response};
@ -783,6 +826,22 @@ mod tests {
Ok(())
}
#[tokio::test]
async fn test_health() -> Result<()> {
let test_storage = Arc::new(AppServer::new(
ConnectionManagerImpl {},
Arc::new(ObjectStore::new_in_memory(InMemory::new())),
));
let server_url = test_server(Arc::clone(&test_storage));
let client = Client::new();
let response = client.get(&format!("{}/health", server_url)).send().await;
// Print the response so if the test fails, we have a log of what went wrong
check_response("health", response, StatusCode::OK, "OK").await;
Ok(())
}
#[tokio::test]
async fn test_write() -> Result<()> {
let test_storage = Arc::new(AppServer::new(
@ -833,6 +892,139 @@ mod tests {
Ok(())
}
    /// Sets up a test database with some data for testing the query endpoint.
    /// Returns a client for communicating with the server, and the server
    /// endpoint.
async fn setup_test_data() -> (Client, String) {
let test_storage: Arc<AppServer<ConnectionManagerImpl>> = Arc::new(AppServer::new(
ConnectionManagerImpl {},
Arc::new(ObjectStore::new_in_memory(InMemory::new())),
));
test_storage.set_id(1);
test_storage
.create_database("MyOrg_MyBucket", DatabaseRules::new())
.await
.unwrap();
let server_url = test_server(Arc::clone(&test_storage));
let client = Client::new();
let lp_data = "h2o_temperature,location=santa_monica,state=CA surface_degrees=65.2,bottom_degrees=50.4 1568756160";
// send write data
let bucket_name = "MyBucket";
let org_name = "MyOrg";
let response = client
.post(&format!(
"{}/api/v2/write?bucket={}&org={}",
server_url, bucket_name, org_name
))
.body(lp_data)
.send()
.await;
check_response("write", response, StatusCode::NO_CONTENT, "").await;
(client, server_url)
}
#[tokio::test]
async fn test_query_pretty() -> Result<()> {
let (client, server_url) = setup_test_data().await;
// send query data
let response = client
.get(&format!(
"{}/iox/api/v1/databases/MyOrg_MyBucket/query?q={}",
server_url, "select%20*%20from%20h2o_temperature"
))
.send()
.await;
assert_eq!(get_content_type(&response), "text/plain");
let res = "+----------------+--------------+-------+-----------------+------------+\n\
| bottom_degrees | location | state | surface_degrees | time |\n\
+----------------+--------------+-------+-----------------+------------+\n\
| 50.4 | santa_monica | CA | 65.2 | 1568756160 |\n\
+----------------+--------------+-------+-----------------+------------+\n";
check_response("query", response, StatusCode::OK, res).await;
// same response is expected if we explicitly request 'format=pretty'
let response = client
.get(&format!(
"{}/iox/api/v1/databases/MyOrg_MyBucket/query?q={}&format=pretty",
server_url, "select%20*%20from%20h2o_temperature"
))
.send()
.await;
assert_eq!(get_content_type(&response), "text/plain");
check_response("query", response, StatusCode::OK, res).await;
Ok(())
}
#[tokio::test]
async fn test_query_csv() -> Result<()> {
let (client, server_url) = setup_test_data().await;
// send query data
let response = client
.get(&format!(
"{}/iox/api/v1/databases/MyOrg_MyBucket/query?q={}&format=csv",
server_url, "select%20*%20from%20h2o_temperature"
))
.send()
.await;
assert_eq!(get_content_type(&response), "text/csv");
let res = "bottom_degrees,location,state,surface_degrees,time\n\
50.4,santa_monica,CA,65.2,1568756160\n";
check_response("query", response, StatusCode::OK, res).await;
Ok(())
}
#[tokio::test]
async fn test_query_json() -> Result<()> {
let (client, server_url) = setup_test_data().await;
        // send a second line of data to demonstrate how that works
let lp_data = "h2o_temperature,location=Boston,state=MA surface_degrees=50.2 1568756160";
// send write data
let bucket_name = "MyBucket";
let org_name = "MyOrg";
let response = client
.post(&format!(
"{}/api/v2/write?bucket={}&org={}",
server_url, bucket_name, org_name
))
.body(lp_data)
.send()
.await;
check_response("write", response, StatusCode::NO_CONTENT, "").await;
// send query data
let response = client
.get(&format!(
"{}/iox/api/v1/databases/MyOrg_MyBucket/query?q={}&format=json",
server_url, "select%20*%20from%20h2o_temperature"
))
.send()
.await;
assert_eq!(get_content_type(&response), "application/json");
        // Note the two JSON records are returned together in a single array
let res = r#"[{"bottom_degrees":50.4,"location":"santa_monica","state":"CA","surface_degrees":65.2,"time":1568756160},{"location":"Boston","state":"MA","surface_degrees":50.2,"time":1568756160}]"#;
check_response("query", response, StatusCode::OK, res).await;
Ok(())
}
fn gzip_str(s: &str) -> Vec<u8> {
use flate2::{write::GzEncoder, Compression};
use std::io::Write;
@ -865,7 +1057,7 @@ mod tests {
"{}/api/v2/write?bucket={}&org={}",
server_url, bucket_name, org_name
))
.header(header::CONTENT_ENCODING, "gzip")
.header(CONTENT_ENCODING, "gzip")
.body(gzip_str(lp_data))
.send()
.await;
@ -1119,6 +1311,19 @@ mod tests {
assert_eq!(r4.segments.len(), 0);
}
fn get_content_type(response: &Result<Response, reqwest::Error>) -> String {
if let Ok(response) = response {
response
.headers()
.get(CONTENT_TYPE)
.map(|v| v.to_str().unwrap())
.unwrap_or("")
.to_string()
} else {
"".to_string()
}
}
/// checks a http response against expected results
async fn check_response(
description: &str,
@ -1191,4 +1396,59 @@ mod tests {
collect(physical_plan).await.unwrap()
}
#[test]
fn query_params_format_default() {
// default to pretty format when not otherwise specified
assert_eq!(
serde_urlencoded::from_str("q=foo"),
Ok(QueryParams {
q: "foo".to_string(),
format: QueryOutputFormat::Pretty
})
);
}
#[test]
fn query_params_format_pretty() {
assert_eq!(
serde_urlencoded::from_str("q=foo&format=pretty"),
Ok(QueryParams {
q: "foo".to_string(),
format: QueryOutputFormat::Pretty
})
);
}
#[test]
fn query_params_format_csv() {
assert_eq!(
serde_urlencoded::from_str("q=foo&format=csv"),
Ok(QueryParams {
q: "foo".to_string(),
format: QueryOutputFormat::CSV
})
);
}
#[test]
fn query_params_format_json() {
assert_eq!(
serde_urlencoded::from_str("q=foo&format=json"),
Ok(QueryParams {
q: "foo".to_string(),
format: QueryOutputFormat::JSON
})
);
}
#[test]
fn query_params_bad_format() {
assert_eq!(
serde_urlencoded::from_str::<QueryParams>("q=foo&format=jsob")
.unwrap_err()
.to_string(),
"unknown variant `jsob`, expected one of `pretty`, `csv`, `json`"
);
}
}

View File

@ -0,0 +1,242 @@
//! Output formatting utilities for query endpoint
use serde::Deserialize;
use snafu::{ResultExt, Snafu};
use std::io::Write;
use serde_json::Value;
use arrow_deps::arrow::{
self, csv::WriterBuilder, error::ArrowError, json::writer::record_batches_to_json_rows,
record_batch::RecordBatch,
};
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("Arrow pretty printing error: {}", source))]
PrettyArrow { source: ArrowError },
#[snafu(display("Arrow csv printing error: {}", source))]
CsvArrow { source: ArrowError },
#[snafu(display("Arrow json printing error: {}", source))]
JsonArrow { source: ArrowError },
#[snafu(display("Json conversion error: {}", source))]
JsonConversion { source: serde_json::Error },
#[snafu(display("IO error during Json conversion: {}", source))]
JsonWrite { source: std::io::Error },
#[snafu(display("Error converting CSV output to UTF-8: {}", source))]
CsvUtf8 { source: std::string::FromUtf8Error },
#[snafu(display("Error converting JSON output to UTF-8: {}", source))]
JsonUtf8 { source: std::string::FromUtf8Error },
}
type Result<T, E = Error> = std::result::Result<T, E>;
#[derive(Deserialize, Debug, Copy, Clone, PartialEq)]
/// Requested output format for the query endpoint
pub enum QueryOutputFormat {
/// Arrow pretty printer format (default)
#[serde(rename = "pretty")]
Pretty,
/// Comma separated values
#[serde(rename = "csv")]
CSV,
/// Arrow JSON format
#[serde(rename = "json")]
JSON,
}
impl Default for QueryOutputFormat {
fn default() -> Self {
Self::Pretty
}
}
impl QueryOutputFormat {
/// Return the content type of the relevant format
pub fn content_type(&self) -> &'static str {
match self {
Self::Pretty => "text/plain",
Self::CSV => "text/csv",
Self::JSON => "application/json",
}
}
}
impl QueryOutputFormat {
/// Format the [`RecordBatch`]es into a String in one of the
/// following formats:
///
/// Pretty:
/// ```text
/// +----------------+--------------+-------+-----------------+------------+
/// | bottom_degrees | location | state | surface_degrees | time |
/// +----------------+--------------+-------+-----------------+------------+
/// | 50.4 | santa_monica | CA | 65.2 | 1568756160 |
/// +----------------+--------------+-------+-----------------+------------+
/// ```
///
/// CSV:
/// ```text
/// bottom_degrees,location,state,surface_degrees,time
/// 50.4,santa_monica,CA,65.2,1568756160
/// ```
///
/// JSON:
///
/// Example (newline + whitespace added for clarity):
/// ```text
/// [
/// {"bottom_degrees":50.4,"location":"santa_monica","state":"CA","surface_degrees":65.2,"time":1568756160},
/// {"location":"Boston","state":"MA","surface_degrees":50.2,"time":1568756160}
/// ]
/// ```
pub fn format(&self, batches: &[RecordBatch]) -> Result<String> {
match self {
Self::Pretty => batches_to_pretty(&batches),
Self::CSV => batches_to_csv(&batches),
Self::JSON => batches_to_json(&batches),
}
}
}
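// Editor's note: a minimal sketch (not part of this diff) showing how
// `QueryOutputFormat::format` could be exercised on a hand-built RecordBatch,
// using the `arrow` re-export already imported at the top of this module. The
// field names and values are placeholders.
fn format_example() -> Result<String> {
    use arrow::array::{ArrayRef, Float64Array, StringArray};
    use arrow::datatypes::{DataType, Field, Schema};
    use std::sync::Arc;

    let schema = Arc::new(Schema::new(vec![
        Field::new("location", DataType::Utf8, false),
        Field::new("surface_degrees", DataType::Float64, false),
    ]));
    let batch = RecordBatch::try_new(
        schema,
        vec![
            Arc::new(StringArray::from(vec!["santa_monica"])) as ArrayRef,
            Arc::new(Float64Array::from(vec![65.2])),
        ],
    )
    .expect("building record batch");

    // Expected to yield a header line plus one data row, e.g.
    // "location,surface_degrees\nsanta_monica,65.2\n"
    QueryOutputFormat::CSV.format(&[batch])
}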
fn batches_to_pretty(batches: &[RecordBatch]) -> Result<String> {
arrow::util::pretty::pretty_format_batches(batches).context(PrettyArrow)
}
fn batches_to_csv(batches: &[RecordBatch]) -> Result<String> {
let mut bytes = vec![];
{
let mut writer = WriterBuilder::new().has_headers(true).build(&mut bytes);
for batch in batches {
writer.write(batch).context(CsvArrow)?;
}
}
let csv = String::from_utf8(bytes).context(CsvUtf8)?;
Ok(csv)
}
fn batches_to_json(batches: &[RecordBatch]) -> Result<String> {
let mut bytes = vec![];
{
let mut writer = JsonArrayWriter::new(&mut bytes);
writer.write_batches(batches)?;
writer.finish()?;
}
let json = String::from_utf8(bytes).context(JsonUtf8)?;
Ok(json)
}
/// Writes out well-formed JSON arrays in a streaming fashion
///
/// [{"foo": "bar"}, {"foo": "baz"}]
///
/// This is based on the arrow JSON writer (json::writer::Writer)
///
/// TODO contribute this back to arrow: https://issues.apache.org/jira/browse/ARROW-11773
struct JsonArrayWriter<W>
where
W: Write,
{
started: bool,
finished: bool,
writer: W,
}
impl<W> JsonArrayWriter<W>
where
W: Write,
{
fn new(writer: W) -> Self {
Self {
writer,
started: false,
finished: false,
}
}
/// Consume self and return the inner writer
#[cfg(test)]
pub fn into_inner(self) -> W {
self.writer
}
pub fn write_row(&mut self, row: &Value) -> Result<()> {
if !self.started {
self.writer.write_all(b"[").context(JsonWrite)?;
self.started = true;
} else {
self.writer.write_all(b",").context(JsonWrite)?;
}
self.writer
.write_all(&serde_json::to_vec(row).context(JsonConversion)?)
.context(JsonWrite)?;
Ok(())
}
pub fn write_batches(&mut self, batches: &[RecordBatch]) -> Result<()> {
for row in record_batches_to_json_rows(batches) {
self.write_row(&Value::Object(row))?;
}
Ok(())
}
    /// Tell the writer that there is no more data to come so it can
    /// write the final `']'`
pub fn finish(&mut self) -> Result<()> {
if self.started && !self.finished {
self.writer.write_all(b"]").context(JsonWrite)?;
self.finished = true;
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use serde_json::json;
use super::*;
#[test]
fn json_writer_empty() {
let mut writer = JsonArrayWriter::new(vec![] as Vec<u8>);
writer.finish().unwrap();
assert_eq!(String::from_utf8(writer.into_inner()).unwrap(), "");
}
#[test]
fn json_writer_one_row() {
let mut writer = JsonArrayWriter::new(vec![] as Vec<u8>);
let v = json!({ "an": "object" });
writer.write_row(&v).unwrap();
writer.finish().unwrap();
assert_eq!(
String::from_utf8(writer.into_inner()).unwrap(),
r#"[{"an":"object"}]"#
);
}
#[test]
fn json_writer_two_rows() {
let mut writer = JsonArrayWriter::new(vec![] as Vec<u8>);
let v = json!({ "an": "object" });
writer.write_row(&v).unwrap();
let v = json!({ "another": "object" });
writer.write_row(&v).unwrap();
writer.finish().unwrap();
assert_eq!(
String::from_utf8(writer.into_inner()).unwrap(),
r#"[{"an":"object"},{"another":"object"}]"#
);
}
}

View File

@ -30,7 +30,22 @@ where
{
let stream = TcpListenerStream::new(socket);
let (mut health_reporter, health_service) = tonic_health::server::health_reporter();
let services = [
generated_types::STORAGE_SERVICE,
generated_types::IOX_TESTING_SERVICE,
generated_types::ARROW_SERVICE,
];
for service in &services {
health_reporter
.set_service_status(service, tonic_health::ServingStatus::Serving)
.await;
}
tonic::transport::Server::builder()
.add_service(health_service)
.add_service(testing::make_server())
.add_service(storage::make_server(Arc::clone(&server)))
.add_service(flight::make_server(server))
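// Editor's note: a hedged sketch (not part of this diff) of how a client could
// probe one of the services registered above. It assumes tonic-health exposes
// its generated gRPC client under `tonic_health::proto`; the address is a
// placeholder.
async fn check_health_example() -> Result<(), Box<dyn std::error::Error>> {
    use tonic_health::proto::{health_client::HealthClient, HealthCheckRequest};

    let mut client = HealthClient::connect("http://127.0.0.1:8082").await?;
    let response = client
        .check(HealthCheckRequest {
            // one of the service-name constants registered via
            // set_service_status above
            service: generated_types::STORAGE_SERVICE.to_string(),
        })
        .await?;
    println!("health status: {:?}", response.into_inner().status);
    Ok(())
}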

View File

@ -852,7 +852,7 @@ async fn tag_values_impl<T>(
rpc_predicate: Option<Predicate>,
) -> Result<StringValuesResponse>
where
T: DatabaseStore,
T: DatabaseStore + 'static,
{
let rpc_predicate_string = format!("{:?}", rpc_predicate);
@ -873,10 +873,12 @@ where
.await
.context(DatabaseNotFound { db_name })?;
let planner = InfluxRPCPlanner::new();
let executor = db_store.executor();
let tag_value_plan = db
.column_values(tag_name, predicate)
let tag_value_plan = planner
.tag_values(db.as_ref(), tag_name, predicate)
.await
.map_err(|e| Box::new(e) as _)
.context(ListingTagValues { db_name, tag_name })?;
@ -1107,11 +1109,11 @@ mod tests {
use arrow_deps::datafusion::logical_plan::{col, lit, Expr};
use panic_logging::SendPanicsToTracing;
use query::{
exec::SeriesSetPlans,
group_by::{Aggregate as QueryAggregate, WindowDuration as QueryWindowDuration},
plan::seriesset::SeriesSetPlans,
test::QueryGroupsRequest,
test::TestDatabaseStore,
test::{ColumnValuesRequest, QuerySeriesRequest, TestChunk},
test::{QuerySeriesRequest, TestChunk},
};
use std::{
convert::TryFrom,
@ -1478,11 +1480,18 @@ mod tests {
let db_info = OrgAndBucket::new(123, 456);
let partition_id = 1;
let test_db = fixture
// Add a chunk with a field
let chunk = TestChunk::new(0)
.with_time_column("TheMeasurement")
.with_tag_column("TheMeasurement", "state")
.with_one_row_of_null_data("TheMeasurement");
fixture
.test_storage
.db_or_create(&db_info.db_name)
.await
.expect("creating test database");
.unwrap()
.add_chunk("my_partition_key", Arc::new(chunk));
let source = Some(StorageClientWrapper::read_source(
db_info.org_id,
@ -1490,24 +1499,35 @@ mod tests {
partition_id,
));
let tag_values = vec!["k1", "k2", "k3", "k4"];
let request = TagValuesRequest {
tags_source: source.clone(),
range: make_timestamp_range(150, 200),
range: make_timestamp_range(150, 2000),
predicate: make_state_ma_predicate(),
tag_key: "the_tag_key".into(),
tag_key: "state".into(),
};
let expected_request = ColumnValuesRequest {
predicate: "Predicate { exprs: [#state Eq Utf8(\"MA\")] range: TimestampRange { start: 150, end: 200 }}".into(),
column_name: "the_tag_key".into(),
};
test_db.set_column_values(to_string_vec(&tag_values));
let actual_tag_values = fixture.storage_client.tag_values(request).await.unwrap();
assert_eq!(actual_tag_values, tag_values,);
assert_eq!(test_db.get_column_values_request(), Some(expected_request),);
assert_eq!(actual_tag_values, vec!["MA"]);
}
/// test the plumbing of the RPC layer for tag_values
///
    /// For the special case of tag_key = _measurement, which means
    /// listing all measurement names
#[tokio::test]
async fn test_storage_rpc_tag_values_with_measurement() {
        // Start a test gRPC server on a randomly allocated port
let mut fixture = Fixture::new().await.expect("Connecting to test server");
let db_info = OrgAndBucket::new(123, 456);
let partition_id = 1;
let source = Some(StorageClientWrapper::read_source(
db_info.org_id,
db_info.bucket_id,
partition_id,
));
// ---
// test tag_key = _measurement means listing all measurement names
@ -1590,11 +1610,14 @@ mod tests {
let db_info = OrgAndBucket::new(123, 456);
let partition_id = 1;
let test_db = fixture
let chunk = TestChunk::new(0).with_error("Sugar we are going down");
fixture
.test_storage
.db_or_create(&db_info.db_name)
.await
.expect("creating test database");
.unwrap()
.add_chunk("my_partition_key", Arc::new(chunk));
let source = Some(StorageClientWrapper::read_source(
db_info.org_id,
@ -1612,12 +1635,13 @@ mod tests {
tag_key: "the_tag_key".into(),
};
// Note we don't set the column_names on the test database, so we expect an
// error
let response = fixture.storage_client.tag_values(request).await;
assert!(response.is_err());
let response_string = format!("{:?}", response);
let expected_error = "No saved column_values in TestDatabase";
let response_string = fixture
.storage_client
.tag_values(request)
.await
.unwrap_err()
.to_string();
let expected_error = "Sugar we are going down";
assert!(
response_string.contains(expected_error),
"'{}' did not contain expected content '{}'",
@ -1625,12 +1649,6 @@ mod tests {
expected_error
);
let expected_request = Some(ColumnValuesRequest {
predicate: "Predicate {}".into(),
column_name: "the_tag_key".into(),
});
assert_eq!(test_db.get_column_values_request(), expected_request);
// ---
// test error with non utf8 value
// ---
@ -1641,9 +1659,12 @@ mod tests {
tag_key: [0, 255].into(), // this is not a valid UTF-8 string
};
let response = fixture.storage_client.tag_values(request).await;
assert!(response.is_err());
let response_string = format!("{:?}", response);
let response_string = fixture
.storage_client
.tag_values(request)
.await
.unwrap_err()
.to_string();
let expected_error = "Error converting tag_key to UTF-8 in tag_values request";
assert!(
response_string.contains(expected_error),
@ -1653,22 +1674,27 @@ mod tests {
);
}
/// test the plumbing of the RPC layer for measurement_tag_values--
/// specifically that the right parameters are passed into the Database
/// interface and that the returned values are sent back via gRPC.
/// test the plumbing of the RPC layer for measurement_tag_values
#[tokio::test]
async fn test_storage_rpc_measurement_tag_values() {
        // Start a test gRPC server on a randomly allocated port
test_helpers::maybe_start_logging();
let mut fixture = Fixture::new().await.expect("Connecting to test server");
let db_info = OrgAndBucket::new(123, 456);
let partition_id = 1;
let test_db = fixture
// Add a chunk with a field
let chunk = TestChunk::new(0)
.with_time_column("TheMeasurement")
.with_tag_column("TheMeasurement", "state")
.with_one_row_of_null_data("TheMeasurement");
fixture
.test_storage
.db_or_create(&db_info.db_name)
.await
.expect("creating test database");
.unwrap()
.add_chunk("my_partition_key", Arc::new(chunk));
let source = Some(StorageClientWrapper::read_source(
db_info.org_id,
@ -1676,22 +1702,14 @@ mod tests {
partition_id,
));
let tag_values = vec!["k1", "k2", "k3", "k4"];
let request = MeasurementTagValuesRequest {
measurement: "m4".into(),
measurement: "TheMeasurement".into(),
source: source.clone(),
range: make_timestamp_range(150, 200),
range: make_timestamp_range(150, 2000),
predicate: make_state_ma_predicate(),
tag_key: "the_tag_key".into(),
tag_key: "state".into(),
};
let expected_request = ColumnValuesRequest {
predicate: "Predicate { table_names: m4 exprs: [#state Eq Utf8(\"MA\")] range: TimestampRange { start: 150, end: 200 }}".into(),
column_name: "the_tag_key".into(),
};
test_db.set_column_values(to_string_vec(&tag_values));
let actual_tag_values = fixture
.storage_client
.measurement_tag_values(request)
@ -1699,15 +1717,34 @@ mod tests {
.unwrap();
assert_eq!(
actual_tag_values, tag_values,
actual_tag_values,
vec!["MA"],
"unexpected tag values while getting tag values",
);
}
assert_eq!(
test_db.get_column_values_request(),
Some(expected_request),
"unexpected request while getting tag values",
);
#[tokio::test]
async fn test_storage_rpc_measurement_tag_values_error() {
test_helpers::maybe_start_logging();
let mut fixture = Fixture::new().await.expect("Connecting to test server");
let db_info = OrgAndBucket::new(123, 456);
let partition_id = 1;
let chunk = TestChunk::new(0).with_error("Sugar we are going down");
fixture
.test_storage
.db_or_create(&db_info.db_name)
.await
.unwrap()
.add_chunk("my_partition_key", Arc::new(chunk));
let source = Some(StorageClientWrapper::read_source(
db_info.org_id,
db_info.bucket_id,
partition_id,
));
// ---
// test error
@ -1722,22 +1759,19 @@ mod tests {
// Note we don't set the column_names on the test database, so we expect an
// error
let response = fixture.storage_client.measurement_tag_values(request).await;
assert!(response.is_err());
let response_string = format!("{:?}", response);
let expected_error = "No saved column_values in TestDatabase";
let response_string = fixture
.storage_client
.measurement_tag_values(request)
.await
.unwrap_err()
.to_string();
let expected_error = "Sugar we are going down";
assert!(
response_string.contains(expected_error),
"'{}' did not contain expected content '{}'",
response_string,
expected_error
);
let expected_request = Some(ColumnValuesRequest {
predicate: "Predicate { table_names: m5}".into(),
column_name: "the_tag_key".into(),
});
assert_eq!(test_db.get_column_values_request(), expected_request);
}
#[tokio::test]

View File

@ -48,7 +48,7 @@ const HTTP_BIND_ADDR: &str = http_bind_addr!();
const GRPC_BIND_ADDR: &str = grpc_bind_addr!();
const HTTP_BASE: &str = concat!("http://", http_bind_addr!());
const API_BASE: &str = concat!("http://", http_bind_addr!(), "/api/v2");
const IOX_API_V1_BASE: &str = concat!("http://", http_bind_addr!(), "/iox/api/v1");
const GRPC_URL_BASE: &str = concat!("http://", grpc_bind_addr!(), "/");
const TOKEN: &str = "InfluxDB IOx doesn't have authentication yet";
@ -377,6 +377,27 @@ impl TestServer {
// different ports but both need to be up for the test to run
let try_grpc_connect = async {
let mut interval = tokio::time::interval(Duration::from_millis(500));
loop {
match influxdb_iox_client::health::Client::connect(GRPC_URL_BASE).await {
Ok(mut client) => {
println!("Successfully connected to server");
match client.check_storage().await {
Ok(_) => {
println!("Storage service is running");
break;
}
Err(e) => println!("Error checking storage service status: {}", e),
}
}
Err(e) => {
println!("Waiting for gRPC API to be up: {}", e);
}
}
interval.tick().await;
}
loop {
match StorageClient::connect(GRPC_URL_BASE).await {
Ok(storage_client) => {
@ -387,7 +408,7 @@ impl TestServer {
return;
}
Err(e) => {
println!("Waiting for gRPC server to be up: {}", e);
println!("Failed to create storage client: {}", e)
}
}
interval.tick().await;
@ -396,7 +417,7 @@ impl TestServer {
let try_http_connect = async {
let client = reqwest::Client::new();
let url = format!("{}/ping", HTTP_BASE);
let url = format!("{}/health", HTTP_BASE);
let mut interval = tokio::time::interval(Duration::from_millis(500));
loop {
match client.get(&url).send().await {

View File

@ -1,4 +1,4 @@
use crate::{Scenario, API_BASE};
use crate::{Scenario, IOX_API_V1_BASE};
pub async fn test(
client: &reqwest::Client,
@ -6,7 +6,7 @@ pub async fn test(
sql_query: &str,
expected_read_data: &[String],
) {
let text = read_data_as_sql(&client, "/read", scenario, sql_query).await;
let text = read_data_as_sql(&client, scenario, sql_query).await;
assert_eq!(
text, expected_read_data,
@ -17,18 +17,15 @@ pub async fn test(
async fn read_data_as_sql(
client: &reqwest::Client,
path: &str,
scenario: &Scenario,
sql_query: &str,
) -> Vec<String> {
let url = format!("{}{}", API_BASE, path);
let db_name = format!("{}_{}", scenario.org_id_str(), scenario.bucket_id_str());
let path = format!("/databases/{}/query", db_name);
let url = format!("{}{}", IOX_API_V1_BASE, path);
let lines = client
.get(&url)
.query(&[
("bucket", scenario.bucket_id_str()),
("org", scenario.org_id_str()),
("sql_query", sql_query),
])
.query(&[("q", sql_query)])
.send()
.await
.unwrap()