chore: merge main into branch

Merge branch 'main' into ntran/optimize_column_selection
pull/24376/head
Nga Tran 2021-02-26 15:29:43 -05:00
commit 18de3bdcab
57 changed files with 3558 additions and 910 deletions


@ -86,7 +86,7 @@ jobs:
# out for parallel CI runs!
#
# To change the contents of the build container, modify docker/Dockerfile.ci
# To change the final release container, modify docker/Dockerfile.perf
# To change the final release container, modify docker/Dockerfile.iox
perf_image:
docker:
- image: quay.io/influxdb/rust:ci
@ -105,7 +105,7 @@ jobs:
echo "$QUAY_PASS" | docker login quay.io --username $QUAY_USER --password-stdin
- run: |
BRANCH=$(git rev-parse --abbrev-ref HEAD | tr '/' '.')
docker build -t quay.io/influxdb/fusion:$BRANCH -f docker/Dockerfile.perf .
docker build -t quay.io/influxdb/fusion:$BRANCH -f docker/Dockerfile.iox .
docker push quay.io/influxdb/fusion:$BRANCH
echo "export BRANCH=${BRANCH}" >> $BASH_ENV
- run:

.dockerignore (new file, 4 lines)

@ -0,0 +1,4 @@
# Ignore everything
**
# Except
!target/release/influxdb_iox


@ -71,7 +71,7 @@ jobs:
args: --workspace
lints:
name: Lints
name: Rust Lints
runs-on: ubuntu-latest
container:
image: quay.io/influxdb/rust:ci
@ -91,3 +91,13 @@ jobs:
with:
token: ${{ secrets.GITHUB_TOKEN }}
args: --all-targets --workspace -- -D warnings
protobuf:
name: Protobuf Lints
runs-on: ubuntu-latest
container:
image: bufbuild/buf
steps:
- uses: actions/checkout@v2
- name: Lint IOx protobuf
run: buf lint

Cargo.lock (generated, 213 lines changed)

@ -101,7 +101,7 @@ checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
[[package]]
name = "arrow"
version = "4.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow.git?rev=ad4504e8e85eb8e5babe0f01ca8cf9947499fc40#ad4504e8e85eb8e5babe0f01ca8cf9947499fc40"
source = "git+https://github.com/apache/arrow.git?rev=b5ac048c75cc55f4039d279f554920be3112d7cd#b5ac048c75cc55f4039d279f554920be3112d7cd"
dependencies = [
"cfg_aliases",
"chrono",
@ -124,7 +124,7 @@ dependencies = [
[[package]]
name = "arrow-flight"
version = "4.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow.git?rev=ad4504e8e85eb8e5babe0f01ca8cf9947499fc40#ad4504e8e85eb8e5babe0f01ca8cf9947499fc40"
source = "git+https://github.com/apache/arrow.git?rev=b5ac048c75cc55f4039d279f554920be3112d7cd#b5ac048c75cc55f4039d279f554920be3112d7cd"
dependencies = [
"arrow",
"bytes",
@ -411,9 +411,9 @@ dependencies = [
[[package]]
name = "bumpalo"
version = "3.6.0"
version = "3.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "099e596ef14349721d9016f6b80dd3419ea1bf289ab9b44df8e4dfd3a005d5d9"
checksum = "63396b8a4b9de3f4fdfb320ab6080762242f66a8ef174c49d8e19b674db4cdbe"
[[package]]
name = "byteorder"
@ -438,9 +438,9 @@ dependencies = [
[[package]]
name = "cc"
version = "1.0.66"
version = "1.0.67"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c0496836a84f8d0495758516b8621a622beb77c0fed418570e50764093ced48"
checksum = "e3c69b077ad434294d3ce9f1f6143a2a4b89a8a2d54ef813d85003a4fd1137fd"
dependencies = [
"jobserver",
]
@ -488,9 +488,9 @@ dependencies = [
[[package]]
name = "clang-sys"
version = "1.1.0"
version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5cb92721cb37482245ed88428f72253ce422b3b4ee169c70a0642521bb5db4cc"
checksum = "f54d78e30b388d4815220c8dd03fea5656b6c6d32adb59e89061552a102f8da1"
dependencies = [
"glob",
"libc",
@ -669,9 +669,9 @@ dependencies = [
"cfg-if 1.0.0",
"crossbeam-channel 0.5.0",
"crossbeam-deque 0.8.0",
"crossbeam-epoch 0.9.1",
"crossbeam-epoch 0.9.2",
"crossbeam-queue 0.3.1",
"crossbeam-utils 0.8.1",
"crossbeam-utils 0.8.2",
]
[[package]]
@ -691,7 +691,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dca26ee1f8d361640700bde38b2c37d8c22b3ce2d360e1fc1c74ea4b0aa7d775"
dependencies = [
"cfg-if 1.0.0",
"crossbeam-utils 0.8.1",
"crossbeam-utils 0.8.2",
]
[[package]]
@ -712,8 +712,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9"
dependencies = [
"cfg-if 1.0.0",
"crossbeam-epoch 0.9.1",
"crossbeam-utils 0.8.1",
"crossbeam-epoch 0.9.2",
"crossbeam-utils 0.8.2",
]
[[package]]
@ -733,14 +733,14 @@ dependencies = [
[[package]]
name = "crossbeam-epoch"
version = "0.9.1"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1aaa739f95311c2c7887a76863f500026092fb1dce0161dab577e559ef3569d"
checksum = "d60ab4a8dba064f2fbb5aa270c28da5cf4bbd0e72dae1140a6b0353a779dbe00"
dependencies = [
"cfg-if 1.0.0",
"const_fn",
"crossbeam-utils 0.8.1",
"crossbeam-utils 0.8.2",
"lazy_static",
"loom",
"memoffset 0.6.1",
"scopeguard",
]
@ -763,7 +763,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f6cb3c7f5b8e51bc3ebb73a2327ad4abdbd119dc13223f14f961d2f38486756"
dependencies = [
"cfg-if 1.0.0",
"crossbeam-utils 0.8.1",
"crossbeam-utils 0.8.2",
]
[[package]]
@ -779,13 +779,14 @@ dependencies = [
[[package]]
name = "crossbeam-utils"
version = "0.8.1"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02d96d1e189ef58269ebe5b97953da3274d83a93af647c2ddd6f9dab28cedb8d"
checksum = "bae8f328835f8f5a6ceb6a7842a7f2d0c03692adb5c889347235d59194731fe3"
dependencies = [
"autocfg",
"cfg-if 1.0.0",
"lazy_static",
"loom",
]
[[package]]
@ -850,7 +851,7 @@ dependencies = [
[[package]]
name = "datafusion"
version = "4.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow.git?rev=ad4504e8e85eb8e5babe0f01ca8cf9947499fc40#ad4504e8e85eb8e5babe0f01ca8cf9947499fc40"
source = "git+https://github.com/apache/arrow.git?rev=b5ac048c75cc55f4039d279f554920be3112d7cd#b5ac048c75cc55f4039d279f554920be3112d7cd"
dependencies = [
"ahash 0.7.0",
"arrow",
@ -871,6 +872,7 @@ dependencies = [
"sha2",
"sqlparser 0.8.0",
"tokio",
"unicode-segmentation",
]
[[package]]
@ -1115,9 +1117,9 @@ checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
[[package]]
name = "form_urlencoded"
version = "1.0.0"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ece68d15c92e84fa4f19d3780f1294e5ca82a78a6d515f1efaabcc144688be00"
checksum = "5fc25a87fa4fd2094bffb06925852034d90a17f0d1e05197d4956d3555752191"
dependencies = [
"matches",
"percent-encoding",
@ -1135,9 +1137,9 @@ dependencies = [
[[package]]
name = "futures"
version = "0.3.12"
version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da9052a1a50244d8d5aa9bf55cbc2fb6f357c86cc52e46c62ed390a7180cf150"
checksum = "7f55667319111d593ba876406af7c409c0ebb44dc4be6132a783ccf163ea14c1"
dependencies = [
"futures-channel",
"futures-core",
@ -1150,9 +1152,9 @@ dependencies = [
[[package]]
name = "futures-channel"
version = "0.3.12"
version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2d31b7ec7efab6eefc7c57233bb10b847986139d88cc2f5a02a1ae6871a1846"
checksum = "8c2dd2df839b57db9ab69c2c9d8f3e8c81984781937fe2807dc6dcf3b2ad2939"
dependencies = [
"futures-core",
"futures-sink",
@ -1160,15 +1162,15 @@ dependencies = [
[[package]]
name = "futures-core"
version = "0.3.12"
version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79e5145dde8da7d1b3892dad07a9c98fc04bc39892b1ecc9692cf53e2b780a65"
checksum = "15496a72fabf0e62bdc3df11a59a3787429221dd0710ba8ef163d6f7a9112c94"
[[package]]
name = "futures-executor"
version = "0.3.12"
version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9e59fdc009a4b3096bf94f740a0f2424c082521f20a9b08c5c07c48d90fd9b9"
checksum = "891a4b7b96d84d5940084b2a37632dd65deeae662c114ceaa2c879629c9c0ad1"
dependencies = [
"futures-core",
"futures-task",
@ -1177,15 +1179,15 @@ dependencies = [
[[package]]
name = "futures-io"
version = "0.3.12"
version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28be053525281ad8259d47e4de5de657b25e7bac113458555bb4b70bc6870500"
checksum = "d71c2c65c57704c32f5241c1223167c2c3294fd34ac020c807ddbe6db287ba59"
[[package]]
name = "futures-macro"
version = "0.3.12"
version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c287d25add322d9f9abdcdc5927ca398917996600182178774032e9f8258fedd"
checksum = "ea405816a5139fb39af82c2beb921d52143f556038378d6db21183a5c37fbfb7"
dependencies = [
"proc-macro-hack",
"proc-macro2",
@ -1195,24 +1197,21 @@ dependencies = [
[[package]]
name = "futures-sink"
version = "0.3.12"
version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "caf5c69029bda2e743fddd0582d1083951d65cc9539aebf8812f36c3491342d6"
checksum = "85754d98985841b7d4f5e8e6fbfa4a4ac847916893ec511a2917ccd8525b8bb3"
[[package]]
name = "futures-task"
version = "0.3.12"
version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13de07eb8ea81ae445aca7b69f5f7bf15d7bf4912d8ca37d6645c77ae8a58d86"
dependencies = [
"once_cell",
]
checksum = "fa189ef211c15ee602667a6fcfe1c1fd9e07d42250d2156382820fba33c9df80"
[[package]]
name = "futures-test"
version = "0.3.12"
version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b30f48f6b9cd26d8739965d6e3345c511718884fb223795b80dc71d24a9ea9a"
checksum = "f1fe5e51002528907757d5f1648101086f7197f792112db43ba23b06b09e6bce"
dependencies = [
"futures-core",
"futures-executor",
@ -1220,16 +1219,15 @@ dependencies = [
"futures-sink",
"futures-task",
"futures-util",
"once_cell",
"pin-project 1.0.5",
"pin-utils",
]
[[package]]
name = "futures-util"
version = "0.3.12"
version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "632a8cd0f2a4b3fdea1657f08bde063848c3bd00f9bbf6e256b8be78802e624b"
checksum = "1812c7ab8aedf8d6f2701a43e1243acdbcc2b36ab26e2ad421eb99ac963d96d1"
dependencies = [
"futures-channel",
"futures-core",
@ -1259,6 +1257,19 @@ dependencies = [
"tonic-build",
]
[[package]]
name = "generator"
version = "0.6.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9fed24fd1e18827652b4d55652899a1e9da8e54d91624dc3437a5bc3a9f9a9c"
dependencies = [
"cc",
"libc",
"log",
"rustversion",
"winapi",
]
[[package]]
name = "generic-array"
version = "0.14.4"
@ -1493,9 +1504,9 @@ dependencies = [
[[package]]
name = "idna"
version = "0.2.1"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de910d521f7cc3135c4de8db1cb910e0b5ed1dc6f57c381cd07e8e661ce10094"
checksum = "89829a5d69c23d348314a7ac337fe39173b61149a9864deabd260983aed48c21"
dependencies = [
"matches",
"unicode-bidi",
@ -1579,6 +1590,7 @@ dependencies = [
"tokio",
"tokio-stream",
"tonic",
"tonic-health",
"tracing",
"tracing-futures",
"tracing-opentelemetry",
@ -1593,6 +1605,7 @@ dependencies = [
"arrow_deps",
"data_types",
"futures-util",
"generated_types",
"rand 0.8.3",
"reqwest",
"serde",
@ -1754,9 +1767,9 @@ checksum = "b7282d924be3275cec7f6756ff4121987bc6481325397dde6ba3e7802b1a8b1c"
[[package]]
name = "libloading"
version = "0.6.7"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "351a32417a12d5f7e82c368a66781e307834dae04c6ce0cd4456d52989229883"
checksum = "6f84d96438c15fcd6c3f244c8fce01d1e2b9c6b5623e9c711dc9286d8fc92d6a"
dependencies = [
"cfg-if 1.0.0",
"winapi",
@ -1803,6 +1816,17 @@ dependencies = [
"tracing-subscriber",
]
[[package]]
name = "loom"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d44c73b4636e497b4917eb21c33539efa3816741a2d3ff26c6316f1b529481a4"
dependencies = [
"cfg-if 1.0.0",
"generator",
"scoped-tls",
]
[[package]]
name = "lz4"
version = "1.23.2"
@ -1919,9 +1943,9 @@ dependencies = [
[[package]]
name = "mio"
version = "0.7.7"
version = "0.7.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e50ae3f04d169fcc9bde0b547d1c205219b7157e07ded9c5aff03e0637cb3ed7"
checksum = "a5dede4e2065b3842b8b0af444119f3aa331cc7cc2dd20388bfb0f5d5a38823a"
dependencies = [
"libc",
"log",
@ -2190,9 +2214,9 @@ dependencies = [
[[package]]
name = "once_cell"
version = "1.5.2"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13bd41f508810a131401606d54ac32a467c97172d74ba7662562ebba5ad07fa0"
checksum = "10acf907b94fc1b1a152d08ef97e7759650268cf986bf127f387e602b02c7e5a"
dependencies = [
"parking_lot",
]
@ -2351,7 +2375,7 @@ dependencies = [
[[package]]
name = "parquet"
version = "4.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow.git?rev=ad4504e8e85eb8e5babe0f01ca8cf9947499fc40#ad4504e8e85eb8e5babe0f01ca8cf9947499fc40"
source = "git+https://github.com/apache/arrow.git?rev=b5ac048c75cc55f4039d279f554920be3112d7cd#b5ac048c75cc55f4039d279f554920be3112d7cd"
dependencies = [
"arrow",
"base64 0.12.3",
@ -2798,7 +2822,7 @@ checksum = "9ab346ac5921dc62ffa9f89b7a773907511cdfa5490c572ae9be1be33e8afa4a"
dependencies = [
"crossbeam-channel 0.5.0",
"crossbeam-deque 0.8.0",
"crossbeam-utils 0.8.1",
"crossbeam-utils 0.8.2",
"lazy_static",
"num_cpus",
]
@ -2896,9 +2920,9 @@ dependencies = [
[[package]]
name = "reqwest"
version = "0.11.0"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd281b1030aa675fb90aa994d07187645bb3c8fc756ca766e7c3070b439de9de"
checksum = "0460542b551950620a3648c6aa23318ac6b3cd779114bd873209e6e8b5eb1c34"
dependencies = [
"async-compression",
"base64 0.13.0",
@ -3053,7 +3077,7 @@ dependencies = [
"base64 0.13.0",
"blake2b_simd",
"constant_time_eq",
"crossbeam-utils 0.8.1",
"crossbeam-utils 0.8.2",
]
[[package]]
@ -3102,6 +3126,12 @@ dependencies = [
"security-framework",
]
[[package]]
name = "rustversion"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb5d2a036dc6d2d8fd16fde3498b04306e29bd193bf306a57427019b823d5acd"
[[package]]
name = "rustyline"
version = "7.1.0"
@ -3148,6 +3178,12 @@ dependencies = [
"winapi",
]
[[package]]
name = "scoped-tls"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea6a9290e3c9cf0f18145ef7ffa62d68ee0bf5fcd651017e586dc7fd5da448c2"
[[package]]
name = "scopeguard"
version = "1.1.0"
@ -3166,9 +3202,9 @@ dependencies = [
[[package]]
name = "security-framework"
version = "2.0.0"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1759c2e3c8580017a484a7ac56d3abc5a6c1feadf88db2f3633f12ae4268c69"
checksum = "c6af1b6204f89cf0069736daf8b852573e3bc34898eee600e95d3dd855c12e81"
dependencies = [
"bitflags",
"core-foundation",
@ -3179,9 +3215,9 @@ dependencies = [
[[package]]
name = "security-framework-sys"
version = "2.0.0"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f99b9d5e26d2a71633cc4f2ebae7cc9f874044e0c351a27e17892d76dce5678b"
checksum = "31531d257baab426203cf81c5ce1b0b55159dda7ed602ac81b582ccd62265741"
dependencies = [
"core-foundation-sys",
"libc",
@ -3633,18 +3669,18 @@ dependencies = [
[[package]]
name = "thiserror"
version = "1.0.23"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76cc616c6abf8c8928e2fdcc0dbfab37175edd8fb49a4641066ad1364fdab146"
checksum = "e0f4a65597094d4483ddaed134f409b2cb7c1beccf25201a9f73c719254fa98e"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.23"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9be73a2caec27583d0046ef3796c3794f868a5bc813db689eed00c7631275cd1"
checksum = "7765189610d8241a44529806d6fd1f2e0a08734313a35d5b3a556f92b381f3c0"
dependencies = [
"proc-macro2",
"quote",
@ -3873,6 +3909,21 @@ dependencies = [
"syn",
]
[[package]]
name = "tonic-health"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a93d6649c8f5436d65337af08887a516183a096d785ef1fc3acf69ed60dbec6b"
dependencies = [
"async-stream",
"bytes",
"prost",
"tokio",
"tokio-stream",
"tonic",
"tonic-build",
]
[[package]]
name = "tower"
version = "0.4.5"
@ -3907,9 +3958,9 @@ checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6"
[[package]]
name = "tracing"
version = "0.1.23"
version = "0.1.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7d40a22fd029e33300d8d89a5cc8ffce18bb7c587662f54629e94c9de5487f3"
checksum = "01ebdc2bb4498ab1ab5f5b73c5803825e60199229ccba0698170e3be0e7f959f"
dependencies = [
"cfg-if 1.0.0",
"log",
@ -3920,9 +3971,9 @@ dependencies = [
[[package]]
name = "tracing-attributes"
version = "0.1.12"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43f080ea7e4107844ef4766459426fa2d5c1ada2e47edba05dc7fa99d9629f47"
checksum = "a8a9bd1db7706f2373a190b0d067146caa39350c486f3d455b0e33b431f94c07"
dependencies = [
"proc-macro2",
"quote",
@ -3940,19 +3991,19 @@ dependencies = [
[[package]]
name = "tracing-futures"
version = "0.2.4"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab7bb6f14721aa00656086e9335d363c5c8747bae02ebe32ea2c7dece5689b4c"
checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2"
dependencies = [
"pin-project 0.4.27",
"pin-project 1.0.5",
"tracing",
]
[[package]]
name = "tracing-log"
version = "0.1.1"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e0f8c7178e13481ff6765bd169b33e8d554c5d2bbede5e32c356194be02b9b9"
checksum = "a6923477a48e41c1951f1999ef8bb5a3023eb723ceadafe78ffb65dc366761e3"
dependencies = [
"lazy_static",
"log",
@ -3984,9 +4035,9 @@ dependencies = [
[[package]]
name = "tracing-subscriber"
version = "0.2.15"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1fa8f0c8f4c594e4fc9debc1990deab13238077271ba84dd853d54902ee3401"
checksum = "8ab8966ac3ca27126141f7999361cc97dd6fb4b71da04c02044fa9045d98bb96"
dependencies = [
"ansi_term 0.12.1",
"chrono",
@ -4067,9 +4118,9 @@ checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a"
[[package]]
name = "url"
version = "2.2.0"
version = "2.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5909f2b0817350449ed73e8bcd81c8c3c8d9a7a5d8acba4b27db277f1868976e"
checksum = "9ccd964113622c8e9322cfac19eb1004a07e636c545f325da085d5cdde6f1f8b"
dependencies = [
"form_urlencoded",
"idna",


@ -81,6 +81,7 @@ structopt = "0.3.21"
tokio = { version = "1.0", features = ["macros", "rt-multi-thread", "parking_lot"] }
tokio-stream = { version = "0.1.2", features = ["net"] }
tonic = "0.4.0"
tonic-health = "0.3.0"
tracing = { version = "0.1", features = ["release_max_level_debug"] }
tracing-futures = "0.2.4"
tracing-opentelemetry = "0.11.0"


@ -176,6 +176,30 @@ The server will, by default, start an HTTP API server on port `8080` and a gRPC
### Writing and Reading Data
Each IOx instance requires a writer ID.
This can be set in three ways:
- set an environment variable `INFLUXDB_IOX_ID=42`
- set a flag `--writer-id 42`
- send an HTTP PUT request:
```
curl --request PUT \
--url http://localhost:8080/iox/api/v1/id \
--header 'Content-Type: application/json' \
--data '{
"id": 42
}'
```
To write data, you need a destination database.
This is set via HTTP PUT, identifying the database by org `company` and bucket `sensors`:
```
curl --request PUT \
--url http://localhost:8080/iox/api/v1/databases/company_sensors \
--header 'Content-Type: application/json' \
--data '{
}'
```
Data can be stored in InfluxDB IOx by sending it in [line protocol] format to the `/api/v2/write`
endpoint. Data is stored by organization and bucket names. Here's an example using [`curl`] with
the organization name `company` and the bucket name `sensors` that will send the data in the
@ -196,6 +220,22 @@ all data in the `company` organization's `sensors` bucket for the `processes` me
curl -v -G -d 'org=company' -d 'bucket=sensors' --data-urlencode 'sql_query=select * from processes' "http://127.0.0.1:8080/api/v2/read"
```
### Health Checks
The HTTP API exposes a healthcheck endpoint at `/health`
```shell
$ curl http://127.0.0.1:8080/health
OK
```
The gRPC API implements the [gRPC Health Checking Protocol](https://github.com/grpc/grpc/blob/master/doc/health-checking.md). This can be tested with [grpc-health-probe](https://github.com/grpc-ecosystem/grpc-health-probe)
```shell
$ grpc_health_probe -addr 127.0.0.1:8082 -service influxdata.platform.storage.Storage
status: SERVING
```
## Contributing
We welcome community contributions from anyone!


@ -8,11 +8,11 @@ description = "Apache Arrow / Parquet / DataFusion dependencies for InfluxDB IOx
[dependencies] # In alphabetical order
# We are using development version of arrow/parquet/datafusion and the dependencies are at the same rev
# The version can be found here: https://github.com/apache/arrow/commit/ad4504e8e85eb8e5babe0f01ca8cf9947499fc40
# The version can be found here: https://github.com/apache/arrow/commit/b5ac048c75cc55f4039d279f554920be3112d7cd
#
arrow = { git = "https://github.com/apache/arrow.git", rev = "ad4504e8e85eb8e5babe0f01ca8cf9947499fc40" , features = ["simd"] }
arrow-flight = { git = "https://github.com/apache/arrow.git", rev = "ad4504e8e85eb8e5babe0f01ca8cf9947499fc40" }
datafusion = { git = "https://github.com/apache/arrow.git", rev = "ad4504e8e85eb8e5babe0f01ca8cf9947499fc40" }
arrow = { git = "https://github.com/apache/arrow.git", rev = "b5ac048c75cc55f4039d279f554920be3112d7cd" , features = ["simd"] }
arrow-flight = { git = "https://github.com/apache/arrow.git", rev = "b5ac048c75cc55f4039d279f554920be3112d7cd" }
datafusion = { git = "https://github.com/apache/arrow.git", rev = "b5ac048c75cc55f4039d279f554920be3112d7cd" }
# Turn off the "arrow" feature; it currently has a bug that causes the crate to rebuild every time
# and we're not currently using it anyway
parquet = { git = "https://github.com/apache/arrow.git", rev = "ad4504e8e85eb8e5babe0f01ca8cf9947499fc40", default-features = false, features = ["snap", "brotli", "flate2", "lz4", "zstd"] }
parquet = { git = "https://github.com/apache/arrow.git", rev = "b5ac048c75cc55f4039d279f554920be3112d7cd", default-features = false, features = ["snap", "brotli", "flate2", "lz4", "zstd"] }

buf.yaml (new file, 18 lines)

@ -0,0 +1,18 @@
version: v1beta1
build:
roots:
- generated_types/protos/
excludes:
- generated_types/protos/com
- generated_types/protos/influxdata/platform
- generated_types/protos/grpc
lint:
use:
- DEFAULT
- STYLE_DEFAULT
breaking:
use:
- WIRE
- WIRE_JSON

File diff suppressed because it is too large.


@ -0,0 +1,112 @@
//! A collection of extension traits for types that
//! implement TryInto<U, Error=FieldViolation>
//!
//! Allows associating field context with the generated errors
//! as they propagate up the struct topology
use generated_types::google::FieldViolation;
use std::convert::TryInto;
/// An extension trait that adds the method `scope` to any type
/// implementing `TryInto<U, Error = FieldViolation>`
pub(crate) trait FromField<T> {
fn scope(self, field: impl Into<String>) -> Result<T, FieldViolation>;
}
impl<T, U> FromField<U> for T
where
T: TryInto<U, Error = FieldViolation>,
{
/// Try to convert type using TryInto calling `FieldViolation::scope`
/// on any returned error
fn scope(self, field: impl Into<String>) -> Result<U, FieldViolation> {
self.try_into().map_err(|e| e.scope(field))
}
}
/// An extension trait that adds the methods `optional` and `required` to any
/// Option containing a type implementing `TryInto<U, Error = FieldViolation>`
pub(crate) trait FromFieldOpt<T> {
/// Try to convert inner type, if any, using TryInto calling
/// `FieldViolation::scope` on any error encountered
///
/// Returns None if empty
fn optional(self, field: impl Into<String>) -> Result<Option<T>, FieldViolation>;
/// Try to convert inner type, using TryInto calling `FieldViolation::scope`
/// on any error encountered
///
/// Returns an error if empty
fn required(self, field: impl Into<String>) -> Result<T, FieldViolation>;
}
impl<T, U> FromFieldOpt<U> for Option<T>
where
T: TryInto<U, Error = FieldViolation>,
{
fn optional(self, field: impl Into<String>) -> Result<Option<U>, FieldViolation> {
self.map(|t| t.scope(field)).transpose()
}
fn required(self, field: impl Into<String>) -> Result<U, FieldViolation> {
match self {
None => Err(FieldViolation::required(field)),
Some(t) => t.scope(field),
}
}
}
/// An extension trait that adds the methods `optional` and `required` to any
/// String
///
/// Prost will default string fields to empty, whereas IOx sometimes
/// uses Option<String>; this helper aids mapping between them
///
/// TODO: Review mixed use of Option<String> and String in IOx
pub(crate) trait FromFieldString {
/// Returns Ok if the String is not empty
fn required(self, field: impl Into<String>) -> Result<String, FieldViolation>;
/// Wraps non-empty strings in Some(_), returns None for empty strings
fn optional(self) -> Option<String>;
}
impl FromFieldString for String {
fn required(self, field: impl Into<String>) -> Result<String, FieldViolation> {
if self.is_empty() {
return Err(FieldViolation::required(field));
}
Ok(self)
}
fn optional(self) -> Option<String> {
if self.is_empty() {
return None;
}
Some(self)
}
}
/// An extension trait that adds the method `vec_field` to any Vec of a type
/// implementing `TryInto<U, Error = FieldViolation>`
pub(crate) trait FromFieldVec<T> {
/// Converts to a `Vec<U>`, short-circuiting on the first error and
/// returning a correctly scoped `FieldViolation` for where the error
/// was encountered
fn vec_field(self, field: impl Into<String>) -> Result<T, FieldViolation>;
}
impl<T, U> FromFieldVec<Vec<U>> for Vec<T>
where
T: TryInto<U, Error = FieldViolation>,
{
fn vec_field(self, field: impl Into<String>) -> Result<Vec<U>, FieldViolation> {
let res: Result<_, _> = self
.into_iter()
.enumerate()
.map(|(i, t)| t.scope(i.to_string()))
.collect();
res.map_err(|e| e.scope(field))
}
}
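
An editor-added usage sketch for these helpers, showing how a prost-generated message might be converted into a domain type inside the same crate. `ProtoMatcher` and `Matcher` are hypothetical; only `FieldViolation` and the traits above come from this diff.

```rust
// Editor-added sketch, not part of this merge.
use std::convert::TryFrom;

use generated_types::google::FieldViolation;

// Trait import; the module is added as `pub(crate) mod field_validation`
// (see the lib.rs change below), so this path is an assumption.
use crate::field_validation::FromFieldString;

/// Hypothetical prost-generated message: missing strings default to ""
struct ProtoMatcher {
    table: String,
    predicate: String,
}

/// Hypothetical domain type that uses Option<String> for the optional field
struct Matcher {
    table: String,
    predicate: Option<String>,
}

impl TryFrom<ProtoMatcher> for Matcher {
    type Error = FieldViolation;

    fn try_from(proto: ProtoMatcher) -> Result<Self, Self::Error> {
        Ok(Self {
            // empty string -> Err(FieldViolation::required("table"))
            table: proto.table.required("table")?,
            // empty string -> None, otherwise Some(value)
            predicate: proto.predicate.optional(),
        })
    }
}
```

With that `TryFrom` in place, a parent conversion can use `FromField::scope` (or `FromFieldOpt::required` on an `Option<ProtoMatcher>`) so any error is reported against the enclosing field name.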


@ -32,3 +32,5 @@ pub mod wal;
mod database_name;
pub use database_name::*;
pub(crate) mod field_validation;


@ -299,10 +299,44 @@ impl Schema {
/// Returns an iterator of (Option<InfluxColumnType>, &Field) for
/// all the columns of this schema, in order
pub fn iter(&self) -> SchemaIter<'_> {
SchemaIter {
schema: self,
idx: 0,
}
SchemaIter::new(self)
}
/// Returns an iterator of `&Field` for all the tag columns of
/// this schema, in order
pub fn tags_iter(&self) -> impl Iterator<Item = &ArrowField> {
self.iter().filter_map(|(influx_column_type, field)| {
if matches!(influx_column_type, Some(InfluxColumnType::Tag)) {
Some(field)
} else {
None
}
})
}
/// Returns an iterator of `&Field` for all the field columns of
/// this schema, in order
pub fn fields_iter(&self) -> impl Iterator<Item = &ArrowField> {
self.iter().filter_map(|(influx_column_type, field)| {
if matches!(influx_column_type, Some(InfluxColumnType::Field(_))) {
Some(field)
} else {
None
}
})
}
/// Returns an iterator of `&Field` for all the timestamp columns
/// of this schema, in order. At the time of writing there should
/// be at most one such column
pub fn time_iter(&self) -> impl Iterator<Item = &ArrowField> {
self.iter().filter_map(|(influx_column_type, field)| {
if matches!(influx_column_type, Some(InfluxColumnType::Timestamp)) {
Some(field)
} else {
None
}
})
}
/// Merges any new columns from new_schema, consuming self. If the
@ -573,6 +607,12 @@ pub struct SchemaIter<'a> {
idx: usize,
}
impl<'a> SchemaIter<'a> {
fn new(schema: &'a Schema) -> Self {
Self { schema, idx: 0 }
}
}
impl<'a> fmt::Debug for SchemaIter<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "SchemaIter<{}>", self.idx)
@ -829,15 +869,47 @@ mod test {
}
}
/// Build an empty schema
fn empty_schema() -> Schema {
SchemaBuilder::new().build().unwrap()
}
#[test]
fn test_iter_empty() {
assert_eq!(empty_schema().iter().count(), 0);
}
#[test]
fn test_tags_iter_empty() {
assert_eq!(empty_schema().tags_iter().count(), 0);
}
#[test]
fn test_fields_iter_empty() {
assert_eq!(empty_schema().fields_iter().count(), 0);
}
#[test]
fn test_time_iter_empty() {
assert_eq!(empty_schema().time_iter().count(), 0);
}
/// Build a schema for testing iterators
fn iter_schema() -> Schema {
SchemaBuilder::new()
.influx_field("field1", Float)
.tag("tag1")
.timestamp()
.influx_field("field2", String)
.influx_field("field3", String)
.tag("tag2")
.build()
.unwrap()
}
#[test]
fn test_iter() {
let schema = SchemaBuilder::new()
.influx_field("the_field", String)
.tag("the_tag")
.timestamp()
.measurement("the_measurement")
.build()
.unwrap();
let schema = iter_schema();
// test schema iterator and field accessor match up
for (i, (iter_col_type, iter_field)) in schema.iter().enumerate() {
@ -845,7 +917,40 @@ mod test {
assert_eq!(iter_col_type, col_type);
assert_eq!(iter_field, field);
}
assert_eq!(schema.iter().count(), 3);
assert_eq!(schema.iter().count(), 6);
}
#[test]
fn test_tags_iter() {
let schema = iter_schema();
let mut iter = schema.tags_iter();
assert_eq!(iter.next().unwrap().name(), "tag1");
assert_eq!(iter.next().unwrap().name(), "tag2");
assert_eq!(iter.next(), None);
assert_eq!(iter.next(), None);
}
#[test]
fn test_fields_iter() {
let schema = iter_schema();
let mut iter = schema.fields_iter();
assert_eq!(iter.next().unwrap().name(), "field1");
assert_eq!(iter.next().unwrap().name(), "field2");
assert_eq!(iter.next().unwrap().name(), "field3");
assert_eq!(iter.next(), None);
assert_eq!(iter.next(), None);
}
#[test]
fn test_time_iter() {
let schema = iter_schema();
let mut iter = schema.time_iter();
assert_eq!(iter.next().unwrap().name(), "time");
assert_eq!(iter.next(), None);
assert_eq!(iter.next(), None);
}
#[test]


@ -1,7 +1,11 @@
###
# Dockerfile for the image used in CI performance tests
# Dockerfile used for deploying IOx
##
FROM rust:slim-buster
FROM debian:buster-slim
RUN apt-get update \
&& apt-get install -y libssl1.1 libgcc1 libc6 \
&& rm -rf /var/lib/{apt,dpkg,cache,log}
RUN groupadd -g 1500 rust \
&& useradd -u 1500 -g rust -s /bin/bash -m rust
@ -15,4 +19,4 @@ COPY target/release/influxdb_iox /usr/bin/influxdb_iox
EXPOSE 8080 8082
CMD ["influxdb_iox"]
ENTRYPOINT ["influxdb_iox"]


@ -5,9 +5,11 @@ interest for those who wish to understand how the code works. It is
not intended to be general user facing documentation
## Table of Contents:
* Rust style and Idiom guide: [style_guide.md](style_guide.md)
* Tracing and logging Guide: [tracing.md](tracing.md)
* How InfluxDB IOx manages the lifecycle of time series data: [data_management.md](data_management.md)
* Thoughts on parquet encoding and compression for timeseries data: [encoding_thoughts.md](encoding_thoughts.md)
* Thoughts on using multiple cores: [multi_core_tasks.md](multi_core_tasks.md)
* [Query Engine Docs](../query/README.md)
* [Testing documentation](testing.md) for developers of IOx


@ -28,10 +28,10 @@
# AWS_ACCESS_KEY_ID=access_key_value
# AWS_SECRET_ACCESS_KEY=secret_access_key_value
# AWS_DEFAULT_REGION=us-east-2
# INFLUXDB_IOX_S3_BUCKET=bucket-name
# INFLUXDB_IOX_BUCKET=bucket-name
#
# If using Google Cloud Storage as an object store:
# INFLUXDB_IOX_GCP_BUCKET=bucket_name
# INFLUXDB_IOX_BUCKET=bucket_name
# Set one of SERVICE_ACCOUNT or GOOGLE_APPLICATION_CREDENTIALS, either to a path of a filename
# containing Google credential JSON or to the JSON directly.
# SERVICE_ACCOUNT=/path/to/auth/info.json
@ -41,7 +41,7 @@
# The name you see when going to All Services > Storage accounts > [name]
# AZURE_STORAGE_ACCOUNT=
# The name of a container you've created in the storage account, under Blob Service > Containers
# AZURE_STORAGE_CONTAINER=
# INFLUXDB_IOX_BUCKET=
# In the Storage account's Settings > Access keys, one of the Key values
# AZURE_STORAGE_MASTER_KEY=
#

docs/testing.md (new file, 50 lines)

@ -0,0 +1,50 @@
# Testing
This document covers details that are only relevant if you are developing IOx and running the tests.
## Object storage
### To run the tests or not run the tests
If you are testing integration with some or all of the object storage options, you'll have more
setup to do.
By default, `cargo test -p object_store` does not run any tests that actually contact
any cloud services; the tests that would contact the services silently pass instead.
To ensure you've configured object storage integration testing correctly, you can run
`TEST_INTEGRATION=1 cargo test -p object_store`, which will run the tests that contact the cloud
services and fail them if the required environment variables aren't set.
If you don't specify the `TEST_INTEGRATION` environment variable but you do configure some or all
of the object stores, the relevant tests will run.
### Configuration differences when running the tests
When running `influxdb_iox server`, you can pick one object store to use. When running the tests,
you can run them against all the possible object stores. There's still only one
`INFLUXDB_IOX_BUCKET` variable, though, so that will set the bucket name for all configured object
stores. Use the same bucket name when setting up the different services.
Other than possibly configuring multiple object stores, configuring the tests to use the object
store services is the same as configuring the server to use an object store service. See the output
of `influxdb_iox server --help` for instructions.
## InfluxDB IOx Client
The `influxdb_iox_client` crate might be used by people who are using a managed IOx server. In
other words, they might only use the `influxdb_iox_client` crate and not the rest of the crates in
this workspace. The tests in `influxdb_iox_client` see an IOx server in the same way as IOx servers
see the object store services: sometimes you'll want to run the tests against an actual server, and
sometimes you won't.
Like in the `object_store` crate, the `influxdb_iox_client` crate's tests use the
`TEST_INTEGRATION` environment variable to enforce running tests that use an actual IOx server.
Running `cargo test -p influxdb_iox_client` will silently pass the tests that would contact a server.
Start an IOx server in one terminal and run `TEST_INTEGRATION=1
TEST_IOX_ENDPOINT=http://127.0.0.1:8080 cargo test -p influxdb_iox_client` in another (where
`http://127.0.0.1:8080` is the address to the IOx HTTP server) to run the client tests against the
server. If you set `TEST_INTEGRATION` but not `TEST_IOX_ENDPOINT`, the integration tests will fail
because of the missing configuration. If you set `TEST_IOX_ENDPOINT` but not `TEST_INTEGRATION`, the
integration tests will be run.


@ -10,7 +10,7 @@ type Error = Box<dyn std::error::Error>;
type Result<T, E = Error> = std::result::Result<T, E>;
fn main() -> Result<()> {
let root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("protos");
generate_grpc_types(&root)?;
generate_wal_types(&root)?;
@ -20,16 +20,27 @@ fn main() -> Result<()> {
/// Schema used with IOx specific gRPC requests
///
/// Creates `influxdata.platform.storage.rs` and
/// `com.github.influxdata.idpe.storage.read.rs`
/// Creates
/// - `influxdata.platform.storage.rs`
/// - `com.github.influxdata.idpe.storage.read.rs`
/// - `influxdata.iox.management.v1.rs`
fn generate_grpc_types(root: &Path) -> Result<()> {
let storage_path = root.join("influxdata/platform/storage");
let idpe_path = root.join("com/github/influxdata/idpe/storage/read");
let management_path = root.join("influxdata/iox/management/v1");
let grpc_path = root.join("grpc/health/v1");
let proto_files = vec![
root.join("test.proto"),
root.join("predicate.proto"),
root.join("storage_common.proto"),
root.join("storage_common_idpe.proto"),
root.join("service.proto"),
root.join("source.proto"),
storage_path.join("test.proto"),
storage_path.join("predicate.proto"),
storage_path.join("storage_common.proto"),
storage_path.join("service.proto"),
storage_path.join("storage_common_idpe.proto"),
idpe_path.join("source.proto"),
management_path.join("base_types.proto"),
management_path.join("database_rules.proto"),
management_path.join("service.proto"),
grpc_path.join("service.proto"),
];
// Tell cargo to recompile if any of these proto files are changed


@ -0,0 +1,23 @@
syntax = "proto3";
package grpc.health.v1;
message HealthCheckRequest {
string service = 1;
}
message HealthCheckResponse {
enum ServingStatus {
UNKNOWN = 0;
SERVING = 1;
NOT_SERVING = 2;
SERVICE_UNKNOWN = 3; // Used only by the Watch method.
}
ServingStatus status = 1;
}
service Health {
rpc Check(HealthCheckRequest) returns (HealthCheckResponse);
rpc Watch(HealthCheckRequest) returns (stream HealthCheckResponse);
}


@ -0,0 +1,30 @@
syntax = "proto3";
package influxdata.iox.management.v1;
enum Order {
ORDER_UNSPECIFIED = 0;
ORDER_ASC = 1;
ORDER_DESC = 2;
}
enum Aggregate {
AGGREGATE_UNSPECIFIED = 0;
AGGREGATE_MIN = 1;
AGGREGATE_MAX = 2;
}
enum ColumnType {
COLUMN_TYPE_UNSPECIFIED = 0;
COLUMN_TYPE_I64 = 1;
COLUMN_TYPE_U64 = 2;
COLUMN_TYPE_F64 = 3;
COLUMN_TYPE_STRING = 4;
COLUMN_TYPE_BOOL = 5;
}
message HostGroup {
string id = 1;
// connection strings for remote hosts.
repeated string hosts = 2;
}


@ -0,0 +1,248 @@
syntax = "proto3";
package influxdata.iox.management.v1;
import "google/protobuf/duration.proto";
import "google/protobuf/empty.proto";
import "influxdata/iox/management/v1/base_types.proto";
// `PartitionTemplate` is used to compute the partition key of each row that
// gets written. It can consist of the table name, a column name and its value,
// a formatted time, or a string column and regex captures of its value. For
// columns that do not appear in the input row, a blank value is output.
//
// The key is constructed in order of the template parts; thus ordering changes
// what partition key is generated.
message PartitionTemplate {
message Part {
message ColumnFormat {
string column = 1;
string format = 2;
}
oneof part {
google.protobuf.Empty table = 1;
string column = 2;
string time = 3;
ColumnFormat regex = 4;
ColumnFormat strf_time = 5;
}
}
repeated Part parts = 1;
}
message Matcher {
// A query predicate to filter rows
string predicate = 1;
// Restrict selection to a specific table or tables specified by a regex
oneof table_matcher {
google.protobuf.Empty all = 2;
string table = 3;
string regex = 4;
}
}
message ReplicationConfig {
// The set of host groups that data should be replicated to. Which host a
// write goes to within a host group is determined by consistent hashing of
// the partition key. We'd use this to create a host group per
// availability zone, so you might have 5 availability zones with 2
// hosts in each. Replication will ensure that N of those zones get a
// write. For each zone, only a single host needs to get the write.
// Replication is for ensuring a write exists across multiple hosts
// before returning success. Its purpose is to ensure write durability,
// rather than write availability for query (this is covered by
// subscriptions).
repeated string replications = 1;
// The minimum number of host groups to replicate a write to before success
// is returned. This can be overridden on a per request basis.
// Replication will continue to write to the other host groups in the
// background.
uint32 replication_count = 2;
// How long the replication queue can get before either rejecting writes or
// dropping missed writes. The queue is kept in memory on a
// per-database basis. A queue size of zero means it will only try to
// replicate synchronously and drop any failures.
uint64 replication_queue_max_size = 3;
}
message SubscriptionConfig {
message Subscription {
string name = 1;
string host_group_id = 2;
Matcher matcher = 3;
}
// `subscriptions` are used for query servers to get data via either push
// or pull as it arrives. They are separate from replication as they
// have a different purpose. They're for query servers or other clients
// that want to subscribe to some subset of data being written in. This
// could either be specific partitions, ranges of partitions, tables, or
// rows matching some predicate.
repeated Subscription subscriptions = 1;
}
message QueryConfig {
// If set to `true`, this server should answer queries from one or more
// of its local write buffer and any read-only partitions that it knows
// about. In this case, results will be merged with any others from the
// remote groups or read-only partitions.
bool query_local = 1;
// Set `primary` to a host group if remote servers should be
// issued queries for this database. All hosts in the group should be
// queried with this server acting as the coordinator that merges
// results together.
string primary = 2;
// If a specific host in the primary group is unavailable,
// another host in the same position from a secondary group should be
// queried. For example, imagine we've partitioned the data in this DB into
// 4 partitions and we are replicating the data across 3 availability
// zones. We have 4 hosts in each of those AZs, thus they each have 1
// partition. We'd set the primary group to be the 4 hosts in the same
// AZ as this one, and the secondary groups as the hosts in the other 2 AZs.
repeated string secondaries = 3;
// Use `readOnlyPartitions` when a server should answer queries for
// partitions that come from object storage. This can be used to start
// up a new query server to handle queries by pointing it at a
// collection of partitions and then telling it to also pull
// data from the replication servers (writes that haven't been snapshotted
// into a partition).
repeated string read_only_partitions = 4;
}
message WalBufferConfig {
enum Rollover {
ROLLOVER_UNSPECIFIED = 0;
// Drop the old segment even though it hasn't been persisted. This part of
// the WAL will be lost on this server.
ROLLOVER_DROP_OLD_SEGMENT = 1;
// Drop the incoming write and fail silently. This favors making sure that
// older WAL data will be backed up.
ROLLOVER_DROP_INCOMING = 2;
// Reject the incoming write and return an error. The client may retry the
// request, which will succeed once the oldest segment has been
// persisted to object storage.
ROLLOVER_RETURN_ERROR = 3;
}
// The size the WAL buffer should be limited to. Once the buffer gets to
// this size it will drop old segments to remain below this size, but
// still try to hold as much in memory as possible while remaining
// below this threshold
uint64 buffer_size = 1;
// WAL segments become read-only after crossing over this size, which means
// that segments will always be >= this size. When old segments are
// dropped from memory, at least this much space will be freed from
// the buffer.
uint64 segment_size = 2;
// What should happen if a write comes in that would exceed the WAL buffer
// size and the oldest segment that could be dropped hasn't yet been
// persisted to object storage. If the oldest segment has been
// persisted, then it will be dropped from the buffer so that new writes
// can be accepted. This option is only for defining the behavior of what
// happens if that segment hasn't been persisted. If set to return an
// error, new writes will be rejected until the oldest segment has been
// persisted so that it can be cleared from memory. Alternatively, this
// can be set so that old segments are dropped even if they haven't been
// persisted. This setting is also useful for cases where persistence
// isn't being used and this is only for in-memory buffering.
Rollover buffer_rollover = 3;
// If set to true, buffer segments will be written to object storage.
bool persist_segments = 4;
// If set, segments will be rolled over after this period of time even
// if they haven't hit the size threshold. This allows them to be written
// out to object storage as they must be immutable first.
google.protobuf.Duration close_segment_after = 5;
}
message MutableBufferConfig {
message PartitionDropOrder {
message ColumnSort {
string column_name = 1;
ColumnType column_type = 2;
Aggregate column_value = 3;
}
// Sort partitions by this order. Last will be dropped first.
Order order = 1;
// Configure sort key
oneof sort {
// The last time the partition received a write.
google.protobuf.Empty last_write_time = 2;
// When the partition was opened in the mutable buffer.
google.protobuf.Empty created_at_time = 3;
// A column name, its expected type, and whether to use the min or max
// value. The ColumnType is necessary because the column can appear in
// any number of tables and be of a different type. This specifies that
// when sorting partitions, only columns with the given name and type
// should be used for the purposes of determining the partition order. If a
// partition doesn't have the given column in any way, the partition will
// appear at the beginning of the list with a null value; all
// partitions having null for that value will then be
// sorted by created_at_time desc. So if none of the partitions in the
// mutable buffer had this column with this type, then the partition
// that was created first would appear last in the list and thus be the
// first up to be dropped.
ColumnSort column = 4;
}
}
// The size the mutable buffer should be limited to. Once the buffer gets
// to this size it will drop partitions in the given order. If unable
// to drop partitions (because of later rules in this config) it will
// reject writes until it is able to drop partitions.
uint64 buffer_size = 1;
// If set, the mutable buffer will not drop partitions that have chunks
// that have not yet been persisted. Thus it will reject writes if it
// is over size and is unable to drop partitions. The default is to
// drop partitions in the sort order, regardless of whether they have
// unpersisted chunks or not. The WAL Buffer can be used to ensure
// persistence, but this may cause longer recovery times.
bool reject_if_not_persisted = 2;
// Configure order to drop partitions in
PartitionDropOrder partition_drop_order = 3;
// Attempt to persist partitions after they haven't received a write for
// this number of seconds. If not set, partitions won't be
// automatically persisted.
uint32 persist_after_cold_seconds = 4;
}
message DatabaseRules {
// The unencoded name of the database
string name = 1;
// Template that generates a partition key for each row inserted into the database
PartitionTemplate partition_template = 2;
// Synchronous replication configuration for this database
ReplicationConfig replication_config = 3;
// Asynchronous pull-based subscription configuration for this database
SubscriptionConfig subscription_config = 4;
// Query configuration for this database
QueryConfig query_config = 5;
// WAL configuration for this database
WalBufferConfig wal_buffer_config = 6;
// Mutable buffer configuration for this database
MutableBufferConfig mutable_buffer_config = 7;
}
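
On the Rust side these rules surface as prost-generated structs re-exported by `generated_types` (see the `lib.rs` changes later in this diff). A small editor-added sketch, with field types assumed from prost's standard proto3 mapping:

```rust
// Editor-added sketch, not part of this merge.
use generated_types::influxdata::iox::management::v1::DatabaseRules;

fn example_rules() -> DatabaseRules {
    DatabaseRules {
        // the unencoded database name
        name: "company_sensors".to_string(),
        // leave partition template, replication, subscription, query,
        // WAL buffer and mutable buffer configs at their prost defaults
        ..Default::default()
    }
}
```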


@ -0,0 +1,49 @@
syntax = "proto3";
package influxdata.iox.management.v1;
import "google/protobuf/empty.proto";
import "influxdata/iox/management/v1/database_rules.proto";
service ManagementService {
rpc GetWriterId(GetWriterIdRequest) returns (GetWriterIdResponse);
rpc UpdateWriterId(UpdateWriterIdRequest) returns (UpdateWriterIdResponse);
rpc ListDatabases(ListDatabasesRequest) returns (ListDatabasesResponse);
rpc GetDatabase(GetDatabaseRequest) returns (GetDatabaseResponse);
rpc CreateDatabase(CreateDatabaseRequest) returns (CreateDatabaseResponse);
}
message GetWriterIdRequest {}
message GetWriterIdResponse {
uint32 id = 1;
}
message UpdateWriterIdRequest {
uint32 id = 1;
}
message UpdateWriterIdResponse {}
message ListDatabasesRequest {}
message ListDatabasesResponse {
repeated string names = 1;
}
message GetDatabaseRequest {
string name = 1;
}
message GetDatabaseResponse {
DatabaseRules rules = 1;
}
message CreateDatabaseRequest {
DatabaseRules rules = 1;
}
message CreateDatabaseResponse {}


@ -8,9 +8,8 @@ syntax = "proto3";
package influxdata.platform.storage;
import "google/protobuf/empty.proto";
import "storage_common.proto";
import "storage_common_idpe.proto";
import "influxdata/platform/storage/storage_common.proto";
import "influxdata/platform/storage/storage_common_idpe.proto";
service Storage {
// ReadFilter performs a filter operation at storage


@ -8,7 +8,7 @@ syntax = "proto3";
package influxdata.platform.storage;
import "google/protobuf/any.proto";
import "predicate.proto";
import "influxdata/platform/storage/predicate.proto";
message ReadFilterRequest {


@ -10,8 +10,8 @@ syntax = "proto3";
package influxdata.platform.storage;
import "google/protobuf/any.proto";
import "predicate.proto";
import "storage_common.proto";
import "influxdata/platform/storage/predicate.proto";
import "influxdata/platform/storage/storage_common.proto";
message ReadSeriesCardinalityRequest {
google.protobuf.Any read_series_cardinality_source = 1;


@ -9,21 +9,71 @@
clippy::clone_on_ref_ptr
)]
include!(concat!(env!("OUT_DIR"), "/influxdata.platform.storage.rs"));
include!(concat!(
env!("OUT_DIR"),
"/com.github.influxdata.idpe.storage.read.rs"
));
include!(concat!(env!("OUT_DIR"), "/wal_generated.rs"));
mod pb {
pub mod influxdata {
pub mod platform {
pub mod storage {
include!(concat!(env!("OUT_DIR"), "/influxdata.platform.storage.rs"));
// Can't implement `Default` because `prost::Message` implements `Default`
impl TimestampRange {
pub fn max() -> Self {
TimestampRange {
start: std::i64::MIN,
end: std::i64::MAX,
// Can't implement `Default` because `prost::Message` implements `Default`
impl TimestampRange {
pub fn max() -> Self {
TimestampRange {
start: std::i64::MIN,
end: std::i64::MAX,
}
}
}
}
}
pub mod iox {
pub mod management {
pub mod v1 {
include!(concat!(env!("OUT_DIR"), "/influxdata.iox.management.v1.rs"));
}
}
}
}
pub mod com {
pub mod github {
pub mod influxdata {
pub mod idpe {
pub mod storage {
pub mod read {
include!(concat!(
env!("OUT_DIR"),
"/com.github.influxdata.idpe.storage.read.rs"
));
}
}
}
}
}
}
// Needed because of https://github.com/hyperium/tonic/issues/471
pub mod grpc {
pub mod health {
pub mod v1 {
include!(concat!(env!("OUT_DIR"), "/grpc.health.v1.rs"));
}
}
}
}
include!(concat!(env!("OUT_DIR"), "/wal_generated.rs"));
/// gRPC Storage Service
pub const STORAGE_SERVICE: &str = "influxdata.platform.storage.Storage";
/// gRPC Testing Service
pub const IOX_TESTING_SERVICE: &str = "influxdata.platform.storage.IOxTesting";
/// gRPC Arrow Flight Service
pub const ARROW_SERVICE: &str = "arrow.flight.protocol.FlightService";
pub use pb::com::github::influxdata::idpe::storage::read::*;
pub use pb::influxdata::platform::storage::*;
pub use google_types as google;
pub use pb::{grpc, influxdata};
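
An editor-added sketch of what a consumer of the reorganized crate might write, using only the re-exports and constants shown above (an illustration, not code from this merge):

```rust
// Editor-added sketch; assumes the `pub use pb::{grpc, influxdata}` re-exports above.
use generated_types::{grpc, influxdata, STORAGE_SERVICE};

fn main() {
    // prost-generated types are reachable via their proto package paths
    let range = influxdata::platform::storage::TimestampRange::max();
    assert_eq!(range.start, i64::MIN);
    assert_eq!(range.end, i64::MAX);

    // health-check types generated from grpc/health/v1/service.proto
    let request = grpc::health::v1::HealthCheckRequest {
        service: STORAGE_SERVICE.to_string(),
    };
    println!("checking {}", request.service);
}
```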


@ -5,12 +5,13 @@ authors = ["Dom Dwyer <dom@itsallbroken.com>"]
edition = "2018"
[features]
flight = ["arrow_deps", "serde/derive", "tonic", "serde_json", "futures-util"]
flight = ["arrow_deps", "serde/derive", "serde_json", "futures-util"]
[dependencies]
# Workspace dependencies, in alphabetical order
arrow_deps = { path = "../arrow_deps", optional = true }
data_types = { path = "../data_types" }
generated_types = { path = "../generated_types" }
# Crates.io dependencies, in alphabetical order
futures-util = { version = "0.3.1", optional = true }
@ -19,7 +20,7 @@ serde = "1.0.118"
serde_json = { version = "1.0.44", optional = true }
thiserror = "1.0.23"
tokio = { version = "1.0", features = ["macros"] }
tonic = { version = "0.4.0", optional = true }
tonic = { version = "0.4.0" }
[dev-dependencies] # In alphabetical order
rand = "0.8.1"


@ -9,6 +9,9 @@ use data_types::{http::ListDatabasesResponse, DatabaseName};
#[cfg(feature = "flight")]
mod flight;
/// Client for the gRPC health checking API
pub mod health;
// can't combine these into one statement that uses `{}` because of this bug in
// the `unreachable_pub` lint: https://github.com/rust-lang/rust/issues/64762
#[cfg(feature = "flight")]


@ -0,0 +1,70 @@
use generated_types::grpc::health::v1::*;
use thiserror::Error;
/// Error type for the health check client
#[derive(Debug, Error)]
pub enum Error {
/// Service is not serving
#[error("Service is not serving")]
NotServing,
/// Service returned an unexpected variant for the status enumeration
#[error("Received invalid response: {}", .0)]
InvalidResponse(i32),
/// Error connecting to the server
#[error("Connection error: {}", .0)]
ConnectionError(#[from] tonic::transport::Error),
/// Client received an unexpected error from the server
#[error("Unexpected server error: {}: {}", .0.code(), .0.message())]
UnexpectedError(#[from] tonic::Status),
}
/// Result type for the health check client
pub type Result<T, E = Error> = std::result::Result<T, E>;
/// A client for the gRPC health checking API
///
/// Allows checking the status of a given service
#[derive(Debug)]
pub struct Client {
inner: health_client::HealthClient<tonic::transport::Channel>,
}
impl Client {
/// Create a new client with the provided endpoint
pub async fn connect<D>(dst: D) -> Result<Self>
where
D: std::convert::TryInto<tonic::transport::Endpoint>,
D::Error: Into<tonic::codegen::StdError>,
{
Ok(Self {
inner: health_client::HealthClient::connect(dst).await?,
})
}
/// Returns `Ok()` if the corresponding service is serving
pub async fn check(&mut self, service: impl Into<String>) -> Result<()> {
use health_check_response::ServingStatus;
let status = self
.inner
.check(HealthCheckRequest {
service: service.into(),
})
.await?
.into_inner();
match status.status() {
ServingStatus::Serving => Ok(()),
ServingStatus::NotServing => Err(Error::NotServing),
_ => Err(Error::InvalidResponse(status.status)),
}
}
/// Returns `Ok()` if the storage service is serving
pub async fn check_storage(&mut self) -> Result<()> {
self.check(generated_types::STORAGE_SERVICE).await
}
}
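
A short editor-added usage sketch for this client. The module path (`influxdb_iox_client::health`), the endpoint address, and the tokio runtime setup are assumptions; the gRPC port matches the README's default of 8082.

```rust
// Editor-added sketch, not part of this merge.
// Assumes tokio with the `rt-multi-thread` and `macros` features enabled.
use influxdb_iox_client::health;

#[tokio::main]
async fn main() -> Result<(), health::Error> {
    // connect to a locally running IOx gRPC endpoint
    let mut client = health::Client::connect("http://127.0.0.1:8082".to_string()).await?;

    // returns Ok(()) only if the storage service reports SERVING
    client.check_storage().await?;
    println!("influxdata.platform.storage.Storage is serving");
    Ok(())
}
```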


@ -24,9 +24,11 @@ use query::{
util::{make_range_expr, AndExprBuilder},
};
use crate::dictionary::{Dictionary, Error as DictionaryError};
use crate::table::Table;
use crate::{
column::Column,
dictionary::{Dictionary, Error as DictionaryError},
table::Table,
};
use async_trait::async_trait;
use snafu::{OptionExt, ResultExt, Snafu};
@ -50,6 +52,12 @@ pub enum Error {
source: crate::table::Error,
},
#[snafu(display("Error checking predicate in table '{}': {}", table_name, source))]
NamedTablePredicateCheck {
table_name: String,
source: crate::table::Error,
},
#[snafu(display(
"Unsupported predicate when mutable buffer table names. Found a general expression: {:?}",
exprs
@ -85,12 +93,36 @@ pub enum Error {
#[snafu(display("Attempt to write table batch without a name"))]
TableWriteWithoutName,
#[snafu(display("Value ID {} not found in dictionary of chunk {}", value_id, chunk_id))]
InternalColumnValueIdNotFoundInDictionary {
value_id: u32,
chunk_id: u64,
source: DictionaryError,
},
#[snafu(display("Column ID {} not found in dictionary of chunk {}", column_id, chunk))]
ColumnIdNotFoundInDictionary {
column_id: u32,
chunk: u64,
source: DictionaryError,
},
#[snafu(display(
"Column name {} not found in dictionary of chunk {}",
column_name,
chunk_id
))]
ColumnNameNotFoundInDictionary {
column_name: String,
chunk_id: u64,
source: DictionaryError,
},
#[snafu(display(
"Column '{}' is not a string tag column and thus can not list values",
column_name
))]
UnsupportedColumnTypeForListingValues { column_name: String },
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
@ -312,13 +344,7 @@ impl Chunk {
return Ok(None);
}
let table_name_id =
self.dictionary
.id(table_name)
.context(InternalTableNotFoundInDictionary {
table_name,
chunk_id: self.id(),
})?;
let table_name_id = self.table_name_id(table_name)?;
let mut chunk_column_ids = BTreeSet::new();
@ -366,6 +392,115 @@ impl Chunk {
Ok(Some(column_names))
}
/// Return the id of the table in the chunk's dictionary
fn table_name_id(&self, table_name: &str) -> Result<u32> {
self.dictionary
.id(table_name)
.context(InternalTableNotFoundInDictionary {
table_name,
chunk_id: self.id(),
})
}
/// Returns the strings of the specified Tag column that satisfy
/// the predicate, if they can be determined entirely using metadata.
///
/// If the predicate cannot be evaluated entirely with metadata,
/// return `Ok(None)`.
pub fn tag_column_values(
&self,
table_name: &str,
column_name: &str,
chunk_predicate: &ChunkPredicate,
) -> Result<Option<BTreeSet<String>>> {
// No support for general purpose expressions
if !chunk_predicate.chunk_exprs.is_empty() {
return Ok(None);
}
let chunk_id = self.id();
let table_name_id = self.table_name_id(table_name)?;
// Is this table even in the chunk?
let table = self
.tables
.get(&table_name_id)
.context(NamedTableNotFoundInChunk {
table_name,
chunk_id,
})?;
// See if we can rule out the table entirely on metadata
let could_match = table
.could_match_predicate(chunk_predicate)
.context(NamedTablePredicateCheck { table_name })?;
if !could_match {
// No columns could match, return empty set
return Ok(Default::default());
}
let column_id =
self.dictionary
.lookup_value(column_name)
.context(ColumnNameNotFoundInDictionary {
column_name,
chunk_id,
})?;
let column = table
.column(column_id)
.context(NamedTableError { table_name })?;
if let Column::Tag(column, _) = column {
// if we have a timestamp predicate, find all values
// where the timestamp is within range. Otherwise take
// all values.
// Collect matching ids into BTreeSet to deduplicate on
// ids *before* looking up Strings
let column_value_ids: BTreeSet<u32> = match chunk_predicate.range {
None => {
// take all non-null values
column.iter().filter_map(|&s| s).collect()
}
Some(range) => {
// filter out all values that don't match the timestamp range
let time_column = table
.column_i64(chunk_predicate.time_column_id)
.context(NamedTableError { table_name })?;
column
.iter()
.zip(time_column.iter())
.filter_map(|(&column_value_id, &timestamp_value)| {
if range.contains_opt(timestamp_value) {
column_value_id
} else {
None
}
})
.collect()
}
};
// convert all the (deduplicated) ids to Strings
let column_values = column_value_ids
.into_iter()
.map(|value_id| {
let value = self.dictionary.lookup_id(value_id).context(
InternalColumnValueIdNotFoundInDictionary { value_id, chunk_id },
)?;
Ok(value.to_string())
})
.collect::<Result<BTreeSet<String>>>()?;
Ok(Some(column_values))
} else {
UnsupportedColumnTypeForListingValues { column_name }.fail()
}
}
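As an aside, the dedup-before-lookup pattern used here can be sketched with plain std collections (a HashMap stands in for the chunk's Dictionary; this is illustrative only and not part of the change):
use std::collections::{BTreeSet, HashMap};

fn distinct_values(ids: &[Option<u32>], dict: &HashMap<u32, String>) -> BTreeSet<String> {
    // deduplicate on ids *before* resolving them to Strings
    let distinct_ids: BTreeSet<u32> = ids.iter().filter_map(|&id| id).collect();
    distinct_ids
        .into_iter()
        .filter_map(|id| dict.get(&id).cloned())
        .collect()
}

fn main() {
    let dict: HashMap<u32, String> =
        vec![(1, "MA".to_string()), (2, "CA".to_string())].into_iter().collect();
    // two rows of "MA", one null row, one row of "CA"
    let rows = vec![Some(1), Some(1), None, Some(2)];
    assert_eq!(distinct_values(&rows, &dict).len(), 2);
}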
/// Translates `predicate` into per-chunk ids that can be
/// directly evaluated against tables in this chunk
pub fn compile_predicate(&self, predicate: &Predicate) -> Result<ChunkPredicate> {
@ -627,6 +762,15 @@ impl query::PartitionChunk for Chunk {
) -> Result<Option<StringSet>, Self::Error> {
unimplemented!("This function is slated for removal")
}
async fn column_values(
&self,
_table_name: &str,
_column_name: &str,
_predicate: &Predicate,
) -> Result<Option<StringSet>, Self::Error> {
unimplemented!("This function is slated for removal")
}
}
/// Used to figure out if we know how to deal with this kind of

View File

@ -6,11 +6,10 @@ use generated_types::wal;
use query::group_by::GroupByAndAggregate;
use query::group_by::WindowDuration;
use query::{
exec::{stringset::StringSet, SeriesSetPlan, SeriesSetPlans},
predicate::Predicate,
Database,
group_by::Aggregate,
plan::seriesset::{SeriesSetPlan, SeriesSetPlans},
};
use query::{group_by::Aggregate, plan::stringset::StringSetPlan};
use query::{predicate::Predicate, Database};
use crate::column::Column;
use crate::table::Table;
@ -19,10 +18,10 @@ use crate::{
partition::Partition,
};
use std::collections::{BTreeSet, HashMap, HashSet};
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use arrow_deps::datafusion::{error::DataFusionError, logical_plan::LogicalPlan};
use arrow_deps::datafusion::error::DataFusionError;
use crate::dictionary::Error as DictionaryError;
@ -46,30 +45,6 @@ pub enum Error {
source: DictionaryError,
},
#[snafu(display(
"Column name {} not found in dictionary of chunk {}",
column_name,
chunk
))]
ColumnNameNotFoundInDictionary {
column_name: String,
chunk: u64,
source: DictionaryError,
},
#[snafu(display("Value ID {} not found in dictionary of chunk {}", value_id, chunk))]
ColumnValueIdNotFoundInDictionary {
value_id: u32,
chunk: u64,
source: DictionaryError,
},
#[snafu(display(
"Column '{}' is not a tag column and thus can not list values",
column_name
))]
UnsupportedColumnTypeForListingValues { column_name: String },
#[snafu(display("id conversion error"))]
IdConversionError { source: std::num::TryFromIntError },
@ -254,27 +229,6 @@ impl Database for MutableBufferDb {
Ok(())
}
/// return all column values in this database, while applying optional
/// predicates
async fn column_values(
&self,
column_name: &str,
predicate: Predicate,
) -> Result<StringSetPlan, Self::Error> {
let has_exprs = predicate.has_exprs();
let mut filter = ChunkTableFilter::new(predicate);
if has_exprs {
let mut visitor = ValuePredVisitor::new(column_name);
self.accept(&mut filter, &mut visitor)?;
Ok(visitor.plans.into())
} else {
let mut visitor = ValueVisitor::new(column_name);
self.accept(&mut filter, &mut visitor)?;
Ok(visitor.column_values.into())
}
}
async fn query_series(&self, predicate: Predicate) -> Result<SeriesSetPlans, Self::Error> {
let mut filter = ChunkTableFilter::new(predicate);
let mut visitor = SeriesVisitor::new();
@ -569,152 +523,6 @@ impl ChunkTableFilter {
}
}
/// return all values in the `column_name` column
/// in this database, while applying the timestamp range
///
/// Potential optimizations: Run this in parallel (in different
/// futures) for each chunk / table, rather than a single one
/// -- but that will require building up parallel hash tables.
struct ValueVisitor<'a> {
column_name: &'a str,
// what column id we are looking for
column_id: Option<u32>,
chunk_value_ids: BTreeSet<u32>,
column_values: StringSet,
}
impl<'a> ValueVisitor<'a> {
fn new(column_name: &'a str) -> Self {
Self {
column_name,
column_id: None,
column_values: StringSet::new(),
chunk_value_ids: BTreeSet::new(),
}
}
}
impl<'a> Visitor for ValueVisitor<'a> {
fn pre_visit_chunk(&mut self, chunk: &Chunk) -> Result<()> {
self.chunk_value_ids.clear();
self.column_id = Some(chunk.dictionary.lookup_value(self.column_name).context(
ColumnNameNotFoundInDictionary {
column_name: self.column_name,
chunk: chunk.id,
},
)?);
Ok(())
}
fn visit_column(
&mut self,
table: &Table,
column_id: u32,
column: &Column,
filter: &mut ChunkTableFilter,
) -> Result<()> {
if Some(column_id) != self.column_id {
return Ok(());
}
match column {
Column::Tag(column, _) => {
// if we have a timestamp predicate, find all values
// where the timestamp is within range. Otherwise take
// all values.
let chunk_predicate = filter.chunk_predicate();
match chunk_predicate.range {
None => {
// take all non-null values
column.iter().filter_map(|&s| s).for_each(|value_id| {
self.chunk_value_ids.insert(value_id);
});
}
Some(range) => {
// filter out all values that don't match the timestamp range
let time_column = table.column_i64(chunk_predicate.time_column_id)?;
column
.iter()
.zip(time_column.iter())
.filter_map(|(&column_value_id, &timestamp_value)| {
if range.contains_opt(timestamp_value) {
column_value_id
} else {
None
}
})
.for_each(|value_id| {
self.chunk_value_ids.insert(value_id);
});
}
}
Ok(())
}
_ => UnsupportedColumnTypeForListingValues {
column_name: self.column_name,
}
.fail(),
}
}
fn post_visit_chunk(&mut self, chunk: &Chunk) -> Result<()> {
// convert all the chunk's value ids to Strings
for &value_id in &self.chunk_value_ids {
let value = chunk.dictionary.lookup_id(value_id).context(
ColumnValueIdNotFoundInDictionary {
value_id,
chunk: chunk.id,
},
)?;
if !self.column_values.contains(value) {
self.column_values.insert(value.to_string());
}
}
Ok(())
}
}
/// return all column values for the specified column in this
/// database, while applying the timestamp range and predicate
struct ValuePredVisitor<'a> {
column_name: &'a str,
plans: Vec<LogicalPlan>,
}
impl<'a> ValuePredVisitor<'a> {
fn new(column_name: &'a str) -> Self {
Self {
column_name,
plans: Vec::new(),
}
}
}
impl<'a> Visitor for ValuePredVisitor<'a> {
// TODO try and rule out entire tables based on the same criteria
// as explained in NamePredVisitor
fn pre_visit_table(
&mut self,
table: &Table,
chunk: &Chunk,
filter: &mut ChunkTableFilter,
) -> Result<()> {
// skip table entirely if there are no rows that fall in the timestamp
if table.could_match_predicate(filter.chunk_predicate())? {
self.plans.push(table.tag_values_plan(
self.column_name,
filter.chunk_predicate(),
chunk,
)?);
}
Ok(())
}
}
/// Return DataFusion plans to calculate which series pass the
/// specified predicate.
struct SeriesVisitor {
@ -843,10 +651,6 @@ mod tests {
type TestError = Box<dyn std::error::Error + Send + Sync + 'static>;
type Result<T = (), E = TestError> = std::result::Result<T, E>;
fn to_set(v: &[&str]) -> BTreeSet<String> {
v.iter().map(|s| s.to_string()).collect::<BTreeSet<_>>()
}
#[tokio::test]
async fn missing_tags_are_null() -> Result {
let db = MutableBufferDb::new("mydb");
@ -906,158 +710,6 @@ mod tests {
Ok(())
}
#[tokio::test]
async fn list_column_values() -> Result {
let db = MutableBufferDb::new("column_namedb");
let lp_data = "h2o,state=CA,city=LA temp=70.4 100\n\
h2o,state=MA,city=Boston temp=72.4 250\n\
o2,state=MA,city=Boston temp=50.4 200\n\
o2,state=CA temp=79.0 300\n\
o2,state=NY temp=60.8 400\n";
let lines: Vec<_> = parse_lines(lp_data).map(|l| l.unwrap()).collect();
write_lines(&db, &lines).await;
#[derive(Debug)]
struct TestCase<'a> {
description: &'a str,
column_name: &'a str,
predicate: Predicate,
expected_column_values: Result<Vec<&'a str>>,
}
let test_cases = vec![
TestCase {
description: "No predicates, 'state' col",
column_name: "state",
predicate: PredicateBuilder::default().build(),
expected_column_values: Ok(vec!["CA", "MA", "NY"]),
},
TestCase {
description: "No predicates, 'city' col",
column_name: "city",
predicate: PredicateBuilder::default().build(),
expected_column_values: Ok(vec!["Boston", "LA"]),
},
TestCase {
description: "Restrictions: timestamp",
column_name: "state",
predicate: PredicateBuilder::default().timestamp_range(50, 201).build(),
expected_column_values: Ok(vec!["CA", "MA"]),
},
TestCase {
description: "Restrictions: predicate",
column_name: "city",
predicate: PredicateBuilder::default()
.add_expr(col("state").eq(lit("MA"))) // state=MA
.build(),
expected_column_values: Ok(vec!["Boston"]),
},
TestCase {
description: "Restrictions: timestamp and predicate",
column_name: "state",
predicate: PredicateBuilder::default()
.timestamp_range(150, 301)
.add_expr(col("state").eq(lit("MA"))) // state=MA
.build(),
expected_column_values: Ok(vec!["MA"]),
},
TestCase {
description: "Restrictions: measurement name",
column_name: "state",
predicate: PredicateBuilder::default().table("h2o").build(),
expected_column_values: Ok(vec!["CA", "MA"]),
},
TestCase {
description: "Restrictions: measurement name, with nulls",
column_name: "city",
predicate: PredicateBuilder::default().table("o2").build(),
expected_column_values: Ok(vec!["Boston"]),
},
TestCase {
description: "Restrictions: measurement name and timestamp",
column_name: "state",
predicate: PredicateBuilder::default()
.table("o2")
.timestamp_range(50, 201)
.build(),
expected_column_values: Ok(vec!["MA"]),
},
TestCase {
description: "Restrictions: measurement name and predicate",
column_name: "state",
predicate: PredicateBuilder::default()
.table("o2")
.add_expr(col("state").eq(lit("NY"))) // state=NY
.build(),
expected_column_values: Ok(vec!["NY"]),
},
TestCase {
description: "Restrictions: measurement name, timestamp and predicate",
column_name: "state",
predicate: PredicateBuilder::default()
.table("o2")
.timestamp_range(1, 550)
.add_expr(col("state").eq(lit("NY"))) // state=NY
.build(),
expected_column_values: Ok(vec!["NY"]),
},
TestCase {
description: "Restrictions: measurement name, timestamp and predicate: no match",
column_name: "state",
predicate: PredicateBuilder::default()
.table("o2")
.timestamp_range(1, 300) // filters out the NY row
.add_expr(col("state").eq(lit("NY"))) // state=NY
.build(),
expected_column_values: Ok(vec![]),
},
];
for test_case in test_cases.into_iter() {
let test_case_str = format!("{:#?}", test_case);
println!("Running test case: {:?}", test_case);
let column_values_plan = db
.column_values(test_case.column_name, test_case.predicate)
.await
.expect("Created tag_values plan successfully");
// run the execution plan
let executor = Executor::default();
let actual_column_values = executor.to_string_set(column_values_plan).await;
let is_match = if let Ok(expected_column_values) = &test_case.expected_column_values {
let expected_column_values = to_set(expected_column_values);
if let Ok(actual_column_values) = &actual_column_values {
**actual_column_values == expected_column_values
} else {
false
}
} else if let Err(e) = &actual_column_values {
// use string compare to compare errors to avoid having to build exact errors
format!("{:?}", e) == format!("{:?}", test_case.expected_column_values)
} else {
false
};
assert!(
is_match,
"Mismatch\n\
actual_column_values: \n\
{:?}\n\
expected_column_values: \n\
{:?}\n\
Test_case: \n\
{}",
actual_column_values, test_case.expected_column_values, test_case_str
);
}
Ok(())
}
#[tokio::test]
async fn test_query_series() -> Result {
// This test checks that everything is wired together
@ -1088,7 +740,7 @@ mod tests {
let plans = db
.query_series(predicate)
.await
.expect("Created tag_values plan successfully");
.expect("Created query_series plan successfully");
let results = run_and_gather_results(plans).await;
@ -1164,7 +816,7 @@ mod tests {
let plans = db
.query_series(predicate)
.await
.expect("Created tag_values plan successfully");
.expect("Created query_series plan successfully");
let results = run_and_gather_results(plans).await;
@ -1207,7 +859,7 @@ mod tests {
let plans = db
.query_series(predicate)
.await
.expect("Created tag_values plan successfully");
.expect("Created query_series plan successfully");
let results = run_and_gather_results(plans).await;
assert!(results.is_empty());
@ -1220,7 +872,7 @@ mod tests {
let plans = db
.query_series(predicate)
.await
.expect("Created tag_values plan successfully");
.expect("Created query_series plan successfully");
let results = run_and_gather_results(plans).await;
assert_eq!(results.len(), 1);
@ -1234,7 +886,7 @@ mod tests {
let plans = db
.query_series(predicate)
.await
.expect("Created tag_values plan successfully");
.expect("Created query_series plan successfully");
let results = run_and_gather_results(plans).await;
assert!(results.is_empty());

View File

@ -1,9 +1,10 @@
use generated_types::wal as wb;
use query::{
exec::{field::FieldColumns, SeriesSetPlan},
exec::field::FieldColumns,
func::selectors::{selector_first, selector_last, selector_max, selector_min, SelectorOutput},
func::window::make_window_bound_expr,
group_by::{Aggregate, WindowDuration},
plan::seriesset::SeriesSetPlan,
};
use std::{
@ -35,7 +36,7 @@ use arrow_deps::{
},
datafusion::{
self,
logical_plan::{Expr, LogicalPlan, LogicalPlanBuilder},
logical_plan::{Expr, LogicalPlanBuilder},
prelude::*,
},
};
@ -223,7 +224,7 @@ impl Table {
}
/// Returns a reference to the specified column
fn column(&self, column_id: u32) -> Result<&Column> {
pub(crate) fn column(&self, column_id: u32) -> Result<&Column> {
self.columns.get(&column_id).context(ColumnIdNotFound {
id: column_id,
table_id: self.id,
@ -271,32 +272,6 @@ impl Table {
}
}
/// Creates a DataFusion LogicalPlan that returns column *values* as a
/// single column of Strings
///
/// The created plan looks like:
///
/// Projection
/// Filter(predicate)
/// InMemoryScan
pub fn tag_values_plan(
&self,
column_name: &str,
chunk_predicate: &ChunkPredicate,
chunk: &Chunk,
) -> Result<LogicalPlan> {
// Scan and Filter
let plan_builder = self.scan_with_predicates(chunk_predicate, chunk)?;
let select_exprs = vec![col(column_name)];
plan_builder
.project(&select_exprs)
.context(BuildingPlan)?
.build()
.context(BuildingPlan)
}
/// Creates a SeriesSet plan that produces an output table with rows that
/// match the predicate
///
@ -503,10 +478,7 @@ impl Table {
column_name: col_name,
chunk: chunk.id,
})?;
let column = self.columns.get(&column_id).context(ColumnIdNotFound {
id: column_id,
table_id: self.id,
})?;
let column = self.column(column_id)?;
Ok(column.data_type())
})?;
@ -735,10 +707,7 @@ impl Table {
for col in &selection.cols {
let column_name = col.column_name;
let column = self.columns.get(&col.column_id).context(ColumnIdNotFound {
id: col.column_id,
table_id: self.id,
})?;
let column = self.column(col.column_id)?;
schema_builder = match column {
Column::String(_, _) => schema_builder.field(column_name, ArrowDataType::Utf8),
@ -769,10 +738,7 @@ impl Table {
let mut columns = Vec::with_capacity(selection.cols.len());
for col in &selection.cols {
let column = self.columns.get(&col.column_id).context(ColumnIdNotFound {
id: col.column_id,
table_id: self.id,
})?;
let column = self.column(col.column_id)?;
let array = match column {
Column::String(vals, _) => {
@ -1221,6 +1187,7 @@ impl<'a> TableColSelection<'a> {
mod tests {
use arrow::util::pretty::pretty_format_batches;
use arrow_deps::datafusion::logical_plan::LogicalPlan;
use data_types::data::split_lines_into_write_entry_partitions;
use influxdb_line_protocol::{parse_lines, ParsedLine};
use query::{

View File

@ -419,26 +419,26 @@ mod tests {
dotenv::dotenv().ok();
let region = env::var("AWS_DEFAULT_REGION");
let bucket_name = env::var("INFLUXDB_IOX_S3_BUCKET");
let bucket_name = env::var("INFLUXDB_IOX_BUCKET");
let force = std::env::var("TEST_INTEGRATION");
match (region.is_ok(), bucket_name.is_ok(), force.is_ok()) {
(false, false, true) => {
panic!(
"TEST_INTEGRATION is set, \
but AWS_DEFAULT_REGION and INFLUXDB_IOX_S3_BUCKET are not"
but AWS_DEFAULT_REGION and INFLUXDB_IOX_BUCKET are not"
)
}
(false, true, true) => {
panic!("TEST_INTEGRATION is set, but AWS_DEFAULT_REGION is not")
}
(true, false, true) => {
panic!("TEST_INTEGRATION is set, but INFLUXDB_IOX_S3_BUCKET is not")
panic!("TEST_INTEGRATION is set, but INFLUXDB_IOX_BUCKET is not")
}
(false, false, false) => {
eprintln!(
"skipping integration test - set \
AWS_DEFAULT_REGION and INFLUXDB_IOX_S3_BUCKET to run"
AWS_DEFAULT_REGION and INFLUXDB_IOX_BUCKET to run"
);
return Ok(());
}
@ -447,7 +447,7 @@ mod tests {
return Ok(());
}
(true, false, false) => {
eprintln!("skipping integration test - set INFLUXDB_IOX_S3_BUCKET to run");
eprintln!("skipping integration test - set INFLUXDB_IOX_BUCKET to run");
return Ok(());
}
_ => {}
@ -466,8 +466,8 @@ mod tests {
"The environment variable AWS_DEFAULT_REGION must be set \
to a value like `us-east-2`"
})?;
let bucket_name = env::var("INFLUXDB_IOX_S3_BUCKET")
.map_err(|_| "The environment variable INFLUXDB_IOX_S3_BUCKET must be set")?;
let bucket_name = env::var("INFLUXDB_IOX_BUCKET")
.map_err(|_| "The environment variable INFLUXDB_IOX_BUCKET must be set")?;
Ok((region.parse()?, bucket_name))
}

View File

@ -299,7 +299,7 @@ mod tests {
let required_vars = [
"AZURE_STORAGE_ACCOUNT",
"AZURE_STORAGE_CONTAINER",
"INFLUXDB_IOX_BUCKET",
"AZURE_STORAGE_MASTER_KEY",
];
let unset_vars: Vec<_> = required_vars
@ -334,8 +334,8 @@ mod tests {
async fn azure_blob_test() -> Result<()> {
maybe_skip_integration!();
let container_name = env::var("AZURE_STORAGE_CONTAINER")
.map_err(|_| "The environment variable AZURE_STORAGE_CONTAINER must be set")?;
let container_name = env::var("INFLUXDB_IOX_BUCKET")
.map_err(|_| "The environment variable INFLUXDB_IOX_BUCKET must be set")?;
let integration = MicrosoftAzure::new_from_env(container_name);
put_get_delete_list(&integration).await?;

View File

@ -267,15 +267,15 @@ mod test {
() => {
dotenv::dotenv().ok();
let bucket_name = env::var("GCS_BUCKET_NAME");
let bucket_name = env::var("INFLUXDB_IOX_BUCKET");
let force = std::env::var("TEST_INTEGRATION");
match (bucket_name.is_ok(), force.is_ok()) {
(false, true) => {
panic!("TEST_INTEGRATION is set, but GCS_BUCKET_NAME is not")
panic!("TEST_INTEGRATION is set, but INFLUXDB_IOX_BUCKET is not")
}
(false, false) => {
eprintln!("skipping integration test - set GCS_BUCKET_NAME to run");
eprintln!("skipping integration test - set INFLUXDB_IOX_BUCKET to run");
return Ok(());
}
_ => {}
@ -284,8 +284,8 @@ mod test {
}
fn bucket_name() -> Result<String> {
Ok(env::var("GCS_BUCKET_NAME")
.map_err(|_| "The environment variable GCS_BUCKET_NAME must be set")?)
Ok(env::var("INFLUXDB_IOX_BUCKET")
.map_err(|_| "The environment variable INFLUXDB_IOX_BUCKET must be set")?)
}
#[tokio::test]

View File

@ -5,11 +5,17 @@ use super::DELIMITER;
// percent_encode's API needs this as a byte
const DELIMITER_BYTE: u8 = DELIMITER.as_bytes()[0];
// special encoding of the empty string part.
// Using '%' is the safest character since it will always be used in the
// output of percent_encode no matter how we evolve the INVALID AsciiSet over
// time.
const EMPTY: &str = "%";
/// The PathPart type exists to validate the directory/file names that form part
/// of a path.
///
/// A PathPart instance is guaranteed to contain no `/` characters as it can
/// only be constructed by going through the `try_from` impl.
/// A PathPart instance is guaranteed to be non-empty and to contain no `/`
/// characters as it can only be constructed by going through the `from` impl.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Default)]
pub struct PathPart(pub(super) String);
@ -48,6 +54,12 @@ impl From<&str> for PathPart {
// to be equal to `.` or `..` to prevent file system traversal shenanigans.
"." => Self(String::from("%2E")),
".." => Self(String::from("%2E%2E")),
// Every string except the empty string will be percent encoded.
// The empty string will be transformed into a sentinel value EMPTY
// which can safely be a prefix of an encoded value since it will be
// fully matched at decode time (see impl Display for PathPart).
"" => Self(String::from(EMPTY)),
other => Self(percent_encode(other.as_bytes(), INVALID).to_string()),
}
}
@ -55,10 +67,13 @@ impl From<&str> for PathPart {
impl std::fmt::Display for PathPart {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
percent_decode_str(&self.0)
.decode_utf8()
.expect("Valid UTF-8 that came from String")
.fmt(f)
match &self.0[..] {
EMPTY => "".fmt(f),
_ => percent_decode_str(&self.0)
.decode_utf8()
.expect("Valid UTF-8 that came from String")
.fmt(f),
}
}
}
@ -104,4 +119,21 @@ mod tests {
assert_eq!(part, PathPart(String::from("%2E%2E")));
assert_eq!(part.to_string(), "..");
}
#[test]
fn path_part_cant_be_empty() {
let part: PathPart = "".into();
assert_eq!(part, PathPart(String::from(EMPTY)));
assert_eq!(part.to_string(), "");
}
#[test]
fn empty_is_safely_encoded() {
let part: PathPart = EMPTY.into();
assert_eq!(
part,
PathPart(percent_encode(EMPTY.as_bytes(), INVALID).to_string())
);
assert_eq!(part.to_string(), EMPTY);
}
}
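A simplified, self-contained sketch of the rule these tests exercise (the real code uses the percent-encoding crate with the INVALID set, which is expected to escape '%' itself; here the escaping is reduced to that single character just to show why the sentinel round-trips):
const EMPTY: &str = "%";

// Stand-in encoder: a literal "%" in the input is escaped, so it can never
// collide with the empty-string sentinel.
fn encode_part(s: &str) -> String {
    match s {
        "" => EMPTY.to_string(),
        other => other.replace('%', "%25"),
    }
}

fn decode_part(s: &str) -> String {
    match s {
        // the sentinel is matched in full, never as a prefix
        EMPTY => String::new(),
        other => other.replace("%25", "%"),
    }
}

fn main() {
    for input in ["", "%", "100%_utilization"] {
        assert_eq!(decode_part(&encode_part(input)), input);
    }
}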

View File

@ -18,7 +18,6 @@ use arrow_deps::{
use counters::ExecutionCounters;
use context::IOxExecutionContext;
use field::FieldColumns;
use schema_pivot::SchemaPivotNode;
use fieldlist::{FieldList, IntoFieldList};
@ -28,7 +27,11 @@ use tokio::sync::mpsc::{self, error::SendError};
use snafu::{ResultExt, Snafu};
use crate::plan::{fieldlist::FieldListPlan, stringset::StringSetPlan};
use crate::plan::{
fieldlist::FieldListPlan,
seriesset::{SeriesSetPlan, SeriesSetPlans},
stringset::StringSetPlan,
};
#[derive(Debug, Snafu)]
pub enum Error {
@ -85,91 +88,6 @@ pub enum Error {
pub type Result<T, E = Error> = std::result::Result<T, E>;
/// A plan that can be run to produce a logical stream of time series,
/// as represented as sequence of SeriesSets from a single DataFusion
/// plan, optionally grouped in some way.
#[derive(Debug)]
pub struct SeriesSetPlan {
/// The table name this came from
pub table_name: Arc<String>,
/// Datafusion plan to execute. The plan must produce
/// RecordBatches that have:
///
/// * fields for each name in `tag_columns` and `field_columns`
/// * a timestamp column called 'time'
/// * each column in tag_columns must be a String (Utf8)
pub plan: LogicalPlan,
/// The names of the columns that define tags.
///
/// Note these are `Arc` strings because they are duplicated for
/// *each* resulting `SeriesSet` that is produced when this type
/// of plan is executed.
pub tag_columns: Vec<Arc<String>>,
/// The names of the columns which are "fields"
///
/// Note these are `Arc` strings because they are duplicated for
/// *each* resulting `SeriesSet` that is produced when this type
/// of plan is executed.
pub field_columns: FieldColumns,
/// If present, how many of the series_set_plan::tag_columns
/// should be used to compute the group
pub num_prefix_tag_group_columns: Option<usize>,
}
impl SeriesSetPlan {
/// Create a SeriesSetPlan that will not produce any Group items
pub fn new_from_shared_timestamp(
table_name: Arc<String>,
plan: LogicalPlan,
tag_columns: Vec<Arc<String>>,
field_columns: Vec<Arc<String>>,
) -> Self {
Self::new(table_name, plan, tag_columns, field_columns.into())
}
/// Create a SeriesSetPlan that will not produce any Group items
pub fn new(
table_name: Arc<String>,
plan: LogicalPlan,
tag_columns: Vec<Arc<String>>,
field_columns: FieldColumns,
) -> Self {
let num_prefix_tag_group_columns = None;
Self {
table_name,
plan,
tag_columns,
field_columns,
num_prefix_tag_group_columns,
}
}
/// Create a SeriesSetPlan that will produce Group items, according to
/// num_prefix_tag_group_columns.
pub fn grouped(mut self, num_prefix_tag_group_columns: usize) -> Self {
self.num_prefix_tag_group_columns = Some(num_prefix_tag_group_columns);
self
}
}
/// A container for plans which each produce a logical stream of
/// timeseries (from across many potential tables).
#[derive(Debug, Default)]
pub struct SeriesSetPlans {
pub plans: Vec<SeriesSetPlan>,
}
impl From<Vec<SeriesSetPlan>> for SeriesSetPlans {
fn from(plans: Vec<SeriesSetPlan>) -> Self {
Self { plans }
}
}
/// Handles executing plans, and marshalling the results into rust
/// native structures.
#[derive(Debug, Default)]

View File

@ -3,16 +3,21 @@ use std::{
sync::Arc,
};
use arrow_deps::datafusion::{
error::{DataFusionError, Result as DatafusionResult},
logical_plan::{Expr, ExpressionVisitor, LogicalPlan, LogicalPlanBuilder, Operator, Recursion},
prelude::col,
use arrow_deps::{
arrow::datatypes::DataType,
datafusion::{
error::{DataFusionError, Result as DatafusionResult},
logical_plan::{
Expr, ExpressionVisitor, LogicalPlan, LogicalPlanBuilder, Operator, Recursion,
},
prelude::col,
},
};
use data_types::{
schema::{InfluxColumnType, Schema},
selection::Selection,
};
use snafu::{OptionExt, ResultExt, Snafu};
use snafu::{ensure, OptionExt, ResultExt, Snafu};
use tracing::debug;
use crate::{
@ -44,6 +49,11 @@ pub enum Error {
source: Box<dyn std::error::Error + Send + Sync>,
},
#[snafu(display("gRPC planner got error finding column values: {}", source))]
FindingColumnValues {
source: Box<dyn std::error::Error + Send + Sync>,
},
#[snafu(display(
"gRPC planner got internal error making table_name with default predicate: {}",
source
@ -68,7 +78,7 @@ pub enum Error {
source: Box<dyn std::error::Error + Send + Sync>,
},
#[snafu(display("gRPC planner got error creating string set: {}", source))]
#[snafu(display("gRPC planner got error creating string set plan: {}", source))]
CreatingStringSet { source: StringSetError },
#[snafu(display(
@ -81,13 +91,13 @@ pub enum Error {
source: crate::provider::Error,
},
#[snafu(display("Error building plan: {}", source))]
#[snafu(display("gRPC planner got error building plan: {}", source))]
BuildingPlan {
source: arrow_deps::datafusion::error::DataFusionError,
},
#[snafu(display(
"Error getting table schema for table '{}' in chunk {}: {}",
"gRPC planner got error getting table schema for table '{}' in chunk {}: {}",
table_name,
chunk_id,
source
@ -98,8 +108,28 @@ pub enum Error {
source: Box<dyn std::error::Error + Send + Sync>,
},
#[snafu(display("Unsupported predicate: {}", source))]
#[snafu(display("gRPC planner error: unsupported predicate: {}", source))]
UnsupportedPredicate { source: DataFusionError },
#[snafu(display(
"gRPC planner error: column '{}' is not a tag, it is {:?}",
tag_name,
influx_column_type
))]
InvalidTagColumn {
tag_name: String,
influx_column_type: Option<InfluxColumnType>,
},
#[snafu(display(
"Internal error: tag column '{}' is not Utf8 type, it is {:?} ",
tag_name,
data_type
))]
InternalInvalidTagType {
tag_name: String,
data_type: DataType,
},
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
@ -272,6 +302,155 @@ impl InfluxRPCPlanner {
.context(CreatingStringSet)
}
/// Returns a plan which finds the distinct, non-null tag values
/// in the specified `tag_name` column of this database which pass
/// the conditions specified by `predicate`.
pub async fn tag_values<D>(
&self,
database: &D,
tag_name: &str,
predicate: Predicate,
) -> Result<StringSetPlan>
where
D: Database + 'static,
{
debug!(predicate=?predicate, tag_name, "planning tag_values");
// The basic algorithm is:
//
// 1. Find all the potential tables in the chunks
//
// 2. For each table/chunk pair, figure out which have
// distinct values that can be found from only metadata and
// which need full plans
// Key is table name, value is set of chunks which had data
// for that table but that we couldn't evaluate the predicate
// entirely using the metadata
let mut need_full_plans = BTreeMap::new();
let mut known_values = BTreeSet::new();
for chunk in self.filtered_chunks(database, &predicate).await? {
let table_names = self.chunk_table_names(chunk.as_ref(), &predicate).await?;
for table_name in table_names {
debug!(
table_name = table_name.as_str(),
chunk_id = chunk.id(),
"finding columns in table"
);
// use schema to validate column type
let schema = chunk
.table_schema(&table_name, Selection::All)
.await
.expect("to be able to get table schema");
// Skip this table if the tag_name is not a column in this table
let idx = if let Some(idx) = schema.find_index_of(tag_name) {
idx
} else {
continue;
};
// Validate that this really is a Tag column
let (influx_column_type, field) = schema.field(idx);
ensure!(
matches!(influx_column_type, Some(InfluxColumnType::Tag)),
InvalidTagColumn {
tag_name,
influx_column_type,
}
);
ensure!(
field.data_type() == &DataType::Utf8,
InternalInvalidTagType {
tag_name,
data_type: field.data_type().clone(),
}
);
// try and get the list of values directly from metadata
let maybe_values = chunk
.column_values(&table_name, tag_name, &predicate)
.await
.map_err(|e| Box::new(e) as _)
.context(FindingColumnValues)?;
match maybe_values {
Some(mut names) => {
debug!(names=?names, chunk_id = chunk.id(), "column values found from metadata");
known_values.append(&mut names);
}
None => {
debug!(
table_name = table_name.as_str(),
chunk_id = chunk.id(),
"need full plan to find column values"
);
// can't get the values from metadata alone, need
// a general purpose plan
need_full_plans
.entry(table_name)
.or_insert_with(Vec::new)
.push(Arc::clone(&chunk));
}
}
}
}
let mut builder = StringSetPlanBuilder::new();
let select_exprs = vec![col(tag_name)];
// At this point, we have a set of tag_values we know at plan
// time in `known_values`, and some tables in chunks that we
// need to run a plan to find what values pass the predicate.
for (table_name, chunks) in need_full_plans.into_iter() {
let scan_and_filter = self
.scan_and_filter(&table_name, &predicate, chunks)
.await?;
// if we have any data to scan, make a plan!
if let Some(TableScanAndFilter {
plan_builder,
schema: _,
}) = scan_and_filter
{
// TODO use Expr::is_null() here when this
// https://issues.apache.org/jira/browse/ARROW-11742
// is completed.
let tag_name_is_not_null = Expr::IsNotNull(Box::new(col(tag_name)));
// TODO: optimize this to use "DISINCT" or do
// something more intelligent that simply fetching all
// the values and reducing them in the query Executor
//
// Until then, simply use a plan which looks like:
//
// Projection
// Filter(is not null)
// Filter(predicate)
// InMemoryScan
let plan = plan_builder
.project(&select_exprs)
.context(BuildingPlan)?
.filter(tag_name_is_not_null)
.context(BuildingPlan)?
.build()
.context(BuildingPlan)?;
builder = builder.append(plan.into());
}
}
// add the known values we could find from metadata only
builder
.append(known_values.into())
.build()
.context(CreatingStringSet)
}
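For orientation, a usage sketch of this entry point as exercised by the tag_values query tests added elsewhere in this diff (imports follow those tests; error handling is collapsed into expect):
use query::{
    exec::{stringset::StringSetRef, Executor},
    frontend::influxrpc::InfluxRPCPlanner,
    predicate::Predicate,
    Database,
};

/// Plan the distinct values of one tag column, then execute the plan to a StringSet.
async fn distinct_tag_values<D: Database + 'static>(
    db: &D,
    tag_name: &str,
    predicate: Predicate,
) -> StringSetRef {
    let planner = InfluxRPCPlanner::new();
    let executor = Executor::new();
    let plan = planner
        .tag_values(db, tag_name, predicate)
        .await
        .expect("built tag_values plan");
    executor
        .to_string_set(plan)
        .await
        .expect("executed plan to a string set")
}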
/// Returns a plan that produces a list of columns and their
/// datatypes (as defined in the data written via `write_lines`),
/// and which have more than zero rows which pass the conditions

View File

@ -11,8 +11,8 @@ use async_trait::async_trait;
use data_types::{
data::ReplicatedWrite, partition_metadata::TableSummary, schema::Schema, selection::Selection,
};
use exec::{stringset::StringSet, Executor, SeriesSetPlans};
use plan::stringset::StringSetPlan;
use exec::{stringset::StringSet, Executor};
use plan::seriesset::SeriesSetPlans;
use std::{fmt::Debug, sync::Arc};
@ -55,15 +55,6 @@ pub trait Database: Debug + Send + Sync {
// The functions below are slated for removal (migration into a gRPC query
// frontend) ---------
/// Returns a plan which finds the distinct values in the
/// `column_name` column of this database which pass the
/// conditions specified by `predicate`.
async fn column_values(
&self,
column_name: &str,
predicate: Predicate,
) -> Result<StringSetPlan, Self::Error>;
/// Returns a plan that finds all rows which pass the
/// conditions specified by `predicate` in the form of logical
/// time series.
@ -132,7 +123,7 @@ pub trait PartitionChunk: Debug + Send + Sync {
/// Returns a set of Strings with column names from the specified
/// table that have at least one row that matches `predicate`, if
/// the predicate can be evaluated entirely on the metadata of
/// this Chunk.
/// this Chunk. Returns `None` otherwise.
async fn column_names(
&self,
table_name: &str,
@ -140,6 +131,18 @@ pub trait PartitionChunk: Debug + Send + Sync {
columns: Selection<'_>,
) -> Result<Option<StringSet>, Self::Error>;
/// Return a set of Strings containing the distinct values in the
/// specified columns, if the predicate can be evaluated entirely
/// on the metadata of this Chunk. Returns `None` otherwise.
///
/// The requested columns must all have String type.
async fn column_values(
&self,
table_name: &str,
column_name: &str,
predicate: &Predicate,
) -> Result<Option<StringSet>, Self::Error>;
/// Returns the Schema for a table in this chunk, with the
/// specified column selection. An error is returned if the
/// selection refers to columns that do not exist.

View File

@ -1,2 +1,3 @@
pub mod fieldlist;
pub mod seriesset;
pub mod stringset;

View File

@ -0,0 +1,86 @@
use std::sync::Arc;
use arrow_deps::datafusion::logical_plan::LogicalPlan;
use crate::exec::field::FieldColumns;
/// A plan that can be run to produce a logical stream of time series,
/// represented as a sequence of SeriesSets from a single DataFusion
/// plan, optionally grouped in some way.
#[derive(Debug)]
pub struct SeriesSetPlan {
/// The table name this came from
pub table_name: Arc<String>,
/// Datafusion plan to execute. The plan must produce
/// RecordBatches that have:
///
/// * fields for each name in `tag_columns` and `field_columns`
/// * a timestamp column called 'time'
/// * each column in tag_columns must be a String (Utf8)
pub plan: LogicalPlan,
/// The names of the columns that define tags.
///
/// Note these are `Arc` strings because they are duplicated for
/// *each* resulting `SeriesSet` that is produced when this type
/// of plan is executed.
pub tag_columns: Vec<Arc<String>>,
/// The names of the columns which are "fields"
pub field_columns: FieldColumns,
/// If present, how many of the series_set_plan::tag_columns
/// should be used to compute the group
pub num_prefix_tag_group_columns: Option<usize>,
}
impl SeriesSetPlan {
/// Create a SeriesSetPlan that will not produce any Group items
pub fn new_from_shared_timestamp(
table_name: Arc<String>,
plan: LogicalPlan,
tag_columns: Vec<Arc<String>>,
field_columns: Vec<Arc<String>>,
) -> Self {
Self::new(table_name, plan, tag_columns, field_columns.into())
}
/// Create a SeriesSetPlan that will not produce any Group items
pub fn new(
table_name: Arc<String>,
plan: LogicalPlan,
tag_columns: Vec<Arc<String>>,
field_columns: FieldColumns,
) -> Self {
let num_prefix_tag_group_columns = None;
Self {
table_name,
plan,
tag_columns,
field_columns,
num_prefix_tag_group_columns,
}
}
/// Create a SeriesSetPlan that will produce Group items, according to
/// num_prefix_tag_group_columns.
pub fn grouped(mut self, num_prefix_tag_group_columns: usize) -> Self {
self.num_prefix_tag_group_columns = Some(num_prefix_tag_group_columns);
self
}
}
/// A container for plans which each produce a logical stream of
/// timeseries (from across many potential tables).
#[derive(Debug, Default)]
pub struct SeriesSetPlans {
pub plans: Vec<SeriesSetPlan>,
}
impl From<Vec<SeriesSetPlan>> for SeriesSetPlans {
fn from(plans: Vec<SeriesSetPlan>) -> Self {
Self { plans }
}
}

View File

@ -12,14 +12,11 @@ use arrow_deps::{
datafusion::physical_plan::{common::SizedRecordBatchStream, SendableRecordBatchStream},
};
use crate::{exec::Executor, group_by::GroupByAndAggregate, plan::stringset::StringSetPlan};
use crate::{
exec::{
stringset::{StringSet, StringSetRef},
SeriesSetPlans,
},
exec::stringset::{StringSet, StringSetRef},
Database, DatabaseStore, PartitionChunk, Predicate,
};
use crate::{exec::Executor, group_by::GroupByAndAggregate, plan::seriesset::SeriesSetPlans};
use data_types::{
data::{lines_to_replicated_write, ReplicatedWrite},
@ -59,12 +56,6 @@ pub struct TestDatabase {
/// `column_names` to return upon next request
column_names: Arc<Mutex<Option<StringSetRef>>>,
/// `column_values` to return upon next request
column_values: Arc<Mutex<Option<StringSetRef>>>,
/// The last request for `column_values`
column_values_request: Arc<Mutex<Option<ColumnValuesRequest>>>,
/// Responses to return on the next request to `query_series`
query_series_values: Arc<Mutex<Option<SeriesSetPlans>>>,
@ -78,16 +69,6 @@ pub struct TestDatabase {
query_groups_request: Arc<Mutex<Option<QueryGroupsRequest>>>,
}
/// Records the parameters passed to a column values request
#[derive(Debug, PartialEq, Clone)]
pub struct ColumnValuesRequest {
/// The name of the requested column
pub column_name: String,
/// Stringified '{:?}' version of the predicate
pub predicate: String,
}
/// Records the parameters passed to a `query_series` request
#[derive(Debug, PartialEq, Clone)]
pub struct QuerySeriesRequest {
@ -178,20 +159,6 @@ impl TestDatabase {
*Arc::clone(&self.column_names).lock() = Some(column_names)
}
/// Set the list of column values that will be returned on a call to
/// column_values
pub fn set_column_values(&self, column_values: Vec<String>) {
let column_values = column_values.into_iter().collect::<StringSet>();
let column_values = Arc::new(column_values);
*Arc::clone(&self.column_values).lock() = Some(column_values)
}
/// Get the parameters from the last column name request
pub fn get_column_values_request(&self) -> Option<ColumnValuesRequest> {
Arc::clone(&self.column_values_request).lock().take()
}
/// Set the series that will be returned on a call to query_series
pub fn set_query_series_values(&self, plan: SeriesSetPlans) {
*Arc::clone(&self.query_series_values).lock() = Some(plan);
@ -267,34 +234,6 @@ impl Database for TestDatabase {
Ok(())
}
/// Return the mocked out column values, recording the request
async fn column_values(
&self,
column_name: &str,
predicate: Predicate,
) -> Result<StringSetPlan, Self::Error> {
// save the request
let predicate = predicate_to_test_string(&predicate);
let new_column_values_request = Some(ColumnValuesRequest {
column_name: column_name.into(),
predicate,
});
*Arc::clone(&self.column_values_request).lock() = new_column_values_request;
// pull out the saved columns
let column_values = Arc::clone(&self.column_values)
.lock()
.take()
// Turn None into an error
.context(General {
message: "No saved column_values in TestDatabase",
})?;
Ok(StringSetPlan::Known(column_values))
}
async fn query_series(&self, predicate: Predicate) -> Result<SeriesSetPlans, Self::Error> {
let predicate = predicate_to_test_string(&predicate);
@ -595,6 +534,16 @@ impl PartitionChunk for TestChunk {
})
}
async fn column_values(
&self,
_table_name: &str,
_column_name: &str,
_predicate: &Predicate,
) -> Result<Option<StringSet>, Self::Error> {
// Model not being able to get column values from metadata
Ok(None)
}
fn has_table(&self, table_name: &str) -> bool {
self.table_schemas.contains_key(table_name)
}

View File

@ -13,7 +13,7 @@ use async_trait::async_trait;
use data_types::{data::ReplicatedWrite, database_rules::DatabaseRules, selection::Selection};
use mutable_buffer::MutableBufferDb;
use parking_lot::Mutex;
use query::{plan::stringset::StringSetPlan, Database, PartitionChunk};
use query::{Database, PartitionChunk};
use read_buffer::Database as ReadBufferDb;
use serde::{Deserialize, Serialize};
use snafu::{OptionExt, ResultExt, Snafu};
@ -306,23 +306,10 @@ impl Database for Db {
.context(MutableBufferWrite)
}
async fn column_values(
&self,
column_name: &str,
predicate: query::predicate::Predicate,
) -> Result<StringSetPlan, Self::Error> {
self.mutable_buffer
.as_ref()
.context(DatabaseNotReadable)?
.column_values(column_name, predicate)
.await
.context(MutableBufferRead)
}
async fn query_series(
&self,
predicate: query::predicate::Predicate,
) -> Result<query::exec::SeriesSetPlans, Self::Error> {
) -> Result<query::plan::seriesset::SeriesSetPlans, Self::Error> {
self.mutable_buffer
.as_ref()
.context(DatabaseNotReadable)?
@ -335,7 +322,7 @@ impl Database for Db {
&self,
predicate: query::predicate::Predicate,
gby_agg: query::group_by::GroupByAndAggregate,
) -> Result<query::exec::SeriesSetPlans, Self::Error> {
) -> Result<query::plan::seriesset::SeriesSetPlans, Self::Error> {
self.mutable_buffer
.as_ref()
.context(DatabaseNotReadable)?

View File

@ -350,4 +350,40 @@ impl PartitionChunk for DBChunk {
}
}
}
async fn column_values(
&self,
table_name: &str,
column_name: &str,
predicate: &Predicate,
) -> Result<Option<StringSet>, Self::Error> {
match self {
Self::MutableBuffer { chunk } => {
use mutable_buffer::chunk::Error::UnsupportedColumnTypeForListingValues;
let chunk_predicate = chunk
.compile_predicate(predicate)
.context(MutableBufferChunk)?;
let values = chunk.tag_column_values(table_name, column_name, &chunk_predicate);
// if the mutable buffer doesn't support getting
// values for this kind of column, report back None
if let Err(UnsupportedColumnTypeForListingValues { .. }) = values {
Ok(None)
} else {
values.context(MutableBufferChunk)
}
}
Self::ReadBuffer { .. } => {
// TODO hook up read buffer API here when ready. Until
// then, fall back to using a full plan
// https://github.com/influxdata/influxdb_iox/issues/857
Ok(None)
}
Self::ParquetFile => {
unimplemented!("parquet file not implemented for column_values")
}
}
}
}

View File

@ -1,3 +1,4 @@
pub mod field_columns;
pub mod table_names;
pub mod tag_keys;
pub mod tag_values;

View File

@ -0,0 +1,253 @@
use arrow_deps::datafusion::logical_plan::{col, lit};
use query::{
exec::{
stringset::{IntoStringSet, StringSetRef},
Executor,
},
frontend::influxrpc::InfluxRPCPlanner,
predicate::PredicateBuilder,
};
use crate::query_tests::scenarios::*;
/// runs tag_value(predicate) and compares it to the expected
/// output
macro_rules! run_tag_values_test_case {
($DB_SETUP:expr, $TAG_NAME:expr, $PREDICATE:expr, $EXPECTED_VALUES:expr) => {
test_helpers::maybe_start_logging();
let predicate = $PREDICATE;
let tag_name = $TAG_NAME;
let expected_values = $EXPECTED_VALUES;
for scenario in $DB_SETUP.make().await {
let DBScenario {
scenario_name, db, ..
} = scenario;
println!("Running scenario '{}'", scenario_name);
println!("Predicate: '{:#?}'", predicate);
let planner = InfluxRPCPlanner::new();
let executor = Executor::new();
let plan = planner
.tag_values(&db, &tag_name, predicate.clone())
.await
.expect("built plan successfully");
let names = executor
.to_string_set(plan)
.await
.expect("converted plan to strings successfully");
assert_eq!(
names,
to_stringset(&expected_values),
"Error in scenario '{}'\n\nexpected:\n{:?}\nactual:\n{:?}",
scenario_name,
expected_values,
names
);
}
};
}
#[tokio::test]
async fn list_tag_values_no_tag() {
let predicate = PredicateBuilder::default().build();
// If the tag is not present, expect no values back (not error)
let tag_name = "tag_not_in_chunks";
let expected_tag_keys = vec![];
run_tag_values_test_case!(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
}
#[tokio::test]
async fn list_tag_values_no_predicate_state_col() {
let predicate = PredicateBuilder::default().build();
let tag_name = "state";
let expected_tag_keys = vec!["CA", "MA", "NY"];
run_tag_values_test_case!(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
}
#[tokio::test]
async fn list_tag_values_no_predicate_city_col() {
let tag_name = "city";
let predicate = PredicateBuilder::default().build();
let expected_tag_keys = vec!["Boston", "LA", "NYC"];
run_tag_values_test_case!(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
}
#[tokio::test]
async fn list_tag_values_timestamp_pred_state_col() {
let tag_name = "state";
let predicate = PredicateBuilder::default().timestamp_range(50, 201).build();
let expected_tag_keys = vec!["CA", "MA"];
run_tag_values_test_case!(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
}
#[tokio::test]
async fn list_tag_values_state_pred_state_col() {
let tag_name = "city";
let predicate = PredicateBuilder::default()
.add_expr(col("state").eq(lit("MA"))) // state=MA
.build();
let expected_tag_keys = vec!["Boston"];
run_tag_values_test_case!(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
}
#[tokio::test]
async fn list_tag_values_timestamp_and_state_pred_state_col() {
let tag_name = "state";
let predicate = PredicateBuilder::default()
.timestamp_range(150, 301)
.add_expr(col("state").eq(lit("MA"))) // state=MA
.build();
let expected_tag_keys = vec!["MA"];
run_tag_values_test_case!(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
}
#[tokio::test]
async fn list_tag_values_table_pred_state_col() {
let tag_name = "state";
let predicate = PredicateBuilder::default().table("h2o").build();
let expected_tag_keys = vec!["CA", "MA"];
run_tag_values_test_case!(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
}
#[tokio::test]
async fn list_tag_values_table_pred_city_col() {
let tag_name = "city";
let predicate = PredicateBuilder::default().table("o2").build();
let expected_tag_keys = vec!["Boston", "NYC"];
run_tag_values_test_case!(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
}
#[tokio::test]
async fn list_tag_values_table_and_timestamp_and_table_pred_state_col() {
let tag_name = "state";
let predicate = PredicateBuilder::default()
.table("o2")
.timestamp_range(50, 201)
.build();
let expected_tag_keys = vec!["MA"];
run_tag_values_test_case!(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
}
#[tokio::test]
async fn list_tag_values_table_and_state_pred_state_col() {
let tag_name = "state";
let predicate = PredicateBuilder::default()
.table("o2")
.add_expr(col("state").eq(lit("NY"))) // state=NY
.build();
let expected_tag_keys = vec!["NY"];
run_tag_values_test_case!(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
}
#[tokio::test]
async fn list_tag_values_table_and_timestamp_and_state_pred_state_col() {
let tag_name = "state";
let predicate = PredicateBuilder::default()
.table("o2")
.timestamp_range(1, 550)
.add_expr(col("state").eq(lit("NY"))) // state=NY
.build();
let expected_tag_keys = vec!["NY"];
run_tag_values_test_case!(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
}
#[tokio::test]
async fn list_tag_values_table_and_timestamp_and_state_pred_state_col_no_rows() {
let tag_name = "state";
let predicate = PredicateBuilder::default()
.table("o2")
.timestamp_range(1, 300) // filters out the NY row
.add_expr(col("state").eq(lit("NY"))) // state=NY
.build();
let expected_tag_keys = vec![];
run_tag_values_test_case!(
TwoMeasurementsManyNulls {},
tag_name,
predicate,
expected_tag_keys
);
}
#[tokio::test]
async fn list_tag_values_field_col() {
let db_setup = TwoMeasurementsManyNulls {};
let predicate = PredicateBuilder::default().build();
for scenario in db_setup.make().await {
let DBScenario {
scenario_name, db, ..
} = scenario;
println!("Running scenario '{}'", scenario_name);
println!("Predicate: '{:#?}'", predicate);
let planner = InfluxRPCPlanner::new();
// Test: temp is a field, not a tag
let tag_name = "temp";
let plan_result = planner.tag_values(&db, &tag_name, predicate.clone()).await;
assert_eq!(
plan_result.unwrap_err().to_string(),
"gRPC planner error: column \'temp\' is not a tag, it is Some(Field(Float))"
);
}
}
fn to_stringset(v: &[&str]) -> StringSetRef {
v.into_stringset().unwrap()
}

View File

@ -1,8 +1,8 @@
//! Implementation of command line option for manipulating and showing server
//! config
use clap::arg_enum;
use std::{net::SocketAddr, net::ToSocketAddrs, path::PathBuf};
use structopt::StructOpt;
/// The default bind address for the HTTP API.
@ -91,16 +91,37 @@ pub struct Config {
#[structopt(long = "--data-dir", env = "INFLUXDB_IOX_DB_DIR")]
pub database_directory: Option<PathBuf>,
#[structopt(
long = "--object-store",
env = "INFLUXDB_IOX_OBJECT_STORE",
possible_values = &ObjectStore::variants(),
case_insensitive = true,
long_help = r#"Which object storage to use. If not specified, defaults to memory.
Possible values (case insensitive):
* memory (default): Effectively no object persistence.
* file: Stores objects in the local filesystem. Must also set `--data-dir`.
* s3: Amazon S3. Must also set `--bucket`, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, and
AWS_DEFAULT_REGION.
* google: Google Cloud Storage. Must also set `--bucket` and SERVICE_ACCOUNT.
* azure: Microsoft Azure blob storage. Must also set `--bucket`, AZURE_STORAGE_ACCOUNT,
and AZURE_STORAGE_MASTER_KEY.
"#,
)]
pub object_store: Option<ObjectStore>,
/// Name of the bucket to use for the object store. Must also set
/// `--object-store` to a cloud object storage to have any effect.
///
/// If using Google Cloud Storage for the object store, this item, as well
/// as SERVICE_ACCOUNT must be set.
#[structopt(long = "--gcp-bucket", env = "INFLUXDB_IOX_GCP_BUCKET")]
pub gcp_bucket: Option<String>,
///
/// If using S3 for the object store, this item, as well
/// as AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_DEFAULT_REGION must
/// be set.
#[structopt(long = "--s3-bucket", env = "INFLUXDB_IOX_S3_BUCKET")]
pub s3_bucket: Option<String>,
#[structopt(long = "--bucket", env = "INFLUXDB_IOX_BUCKET")]
pub bucket: Option<String>,
/// If set, Jaeger traces are emitted to this host
/// using the OpenTelemetry tracer.
@ -167,6 +188,17 @@ fn strip_server(args: impl Iterator<Item = String>) -> Vec<String> {
.collect::<Vec<_>>()
}
arg_enum! {
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum ObjectStore {
Memory,
File,
S3,
Google,
Azure,
}
}
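A sketch of how the new flags parse together, patterned on the Config tests further down in this file (the bucket name is made up; this is illustrative, not part of the change):
#[test]
fn object_store_s3_flags_parse() {
    // the first element is treated as the binary name by the parser
    let config = Config::from_iter_safe(vec![
        "server",
        "--object-store",
        "s3",
        "--bucket",
        "my-iox-bucket",
    ])
    .expect("valid config");

    assert_eq!(config.object_store, Some(ObjectStore::S3));
    assert_eq!(config.bucket, Some("my-iox-bucket".to_string()));
}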
/// How to format output logging messages
#[derive(Debug, Clone, Copy)]
pub enum LogFormat {
@ -301,15 +333,6 @@ mod tests {
clap::ErrorKind::ValueValidation
);
assert_eq!(
Config::from_iter_safe(strip_server(
to_vec(&["cmd", "server", "--api-bind", "badhost.badtld:1234"]).into_iter(),
))
.map_err(|e| e.kind)
.expect_err("must fail"),
clap::ErrorKind::ValueValidation
);
Ok(())
}
}

View File

@ -12,7 +12,7 @@ use panic_logging::SendPanicsToTracing;
use server::{ConnectionManagerImpl as ConnectionManager, Server as AppServer};
use crate::commands::{
config::{load_config, Config},
config::{load_config, Config, ObjectStore as ObjStoreOpt},
logging::LoggingLevel,
};
@ -64,6 +64,12 @@ pub enum Error {
#[snafu(display("Error serving RPC: {}", source))]
ServingRPC { source: self::rpc::Error },
#[snafu(display("Specifed {} for the object store, but not a bucket", object_store))]
InvalidCloudObjectStoreConfiguration { object_store: ObjStoreOpt },
#[snafu(display("Specified file for the object store, but not a database directory"))]
InvalidFileObjectStoreConfiguration,
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
@ -92,22 +98,53 @@ pub async fn main(logging_level: LoggingLevel, config: Option<Config>) -> Result
let f = SendPanicsToTracing::new();
std::mem::forget(f);
let db_dir = &config.database_directory;
let object_store = if let Some(bucket_name) = &config.gcp_bucket {
info!("Using GCP bucket {} for storage", bucket_name);
ObjectStore::new_google_cloud_storage(GoogleCloudStorage::new(bucket_name))
} else if let Some(bucket_name) = &config.s3_bucket {
info!("Using S3 bucket {} for storage", bucket_name);
// rusoto::Region's default takes the value from the AWS_DEFAULT_REGION env var.
ObjectStore::new_amazon_s3(AmazonS3::new(Default::default(), bucket_name))
} else if let Some(db_dir) = db_dir {
info!("Using local dir {:?} for storage", db_dir);
fs::create_dir_all(db_dir).context(CreatingDatabaseDirectory { path: db_dir })?;
ObjectStore::new_file(object_store::disk::File::new(&db_dir))
} else {
warn!("NO PERSISTENCE: using memory for object storage");
ObjectStore::new_in_memory(object_store::memory::InMemory::new())
let object_store = match (
config.object_store,
config.bucket,
config.database_directory,
) {
(Some(ObjStoreOpt::Google), Some(bucket), _) => {
info!("Using GCP bucket {} for storage", bucket);
ObjectStore::new_google_cloud_storage(GoogleCloudStorage::new(bucket))
}
(Some(ObjStoreOpt::Google), None, _) => {
return InvalidCloudObjectStoreConfiguration {
object_store: ObjStoreOpt::Google,
}
.fail();
}
(Some(ObjStoreOpt::S3), Some(bucket), _) => {
info!("Using S3 bucket {} for storage", bucket);
// rusoto::Region's default takes the value from the AWS_DEFAULT_REGION env var.
ObjectStore::new_amazon_s3(AmazonS3::new(Default::default(), bucket))
}
(Some(ObjStoreOpt::S3), None, _) => {
return InvalidCloudObjectStoreConfiguration {
object_store: ObjStoreOpt::S3,
}
.fail();
}
(Some(ObjStoreOpt::File), _, Some(ref db_dir)) => {
info!("Using local dir {:?} for storage", db_dir);
fs::create_dir_all(db_dir).context(CreatingDatabaseDirectory { path: db_dir })?;
ObjectStore::new_file(object_store::disk::File::new(&db_dir))
}
(Some(ObjStoreOpt::File), _, None) => {
return InvalidFileObjectStoreConfiguration.fail();
}
(Some(ObjStoreOpt::Azure), Some(_bucket), _) => {
unimplemented!();
}
(Some(ObjStoreOpt::Azure), None, _) => {
return InvalidCloudObjectStoreConfiguration {
object_store: ObjStoreOpt::Azure,
}
.fail();
}
(Some(ObjStoreOpt::Memory), _, _) | (None, _, _) => {
warn!("NO PERSISTENCE: using memory for object storage");
ObjectStore::new_in_memory(object_store::memory::InMemory::new())
}
};
let object_storage = Arc::new(object_store);

View File

@ -1,5 +1,6 @@
//! This module contains a partial implementation of the /v2 HTTP api
//! routes for InfluxDB IOx.
//! This module contains the HTTP api for InfluxDB IOx, including a
//! partial implementation of the /v2 HTTP api routes from InfluxDB
//! for compatibility.
//!
//! Note that these routes are designed to be just helpers for now,
//! and "close enough" to the real /v2 api to be able to test InfluxDB IOx
@ -10,7 +11,7 @@
//! database names and may remove this quasi /v2 API.
// Influx crates
use arrow_deps::{arrow, datafusion::physical_plan::collect};
use arrow_deps::datafusion::physical_plan::collect;
use data_types::{
database_rules::DatabaseRules,
http::{ListDatabasesResponse, WalMetadataQuery},
@ -25,7 +26,7 @@ use server::{ConnectionManager, Server as AppServer};
// External crates
use bytes::{Bytes, BytesMut};
use futures::{self, StreamExt};
use http::header::CONTENT_ENCODING;
use http::header::{CONTENT_ENCODING, CONTENT_TYPE};
use hyper::{Body, Method, Request, Response, StatusCode};
use routerify::{prelude::*, Middleware, RequestInfo, Router, RouterError, RouterService};
use serde::{Deserialize, Serialize};
@ -35,6 +36,9 @@ use tracing::{debug, error, info};
use data_types::http::WalMetadataResponse;
use std::{fmt::Debug, str, sync::Arc};
mod format;
use format::QueryOutputFormat;
#[derive(Debug, Snafu)]
pub enum ApplicationError {
// Internal (unexpected) errors
@ -86,7 +90,9 @@ pub enum ApplicationError {
#[snafu(display("Expected query string in request, but none was provided"))]
ExpectedQueryString {},
#[snafu(display("Invalid query string '{}': {}", query_string, source))]
/// Error for when we could not parse the HTTP query URI (e.g.
/// `?foo=bar&bar=baz`)
#[snafu(display("Invalid query string in HTTP URI '{}': {}", query_string, source))]
InvalidQueryString {
query_string: String,
source: serde_urlencoded::de::Error,
@ -151,6 +157,21 @@ pub enum ApplicationError {
#[snafu(display("Database {} does not have a WAL", name))]
WALNotFound { name: String },
#[snafu(display("Internal error creating HTTP response: {}", source))]
CreatingResponse { source: http::Error },
#[snafu(display(
"Error formatting results of SQL query '{}' using '{:?}': {}",
q,
format,
source
))]
FormattingResult {
q: String,
format: QueryOutputFormat,
source: format::Error,
},
}
impl ApplicationError {
@ -181,6 +202,8 @@ impl ApplicationError {
Self::DatabaseNameError { .. } => self.bad_request(),
Self::DatabaseNotFound { .. } => self.not_found(),
Self::WALNotFound { .. } => self.not_found(),
Self::CreatingResponse { .. } => self.internal_error(),
Self::FormattingResult { .. } => self.internal_error(),
}
}
@ -259,10 +282,11 @@ where
})) // this endpoint is for API backward compatibility with InfluxDB 2.x
.post("/api/v2/write", write::<M>)
.get("/ping", ping)
.get("/api/v2/read", read::<M>)
.get("/health", health)
.get("/iox/api/v1/databases", list_databases::<M>)
.put("/iox/api/v1/databases/:name", create_database::<M>)
.get("/iox/api/v1/databases/:name", get_database::<M>)
.get("/iox/api/v1/databases/:name/query", query::<M>)
.get("/iox/api/v1/databases/:name/wal/meta", get_wal_meta::<M>)
.put("/iox/api/v1/id", set_writer::<M>)
.get("/iox/api/v1/id", get_writer::<M>)
@ -406,53 +430,67 @@ where
.unwrap())
}
#[derive(Deserialize, Debug)]
/// Body of the request to the /read endpoint
struct ReadInfo {
org: String,
bucket: String,
// TODO This is currently a "SQL" request -- should be updated to conform
// to the V2 API for reading (using timestamps, etc).
sql_query: String,
#[derive(Deserialize, Debug, PartialEq)]
/// Parsed URI Parameters of the request to the .../query endpoint
struct QueryParams {
q: String,
#[serde(default)]
format: QueryOutputFormat,
}
// TODO: figure out how to stream read results out rather than rendering the
// whole thing in mem
#[tracing::instrument(level = "debug")]
async fn read<M: ConnectionManager + Send + Sync + Debug + 'static>(
async fn query<M: ConnectionManager + Send + Sync + Debug + 'static>(
req: Request<Body>,
) -> Result<Response<Body>, ApplicationError> {
let server = Arc::clone(&req.data::<Arc<AppServer<M>>>().expect("server state"));
let query = req.uri().query().context(ExpectedQueryString {})?;
let read_info: ReadInfo = serde_urlencoded::from_str(query).context(InvalidQueryString {
query_string: query,
})?;
let uri_query = req.uri().query().context(ExpectedQueryString {})?;
let QueryParams { q, format } =
serde_urlencoded::from_str(uri_query).context(InvalidQueryString {
query_string: uri_query,
})?;
let db_name_str = req
.param("name")
.expect("db name must have been set by routerify")
.clone();
let db_name = DatabaseName::new(&db_name_str).context(DatabaseNameError)?;
debug!(uri = ?req.uri(), %q, ?format, %db_name, "running SQL query");
let db = server
.db(&db_name)
.await
.context(DatabaseNotFound { name: &db_name_str })?;
let planner = SQLQueryPlanner::default();
let executor = server.executor();
let db_name = org_and_bucket_to_database(&read_info.org, &read_info.bucket)
.context(BucketMappingError)?;
let db = server.db(&db_name).await.context(BucketNotFound {
org: read_info.org.clone(),
bucket: read_info.bucket.clone(),
})?;
let physical_plan = planner
.query(db.as_ref(), &read_info.sql_query, executor.as_ref())
.query(db.as_ref(), &q, executor.as_ref())
.await
.context(PlanningSQLQuery { query })?;
.context(PlanningSQLQuery { query: &q })?;
// TODO: stream read results out rather than rendering the
// whole thing in mem
let batches = collect(physical_plan)
.await
.map_err(|e| Box::new(e) as _)
.context(Query { db_name })?;
let results = arrow::util::pretty::pretty_format_batches(&batches).unwrap();
let results = format
.format(&batches)
.context(FormattingResult { q, format })?;
Ok(Response::new(Body::from(results.into_bytes())))
let body = Body::from(results.into_bytes());
let response = Response::builder()
.header(CONTENT_TYPE, format.content_type())
.body(body)
.context(CreatingResponse)?;
Ok(response)
}
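// Editor's note: a minimal sketch (not part of this diff) of calling the new
// query endpoint with an explicit output format, mirroring the integration
// tests further below. The base URL and database name are placeholders.
async fn query_as_csv_example() -> Result<(), reqwest::Error> {
    let client = reqwest::Client::new();
    // GET /iox/api/v1/databases/:name/query?q=<sql>&format=csv
    let body = client
        .get("http://127.0.0.1:8080/iox/api/v1/databases/MyOrg_MyBucket/query")
        .query(&[("q", "select * from h2o_temperature"), ("format", "csv")])
        .send()
        .await?
        .text()
        .await?;
    println!("{}", body);
    Ok(())
}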
#[tracing::instrument(level = "debug")]
@ -637,11 +675,17 @@ async fn get_writer<M: ConnectionManager + Send + Sync + Debug + 'static>(
// Route to test that the server is alive
#[tracing::instrument(level = "debug")]
async fn ping(req: Request<Body>) -> Result<Response<Body>, ApplicationError> {
async fn ping(_: Request<Body>) -> Result<Response<Body>, ApplicationError> {
let response_body = "PONG";
Ok(Response::new(Body::from(response_body.to_string())))
}
#[tracing::instrument(level = "debug")]
async fn health(_: Request<Body>) -> Result<Response<Body>, ApplicationError> {
let response_body = "OK";
Ok(Response::new(Body::from(response_body.to_string())))
}
#[derive(Deserialize, Debug)]
/// Arguments in the query string of the request to /partitions
struct DatabaseInfo {
@ -749,7 +793,6 @@ mod tests {
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
use arrow_deps::{arrow::record_batch::RecordBatch, assert_table_eq};
use http::header;
use query::exec::Executor;
use reqwest::{Client, Response};
@ -783,6 +826,22 @@ mod tests {
Ok(())
}
#[tokio::test]
async fn test_health() -> Result<()> {
let test_storage = Arc::new(AppServer::new(
ConnectionManagerImpl {},
Arc::new(ObjectStore::new_in_memory(InMemory::new())),
));
let server_url = test_server(Arc::clone(&test_storage));
let client = Client::new();
let response = client.get(&format!("{}/health", server_url)).send().await;
// Print the response so if the test fails, we have a log of what went wrong
check_response("health", response, StatusCode::OK, "OK").await;
Ok(())
}
#[tokio::test]
async fn test_write() -> Result<()> {
let test_storage = Arc::new(AppServer::new(
@ -833,6 +892,139 @@ mod tests {
Ok(())
}
    /// Sets up a test database with some data for testing the query endpoint.
    /// Returns a client for communicating with the server, and the server
    /// endpoint.
async fn setup_test_data() -> (Client, String) {
let test_storage: Arc<AppServer<ConnectionManagerImpl>> = Arc::new(AppServer::new(
ConnectionManagerImpl {},
Arc::new(ObjectStore::new_in_memory(InMemory::new())),
));
test_storage.set_id(1);
test_storage
.create_database("MyOrg_MyBucket", DatabaseRules::new())
.await
.unwrap();
let server_url = test_server(Arc::clone(&test_storage));
let client = Client::new();
let lp_data = "h2o_temperature,location=santa_monica,state=CA surface_degrees=65.2,bottom_degrees=50.4 1568756160";
// send write data
let bucket_name = "MyBucket";
let org_name = "MyOrg";
let response = client
.post(&format!(
"{}/api/v2/write?bucket={}&org={}",
server_url, bucket_name, org_name
))
.body(lp_data)
.send()
.await;
check_response("write", response, StatusCode::NO_CONTENT, "").await;
(client, server_url)
}
#[tokio::test]
async fn test_query_pretty() -> Result<()> {
let (client, server_url) = setup_test_data().await;
// send query data
let response = client
.get(&format!(
"{}/iox/api/v1/databases/MyOrg_MyBucket/query?q={}",
server_url, "select%20*%20from%20h2o_temperature"
))
.send()
.await;
assert_eq!(get_content_type(&response), "text/plain");
let res = "+----------------+--------------+-------+-----------------+------------+\n\
| bottom_degrees | location | state | surface_degrees | time |\n\
+----------------+--------------+-------+-----------------+------------+\n\
| 50.4 | santa_monica | CA | 65.2 | 1568756160 |\n\
+----------------+--------------+-------+-----------------+------------+\n";
check_response("query", response, StatusCode::OK, res).await;
// same response is expected if we explicitly request 'format=pretty'
let response = client
.get(&format!(
"{}/iox/api/v1/databases/MyOrg_MyBucket/query?q={}&format=pretty",
server_url, "select%20*%20from%20h2o_temperature"
))
.send()
.await;
assert_eq!(get_content_type(&response), "text/plain");
check_response("query", response, StatusCode::OK, res).await;
Ok(())
}
#[tokio::test]
async fn test_query_csv() -> Result<()> {
let (client, server_url) = setup_test_data().await;
// send query data
let response = client
.get(&format!(
"{}/iox/api/v1/databases/MyOrg_MyBucket/query?q={}&format=csv",
server_url, "select%20*%20from%20h2o_temperature"
))
.send()
.await;
assert_eq!(get_content_type(&response), "text/csv");
let res = "bottom_degrees,location,state,surface_degrees,time\n\
50.4,santa_monica,CA,65.2,1568756160\n";
check_response("query", response, StatusCode::OK, res).await;
Ok(())
}
#[tokio::test]
async fn test_query_json() -> Result<()> {
let (client, server_url) = setup_test_data().await;
        // send a second line of data to demonstrate how that works
let lp_data = "h2o_temperature,location=Boston,state=MA surface_degrees=50.2 1568756160";
// send write data
let bucket_name = "MyBucket";
let org_name = "MyOrg";
let response = client
.post(&format!(
"{}/api/v2/write?bucket={}&org={}",
server_url, bucket_name, org_name
))
.body(lp_data)
.send()
.await;
check_response("write", response, StatusCode::NO_CONTENT, "").await;
// send query data
let response = client
.get(&format!(
"{}/iox/api/v1/databases/MyOrg_MyBucket/query?q={}&format=json",
server_url, "select%20*%20from%20h2o_temperature"
))
.send()
.await;
assert_eq!(get_content_type(&response), "application/json");
        // Note the two JSON records are returned together in a single array
let res = r#"[{"bottom_degrees":50.4,"location":"santa_monica","state":"CA","surface_degrees":65.2,"time":1568756160},{"location":"Boston","state":"MA","surface_degrees":50.2,"time":1568756160}]"#;
check_response("query", response, StatusCode::OK, res).await;
Ok(())
}
fn gzip_str(s: &str) -> Vec<u8> {
use flate2::{write::GzEncoder, Compression};
use std::io::Write;
@ -865,7 +1057,7 @@ mod tests {
"{}/api/v2/write?bucket={}&org={}",
server_url, bucket_name, org_name
))
.header(header::CONTENT_ENCODING, "gzip")
.header(CONTENT_ENCODING, "gzip")
.body(gzip_str(lp_data))
.send()
.await;
@ -1119,6 +1311,19 @@ mod tests {
assert_eq!(r4.segments.len(), 0);
}
fn get_content_type(response: &Result<Response, reqwest::Error>) -> String {
if let Ok(response) = response {
response
.headers()
.get(CONTENT_TYPE)
.map(|v| v.to_str().unwrap())
.unwrap_or("")
.to_string()
} else {
"".to_string()
}
}
/// checks a http response against expected results
async fn check_response(
description: &str,
@ -1191,4 +1396,59 @@ mod tests {
collect(physical_plan).await.unwrap()
}
#[test]
fn query_params_format_default() {
// default to pretty format when not otherwise specified
assert_eq!(
serde_urlencoded::from_str("q=foo"),
Ok(QueryParams {
q: "foo".to_string(),
format: QueryOutputFormat::Pretty
})
);
}
#[test]
fn query_params_format_pretty() {
assert_eq!(
serde_urlencoded::from_str("q=foo&format=pretty"),
Ok(QueryParams {
q: "foo".to_string(),
format: QueryOutputFormat::Pretty
})
);
}
#[test]
fn query_params_format_csv() {
assert_eq!(
serde_urlencoded::from_str("q=foo&format=csv"),
Ok(QueryParams {
q: "foo".to_string(),
format: QueryOutputFormat::CSV
})
);
}
#[test]
fn query_params_format_json() {
assert_eq!(
serde_urlencoded::from_str("q=foo&format=json"),
Ok(QueryParams {
q: "foo".to_string(),
format: QueryOutputFormat::JSON
})
);
}
#[test]
fn query_params_bad_format() {
assert_eq!(
serde_urlencoded::from_str::<QueryParams>("q=foo&format=jsob")
.unwrap_err()
.to_string(),
"unknown variant `jsob`, expected one of `pretty`, `csv`, `json`"
);
}
}

View File

@ -0,0 +1,242 @@
//! Output formatting utilities for query endpoint
use serde::Deserialize;
use snafu::{ResultExt, Snafu};
use std::io::Write;
use serde_json::Value;
use arrow_deps::arrow::{
self, csv::WriterBuilder, error::ArrowError, json::writer::record_batches_to_json_rows,
record_batch::RecordBatch,
};
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("Arrow pretty printing error: {}", source))]
PrettyArrow { source: ArrowError },
#[snafu(display("Arrow csv printing error: {}", source))]
CsvArrow { source: ArrowError },
#[snafu(display("Arrow json printing error: {}", source))]
JsonArrow { source: ArrowError },
#[snafu(display("Json conversion error: {}", source))]
JsonConversion { source: serde_json::Error },
#[snafu(display("IO error during Json conversion: {}", source))]
JsonWrite { source: std::io::Error },
#[snafu(display("Error converting CSV output to UTF-8: {}", source))]
CsvUtf8 { source: std::string::FromUtf8Error },
#[snafu(display("Error converting JSON output to UTF-8: {}", source))]
JsonUtf8 { source: std::string::FromUtf8Error },
}
type Result<T, E = Error> = std::result::Result<T, E>;
#[derive(Deserialize, Debug, Copy, Clone, PartialEq)]
/// Requested output format for the query endpoint
pub enum QueryOutputFormat {
/// Arrow pretty printer format (default)
#[serde(rename = "pretty")]
Pretty,
/// Comma separated values
#[serde(rename = "csv")]
CSV,
/// Arrow JSON format
#[serde(rename = "json")]
JSON,
}
impl Default for QueryOutputFormat {
fn default() -> Self {
Self::Pretty
}
}
impl QueryOutputFormat {
/// Return the content type of the relevant format
pub fn content_type(&self) -> &'static str {
match self {
Self::Pretty => "text/plain",
Self::CSV => "text/csv",
Self::JSON => "application/json",
}
}
}
impl QueryOutputFormat {
/// Format the [`RecordBatch`]es into a String in one of the
/// following formats:
///
/// Pretty:
/// ```text
/// +----------------+--------------+-------+-----------------+------------+
/// | bottom_degrees | location | state | surface_degrees | time |
/// +----------------+--------------+-------+-----------------+------------+
/// | 50.4 | santa_monica | CA | 65.2 | 1568756160 |
/// +----------------+--------------+-------+-----------------+------------+
/// ```
///
/// CSV:
/// ```text
/// bottom_degrees,location,state,surface_degrees,time
/// 50.4,santa_monica,CA,65.2,1568756160
/// ```
///
/// JSON:
///
/// Example (newline + whitespace added for clarity):
/// ```text
/// [
/// {"bottom_degrees":50.4,"location":"santa_monica","state":"CA","surface_degrees":65.2,"time":1568756160},
/// {"location":"Boston","state":"MA","surface_degrees":50.2,"time":1568756160}
/// ]
/// ```
pub fn format(&self, batches: &[RecordBatch]) -> Result<String> {
match self {
Self::Pretty => batches_to_pretty(&batches),
Self::CSV => batches_to_csv(&batches),
Self::JSON => batches_to_json(&batches),
}
}
}
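// Editor's note: a minimal sketch (not part of this diff) showing how
// `QueryOutputFormat::format` could be exercised on a hand-built RecordBatch,
// using the `arrow` re-export already imported at the top of this module. The
// field names and values are placeholders.
fn format_example() -> Result<String> {
    use arrow::array::{ArrayRef, Float64Array, StringArray};
    use arrow::datatypes::{DataType, Field, Schema};
    use std::sync::Arc;

    let schema = Arc::new(Schema::new(vec![
        Field::new("location", DataType::Utf8, false),
        Field::new("surface_degrees", DataType::Float64, false),
    ]));
    let batch = RecordBatch::try_new(
        schema,
        vec![
            Arc::new(StringArray::from(vec!["santa_monica"])) as ArrayRef,
            Arc::new(Float64Array::from(vec![65.2])),
        ],
    )
    .expect("building record batch");

    // Expected to yield a header line plus one data row, e.g.
    // "location,surface_degrees\nsanta_monica,65.2\n"
    QueryOutputFormat::CSV.format(&[batch])
}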
fn batches_to_pretty(batches: &[RecordBatch]) -> Result<String> {
arrow::util::pretty::pretty_format_batches(batches).context(PrettyArrow)
}
fn batches_to_csv(batches: &[RecordBatch]) -> Result<String> {
let mut bytes = vec![];
{
let mut writer = WriterBuilder::new().has_headers(true).build(&mut bytes);
for batch in batches {
writer.write(batch).context(CsvArrow)?;
}
}
let csv = String::from_utf8(bytes).context(CsvUtf8)?;
Ok(csv)
}
fn batches_to_json(batches: &[RecordBatch]) -> Result<String> {
let mut bytes = vec![];
{
let mut writer = JsonArrayWriter::new(&mut bytes);
writer.write_batches(batches)?;
writer.finish()?;
}
let json = String::from_utf8(bytes).context(JsonUtf8)?;
Ok(json)
}
/// Writes out well-formed JSON arrays in a streaming fashion
///
/// [{"foo": "bar"}, {"foo": "baz"}]
///
/// This is based on the arrow JSON writer (json::writer::Writer)
///
/// TODO contribute this back to arrow: https://issues.apache.org/jira/browse/ARROW-11773
struct JsonArrayWriter<W>
where
W: Write,
{
started: bool,
finished: bool,
writer: W,
}
impl<W> JsonArrayWriter<W>
where
W: Write,
{
fn new(writer: W) -> Self {
Self {
writer,
started: false,
finished: false,
}
}
/// Consume self and return the inner writer
#[cfg(test)]
pub fn into_inner(self) -> W {
self.writer
}
pub fn write_row(&mut self, row: &Value) -> Result<()> {
if !self.started {
self.writer.write_all(b"[").context(JsonWrite)?;
self.started = true;
} else {
self.writer.write_all(b",").context(JsonWrite)?;
}
self.writer
.write_all(&serde_json::to_vec(row).context(JsonConversion)?)
.context(JsonWrite)?;
Ok(())
}
pub fn write_batches(&mut self, batches: &[RecordBatch]) -> Result<()> {
for row in record_batches_to_json_rows(batches) {
self.write_row(&Value::Object(row))?;
}
Ok(())
}
    /// Tell the writer that there is no more data to come so it can
    /// write the final `']'`
pub fn finish(&mut self) -> Result<()> {
if self.started && !self.finished {
self.writer.write_all(b"]").context(JsonWrite)?;
self.finished = true;
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use serde_json::json;
use super::*;
#[test]
fn json_writer_empty() {
let mut writer = JsonArrayWriter::new(vec![] as Vec<u8>);
writer.finish().unwrap();
assert_eq!(String::from_utf8(writer.into_inner()).unwrap(), "");
}
#[test]
fn json_writer_one_row() {
let mut writer = JsonArrayWriter::new(vec![] as Vec<u8>);
let v = json!({ "an": "object" });
writer.write_row(&v).unwrap();
writer.finish().unwrap();
assert_eq!(
String::from_utf8(writer.into_inner()).unwrap(),
r#"[{"an":"object"}]"#
);
}
#[test]
fn json_writer_two_rows() {
let mut writer = JsonArrayWriter::new(vec![] as Vec<u8>);
let v = json!({ "an": "object" });
writer.write_row(&v).unwrap();
let v = json!({ "another": "object" });
writer.write_row(&v).unwrap();
writer.finish().unwrap();
assert_eq!(
String::from_utf8(writer.into_inner()).unwrap(),
r#"[{"an":"object"},{"another":"object"}]"#
);
}
}

View File

@ -30,7 +30,22 @@ where
{
let stream = TcpListenerStream::new(socket);
let (mut health_reporter, health_service) = tonic_health::server::health_reporter();
let services = [
generated_types::STORAGE_SERVICE,
generated_types::IOX_TESTING_SERVICE,
generated_types::ARROW_SERVICE,
];
for service in &services {
health_reporter
.set_service_status(service, tonic_health::ServingStatus::Serving)
.await;
}
tonic::transport::Server::builder()
.add_service(health_service)
.add_service(testing::make_server())
.add_service(storage::make_server(Arc::clone(&server)))
.add_service(flight::make_server(server))
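// Editor's note: a hedged sketch (not part of this diff) of how a client could
// probe one of the services registered above. It assumes tonic-health exposes
// its generated gRPC client under `tonic_health::proto`; the address is a
// placeholder.
async fn check_health_example() -> Result<(), Box<dyn std::error::Error>> {
    use tonic_health::proto::{health_client::HealthClient, HealthCheckRequest};

    let mut client = HealthClient::connect("http://127.0.0.1:8082").await?;
    let response = client
        .check(HealthCheckRequest {
            // one of the service-name constants registered via
            // set_service_status above
            service: generated_types::STORAGE_SERVICE.to_string(),
        })
        .await?;
    println!("health status: {:?}", response.into_inner().status);
    Ok(())
}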

View File

@ -852,7 +852,7 @@ async fn tag_values_impl<T>(
rpc_predicate: Option<Predicate>,
) -> Result<StringValuesResponse>
where
T: DatabaseStore,
T: DatabaseStore + 'static,
{
let rpc_predicate_string = format!("{:?}", rpc_predicate);
@ -873,10 +873,12 @@ where
.await
.context(DatabaseNotFound { db_name })?;
let planner = InfluxRPCPlanner::new();
let executor = db_store.executor();
let tag_value_plan = db
.column_values(tag_name, predicate)
let tag_value_plan = planner
.tag_values(db.as_ref(), tag_name, predicate)
.await
.map_err(|e| Box::new(e) as _)
.context(ListingTagValues { db_name, tag_name })?;
@ -1107,11 +1109,11 @@ mod tests {
use arrow_deps::datafusion::logical_plan::{col, lit, Expr};
use panic_logging::SendPanicsToTracing;
use query::{
exec::SeriesSetPlans,
group_by::{Aggregate as QueryAggregate, WindowDuration as QueryWindowDuration},
plan::seriesset::SeriesSetPlans,
test::QueryGroupsRequest,
test::TestDatabaseStore,
test::{ColumnValuesRequest, QuerySeriesRequest, TestChunk},
test::{QuerySeriesRequest, TestChunk},
};
use std::{
convert::TryFrom,
@ -1478,11 +1480,18 @@ mod tests {
let db_info = OrgAndBucket::new(123, 456);
let partition_id = 1;
let test_db = fixture
// Add a chunk with a field
let chunk = TestChunk::new(0)
.with_time_column("TheMeasurement")
.with_tag_column("TheMeasurement", "state")
.with_one_row_of_null_data("TheMeasurement");
fixture
.test_storage
.db_or_create(&db_info.db_name)
.await
.expect("creating test database");
.unwrap()
.add_chunk("my_partition_key", Arc::new(chunk));
let source = Some(StorageClientWrapper::read_source(
db_info.org_id,
@ -1490,24 +1499,35 @@ mod tests {
partition_id,
));
let tag_values = vec!["k1", "k2", "k3", "k4"];
let request = TagValuesRequest {
tags_source: source.clone(),
range: make_timestamp_range(150, 200),
range: make_timestamp_range(150, 2000),
predicate: make_state_ma_predicate(),
tag_key: "the_tag_key".into(),
tag_key: "state".into(),
};
let expected_request = ColumnValuesRequest {
predicate: "Predicate { exprs: [#state Eq Utf8(\"MA\")] range: TimestampRange { start: 150, end: 200 }}".into(),
column_name: "the_tag_key".into(),
};
test_db.set_column_values(to_string_vec(&tag_values));
let actual_tag_values = fixture.storage_client.tag_values(request).await.unwrap();
assert_eq!(actual_tag_values, tag_values,);
assert_eq!(test_db.get_column_values_request(), Some(expected_request),);
assert_eq!(actual_tag_values, vec!["MA"]);
}
/// test the plumbing of the RPC layer for tag_values
///
    /// For the special case of tag_key = _measurement, which means
    /// listing all measurement names
#[tokio::test]
async fn test_storage_rpc_tag_values_with_measurement() {
        // Start a test gRPC server on a randomly allocated port
let mut fixture = Fixture::new().await.expect("Connecting to test server");
let db_info = OrgAndBucket::new(123, 456);
let partition_id = 1;
let source = Some(StorageClientWrapper::read_source(
db_info.org_id,
db_info.bucket_id,
partition_id,
));
// ---
// test tag_key = _measurement means listing all measurement names
@ -1590,11 +1610,14 @@ mod tests {
let db_info = OrgAndBucket::new(123, 456);
let partition_id = 1;
let test_db = fixture
let chunk = TestChunk::new(0).with_error("Sugar we are going down");
fixture
.test_storage
.db_or_create(&db_info.db_name)
.await
.expect("creating test database");
.unwrap()
.add_chunk("my_partition_key", Arc::new(chunk));
let source = Some(StorageClientWrapper::read_source(
db_info.org_id,
@ -1612,12 +1635,13 @@ mod tests {
tag_key: "the_tag_key".into(),
};
// Note we don't set the column_names on the test database, so we expect an
// error
let response = fixture.storage_client.tag_values(request).await;
assert!(response.is_err());
let response_string = format!("{:?}", response);
let expected_error = "No saved column_values in TestDatabase";
let response_string = fixture
.storage_client
.tag_values(request)
.await
.unwrap_err()
.to_string();
let expected_error = "Sugar we are going down";
assert!(
response_string.contains(expected_error),
"'{}' did not contain expected content '{}'",
@ -1625,12 +1649,6 @@ mod tests {
expected_error
);
let expected_request = Some(ColumnValuesRequest {
predicate: "Predicate {}".into(),
column_name: "the_tag_key".into(),
});
assert_eq!(test_db.get_column_values_request(), expected_request);
// ---
// test error with non utf8 value
// ---
@ -1641,9 +1659,12 @@ mod tests {
tag_key: [0, 255].into(), // this is not a valid UTF-8 string
};
let response = fixture.storage_client.tag_values(request).await;
assert!(response.is_err());
let response_string = format!("{:?}", response);
let response_string = fixture
.storage_client
.tag_values(request)
.await
.unwrap_err()
.to_string();
let expected_error = "Error converting tag_key to UTF-8 in tag_values request";
assert!(
response_string.contains(expected_error),
@ -1653,22 +1674,27 @@ mod tests {
);
}
/// test the plumbing of the RPC layer for measurement_tag_values--
/// specifically that the right parameters are passed into the Database
/// interface and that the returned values are sent back via gRPC.
/// test the plumbing of the RPC layer for measurement_tag_values
#[tokio::test]
async fn test_storage_rpc_measurement_tag_values() {
        // Start a test gRPC server on a randomly allocated port
test_helpers::maybe_start_logging();
let mut fixture = Fixture::new().await.expect("Connecting to test server");
let db_info = OrgAndBucket::new(123, 456);
let partition_id = 1;
let test_db = fixture
// Add a chunk with a field
let chunk = TestChunk::new(0)
.with_time_column("TheMeasurement")
.with_tag_column("TheMeasurement", "state")
.with_one_row_of_null_data("TheMeasurement");
fixture
.test_storage
.db_or_create(&db_info.db_name)
.await
.expect("creating test database");
.unwrap()
.add_chunk("my_partition_key", Arc::new(chunk));
let source = Some(StorageClientWrapper::read_source(
db_info.org_id,
@ -1676,22 +1702,14 @@ mod tests {
partition_id,
));
let tag_values = vec!["k1", "k2", "k3", "k4"];
let request = MeasurementTagValuesRequest {
measurement: "m4".into(),
measurement: "TheMeasurement".into(),
source: source.clone(),
range: make_timestamp_range(150, 200),
range: make_timestamp_range(150, 2000),
predicate: make_state_ma_predicate(),
tag_key: "the_tag_key".into(),
tag_key: "state".into(),
};
let expected_request = ColumnValuesRequest {
predicate: "Predicate { table_names: m4 exprs: [#state Eq Utf8(\"MA\")] range: TimestampRange { start: 150, end: 200 }}".into(),
column_name: "the_tag_key".into(),
};
test_db.set_column_values(to_string_vec(&tag_values));
let actual_tag_values = fixture
.storage_client
.measurement_tag_values(request)
@ -1699,15 +1717,34 @@ mod tests {
.unwrap();
assert_eq!(
actual_tag_values, tag_values,
actual_tag_values,
vec!["MA"],
"unexpected tag values while getting tag values",
);
}
assert_eq!(
test_db.get_column_values_request(),
Some(expected_request),
"unexpected request while getting tag values",
);
#[tokio::test]
async fn test_storage_rpc_measurement_tag_values_error() {
test_helpers::maybe_start_logging();
let mut fixture = Fixture::new().await.expect("Connecting to test server");
let db_info = OrgAndBucket::new(123, 456);
let partition_id = 1;
let chunk = TestChunk::new(0).with_error("Sugar we are going down");
fixture
.test_storage
.db_or_create(&db_info.db_name)
.await
.unwrap()
.add_chunk("my_partition_key", Arc::new(chunk));
let source = Some(StorageClientWrapper::read_source(
db_info.org_id,
db_info.bucket_id,
partition_id,
));
// ---
// test error
@ -1722,22 +1759,19 @@ mod tests {
// Note we don't set the column_names on the test database, so we expect an
// error
let response = fixture.storage_client.measurement_tag_values(request).await;
assert!(response.is_err());
let response_string = format!("{:?}", response);
let expected_error = "No saved column_values in TestDatabase";
let response_string = fixture
.storage_client
.measurement_tag_values(request)
.await
.unwrap_err()
.to_string();
let expected_error = "Sugar we are going down";
assert!(
response_string.contains(expected_error),
"'{}' did not contain expected content '{}'",
response_string,
expected_error
);
let expected_request = Some(ColumnValuesRequest {
predicate: "Predicate { table_names: m5}".into(),
column_name: "the_tag_key".into(),
});
assert_eq!(test_db.get_column_values_request(), expected_request);
}
#[tokio::test]

View File

@ -48,7 +48,7 @@ const HTTP_BIND_ADDR: &str = http_bind_addr!();
const GRPC_BIND_ADDR: &str = grpc_bind_addr!();
const HTTP_BASE: &str = concat!("http://", http_bind_addr!());
const API_BASE: &str = concat!("http://", http_bind_addr!(), "/api/v2");
const IOX_API_V1_BASE: &str = concat!("http://", http_bind_addr!(), "/iox/api/v1");
const GRPC_URL_BASE: &str = concat!("http://", grpc_bind_addr!(), "/");
const TOKEN: &str = "InfluxDB IOx doesn't have authentication yet";
@ -377,6 +377,27 @@ impl TestServer {
// different ports but both need to be up for the test to run
let try_grpc_connect = async {
let mut interval = tokio::time::interval(Duration::from_millis(500));
loop {
match influxdb_iox_client::health::Client::connect(GRPC_URL_BASE).await {
Ok(mut client) => {
println!("Successfully connected to server");
match client.check_storage().await {
Ok(_) => {
println!("Storage service is running");
break;
}
Err(e) => println!("Error checking storage service status: {}", e),
}
}
Err(e) => {
println!("Waiting for gRPC API to be up: {}", e);
}
}
interval.tick().await;
}
loop {
match StorageClient::connect(GRPC_URL_BASE).await {
Ok(storage_client) => {
@ -387,7 +408,7 @@ impl TestServer {
return;
}
Err(e) => {
println!("Waiting for gRPC server to be up: {}", e);
println!("Failed to create storage client: {}", e)
}
}
interval.tick().await;
@ -396,7 +417,7 @@ impl TestServer {
let try_http_connect = async {
let client = reqwest::Client::new();
let url = format!("{}/ping", HTTP_BASE);
let url = format!("{}/health", HTTP_BASE);
let mut interval = tokio::time::interval(Duration::from_millis(500));
loop {
match client.get(&url).send().await {

View File

@ -1,4 +1,4 @@
use crate::{Scenario, API_BASE};
use crate::{Scenario, IOX_API_V1_BASE};
pub async fn test(
client: &reqwest::Client,
@ -6,7 +6,7 @@ pub async fn test(
sql_query: &str,
expected_read_data: &[String],
) {
let text = read_data_as_sql(&client, "/read", scenario, sql_query).await;
let text = read_data_as_sql(&client, scenario, sql_query).await;
assert_eq!(
text, expected_read_data,
@ -17,18 +17,15 @@ pub async fn test(
async fn read_data_as_sql(
client: &reqwest::Client,
path: &str,
scenario: &Scenario,
sql_query: &str,
) -> Vec<String> {
let url = format!("{}{}", API_BASE, path);
let db_name = format!("{}_{}", scenario.org_id_str(), scenario.bucket_id_str());
let path = format!("/databases/{}/query", db_name);
let url = format!("{}{}", IOX_API_V1_BASE, path);
let lines = client
.get(&url)
.query(&[
("bucket", scenario.bucket_id_str()),
("org", scenario.org_id_str()),
("sql_query", sql_query),
])
.query(&[("q", sql_query)])
.send()
.await
.unwrap()