Merge branch 'main' into er/feat/read_buffer/meta_delete

pull/24376/head
Edd Robinson 2021-11-01 10:26:10 +00:00 committed by GitHub
commit b1096d2a36
328 changed files with 18360 additions and 9944 deletions

View File

@ -1,9 +1,14 @@
[advisories]
ignore = [
# title: term is looking for a new maintainer
# why needed: used by `prettytable-rs` which is directly used by IOx but also by arrow
# upstream issue: https://github.com/phsym/prettytable-rs/issues/119
"RUSTSEC-2018-0015",
# title: Potential segfault in the time crate
# why needed: used by `chrono`
# upstream issue: https://github.com/chronotope/chrono/issues/553
"RUSTSEC-2020-0071",
# title: Potential segfault in `localtime_r` invocations
# why needed: bug in `chrono`
# upstream issue: https://github.com/chronotope/chrono/issues/499
"RUSTSEC-2020-0159",
# title: memmap is unmaintained
# why needed: used by `symbolic` which is used by `pprof`

View File

@ -127,9 +127,11 @@ jobs:
# excluding datafusion because it's effectively a dependency masquerading as a workspace crate.
command: cargo doc --document-private-items --no-deps --workspace --exclude datafusion
- cache_save
- run:
name: Compress Docs
command: tar -cvzf rustdoc.tar.gz target/doc/
- store_artifacts:
path: target/doc/
destination: rustdoc
path: rustdoc.tar.gz
test:
docker:
@ -282,11 +284,27 @@ jobs:
protobuf-lint:
docker:
- image: bufbuild/buf:0.40.0
environment:
# Value to look for to skip breaking changes check
SKIP_LABEL: "https://api.github.com/repos/influxdata/influxdb_iox/labels/incompatible%20protobuf"
steps:
- checkout
- run:
name: buf lint
command: buf lint
- run:
name: buf breaking changes
command: |
echo "If you want to make changes forbidden by this lint, please"
echo "coordinate with the conductor team, add the 'incompatible protobuf' label"
echo "to the PR, and rerun this test"
# Check if label is present using github API:
# Inspired by https://discuss.circleci.com/t/tag-label-filter/11158
if wget -O - https://api.github.com/repos/influxdata/influxdb_iox/issues/$(echo $CIRCLE_PULL_REQUEST | grep -oE "[^/pull]+$") | grep "$SKIP_LABEL" ; then echo "SKIPPING (FOUND LABEL)" && exit ; else echo "CHECKING (NO LABEL FOUND)"; fi
git fetch origin main
# compare against only changes in this branch (not against
# other stuff that may have been added to main since the last merge)
MERGE_BASE=$(git merge-base origin/main $CIRCLE_BRANCH) sh -c 'buf breaking --against ".git#ref=$MERGE_BASE"'
# Check that any generated files are up-to-date with the changes in this PR.
# named "check-flatbuffers" because that name is hardcoded into github checks
@ -336,15 +354,15 @@ jobs:
- checkout
- rust_components
- cache_restore
- run:
name: Print rustc target CPU options
command: cargo run --release --no-default-features --features="aws,gcp,azure,jemalloc_replacing_malloc" --bin print_cpu
- run:
name: Cargo release build with target arch set for CRoaring
command: cargo build --release --no-default-features --features="aws,gcp,azure,jemalloc_replacing_malloc"
- run: |
echo sha256sum after build is
sha256sum target/release/influxdb_iox
- run:
name: Print rustc target CPU options
command: target/release/influxdb_iox debug print-cpu
- setup_remote_docker:
# There seems to be a cache invalidation bug in docker
# or in the way that circleci implements layer caching.

View File

@ -5,8 +5,6 @@ updates:
schedule:
interval: "weekly"
ignore:
# https://github.com/tkaitchuck/aHash/issues/95
- dependency-name: "indexmap"
# Thrift version needs to match the version of the thrift-compiler used to generate code,
# and therefore this dependency requires a more manual upgrade
#

Cargo.lock (generated): 555 changed lines

File diff suppressed because it is too large

View File

@ -1,10 +1,57 @@
[package]
name = "influxdb_iox"
version = "0.1.0"
authors = ["Paul Dix <paul@pauldix.net>"]
edition = "2018"
default-run = "influxdb_iox"
readme = "README.md"
[workspace]
# In alphabetical order
members = [
"arrow_util",
"client_util",
"data_types",
"datafusion",
"datafusion_util",
"entry",
"generated_types",
"grpc-router",
"grpc-router-test-gen",
"influxdb2_client",
"influxdb_iox",
"influxdb_iox_client",
"influxdb_line_protocol",
"influxdb_storage_client",
"influxdb_tsm",
"internal_types",
"iox_data_generator",
"iox_object_store",
"lifecycle",
"logfmt",
"metric",
"metric_exporters",
"mutable_batch",
"mutable_batch_entry",
"mutable_batch_lp",
"mutable_batch_pb",
"mutable_buffer",
"object_store",
"observability_deps",
"packers",
"panic_logging",
"parquet_catalog",
"parquet_file",
"persistence_windows",
"predicate",
"query",
"query_tests",
"read_buffer",
"schema",
"server",
"server_benchmarks",
"test_helpers",
"time",
"trace",
"trace_exporters",
"trace_http",
"tracker",
"trogging",
"write_buffer",
]
default-members = ["influxdb_iox"]
exclude = [
"*.md",
@ -22,179 +69,12 @@ exclude = [
"massif.out.*",
"perf/",
"scripts/",
"test_fixtures/",
"tools/",
]
[[bin]]
name = "print_cpu"
path = "src/print_cpu.rs"
[workspace] # In alphabetical order
members = [
"arrow_util",
"data_types",
"client_util",
"datafusion",
"datafusion_util",
"entry",
"generated_types",
"influxdb2_client",
"influxdb_iox_client",
"influxdb_line_protocol",
"influxdb_storage_client",
"influxdb_tsm",
"internal_types",
"iox_data_generator",
"iox_object_store",
"lifecycle",
"logfmt",
"metric",
"metric_exporters",
"mutable_buffer",
"object_store",
"observability_deps",
"packers",
"panic_logging",
"persistence_windows",
"predicate",
"query",
"query_tests",
"read_buffer",
"server",
"server_benchmarks",
"test_helpers",
"time",
"trace",
"trace_exporters",
"trace_http",
"tracker",
"trogging",
"schema",
"grpc-router",
"grpc-router/grpc-router-test-gen",
"write_buffer",
]
[profile.release]
debug = true
[profile.bench]
debug = true
[dependencies]
# Workspace dependencies, in alphabetical order
datafusion = { path = "datafusion" }
data_types = { path = "data_types" }
entry = { path = "entry" }
generated_types = { path = "generated_types" }
influxdb_iox_client = { path = "influxdb_iox_client", features = ["format"] }
influxdb_line_protocol = { path = "influxdb_line_protocol" }
internal_types = { path = "internal_types" }
iox_object_store = { path = "iox_object_store" }
logfmt = { path = "logfmt" }
metric = { path = "metric" }
metric_exporters = { path = "metric_exporters" }
mutable_buffer = { path = "mutable_buffer" }
num_cpus = "1.13.0"
object_store = { path = "object_store" }
observability_deps = { path = "observability_deps" }
panic_logging = { path = "panic_logging" }
parquet_file = { path = "parquet_file" }
predicate = { path = "predicate" }
query = { path = "query" }
read_buffer = { path = "read_buffer" }
server = { path = "server" }
trace = { path = "trace" }
trace_exporters = { path = "trace_exporters" }
trace_http = { path = "trace_http" }
tracker = { path = "tracker" }
trogging = { path = "trogging", default-features = false, features = ["structopt"] }
time = { path = "time" }
# Crates.io dependencies, in alphabetical order
arrow = { version = "5.5", features = ["prettyprint"] }
arrow-flight = "5.5"
backtrace = "0.3"
byteorder = "1.3.4"
bytes = "1.0"
chrono = "0.4"
clap = "2.33.1"
csv = "1.1"
dirs = "4.0.0"
dotenv = "0.15.0"
flate2 = "1.0"
futures = "0.3"
hashbrown = "0.11"
http = "0.2.0"
humantime = "2.1.0"
hyper = "0.14"
libc = { version = "0.2" }
log = "0.4"
once_cell = { version = "1.4.0", features = ["parking_lot"] }
parking_lot = "0.11.2"
itertools = "0.10.1"
parquet = "5.5"
# used by arrow/datafusion anyway
prettytable-rs = "0.8"
pprof = { version = "^0.5", default-features = false, features = ["flamegraph", "protobuf"], optional = true }
prost = "0.8"
rustyline = { version = "9.0", default-features = false }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.67"
serde_urlencoded = "0.7.0"
snafu = "0.6.9"
structopt = "0.3.23"
thiserror = "1.0.30"
tikv-jemalloc-ctl = { version = "0.4.0" }
tokio = { version = "1.11", features = ["macros", "rt-multi-thread", "parking_lot", "signal"] }
tokio-stream = { version = "0.1.2", features = ["net"] }
tokio-util = { version = "0.6.3" }
tonic = "0.5.0"
tonic-health = "0.4.0"
tonic-reflection = "0.2.0"
tower = "0.4"
uuid = { version = "0.8", features = ["v4"] }
# jemalloc-sys with unprefixed_malloc_on_supported_platforms feature and heappy are mutually exclusive
tikv-jemalloc-sys = { version = "0.4.0", optional = true, features = ["unprefixed_malloc_on_supported_platforms"] }
heappy = { git = "https://github.com/mkmik/heappy", rev = "20aa466524ac9ce34a4bae29f27ec11869b50e21", features = ["enable_heap_profiler", "jemalloc_shim", "measure_free"], optional = true }
[dev-dependencies]
# Workspace dependencies, in alphabetical order
arrow_util = { path = "arrow_util" }
entry = { path = "entry" }
influxdb2_client = { path = "influxdb2_client" }
influxdb_storage_client = { path = "influxdb_storage_client" }
influxdb_iox_client = { path = "influxdb_iox_client", features = ["flight"] }
test_helpers = { path = "test_helpers" }
synchronized-writer = "1"
parking_lot = "0.11.2"
write_buffer = { path = "write_buffer" }
# Crates.io dependencies, in alphabetical order
assert_cmd = "2.0.2"
flate2 = "1.0"
hex = "0.4.2"
predicates = "2.0.3"
rand = "0.8.3"
rdkafka = "0.26.0"
reqwest = "0.11"
tempfile = "3.1.0"
[features]
default = ["jemalloc_replacing_malloc"]
azure = ["object_store/azure"] # Optional Azure Object store support
gcp = ["object_store/gcp"] # Optional GCP object store support
aws = ["object_store/aws"] # Optional AWS / S3 object store support
# pprof is an optional feature for pprof support
# heappy is an optional feature; not on by default as it adds
# runtime overhead to all allocations (calls to malloc).
# Cargo cannot currently implement mutually exclusive features so let's force every build
# to pick either heappy or jemalloc_replacing_malloc feature at least until we figure out something better.
jemalloc_replacing_malloc = ["tikv-jemalloc-sys"]

View File

@ -33,7 +33,7 @@ We're also hosting monthly tech talks and community office hours on the project
1. [Install dependencies](#install-dependencies)
1. [Clone the repository](#clone-the-repository)
1. [Configure the server](#configure-the-server)
1. [Compile and start the server](#compile-and-start-the-server)
1. [Compile and start the server](#compile-and-start-the-server)
(You can also [build a Docker image](#build-a-docker-image-optional) to run InfluxDB IOx.)
1. [Write and read data](#write-and-read-data)
1. [Use the CLI](#use-the-cli)
@ -47,6 +47,7 @@ To compile and run InfluxDB IOx from source, you'll need the following:
- [Rust](#rust)
- [Clang](#clang)
- [lld (on Linux)](#lld)
#### Rust
@ -72,6 +73,19 @@ If `clang` is not already present, it can typically be installed with the system
[`croaring`]: https://github.com/saulius/croaring-rs
#### lld
If you are building InfluxDB IOx on Linux then you will need to ensure you have installed the `lld` LLVM linker.
Check if you have already installed it by running `lld -version`.
```shell
lld -version
lld is a generic driver.
Invoke ld.lld (Unix), ld64.lld (macOS), lld-link (Windows), wasm-ld (WebAssembly) instead
```
If `lld` is not already present, it can typically be installed with the system package manager.
### Clone the repository
Clone this repository using `git`.
@ -120,7 +134,7 @@ This will create a binary at `target/debug/influxdb_iox`.
To start the InfluxDB IOx server, run:
```shell
./target/debug/influxdb_iox run
./target/debug/influxdb_iox run database
```
By default the server will start an HTTP server on port `8080` and a gRPC server on port `8082`.
@ -128,20 +142,20 @@ By default the server will start an HTTP server on port `8080` and a gRPC server
You can also compile and run with one command:
```shell
cargo run -- run
cargo run -- run database
```
To compile for performance testing, build in release mode:
```shell
cargo build --release
./target/release/influxdb_iox run
./target/release/influxdb_iox run database
```
You can also run in release mode with one step:
```shell
cargo run --release -- run
cargo run --release -- run database
```
To run all available tests in debug mode, you may want to set min stack size to avoid the current known stack overflow issue:

View File

@ -2,19 +2,19 @@
name = "arrow_util"
version = "0.1.0"
authors = ["Andrew Lamb <andrew@nerdnetworks.org>"]
edition = "2018"
edition = "2021"
description = "Apache Arrow utilities"
[dependencies]
arrow = { version = "5.5", features = ["prettyprint"] }
arrow = { version = "6.0", features = ["prettyprint"] }
ahash = "0.7.5"
num-traits = "0.2"
snafu = "0.6"
hashbrown = "0.11"
# used by arrow anyway (needed for printing workaround)
chrono = "0.4"
prettytable-rs = "0.8"
comfy-table = { version = "4.0", default-features = false }
[dev-dependencies]
rand = "0.8.3"

View File

@ -1,4 +1,5 @@
use arrow::buffer::Buffer;
use std::ops::Range;
/// An arrow-compatible mutable bitset implementation
///
@ -28,6 +29,12 @@ impl BitSet {
bitset
}
/// Reserve space for `count` further bits
pub fn reserve(&mut self, count: usize) {
let new_buf_len = (self.len + count + 7) >> 3;
self.buffer.reserve(new_buf_len);
}
/// Appends `count` unset bits
pub fn append_unset(&mut self, count: usize) {
self.len += count;
@ -35,8 +42,75 @@ impl BitSet {
self.buffer.resize(new_buf_len, 0);
}
/// Appends `count` set bits
pub fn append_set(&mut self, count: usize) {
let new_len = self.len + count;
let new_buf_len = (new_len + 7) >> 3;
let skew = self.len & 7;
if skew != 0 {
*self.buffer.last_mut().unwrap() |= 0xFF << skew;
}
self.buffer.resize(new_buf_len, 0xFF);
let rem = new_len & 7;
if rem != 0 {
*self.buffer.last_mut().unwrap() &= (1 << rem) - 1;
}
self.len = new_len;
}
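The skew arithmetic above is terse, so here is a hedged, standalone sketch of the same byte math on a plain `Vec<u8>` (LSB-first packing, hypothetical helper, not the IOx `BitSet` itself), with a small worked example:

```rust
// Standalone sketch of the append_set byte math (illustrative only).
// Bits are packed LSB-first: `len` bits occupy (len + 7) >> 3 bytes and
// `len & 7` ("skew") is how many bits of the final byte are already used.
fn append_set(buffer: &mut Vec<u8>, len: &mut usize, count: usize) {
    let new_len = *len + count;
    let new_buf_len = (new_len + 7) >> 3;

    // Fill the unused tail of the current partial byte with ones.
    let skew = *len & 7;
    if skew != 0 {
        *buffer.last_mut().unwrap() |= 0xFF << skew;
    }

    // Wholly new bytes start out all-ones...
    buffer.resize(new_buf_len, 0xFF);

    // ...and any bits past the new logical length are cleared again.
    let rem = new_len & 7;
    if rem != 0 {
        *buffer.last_mut().unwrap() &= (1 << rem) - 1;
    }
    *len = new_len;
}

fn main() {
    let (mut buf, mut len) = (vec![0b0000_0101u8], 3); // bits: 1, 0, 1
    append_set(&mut buf, &mut len, 6); // append six set bits
    assert_eq!(len, 9);
    assert_eq!(buf, vec![0b1111_1101, 0b0000_0001]);
}
```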
/// Truncates the bitset to the provided length
pub fn truncate(&mut self, len: usize) {
let new_buf_len = (len + 7) >> 3;
self.buffer.truncate(new_buf_len);
let overrun = len & 7;
if overrun > 0 {
*self.buffer.last_mut().unwrap() &= (1 << overrun) - 1;
}
self.len = len;
}
/// Extends this [`BitSet`] by the contents of `other`
pub fn extend_from(&mut self, other: &BitSet) {
self.append_bits(other.len, &other.buffer)
}
/// Extends this [`BitSet`] by `range` elements in `other`
pub fn extend_from_range(&mut self, other: &BitSet, range: Range<usize>) {
let count = range.end - range.start;
if count == 0 {
return;
}
let start_byte = range.start >> 3;
let end_byte = (range.end + 7) >> 3;
let skew = range.start & 7;
// `append_bits` requires the provided `to_set` to be byte aligned, therefore
// if the range being copied is not byte aligned we must first append
// the leading bits to reach a byte boundary
if skew == 0 {
// No skew can simply append bytes directly
self.append_bits(count, &other.buffer[start_byte..end_byte])
} else if start_byte + 1 == end_byte {
// Append bits from single byte
self.append_bits(count, &[other.buffer[start_byte] >> skew])
} else {
// Append trailing bits from first byte to reach byte boundary, then append
// bits from the remaining byte-aligned mask
let offset = 8 - skew;
self.append_bits(offset, &[other.buffer[start_byte] >> skew]);
self.append_bits(count - offset, &other.buffer[(start_byte + 1)..end_byte]);
}
}
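As a cross-check on the three branches above, the following standalone, unoptimised sketch (hypothetical names, not the IOx API) copies a bit range out of a packed slice bit by bit and shows the result the byte-at-a-time code has to reproduce:

```rust
// Bit-for-bit reference for copying a range out of an LSB-first packed
// byte slice (illustrative only; the optimised code above shifts whole
// bytes once the leading "skew" bits have been appended).
use std::ops::Range;

fn copy_bit_range(src: &[u8], range: Range<usize>) -> Vec<u8> {
    let count = range.end - range.start;
    let mut out = vec![0u8; (count + 7) >> 3];
    for (i, bit) in range.enumerate() {
        if (src[bit >> 3] >> (bit & 7)) & 1 == 1 {
            out[i >> 3] |= 1 << (i & 7);
        }
    }
    out
}

fn main() {
    // Source bits (LSB first per byte): 1,1,0,0,1,0,1,0  0,1,0,1,0,0,1,1
    let src = [0b0101_0011u8, 0b1100_1010];
    // Bits 3..11 are not byte aligned, so the skewed path would apply.
    assert_eq!(copy_bit_range(&src, 3..11), vec![0b0100_1010]);
}
```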
/// Appends `count` boolean values from the slice of packed bits
pub fn append_bits(&mut self, count: usize, to_set: &[u8]) {
assert_eq!((count + 7) >> 3, to_set.len());
let new_len = self.len + count;
let new_buf_len = (new_len + 7) >> 3;
self.buffer.reserve(new_buf_len - self.buffer.len());
@ -113,12 +187,30 @@ impl BitSet {
pub fn byte_len(&self) -> usize {
self.buffer.len()
}
/// Return the raw packed bytes used by this bitset
pub fn bytes(&self) -> &[u8] {
&self.buffer
}
}
/// Returns an iterator over set bit positions in increasing order
pub fn iter_set_positions(bytes: &[u8]) -> impl Iterator<Item = usize> + '_ {
let mut byte_idx = 0;
let mut in_progress = bytes.get(0).cloned().unwrap_or(0);
iter_set_positions_with_offset(bytes, 0)
}
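For the iteration semantics themselves (both this function and the offset-aware variant defined next), a compact, unoptimised reference looks like the sketch below; it is standalone and illustrative, not the crate's implementation:

```rust
// Unoptimised reference: positions of set bits at or after `offset`,
// assuming LSB-first packing within each byte.
fn set_positions_with_offset(bytes: &[u8], offset: usize) -> Vec<usize> {
    (offset..bytes.len() * 8)
        .filter(|&i| (bytes[i >> 3] >> (i & 7)) & 1 == 1)
        .collect()
}

fn main() {
    let bytes = [0b0000_0101u8, 0b0000_0010];
    assert_eq!(set_positions_with_offset(&bytes, 0), vec![0, 2, 9]);
    assert_eq!(set_positions_with_offset(&bytes, 1), vec![2, 9]);
}
```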
/// Returns an iterator over set bit positions in increasing order starting
/// at the provided bit offset
pub fn iter_set_positions_with_offset(
bytes: &[u8],
offset: usize,
) -> impl Iterator<Item = usize> + '_ {
let mut byte_idx = offset >> 3;
let mut in_progress = bytes.get(byte_idx).cloned().unwrap_or(0);
let skew = offset & 7;
in_progress &= 0xFF << skew;
std::iter::from_fn(move || loop {
if in_progress != 0 {
let bit_pos = in_progress.trailing_zeros();
@ -134,7 +226,8 @@ pub fn iter_set_positions(bytes: &[u8]) -> impl Iterator<Item = usize> + '_ {
mod tests {
use super::*;
use arrow::array::BooleanBufferBuilder;
use rand::RngCore;
use rand::prelude::*;
use rand::rngs::OsRng;
/// Computes a compacted representation of a given bool array
fn compact_bools(bools: &[bool]) -> Vec<u8> {
@ -201,11 +294,17 @@ mod tests {
assert!(mask.get(19));
}
fn make_rng() -> StdRng {
let seed = OsRng::default().next_u64();
println!("Seed: {}", seed);
StdRng::seed_from_u64(seed)
}
#[test]
fn test_bit_mask_all_set() {
let mut mask = BitSet::new();
let mut all_bools = vec![];
let mut rng = rand::thread_rng();
let mut rng = make_rng();
for _ in 0..100 {
let mask_length = (rng.next_u32() % 50) as usize;
@ -228,7 +327,7 @@ mod tests {
fn test_bit_mask_fuzz() {
let mut mask = BitSet::new();
let mut all_bools = vec![];
let mut rng = rand::thread_rng();
let mut rng = make_rng();
for _ in 0..100 {
let mask_length = (rng.next_u32() % 50) as usize;
@ -247,11 +346,105 @@ mod tests {
let expected_indexes: Vec<_> = iter_set_bools(&all_bools).collect();
let actual_indexes: Vec<_> = iter_set_positions(&mask.buffer).collect();
assert_eq!(expected_indexes, actual_indexes);
if !all_bools.is_empty() {
for _ in 0..10 {
let offset = rng.next_u32() as usize % all_bools.len();
let expected_indexes: Vec<_> = iter_set_bools(&all_bools[offset..])
.map(|x| x + offset)
.collect();
let actual_indexes: Vec<_> =
iter_set_positions_with_offset(&mask.buffer, offset).collect();
assert_eq!(expected_indexes, actual_indexes);
}
}
for index in actual_indexes {
assert!(mask.get(index));
}
}
#[test]
fn test_append_fuzz() {
let mut mask = BitSet::new();
let mut all_bools = vec![];
let mut rng = make_rng();
for _ in 0..100 {
let len = (rng.next_u32() % 32) as usize;
let set = rng.next_u32() & 1 == 0;
match set {
true => mask.append_set(len),
false => mask.append_unset(len),
}
all_bools.extend(std::iter::repeat(set).take(len));
let collected = compact_bools(&all_bools);
assert_eq!(mask.buffer, collected);
}
}
#[test]
fn test_truncate_fuzz() {
let mut mask = BitSet::new();
let mut all_bools = vec![];
let mut rng = make_rng();
for _ in 0..100 {
let mask_length = (rng.next_u32() % 32) as usize;
let bools: Vec<_> = std::iter::from_fn(|| Some(rng.next_u32() & 1 == 0))
.take(mask_length)
.collect();
let collected = compact_bools(&bools);
mask.append_bits(mask_length, &collected);
all_bools.extend_from_slice(&bools);
if !all_bools.is_empty() {
let truncate = rng.next_u32() as usize % all_bools.len();
mask.truncate(truncate);
all_bools.truncate(truncate);
}
let collected = compact_bools(&all_bools);
assert_eq!(mask.buffer, collected);
}
}
#[test]
fn test_extend_range_fuzz() {
let mut rng = make_rng();
let src_len = 32;
let src_bools: Vec<_> = std::iter::from_fn(|| Some(rng.next_u32() & 1 == 0))
.take(src_len)
.collect();
let mut src_mask = BitSet::new();
src_mask.append_bits(src_len, &compact_bools(&src_bools));
let mut dst_bools = Vec::new();
let mut dst_mask = BitSet::new();
for _ in 0..100 {
let a = rng.next_u32() as usize % src_len;
let b = rng.next_u32() as usize % src_len;
let start = a.min(b);
let end = a.max(b);
dst_bools.extend_from_slice(&src_bools[start..end]);
dst_mask.extend_from_range(&src_mask, start..end);
let collected = compact_bools(&dst_bools);
assert_eq!(dst_mask.buffer, collected);
}
}
#[test]
fn test_arrow_compat() {
let bools = &[

View File

@ -112,6 +112,19 @@ impl<K: AsPrimitive<usize> + FromPrimitive + Zero> StringDictionary<K> {
pub fn into_inner(self) -> PackedStringArray<K> {
self.storage
}
/// Truncates this dictionary removing all keys larger than `id`
pub fn truncate(&mut self, id: K) {
let id = id.as_();
self.dedup.retain(|k, _| k.as_() <= id);
self.storage.truncate(id + 1)
}
/// Clears this dictionary removing all elements
pub fn clear(&mut self) {
self.storage.clear();
self.dedup.clear()
}
}
fn hash_str(hasher: &ahash::RandomState, value: &str) -> u64 {
@ -142,7 +155,10 @@ impl StringDictionary<i32> {
array_builder = array_builder.null_bit_buffer(nulls);
}
DictionaryArray::<Int32Type>::from(array_builder.build())
// TODO consider skipping the validation checks by using
// `build_unchecked()`
let array_data = array_builder.build().expect("Valid array data");
DictionaryArray::<Int32Type>::from(array_data)
}
}
@ -255,4 +271,30 @@ mod test {
let err = TryInto::<StringDictionary<_>>::try_into(data).expect_err("expected failure");
assert!(matches!(err, Error::DuplicateKeyFound { key } if &key == "cupcakes"))
}
#[test]
fn test_truncate() {
let mut dictionary = StringDictionary::<i32>::new();
dictionary.lookup_value_or_insert("cupcake");
dictionary.lookup_value_or_insert("cupcake");
dictionary.lookup_value_or_insert("bingo");
let bingo = dictionary.lookup_value_or_insert("bingo");
let bongo = dictionary.lookup_value_or_insert("bongo");
dictionary.lookup_value_or_insert("bingo");
dictionary.lookup_value_or_insert("cupcake");
dictionary.truncate(bingo);
assert_eq!(dictionary.values().len(), 2);
assert_eq!(dictionary.dedup.len(), 2);
assert_eq!(dictionary.lookup_value("cupcake"), Some(0));
assert_eq!(dictionary.lookup_value("bingo"), Some(1));
assert!(dictionary.lookup_value("bongo").is_none());
assert!(dictionary.lookup_id(bongo).is_none());
dictionary.lookup_value_or_insert("bongo");
assert_eq!(dictionary.lookup_value("bongo"), Some(2));
}
}

View File

@ -3,8 +3,7 @@ use arrow::datatypes::{DataType, TimeUnit};
use arrow::error::Result;
use arrow::record_batch::RecordBatch;
use prettytable::format;
use prettytable::{Cell, Row, Table};
use comfy_table::{Cell, Table};
use chrono::prelude::*;
@ -54,7 +53,7 @@ fn array_value_to_string(column: &ArrayRef, row: usize) -> Result<String> {
/// NB: COPIED FROM ARROW
fn create_table(results: &[RecordBatch]) -> Result<Table> {
let mut table = Table::new();
table.set_format(*format::consts::FORMAT_NO_LINESEP_WITH_TITLE);
table.load_preset("||--+-++| ++++++");
if results.is_empty() {
return Ok(table);
@ -66,7 +65,7 @@ fn create_table(results: &[RecordBatch]) -> Result<Table> {
for field in schema.fields() {
header.push(Cell::new(field.name()));
}
table.set_titles(Row::new(header));
table.set_header(header);
for batch in results {
for row in 0..batch.num_rows() {
@ -75,7 +74,7 @@ fn create_table(results: &[RecordBatch]) -> Result<Table> {
let column = batch.column(col);
cells.push(Cell::new(&array_value_to_string(column, row)?));
}
table.add_row(Row::new(cells));
table.add_row(cells);
}
}
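This hunk swaps `prettytable-rs` for `comfy-table`; a minimal standalone sketch of the comfy-table 4.x calls relied on here (`load_preset`, `set_header`, `add_row`), using the preset string shown above and made-up sample data:

```rust
// Minimal comfy-table usage mirroring create_table above (sample data only).
use comfy_table::{Cell, Table};

fn main() {
    let mut table = Table::new();
    // Same character preset string as used in create_table.
    table.load_preset("||--+-++| ++++++");
    table.set_header(vec![Cell::new("time"), Cell::new("value")]);
    table.add_row(vec![Cell::new("2021-11-01T00:00:00Z"), Cell::new("1.5")]);
    println!("{}", table);
}
```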

View File

@ -297,7 +297,8 @@ mod tests {
.add_buffer(keys.data().buffers()[0].clone())
.null_bit_buffer(keys.data().null_buffer().unwrap().clone())
.add_child_data(values.data().clone())
.build();
.build()
.unwrap();
DictionaryArray::from(data)
}

View File

@ -3,6 +3,7 @@ use arrow::array::StringArray;
use arrow::buffer::Buffer;
use num_traits::{AsPrimitive, FromPrimitive, Zero};
use std::fmt::Debug;
use std::ops::Range;
/// A packed string array that stores start and end indexes into
/// a contiguous string slice.
@ -62,6 +63,41 @@ impl<K: AsPrimitive<usize> + FromPrimitive + Zero> PackedStringArray<K> {
id
}
/// Extends this [`PackedStringArray`] by the contents of `other`
pub fn extend_from(&mut self, other: &PackedStringArray<K>) {
let offset = self.storage.len();
self.storage.push_str(other.storage.as_str());
// Copy offsets, skipping the first element, as that string's start offset is already
// provided by the final offset of the current offsets array
self.offsets.extend(
other
.offsets
.iter()
.skip(1)
.map(|x| K::from_usize(x.as_() + offset).expect("failed to fit into offset type")),
)
}
/// Extends this [`PackedStringArray`] by `range` elements from `other`
pub fn extend_from_range(&mut self, other: &PackedStringArray<K>, range: Range<usize>) {
let first_offset: usize = other.offsets[range.start].as_();
let end_offset: usize = other.offsets[range.end].as_();
let insert_offset = self.storage.len();
self.storage
.push_str(&other.storage[first_offset..end_offset]);
self.offsets.extend(
(&other.offsets[(range.start + 1)..(range.end + 1)])
.iter()
.map(|x| {
K::from_usize(x.as_() - first_offset + insert_offset)
.expect("failed to fit into offset type")
}),
)
}
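The offset rebasing above is easiest to follow with concrete numbers; the sketch below reproduces it on plain `String`/`Vec<usize>` values (hypothetical names, not the IOx type):

```rust
// Standalone sketch of the PackedStringArray layout and offset rebasing
// (illustrative only): strings live back-to-back in `storage`, and
// offsets[i]..offsets[i + 1] delimits element i.
fn main() {
    let storage = String::from("helloworldcupcake");
    let offsets = vec![0usize, 5, 10, 17]; // "hello", "world", "cupcake"

    // Copy elements 1..3 ("world", "cupcake") into an empty destination.
    let (start, end) = (1, 3);
    let first_offset = offsets[start];
    let end_offset = offsets[end];

    let mut dst_storage = String::new();
    let mut dst_offsets = vec![0usize];

    let insert_offset = dst_storage.len();
    dst_storage.push_str(&storage[first_offset..end_offset]);
    // Rebase the copied offsets so they point into the new storage.
    dst_offsets.extend(
        offsets[(start + 1)..=end]
            .iter()
            .map(|x| x - first_offset + insert_offset),
    );

    assert_eq!(dst_storage, "worldcupcake");
    assert_eq!(dst_offsets, vec![0, 5, 12]);
}
```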
/// Get the value at a given index
pub fn get(&self, index: usize) -> Option<&str> {
let start_offset = self.offsets.get(index)?.as_();
@ -76,6 +112,19 @@ impl<K: AsPrimitive<usize> + FromPrimitive + Zero> PackedStringArray<K> {
self.offsets.resize(self.offsets.len() + len, offset);
}
/// Truncates the array to the given length
pub fn truncate(&mut self, len: usize) {
self.offsets.truncate(len + 1);
let last_idx = self.offsets.last().expect("offsets empty");
self.storage.truncate(last_idx.as_());
}
/// Removes all elements from this array
pub fn clear(&mut self) {
self.offsets.truncate(1);
self.storage.clear();
}
pub fn iter(&self) -> PackedStringIterator<'_, K> {
PackedStringIterator {
array: self,
@ -112,8 +161,10 @@ impl PackedStringArray<i32> {
.len(len)
.add_buffer(offsets)
.add_buffer(values)
.build();
.build()
// TODO consider skipping the validation checks by using
// `new_unchecked`
.expect("Valid array data");
StringArray::from(data)
}
}
@ -170,4 +221,91 @@ mod tests {
assert_eq!(array.get(9).unwrap(), "");
assert_eq!(array.get(3).unwrap(), "");
}
#[test]
fn test_truncate() {
let mut array = PackedStringArray::<i32>::new();
array.append("hello");
array.append("world");
array.append("cupcake");
array.truncate(1);
assert_eq!(array.len(), 1);
assert_eq!(array.get(0).unwrap(), "hello");
array.append("world");
assert_eq!(array.len(), 2);
assert_eq!(array.get(0).unwrap(), "hello");
assert_eq!(array.get(1).unwrap(), "world");
}
#[test]
fn test_extend_from() {
let mut a = PackedStringArray::<i32>::new();
a.append("hello");
a.append("world");
a.append("cupcake");
a.append("");
let mut b = PackedStringArray::<i32>::new();
b.append("foo");
b.append("bar");
a.extend_from(&b);
let a_content: Vec<_> = a.iter().collect();
assert_eq!(
a_content,
vec!["hello", "world", "cupcake", "", "foo", "bar"]
);
}
#[test]
fn test_extend_from_range() {
let mut a = PackedStringArray::<i32>::new();
a.append("hello");
a.append("world");
a.append("cupcake");
a.append("");
let mut b = PackedStringArray::<i32>::new();
b.append("foo");
b.append("bar");
b.append("");
b.append("fiz");
a.extend_from_range(&b, 1..3);
assert_eq!(a.len(), 6);
let a_content: Vec<_> = a.iter().collect();
assert_eq!(a_content, vec!["hello", "world", "cupcake", "", "bar", ""]);
// Should be a no-op
a.extend_from_range(&b, 0..0);
let a_content: Vec<_> = a.iter().collect();
assert_eq!(a_content, vec!["hello", "world", "cupcake", "", "bar", ""]);
a.extend_from_range(&b, 0..1);
let a_content: Vec<_> = a.iter().collect();
assert_eq!(
a_content,
vec!["hello", "world", "cupcake", "", "bar", "", "foo"]
);
a.extend_from_range(&b, 1..4);
let a_content: Vec<_> = a.iter().collect();
assert_eq!(
a_content,
vec!["hello", "world", "cupcake", "", "bar", "", "foo", "bar", "", "fiz"]
);
}
}

View File

@ -3,7 +3,7 @@ name = "client_util"
version = "0.1.0"
authors = ["Raphael Taylor-Davies <r.taylordavies@googlemail.com>"]
description = "Shared code for IOx clients"
edition = "2018"
edition = "2021"
[dependencies]
http = "0.2.3"
@ -13,4 +13,4 @@ tonic = { version = "0.5.0" }
tower = "0.4"
[dev-dependencies]
tokio = { version = "1.11", features = ["macros"] }
tokio = { version = "1.11", features = ["macros"] }

View File

@ -3,7 +3,7 @@ name = "data_types"
version = "0.1.0"
authors = ["pauldix <paul@pauldix.net>"]
description = "InfluxDB IOx data_types, shared between IOx instances and IOx clients"
edition = "2018"
edition = "2021"
readme = "README.md"
[dependencies] # In alphabetical order

View File

@ -1,34 +1,11 @@
use crate::DatabaseName;
use chrono::{DateTime, Utc};
use std::{fmt, str::FromStr};
use uuid::Uuid;
/// Detailed metadata about a database.
/// Detailed metadata about an active database.
#[derive(Debug, Clone, PartialEq)]
pub struct DetailedDatabase {
pub struct ActiveDatabase {
/// The name of the database
pub name: DatabaseName<'static>,
/// The generation ID of the database in object storage
pub generation_id: GenerationId,
/// The UTC datetime at which this database was deleted, if applicable
pub deleted_at: Option<DateTime<Utc>>,
}
/// Identifier for a generation of a particular database
#[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd)]
pub struct GenerationId {
pub inner: usize,
}
impl FromStr for GenerationId {
type Err = std::num::ParseIntError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(Self { inner: s.parse()? })
}
}
impl fmt::Display for GenerationId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.inner)
}
/// The UUID of the database
pub uuid: Uuid,
}

View File

@ -421,6 +421,19 @@ impl<T> Default for StatValues<T> {
}
}
impl<T> StatValues<T> {
/// Create new statistics with no values
pub fn new_empty() -> Self {
Self {
min: None,
max: None,
total_count: 0,
null_count: 0,
distinct_count: None,
}
}
}
impl<T> StatValues<T>
where
T: Clone + PartialEq + PartialOrd + IsNan,
@ -440,14 +453,14 @@ where
Self::new_with_distinct(min, max, total_count, null_count, distinct_count)
}
/// Create new statitics with the specified count and null count
/// Create new statistics with the specified count and null count
pub fn new(min: Option<T>, max: Option<T>, total_count: u64, null_count: u64) -> Self {
let distinct_count = None;
Self::new_with_distinct(min, max, total_count, null_count, distinct_count)
}
/// Create new statitics with the specified count and null count and distinct values
fn new_with_distinct(
/// Create new statistics with the specified count and null count and distinct values
pub fn new_with_distinct(
min: Option<T>,
max: Option<T>,
total_count: u64,
@ -481,12 +494,21 @@ where
}
/// Create statistics for a column that only has nulls up to now
pub fn new_all_null(total_count: u64) -> Self {
pub fn new_all_null(total_count: u64, distinct_count: Option<u64>) -> Self {
let min = None;
let max = None;
let null_count = total_count;
let distinct_count = NonZeroU64::new(1);
Self::new_with_distinct(min, max, total_count, null_count, distinct_count)
if let Some(count) = distinct_count {
assert!(count > 0);
}
Self::new_with_distinct(
min,
max,
total_count,
null_count,
distinct_count.map(|c| NonZeroU64::new(c).unwrap()),
)
}
pub fn update_from(&mut self, other: &Self) {
@ -647,6 +669,8 @@ impl IsNan for f64 {
#[cfg(test)]
mod tests {
use std::convert::TryFrom;
use super::*;
#[test]
@ -664,13 +688,25 @@ mod tests {
#[test]
fn statistics_new_all_null() {
let actual = StatValues::<i64>::new_all_null(3);
// i64 values do not have a distinct count
let actual = StatValues::<i64>::new_all_null(3, None);
let expected = StatValues {
min: None,
max: None,
total_count: 3,
null_count: 3,
distinct_count: NonZeroU64::new(1),
distinct_count: None,
};
assert_eq!(actual, expected);
// string columns can have a distinct count
let actual = StatValues::<i64>::new_all_null(3, Some(1_u64));
let expected = StatValues {
min: None,
max: None,
total_count: 3,
null_count: 3,
distinct_count: Some(NonZeroU64::try_from(1_u64).unwrap()),
};
assert_eq!(actual, expected);
}

View File

@ -66,4 +66,9 @@ impl TimestampSummary {
self.counts[timestamp.minute() as usize] += 1;
self.stats.update(&timestamp.timestamp_nanos())
}
/// Records a timestamp value from nanos
pub fn record_nanos(&mut self, timestamp_nanos: i64) {
self.record(Time::from_timestamp_nanos(timestamp_nanos))
}
}

View File

@ -2,11 +2,11 @@
name = "datafusion"
version = "0.1.0"
authors = ["Andrew Lamb <andrew@nerdnetworks.org>"]
edition = "2018"
edition = "2021"
description = "Re-exports datafusion at a specific version"
[dependencies]
# Rename to workaround doctest bug
# Turn off optional datafusion features (e.g. don't get support for crypto functions or avro)
upstream = { git = "https://github.com/apache/arrow-datafusion.git", rev="2454e468641d4d98af211c2800c0afec2732385b", default-features = false, package = "datafusion" }
upstream = { git = "https://github.com/apache/arrow-datafusion.git", rev="57d7777fc0ce94e783a7f447631624c354b0b906", default-features = false, package = "datafusion" }

View File

@ -2,7 +2,7 @@
name = "datafusion_util"
version = "0.1.0"
authors = ["Andrew Lamb <andrew@nerdnetworks.org>"]
edition = "2018"
edition = "2021"
description = "Datafusion utilities"
[dependencies]

View File

@ -21,4 +21,4 @@ ENV TEST_INTEGRATION=1
ENV KAFKA_CONNECT=kafka:9092
# Run the integration tests that connect to Kafka that will be running in another container
CMD ["sh", "-c", "./docker/integration_test.sh"]
CMD ["sh", "-c", "cargo test -p write_buffer kafka -- --nocapture"]

View File

@ -1,7 +0,0 @@
#!/bin/bash
set -euxo pipefail
cargo test -p write_buffer kafka -- --nocapture
cargo test -p influxdb_iox --test end_to_end skip_replay -- --nocapture
cargo test -p influxdb_iox --test end_to_end write_buffer -- --nocapture

View File

@ -6,7 +6,7 @@ not intended to be general user facing documentation
## IOx Tech Talks
We hold monthly Tech Talks that explain the project's technical underpinnings. You can register for the [InfluxDB IOx Tech Talks here](https://www.influxdata.com/community-showcase/influxdb-tech-talks/), or you can find links to previous sessions below:
We hold monthly Tech Talks that explain the project's technical underpinnings. You can register for the [InfluxDB IOx Tech Talks here](https://www.influxdata.com/community-showcase/influxdb-tech-talks/), or you can find links to previous sessions below or in the [YouTube playlist](https://www.youtube.com/playlist?list=PLYt2jfZorkDp-PKBS05kf2Yx2NrRyPAAz):
* December 2020: Rusty Introduction to Apache Arrow [recording](https://www.youtube.com/watch?v=dQFjKa9vKhM)
* Jan 2021: Data Lifecycle in InfluxDB IOx & How it Uses Object Storage for Persistence [recording](https://www.youtube.com/watch?v=KwdPifHC1Gc)
@ -16,7 +16,8 @@ We hold monthly Tech Talks that explain the project's technical underpinnings. Y
* May 2021: Catalogs - Turning a Set of Parquet Files into a Data Set [recording](https://www.youtube.com/watch?v=Zaei3l3qk0c), [slides](https://www.slideshare.net/influxdata/catalogs-turning-a-set-of-parquet-files-into-a-data-set)
* June 2021: Performance Profiling in Rust [recording](https://www.youtube.com/watch?v=_ZNcg-nAVTM), [slides](https://www.slideshare.net/influxdata/performance-profiling-in-rust)
* July 2021: Impacts of Sharding, Partitioning, Encoding & Sorting on Distributed Query Performance [recording](https://www.youtube.com/watch?v=VHYMpItvBZQ), [slides](https://www.slideshare.net/influxdata/impacts-of-sharding-partitioning-encoding-and-sorting-on-distributed-query-performance)
* September 2021: Observability of InfluxDB IOx Tracing, Metrics and System Tables [recording](https://www.youtube.com/watch?v=tB-umdJCJQc)
* October 2021: Query Processing in InfluxDB IOx [recording](https://www.youtube.com/watch?v=9DYkWuM8xco)
## Table of Contents:

View File

@ -7,7 +7,7 @@
# The full list of available configuration values can be found in
# the command line help (e.g. `env: INFLUXDB_IOX_DB_DIR=`):
#
# ./influxdb_iox run --help
# ./influxdb_iox run database --help
#
#
# The identifier for the server. Used for writing to object storage and as
@ -58,4 +58,4 @@
# To enable Jaeger tracing:
# OTEL_SERVICE_NAME="iox" # defaults to iox
# OTEL_EXPORTER_JAEGER_AGENT_HOST="jaeger.influxdata.net"
# OTEL_EXPORTER_JAEGER_AGENT_PORT="6831"
# OTEL_EXPORTER_JAEGER_AGENT_PORT="6831"

View File

@ -14,21 +14,21 @@ Some examples
```bash
# Default verbosity
$ ./influxdb_iox run
$ ./influxdb_iox run database
# More verbose
$ ./influxdb_iox run -v
$ ./influxdb_iox run database -v
# Even more verbose
$ ./influxdb_iox run -vv
$ ./influxdb_iox run database -vv
# Everything!!
$ ./influxdb_iox run --log-filter trace
$ ./influxdb_iox run database --log-filter trace
# Default info, but debug within http module
$ ./influxdb_iox run --log-filter info,influxdb_iox::influxdb_ioxd::http=debug
$ ./influxdb_iox run database --log-filter info,influxdb_iox::influxdb_ioxd::http=debug
```
Additionally, the output format can be controlled with `--log-format`
```bash
$ ./influxdb_iox run --log-filter debug --log-format logfmt
$ ./influxdb_iox run database --log-filter debug --log-format logfmt
```
## Developer Guide
@ -69,7 +69,7 @@ will strip out all trace level callsites from the release binary.
### Format
IOx supports logging in many formats. For a list run `influxdb_iox run --help` and view the help output
IOx supports logging in many formats. For a list run `influxdb_iox run database --help` and view the help output
for `--log-format`.
<sup>1.</sup> This span propagation uses thread-local storage and therefore does not automatically carry across

View File

@ -3,10 +3,10 @@
An IOx node can be started from the command line:
```shell
influxdb_iox run
influxdb_iox run database
```
See help (via `influxdb_iox run --help`) for arguments.
See help (via `influxdb_iox run database --help`) for arguments.
## Server ID

View File

@ -87,14 +87,14 @@ set.
### Configuration differences when running the tests
When running `influxdb_iox run`, you can pick one object store to use. When running the tests,
When running `influxdb_iox run database`, you can pick one object store to use. When running the tests,
you can run them against all the possible object stores. There's still only one
`INFLUXDB_IOX_BUCKET` variable, though, so that will set the bucket name for all configured object
stores. Use the same bucket name when setting up the different services.
Other than possibly configuring multiple object stores, configuring the tests to use the object
store services is the same as configuring the server to use an object store service. See the output
of `influxdb_iox run --help` for instructions.
of `influxdb_iox run database --help` for instructions.
## InfluxDB 2 Client
@ -136,7 +136,7 @@ You can then run the tests with `KAFKA_CONNECT=localhost:9093`. To run just the
tests, the full command would then be:
```
TEST_INTEGRATION=1 KAFKA_CONNECT=localhost:9093 cargo test -p influxdb_iox --test end_to_end write_buffer
TEST_INTEGRATION=1 KAFKA_CONNECT=localhost:9093 cargo test -p write_buffer kafka --nocapture
```
### Running `cargo test` in a Docker container

View File

@ -2,7 +2,7 @@
name = "entry"
version = "0.1.0"
authors = ["Paul Dix <paul@pauldix.net>"]
edition = "2018"
edition = "2021"
description = "The entry format used by the write buffer"
[dependencies]

View File

@ -2,7 +2,7 @@
name = "generated_types"
version = "0.1.0"
authors = ["Paul Dix <paul@pauldix.net>"]
edition = "2018"
edition = "2021"
[dependencies] # In alphabetical order
bytes = "1.0"
@ -13,7 +13,6 @@ pbjson-types = "0.1"
prost = "0.8"
regex = "1.4"
serde = { version = "1.0", features = ["derive"] }
thiserror = "1.0.30"
tonic = "0.5"
time = { path = "../time" }
@ -29,4 +28,4 @@ pbjson-build = "0.1"
[features]
default = []
data_types_conversions = ["data_types"]
data_types_conversions = ["data_types"]

View File

@ -21,37 +21,44 @@ fn main() -> Result<()> {
/// - `com.github.influxdata.idpe.storage.read.rs`
/// - `influxdata.iox.catalog.v1.rs`
/// - `influxdata.iox.management.v1.rs`
/// - `influxdata.iox.router.v1.rs`
/// - `influxdata.iox.write.v1.rs`
/// - `influxdata.platform.storage.rs`
fn generate_grpc_types(root: &Path) -> Result<()> {
let storage_path = root.join("influxdata/platform/storage");
let idpe_path = root.join("com/github/influxdata/idpe/storage/read");
let catalog_path = root.join("influxdata/iox/catalog/v1");
let idpe_path = root.join("com/github/influxdata/idpe/storage/read");
let management_path = root.join("influxdata/iox/management/v1");
let router_path = root.join("influxdata/iox/router/v1");
let storage_path = root.join("influxdata/platform/storage");
let write_path = root.join("influxdata/iox/write/v1");
let proto_files = vec![
storage_path.join("test.proto"),
storage_path.join("predicate.proto"),
storage_path.join("storage_common.proto"),
storage_path.join("service.proto"),
storage_path.join("storage_common_idpe.proto"),
idpe_path.join("source.proto"),
catalog_path.join("catalog.proto"),
catalog_path.join("parquet_metadata.proto"),
catalog_path.join("predicate.proto"),
management_path.join("database_rules.proto"),
idpe_path.join("source.proto"),
management_path.join("chunk.proto"),
management_path.join("database_rules.proto"),
management_path.join("jobs.proto"),
management_path.join("partition.proto"),
management_path.join("partition_template.proto"),
management_path.join("server_config.proto"),
management_path.join("service.proto"),
management_path.join("shard.proto"),
management_path.join("jobs.proto"),
write_path.join("service.proto"),
root.join("influxdata/pbdata/v1/influxdb_pb_data_protocol.proto"),
root.join("grpc/health/v1/service.proto"),
management_path.join("write_buffer.proto"),
root.join("google/longrunning/operations.proto"),
root.join("google/rpc/error_details.proto"),
root.join("google/rpc/status.proto"),
root.join("grpc/health/v1/service.proto"),
root.join("influxdata/pbdata/v1/influxdb_pb_data_protocol.proto"),
router_path.join("router.proto"),
router_path.join("service.proto"),
storage_path.join("predicate.proto"),
storage_path.join("service.proto"),
storage_path.join("storage_common.proto"),
storage_path.join("storage_common_idpe.proto"),
storage_path.join("test.proto"),
write_path.join("service.proto"),
];
// Tell cargo to recompile if any of these proto files are changed

View File

@ -3,34 +3,9 @@ package influxdata.iox.management.v1;
option go_package = "github.com/influxdata/iox/management/v1";
import "google/protobuf/duration.proto";
import "google/protobuf/empty.proto";
import "influxdata/iox/management/v1/partition_template.proto";
import "influxdata/iox/management/v1/shard.proto";
// `PartitionTemplate` is used to compute the partition key of each row that
// gets written. It can consist of the table name, a column name and its value,
// a formatted time, or a string column and regex captures of its value. For
// columns that do not appear in the input row, a blank value is output.
//
// The key is constructed in order of the template parts; thus ordering changes
// what partition key is generated.
message PartitionTemplate {
message Part {
message ColumnFormat {
string column = 1;
string format = 2;
}
oneof part {
google.protobuf.Empty table = 1;
string column = 2;
string time = 3;
ColumnFormat regex = 4;
ColumnFormat strf_time = 5;
}
}
repeated Part parts = 1;
}
import "influxdata/iox/management/v1/write_buffer.proto";
message LifecycleRules {
// Once the total amount of buffered data in memory reaches this size start
@ -111,6 +86,9 @@ message LifecycleRules {
uint64 parquet_cache_limit = 17;
}
// Database rules.
//
// TODO(marco): add `WriteSources` to this message.
message DatabaseRules {
// The unencoded name of the database
//
@ -128,6 +106,8 @@ message DatabaseRules {
LifecycleRules lifecycle_rules = 3;
// If not specified, does not configure any routing
//
// TODO(marco): remove this
oneof routing_rules {
// Shard config
ShardConfig shard_config = 8;
@ -146,6 +126,8 @@ message DatabaseRules {
// Optionally, the connection for the write buffer for writing or reading/restoring data.
//
// If not specified, does not configure a write buffer
//
// TODO(marco): remove this
WriteBufferConnection write_buffer_connection = 13;
}
@ -158,61 +140,6 @@ message PersistedDatabaseRules {
DatabaseRules rules = 2;
}
// Configures the use of a write buffer.
message WriteBufferConnection {
enum Direction {
// Unspecified direction, will be treated as an error.
DIRECTION_UNSPECIFIED = 0;
// Writes into the buffer aka "producer".
DIRECTION_WRITE = 1;
// Reads from the buffer aka "consumer".
DIRECTION_READ = 2;
}
// If the buffer is used for reading or writing.
Direction direction = 1;
// Which type should be used (e.g. "kafka", "mock")
string type = 2;
// Connection string, depends on `type`.
string connection = 3;
// Old non-nested auto-creation config.
reserved 4, 5, 7;
// Special configs to be applied when establishing the connection.
//
// This depends on `type` and can configure aspects like timeouts.
map<string, string> connection_config = 6;
// Specifies if the sequencers (e.g. for Kafka in form of a topic w/ `n_sequencers` partitions) should be
// automatically created if they do not exist prior to reading or writing.
WriteBufferCreationConfig creation_config = 8;
}
// Configs sequencer auto-creation for write buffers.
//
// What that means depends on the used write buffer, e.g. for Kafka this will create a new topic w/ `n_sequencers`
// partitions.
message WriteBufferCreationConfig {
// Number of sequencers.
//
// How they are implemented depends on `type`, e.g. for Kafka this is mapped to the number of partitions.
//
// If 0, a server-side default is used
uint32 n_sequencers = 1;
// Special configs to be applied when sequencers are created.
//
// This depends on `type` and can setup parameters like retention policy.
//
// Contains 0 or more key value pairs
map<string, string> options = 2;
}
message RoutingConfig {
Sink sink = 2;
}

View File

@ -0,0 +1,31 @@
syntax = "proto3";
package influxdata.iox.management.v1;
option go_package = "github.com/influxdata/iox/management/v1";
import "google/protobuf/empty.proto";
// `PartitionTemplate` is used to compute the partition key of each row that
// gets written. It can consist of the table name, a column name and its value,
// a formatted time, or a string column and regex captures of its value. For
// columns that do not appear in the input row, a blank value is output.
//
// The key is constructed in order of the template parts; thus ordering changes
// what partition key is generated.
message PartitionTemplate {
message Part {
message ColumnFormat {
string column = 1;
string format = 2;
}
oneof part {
google.protobuf.Empty table = 1;
string column = 2;
string time = 3;
ColumnFormat regex = 4;
ColumnFormat strf_time = 5;
}
}
repeated Part parts = 1;
}

View File

@ -0,0 +1,23 @@
syntax = "proto3";
package influxdata.iox.management.v1;
option go_package = "github.com/influxdata/iox/management/v1";
// Stores a server's map of the databases it owns. The keys are the database names and the values
// are the database's location in object storage.
//
// Example (current): "foo" => "/1/foo" ("/[server id]/[database name]")
// Example (after completing the switch to floating databases):
// "foo" => "/dbs/3f25185a-0773-4ae8-abda-f9c3786f242b" ("/dbs/[database uuid]")
message ServerConfig {
map<string, string> databases = 1;
}
// Stores information about a server that owns a database. To be stored in a database's object
// store directory as verification of ownership.
message OwnerInfo {
// The ID of the server that owns this database
uint32 id = 1;
// The path to this server's config file in object storage
string location = 2;
}

View File

@ -37,10 +37,7 @@ service ManagementService {
rpc RestoreDatabase(RestoreDatabaseRequest) returns (RestoreDatabaseResponse);
// List deleted databases and their metadata.
rpc ListDeletedDatabases(ListDeletedDatabasesRequest) returns (ListDeletedDatabasesResponse);
// List all databases and their metadata.
// List databases with their metadata.
rpc ListDetailedDatabases(ListDetailedDatabasesRequest) returns (ListDetailedDatabasesResponse);
// List chunks available on this database
@ -160,7 +157,9 @@ message CreateDatabaseRequest {
DatabaseRules rules = 1;
}
message CreateDatabaseResponse {}
message CreateDatabaseResponse {
bytes uuid = 1;
}
// Update a database.
message UpdateDatabaseRequest {
@ -177,24 +176,24 @@ message DeleteDatabaseRequest {
string db_name = 1;
}
message DeleteDatabaseResponse {}
message DeleteDatabaseResponse {
bytes uuid = 1;
}
message RestoreDatabaseRequest {
// The generation ID of the deleted database.
uint64 generation_id = 1;
// Was the generation ID of the deleted database.
reserved 1;
reserved "generation_id";
// the name of the database
string db_name = 2;
// The UUID of the deleted database.
string uuid = 3;
}
message RestoreDatabaseResponse {}
message ListDeletedDatabasesRequest {}
message ListDeletedDatabasesResponse {
repeated DetailedDatabase deleted_databases = 1;
}
message ListDetailedDatabasesRequest {}
message ListDetailedDatabasesResponse {
@ -203,14 +202,19 @@ message ListDetailedDatabasesResponse {
// This resource represents detailed information about a database.
message DetailedDatabase {
// The generation ID of the database.
uint64 generation_id = 1;
// Was the generation ID of the database.
reserved 1;
reserved "generation_id";
// The UTC datetime at which this database was deleted, if applicable.
google.protobuf.Timestamp deleted_at = 2;
// Was the datetime at which this database was deleted, if applicable.
reserved 2;
reserved "deleted_at";
// The name of the database.
string db_name = 3;
// The UUID of the database.
bytes uuid = 4;
}
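Because the `uuid` field is transported as raw bytes, a client has to convert it back into a UUID; a hedged sketch using the `uuid` crate (0.8, as pinned in the workspace), with a hard-coded `Vec<u8>` standing in for the field a prost-generated `DetailedDatabase` would expose:

```rust
// Hypothetical client-side handling of the `bytes uuid` field (not actual
// IOx client code). The raw bytes spell the example UUID used in
// server_config.proto: 3f25185a-0773-4ae8-abda-f9c3786f242b.
use uuid::Uuid;

fn main() {
    let raw: Vec<u8> = vec![
        0x3f, 0x25, 0x18, 0x5a, 0x07, 0x73, 0x4a, 0xe8,
        0xab, 0xda, 0xf9, 0xc3, 0x78, 0x6f, 0x24, 0x2b,
    ];
    let uuid = Uuid::from_slice(&raw).expect("uuid field must be 16 bytes");
    assert_eq!(uuid.to_string(), "3f25185a-0773-4ae8-abda-f9c3786f242b");
}
```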
message ListChunksRequest {
@ -403,6 +407,9 @@ message DatabaseStatus {
// No active database
DATABASE_STATE_NO_ACTIVE_DATABASE = 10;
// Database owner info has been loaded
DATABASE_STATE_OWNER_INFO_LOADED = 11;
// Rules are loaded
DATABASE_STATE_RULES_LOADED = 2;
@ -415,13 +422,16 @@ message DatabaseStatus {
// Error loading rules
DATABASE_STATE_RULES_LOAD_ERROR = 5;
// Error loading owner info
DATABASE_STATE_OWNER_INFO_LOAD_ERROR = 12;
// Error during catalog load
DATABASE_STATE_CATALOG_LOAD_ERROR = 6;
// Error during replay
DATABASE_STATE_REPLAY_ERROR = 7;
// Error encountered identifying active generation
// Error encountered finding the database's directory in object storage
DATABASE_STATE_DATABASE_OBJECT_STORE_LOOKUP_ERROR = 9;
}

View File

@ -30,10 +30,14 @@ message ShardConfig {
/// If set to true the router will ignore any errors sent by the remote
/// targets in this route. That is, the write request will succeed
/// regardless of this route's success.
///
/// TODO(marco): remove this
bool ignore_errors = 3;
/// Mapping between shard IDs and node groups. Other sharding rules use
/// ShardId as targets.
///
/// TODO(marco): remove this
map<uint32, Sink> shards = 4;
}

View File

@ -0,0 +1,58 @@
syntax = "proto3";
package influxdata.iox.management.v1;
option go_package = "github.com/influxdata/iox/management/v1";
// Configures the use of a write buffer.
message WriteBufferConnection {
enum Direction {
// Unspecified direction, will be treated as an error.
DIRECTION_UNSPECIFIED = 0;
// Writes into the buffer aka "producer".
DIRECTION_WRITE = 1;
// Reads from the buffer aka "consumer".
DIRECTION_READ = 2;
}
// If the buffer is used for reading or writing.
Direction direction = 1;
// Which type should be used (e.g. "kafka", "mock")
string type = 2;
// Connection string, depends on `type`.
string connection = 3;
// Old non-nested auto-creation config.
reserved 4, 5, 7;
// Special configs to be applied when establishing the connection.
//
// This depends on `type` and can configure aspects like timeouts.
map<string, string> connection_config = 6;
// Specifies if the sequencers (e.g. for Kafka in form of a topic w/ `n_sequencers` partitions) should be
// automatically created if they do not exist prior to reading or writing.
WriteBufferCreationConfig creation_config = 8;
}
// Configs sequencer auto-creation for write buffers.
//
// What that means depends on the used write buffer, e.g. for Kafka this will create a new topic w/ `n_sequencers`
// partitions.
message WriteBufferCreationConfig {
// Number of sequencers.
//
// How they are implemented depends on `type`, e.g. for Kafka this is mapped to the number of partitions.
//
// If 0, a server-side default is used
uint32 n_sequencers = 1;
// Special configs to be applied when sequencers are created.
//
// This depends on `type` and can setup parameters like retention policy.
//
// Contains 0 or more key value pairs
map<string, string> options = 2;
}

View File

@ -0,0 +1,148 @@
syntax = "proto3";
package influxdata.iox.router.v1;
option go_package = "github.com/influxdata/iox/router/v1";
import "influxdata/iox/management/v1/partition_template.proto";
import "influxdata/iox/management/v1/shard.proto";
import "influxdata/iox/management/v1/write_buffer.proto";
// Router for writes and queries.
//
// A router acts similarly to a "real" database except that it does NOT store or manage any data by itself but forwards
// this responsibility to other nodes (which then in turn provide an actual database or another routing layer).
//
// # Write Routing
//
// ## Overall Picture
// Data is accepted from all sources, is sharded, and is (according to the sharding) written into the sink sets. There
// may be a prioritization for sources that is "HTTP and gRPC first, and write buffers in declared order".
//
// ```text
// ( HTTP )--+ +------->( sink set 1 )
// | |
// ( gRPC )--+-->( sharder )--> ...
// | |
// ( Write Buffer 1 )--+ +------->( sink set n )
// ... |
// ( Write Buffer n )--+
// ```
//
// ## Sharder
// A sharder takes data and for every row/line:
//
// 1. Checks if a matcher matches the row, first matcher wins. If that's the case, the row/line is directly sent to the
// sink set.
// 2. If no matcher matches, the row/line is handled by the hash ring.
//
// ```text
// --->[ matcher 1? ]-{no}---...--->[ matcher n? ]-{no}---+
// | | |
// {yes} {yes} |
// | | |
// V V |
// ( sink set 1 ) ( sink set n ) |
// ^ ^ |
// | | |
// +--------( hash ring )-------+ |
// ^ |
// | |
// +-----------------------------+
// ```
//
// ## Sink Set
// Data is written to all sinks in the set in implementation-defined order. Errors do NOT short-circuit. If an error
// occurs for at least one sink that has `ignore_errors = false`, an error is returned. An empty sink set acts as a NULL
// sink and always succeeds.
//
// **IMPORTANT: Queries are NOT distributed! They are always answered by a single node.**
//
// # Query Routing
// Queries always arrive via gRPC and are forwarded to one sink. The specific sink is selected via an engine that might
// take the following features into account:
//
// - **freshness:** For each sink, what are the latest sequence numbers pulled from the write buffer.
// - **stickiness:** The same client should ideally reach the same sink in subsequent requests to improve caching.
// - **circuit breaking:** If a sink is unhealthy it should be excluded from the candidate list for a while.
//
// ```text
// ( gRPC )-->[ selection engine ]-->( sink 1 )
// | ...
// +---->( sink n )
// ```
message Router {
// Router name.
//
// The name is unique for this node.
string name = 1;
// Sources of write requests.
WriteSources write_sources = 2;
// Write sharder.
//
// NOTE: This only uses the `specific_targets` and `hash_ring` config of the sharder. The other fields are ignored.
//
// TODO(marco): remove the note above once the `ShardConfig` has been cleaned up.
influxdata.iox.management.v1.ShardConfig write_sharder = 3;
// Sinks for write requests.
map<uint32, WriteSinkSet> write_sinks = 4;
// Sinks for query requests.
QuerySinks query_sinks = 5;
// Template that generates a partition key for each row inserted into the database.
//
// This is a temporary config until the partition is moved entirely into the database.
//
// If not specified, a server-side default is used
//
// TODO(marco): remove this
influxdata.iox.management.v1.PartitionTemplate partition_template = 6;
}
// Sources of write request aka new data.
//
// Data is accepted from these sources and a status is provided back to them.
message WriteSources {
// If set writes via gRPC and HTTP are accepted.
//
// You may want to disable this when incoming data should solely be received via write buffer(s).
bool allow_unsequenced_inputs = 2;
// Write buffer connections.
repeated influxdata.iox.management.v1.WriteBufferConnection write_buffers = 3;
}
// Sink of write requests aka new data.
//
// Data is sent to this sink and a status is received from it.
message WriteSink {
// Where the data goes.
oneof sink {
// gRPC-based remote, addressed by its server ID.
uint32 grpc_remote = 1;
// Write buffer connection.
influxdata.iox.management.v1.WriteBufferConnection write_buffer = 2;
}
// If set, errors during writing to this sink are ignored and do NOT lead to an overall failure.
bool ignore_errors = 3;
}
// Set of write sinks.
message WriteSinkSet {
// Sinks within the set.
repeated WriteSink sinks = 1;
}
// Sinks for query requests.
//
// Queries are sent to one of these sinks and the resulting data is received from it.
//
// Note that the query results flow in the opposite direction (aka a query sink is a result source).
message QuerySinks {
// gRPC-based remotes, addressed by their server IDs.
repeated uint32 grpc_remotes = 1;
}
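
To make the write-routing rules described in the comments above concrete, here is a minimal, hedged Rust sketch of the dispatch order and of the sink-set error semantics. None of these types are IOx types: `Matcher`, `HashRing`, and `SinkSetId` are hypothetical stand-ins for the config messages in this file, and matching by table name is purely illustrative.

```rust
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

/// Hypothetical sink set identifier (the proto keys sink sets by `uint32`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct SinkSetId(u32);

/// Hypothetical matcher: a predicate over the table name plus the sink set it routes to.
struct Matcher {
    matches: fn(&str) -> bool,
    sink_set: SinkSetId,
}

/// Hypothetical hash ring mapping a table name onto one of its sink sets.
struct HashRing {
    sink_sets: Vec<SinkSetId>,
}

impl HashRing {
    fn route(&self, table: &str) -> Option<SinkSetId> {
        if self.sink_sets.is_empty() {
            return None;
        }
        let mut hasher = DefaultHasher::new();
        table.hash(&mut hasher);
        let idx = (hasher.finish() % self.sink_sets.len() as u64) as usize;
        Some(self.sink_sets[idx])
    }
}

/// Step 1: the first matching matcher wins. Step 2: everything else goes to the hash ring.
fn route_row(matchers: &[Matcher], ring: &HashRing, table: &str) -> Option<SinkSetId> {
    matchers
        .iter()
        .find(|m| (m.matches)(table))
        .map(|m| m.sink_set)
        .or_else(|| ring.route(table))
}

/// Sink-set semantics: errors never short-circuit, and only a failing sink with
/// `ignore_errors == false` fails the overall write; an empty set always succeeds.
fn write_to_sink_set(results: &[(bool, Result<(), String>)]) -> Result<(), String> {
    let mut first_error = None;
    for (ignore_errors, result) in results {
        if let Err(e) = result {
            if !ignore_errors && first_error.is_none() {
                first_error = Some(e.clone());
            }
        }
    }
    first_error.map_or(Ok(()), Err)
}

fn main() {
    let matchers = [Matcher {
        matches: |table| table.starts_with("cpu"),
        sink_set: SinkSetId(1),
    }];
    let ring = HashRing {
        sink_sets: vec![SinkSetId(2), SinkSetId(3)],
    };
    assert_eq!(route_row(&matchers, &ring, "cpu"), Some(SinkSetId(1)));
    assert!(route_row(&matchers, &ring, "mem").is_some());
    assert!(write_to_sink_set(&[]).is_ok()); // empty sink set acts as a NULL sink
}
```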

View File

@ -0,0 +1,76 @@
syntax = "proto3";
package influxdata.iox.router.v1;
option go_package = "github.com/influxdata/iox/router/v1";
import "influxdata/iox/router/v1/router.proto";
service RouterService {
// List remote IOx servers we know about.
rpc ListRemotes(ListRemotesRequest) returns (ListRemotesResponse);
// Update information about a remote IOx server (upsert).
rpc UpdateRemote(UpdateRemoteRequest) returns (UpdateRemoteResponse);
// Delete a reference to remote IOx server.
rpc DeleteRemote(DeleteRemoteRequest) returns (DeleteRemoteResponse);
// List configured routers.
rpc ListRouter(ListRouterRequest) returns (ListRouterResponse);
// Update router config (upsert).
rpc UpdateRouter(UpdateRouterRequest) returns (UpdateRouterResponse);
// Delete router.
rpc DeleteRouter(DeleteRouterRequest) returns (DeleteRouterResponse);
}
message ListRemotesRequest {}
message ListRemotesResponse {
repeated Remote remotes = 1;
}
// This resource represents a remote IOx server.
message Remote {
// The server ID associated with a remote IOx server.
uint32 id = 1;
// The address of the remote IOx server gRPC endpoint.
string connection_string = 2;
}
// Updates information about a remote IOx server.
//
// If a remote for a given `id` already exists, it is updated in place.
message UpdateRemoteRequest {
// If omitted, the remote associated with `id` will be removed.
Remote remote = 1;
// TODO(#917): add an optional flag to test the connection or not before adding it.
}
message UpdateRemoteResponse {}
message ListRouterRequest {}
message ListRouterResponse {
repeated Router routers = 1;
}
message DeleteRemoteRequest{
uint32 id = 1;
}
message DeleteRemoteResponse {}
message UpdateRouterRequest {
Router router = 1;
}
message UpdateRouterResponse {}
message DeleteRouterRequest {
string router_name = 1;
}
message DeleteRouterResponse {}
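
For completeness, a hedged sketch of calling the new service from Rust. The module path and client type assume tonic's usual code-generation layout (`router_service_client::RouterServiceClient` under the package module); this diff does not show where the generated code is exposed, so treat the paths, address, and port below as assumptions.

```rust
// Hedged sketch only: generated module paths, address, and port are assumptions.
use generated_types::influxdata::iox::router::v1::{
    router_service_client::RouterServiceClient, ListRouterRequest,
};

async fn list_routers() -> Result<(), Box<dyn std::error::Error>> {
    let mut client = RouterServiceClient::connect("http://127.0.0.1:8082").await?;
    let response = client.list_router(ListRouterRequest {}).await?;
    for router in response.into_inner().routers {
        println!("router: {}", router.name);
    }
    Ok(())
}
```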

View File

@ -1,6 +1,7 @@
use crate::{
google::{FieldViolation, FieldViolationExt, FromFieldOpt},
influxdata::iox::management::v1 as management,
DecodeError, EncodeError,
};
use data_types::{
database_rules::{
@ -14,7 +15,6 @@ use std::{
num::NonZeroU32,
time::Duration,
};
use thiserror::Error;
mod lifecycle;
mod partition;
@ -122,42 +122,19 @@ impl TryFrom<management::RoutingConfig> for RoutingConfig {
}
}
/// Wrapper around a `prost` error so that
/// users of this crate do not have a direct dependency
/// on the prost crate.
#[derive(Debug, Error)]
pub enum ProstError {
#[error("failed to encode protobuf: {0}")]
EncodeError(#[from] prost::EncodeError),
#[error("failed to decode protobuf: {0}")]
DecodeError(#[from] prost::DecodeError),
}
/// Decode database rules that were encoded using `encode_persisted_database_rules`
pub fn decode_persisted_database_rules(
bytes: prost::bytes::Bytes,
) -> Result<management::PersistedDatabaseRules, ProstError> {
Ok(prost::Message::decode(bytes)?)
) -> Result<management::PersistedDatabaseRules, DecodeError> {
prost::Message::decode(bytes)
}
/// TEMPORARY FOR TRANSITION PURPOSES - if decoding rules file as `PersistedDatabaseRules` (which
/// includes the database UUID) fails, use this to try instead to decode as `DatabaseRules`. Then
/// next time the database rules are updated, the rules file will be writted as
/// `PersistedDatabaseRules`.
pub fn decode_database_rules(
bytes: prost::bytes::Bytes,
) -> Result<management::DatabaseRules, ProstError> {
Ok(prost::Message::decode(bytes)?)
}
/// Encode database rules into a serialized format suitable for
/// storage in objet store
/// Encode database rules into a serialized format suitable for storage in object store
pub fn encode_persisted_database_rules(
rules: &management::PersistedDatabaseRules,
bytes: &mut prost::bytes::BytesMut,
) -> Result<(), ProstError> {
Ok(prost::Message::encode(rules, bytes)?)
) -> Result<(), EncodeError> {
prost::Message::encode(rules, bytes)
}
impl From<WriteBufferConnection> for management::WriteBufferConnection {

View File

@ -15,6 +15,8 @@ impl DatabaseState {
DatabaseState::DatabaseObjectStoreFound => "DatabaseObjectStoreFound",
DatabaseState::DatabaseObjectStoreLookupError => "DatabaseObjectStoreLookupError",
DatabaseState::NoActiveDatabase => "NoActiveDatabase",
DatabaseState::OwnerInfoLoaded => "OwnerInfoLoaded",
DatabaseState::OwnerInfoLoadError => "OwnerInfoLoadError",
DatabaseState::Unspecified => "Unspecified",
}
}

View File

@ -1,18 +1,13 @@
use crate::influxdata::iox::management::v1 as management;
use data_types::detailed_database::DetailedDatabase;
use data_types::detailed_database::ActiveDatabase;
impl From<DetailedDatabase> for management::DetailedDatabase {
fn from(database: DetailedDatabase) -> Self {
let DetailedDatabase {
name,
generation_id,
deleted_at,
} = database;
impl From<ActiveDatabase> for management::DetailedDatabase {
fn from(database: ActiveDatabase) -> Self {
let ActiveDatabase { name, uuid } = database;
Self {
db_name: name.to_string(),
generation_id: generation_id.inner as u64,
deleted_at: deleted_at.map(Into::into),
uuid: uuid.as_bytes().to_vec(),
}
}
}

View File

@ -152,6 +152,10 @@ pub mod database_state;
pub mod detailed_database;
#[cfg(feature = "data_types_conversions")]
pub mod job;
#[cfg(feature = "data_types_conversions")]
pub mod server_config;
pub use prost::{DecodeError, EncodeError};
#[cfg(test)]
mod tests {

View File

@ -0,0 +1,33 @@
use crate::{influxdata::iox::management::v1 as management, DecodeError, EncodeError};
/// Decode server config that was encoded using `encode_persisted_server_config`
pub fn decode_persisted_server_config(
bytes: prost::bytes::Bytes,
) -> Result<management::ServerConfig, DecodeError> {
prost::Message::decode(bytes)
}
/// Encode server config into a serialized format suitable for storage in object store
pub fn encode_persisted_server_config(
server_config: &management::ServerConfig,
bytes: &mut prost::bytes::BytesMut,
) -> Result<(), EncodeError> {
prost::Message::encode(server_config, bytes)
}
/// Encode server information to be serialized into a database's object store directory and used to
/// identify that database's owning server
pub fn encode_database_owner_info(
owner_info: &management::OwnerInfo,
bytes: &mut prost::bytes::BytesMut,
) -> Result<(), EncodeError> {
prost::Message::encode(owner_info, bytes)
}
/// Encode server information that was encoded using `encode_database_owner_info` to compare
/// with the currently-running server
pub fn decode_database_owner_info(
bytes: prost::bytes::Bytes,
) -> Result<management::OwnerInfo, DecodeError> {
prost::Message::decode(bytes)
}
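
A hedged round-trip sketch for the helpers above. The `generated_types::server_config` path matches how the reworked `dump_catalog` command calls `decode_persisted_server_config` elsewhere in this diff; everything else (the error boxing, the caller) is illustrative only.

```rust
use generated_types::influxdata::iox::management::v1 as management;
use generated_types::server_config::{
    decode_persisted_server_config, encode_persisted_server_config,
};

/// Encode a `ServerConfig` into bytes and decode it back, surfacing the
/// re-exported prost `EncodeError`/`DecodeError` via a boxed error.
fn roundtrip_server_config(
    config: &management::ServerConfig,
) -> Result<management::ServerConfig, Box<dyn std::error::Error>> {
    let mut buf = prost::bytes::BytesMut::new();
    encode_persisted_server_config(config, &mut buf)?;
    Ok(decode_persisted_server_config(buf.freeze())?)
}
```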

View File

@ -2,7 +2,7 @@
name = "grpc-router-test-gen"
version = "0.1.0"
authors = ["Marko Mikulicic <mkm@influxdata.com>"]
edition = "2018"
edition = "2021"
description = "Protobuf used in test for the grpc-router crate; need to be in a separate create because of linter limitations"
[dependencies]

View File

@ -2,7 +2,7 @@
name = "grpc-router"
version = "0.1.0"
authors = ["Marko Mikulicic <mkm@influxdata.com>"]
edition = "2018"
edition = "2021"
[dependencies]
bytes = "1.0"
@ -25,4 +25,4 @@ prost-build = "0.8"
tonic-build = "0.5"
[dev-dependencies]
grpc-router-test-gen = { path = "./grpc-router-test-gen" }
grpc-router-test-gen = { path = "../grpc-router-test-gen" }

View File

@ -2,7 +2,7 @@
name = "influxdb2_client"
version = "0.1.0"
authors = ["Paul Dix <paul@pauldix.net>"]
edition = "2018"
edition = "2021"
[dependencies] # In alphabetical order
bytes = "1.0"

View File

@ -215,6 +215,7 @@ impl TestServer {
Command::new("docker")
.arg("run")
.arg("database")
.arg("--name")
.arg(&container_name)
.arg("--publish")

influxdb_iox/Cargo.toml Normal file
View File

@ -0,0 +1,123 @@
[package]
name = "influxdb_iox"
version = "0.1.0"
authors = ["Paul Dix <paul@pauldix.net>"]
edition = "2021"
default-run = "influxdb_iox"
[dependencies]
# Workspace dependencies, in alphabetical order
datafusion = { path = "../datafusion" }
data_types = { path = "../data_types" }
entry = { path = "../entry" }
generated_types = { path = "../generated_types" }
influxdb_iox_client = { path = "../influxdb_iox_client", features = ["flight", "format"] }
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
internal_types = { path = "../internal_types" }
iox_object_store = { path = "../iox_object_store" }
logfmt = { path = "../logfmt" }
metric = { path = "../metric" }
metric_exporters = { path = "../metric_exporters" }
mutable_buffer = { path = "../mutable_buffer" }
num_cpus = "1.13.0"
object_store = { path = "../object_store" }
observability_deps = { path = "../observability_deps" }
panic_logging = { path = "../panic_logging" }
parquet_catalog = { path = "../parquet_catalog" }
parquet_file = { path = "../parquet_file" }
predicate = { path = "../predicate" }
query = { path = "../query" }
read_buffer = { path = "../read_buffer" }
server = { path = "../server" }
trace = { path = "../trace" }
trace_exporters = { path = "../trace_exporters" }
trace_http = { path = "../trace_http" }
tracker = { path = "../tracker" }
trogging = { path = "../trogging", default-features = false, features = ["structopt"] }
time = { path = "../time" }
# Crates.io dependencies, in alphabetical order
arrow = { version = "6.0", features = ["prettyprint"] }
arrow-flight = "6.0"
async-trait = "0.1"
backtrace = "0.3"
byteorder = "1.3.4"
bytes = "1.0"
chrono = "0.4"
clap = "2.33.1"
csv = "1.1"
dirs = "4.0.0"
dotenv = "0.15.0"
flate2 = "1.0"
futures = "0.3"
hashbrown = "0.11"
http = "0.2.0"
humantime = "2.1.0"
hyper = "0.14"
libc = { version = "0.2" }
log = "0.4"
once_cell = { version = "1.4.0", features = ["parking_lot"] }
parking_lot = "0.11.2"
itertools = "0.10.1"
parquet = "6.0"
pin-project = "1.0"
# used by arrow/datafusion anyway
comfy-table = { version = "4.0", default-features = false }
pprof = { version = "^0.5", default-features = false, features = ["flamegraph", "protobuf"], optional = true }
prost = "0.8"
rustyline = { version = "9.0", default-features = false }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.67"
serde_urlencoded = "0.7.0"
snafu = "0.6.9"
structopt = "0.3.25"
thiserror = "1.0.30"
tikv-jemalloc-ctl = { version = "0.4.0" }
tokio = { version = "1.11", features = ["macros", "rt-multi-thread", "parking_lot", "signal"] }
tokio-stream = { version = "0.1.2", features = ["net"] }
tokio-util = { version = "0.6.3" }
tonic = "0.5.0"
tonic-health = "0.4.0"
tonic-reflection = "0.2.0"
tower = "0.4"
uuid = { version = "0.8", features = ["v4"] }
# jemalloc-sys with unprefixed_malloc_on_supported_platforms feature and heappy are mutually exclusive
tikv-jemalloc-sys = { version = "0.4.0", optional = true, features = ["unprefixed_malloc_on_supported_platforms"] }
heappy = { git = "https://github.com/mkmik/heappy", rev = "20aa466524ac9ce34a4bae29f27ec11869b50e21", features = ["enable_heap_profiler", "jemalloc_shim", "measure_free"], optional = true }
[dev-dependencies]
# Workspace dependencies, in alphabetical order
arrow_util = { path = "../arrow_util" }
entry = { path = "../entry" }
influxdb2_client = { path = "../influxdb2_client" }
influxdb_storage_client = { path = "../influxdb_storage_client" }
influxdb_iox_client = { path = "../influxdb_iox_client", features = ["flight"] }
test_helpers = { path = "../test_helpers" }
parking_lot = "0.11.2"
write_buffer = { path = "../write_buffer" }
# Crates.io dependencies, in alphabetical order
assert_cmd = "2.0.2"
flate2 = "1.0"
hex = "0.4.2"
predicates = "2.0.3"
rand = "0.8.3"
reqwest = "0.11"
tempfile = "3.1.0"
[features]
default = ["jemalloc_replacing_malloc"]
azure = ["object_store/azure"] # Optional Azure Object store support
gcp = ["object_store/gcp"] # Optional GCP object store support
aws = ["object_store/aws"] # Optional AWS / S3 object store support
# pprof is an optional feature for pprof support
# heappy is an optional feature; not on by default as it adds
# runtime overhead on all allocations (calls to malloc).
# Cargo cannot currently implement mutually exclusive features so let's force every build
# to pick either heappy or jemalloc_replacing_malloc feature at least until we figure out something better.
jemalloc_replacing_malloc = ["tikv-jemalloc-sys"]

View File

@ -1,6 +1,7 @@
//! This module implements the `database` CLI command
use chrono::{DateTime, Utc};
use crate::TABLE_STYLE_SINGLE_LINE_BORDERS;
use comfy_table::{Cell, Table};
use influxdb_iox_client::{
connection::Connection,
flight,
@ -11,13 +12,10 @@ use influxdb_iox_client::{
},
write::{self, WriteError},
};
use prettytable::{format, Cell, Row, Table};
use std::{
convert::TryInto, fs::File, io::Read, num::NonZeroU64, path::PathBuf, str::FromStr,
time::Duration,
};
use std::{fs::File, io::Read, num::NonZeroU64, path::PathBuf, str::FromStr, time::Duration};
use structopt::StructOpt;
use thiserror::Error;
use uuid::Uuid;
mod chunk;
mod partition;
@ -139,12 +137,7 @@ struct Create {
/// Get list of databases
#[derive(Debug, StructOpt)]
struct List {
/// Whether to list databases marked as deleted instead, to restore or permanently delete.
#[structopt(long)]
deleted: bool,
/// Whether to list detailed information, including generation IDs, about all databases,
/// whether they are active or marked as deleted.
/// Whether to list detailed information about the databases along with their names.
#[structopt(long)]
detailed: bool,
}
@ -193,14 +186,14 @@ struct Delete {
name: String,
}
/// Restore a deleted database generation
/// Restore a deleted database
#[derive(Debug, StructOpt)]
struct Restore {
/// The generation ID of the database to restore
generation_id: usize,
/// The name of the database to delete
/// The name to give the database upon restoring it
name: String,
/// The UUID of the database to restore
uuid: String,
}
/// All possible subcommands for database
@ -224,7 +217,7 @@ pub async fn command(connection: Connection, config: Config) -> Result<()> {
let mut client = management::Client::new(connection);
#[allow(deprecated)]
let rules = DatabaseRules {
name: command.name,
name: command.name.clone(),
lifecycle_rules: Some(LifecycleRules {
buffer_size_soft: command.buffer_size_soft as _,
buffer_size_hard: command.buffer_size_hard as _,
@ -258,46 +251,35 @@ pub async fn command(connection: Connection, config: Config) -> Result<()> {
..Default::default()
};
client.create_database(rules).await?;
let uuid = client.create_database(rules).await?;
println!("Ok");
println!("Created database {} ({})", command.name, uuid);
}
Command::List(list) => {
let mut client = management::Client::new(connection);
if list.deleted || list.detailed {
let databases = if list.deleted {
client.list_deleted_databases().await?
} else {
client.list_detailed_databases().await?
};
if list.detailed {
let databases = client.list_detailed_databases().await?;
let mut table = Table::new();
table.set_format(*format::consts::FORMAT_NO_LINESEP_WITH_TITLE);
table.set_titles(Row::new(vec![
Cell::new("Deleted at"),
Cell::new("Generation ID"),
Cell::new("Name"),
]));
if !databases.is_empty() {
let mut table = Table::new();
table.load_preset(TABLE_STYLE_SINGLE_LINE_BORDERS);
table.set_header(vec![Cell::new("Name"), Cell::new("UUID")]);
for database in databases {
let deleted_at = database
.deleted_at
.and_then(|t| {
let dt: Result<DateTime<Utc>, _> = t.try_into();
dt.ok().map(|d| d.to_string())
})
.unwrap_or_else(String::new);
table.add_row(Row::new(vec![
Cell::new(&deleted_at),
Cell::new(&database.generation_id.to_string()),
Cell::new(&database.db_name),
]));
for database in databases {
let uuid = Uuid::from_slice(&database.uuid)
.map(|u| u.to_string())
.unwrap_or_else(|_| String::from("<UUID parsing failed>"));
table.add_row(vec![Cell::new(&database.db_name), Cell::new(&uuid)]);
}
print!("{}", table);
}
print!("{}", table);
} else {
let names = client.list_database_names().await?;
println!("{}", names.join("\n"))
if !names.is_empty() {
println!("{}", names.join("\n"))
}
}
}
Command::Get(get) => {
@ -362,15 +344,16 @@ pub async fn command(connection: Connection, config: Config) -> Result<()> {
}
Command::Delete(command) => {
let mut client = management::Client::new(connection);
client.delete_database(&command.name).await?;
let uuid = client.delete_database(&command.name).await?;
println!("Deleted database {}", command.name);
println!("{}", uuid);
}
Command::Restore(command) => {
let mut client = management::Client::new(connection);
client
.restore_database(&command.name, command.generation_id)
.restore_database(&command.name, &command.uuid)
.await?;
println!("Restored database {}", command.name);
println!("Restored database {} ({})", command.name, command.uuid);
}
}

View File

@ -1,59 +1,47 @@
use data_types::DatabaseName;
use crate::structopt_blocks::{object_store::ObjectStoreConfig, server_id::ServerIdConfig};
use iox_object_store::IoxObjectStore;
use object_store::ObjectStore;
use snafu::{OptionExt, ResultExt, Snafu};
use std::{convert::TryFrom, sync::Arc};
use structopt::StructOpt;
use crate::{object_store::ObjectStoreConfig, server_id::ServerIdConfig};
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("Cannot parse object store config: {}", source))]
ObjectStoreParsing {
source: crate::object_store::ParseError,
source: crate::structopt_blocks::object_store::ParseError,
},
#[snafu(display("No server ID provided"))]
NoServerId,
#[snafu(display("Invalid database name: {}", source))]
InvalidDbName {
source: data_types::DatabaseNameError,
#[snafu(display("Can't read server config from object storage: {}", source))]
CantReadServerConfig { source: object_store::Error },
#[snafu(display("Error deserializing server config from protobuf: {}", source))]
CantDeserializeServerConfig {
source: generated_types::DecodeError,
},
#[snafu(display("Can't find a database with this name on this server"))]
CantFindDatabase,
#[snafu(display("Cannot open IOx object store: {}", source))]
IoxObjectStoreFailure {
source: iox_object_store::IoxObjectStoreError,
},
#[snafu(display("Cannot find existing IOx object store"))]
NoIoxObjectStore,
#[snafu(display("Cannot dump catalog: {}", source))]
DumpCatalogFailure {
source: parquet_file::catalog::dump::Error,
source: parquet_catalog::dump::Error,
},
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
/// Interrogate internal database data
#[derive(Debug, StructOpt)]
pub struct Config {
#[structopt(subcommand)]
command: Command,
}
#[derive(Debug, StructOpt)]
enum Command {
/// Dump preserved catalog.
DumpCatalog(DumpCatalog),
}
/// Dump preserved catalog.
#[derive(Debug, StructOpt)]
struct DumpCatalog {
pub struct Config {
// object store config
#[structopt(flatten)]
object_store_config: ObjectStoreConfig,
@ -71,11 +59,12 @@ struct DumpCatalog {
}
#[derive(Debug, StructOpt)]
struct DumpOptions {
/// Show debug output of `DecodedIoxParquetMetaData` if decoding succeeds, show the decoding error otherwise.
pub struct DumpOptions {
/// Show debug output of `DecodedIoxParquetMetaData` if decoding succeeds, show the decoding
/// error otherwise.
///
/// Since this contains the entire Apache Parquet metadata object this is quite verbose and is usually not
/// recommended.
/// Since this contains the entire Apache Parquet metadata object this is quite verbose and is
/// usually not recommended.
#[structopt(long = "--show-parquet-metadata")]
show_parquet_metadata: bool,
@ -94,16 +83,17 @@ struct DumpOptions {
#[structopt(long = "--show-statistics")]
show_statistics: bool,
/// Show unparsed `IoxParquetMetaData` -- which are Apache Thrift bytes -- as part of the transaction actions.
/// Show unparsed `IoxParquetMetaData` -- which are Apache Thrift bytes -- as part of the
/// transaction actions.
///
/// Since this binary data is usually quite hard to read, it is recommended to set this to `false` which will
/// replace the actual bytes with `b"metadata omitted"`. Use the other toggles to instead show the content of the
/// Apache Thrift message.
/// Since this binary data is usually quite hard to read, it is recommended to set this to
/// `false` which will replace the actual bytes with `b"metadata omitted"`. Use the other
/// toggles to instead show the content of the Apache Thrift message.
#[structopt(long = "--show-unparsed-metadata")]
show_unparsed_metadata: bool,
}
impl From<DumpOptions> for parquet_file::catalog::dump::DumpOptions {
impl From<DumpOptions> for parquet_catalog::dump::DumpOptions {
fn from(options: DumpOptions) -> Self {
Self {
show_parquet_metadata: options.show_parquet_metadata,
@ -116,29 +106,32 @@ impl From<DumpOptions> for parquet_file::catalog::dump::DumpOptions {
}
pub async fn command(config: Config) -> Result<()> {
match config.command {
Command::DumpCatalog(dump_catalog) => {
let object_store = ObjectStore::try_from(&dump_catalog.object_store_config)
.context(ObjectStoreParsing)?;
let database_name =
DatabaseName::try_from(dump_catalog.db_name).context(InvalidDbName)?;
let server_id = dump_catalog
.server_id_config
.server_id
.context(NoServerId)?;
let iox_object_store =
IoxObjectStore::find_existing(Arc::new(object_store), server_id, &database_name)
.await
.context(IoxObjectStoreFailure)?
.context(NoIoxObjectStore)?;
let object_store =
Arc::new(ObjectStore::try_from(&config.object_store_config).context(ObjectStoreParsing)?);
let server_id = config.server_id_config.server_id.context(NoServerId)?;
let server_config_bytes = IoxObjectStore::get_server_config_file(&object_store, server_id)
.await
.context(CantReadServerConfig)?;
let mut writer = std::io::stdout();
let options = dump_catalog.dump_options.into();
parquet_file::catalog::dump::dump(&iox_object_store, &mut writer, options)
.await
.context(DumpCatalogFailure)?;
}
}
let server_config =
generated_types::server_config::decode_persisted_server_config(server_config_bytes)
.context(CantDeserializeServerConfig)?;
let database_location = server_config
.databases
.get(&config.db_name)
.context(CantFindDatabase)?;
let iox_object_store =
IoxObjectStore::load_at_root_path(Arc::clone(&object_store), server_id, database_location)
.await
.context(IoxObjectStoreFailure)?;
let mut writer = std::io::stdout();
let options = config.dump_options.into();
parquet_catalog::dump::dump(&iox_object_store, &mut writer, options)
.await
.context(DumpCatalogFailure)?;
Ok(())
}

View File

@ -0,0 +1,41 @@
use snafu::{ResultExt, Snafu};
use structopt::StructOpt;
mod dump_catalog;
mod print_cpu;
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("Error in dump-catalog subcommand: {}", source))]
DumpCatalogError { source: dump_catalog::Error },
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
/// Interrogate internal database data
#[derive(Debug, StructOpt)]
pub struct Config {
#[structopt(subcommand)]
command: Command,
}
#[derive(Debug, StructOpt)]
enum Command {
/// Dump preserved catalog.
DumpCatalog(dump_catalog::Config),
/// Prints what CPU features are used by the compiler by default.
PrintCpu,
}
pub async fn command(config: Config) -> Result<()> {
match config.command {
Command::DumpCatalog(dump_catalog) => dump_catalog::command(dump_catalog)
.await
.context(DumpCatalogError),
Command::PrintCpu => {
print_cpu::main();
Ok(())
}
}
}

View File

@ -1,4 +1,3 @@
#![recursion_limit = "512"]
/// Prints what CPU features are used by the compiler by default.
///
/// Script from:
@ -29,7 +28,7 @@ macro_rules! print_if_feature_enabled {
}
}
fn main() {
pub fn main() {
println!("rustc is using the following target options");
print_if_feature_enabled!(

View File

@ -0,0 +1,106 @@
//! Implementation of command line option for running server
use std::sync::Arc;
use crate::{
influxdb_ioxd::{
self,
server_type::{
common_state::{CommonServerState, CommonServerStateError},
database::{
setup::{make_application, make_server},
DatabaseServerType,
},
},
},
structopt_blocks::{boolean_flag::BooleanFlag, run_config::RunConfig},
};
use structopt::StructOpt;
use thiserror::Error;
#[derive(Debug, Error)]
pub enum Error {
#[error("Run: {0}")]
Run(#[from] influxdb_ioxd::Error),
#[error("Cannot setup server: {0}")]
Setup(#[from] crate::influxdb_ioxd::server_type::database::setup::Error),
#[error("Invalid config: {0}")]
InvalidConfig(#[from] CommonServerStateError),
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
#[derive(Debug, StructOpt)]
#[structopt(
name = "run",
about = "Runs in database mode",
long_about = "Run the IOx database server.\n\nThe configuration options below can be \
set either with the command line flags or with the specified environment \
variable. If there is a file named '.env' in the current working directory, \
it is sourced before loading the configuration.
Configuration is loaded from the following sources (highest precedence first):
- command line arguments
- user set environment variables
- .env file contents
- pre-configured default values"
)]
pub struct Config {
#[structopt(flatten)]
pub(crate) run_config: RunConfig,
/// The number of threads to use for all worker pools.
///
/// IOx uses a pool with `--num-threads` threads *each* for
/// 1. Handling API requests
/// 2. Running queries.
/// 3. Reorganizing data (e.g. compacting chunks)
///
/// If not specified, defaults to the number of cores on the system
#[structopt(long = "--num-worker-threads", env = "INFLUXDB_IOX_NUM_WORKER_THREADS")]
pub num_worker_threads: Option<usize>,
// TODO(marco): Remove once the database-run-mode (aka the `server` crate) cannot handle routing anymore and we're
// fully migrated to the new router code.
/// When IOx nodes need to talk to remote peers they consult an internal remote address
/// mapping. This mapping is populated via API calls. If the mapping doesn't produce
/// a result, this config entry allows generating a hostname from a template:
/// occurrences of the "{id}" substring will be replaced with the remote Server ID.
///
/// Example: http://node-{id}.ioxmydomain.com:8082
#[structopt(long = "--remote-template", env = "INFLUXDB_IOX_REMOTE_TEMPLATE")]
pub remote_template: Option<String>,
/// Automatically wipe the preserved catalog on error
#[structopt(
long = "--wipe-catalog-on-error",
env = "INFLUXDB_IOX_WIPE_CATALOG_ON_ERROR",
// TODO: Don't automatically wipe on error (#1522)
default_value = "yes"
)]
pub wipe_catalog_on_error: BooleanFlag,
/// Skip replaying the write buffer and seek to high watermark instead.
#[structopt(
long = "--skip-replay",
env = "INFLUXDB_IOX_SKIP_REPLAY",
default_value = "no"
)]
pub skip_replay_and_seek_instead: BooleanFlag,
}
pub async fn command(config: Config) -> Result<()> {
let common_state = CommonServerState::from_config(config.run_config.clone())?;
let application = make_application(&config, common_state.trace_collector()).await?;
let app_server = make_server(Arc::clone(&application), &config);
let server_type = Arc::new(DatabaseServerType::new(
Arc::clone(&application),
Arc::clone(&app_server),
&common_state,
));
Ok(influxdb_ioxd::main(common_state, server_type).await?)
}

View File

@ -0,0 +1,53 @@
use snafu::{ResultExt, Snafu};
use structopt::StructOpt;
use crate::structopt_blocks::run_config::RunConfig;
pub mod database;
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("Error in database subcommand: {}", source))]
DatabaseError { source: database::Error },
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
#[derive(Debug, StructOpt)]
pub struct Config {
// TODO(marco) remove this
/// Config for database mode, for backwards compatibility reasons.
#[structopt(flatten)]
database_config: database::Config,
#[structopt(subcommand)]
command: Option<Command>,
}
impl Config {
pub fn run_config(&self) -> &RunConfig {
match &self.command {
None => &self.database_config.run_config,
Some(Command::Database(config)) => &config.run_config,
}
}
}
#[derive(Debug, StructOpt)]
enum Command {
Database(database::Config),
}
pub async fn command(config: Config) -> Result<()> {
match config.command {
None => {
println!(
"WARNING: Not specifying the run-mode is deprecated. Defaulting to 'database'."
);
database::command(config.database_config)
.await
.context(DatabaseError)
}
Some(Command::Database(config)) => database::command(config).await.context(DatabaseError),
}
}

View File

@ -1,9 +1,9 @@
use crate::TABLE_STYLE_SINGLE_LINE_BORDERS;
use comfy_table::{Cell, Table};
use influxdb_iox_client::{connection::Connection, management};
use structopt::StructOpt;
use thiserror::Error;
use prettytable::{format, Cell, Row, Table};
#[allow(clippy::enum_variant_names)]
#[derive(Debug, Error)]
pub enum Error {
@ -51,17 +51,14 @@ pub async fn command(connection: Connection, config: Config) -> Result<()> {
println!("no remotes configured");
} else {
let mut table = Table::new();
table.set_format(*format::consts::FORMAT_NO_LINESEP_WITH_TITLE);
table.set_titles(Row::new(vec![
Cell::new("ID"),
Cell::new("Connection string"),
]));
table.load_preset(TABLE_STYLE_SINGLE_LINE_BORDERS);
table.set_header(vec![Cell::new("ID"), Cell::new("Connection string")]);
for i in remotes {
table.add_row(Row::new(vec![
table.add_row(vec![
Cell::new(&format!("{}", i.id)),
Cell::new(&i.connection_string),
]));
]);
}
print!("{}", table);
}

View File

@ -17,7 +17,7 @@ pub fn init_logs_and_tracing(
log_verbose_count: u8,
config: &crate::commands::run::Config,
) -> Result<TroggingGuard, trogging::Error> {
let mut logging_config = config.logging_config.clone();
let mut logging_config = config.run_config().logging_config.clone();
// Handle the case if -v/-vv is specified both before and after the server
// command

View File

@ -0,0 +1,298 @@
use crate::influxdb_ioxd::server_type::{common_state::CommonServerState, ServerType};
use futures::{future::FusedFuture, pin_mut, FutureExt};
use hyper::server::conn::AddrIncoming;
use observability_deps::tracing::{error, info};
use panic_logging::SendPanicsToTracing;
use snafu::{ResultExt, Snafu};
use std::{net::SocketAddr, sync::Arc};
use trace_http::ctx::TraceHeaderParser;
mod http;
mod jemalloc;
mod planner;
pub(crate) mod rpc;
pub(crate) mod server_type;
pub(crate) mod serving_readiness;
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("Unable to bind to listen for HTTP requests on {}: {}", addr, source))]
StartListeningHttp {
addr: SocketAddr,
source: hyper::Error,
},
#[snafu(display("Unable to bind to listen for gRPC requests on {}: {}", addr, source))]
StartListeningGrpc {
addr: SocketAddr,
source: std::io::Error,
},
#[snafu(display("Error serving HTTP: {}", source))]
ServingHttp { source: hyper::Error },
#[snafu(display("Error serving RPC: {}", source))]
ServingRpc { source: server_type::RpcError },
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
/// On unix platforms we want to intercept SIGINT and SIGTERM
/// This method returns if either is signalled
#[cfg(unix)]
async fn wait_for_signal() {
use tokio::signal::unix::{signal, SignalKind};
let mut term = signal(SignalKind::terminate()).expect("failed to register signal handler");
let mut int = signal(SignalKind::interrupt()).expect("failed to register signal handler");
tokio::select! {
_ = term.recv() => info!("Received SIGTERM"),
_ = int.recv() => info!("Received SIGINT"),
}
}
#[cfg(windows)]
/// ctrl_c is the cross-platform way to intercept the equivalent of SIGINT
/// This method returns if this occurs
async fn wait_for_signal() {
let _ = tokio::signal::ctrl_c().await;
}
#[cfg(all(not(feature = "heappy"), not(feature = "jemalloc_replacing_malloc")))]
fn build_malloc_conf() -> String {
"system".to_string()
}
#[cfg(all(feature = "heappy", not(feature = "jemalloc_replacing_malloc")))]
fn build_malloc_conf() -> String {
"heappy".to_string()
}
#[cfg(all(not(feature = "heappy"), feature = "jemalloc_replacing_malloc"))]
fn build_malloc_conf() -> String {
tikv_jemalloc_ctl::config::malloc_conf::mib()
.unwrap()
.read()
.unwrap()
.to_string()
}
#[cfg(all(feature = "heappy", feature = "jemalloc_replacing_malloc"))]
fn build_malloc_conf() -> String {
compile_error!("must use exactly one memory allocator")
}
/// This is the entry point for the IOx server.
///
/// The precise server type depends on `T`. This entry point ensures that the given `server_type` is started using best
/// practice, e.g. that we print the GIT-hash and malloc-configs, that a panic handler is installed, etc.
///
/// Due to the invasive nature of the setup routine, this should not be used during unit tests.
pub async fn main<T>(common_state: CommonServerState, server_type: Arc<T>) -> Result<()>
where
T: ServerType,
{
let git_hash = option_env!("GIT_HASH").unwrap_or("UNKNOWN");
let num_cpus = num_cpus::get();
let build_malloc_conf = build_malloc_conf();
info!(
git_hash,
num_cpus,
%build_malloc_conf,
"InfluxDB IOx server starting",
);
if (common_state.run_config().grpc_bind_address == common_state.run_config().http_bind_address)
&& (common_state.run_config().grpc_bind_address.port() != 0)
{
error!(
grpc_bind_address=%common_state.run_config().grpc_bind_address,
http_bind_address=%common_state.run_config().http_bind_address,
"grpc and http bind addresses must differ",
);
std::process::exit(1);
}
// Install custom panic handler and forget about it.
//
// This leaks the handler and prevents it from ever being dropped during the
// lifetime of the program - this is actually a good thing, as it prevents
// the panic handler from being removed while unwinding a panic (which in
// turn, causes a panic - see #548)
let f = SendPanicsToTracing::new();
std::mem::forget(f);
// Register jemalloc metrics
server_type
.metric_registry()
.register_instrument("jemalloc_metrics", jemalloc::JemallocMetrics::new);
let grpc_listener = grpc_listener(common_state.run_config().grpc_bind_address.into()).await?;
let http_listener = http_listener(common_state.run_config().http_bind_address.into()).await?;
let trace_exporter = common_state.trace_exporter();
let r = serve(common_state, grpc_listener, http_listener, server_type).await;
if let Some(trace_exporter) = trace_exporter {
if let Err(e) = trace_exporter.drain().await {
error!(%e, "error draining trace exporter");
}
}
r
}
pub async fn grpc_listener(addr: SocketAddr) -> Result<tokio::net::TcpListener> {
let listener = tokio::net::TcpListener::bind(addr)
.await
.context(StartListeningGrpc { addr })?;
match listener.local_addr() {
Ok(local_addr) => info!(%local_addr, "bound gRPC listener"),
Err(_) => info!(%addr, "bound gRPC listener"),
}
Ok(listener)
}
pub async fn http_listener(addr: SocketAddr) -> Result<AddrIncoming> {
let listener = AddrIncoming::bind(&addr).context(StartListeningHttp { addr })?;
info!(bind_addr=%listener.local_addr(), "bound HTTP listener");
Ok(listener)
}
/// Instantiates the gRPC and HTTP listeners and returns a Future that completes when
/// these listeners, the Server, Databases, etc... have all exited.
///
/// This is effectively the "main loop" for influxdb_iox
async fn serve<T>(
common_state: CommonServerState,
grpc_listener: tokio::net::TcpListener,
http_listener: AddrIncoming,
server_type: Arc<T>,
) -> Result<()>
where
T: ServerType,
{
// Construct a token to trigger shutdown of API services
let frontend_shutdown = tokio_util::sync::CancellationToken::new();
let trace_header_parser = TraceHeaderParser::new()
.with_jaeger_trace_context_header_name(
&common_state
.run_config()
.tracing_config
.traces_jaeger_trace_context_header_name,
)
.with_jaeger_debug_name(
&common_state
.run_config()
.tracing_config
.traces_jaeger_debug_name,
);
// Construct and start up gRPC server
let grpc_server = rpc::serve(
grpc_listener,
Arc::clone(&server_type),
trace_header_parser.clone(),
frontend_shutdown.clone(),
common_state.serving_readiness().clone(),
)
.fuse();
info!("gRPC server listening");
let http_server = http::serve(
http_listener,
Arc::clone(&server_type),
frontend_shutdown.clone(),
trace_header_parser,
)
.fuse();
info!("HTTP server listening");
// Purposefully use log not tokio-tracing to ensure correctly hooked up
log::info!("InfluxDB IOx server ready");
// Get IOx background worker task
let server_worker = Arc::clone(&server_type).background_worker().fuse();
// Shutdown signal
let signal = wait_for_signal().fuse();
// There are two different select macros - tokio::select and futures::select
//
// tokio::select takes ownership of the passed future "moving" it into the
// select block. This works well when not running select inside a loop, or
// when using a future that can be dropped and recreated, often the case
// with tokio's futures e.g. `channel.recv()`
//
// futures::select is more flexible as it doesn't take ownership of the provided
// future. However, to safely provide this it imposes some additional
// requirements
//
// All passed futures must implement FusedFuture - it is not safe to poll a future
// that has returned Poll::Ready(_). A FusedFuture has an is_terminated()
// method that indicates if it is safe to poll - e.g. false if it has
// returned Poll::Ready(_). futures::select uses this to implement its
// functionality. futures::FutureExt adds a fuse() method that
// wraps an arbitrary future and makes it a FusedFuture
//
// The additional requirement of futures::select is that if the future passed
// outlives the select block, it must be Unpin or already Pinned
// pin_mut constructs a Pin<&mut T> from a T by preventing moving the T
// from the current stack frame and constructing a Pin<&mut T> to it
pin_mut!(signal);
pin_mut!(server_worker);
pin_mut!(grpc_server);
pin_mut!(http_server);
// Return the first error encountered
let mut res = Ok(());
// Graceful shutdown can be triggered by sending SIGINT or SIGTERM to the
// process, or by a background task exiting - most likely with an error
//
// Graceful shutdown should then proceed in the following order
// 1. Stop accepting new HTTP and gRPC requests and drain existing connections
// 2. Trigger shutdown of internal background worker loops
//
// This is important to ensure background tasks, such as polling the tracker
// registry, don't exit before the HTTP and gRPC requests that depend on them have completed
while !grpc_server.is_terminated() && !http_server.is_terminated() {
futures::select! {
_ = signal => info!("Shutdown requested"),
_ = server_worker => {
info!("server worker shutdown prematurely");
},
result = grpc_server => match result {
Ok(_) => info!("gRPC server shutdown"),
Err(error) => {
error!(%error, "gRPC server error");
res = res.and(Err(Error::ServingRpc{source: error}))
}
},
result = http_server => match result {
Ok(_) => info!("HTTP server shutdown"),
Err(error) => {
error!(%error, "HTTP server error");
res = res.and(Err(Error::ServingHttp{source: error}))
}
},
}
frontend_shutdown.cancel()
}
info!("frontend shutdown completed");
server_type.shutdown_background_worker();
if !server_worker.is_terminated() {
server_worker.await;
}
info!("backend shutdown completed");
res
}
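
The fuse/pin/select dance above is easy to get wrong, so here is a standalone, minimal sketch of the idiom the comments describe (not IOx code; it assumes the `futures` crate plus `tokio` with the `rt`, `macros`, `signal`, and `time` features):

```rust
use futures::{future::FutureExt, pin_mut, select};

#[tokio::main]
async fn main() {
    // A long-running future stands in for the gRPC/HTTP server futures.
    let server = async {
        tokio::time::sleep(std::time::Duration::from_secs(3600)).await;
    }
    .fuse();
    // Ctrl-C stands in for the SIGINT/SIGTERM handling above.
    let shutdown = tokio::signal::ctrl_c().fuse();

    // `futures::select!` needs `FusedFuture` + pinned futures, hence `fuse()` + `pin_mut!`.
    pin_mut!(server);
    pin_mut!(shutdown);

    select! {
        _ = server => println!("server future finished"),
        _ = shutdown => println!("shutdown requested"),
    }
}
```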

View File

@ -1,5 +1,5 @@
use hashbrown::HashMap;
use metric::{Attributes, Metric, U64Counter};
use metric::{Attributes, Metric, U64Counter, U64Histogram, U64HistogramOptions};
use parking_lot::{MappedMutexGuard, Mutex, MutexGuard};
/// Line protocol ingest metrics
@ -14,6 +14,9 @@ pub struct LineProtocolMetrics {
/// The number of LP bytes ingested
ingest_bytes: Metric<U64Counter>,
/// Distribution of LP batch sizes.
ingest_batch_size_bytes: Metric<U64Histogram>,
/// Database metrics keyed by database name
databases: Mutex<HashMap<String, LineProtocolDatabaseMetrics>>,
}
@ -38,6 +41,12 @@ struct LineProtocolDatabaseMetrics {
/// The number of LP bytes ingested unsuccessfully
ingest_bytes_error: U64Counter,
/// Distribution of LP batch sizes ingested successfully
ingest_batch_size_bytes_ok: U64Histogram,
/// Distribution of LP batch sizes ingested unsuccessfully
ingest_batch_size_bytes_error: U64Histogram,
}
impl LineProtocolMetrics {
@ -47,6 +56,28 @@ impl LineProtocolMetrics {
ingest_fields: registry
.register_metric("ingest_fields", "total LP field values ingested"),
ingest_bytes: registry.register_metric("ingest_bytes", "total LP bytes ingested"),
ingest_batch_size_bytes: registry.register_metric_with_options(
"ingest_batch_size_bytes",
"distribution of ingested LP batch sizes",
|| {
U64HistogramOptions::new([
1024,
16 * 1024,
32 * 1024,
128 * 1024,
256 * 1024,
512 * 1024,
768 * 1024,
1024 * 1024,
4 * 1024 * 1024,
8 * 1024 * 1024,
16 * 1024 * 1024,
24 * 1024 * 1024,
32 * 1024 * 1024,
u64::MAX,
])
},
),
databases: Default::default(),
}
}
@ -66,11 +97,13 @@ impl LineProtocolMetrics {
metrics.ingest_lines_ok.inc(lines as u64);
metrics.ingest_fields_ok.inc(fields as u64);
metrics.ingest_bytes_ok.inc(bytes as u64);
metrics.ingest_batch_size_bytes_ok.record(bytes as u64);
}
false => {
metrics.ingest_lines_error.inc(lines as u64);
metrics.ingest_fields_error.inc(fields as u64);
metrics.ingest_bytes_error.inc(bytes as u64);
metrics.ingest_batch_size_bytes_error.record(bytes as u64);
}
}
}
@ -97,11 +130,15 @@ impl LineProtocolDatabaseMetrics {
let ingest_lines_ok = metrics.ingest_lines.recorder(attributes.clone());
let ingest_fields_ok = metrics.ingest_fields.recorder(attributes.clone());
let ingest_bytes_ok = metrics.ingest_bytes.recorder(attributes.clone());
let ingest_batch_size_bytes_ok =
metrics.ingest_batch_size_bytes.recorder(attributes.clone());
attributes.insert("status", "error");
let ingest_lines_error = metrics.ingest_lines.recorder(attributes.clone());
let ingest_fields_error = metrics.ingest_fields.recorder(attributes.clone());
let ingest_bytes_error = metrics.ingest_bytes.recorder(attributes.clone());
let ingest_batch_size_bytes_error =
metrics.ingest_batch_size_bytes.recorder(attributes.clone());
Self {
ingest_lines_ok,
@ -110,6 +147,8 @@ impl LineProtocolDatabaseMetrics {
ingest_fields_error,
ingest_bytes_ok,
ingest_bytes_error,
ingest_batch_size_bytes_ok,
ingest_batch_size_bytes_error,
}
}
}

View File

@ -0,0 +1,318 @@
use std::{convert::Infallible, num::NonZeroI32, sync::Arc};
use hyper::{
http::HeaderValue,
server::conn::{AddrIncoming, AddrStream},
Body, Method, Request, Response,
};
use observability_deps::tracing::{debug, error};
use serde::Deserialize;
use snafu::{ResultExt, Snafu};
use tokio_util::sync::CancellationToken;
use tower::Layer;
use trace_http::{ctx::TraceHeaderParser, tower::TraceLayer};
use crate::influxdb_ioxd::server_type::{RouteError, ServerType};
#[cfg(feature = "heappy")]
mod heappy;
#[cfg(feature = "pprof")]
mod pprof;
pub mod metrics;
#[cfg(test)]
pub mod test_utils;
#[allow(clippy::large_enum_variant)]
#[derive(Debug, Snafu)]
pub enum ApplicationError {
/// Error for when we could not parse the http query uri (e.g.
/// `?foo=bar&bar=baz`)
#[snafu(display("Invalid query string in HTTP URI '{}': {}", query_string, source))]
InvalidQueryString {
query_string: String,
source: serde_urlencoded::de::Error,
},
#[snafu(display("PProf error: {}", source))]
PProf {
source: Box<dyn std::error::Error + Send + Sync>,
},
#[cfg(feature = "heappy")]
#[snafu(display("Heappy error: {}", source))]
HeappyError { source: heappy::Error },
#[snafu(display("Protobuf error: {}", source))]
Prost { source: prost::EncodeError },
#[snafu(display("Protobuf error: {}", source))]
ProstIO { source: std::io::Error },
#[snafu(display("Empty flamegraph"))]
EmptyFlamegraph,
#[snafu(display("heappy support is not compiled"))]
HeappyIsNotCompiled,
#[snafu(display("pprof support is not compiled"))]
PProfIsNotCompiled,
#[snafu(display("Route error from run mode: {}", source))]
RunModeRouteError { source: Box<dyn RouteError> },
}
impl RouteError for ApplicationError {
fn response(&self) -> Response<Body> {
match self {
Self::InvalidQueryString { .. } => self.bad_request(),
Self::PProf { .. } => self.internal_error(),
Self::Prost { .. } => self.internal_error(),
Self::ProstIO { .. } => self.internal_error(),
Self::EmptyFlamegraph => self.no_content(),
Self::HeappyIsNotCompiled => self.internal_error(),
Self::PProfIsNotCompiled => self.internal_error(),
#[cfg(feature = "heappy")]
Self::HeappyError { .. } => self.internal_error(),
Self::RunModeRouteError { source } => source.response(),
}
}
}
pub async fn serve<M>(
addr: AddrIncoming,
server_type: Arc<M>,
shutdown: CancellationToken,
trace_header_parser: TraceHeaderParser,
) -> Result<(), hyper::Error>
where
M: ServerType,
{
let metric_registry = server_type.metric_registry();
let trace_collector = server_type.trace_collector();
let trace_layer = TraceLayer::new(trace_header_parser, metric_registry, trace_collector, false);
hyper::Server::builder(addr)
.serve(hyper::service::make_service_fn(|_conn: &AddrStream| {
let server_type = Arc::clone(&server_type);
let service = hyper::service::service_fn(move |request: Request<_>| {
route_request(Arc::clone(&server_type), request)
});
let service = trace_layer.layer(service);
futures::future::ready(Ok::<_, Infallible>(service))
}))
.with_graceful_shutdown(shutdown.cancelled())
.await
}
async fn route_request<M>(
server_type: Arc<M>,
mut req: Request<Body>,
) -> Result<Response<Body>, Infallible>
where
M: ServerType,
{
// we don't need the authorization header and we don't want to accidentally log it.
req.headers_mut().remove("authorization");
debug!(request = ?req,"Processing request");
let method = req.method().clone();
let uri = req.uri().clone();
let content_length = req.headers().get("content-length").cloned();
let response = match (method.clone(), uri.path()) {
(Method::GET, "/health") => health(),
(Method::GET, "/metrics") => handle_metrics(server_type.as_ref()),
(Method::GET, "/debug/pprof") => pprof_home(req).await,
(Method::GET, "/debug/pprof/profile") => pprof_profile(req).await,
(Method::GET, "/debug/pprof/allocs") => pprof_heappy_profile(req).await,
_ => server_type
.route_http_request(req)
.await
.map_err(|e| Box::new(e) as _)
.context(RunModeRouteError),
};
// TODO: Move logging to TraceLayer
match response {
Ok(response) => {
debug!(?response, "Successfully processed request");
Ok(response)
}
Err(error) => {
error!(%error, %method, %uri, ?content_length, "Error while handling request");
Ok(error.response())
}
}
}
fn health() -> Result<Response<Body>, ApplicationError> {
let response_body = "OK";
Ok(Response::new(Body::from(response_body.to_string())))
}
fn handle_metrics<M>(server_type: &M) -> Result<Response<Body>, ApplicationError>
where
M: ServerType,
{
let mut body: Vec<u8> = Default::default();
let mut reporter = metric_exporters::PrometheusTextEncoder::new(&mut body);
server_type.metric_registry().report(&mut reporter);
Ok(Response::new(Body::from(body)))
}
async fn pprof_home(req: Request<Body>) -> Result<Response<Body>, ApplicationError> {
let default_host = HeaderValue::from_static("localhost");
let host = req
.headers()
.get("host")
.unwrap_or(&default_host)
.to_str()
.unwrap_or_default();
let profile_cmd = format!(
"/debug/pprof/profile?seconds={}",
PProfArgs::default_seconds()
);
let allocs_cmd = format!(
"/debug/pprof/allocs?seconds={}",
PProfAllocsArgs::default_seconds()
);
Ok(Response::new(Body::from(format!(
r#"<a href="{}">http://{}{}</a><br><a href="{}">http://{}{}</a>"#,
profile_cmd, host, profile_cmd, allocs_cmd, host, allocs_cmd,
))))
}
#[derive(Debug, Deserialize)]
struct PProfArgs {
#[serde(default = "PProfArgs::default_seconds")]
seconds: u64,
#[serde(default = "PProfArgs::default_frequency")]
frequency: NonZeroI32,
}
impl PProfArgs {
fn default_seconds() -> u64 {
30
}
// 99Hz to avoid coinciding with special periods
fn default_frequency() -> NonZeroI32 {
NonZeroI32::new(99).unwrap()
}
}
#[derive(Debug, Deserialize)]
struct PProfAllocsArgs {
#[serde(default = "PProfAllocsArgs::default_seconds")]
seconds: u64,
// The sampling interval is the number of bytes that have to be cumulatively allocated for a sample to be taken.
//
// For example, if the sampling interval is 99 and you're doing a million 40-byte allocations,
// the allocations profile will account for roughly 16MB instead of 40MB.
// Heappy will adjust the estimate for sampled recordings, but for now that feature is not yet implemented.
#[serde(default = "PProfAllocsArgs::default_interval")]
interval: NonZeroI32,
}
impl PProfAllocsArgs {
fn default_seconds() -> u64 {
30
}
// 1 means: sample every allocation.
fn default_interval() -> NonZeroI32 {
NonZeroI32::new(1).unwrap()
}
}
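
Putting the defaults above together, the profiling endpoints wired up in `route_request` accept the following query parameters (paths and defaults are taken from this file; the host and port depend on the configured HTTP bind address):

```text
GET /debug/pprof                                    # HTML index linking to both profiles
GET /debug/pprof/profile?seconds=30&frequency=99    # CPU profile (requires the `pprof` feature)
GET /debug/pprof/allocs?seconds=30&interval=1       # heap profile (requires the `heappy` feature)
```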
#[cfg(feature = "pprof")]
async fn pprof_profile(req: Request<Body>) -> Result<Response<Body>, ApplicationError> {
use ::pprof::protos::Message;
let query_string = req.uri().query().unwrap_or_default();
let query: PProfArgs =
serde_urlencoded::from_str(query_string).context(InvalidQueryString { query_string })?;
let report = self::pprof::dump_rsprof(query.seconds, query.frequency.get())
.await
.map_err(|e| Box::new(e) as _)
.context(PProf)?;
let mut body: Vec<u8> = Vec::new();
// render flamegraph when opening in the browser
// otherwise render as protobuf; works great with: go tool pprof http://..../debug/pprof/profile
if req
.headers()
.get_all("Accept")
.iter()
.flat_map(|i| i.to_str().unwrap_or_default().split(','))
.any(|i| i == "text/html" || i == "image/svg+xml")
{
report
.flamegraph(&mut body)
.map_err(|e| Box::new(e) as _)
.context(PProf)?;
if body.is_empty() {
return EmptyFlamegraph.fail();
}
} else {
let profile = report
.pprof()
.map_err(|e| Box::new(e) as _)
.context(PProf)?;
profile.encode(&mut body).context(Prost)?;
}
Ok(Response::new(Body::from(body)))
}
#[cfg(not(feature = "pprof"))]
async fn pprof_profile(_req: Request<Body>) -> Result<Response<Body>, ApplicationError> {
PProfIsNotCompiled {}.fail()
}
// If heappy support is enabled, call it
#[cfg(feature = "heappy")]
async fn pprof_heappy_profile(req: Request<Body>) -> Result<Response<Body>, ApplicationError> {
let query_string = req.uri().query().unwrap_or_default();
let query: PProfAllocsArgs =
serde_urlencoded::from_str(query_string).context(InvalidQueryString { query_string })?;
let report = self::heappy::dump_heappy_rsprof(query.seconds, query.interval.get())
.await
.context(HeappyError)?;
let mut body: Vec<u8> = Vec::new();
// render flamegraph when opening in the browser
// otherwise render as protobuf;
// works great with: go tool pprof http://..../debug/pprof/allocs
if req
.headers()
.get_all("Accept")
.iter()
.flat_map(|i| i.to_str().unwrap_or_default().split(','))
.any(|i| i == "text/html" || i == "image/svg+xml")
{
report.flamegraph(&mut body);
if body.is_empty() {
return EmptyFlamegraph.fail();
}
} else {
report.write_pprof(&mut body).context(ProstIO)?
}
Ok(Response::new(Body::from(body)))
}
// Return error if heappy not enabled
#[cfg(not(feature = "heappy"))]
async fn pprof_heappy_profile(_req: Request<Body>) -> Result<Response<Body>, ApplicationError> {
HeappyIsNotCompiled {}.fail()
}

View File

@ -0,0 +1,144 @@
use std::{
fmt::Debug,
net::{IpAddr, Ipv4Addr, SocketAddr},
sync::Arc,
};
use http::header::CONTENT_TYPE;
use hyper::{server::conn::AddrIncoming, StatusCode};
use serde::de::DeserializeOwned;
use tokio::task::JoinHandle;
use tokio_util::sync::CancellationToken;
use crate::influxdb_ioxd::{http::serve, server_type::ServerType};
/// checks a http response against expected results
pub async fn check_response(
description: &str,
response: Result<reqwest::Response, reqwest::Error>,
expected_status: StatusCode,
expected_body: Option<&str>,
) {
// Print the response so if the test fails, we have a log of
// what went wrong
println!("{} response: {:?}", description, response);
if let Ok(response) = response {
let status = response.status();
let body = response
.text()
.await
.expect("Converting request body to string");
assert_eq!(status, expected_status);
if let Some(expected_body) = expected_body {
assert!(
body.contains(expected_body),
"Could not find expected in body.\n\nExpected:\n{}\n\nBody:\n{}",
expected_body,
body
);
}
} else {
panic!("Unexpected error response: {:?}", response);
}
}
#[allow(dead_code)]
pub async fn check_json_response<T: DeserializeOwned + Eq + Debug>(
client: &reqwest::Client,
url: &str,
expected_status: StatusCode,
) -> T {
let response = client.get(url).send().await;
// Print the response so if the test fails, we have a log of
// what went wrong
println!("{} response: {:?}", url, response);
if let Ok(response) = response {
let status = response.status();
let body: T = response
.json()
.await
.expect("Converting request body to string");
assert_eq!(status, expected_status);
body
} else {
panic!("Unexpected error response: {:?}", response);
}
}
pub fn get_content_type(response: &Result<reqwest::Response, reqwest::Error>) -> String {
if let Ok(response) = response {
response
.headers()
.get(CONTENT_TYPE)
.map(|v| v.to_str().unwrap())
.unwrap_or("")
.to_string()
} else {
"".to_string()
}
}
pub struct TestServer<M>
where
M: ServerType,
{
join_handle: JoinHandle<()>,
url: String,
server_type: Arc<M>,
}
impl<M> TestServer<M>
where
M: ServerType,
{
pub fn new(server_type: Arc<M>) -> Self {
// NB: specify port 0 to let the OS pick the port.
let bind_addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 0);
let addr = AddrIncoming::bind(&bind_addr).expect("failed to bind server");
let url = format!("http://{}", addr.local_addr());
let trace_header_parser = trace_http::ctx::TraceHeaderParser::new()
.with_jaeger_trace_context_header_name("uber-trace-id");
let server_type_captured = Arc::clone(&server_type);
let join_handle = tokio::task::spawn(async {
serve(
addr,
server_type_captured,
CancellationToken::new(),
trace_header_parser,
)
.await
.unwrap();
});
println!("Started server at {}", url);
Self {
join_handle,
url,
server_type,
}
}
pub fn url(&self) -> &str {
&self.url
}
pub fn server_type(&self) -> &Arc<M> {
&self.server_type
}
}
impl<M> Drop for TestServer<M>
where
M: ServerType,
{
fn drop(&mut self) {
self.join_handle.abort();
}
}

View File

@ -0,0 +1,195 @@
use std::sync::Arc;
use tokio::net::TcpListener;
use tokio_util::sync::CancellationToken;
use tonic::transport::NamedService;
use tonic_health::server::HealthReporter;
use trace_http::ctx::TraceHeaderParser;
use crate::influxdb_ioxd::{
server_type::{RpcError, ServerType},
serving_readiness::ServingReadiness,
};
pub mod error;
pub(crate) mod testing;
/// Returns the name of the gRPC service S.
pub fn service_name<S: NamedService>(_: &S) -> &'static str {
S::NAME
}
#[derive(Debug)]
pub struct RpcBuilderInput {
pub socket: TcpListener,
pub trace_header_parser: TraceHeaderParser,
pub shutdown: CancellationToken,
pub serving_readiness: ServingReadiness,
}
#[derive(Debug)]
pub struct RpcBuilder<T> {
pub inner: T,
pub health_reporter: HealthReporter,
pub shutdown: CancellationToken,
pub socket: TcpListener,
pub serving_readiness: ServingReadiness,
}
/// Adds a gRPC service to the builder, and registers it with the
/// health reporter
macro_rules! add_service {
($builder:ident, $svc:expr) => {
let $builder = {
// `inner` might be required to be `mut` or not depending if we're acting on:
// - a `Server`, no service added yet, no `mut` required
// - a `Router`, some service was added already, `mut` required
#[allow(unused_mut)]
{
use $crate::influxdb_ioxd::rpc::{service_name, RpcBuilder};
let RpcBuilder {
mut inner,
mut health_reporter,
shutdown,
socket,
serving_readiness,
} = $builder;
let service = $svc;
let status = tonic_health::ServingStatus::Serving;
health_reporter
.set_service_status(service_name(&service), status)
.await;
let inner = inner.add_service(service);
RpcBuilder {
inner,
health_reporter,
shutdown,
socket,
serving_readiness,
}
}
};
};
}
pub(crate) use add_service;
/// Adds a gRPC service to the builder gated behind the serving
/// readiness check, and registers it with the health reporter
macro_rules! add_gated_service {
($builder:ident, $svc:expr) => {
let $builder = {
let service = $svc;
let interceptor = $builder.serving_readiness.clone().into_interceptor();
let service = tonic::codegen::InterceptedService::new(service, interceptor);
add_service!($builder, service);
$builder
};
};
}
pub(crate) use add_gated_service;
/// Creates a [`RpcBuilder`] from [`RpcBuilderInput`].
///
/// The resulting builder can be used w/ [`add_service`] and [`add_gated_service`]. After adding all services it should
/// be used w/ [`serve_builder`].
macro_rules! setup_builder {
($input:ident, $server_type:ident) => {{
use $crate::influxdb_ioxd::{
rpc::{add_service, testing, RpcBuilder},
server_type::ServerType,
};
let RpcBuilderInput {
socket,
trace_header_parser,
shutdown,
serving_readiness,
} = $input;
let (health_reporter, health_service) = tonic_health::server::health_reporter();
let reflection_service = tonic_reflection::server::Builder::configure()
.register_encoded_file_descriptor_set(generated_types::FILE_DESCRIPTOR_SET)
.build()
.expect("gRPC reflection data broken");
let builder = tonic::transport::Server::builder();
let builder = builder.layer(trace_http::tower::TraceLayer::new(
trace_header_parser,
$server_type.metric_registry(),
$server_type.trace_collector(),
true,
));
let builder = RpcBuilder {
inner: builder,
health_reporter,
shutdown,
socket,
serving_readiness,
};
// important that this one is NOT gated so that it can answer health requests
add_service!(builder, health_service);
add_service!(builder, reflection_service);
add_service!(builder, testing::make_server());
builder
}};
}
pub(crate) use setup_builder;
/// Serve a server constructed using [`RpcBuilder`].
macro_rules! serve_builder {
($builder:ident) => {{
use tokio_stream::wrappers::TcpListenerStream;
use $crate::influxdb_ioxd::rpc::RpcBuilder;
let RpcBuilder {
inner,
shutdown,
socket,
..
} = $builder;
let stream = TcpListenerStream::new(socket);
inner
.serve_with_incoming_shutdown(stream, shutdown.cancelled())
.await?;
}};
}
pub(crate) use serve_builder;
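
Taken together, `setup_builder!`/`serve_builder!` reduce to the tonic idiom below: serve an incoming `TcpListenerStream` until a `CancellationToken` fires. This sketch keeps only that wiring (the trace layer, reflection service, readiness interceptor, and IOx services are omitted), so it illustrates the mechanism rather than reproducing the real builder.

```rust
use tokio::net::TcpListener;
use tokio_stream::wrappers::TcpListenerStream;
use tokio_util::sync::CancellationToken;

async fn serve_until_cancelled(
    socket: TcpListener,
    shutdown: CancellationToken,
) -> Result<(), tonic::transport::Error> {
    let (_health_reporter, health_service) = tonic_health::server::health_reporter();

    tonic::transport::Server::builder()
        // The real builder layers tracing middleware and adds all IOx services here.
        .add_service(health_service)
        // Stop accepting connections and drain once `shutdown` is cancelled.
        .serve_with_incoming_shutdown(TcpListenerStream::new(socket), shutdown.cancelled())
        .await
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let socket = TcpListener::bind("127.0.0.1:0").await?;
    let shutdown = CancellationToken::new();
    shutdown.cancel(); // cancel immediately so the example returns
    serve_until_cancelled(socket, shutdown).await?;
    Ok(())
}
```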
/// Instantiate a gRPC server listening on the specified address and
/// implementing the IOx, Storage, and Flight gRPC interfaces on top of the
/// underlying hyper server instance. Resolves when the server has
/// shut down.
pub async fn serve<T>(
socket: TcpListener,
server_type: Arc<T>,
trace_header_parser: TraceHeaderParser,
shutdown: CancellationToken,
serving_readiness: ServingReadiness,
) -> Result<(), RpcError>
where
T: ServerType,
{
let builder_input = RpcBuilderInput {
socket,
trace_header_parser,
shutdown,
serving_readiness,
};
server_type.server_grpc(builder_input).await
}

View File

@ -67,9 +67,7 @@ pub fn default_server_error_handler(error: server::Error) -> tonic::Status {
Error::DatabaseInit { source } => {
tonic::Status::invalid_argument(format!("Cannot initialize database: {}", source))
}
e @ Error::StoreSequencedEntryFailures { .. } => {
tonic::Status::invalid_argument(e.to_string())
}
e @ Error::StoreWriteErrors { .. } => tonic::Status::invalid_argument(e.to_string()),
error => {
error!(?error, "Unexpected error");
InternalError {}.into()
@ -130,18 +128,11 @@ pub fn default_database_error_handler(error: server::database::Error) -> tonic::
error!(%source, "Unexpected error deleting database");
InternalError {}.into()
}
Error::NoActiveDatabaseToDelete { db_name } => NotFound {
resource_type: "database".to_string(),
resource_name: db_name,
..Default::default()
}
.into(),
Error::CannotRestoreActiveDatabase { .. } => {
Error::CannotDeleteInactiveDatabase { .. } => {
tonic::Status::failed_precondition(error.to_string())
}
Error::CannotRestoreDatabaseInObjectStorage { source } => {
error!(%source, "Unexpected error restoring database");
InternalError {}.into()
Error::CannotRestoreActiveDatabase { .. } => {
tonic::Status::failed_precondition(error.to_string())
}
}
}

View File

@ -0,0 +1,64 @@
use std::sync::Arc;
use snafu::{ResultExt, Snafu};
use trace::TraceCollector;
use crate::{
influxdb_ioxd::serving_readiness::ServingReadiness, structopt_blocks::run_config::RunConfig,
};
#[derive(Debug, Snafu)]
pub enum CommonServerStateError {
#[snafu(display("Cannot create tracing pipeline: {}", source))]
Tracing { source: trace_exporters::Error },
}
/// Common state used by all server types (e.g. `Database` and `Router`)
#[derive(Debug)]
pub struct CommonServerState {
run_config: RunConfig,
serving_readiness: ServingReadiness,
trace_exporter: Option<Arc<trace_exporters::export::AsyncExporter>>,
}
impl CommonServerState {
pub fn from_config(run_config: RunConfig) -> Result<Self, CommonServerStateError> {
let serving_readiness = run_config.initial_serving_state.clone().into();
let trace_exporter = run_config.tracing_config.build().context(Tracing)?;
Ok(Self {
run_config,
serving_readiness,
trace_exporter,
})
}
#[cfg(test)]
pub fn for_testing() -> Self {
use structopt::StructOpt;
Self::from_config(
RunConfig::from_iter_safe(["not_used".to_string()].into_iter())
.expect("default parsing should work"),
)
.expect("default configs should work")
}
pub fn run_config(&self) -> &RunConfig {
&self.run_config
}
pub fn serving_readiness(&self) -> &ServingReadiness {
&self.serving_readiness
}
pub fn trace_exporter(&self) -> Option<Arc<trace_exporters::export::AsyncExporter>> {
self.trace_exporter.clone()
}
pub fn trace_collector(&self) -> Option<Arc<dyn TraceCollector>> {
self.trace_exporter
.clone()
.map(|x| -> Arc<dyn TraceCollector> { x })
}
}

View File

@ -1,389 +1,140 @@
use crate::{
commands::run::Config,
object_store::{check_object_store, warn_about_inmem_store},
};
use futures::{future::FusedFuture, pin_mut, FutureExt};
use hyper::server::conn::AddrIncoming;
use object_store::{self, ObjectStore};
use observability_deps::tracing::{error, info, warn};
use panic_logging::SendPanicsToTracing;
use server::{
connection::ConnectionManagerImpl as ConnectionManager, ApplicationState, RemoteTemplate,
Server as AppServer, ServerConfig,
};
use snafu::{ResultExt, Snafu};
use std::{convert::TryFrom, net::SocketAddr, sync::Arc};
use std::sync::Arc;
use async_trait::async_trait;
use futures::{future::FusedFuture, FutureExt};
use hyper::{Body, Request, Response};
use metric::Registry;
use observability_deps::tracing::{error, info};
use server::{connection::ConnectionManager, ApplicationState, Server};
use tokio_util::sync::CancellationToken;
use trace::TraceCollector;
use trace_http::ctx::TraceHeaderParser;
use crate::influxdb_ioxd::{
http::metrics::LineProtocolMetrics,
rpc::RpcBuilderInput,
server_type::{RpcError, ServerType},
serving_readiness::ServingReadiness,
};
mod http;
mod jemalloc;
mod planner;
mod rpc;
pub(crate) mod serving_readiness;
pub mod setup;
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("Unable to bind to listen for HTTP requests on {}: {}", addr, source))]
StartListeningHttp {
addr: SocketAddr,
source: hyper::Error,
},
pub use self::http::ApplicationError;
#[snafu(display("Unable to bind to listen for gRPC requests on {}: {}", addr, source))]
StartListeningGrpc {
addr: SocketAddr,
source: std::io::Error,
},
use super::common_state::CommonServerState;
#[snafu(display("Error serving HTTP: {}", source))]
ServingHttp { source: hyper::Error },
#[snafu(display("Error serving RPC: {}", source))]
ServingRpc { source: rpc::Error },
#[snafu(display("Cannot parse object store config: {}", source))]
ObjectStoreParsing {
source: crate::object_store::ParseError,
},
#[snafu(display("Cannot check object store config: {}", source))]
ObjectStoreCheck {
source: crate::object_store::CheckError,
},
#[snafu(display("Cannot create tracing pipeline: {}", source))]
Tracing { source: trace_exporters::Error },
#[derive(Debug)]
pub struct DatabaseServerType<M>
where
M: ConnectionManager + std::fmt::Debug + Send + Sync + 'static,
{
pub application: Arc<ApplicationState>,
pub server: Arc<Server<M>>,
pub lp_metrics: Arc<LineProtocolMetrics>,
pub max_request_size: usize,
pub serving_readiness: ServingReadiness,
shutdown: CancellationToken,
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
impl<M> DatabaseServerType<M>
where
M: ConnectionManager + std::fmt::Debug + Send + Sync + 'static,
{
pub fn new(
application: Arc<ApplicationState>,
server: Arc<Server<M>>,
common_state: &CommonServerState,
) -> Self {
let lp_metrics = Arc::new(LineProtocolMetrics::new(
application.metric_registry().as_ref(),
));
/// On unix platforms we want to intercept SIGINT and SIGTERM.
/// This method returns if either is signalled.
#[cfg(unix)]
async fn wait_for_signal() {
use tokio::signal::unix::{signal, SignalKind};
let mut term = signal(SignalKind::terminate()).expect("failed to register signal handler");
let mut int = signal(SignalKind::interrupt()).expect("failed to register signal handler");
tokio::select! {
_ = term.recv() => info!("Received SIGTERM"),
_ = int.recv() => info!("Received SIGINT"),
}
}
#[cfg(windows)]
/// ctrl_c is the cross-platform way to intercept the equivalent of SIGINT
/// This method returns if this occurs
async fn wait_for_signal() {
let _ = tokio::signal::ctrl_c().await;
}
async fn make_application(config: &Config) -> Result<Arc<ApplicationState>> {
warn_about_inmem_store(&config.object_store_config);
let object_store =
ObjectStore::try_from(&config.object_store_config).context(ObjectStoreParsing)?;
check_object_store(&object_store)
.await
.context(ObjectStoreCheck)?;
let object_storage = Arc::new(object_store);
Ok(Arc::new(ApplicationState::new(
object_storage,
config.num_worker_threads,
)))
}
fn make_server(
application: Arc<ApplicationState>,
config: &Config,
) -> Arc<AppServer<ConnectionManager>> {
let server_config = ServerConfig {
remote_template: config.remote_template.clone().map(RemoteTemplate::new),
wipe_catalog_on_error: config.wipe_catalog_on_error.into(),
skip_replay_and_seek_instead: config.skip_replay_and_seek_instead.into(),
};
if config.grpc_bind_address == config.http_bind_address && config.grpc_bind_address.port() != 0
{
error!(
%config.grpc_bind_address,
%config.http_bind_address,
"grpc and http bind addresses must differ",
);
std::process::exit(1);
}
let connection_manager = ConnectionManager::new();
let app_server = Arc::new(AppServer::new(
connection_manager,
application,
server_config,
));
// if this ID isn't set the server won't be usable until this is set via an API
// call
if let Some(id) = config.server_id_config.server_id {
app_server.set_id(id).expect("server id already set");
} else {
warn!("server ID not set. ID must be set via the INFLUXDB_IOX_ID config or API before writing or querying data.");
}
app_server
}
#[cfg(all(not(feature = "heappy"), not(feature = "jemalloc_replacing_malloc")))]
fn build_malloc_conf() -> String {
"system".to_string()
}
#[cfg(all(feature = "heappy", not(feature = "jemalloc_replacing_malloc")))]
fn build_malloc_conf() -> String {
"heappy".to_string()
}
#[cfg(all(not(feature = "heappy"), feature = "jemalloc_replacing_malloc"))]
fn build_malloc_conf() -> String {
tikv_jemalloc_ctl::config::malloc_conf::mib()
.unwrap()
.read()
.unwrap()
.to_string()
}
#[cfg(all(feature = "heappy", feature = "jemalloc_replacing_malloc"))]
fn build_malloc_conf() -> String {
compile_error!("must use exactly one memory allocator")
}
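
The four `cfg`-gated `build_malloc_conf` variants above select exactly one allocator description at compile time and turn an invalid feature combination into a build failure. A hedged sketch of the same pattern with hypothetical features `alloc_a` and `alloc_b` (not real IOx features):

```rust
// With no features enabled this prints "system"; enabling both `alloc_a` and
// `alloc_b` fails to compile, mirroring build_malloc_conf above.
#[cfg(all(not(feature = "alloc_a"), not(feature = "alloc_b")))]
fn allocator_name() -> &'static str {
    "system"
}

#[cfg(all(feature = "alloc_a", not(feature = "alloc_b")))]
fn allocator_name() -> &'static str {
    "alloc_a"
}

#[cfg(all(not(feature = "alloc_a"), feature = "alloc_b"))]
fn allocator_name() -> &'static str {
    "alloc_b"
}

#[cfg(all(feature = "alloc_a", feature = "alloc_b"))]
compile_error!("features `alloc_a` and `alloc_b` are mutually exclusive");

fn main() {
    println!("allocator: {}", allocator_name());
}
```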
/// This is the entry point for the IOx server. `config` represents
/// command line arguments, if any.
pub async fn main(config: Config) -> Result<()> {
let git_hash = option_env!("GIT_HASH").unwrap_or("UNKNOWN");
let num_cpus = num_cpus::get();
let build_malloc_conf = build_malloc_conf();
info!(
git_hash,
num_cpus,
%build_malloc_conf,
"InfluxDB IOx server starting",
);
// Install custom panic handler and forget about it.
//
// This leaks the handler and prevents it from ever being dropped during the
// lifetime of the program - this is actually a good thing, as it prevents
// the panic handler from being removed while unwinding a panic (which in
// turn, causes a panic - see #548)
let f = SendPanicsToTracing::new();
std::mem::forget(f);
let application = make_application(&config).await?;
// Register jemalloc metrics
application
.metric_registry()
.register_instrument("jemalloc_metrics", jemalloc::JemallocMetrics::new);
let app_server = make_server(Arc::clone(&application), &config);
let grpc_listener = grpc_listener(config.grpc_bind_address).await?;
let http_listener = http_listener(config.http_bind_address).await?;
let async_exporter = config.tracing_config.build().context(Tracing)?;
let trace_collector = async_exporter
.clone()
.map(|x| -> Arc<dyn TraceCollector> { x });
let r = serve(
config,
application,
grpc_listener,
http_listener,
trace_collector,
app_server,
)
.await;
if let Some(async_exporter) = async_exporter {
if let Err(e) = async_exporter.drain().await {
error!(%e, "error draining trace exporter");
Self {
application,
server,
lp_metrics,
max_request_size: common_state.run_config().max_http_request_size,
serving_readiness: common_state.serving_readiness().clone(),
shutdown: CancellationToken::new(),
}
}
r
}
async fn grpc_listener(addr: SocketAddr) -> Result<tokio::net::TcpListener> {
let listener = tokio::net::TcpListener::bind(addr)
.await
.context(StartListeningGrpc { addr })?;
#[async_trait]
impl<M> ServerType for DatabaseServerType<M>
where
M: ConnectionManager + std::fmt::Debug + Send + Sync + 'static,
{
type RouteError = ApplicationError;
match listener.local_addr() {
Ok(local_addr) => info!(%local_addr, "bound gRPC listener"),
Err(_) => info!(%addr, "bound gRPC listener"),
fn metric_registry(&self) -> Arc<Registry> {
Arc::clone(self.application.metric_registry())
}
Ok(listener)
}
fn trace_collector(&self) -> Option<Arc<dyn TraceCollector>> {
self.application.trace_collector().clone()
}
async fn http_listener(addr: SocketAddr) -> Result<AddrIncoming> {
let listener = AddrIncoming::bind(&addr).context(StartListeningHttp { addr })?;
info!(bind_addr=%listener.local_addr(), "bound HTTP listener");
async fn route_http_request(
&self,
req: Request<Body>,
) -> Result<Response<Body>, Self::RouteError> {
self::http::route_request(self, req).await
}
Ok(listener)
}
async fn server_grpc(self: Arc<Self>, builder_input: RpcBuilderInput) -> Result<(), RpcError> {
self::rpc::server_grpc(self, builder_input).await
}
/// Instantiates the gRPC and HTTP listeners and returns a Future that completes when
/// these listeners, the Server, Databases, etc... have all exited.
///
/// This is effectively the "main loop" for influxdb_iox
async fn serve(
config: Config,
application: Arc<ApplicationState>,
grpc_listener: tokio::net::TcpListener,
http_listener: AddrIncoming,
trace_collector: Option<Arc<dyn TraceCollector>>,
app_server: Arc<AppServer<ConnectionManager>>,
) -> Result<()> {
// Construct a token to trigger shutdown of API services
let frontend_shutdown = tokio_util::sync::CancellationToken::new();
async fn background_worker(self: Arc<Self>) {
let server_worker = self.server.join().fuse();
futures::pin_mut!(server_worker);
let trace_header_parser = TraceHeaderParser::new()
.with_jaeger_trace_context_header_name(
config
.tracing_config
.traces_jaeger_trace_context_header_name,
)
.with_jaeger_debug_name(config.tracing_config.traces_jaeger_debug_name);
// Construct and start up gRPC server
let grpc_server = rpc::serve(
grpc_listener,
Arc::clone(&application),
Arc::clone(&app_server),
trace_header_parser.clone(),
trace_collector.clone(),
frontend_shutdown.clone(),
config.initial_serving_state.into(),
)
.fuse();
info!("gRPC server listening");
let max_http_request_size = config.max_http_request_size;
let http_server = http::serve(
http_listener,
Arc::clone(&application),
Arc::clone(&app_server),
frontend_shutdown.clone(),
max_http_request_size,
trace_header_parser,
trace_collector,
)
.fuse();
info!("HTTP server listening");
// Purposefully use log not tokio-tracing to ensure correctly hooked up
log::info!("InfluxDB IOx server ready");
// Get IOx background worker task
let server_worker = app_server.join().fuse();
// Shutdown signal
let signal = wait_for_signal().fuse();
// There are two different select macros - tokio::select and futures::select
//
// tokio::select takes ownership of the passed future "moving" it into the
// select block. This works well when not running select inside a loop, or
// when using a future that can be dropped and recreated, often the case
// with tokio's futures e.g. `channel.recv()`
//
// futures::select is more flexible as it doesn't take ownership of the provided
// future. However, to safely provide this it imposes some additional
// requirements
//
// All passed futures must implement FusedFuture - it is invalid to poll a future
// that has returned Poll::Ready(_). A FusedFuture has an is_terminated()
// method that indicates if it is safe to poll - e.g. false if it has
// returned Poll::Ready(_). futures::select uses this to implement its
// functionality. futures::FutureExt adds a fuse() method that
// wraps an arbitrary future and makes it a FusedFuture
//
// The additional requirement of futures::select is that if the future passed
// outlives the select block, it must be Unpin or already Pinned
// pin_mut constructs a Pin<&mut T> from a T by preventing moving the T
// from the current stack frame and constructing a Pin<&mut T> to it
pin_mut!(signal);
pin_mut!(server_worker);
pin_mut!(grpc_server);
pin_mut!(http_server);
// Return the first error encountered
let mut res = Ok(());
// Graceful shutdown can be triggered by sending SIGINT or SIGTERM to the
// process, or by a background task exiting - most likely with an error
//
// Graceful shutdown should then proceed in the following order
// 1. Stop accepting new HTTP and gRPC requests and drain existing connections
// 2. Trigger shutdown of internal background workers loops
//
// This is important to ensure background tasks, such as polling the tracker
// registry, don't exit before HTTP and gRPC requests dependent on them
while !grpc_server.is_terminated() && !http_server.is_terminated() {
futures::select! {
_ = signal => info!("Shutdown requested"),
_ = server_worker => {
info!("server worker shutdown prematurely");
},
result = grpc_server => match result {
Ok(_) => info!("gRPC server shutdown"),
Err(error) => {
error!(%error, "gRPC server error");
res = res.and(Err(Error::ServingRpc{source: error}))
}
},
result = http_server => match result {
Ok(_) => info!("HTTP server shutdown"),
Err(error) => {
error!(%error, "HTTP server error");
res = res.and(Err(Error::ServingHttp{source: error}))
}
},
_ = server_worker => {},
_ = self.shutdown.cancelled().fuse() => {},
}
frontend_shutdown.cancel()
}
self.server.shutdown();
info!("frontend shutdown completed");
app_server.shutdown();
if !server_worker.is_terminated() {
match server_worker.await {
Ok(_) => info!("server worker shutdown"),
Err(error) => error!(%error, "server worker error"),
if !server_worker.is_terminated() {
match server_worker.await {
Ok(_) => info!("server worker shutdown"),
Err(error) => error!(%error, "server worker error"),
}
}
info!("server completed shutting down");
self.application.join();
info!("shared application state completed shutting down");
}
info!("server completed shutting down");
application.join();
info!("shared application state completed shutting down");
res
fn shutdown_background_worker(&self) {
self.server.shutdown();
self.application.join();
}
}
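
The long comment in the removed `serve` loop above explains why `futures::select!` needs `FusedFuture` and pinned futures; the new `background_worker` relies on the same rules. A minimal, self-contained sketch of that pattern (plain tokio/futures only, no IOx types; the sleep stands in for the server worker):

```rust
use futures::{pin_mut, FutureExt};
use tokio_util::sync::CancellationToken;

#[tokio::main]
async fn main() {
    let shutdown = CancellationToken::new();

    // `fuse()` turns these into FusedFutures, so select! can consult
    // `is_terminated()` and never poll a completed future again.
    let worker = tokio::time::sleep(std::time::Duration::from_millis(10)).fuse();
    let cancelled = shutdown.cancelled().fuse();

    // Pin both futures to the stack so the loop can poll them by reference.
    pin_mut!(worker);
    pin_mut!(cancelled);

    loop {
        futures::select! {
            _ = worker => {
                println!("background worker finished");
                break;
            }
            _ = cancelled => {
                println!("shutdown requested");
                break;
            }
        }
    }
}
```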
#[cfg(test)]
mod tests {
use crate::{
commands::run::database::Config,
influxdb_ioxd::{
grpc_listener, http_listener, serve,
server_type::database::setup::{make_application, make_server},
},
structopt_blocks::run_config::RunConfig,
};
use super::*;
use ::http::{header::HeaderName, HeaderValue};
use data_types::{database_rules::DatabaseRules, DatabaseName};
use influxdb_iox_client::connection::Connection;
use server::rules::ProvidedDatabaseRules;
use std::{convert::TryInto, num::NonZeroU64};
use futures::pin_mut;
use influxdb_iox_client::{connection::Connection, flight::PerformQuery};
use server::{connection::ConnectionManagerImpl, rules::ProvidedDatabaseRules};
use std::{convert::TryInto, net::SocketAddr, num::NonZeroU64};
use structopt::StructOpt;
use tokio::task::JoinHandle;
use trace::{
@ -400,28 +151,28 @@ mod tests {
"--grpc-bind",
"127.0.0.1:0",
]);
config.server_id_config.server_id = server_id.map(|x| x.try_into().unwrap());
config.run_config.server_id_config.server_id = server_id.map(|x| x.try_into().unwrap());
config
}
async fn test_serve(
config: Config,
config: RunConfig,
application: Arc<ApplicationState>,
server: Arc<AppServer<ConnectionManager>>,
server: Arc<Server<ConnectionManagerImpl>>,
) {
let grpc_listener = grpc_listener(config.grpc_bind_address).await.unwrap();
let http_listener = http_listener(config.grpc_bind_address).await.unwrap();
let grpc_listener = grpc_listener(config.grpc_bind_address.into())
.await
.unwrap();
let http_listener = http_listener(config.grpc_bind_address.into())
.await
.unwrap();
serve(
config,
application,
grpc_listener,
http_listener,
None,
server,
)
.await
.unwrap()
let common_state = CommonServerState::from_config(config).unwrap();
let server_type = Arc::new(DatabaseServerType::new(application, server, &common_state));
serve(common_state, grpc_listener, http_listener, server_type)
.await
.unwrap()
}
#[tokio::test]
@ -430,12 +181,12 @@ mod tests {
// Create a server and wait for it to initialize
let config = test_config(Some(23));
let application = make_application(&config).await.unwrap();
let application = make_application(&config, None).await.unwrap();
let server = make_server(Arc::clone(&application), &config);
server.wait_for_init().await.unwrap();
// Start serving
let serve_fut = test_serve(config, application, Arc::clone(&server)).fuse();
let serve_fut = test_serve(config.run_config, application, Arc::clone(&server)).fuse();
pin_mut!(serve_fut);
// Nothing to trigger termination, so serve future should continue running
@ -458,10 +209,10 @@ mod tests {
async fn test_server_shutdown_uninit() {
// Create a server but don't set a server id
let config = test_config(None);
let application = make_application(&config).await.unwrap();
let application = make_application(&config, None).await.unwrap();
let server = make_server(Arc::clone(&application), &config);
let serve_fut = test_serve(config, application, Arc::clone(&server)).fuse();
let serve_fut = test_serve(config.run_config, application, Arc::clone(&server)).fuse();
pin_mut!(serve_fut);
// Nothing should have triggered shutdown so serve shouldn't finish
@ -489,11 +240,11 @@ mod tests {
async fn test_server_panic() {
// Create a server and wait for it to initialize
let config = test_config(Some(999999999));
let application = make_application(&config).await.unwrap();
let application = make_application(&config, None).await.unwrap();
let server = make_server(Arc::clone(&application), &config);
server.wait_for_init().await.unwrap();
let serve_fut = test_serve(config, application, Arc::clone(&server)).fuse();
let serve_fut = test_serve(config.run_config, application, Arc::clone(&server)).fuse();
pin_mut!(serve_fut);
// Nothing should have triggered shutdown so serve shouldn't finish
@ -516,7 +267,7 @@ mod tests {
async fn test_database_panic() {
// Create a server and wait for it to initialize
let config = test_config(Some(23));
let application = make_application(&config).await.unwrap();
let application = make_application(&config, None).await.unwrap();
let server = make_server(Arc::clone(&application), &config);
server.wait_for_init().await.unwrap();
@ -529,7 +280,12 @@ mod tests {
let other_db = server.database(&other_db_name).unwrap();
let serve_fut = test_serve(config, Arc::clone(&application), Arc::clone(&server)).fuse();
let serve_fut = test_serve(
config.run_config,
Arc::clone(&application),
Arc::clone(&server),
)
.fuse();
pin_mut!(serve_fut);
// Nothing should have triggered shutdown so serve shouldn't finish
@ -593,27 +349,33 @@ mod tests {
collector: &Arc<T>,
) -> (
SocketAddr,
Arc<AppServer<ConnectionManager>>,
JoinHandle<Result<()>>,
Arc<Server<ConnectionManagerImpl>>,
JoinHandle<crate::influxdb_ioxd::Result<()>>,
) {
let config = test_config(Some(23));
let application = make_application(&config).await.unwrap();
let application = make_application(&config, Some(Arc::<T>::clone(collector)))
.await
.unwrap();
let server = make_server(Arc::clone(&application), &config);
server.wait_for_init().await.unwrap();
let grpc_listener = grpc_listener(config.grpc_bind_address).await.unwrap();
let http_listener = http_listener(config.grpc_bind_address).await.unwrap();
let grpc_listener = grpc_listener(config.run_config.grpc_bind_address.into())
.await
.unwrap();
let http_listener = http_listener(config.run_config.grpc_bind_address.into())
.await
.unwrap();
let addr = grpc_listener.local_addr().unwrap();
let fut = serve(
config,
let common_state = CommonServerState::from_config(config.run_config.clone()).unwrap();
let server_type = Arc::new(DatabaseServerType::new(
application,
grpc_listener,
http_listener,
Some(Arc::<T>::clone(collector)),
Arc::clone(&server),
);
&common_state,
));
let fut = serve(common_state, grpc_listener, http_listener, server_type);
let join = tokio::spawn(fut);
(addr, server, join)
@ -690,6 +452,11 @@ mod tests {
join.await.unwrap().unwrap();
}
/// Ensure that the query is fully executed.
async fn consume_query(mut query: PerformQuery) {
while query.next().await.unwrap().is_some() {}
}
#[tokio::test]
async fn test_query_tracing() {
let collector = Arc::new(RingBufferTraceCollector::new(100));
@ -721,10 +488,13 @@ mod tests {
.unwrap();
let mut flight = influxdb_iox_client::flight::Client::new(conn.clone());
flight
.perform_query(db_info.db_name(), "select * from cpu;")
.await
.unwrap();
consume_query(
flight
.perform_query(db_info.db_name(), "select * from cpu;")
.await
.unwrap(),
)
.await;
flight
.perform_query("nonexistent", "select * from cpu;")
@ -774,8 +544,7 @@ mod tests {
let prepare_sql_span = child(sql_span, "prepare_sql").unwrap();
child(prepare_sql_span, "prepare_plan").unwrap();
let collect_span = child(ctx_span, "collect").unwrap();
let execute_span = child(collect_span, "execute_stream_partitioned").unwrap();
let execute_span = child(ctx_span, "execute_stream_partitioned").unwrap();
let coalesce_span = child(execute_span, "CoalescePartitionsEx").unwrap();
// validate spans from DataFusion ExecutionPlan are present

View File

@ -1,5 +1,6 @@
//! Implements the native gRPC IOx query API using Arrow Flight
use std::fmt::Debug;
use std::task::Poll;
use std::{pin::Pin, sync::Arc};
use arrow::{
@ -13,19 +14,20 @@ use arrow_flight::{
Action, ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo,
HandshakeRequest, HandshakeResponse, PutResult, SchemaAsIpc, SchemaResult, Ticket,
};
use futures::Stream;
use datafusion::physical_plan::ExecutionPlan;
use futures::{SinkExt, Stream, StreamExt};
use pin_project::{pin_project, pinned_drop};
use serde::Deserialize;
use snafu::{ResultExt, Snafu};
use tokio::task::JoinHandle;
use tonic::{Request, Response, Streaming};
use data_types::{DatabaseName, DatabaseNameError};
use observability_deps::tracing::{info, warn};
use query::exec::ExecutionContextProvider;
use query::exec::{ExecutionContextProvider, IOxExecutionContext};
use server::{connection::ConnectionManager, Server};
use crate::influxdb_ioxd::rpc::error::default_server_error_handler;
use super::super::planner::Planner;
use crate::influxdb_ioxd::{planner::Planner, rpc::error::default_server_error_handler};
#[allow(clippy::enum_variant_names)]
#[derive(Debug, Snafu)]
@ -65,7 +67,7 @@ pub enum Error {
#[snafu(display("Error while planning query: {}", source))]
Planning {
source: super::super::planner::Error,
source: crate::influxdb_ioxd::planner::Error,
},
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
@ -153,8 +155,6 @@ where
Err(tonic::Status::unimplemented("Not yet implemented"))
}
// TODO: Stream results back directly by using `execute` instead of `collect`
// https://docs.rs/datafusion/3.0.0/datafusion/physical_plan/trait.ExecutionPlan.html#tymethod.execute
async fn do_get(
&self,
request: Request<Ticket>,
@ -182,32 +182,7 @@ where
.await
.context(Planning)?;
// execute the query
let results = ctx
.collect(Arc::clone(&physical_plan))
.await
.map_err(|e| Box::new(e) as _)
.context(Query {
database_name: &read_info.database_name,
})?;
let options = arrow::ipc::writer::IpcWriteOptions::default();
let schema = Arc::new(optimize_schema(&physical_plan.schema()));
let schema_flight_data = SchemaAsIpc::new(&schema, &options).into();
let mut flights = vec![schema_flight_data];
for batch in results {
let batch = optimize_record_batch(&batch, Arc::clone(&schema))?;
let (flight_dictionaries, flight_batch) =
arrow_flight::utils::flight_data_from_arrow_batch(&batch, &options);
flights.extend(flight_dictionaries);
flights.push(flight_batch);
}
let output = futures::stream::iter(flights.into_iter().map(Ok));
let output = GetStream::new(ctx, physical_plan, read_info.database_name).await?;
Ok(Response::new(Box::pin(output) as Self::DoGetStream))
}
@ -268,6 +243,132 @@ where
}
}
#[pin_project(PinnedDrop)]
struct GetStream {
#[pin]
rx: futures::channel::mpsc::Receiver<Result<FlightData, tonic::Status>>,
join_handle: JoinHandle<()>,
done: bool,
}
impl GetStream {
async fn new(
ctx: IOxExecutionContext,
physical_plan: Arc<dyn ExecutionPlan>,
database_name: String,
) -> Result<Self, tonic::Status> {
// setup channel
let (mut tx, rx) = futures::channel::mpsc::channel::<Result<FlightData, tonic::Status>>(1);
// get schema
let schema = Arc::new(optimize_schema(&physical_plan.schema()));
// setup stream
let options = arrow::ipc::writer::IpcWriteOptions::default();
let schema_flight_data = SchemaAsIpc::new(&schema, &options).into();
let mut stream_record_batches = ctx
.execute_stream(Arc::clone(&physical_plan))
.await
.map_err(|e| Box::new(e) as _)
.context(Query {
database_name: &database_name,
})?;
let join_handle = tokio::spawn(async move {
if tx.send(Ok(schema_flight_data)).await.is_err() {
// receiver gone
return;
}
while let Some(batch_or_err) = stream_record_batches.next().await {
match batch_or_err {
Ok(batch) => {
match optimize_record_batch(&batch, Arc::clone(&schema)) {
Ok(batch) => {
let (flight_dictionaries, flight_batch) =
arrow_flight::utils::flight_data_from_arrow_batch(
&batch, &options,
);
for dict in flight_dictionaries {
if tx.send(Ok(dict)).await.is_err() {
// receiver is gone
return;
}
}
if tx.send(Ok(flight_batch)).await.is_err() {
// receiver is gone
return;
}
}
Err(e) => {
// failure sending here is OK because we're cutting the stream anyways
tx.send(Err(e.into())).await.ok();
// end stream
return;
}
}
}
Err(e) => {
// failure sending here is OK because we're cutting the stream anyways
tx.send(Err(Error::Query {
database_name: database_name.clone(),
source: Box::new(e),
}
.into()))
.await
.ok();
// end stream
return;
}
}
}
});
Ok(Self {
rx,
join_handle,
done: false,
})
}
}
#[pinned_drop]
impl PinnedDrop for GetStream {
fn drop(self: Pin<&mut Self>) {
self.join_handle.abort();
}
}
impl Stream for GetStream {
type Item = Result<FlightData, tonic::Status>;
fn poll_next(
self: Pin<&mut Self>,
cx: &mut std::task::Context<'_>,
) -> std::task::Poll<Option<Self::Item>> {
let this = self.project();
if *this.done {
Poll::Ready(None)
} else {
match this.rx.poll_next(cx) {
Poll::Ready(None) => {
*this.done = true;
Poll::Ready(None)
}
e @ Poll::Ready(Some(Err(_))) => {
*this.done = true;
e
}
other => other,
}
}
}
}
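
`GetStream` above couples a spawned producer task to the gRPC response stream through a bounded channel of capacity 1, so query execution is backpressured by the client, and it aborts the producer if the client goes away. A stripped-down sketch of that producer/consumer shape (tokio + futures only; plain integers stand in for `FlightData`):

```rust
use futures::{channel::mpsc, SinkExt, StreamExt};

#[tokio::main]
async fn main() {
    // Capacity 1: the producer can run at most one item ahead of the consumer.
    let (mut tx, mut rx) = mpsc::channel::<Result<i32, String>>(1);

    let producer = tokio::spawn(async move {
        for i in 0..5 {
            // `send` waits for the consumer to make room; if the receiver is
            // gone we stop producing, mirroring the early returns above.
            if tx.send(Ok(i)).await.is_err() {
                return;
            }
        }
    });

    // The receiver half is what GetStream exposes through its Stream impl.
    while let Some(item) = rx.next().await {
        println!("got {:?}", item);
    }

    // GetStream additionally aborts the producer in its PinnedDrop impl; here
    // the producer simply finishes once the channel is drained.
    producer.await.expect("producer task panicked");
}
```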
/// Some batches are small slices of the underlying arrays.
/// At this stage we only know the number of rows in the record batch
/// and the sizes in bytes of the backing buffers of the column arrays.

View File

@ -1,16 +1,16 @@
use std::convert::TryFrom;
use std::fmt::Debug;
use std::sync::Arc;
use data_types::chunk_metadata::ChunkId;
use data_types::{server_id::ServerId, DatabaseName};
use generated_types::google::{AlreadyExists, FieldViolation, FieldViolationExt, NotFound};
use generated_types::influxdata::iox::management::v1::{Error as ProtobufError, *};
use data_types::{chunk_metadata::ChunkId, server_id::ServerId, DatabaseName};
use generated_types::{
google::{AlreadyExists, FieldViolation, FieldViolationExt, NotFound},
influxdata::iox::management::v1::{Error as ProtobufError, *},
};
use predicate::delete_predicate::DeletePredicate;
use query::QueryDatabase;
use server::rules::ProvidedDatabaseRules;
use server::{connection::ConnectionManager, ApplicationState, Error, Server};
use server::{
connection::ConnectionManager, rules::ProvidedDatabaseRules, ApplicationState, Error, Server,
};
use std::{convert::TryFrom, fmt::Debug, str::FromStr, sync::Arc};
use tonic::{Request, Response, Status};
use uuid::Uuid;
struct ManagementService<M: ConnectionManager> {
application: Arc<ApplicationState>,
@ -18,7 +18,7 @@ struct ManagementService<M: ConnectionManager> {
serving_readiness: ServingReadiness,
}
use super::error::{
use crate::influxdb_ioxd::rpc::error::{
default_database_error_handler, default_db_error_handler, default_server_error_handler,
};
use crate::influxdb_ioxd::serving_readiness::ServingReadiness;
@ -128,18 +128,28 @@ where
description: e.to_string(),
})?;
match self.server.create_database(provided_rules).await {
Ok(_) => Ok(Response::new(CreateDatabaseResponse {})),
Err(Error::DatabaseAlreadyExists { db_name }) => {
return Err(AlreadyExists {
let database = self
.server
.create_database(provided_rules)
.await
.map_err(|e| match e {
Error::DatabaseAlreadyExists { db_name } => AlreadyExists {
resource_type: "database".to_string(),
resource_name: db_name,
..Default::default()
}
.into())
}
Err(e) => Err(default_server_error_handler(e)),
}
.into(),
_ => default_server_error_handler(e),
})?;
let uuid = database
.provided_rules()
.expect("Database should be initialized or an error should have been returned")
.uuid();
Ok(Response::new(CreateDatabaseResponse {
uuid: uuid.as_bytes().to_vec(),
}))
}
async fn update_database(
@ -157,10 +167,9 @@ where
description: e.to_string(),
})?;
let db_name = provided_rules.db_name().clone();
let updated_rules = self
.server
.update_db_rules(&db_name, provided_rules)
.update_db_rules(provided_rules)
.await
.map_err(default_server_error_handler)?;
@ -175,12 +184,15 @@ where
) -> Result<Response<DeleteDatabaseResponse>, Status> {
let db_name = DatabaseName::new(request.into_inner().db_name).field("db_name")?;
self.server
let uuid = self
.server
.delete_database(&db_name)
.await
.map_err(default_server_error_handler)?;
Ok(Response::new(DeleteDatabaseResponse {}))
Ok(Response::new(DeleteDatabaseResponse {
uuid: uuid.as_bytes().to_vec(),
}))
}
async fn restore_database(
@ -189,34 +201,16 @@ where
) -> Result<Response<RestoreDatabaseResponse>, Status> {
let request = request.into_inner();
let db_name = DatabaseName::new(request.db_name).field("db_name")?;
let generation_id = request.generation_id;
let uuid = Uuid::from_str(&request.uuid).field("uuid")?;
self.server
.restore_database(&db_name, generation_id)
.restore_database(&db_name, uuid)
.await
.map_err(default_server_error_handler)?;
Ok(Response::new(RestoreDatabaseResponse {}))
}
async fn list_deleted_databases(
&self,
_: Request<ListDeletedDatabasesRequest>,
) -> Result<Response<ListDeletedDatabasesResponse>, Status> {
let deleted_databases = self
.server
.list_deleted_databases()
.await
.map_err(default_server_error_handler)?
.into_iter()
.map(Into::into)
.collect();
Ok(Response::new(ListDeletedDatabasesResponse {
deleted_databases,
}))
}
async fn list_detailed_databases(
&self,
_: Request<ListDetailedDatabasesRequest>,

View File

@ -0,0 +1,57 @@
use std::sync::Arc;
use server::connection::ConnectionManager;
use crate::influxdb_ioxd::{
rpc::{add_gated_service, add_service, serve_builder, setup_builder, RpcBuilderInput},
server_type::{database::DatabaseServerType, RpcError},
};
mod flight;
mod management;
mod operations;
mod storage;
mod write;
mod write_pb;
pub async fn server_grpc<M>(
server_type: Arc<DatabaseServerType<M>>,
builder_input: RpcBuilderInput,
) -> Result<(), RpcError>
where
M: ConnectionManager + std::fmt::Debug + Send + Sync + 'static,
{
let builder = setup_builder!(builder_input, server_type);
add_gated_service!(
builder,
storage::make_server(Arc::clone(&server_type.server),)
);
add_gated_service!(
builder,
flight::make_server(Arc::clone(&server_type.server))
);
add_gated_service!(builder, write::make_server(Arc::clone(&server_type.server)));
add_gated_service!(
builder,
write_pb::make_server(Arc::clone(&server_type.server))
);
// It is also important that this is not behind a readiness check (as it is
// used to change the check!)
add_service!(
builder,
management::make_server(
Arc::clone(&server_type.application),
Arc::clone(&server_type.server),
server_type.serving_readiness.clone(),
)
);
add_service!(
builder,
operations::make_server(Arc::clone(server_type.application.job_registry()))
);
serve_builder!(builder);
Ok(())
}

View File

@ -0,0 +1,543 @@
//! This module contains code to translate from InfluxDB IOx data
//! formats into the formats needed by gRPC
use std::{collections::BTreeSet, fmt, sync::Arc};
use arrow::datatypes::DataType as ArrowDataType;
use observability_deps::tracing::trace;
use query::exec::{
fieldlist::FieldList,
seriesset::series::{self, Either},
};
use generated_types::{
measurement_fields_response::{FieldType, MessageField},
read_response::{
frame::Data, BooleanPointsFrame, DataType, FloatPointsFrame, Frame, GroupFrame,
IntegerPointsFrame, SeriesFrame, StringPointsFrame, UnsignedPointsFrame,
},
MeasurementFieldsResponse, ReadResponse, Tag,
};
use super::{TAG_KEY_FIELD, TAG_KEY_MEASUREMENT};
use snafu::Snafu;
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("Error converting series set to gRPC: {}", source))]
ConvertingSeries {
source: query::exec::seriesset::series::Error,
},
#[snafu(display("Unsupported field data type in gRPC data translation: {}", data_type))]
UnsupportedFieldType { data_type: ArrowDataType },
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
/// Convert a set of tag_keys into a form suitable for gRPC transport,
/// adding the special 0x00 (_m) and 0xff (_f) tag keys
///
/// Namely, a Vec<Vec<u8>>, including the measurement and field names
pub fn tag_keys_to_byte_vecs(tag_keys: Arc<BTreeSet<String>>) -> Vec<Vec<u8>> {
// special case measurement (0x00) and field (0xff)
// ensuring they are in the correct sort order (first and last, respectively)
let mut byte_vecs = Vec::with_capacity(2 + tag_keys.len());
byte_vecs.push(TAG_KEY_MEASUREMENT.to_vec()); // Shown as _m == _measurement
tag_keys.iter().for_each(|name| {
byte_vecs.push(name.bytes().collect());
});
byte_vecs.push(TAG_KEY_FIELD.to_vec()); // Shown as _f == _field
byte_vecs
}
/// Convert `Series` and `Group`s into a form suitable for gRPC transport:
///
/// ```
/// (GroupFrame) potentially
///
/// (SeriesFrame for field1)
/// (*Points for field1)
/// (SeriesFrame for field2)
/// (*Points for field2)
/// (....)
/// (SeriesFrame for field1)
/// (*Points for field1)
/// (SeriesFrame for field2)
/// (*Points for field2)
/// (....)
/// ```
///
/// The specific type of (*Points) depends on the type of field column.
pub fn series_or_groups_to_read_response(series_or_groups: Vec<Either>) -> ReadResponse {
let mut frames = vec![];
for series_or_group in series_or_groups {
match series_or_group {
Either::Series(series) => {
series_to_frames(&mut frames, series);
}
Either::Group(group) => {
frames.push(group_to_frame(group));
}
}
}
trace!(frames=%DisplayableFrames::new(&frames), "Response gRPC frames");
ReadResponse { frames }
}
/// Converts a `Series` into frames for GRPC transport
fn series_to_frames(frames: &mut Vec<Frame>, series: series::Series) {
let series::Series { tags, data } = series;
let (data_type, data_frame) = match data {
series::Data::FloatPoints { timestamps, values } => (
DataType::Float,
Data::FloatPoints(FloatPointsFrame { timestamps, values }),
),
series::Data::IntegerPoints { timestamps, values } => (
DataType::Integer,
Data::IntegerPoints(IntegerPointsFrame { timestamps, values }),
),
series::Data::UnsignedPoints { timestamps, values } => (
DataType::Unsigned,
Data::UnsignedPoints(UnsignedPointsFrame { timestamps, values }),
),
series::Data::BooleanPoints { timestamps, values } => (
DataType::Boolean,
Data::BooleanPoints(BooleanPointsFrame { timestamps, values }),
),
series::Data::StringPoints { timestamps, values } => (
DataType::String,
Data::StringPoints(StringPointsFrame { timestamps, values }),
),
};
let series_frame = Data::Series(SeriesFrame {
tags: convert_tags(tags),
data_type: data_type.into(),
});
frames.push(Frame {
data: Some(series_frame),
});
frames.push(Frame {
data: Some(data_frame),
});
}
/// Converts a [`series::Group`] into a storage gRPC `GroupFrame`
/// format that can be returned to the client.
fn group_to_frame(group: series::Group) -> Frame {
let series::Group {
tag_keys,
partition_key_vals,
} = group;
let group_frame = GroupFrame {
tag_keys: arcs_to_bytes(tag_keys),
partition_key_vals: arcs_to_bytes(partition_key_vals),
};
let data = Data::Group(group_frame);
Frame { data: Some(data) }
}
/// Convert the tag=value pairs from Arc<str> to Vec<u8> for gRPC transport
fn convert_tags(tags: Vec<series::Tag>) -> Vec<Tag> {
tags.into_iter()
.map(|series::Tag { key, value }| Tag {
key: key.bytes().collect(),
value: value.bytes().collect(),
})
.collect()
}
fn arcs_to_bytes(s: Vec<Arc<str>>) -> Vec<Vec<u8>> {
s.into_iter().map(|s| s.bytes().collect()).collect()
}
/// Translates FieldList into the gRPC format
pub fn fieldlist_to_measurement_fields_response(
fieldlist: FieldList,
) -> Result<MeasurementFieldsResponse> {
let fields = fieldlist
.fields
.into_iter()
.map(|f| {
Ok(MessageField {
key: f.name,
r#type: datatype_to_measurement_field_enum(&f.data_type)? as i32,
timestamp: f.last_timestamp,
})
})
.collect::<Result<Vec<_>>>()?;
Ok(MeasurementFieldsResponse { fields })
}
fn datatype_to_measurement_field_enum(data_type: &ArrowDataType) -> Result<FieldType> {
match data_type {
ArrowDataType::Float64 => Ok(FieldType::Float),
ArrowDataType::Int64 => Ok(FieldType::Integer),
ArrowDataType::UInt64 => Ok(FieldType::Unsigned),
ArrowDataType::Utf8 => Ok(FieldType::String),
ArrowDataType::Boolean => Ok(FieldType::Boolean),
_ => UnsupportedFieldType {
data_type: data_type.clone(),
}
.fail(),
}
}
/// Wrapper structure that implements [`std::fmt::Display`] for a slice
/// of `Frame`s
struct DisplayableFrames<'a> {
frames: &'a [Frame],
}
impl<'a> DisplayableFrames<'a> {
fn new(frames: &'a [Frame]) -> Self {
Self { frames }
}
}
impl<'a> fmt::Display for DisplayableFrames<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.frames.iter().try_for_each(|frame| {
format_frame(frame, f)?;
writeln!(f)
})
}
}
fn format_frame(frame: &Frame, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let data = &frame.data;
match data {
Some(Data::Series(SeriesFrame { tags, data_type })) => write!(
f,
"SeriesFrame, tags: {}, type: {:?}",
dump_tags(tags),
data_type
),
Some(Data::FloatPoints(FloatPointsFrame { timestamps, values })) => write!(
f,
"FloatPointsFrame, timestamps: {:?}, values: {:?}",
timestamps,
dump_values(values)
),
Some(Data::IntegerPoints(IntegerPointsFrame { timestamps, values })) => write!(
f,
"IntegerPointsFrame, timestamps: {:?}, values: {:?}",
timestamps,
dump_values(values)
),
Some(Data::UnsignedPoints(UnsignedPointsFrame { timestamps, values })) => write!(
f,
"UnsignedPointsFrame, timestamps: {:?}, values: {:?}",
timestamps,
dump_values(values)
),
Some(Data::BooleanPoints(BooleanPointsFrame { timestamps, values })) => write!(
f,
"BooleanPointsFrame, timestamps: {:?}, values: {}",
timestamps,
dump_values(values)
),
Some(Data::StringPoints(StringPointsFrame { timestamps, values })) => write!(
f,
"StringPointsFrame, timestamps: {:?}, values: {}",
timestamps,
dump_values(values)
),
Some(Data::Group(GroupFrame {
tag_keys,
partition_key_vals,
})) => write!(
f,
"GroupFrame, tag_keys: {}, partition_key_vals: {}",
dump_u8_vec(tag_keys),
dump_u8_vec(partition_key_vals)
),
None => write!(f, "<NO data field>"),
}
}
fn dump_values<T>(v: &[T]) -> String
where
T: std::fmt::Display,
{
v.iter()
.map(|item| format!("{}", item))
.collect::<Vec<_>>()
.join(",")
}
fn dump_u8_vec(encoded_strings: &[Vec<u8>]) -> String {
encoded_strings
.iter()
.map(|b| String::from_utf8_lossy(b))
.collect::<Vec<_>>()
.join(",")
}
fn dump_tags(tags: &[Tag]) -> String {
tags.iter()
.map(|tag| {
format!(
"{}={}",
String::from_utf8_lossy(&tag.key),
String::from_utf8_lossy(&tag.value),
)
})
.collect::<Vec<_>>()
.join(",")
}
#[cfg(test)]
mod tests {
use std::convert::TryInto;
use arrow::{
array::{
ArrayRef, BooleanArray, Float64Array, Int64Array, StringArray,
TimestampNanosecondArray, UInt64Array,
},
datatypes::DataType as ArrowDataType,
record_batch::RecordBatch,
};
use query::exec::{
field::FieldIndexes,
fieldlist::Field,
seriesset::{
series::{Group, Series},
SeriesSet,
},
};
use super::*;
#[test]
fn test_tag_keys_to_byte_vecs() {
fn convert_keys(tag_keys: &[&str]) -> Vec<Vec<u8>> {
let tag_keys = tag_keys
.iter()
.map(|s| s.to_string())
.collect::<BTreeSet<_>>();
tag_keys_to_byte_vecs(Arc::new(tag_keys))
}
assert_eq!(convert_keys(&[]), vec![[0].to_vec(), [255].to_vec()]);
assert_eq!(
convert_keys(&["key_a"]),
vec![[0].to_vec(), b"key_a".to_vec(), [255].to_vec()]
);
assert_eq!(
convert_keys(&["key_a", "key_b"]),
vec![
[0].to_vec(),
b"key_a".to_vec(),
b"key_b".to_vec(),
[255].to_vec()
]
);
}
#[test]
fn test_series_set_conversion() {
let series_set = SeriesSet {
table_name: Arc::from("the_table"),
tags: vec![(Arc::from("tag1"), Arc::from("val1"))],
field_indexes: FieldIndexes::from_timestamp_and_value_indexes(5, &[0, 1, 2, 3, 4]),
start_row: 1,
num_rows: 2,
batch: make_record_batch(),
};
let series: Vec<Series> = series_set
.try_into()
.expect("Correctly converted series set");
let series: Vec<Either> = series.into_iter().map(|s| s.into()).collect();
let response = series_or_groups_to_read_response(series);
let dumped_frames = dump_frames(&response.frames);
let expected_frames = vec![
"SeriesFrame, tags: _field=string_field,_measurement=the_table,tag1=val1, type: 4",
"StringPointsFrame, timestamps: [2000, 3000], values: bar,baz",
"SeriesFrame, tags: _field=int_field,_measurement=the_table,tag1=val1, type: 1",
"IntegerPointsFrame, timestamps: [2000, 3000], values: \"2,3\"",
"SeriesFrame, tags: _field=uint_field,_measurement=the_table,tag1=val1, type: 2",
"UnsignedPointsFrame, timestamps: [2000, 3000], values: \"22,33\"",
"SeriesFrame, tags: _field=float_field,_measurement=the_table,tag1=val1, type: 0",
"FloatPointsFrame, timestamps: [2000, 3000], values: \"20.1,30.1\"",
"SeriesFrame, tags: _field=boolean_field,_measurement=the_table,tag1=val1, type: 3",
"BooleanPointsFrame, timestamps: [2000, 3000], values: false,true",
];
assert_eq!(
dumped_frames, expected_frames,
"Expected:\n{:#?}\nActual:\n{:#?}",
expected_frames, dumped_frames
);
}
#[test]
fn test_group_group_conversion() {
let group = Group {
tag_keys: vec![
Arc::from("_field"),
Arc::from("_measurement"),
Arc::from("tag1"),
Arc::from("tag2"),
],
partition_key_vals: vec![Arc::from("val1"), Arc::from("val2")],
};
let response = series_or_groups_to_read_response(vec![group.into()]);
let dumped_frames = dump_frames(&response.frames);
let expected_frames = vec![
"GroupFrame, tag_keys: _field,_measurement,tag1,tag2, partition_key_vals: val1,val2",
];
assert_eq!(
dumped_frames, expected_frames,
"Expected:\n{:#?}\nActual:\n{:#?}",
expected_frames, dumped_frames
);
}
#[test]
fn test_field_list_conversion() {
let input = FieldList {
fields: vec![
Field {
name: "float".into(),
data_type: ArrowDataType::Float64,
last_timestamp: 1000,
},
Field {
name: "int".into(),
data_type: ArrowDataType::Int64,
last_timestamp: 2000,
},
Field {
name: "uint".into(),
data_type: ArrowDataType::UInt64,
last_timestamp: 3000,
},
Field {
name: "string".into(),
data_type: ArrowDataType::Utf8,
last_timestamp: 4000,
},
Field {
name: "bool".into(),
data_type: ArrowDataType::Boolean,
last_timestamp: 5000,
},
],
};
let expected = MeasurementFieldsResponse {
fields: vec![
MessageField {
key: "float".into(),
r#type: FieldType::Float as i32,
timestamp: 1000,
},
MessageField {
key: "int".into(),
r#type: FieldType::Integer as i32,
timestamp: 2000,
},
MessageField {
key: "uint".into(),
r#type: FieldType::Unsigned as i32,
timestamp: 3000,
},
MessageField {
key: "string".into(),
r#type: FieldType::String as i32,
timestamp: 4000,
},
MessageField {
key: "bool".into(),
r#type: FieldType::Boolean as i32,
timestamp: 5000,
},
],
};
let actual = fieldlist_to_measurement_fields_response(input).unwrap();
assert_eq!(
actual, expected,
"Expected:\n{:#?}\nActual:\n{:#?}",
expected, actual
);
}
#[test]
fn test_field_list_conversion_error() {
let input = FieldList {
fields: vec![Field {
name: "unsupported".into(),
data_type: ArrowDataType::Int8,
last_timestamp: 1000,
}],
};
let result = fieldlist_to_measurement_fields_response(input);
match result {
Ok(r) => panic!("Unexpected success: {:?}", r),
Err(e) => {
let expected = "Unsupported field data type in gRPC data translation: Int8";
let actual = format!("{}", e);
assert!(
actual.contains(expected),
"Could not find expected '{}' in actual '{}'",
expected,
actual
);
}
}
}
fn make_record_batch() -> RecordBatch {
let string_array: ArrayRef = Arc::new(StringArray::from(vec!["foo", "bar", "baz", "foo"]));
let int_array: ArrayRef = Arc::new(Int64Array::from(vec![1, 2, 3, 4]));
let uint_array: ArrayRef = Arc::new(UInt64Array::from(vec![11, 22, 33, 44]));
let float_array: ArrayRef = Arc::new(Float64Array::from(vec![10.1, 20.1, 30.1, 40.1]));
let bool_array: ArrayRef = Arc::new(BooleanArray::from(vec![true, false, true, false]));
let timestamp_array: ArrayRef = Arc::new(TimestampNanosecondArray::from_vec(
vec![1000, 2000, 3000, 4000],
None,
));
RecordBatch::try_from_iter_with_nullable(vec![
("string_field", string_array, true),
("int_field", int_array, true),
("uint_field", uint_array, true),
("float_field", float_array, true),
("boolean_field", bool_array, true),
("time", timestamp_array, true),
])
.expect("created new record batch")
}
fn dump_frames(frames: &[Frame]) -> Vec<String> {
DisplayableFrames::new(frames)
.to_string()
.trim()
.split('\n')
.map(|s| s.to_string())
.collect()
}
}

View File

@ -21,15 +21,15 @@ use generated_types::{
use observability_deps::tracing::{error, info};
use predicate::predicate::PredicateBuilder;
use query::exec::{
fieldlist::FieldList, seriesset::Error as SeriesSetError, ExecutionContextProvider,
fieldlist::FieldList, seriesset::converter::Error as SeriesSetError, ExecutionContextProvider,
};
use server::DatabaseStore;
use crate::influxdb_ioxd::{
planner::Planner,
rpc::storage::{
server_type::database::rpc::storage::{
data::{
fieldlist_to_measurement_fields_response, series_set_item_to_read_response,
fieldlist_to_measurement_fields_response, series_or_groups_to_read_response,
tag_keys_to_byte_vecs,
},
expr::{self, AddRpcNode, GroupByAndAggregate, Loggable, SpecialTagKeys},
@ -130,18 +130,12 @@ pub enum Error {
source: super::expr::Error,
},
#[snafu(display("Error computing series: {}", source))]
ComputingSeriesSet { source: SeriesSetError },
#[snafu(display("Error converting tag_key to UTF-8 in tag_values request, tag_key value '{}': {}", String::from_utf8_lossy(source.as_bytes()), source))]
ConvertingTagKeyInTagValues { source: std::string::FromUtf8Error },
#[snafu(display("Error computing groups series: {}", source))]
ComputingGroupedSeriesSet { source: SeriesSetError },
#[snafu(display("Error converting time series into gRPC response: {}", source))]
ConvertingSeriesSet { source: super::data::Error },
#[snafu(display("Converting field information series into gRPC response: {}", source))]
ConvertingFieldList { source: super::data::Error },
@ -195,10 +189,8 @@ impl Error {
Self::ConvertingReadGroupAggregate { .. } => Status::invalid_argument(self.to_string()),
Self::ConvertingReadGroupType { .. } => Status::invalid_argument(self.to_string()),
Self::ConvertingWindowAggregate { .. } => Status::invalid_argument(self.to_string()),
Self::ComputingSeriesSet { .. } => Status::invalid_argument(self.to_string()),
Self::ConvertingTagKeyInTagValues { .. } => Status::invalid_argument(self.to_string()),
Self::ComputingGroupedSeriesSet { .. } => Status::invalid_argument(self.to_string()),
Self::ConvertingSeriesSet { .. } => Status::invalid_argument(self.to_string()),
Self::ConvertingFieldList { .. } => Status::invalid_argument(self.to_string()),
Self::SendingResults { .. } => Status::internal(self.to_string()),
Self::InternalHintsFieldNotSupported { .. } => Status::internal(self.to_string()),
@ -905,8 +897,8 @@ where
.context(PlanningFilteringSeries { db_name })?;
// Execute the plans.
let ss_items = ctx
.to_series_set(series_plan)
let series_or_groups = ctx
.to_series_and_groups(series_plan)
.await
.map_err(|e| Box::new(e) as _)
.context(FilteringSeries {
@ -914,11 +906,9 @@ where
})
.log_if_error("Running series set plan")?;
// Convert results into API responses
ss_items
.into_iter()
.map(|series_set| series_set_item_to_read_response(series_set).context(ConvertingSeriesSet))
.collect::<Result<Vec<ReadResponse>, Error>>()
let response = series_or_groups_to_read_response(series_or_groups);
Ok(vec![response])
}
/// Launch async tasks that send the result of executing read_group to `tx`
@ -971,8 +961,8 @@ where
// if big queries are causing a significant latency in TTFB.
// Execute the plans
let ss_items = ctx
.to_series_set(grouped_series_set_plan)
let series_or_groups = ctx
.to_series_and_groups(grouped_series_set_plan)
.await
.map_err(|e| Box::new(e) as _)
.context(GroupingSeries {
@ -980,11 +970,9 @@ where
})
.log_if_error("Running Grouped SeriesSet Plan")?;
// Convert plans to API responses
ss_items
.into_iter()
.map(|series_set| series_set_item_to_read_response(series_set).context(ConvertingSeriesSet))
.collect::<Result<Vec<ReadResponse>, Error>>()
let response = series_or_groups_to_read_response(series_or_groups);
Ok(vec![response])
}
/// Return field names, restricted via optional measurement, timestamp and
@ -1128,11 +1116,11 @@ mod tests {
let chunk0 = TestChunk::new("h2o")
.with_id(0)
.with_predicate_match(PredicateMatch::AtLeastOne);
.with_predicate_match(PredicateMatch::AtLeastOneNonNullField);
let chunk1 = TestChunk::new("o2")
.with_id(1)
.with_predicate_match(PredicateMatch::AtLeastOne);
.with_predicate_match(PredicateMatch::AtLeastOneNonNullField);
fixture
.test_storage
@ -1486,7 +1474,8 @@ mod tests {
tag_key: [0].into(),
};
let chunk = TestChunk::new("h2o").with_predicate_match(PredicateMatch::AtLeastOne);
let chunk =
TestChunk::new("h2o").with_predicate_match(PredicateMatch::AtLeastOneNonNullField);
fixture
.test_storage
@ -1736,7 +1725,8 @@ mod tests {
// Note we don't include the actual line / column in the
// expected panic message to avoid needing to update the test
// whenever the source code file changed.
let expected_error = "panicked at 'This is a test panic', src/influxdb_ioxd/rpc/testing.rs";
let expected_error =
"panicked at 'This is a test panic', influxdb_iox/src/influxdb_ioxd/rpc/testing.rs";
assert_contains!(captured_logs, expected_error);
// Ensure that panics don't exhaust the tokio executor by
@ -1841,6 +1831,7 @@ mod tests {
let chunk = TestChunk::new("TheMeasurement")
.with_time_column()
.with_i64_field_column("my field")
.with_tag_column("state")
.with_one_row_of_data();
@ -1869,7 +1860,11 @@ mod tests {
let frames = fixture.storage_client.read_group(request).await.unwrap();
assert_eq!(frames.len(), 1);
// three frames:
// GroupFrame
// SeriesFrame (tag=state, field=my field)
// DataFrame
assert_eq!(frames.len(), 3);
grpc_request_metric_has_count(&fixture, "ReadGroup", "ok", 1);
}
@ -2287,9 +2282,11 @@ mod tests {
true,
))
.add_service(crate::influxdb_ioxd::rpc::testing::make_server())
.add_service(crate::influxdb_ioxd::rpc::storage::make_server(Arc::clone(
&test_storage,
)));
.add_service(
crate::influxdb_ioxd::server_type::database::rpc::storage::make_server(
Arc::clone(&test_storage),
),
);
let server = async move {
let stream = TcpListenerStream::new(socket);

View File

@ -14,7 +14,7 @@ use influxdb_line_protocol::parse_lines;
use observability_deps::tracing::debug;
use server::{connection::ConnectionManager, Server};
use super::error::default_server_error_handler;
use crate::influxdb_ioxd::rpc::error::default_server_error_handler;
/// Implementation of the write service
struct WriteService<M: ConnectionManager> {
@ -30,6 +30,7 @@ where
&self,
request: tonic::Request<WriteRequest>,
) -> Result<tonic::Response<WriteResponse>, tonic::Status> {
let span_ctx = request.extensions().get().cloned();
let request = request.into_inner();
// The time, in nanoseconds since the epoch, to assign to any points that don't
@ -57,7 +58,7 @@ where
debug!(%db_name, %lp_chars, lp_line_count, body_size=lp_data.len(), num_fields, "Writing lines into database");
self.server
.write_lines(&db_name, &lines, default_time)
.write_lines(&db_name, &lines, default_time, span_ctx)
.await
.map_err(default_server_error_handler)?;
@ -69,6 +70,7 @@ where
&self,
request: tonic::Request<WriteEntryRequest>,
) -> Result<tonic::Response<WriteEntryResponse>, tonic::Status> {
let span_ctx = request.extensions().get().cloned();
let request = request.into_inner();
let db_name = DatabaseName::new(&request.db_name).field("db_name")?;
@ -79,7 +81,7 @@ where
let entry = entry::Entry::try_from(request.entry).field("entry")?;
self.server
.write_entry_local(&db_name, entry)
.write_entry_local(&db_name, entry, span_ctx)
.await
.map_err(default_server_error_handler)?;

View File

@ -1,10 +1,11 @@
use super::error::default_server_error_handler;
use generated_types::google::FieldViolation;
use generated_types::influxdata::pbdata::v1::*;
use server::{connection::ConnectionManager, Server};
use std::fmt::Debug;
use std::sync::Arc;
use crate::influxdb_ioxd::rpc::error::default_server_error_handler;
struct PBWriteService<M: ConnectionManager> {
server: Arc<Server<M>>,
}
@ -18,13 +19,14 @@ where
&self,
request: tonic::Request<WriteRequest>,
) -> Result<tonic::Response<WriteResponse>, tonic::Status> {
let span_ctx = request.extensions().get().cloned();
let database_batch = request
.into_inner()
.database_batch
.ok_or_else(|| FieldViolation::required("database_batch"))?;
self.server
.write_pb(database_batch)
.write_pb(database_batch, span_ctx)
.await
.map_err(default_server_error_handler)?;

View File

@ -0,0 +1,72 @@
use std::sync::Arc;
use object_store::ObjectStore;
use observability_deps::tracing::warn;
use server::{
connection::ConnectionManagerImpl, ApplicationState, RemoteTemplate, Server, ServerConfig,
};
use snafu::{ResultExt, Snafu};
use trace::TraceCollector;
use crate::{
commands::run::database::Config,
structopt_blocks::object_store::{check_object_store, warn_about_inmem_store},
};
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("Cannot parse object store config: {}", source))]
ObjectStoreParsing {
source: crate::structopt_blocks::object_store::ParseError,
},
#[snafu(display("Cannot check object store config: {}", source))]
ObjectStoreCheck {
source: crate::structopt_blocks::object_store::CheckError,
},
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
pub async fn make_application(
config: &Config,
trace_collector: Option<Arc<dyn TraceCollector>>,
) -> Result<Arc<ApplicationState>> {
warn_about_inmem_store(&config.run_config.object_store_config);
let object_store = ObjectStore::try_from(&config.run_config.object_store_config)
.context(ObjectStoreParsing)?;
check_object_store(&object_store)
.await
.context(ObjectStoreCheck)?;
let object_storage = Arc::new(object_store);
Ok(Arc::new(ApplicationState::new(
object_storage,
config.num_worker_threads,
trace_collector,
)))
}
pub fn make_server(
application: Arc<ApplicationState>,
config: &Config,
) -> Arc<Server<ConnectionManagerImpl>> {
let server_config = ServerConfig {
remote_template: config.remote_template.clone().map(RemoteTemplate::new),
wipe_catalog_on_error: config.wipe_catalog_on_error.into(),
skip_replay_and_seek_instead: config.skip_replay_and_seek_instead.into(),
};
let connection_manager = ConnectionManagerImpl::new();
let app_server = Arc::new(Server::new(connection_manager, application, server_config));
// if this ID isn't set the server won't be usable until this is set via an API
// call
if let Some(id) = config.run_config.server_id_config.server_id {
app_server.set_id(id).expect("server id already set");
} else {
warn!("server ID not set. ID must be set via the INFLUXDB_IOX_ID config or API before writing or querying data.");
}
app_server
}
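For orientation, a hedged sketch of how these two helpers are expected to be called in sequence during `run database` startup; the surrounding HTTP/gRPC listener wiring is elided and the function name is made up:

// Hypothetical startup wiring; `config` comes from CLI parsing and
// `trace_collector` from tracing initialization.
async fn startup(
    config: &Config,
    trace_collector: Option<Arc<dyn TraceCollector>>,
) -> Result<()> {
    let application = make_application(config, trace_collector).await?;
    let app_server = make_server(Arc::clone(&application), config);
    // ... hand `application` and `app_server` to the HTTP and gRPC servers ...
    Ok(())
}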

View File

@ -0,0 +1,128 @@
use std::sync::Arc;
use async_trait::async_trait;
use hyper::{Body, Request, Response, StatusCode};
use metric::Registry;
use snafu::Snafu;
use trace::TraceCollector;
use super::rpc::RpcBuilderInput;
pub mod common_state;
pub mod database;
/// Constants used in API error codes.
///
/// Expressing this as an enum prevents reuse of discriminants, and as they're
/// effectively consts this uses UPPER_SNAKE_CASE.
#[allow(non_camel_case_types, clippy::upper_case_acronyms)]
#[derive(Debug, PartialEq)]
pub enum ApiErrorCode {
/// An unknown/unhandled error
UNKNOWN = 100,
/// The database name in the request is invalid.
DB_INVALID_NAME = 101,
/// The database referenced already exists.
DB_ALREADY_EXISTS = 102,
/// The database referenced does not exist.
DB_NOT_FOUND = 103,
}
impl From<ApiErrorCode> for u32 {
fn from(v: ApiErrorCode) -> Self {
v as Self
}
}
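As a small illustration, the `From` impl yields the numeric code carried in HTTP error bodies; the values come straight from the enum above:

// Minimal illustration of the conversion; discriminants as defined above.
fn example_error_codes() {
    assert_eq!(u32::from(ApiErrorCode::UNKNOWN), 100);
    let code: u32 = ApiErrorCode::DB_NOT_FOUND.into();
    assert_eq!(code, 103);
}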
pub trait RouteError: std::error::Error + snafu::AsErrorSource {
fn response(&self) -> Response<Body>;
fn bad_request(&self) -> Response<Body> {
Response::builder()
.status(StatusCode::BAD_REQUEST)
.body(self.body())
.unwrap()
}
fn internal_error(&self) -> Response<Body> {
Response::builder()
.status(StatusCode::INTERNAL_SERVER_ERROR)
.body(self.body())
.unwrap()
}
fn not_found(&self) -> Response<Body> {
Response::builder()
.status(StatusCode::NOT_FOUND)
.body(Body::empty())
.unwrap()
}
fn no_content(&self) -> Response<Body> {
Response::builder()
.status(StatusCode::NO_CONTENT)
.body(self.body())
.unwrap()
}
fn body(&self) -> Body {
let json =
serde_json::json!({"error": self.to_string(), "error_code": self.api_error_code()})
.to_string();
Body::from(json)
}
/// Map the error type into an API error code.
fn api_error_code(&self) -> u32 {
ApiErrorCode::UNKNOWN.into()
}
}
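Only `response` lacks a default, so a concrete server type mostly picks the right helper per error variant. A hedged sketch for a hypothetical error type (the real implementations live next to their HTTP handlers):

// Hypothetical error type, not part of IOx; shown only to illustrate the trait.
#[derive(Debug, Snafu)]
enum ExampleRouteError {
    #[snafu(display("database {} not found", name))]
    DatabaseNotFound { name: String },
}

impl RouteError for ExampleRouteError {
    fn response(&self) -> Response<Body> {
        match self {
            Self::DatabaseNotFound { .. } => self.not_found(),
        }
    }

    fn api_error_code(&self) -> u32 {
        match self {
            Self::DatabaseNotFound { .. } => ApiErrorCode::DB_NOT_FOUND.into(),
        }
    }
}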
#[derive(Debug, Snafu)]
pub enum RpcError {
#[snafu(display("gRPC transport error: {}{}", source, details))]
TransportError {
source: tonic::transport::Error,
details: String,
},
}
// Custom impl to include underlying source (not included in tonic
// transport error)
impl From<tonic::transport::Error> for RpcError {
fn from(source: tonic::transport::Error) -> Self {
use std::error::Error;
let details = source
.source()
.map(|e| format!(" ({})", e))
.unwrap_or_else(|| "".to_string());
Self::TransportError { source, details }
}
}
#[async_trait]
pub trait ServerType: std::fmt::Debug + Send + Sync + 'static {
type RouteError: RouteError;
fn metric_registry(&self) -> Arc<Registry>;
fn trace_collector(&self) -> Option<Arc<dyn TraceCollector>>;
/// Route given HTTP request.
///
/// Note that this is only called if none of the shared, common routes (e.g. `/health`) match.
async fn route_http_request(
&self,
req: Request<Body>,
) -> Result<Response<Body>, Self::RouteError>;
async fn server_grpc(self: Arc<Self>, builder_input: RpcBuilderInput) -> Result<(), RpcError>;
async fn background_worker(self: Arc<Self>);
fn shutdown_background_worker(&self);
}
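To show the shape of the contract, a deliberately abbreviated, hypothetical implementer (the real one for the database server type lives in `server_type::database`; the error type reuses the sketch after `RouteError` above):

#[derive(Debug)]
struct ExampleServerType {
    metrics: Arc<Registry>,
}

#[async_trait]
impl ServerType for ExampleServerType {
    type RouteError = ExampleRouteError;

    fn metric_registry(&self) -> Arc<Registry> {
        Arc::clone(&self.metrics)
    }

    fn trace_collector(&self) -> Option<Arc<dyn TraceCollector>> {
        None
    }

    async fn route_http_request(
        &self,
        _req: Request<Body>,
    ) -> Result<Response<Body>, Self::RouteError> {
        // A real server type would dispatch on the request path here.
        Ok(Response::new(Body::empty()))
    }

    async fn server_grpc(self: Arc<Self>, _builder_input: RpcBuilderInput) -> Result<(), RpcError> {
        // A real server type would register its gRPC services on the builder.
        Ok(())
    }

    async fn background_worker(self: Arc<Self>) {
        // Long-running maintenance work would go here.
    }

    fn shutdown_background_worker(&self) {}
}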

View File

@ -1,4 +1,5 @@
//! Entrypoint of InfluxDB IOx binary
#![recursion_limit = "512"] // required for print_cpu
#![deny(rustdoc::broken_intra_doc_links, rustdoc::bare_urls, rust_2018_idioms)]
#![warn(
missing_debug_implementations,
@ -8,18 +9,15 @@
clippy::future_not_send
)]
use crate::commands::tracing::{init_logs_and_tracing, init_simple_logs, TroggingGuard};
use dotenv::dotenv;
use influxdb_iox_client::connection::Builder;
use observability_deps::tracing::warn;
use once_cell::sync::Lazy;
use std::str::FromStr;
use structopt::StructOpt;
use tokio::runtime::Runtime;
use commands::tracing::{init_logs_and_tracing, init_simple_logs};
use observability_deps::tracing::warn;
use crate::commands::tracing::TroggingGuard;
use influxdb_iox_client::connection::Builder;
use std::str::FromStr;
mod commands {
pub mod database;
pub mod debug;
@ -31,8 +29,7 @@ mod commands {
pub mod tracing;
}
mod object_store;
mod server_id;
mod structopt_blocks;
pub mod influxdb_ioxd;
@ -48,6 +45,19 @@ static VERSION_STRING: Lazy<String> = Lazy::new(|| {
)
});
/// A comfy_table style that uses single ASCII lines for all borders with plusses at intersections.
///
/// Example:
///
/// ```text
/// +------+--------------------------------------+
/// | Name | UUID                                 |
/// +------+--------------------------------------+
/// | bar  | ccc2b8bc-f25d-4341-9b64-b9cfe50d26de |
/// | foo  | 3317ff2b-bbab-43ae-8c63-f0e9ea2f3bdb |
/// +------+--------------------------------------+
/// ```
const TABLE_STYLE_SINGLE_LINE_BORDERS: &str = "||--+-++| ++++++";
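A hedged sketch of how the constant is meant to be used with `comfy_table`; the header and rows reuse the illustrative values from the doc comment and the function name is made up:

// Assumes the `comfy_table` dependency; the data is purely illustrative.
fn render_example_table() -> String {
    let mut table = comfy_table::Table::new();
    table.load_preset(TABLE_STYLE_SINGLE_LINE_BORDERS);
    table.set_header(vec!["Name", "UUID"]);
    table.add_row(vec!["bar", "ccc2b8bc-f25d-4341-9b64-b9cfe50d26de"]);
    table.add_row(vec!["foo", "3317ff2b-bbab-43ae-8c63-f0e9ea2f3bdb"]);
    table.to_string()
}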
#[cfg(all(feature = "heappy", feature = "jemalloc_replacing_malloc"))]
compile_error!("heappy and jemalloc_replacing_malloc features are mutually exclusive");
@ -60,19 +70,19 @@ compile_error!("heappy and jemalloc_replacing_malloc features are mutually exclu
Examples:
# Run the InfluxDB IOx server:
influxdb_iox run
influxdb_iox run database
# Run the interactive SQL prompt
influxdb_iox sql
# Display all server settings
influxdb_iox run --help
influxdb_iox run database --help
# Run the InfluxDB IOx server with extra verbose logging
influxdb_iox run -v
influxdb_iox run database -v
# Run InfluxDB IOx with full debug logging specified with RUST_LOG
RUST_LOG=debug influxdb_iox run
RUST_LOG=debug influxdb_iox run database
Commands are generally structured in the form:
<type of object> <action> <arguments>

View File

@ -0,0 +1,48 @@
/// Boolean flag that works with environment variables.
///
/// Workaround for <https://github.com/TeXitoi/structopt/issues/428>
#[derive(Debug, Clone, Copy)]
pub enum BooleanFlag {
True,
False,
}
impl std::str::FromStr for BooleanFlag {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_ascii_lowercase().as_str() {
"yes" | "y" | "true" | "t" | "1" => Ok(Self::True),
"no" | "n" | "false" | "f" | "0" => Ok(Self::False),
_ => Err(format!(
"Invalid boolean flag '{}'. Valid options: yes, no, y, n, true, false, t, f, 1, 0",
s
)),
}
}
}
impl From<BooleanFlag> for bool {
fn from(yes_no: BooleanFlag) -> Self {
matches!(yes_no, BooleanFlag::True)
}
}
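In practice the flag is declared as a structopt field and converted back to a plain `bool` with `.into()`, as `make_server` does above. A hedged sketch; the flag and env var names are made up for illustration:

use structopt::StructOpt;

// Hypothetical config block; only the wiring of `BooleanFlag` is the point.
#[derive(Debug, StructOpt)]
struct ExampleConfig {
    /// Wipe the catalog on error instead of refusing to start.
    #[structopt(long, env = "EXAMPLE_WIPE_CATALOG_ON_ERROR", default_value = "no")]
    wipe_catalog_on_error: BooleanFlag,
}

Downstream code then recovers a plain boolean via `bool::from(config.wipe_catalog_on_error)` or `.into()`.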
#[cfg(test)]
mod tests {
use std::str::FromStr;
use super::*;
#[test]
fn test_parsing() {
assert!(bool::from(BooleanFlag::from_str("yes").unwrap()));
assert!(bool::from(BooleanFlag::from_str("Yes").unwrap()));
assert!(bool::from(BooleanFlag::from_str("YES").unwrap()));
assert!(!bool::from(BooleanFlag::from_str("No").unwrap()));
assert!(!bool::from(BooleanFlag::from_str("FaLse").unwrap()));
BooleanFlag::from_str("foo").unwrap_err();
}
}

View File

@ -0,0 +1,8 @@
//! Building blocks for [`structopt`]-driven configs.
//!
//! They can easily be reused via `#[structopt(flatten)]`.
pub mod boolean_flag;
pub mod object_store;
pub mod run_config;
pub mod server_id;
pub mod socket_addr;
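As a sketch of the intended composition; the struct and type names below are assumptions standing in for whatever each submodule actually exports:

use structopt::StructOpt;

// Assumed re-exports from the submodules above; the exact paths and type
// names are hypothetical.
use super::{object_store::ObjectStoreConfig, server_id::ServerIdConfig};

// Hypothetical command config that embeds two of the blocks via flattening,
// so their flags and env vars appear directly on that command.
#[derive(Debug, StructOpt)]
struct ExampleRunConfig {
    #[structopt(flatten)]
    object_store_config: ObjectStoreConfig,

    #[structopt(flatten)]
    server_id_config: ServerIdConfig,
}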

Some files were not shown because too many files have changed in this diff.